author     Arto Merilainen <amerilainen@nvidia.com>    2014-03-19 03:38:25 -0400
committer  Dan Willemsen <dwillemsen@nvidia.com>       2015-03-18 15:08:53 -0400
commit     a9785995d5f22aaeb659285f8aeb64d8b56982e0 (patch)
tree       cc75f75bcf43db316a002a7a240b81f299bf6d7f
parent     61efaf843c22b85424036ec98015121c08f5f16c (diff)
gpu: nvgpu: Add NVIDIA GPU Driver
This patch moves the NVIDIA GPU driver to a new location.

Bug 1482562

Change-Id: I24293810b9d0f1504fd9be00135e21dad656ccb6
Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-on: http://git-master/r/383722
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r--  drivers/gpu/nvgpu/Kconfig  60
-rw-r--r--  drivers/gpu/nvgpu/gk20a/Makefile  36
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c  293
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.h  50
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c  2111
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h  172
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  356
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h  102
-rw-r--r--  drivers/gpu/nvgpu/gk20a/clk_gk20a.c  865
-rw-r--r--  drivers/gpu/nvgpu/gk20a/clk_gk20a.h  94
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c  240
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h  28
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c  699
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h  83
-rw-r--r--  drivers/gpu/nvgpu/gk20a/debug_gk20a.c  295
-rw-r--r--  drivers/gpu/nvgpu/gk20a/debug_gk20a.h  25
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fb_gk20a.c  37
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fb_gk20a.h  21
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c  1836
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.h  164
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c  1681
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h  559
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator.c  1247
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator.h  177
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c  374
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h  39
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_scale.c  358
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_scale.h  51
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c  335
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c  333
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h  149
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c  256
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  6747
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.h  406
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h  179
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hal.c  33
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hal.h  25
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hal_gk20a.c  50
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hal_gk20a.h  28
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h  105
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h  113
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h  85
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h  245
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h  213
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h  565
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h  141
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h  1141
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h  3173
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h  221
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h  253
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h  469
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h  137
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h  226
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h  226
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h  69
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h  141
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h  737
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h  389
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h  2150
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h  225
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h  101
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h  137
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h  301
-rw-r--r--  drivers/gpu/nvgpu/gk20a/kind_gk20a.c  424
-rw-r--r--  drivers/gpu/nvgpu/gk20a/kind_gk20a.h  67
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ltc_common.c  243
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ltc_gk20a.c  203
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ltc_gk20a.h  21
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  2984
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h  464
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a.h  160
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c  35
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c  561
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.c  3796
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.h  1097
-rw-r--r--  drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c  91
-rw-r--r--  drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h  27
-rw-r--r--  drivers/gpu/nvgpu/gk20a/regops_gk20a.c  704
-rw-r--r--  drivers/gpu/nvgpu/gk20a/regops_gk20a.h  47
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sim_gk20a.h  62
-rw-r--r--  drivers/gpu/nvgpu/gk20a/therm_gk20a.c  142
-rw-r--r--  drivers/gpu/nvgpu/gk20a/therm_gk20a.h  33
82 files changed, 43318 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig
new file mode 100644
index 00000000..160ec8be
--- /dev/null
+++ b/drivers/gpu/nvgpu/Kconfig
@@ -0,0 +1,60 @@
1config GK20A
2 bool "Nvidia GK20A GPU support"
3 help
4 Choose this option if you have an SoC with integrated
5 Nvidia GPU IP.
6
7config GK20A_DEFAULT_TIMEOUT
8 depends on GK20A
9 int "Default timeout for submits"
10 default 10000
11 help
12 Default timeout for jobs in milliseconds. Set to zero for no timeout.
13
14config GK20A_PMU
15 bool "Support GK20A PMU"
16 depends on GK20A
17 default n
18 help
19 Say Y here to enable GK20A PMU features.
20
21choice
22 depends on GK20A
23 prompt "Enable GK20A frequency scaling"
24 default GK20A_PERFMON
25 optional
26 help
27 Select this entry to enable gk20a frequency scaling.
28
29config GK20A_PERFMON
30 bool "Use Perfmon"
31 help
32 Select this to enable built-in perfmon scaling.
33 The built-in scaling option uses a simplistic
34 scaling mechanism (increase the frequency when busy,
35 decrease it when idle).
36
37config GK20A_DEVFREQ
38 bool "Use Devfreq"
39 help
40 Select this to use devfreq-based scaling.
41 Devfreq is a common framework that allows using
42 a variety of different governors and changing
43 between governors on the fly. By default, no
44 governor is selected.
45
46endchoice
47
48config GK20A_CYCLE_STATS
49 bool "Support GK20A GPU CYCLE STATS"
50 depends on GK20A
51 default y
52 help
53 Say Y here to enable the cycle stats debugging features.
54
55config GK20A_PHYS_PAGE_TABLES
56 bool "Use physical addressing for gk20a page tables"
57 default y if TEGRA_SIMULATION_PLATFORM
58 help
59 Use physical addressing for gk20a page tables. If this is off, we
60 use SMMU translation.
diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile
new file mode 100644
index 00000000..f9b06b72
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/Makefile
@@ -0,0 +1,36 @@
1
2GCOV_PROFILE := y
3ccflags-y += -Idrivers/devfreq
4ccflags-y += -Wno-multichar
5ccflags-y += -Werror
6
7obj-$(CONFIG_GK20A) += \
8 gk20a.o \
9 as_gk20a.o \
10 ctrl_gk20a.o \
11 fifo_gk20a.o \
12 channel_gk20a.o \
13 channel_sync_gk20a.o \
14 debug_gk20a.o \
15 dbg_gpu_gk20a.o \
16 regops_gk20a.o \
17 gr_gk20a.o \
18 kind_gk20a.o \
19 mm_gk20a.o \
20 pmu_gk20a.o \
21 priv_ring_gk20a.o \
22 clk_gk20a.o \
23 therm_gk20a.o \
24 gr_ctx_gk20a_sim.o \
25 gr_ctx_gk20a.o \
26 gk20a_gating_reglist.o \
27 gk20a_scale.o \
28 gk20a_sysfs.o \
29 ltc_gk20a.o \
30 fb_gk20a.o \
31 hal.o \
32 hal_gk20a.o \
33 gk20a_allocator.o
34
35obj-$(CONFIG_GK20A) += platform_gk20a_generic.o
36obj-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
new file mode 100644
index 00000000..65c26938
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -0,0 +1,293 @@
1/*
2 * drivers/video/tegra/host/gk20a/as_gk20a.c
3 *
4 * GK20A Address Spaces
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#include <linux/slab.h>
19#include <linux/fs.h>
20#include <linux/cdev.h>
21#include <linux/uaccess.h>
22
23#include <trace/events/gk20a.h>
24
25#include "gk20a.h"
26
27/* dumb allocator... */
28static int generate_as_share_id(struct gk20a_as *as)
29{
30 gk20a_dbg_fn("");
31 return ++as->last_share_id;
32}
33/* still dumb */
34static void release_as_share_id(struct gk20a_as *as, int id)
35{
36 gk20a_dbg_fn("");
37 return;
38}
39
40static int gk20a_as_alloc_share(struct gk20a_as *as,
41 struct gk20a_as_share **out)
42{
43 struct gk20a_as_share *as_share;
44 int err = 0;
45
46 gk20a_dbg_fn("");
47
48 *out = 0;
49 as_share = kzalloc(sizeof(*as_share), GFP_KERNEL);
50 if (!as_share)
51 return -ENOMEM;
52
53 as_share->as = as;
54 as_share->id = generate_as_share_id(as_share->as);
55 as_share->ref_cnt.counter = 1;
56
57 /* this will set as_share->vm. */
58 err = gk20a_vm_alloc_share(as_share);
59 if (err)
60 goto failed;
61
62 *out = as_share;
63 return 0;
64
65 failed:
66 kfree(as_share);
67 return err;
68}
69
70/*
71 * channels and the device nodes call this to release.
72 * once the ref_cnt hits zero the share is deleted.
73 */
74int gk20a_as_release_share(struct gk20a_as_share *as_share)
75{
76 int err;
77
78 gk20a_dbg_fn("");
79
80 if (atomic_dec_return(&as_share->ref_cnt) > 0)
81 return 0;
82
83 err = gk20a_vm_release_share(as_share);
84 release_as_share_id(as_share->as, as_share->id);
85 kfree(as_share);
86 return err;
87}
88
89static int gk20a_as_ioctl_bind_channel(
90 struct gk20a_as_share *as_share,
91 struct nvhost_as_bind_channel_args *args)
92{
93 int err = 0;
94 struct channel_gk20a *ch;
95
96 gk20a_dbg_fn("");
97
98 ch = gk20a_get_channel_from_file(args->channel_fd);
99 if (!ch || gk20a_channel_as_bound(ch))
100 return -EINVAL;
101
102 atomic_inc(&as_share->ref_cnt);
103
104 /* this will set channel_gk20a->vm */
105 err = gk20a_vm_bind_channel(as_share, ch);
106 if (err) {
107 atomic_dec(&as_share->ref_cnt);
108 return err;
109 }
110
111 return err;
112}
113
114static int gk20a_as_ioctl_alloc_space(
115 struct gk20a_as_share *as_share,
116 struct nvhost_as_alloc_space_args *args)
117{
118 gk20a_dbg_fn("");
119 return gk20a_vm_alloc_space(as_share, args);
120}
121
122static int gk20a_as_ioctl_free_space(
123 struct gk20a_as_share *as_share,
124 struct nvhost_as_free_space_args *args)
125{
126 gk20a_dbg_fn("");
127 return gk20a_vm_free_space(as_share, args);
128}
129
130static int gk20a_as_ioctl_map_buffer_ex(
131 struct gk20a_as_share *as_share,
132 struct nvhost_as_map_buffer_ex_args *args)
133{
134 int i;
135
136 gk20a_dbg_fn("");
137
138 /* ensure that padding is not set. this is required for ensuring that
139 * we can safely use these fields later */
140 for (i = 0; i < ARRAY_SIZE(args->padding); i++)
141 if (args->padding[i])
142 return -EINVAL;
143
144 return gk20a_vm_map_buffer(as_share, args->dmabuf_fd,
145 &args->offset, args->flags,
146 args->kind);
147}
148
149static int gk20a_as_ioctl_map_buffer(
150 struct gk20a_as_share *as_share,
151 struct nvhost_as_map_buffer_args *args)
152{
153 gk20a_dbg_fn("");
154 return gk20a_vm_map_buffer(as_share, args->nvmap_handle,
155 &args->o_a.align,
156 args->flags, NV_KIND_DEFAULT);
157 /* args->o_a.offset will be set if !err */
158}
159
160static int gk20a_as_ioctl_unmap_buffer(
161 struct gk20a_as_share *as_share,
162 struct nvhost_as_unmap_buffer_args *args)
163{
164 gk20a_dbg_fn("");
165 return gk20a_vm_unmap_buffer(as_share, args->offset);
166}
167
168int gk20a_as_dev_open(struct inode *inode, struct file *filp)
169{
170 struct gk20a_as_share *as_share;
171 struct gk20a *g;
172 int err;
173
174 gk20a_dbg_fn("");
175
176 g = container_of(inode->i_cdev, struct gk20a, as.cdev);
177
178 err = gk20a_get_client(g);
179 if (err) {
180 gk20a_dbg_fn("fail to get channel!");
181 return err;
182 }
183
184 err = gk20a_as_alloc_share(&g->as, &as_share);
185 if (err) {
186 gk20a_dbg_fn("failed to alloc share");
187 gk20a_put_client(g);
188 return err;
189 }
190
191 filp->private_data = as_share;
192 return 0;
193}
194
195int gk20a_as_dev_release(struct inode *inode, struct file *filp)
196{
197 struct gk20a_as_share *as_share = filp->private_data;
198 int ret;
199 struct gk20a *g = gk20a_from_as(as_share->as);
200
201 gk20a_dbg_fn("");
202
203 ret = gk20a_as_release_share(as_share);
204
205 gk20a_put_client(g);
206
207 return ret;
208}
209
210long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
211{
212 int err = 0;
213 struct gk20a_as_share *as_share = filp->private_data;
214 struct gk20a *g = gk20a_from_as(as_share->as);
215
216 u8 buf[NVHOST_AS_IOCTL_MAX_ARG_SIZE];
217
218 if ((_IOC_TYPE(cmd) != NVHOST_AS_IOCTL_MAGIC) ||
219 (_IOC_NR(cmd) == 0) ||
220 (_IOC_NR(cmd) > NVHOST_AS_IOCTL_LAST))
221 return -EFAULT;
222
223 BUG_ON(_IOC_SIZE(cmd) > NVHOST_AS_IOCTL_MAX_ARG_SIZE);
224
225 if (_IOC_DIR(cmd) & _IOC_WRITE) {
226 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
227 return -EFAULT;
228 }
229
230 err = gk20a_channel_busy(g->dev);
231 if (err)
232 return err;
233
234 switch (cmd) {
235 case NVHOST_AS_IOCTL_BIND_CHANNEL:
236 trace_gk20a_as_ioctl_bind_channel(dev_name(dev_from_gk20a(g)));
237 err = gk20a_as_ioctl_bind_channel(as_share,
238 (struct nvhost_as_bind_channel_args *)buf);
239
240 break;
241 case NVHOST32_AS_IOCTL_ALLOC_SPACE:
242 {
243 struct nvhost32_as_alloc_space_args *args32 =
244 (struct nvhost32_as_alloc_space_args *)buf;
245 struct nvhost_as_alloc_space_args args;
246
247 args.pages = args32->pages;
248 args.page_size = args32->page_size;
249 args.flags = args32->flags;
250 args.o_a.offset = args32->o_a.offset;
251 trace_gk20a_as_ioctl_alloc_space(dev_name(dev_from_gk20a(g)));
252 err = gk20a_as_ioctl_alloc_space(as_share, &args);
253 args32->o_a.offset = args.o_a.offset;
254 break;
255 }
256 case NVHOST_AS_IOCTL_ALLOC_SPACE:
257 trace_gk20a_as_ioctl_alloc_space(dev_name(dev_from_gk20a(g)));
258 err = gk20a_as_ioctl_alloc_space(as_share,
259 (struct nvhost_as_alloc_space_args *)buf);
260 break;
261 case NVHOST_AS_IOCTL_FREE_SPACE:
262 trace_gk20a_as_ioctl_free_space(dev_name(dev_from_gk20a(g)));
263 err = gk20a_as_ioctl_free_space(as_share,
264 (struct nvhost_as_free_space_args *)buf);
265 break;
266 case NVHOST_AS_IOCTL_MAP_BUFFER:
267 trace_gk20a_as_ioctl_map_buffer(dev_name(dev_from_gk20a(g)));
268 err = gk20a_as_ioctl_map_buffer(as_share,
269 (struct nvhost_as_map_buffer_args *)buf);
270 break;
271 case NVHOST_AS_IOCTL_MAP_BUFFER_EX:
272 trace_gk20a_as_ioctl_map_buffer(dev_name(dev_from_gk20a(g)));
273 err = gk20a_as_ioctl_map_buffer_ex(as_share,
274 (struct nvhost_as_map_buffer_ex_args *)buf);
275 break;
276 case NVHOST_AS_IOCTL_UNMAP_BUFFER:
277 trace_gk20a_as_ioctl_unmap_buffer(dev_name(dev_from_gk20a(g)));
278 err = gk20a_as_ioctl_unmap_buffer(as_share,
279 (struct nvhost_as_unmap_buffer_args *)buf);
280 break;
281 default:
282 dev_err(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd);
283 err = -ENOTTY;
284 break;
285 }
286
287 gk20a_channel_idle(g->dev);
288
289 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
290 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
291
292 return err;
293}
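For illustration, a minimal user-space sketch of driving the address-space ioctls dispatched above. The device node path and the direct use of linux/nvhost_as_ioctl.h from user space are assumptions; the ioctl name and the argument fields are taken from the switch statement in gk20a_as_dev_ioctl().

/*
 * Hypothetical user-space sketch: reserve a GPU VA range through the AS
 * device node. Path and header availability are assumptions.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvhost_as_ioctl.h>

int main(void)
{
	struct nvhost_as_alloc_space_args args = {
		.pages = 256,      /* number of pages to reserve */
		.page_size = 4096, /* small-page allocation */
		.flags = 0,
	};
	int fd = open("/dev/nvhost-as-gpu", O_RDWR); /* path is an assumption */

	if (fd < 0)
		return 1;

	/* the driver fills in args.o_a.offset on success */
	if (ioctl(fd, NVHOST_AS_IOCTL_ALLOC_SPACE, &args) == 0)
		printf("reserved VA at 0x%llx\n",
		       (unsigned long long)args.o_a.offset);

	close(fd);
	return 0;
}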
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.h b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
new file mode 100644
index 00000000..be0e9707
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
@@ -0,0 +1,50 @@
1/*
2 * drivers/video/tegra/host/gk20a/as_gk20a.h
3 *
4 * GK20A Address Space
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17#ifndef __GK20A_AS_H
18#define __GK20A_AS_H
19
20#include <linux/atomic.h>
21#include <linux/cdev.h>
22#include <linux/fs.h>
23
24#include <linux/nvhost_as_ioctl.h>
25
26struct gk20a_as;
27struct gk20a_as_share;
28struct vm_gk20a;
29
30struct gk20a_as_share {
31 struct gk20a_as *as;
32 atomic_t ref_cnt;
33 int id;
34 struct vm_gk20a *vm;
35};
36
37struct gk20a_as {
38 int last_share_id; /* dummy allocator for now */
39 struct cdev cdev;
40 struct device *node;
41};
42
43int gk20a_as_release_share(struct gk20a_as_share *as_share);
44
45/* struct file_operations driver interface */
46int gk20a_as_dev_open(struct inode *inode, struct file *filp);
47int gk20a_as_dev_release(struct inode *inode, struct file *filp);
48long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
49
50#endif
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
new file mode 100644
index 00000000..6056f558
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -0,0 +1,2111 @@
1/*
2 * drivers/video/tegra/host/gk20a/channel_gk20a.c
3 *
4 * GK20A Graphics channel
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#include <linux/nvhost.h>
23#include <linux/list.h>
24#include <linux/delay.h>
25#include <linux/highmem.h> /* needed for nvmap.h */
26#include <trace/events/gk20a.h>
27#include <linux/scatterlist.h>
28#include <linux/file.h>
29#include <linux/anon_inodes.h>
30#include <linux/dma-buf.h>
31
32#include "debug_gk20a.h"
33
34#include "gk20a.h"
35#include "dbg_gpu_gk20a.h"
36
37#include "hw_ram_gk20a.h"
38#include "hw_fifo_gk20a.h"
39#include "hw_pbdma_gk20a.h"
40#include "hw_ccsr_gk20a.h"
41#include "hw_ltc_gk20a.h"
42
43#define NVMAP_HANDLE_PARAM_SIZE 1
44
45static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
46static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
47
48static void free_priv_cmdbuf(struct channel_gk20a *c,
49 struct priv_cmd_entry *e);
50static void recycle_priv_cmdbuf(struct channel_gk20a *c);
51
52static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
53static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
54
55static int channel_gk20a_commit_userd(struct channel_gk20a *c);
56static int channel_gk20a_setup_userd(struct channel_gk20a *c);
57static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
58 u64 gpfifo_base, u32 gpfifo_entries);
59
60static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
61static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
62
63static int channel_gk20a_alloc_inst(struct gk20a *g,
64 struct channel_gk20a *ch);
65static void channel_gk20a_free_inst(struct gk20a *g,
66 struct channel_gk20a *ch);
67
68static int channel_gk20a_update_runlist(struct channel_gk20a *c,
69 bool add);
70static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
71
72static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
73{
74 struct channel_gk20a *ch = NULL;
75 int chid;
76
77 mutex_lock(&f->ch_inuse_mutex);
78 for (chid = 0; chid < f->num_channels; chid++) {
79 if (!f->channel[chid].in_use) {
80 f->channel[chid].in_use = true;
81 ch = &f->channel[chid];
82 break;
83 }
84 }
85 mutex_unlock(&f->ch_inuse_mutex);
86
87 return ch;
88}
89
90static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
91{
92 mutex_lock(&f->ch_inuse_mutex);
93 f->channel[c->hw_chid].in_use = false;
94 mutex_unlock(&f->ch_inuse_mutex);
95}
96
97int channel_gk20a_commit_va(struct channel_gk20a *c)
98{
99 u64 addr;
100 u32 addr_lo;
101 u32 addr_hi;
102 void *inst_ptr;
103
104 gk20a_dbg_fn("");
105
106 inst_ptr = c->inst_block.cpuva;
107 if (!inst_ptr)
108 return -ENOMEM;
109
110 addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
111 addr_lo = u64_lo32(addr >> 12);
112 addr_hi = u64_hi32(addr);
113
114 gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
115 (u64)addr, addr_lo, addr_hi);
116
117 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
118 ram_in_page_dir_base_target_vid_mem_f() |
119 ram_in_page_dir_base_vol_true_f() |
120 ram_in_page_dir_base_lo_f(addr_lo));
121
122 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
123 ram_in_page_dir_base_hi_f(addr_hi));
124
125 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
126 u64_lo32(c->vm->va_limit) | 0xFFF);
127
128 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
129 ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
130
131 gk20a_mm_l2_invalidate(c->g);
132
133 return 0;
134}
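For illustration, a stand-alone sketch of the page-directory base split performed in channel_gk20a_commit_va(). u64_lo32()/u64_hi32() are re-implemented locally so the snippet compiles outside the kernel, and the sample address is arbitrary.

/* Stand-alone sketch of the lo/hi field split used above. */
#include <stdio.h>
#include <stdint.h>

static uint32_t u64_lo32(uint64_t v) { return (uint32_t)v; }
static uint32_t u64_hi32(uint64_t v) { return (uint32_t)(v >> 32); }

int main(void)
{
	uint64_t pde_base = 0x123456000ULL;          /* arbitrary 4 KiB-aligned base */
	uint32_t addr_lo = u64_lo32(pde_base >> 12); /* low field holds the 4K page number */
	uint32_t addr_hi = u64_hi32(pde_base);       /* high field holds bits 63:32 */

	/* prints lo=0x00123456 hi=0x00000001 */
	printf("lo=0x%08x hi=0x%08x\n", addr_lo, addr_hi);
	return 0;
}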
135
136static int channel_gk20a_commit_userd(struct channel_gk20a *c)
137{
138 u32 addr_lo;
139 u32 addr_hi;
140 void *inst_ptr;
141
142 gk20a_dbg_fn("");
143
144 inst_ptr = c->inst_block.cpuva;
145 if (!inst_ptr)
146 return -ENOMEM;
147
148 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
149 addr_hi = u64_hi32(c->userd_iova);
150
151 gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
152 c->hw_chid, (u64)c->userd_iova);
153
154 gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
155 pbdma_userd_target_vid_mem_f() |
156 pbdma_userd_addr_f(addr_lo));
157
158 gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
159 pbdma_userd_target_vid_mem_f() |
160 pbdma_userd_hi_addr_f(addr_hi));
161
162 gk20a_mm_l2_invalidate(c->g);
163
164 return 0;
165}
166
167static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
168 u32 timeslice_timeout)
169{
170 void *inst_ptr;
171 int shift = 3;
172 int value = timeslice_timeout;
173
174 inst_ptr = c->inst_block.cpuva;
175 if (!inst_ptr)
176 return -ENOMEM;
177
178 /* disable channel */
179 gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
180 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
181 ccsr_channel_enable_clr_true_f());
182
183 /* preempt the channel */
184 WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
185
186 /* flush GPU cache */
187 gk20a_mm_l2_flush(c->g, true);
188
189 /* value field is 8 bits long */
190 while (value >= 1 << 8) {
191 value >>= 1;
192 shift++;
193 }
194
195 /* time slice register is only 18 bits long */
196 if ((value << shift) >= 1<<19) {
197 pr_err("Requested timeslice value is clamped to 18 bits\n");
198 value = 255;
199 shift = 10;
200 }
201
202 /* set new timeslice */
203 gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
204 value | (shift << 12) |
205 fifo_eng_timeslice_enable_true_f());
206
207 /* enable channel */
208 gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
209 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
210 ccsr_channel_enable_set_true_f());
211
212 gk20a_mm_l2_invalidate(c->g);
213
214 return 0;
215}
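For illustration, a stand-alone sketch of the mantissa/shift reduction performed by the loop in channel_gk20a_set_schedule_params(). The requested value of 10000 is an arbitrary example.

/* Reduce a timeslice request to an 8-bit mantissa plus a shift, as above. */
#include <stdio.h>

int main(void)
{
	int value = 10000; /* requested timeslice */
	int shift = 3;     /* starting scale, as in the driver */

	/* mantissa field is 8 bits wide, so halve until it fits */
	while (value >= 1 << 8) {
		value >>= 1;
		shift++;
	}

	/* 10000 -> value 156, shift 9 (156 << 9 = 79872, close to 10000 << 3) */
	printf("value=%d shift=%d\n", value, shift);
	return 0;
}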
216
217static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
218 u64 gpfifo_base, u32 gpfifo_entries)
219{
220 void *inst_ptr;
221
222 gk20a_dbg_fn("");
223
224 inst_ptr = c->inst_block.cpuva;
225 if (!inst_ptr)
226 return -ENOMEM;
227
228 memset(inst_ptr, 0, ram_fc_size_val_v());
229
230 gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
231 pbdma_gp_base_offset_f(
232 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
233
234 gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
235 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
236 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
237
238 gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
239 pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
240
241 gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
242 pbdma_formats_gp_fermi0_f() |
243 pbdma_formats_pb_fermi1_f() |
244 pbdma_formats_mp_fermi0_f());
245
246 gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
247 pbdma_pb_header_priv_user_f() |
248 pbdma_pb_header_method_zero_f() |
249 pbdma_pb_header_subchannel_zero_f() |
250 pbdma_pb_header_level_main_f() |
251 pbdma_pb_header_first_true_f() |
252 pbdma_pb_header_type_inc_f());
253
254 gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
255 pbdma_subdevice_id_f(1) |
256 pbdma_subdevice_status_active_f() |
257 pbdma_subdevice_channel_dma_enable_f());
258
259 gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
260
261 gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
262 pbdma_acquire_retry_man_2_f() |
263 pbdma_acquire_retry_exp_2_f() |
264 pbdma_acquire_timeout_exp_max_f() |
265 pbdma_acquire_timeout_man_max_f() |
266 pbdma_acquire_timeout_en_disable_f());
267
268 gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
269 fifo_eng_timeslice_timeout_128_f() |
270 fifo_eng_timeslice_timescale_3_f() |
271 fifo_eng_timeslice_enable_true_f());
272
273 gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
274 fifo_pb_timeslice_timeout_16_f() |
275 fifo_pb_timeslice_timescale_0_f() |
276 fifo_pb_timeslice_enable_true_f());
277
278 gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
279
280 /* TBD: always priv mode? */
281 gk20a_mem_wr32(inst_ptr, ram_fc_hce_ctrl_w(),
282 pbdma_hce_ctrl_hce_priv_mode_yes_f());
283
284 gk20a_mm_l2_invalidate(c->g);
285
286 return 0;
287}
288
289static int channel_gk20a_setup_userd(struct channel_gk20a *c)
290{
291 BUG_ON(!c->userd_cpu_va);
292
293 gk20a_dbg_fn("");
294
295 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
296 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
297 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
298 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
299 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
300 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
301 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
302 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
303 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
304 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
305
306 gk20a_mm_l2_invalidate(c->g);
307
308 return 0;
309}
310
311static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
312{
313 struct gk20a *g = ch_gk20a->g;
314 struct fifo_gk20a *f = &g->fifo;
315 struct fifo_engine_info_gk20a *engine_info =
316 f->engine_info + ENGINE_GR_GK20A;
317
318 u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
319 >> ram_in_base_shift_v();
320
321 gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
322 ch_gk20a->hw_chid, inst_ptr);
323
324 ch_gk20a->bound = true;
325
326 gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
327 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
328 ~ccsr_channel_runlist_f(~0)) |
329 ccsr_channel_runlist_f(engine_info->runlist_id));
330
331 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
332 ccsr_channel_inst_ptr_f(inst_ptr) |
333 ccsr_channel_inst_target_vid_mem_f() |
334 ccsr_channel_inst_bind_true_f());
335
336 gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
337 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
338 ~ccsr_channel_enable_set_f(~0)) |
339 ccsr_channel_enable_set_true_f());
340}
341
342static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
343{
344 struct gk20a *g = ch_gk20a->g;
345
346 gk20a_dbg_fn("");
347
348 if (ch_gk20a->bound)
349 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
350 ccsr_channel_inst_ptr_f(0) |
351 ccsr_channel_inst_bind_false_f());
352
353 ch_gk20a->bound = false;
354}
355
356static int channel_gk20a_alloc_inst(struct gk20a *g,
357 struct channel_gk20a *ch)
358{
359 struct device *d = dev_from_gk20a(g);
360 int err = 0;
361 dma_addr_t iova;
362
363 gk20a_dbg_fn("");
364
365 ch->inst_block.size = ram_in_alloc_size_v();
366 ch->inst_block.cpuva = dma_alloc_coherent(d,
367 ch->inst_block.size,
368 &iova,
369 GFP_KERNEL);
370 if (!ch->inst_block.cpuva) {
371 gk20a_err(d, "%s: memory allocation failed\n", __func__);
372 err = -ENOMEM;
373 goto clean_up;
374 }
375
376 ch->inst_block.iova = iova;
377 ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
378 ch->inst_block.iova);
379 if (!ch->inst_block.cpu_pa) {
380 gk20a_err(d, "%s: failed to get physical address\n", __func__);
381 err = -ENOMEM;
382 goto clean_up;
383 }
384
385 gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
386 ch->hw_chid, (u64)ch->inst_block.cpu_pa);
387
388 gk20a_dbg_fn("done");
389 return 0;
390
391clean_up:
392 gk20a_err(d, "fail");
393 channel_gk20a_free_inst(g, ch);
394 return err;
395}
396
397static void channel_gk20a_free_inst(struct gk20a *g,
398 struct channel_gk20a *ch)
399{
400 struct device *d = dev_from_gk20a(g);
401
402 if (ch->inst_block.cpuva)
403 dma_free_coherent(d, ch->inst_block.size,
404 ch->inst_block.cpuva, ch->inst_block.iova);
405 ch->inst_block.cpuva = NULL;
406 ch->inst_block.iova = 0;
407 memset(&ch->inst_block, 0, sizeof(struct inst_desc));
408}
409
410static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
411{
412 return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
413}
414
415void gk20a_disable_channel_no_update(struct channel_gk20a *ch)
416{
417 /* ensure no fences are pending */
418 if (ch->sync)
419 ch->sync->set_min_eq_max(ch->sync);
420
421 /* disable channel */
422 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
423 gk20a_readl(ch->g,
424 ccsr_channel_r(ch->hw_chid)) |
425 ccsr_channel_enable_clr_true_f());
426}
427
428static int gk20a_wait_channel_idle(struct channel_gk20a *ch)
429{
430 bool channel_idle = false;
431 unsigned long end_jiffies = jiffies +
432 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
433
434 do {
435 mutex_lock(&ch->jobs_lock);
436 channel_idle = list_empty(&ch->jobs);
437 mutex_unlock(&ch->jobs_lock);
438 if (channel_idle)
439 break;
440
441 usleep_range(1000, 3000);
442 } while (time_before(jiffies, end_jiffies)
443 || !tegra_platform_is_silicon());
444
445 if (!channel_idle)
446 gk20a_err(dev_from_gk20a(ch->g), "channel jobs not freed");
447
448 return 0;
449}
450
451void gk20a_disable_channel(struct channel_gk20a *ch,
452 bool finish,
453 unsigned long finish_timeout)
454{
455 if (finish) {
456 int err = gk20a_channel_finish(ch, finish_timeout);
457 WARN_ON(err);
458 }
459
460 /* disable the channel from hw and increment syncpoints */
461 gk20a_disable_channel_no_update(ch);
462
463 gk20a_wait_channel_idle(ch);
464
465 /* preempt the channel */
466 gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
467
468 /* remove channel from runlist */
469 channel_gk20a_update_runlist(ch, false);
470}
471
472#if defined(CONFIG_GK20A_CYCLE_STATS)
473
474static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
475{
476 /* disable existing cyclestats buffer */
477 mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
478 if (ch->cyclestate.cyclestate_buffer_handler) {
479 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
480 ch->cyclestate.cyclestate_buffer);
481 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
482 ch->cyclestate.cyclestate_buffer_handler = NULL;
483 ch->cyclestate.cyclestate_buffer = NULL;
484 ch->cyclestate.cyclestate_buffer_size = 0;
485 }
486 mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
487}
488
489static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
490 struct nvhost_cycle_stats_args *args)
491{
492 struct dma_buf *dmabuf;
493 void *virtual_address;
494
495 if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {
496
497 /* set up new cyclestats buffer */
498 dmabuf = dma_buf_get(args->nvmap_handle);
499 if (IS_ERR(dmabuf))
500 return PTR_ERR(dmabuf);
501 virtual_address = dma_buf_vmap(dmabuf);
502 if (!virtual_address)
503 return -ENOMEM;
504
505 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
506 ch->cyclestate.cyclestate_buffer = virtual_address;
507 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
508 return 0;
509
510 } else if (!args->nvmap_handle &&
511 ch->cyclestate.cyclestate_buffer_handler) {
512 gk20a_free_cycle_stats_buffer(ch);
513 return 0;
514
515 } else if (!args->nvmap_handle &&
516 !ch->cyclestate.cyclestate_buffer_handler) {
517 /* no request from GL */
518 return 0;
519
520 } else {
521 pr_err("channel already has cyclestats buffer\n");
522 return -EINVAL;
523 }
524}
525#endif
526
527static int gk20a_init_error_notifier(struct channel_gk20a *ch,
528 struct nvhost_set_error_notifier *args) {
529 void *va;
530
531 struct dma_buf *dmabuf;
532
533 if (!args->mem) {
534 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
535 return -EINVAL;
536 }
537
538 dmabuf = dma_buf_get(args->mem);
539
540 if (ch->error_notifier_ref)
541 gk20a_free_error_notifiers(ch);
542
543 if (IS_ERR(dmabuf)) {
544 pr_err("Invalid handle: %d\n", args->mem);
545 return -EINVAL;
546 }
547 /* map handle */
548 va = dma_buf_vmap(dmabuf);
549 if (!va) {
550 dma_buf_put(dmabuf);
551 pr_err("Cannot map notifier handle\n");
552 return -ENOMEM;
553 }
554
555 /* set channel notifiers pointer */
556 ch->error_notifier_ref = dmabuf;
557 ch->error_notifier = va + args->offset;
558 ch->error_notifier_va = va;
559 memset(ch->error_notifier, 0, sizeof(struct nvhost_notification));
560 return 0;
561}
562
563void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
564{
565 if (ch->error_notifier_ref) {
566 struct timespec time_data;
567 u64 nsec;
568 getnstimeofday(&time_data);
569 nsec = ((u64)time_data.tv_sec) * 1000000000u +
570 (u64)time_data.tv_nsec;
571 ch->error_notifier->time_stamp.nanoseconds[0] =
572 (u32)nsec;
573 ch->error_notifier->time_stamp.nanoseconds[1] =
574 (u32)(nsec >> 32);
575 ch->error_notifier->info32 = error;
576 ch->error_notifier->status = 0xffff;
577 gk20a_err(dev_from_gk20a(ch->g),
578 "error notifier set to %d\n", error);
579 }
580}
581
582static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
583{
584 if (ch->error_notifier_ref) {
585 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
586 dma_buf_put(ch->error_notifier_ref);
587 ch->error_notifier_ref = 0;
588 ch->error_notifier = 0;
589 ch->error_notifier_va = 0;
590 }
591}
592
593void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
594{
595 struct gk20a *g = ch->g;
596 struct device *d = dev_from_gk20a(g);
597 struct fifo_gk20a *f = &g->fifo;
598 struct gr_gk20a *gr = &g->gr;
599 struct vm_gk20a *ch_vm = ch->vm;
600 unsigned long timeout = gk20a_get_gr_idle_timeout(g);
601 struct dbg_session_gk20a *dbg_s;
602
603 gk20a_dbg_fn("");
604
605 /* if engine reset was deferred, perform it now */
606 mutex_lock(&f->deferred_reset_mutex);
607 if (g->fifo.deferred_reset_pending) {
608 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
609 " deferred, running now");
610 fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
611 g->fifo.mmu_fault_engines = 0;
612 g->fifo.deferred_reset_pending = false;
613 }
614 mutex_unlock(&f->deferred_reset_mutex);
615
616 if (!ch->bound)
617 return;
618
619 if (!gk20a_channel_as_bound(ch))
620 goto unbind;
621
622 gk20a_dbg_info("freeing bound channel context, timeout=%ld",
623 timeout);
624
625 gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout);
626
627 gk20a_free_error_notifiers(ch);
628
629 /* release channel ctx */
630 gk20a_free_channel_ctx(ch);
631
632 gk20a_gr_flush_channel_tlb(gr);
633
634 memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
635
636 /* free gpfifo */
637 if (ch->gpfifo.gpu_va)
638 gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
639 ch->gpfifo.size, gk20a_mem_flag_none);
640 if (ch->gpfifo.cpu_va)
641 dma_free_coherent(d, ch->gpfifo.size,
642 ch->gpfifo.cpu_va, ch->gpfifo.iova);
643 ch->gpfifo.cpu_va = NULL;
644 ch->gpfifo.iova = 0;
645
646 gk20a_mm_l2_invalidate(ch->g);
647
648 memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
649
650#if defined(CONFIG_GK20A_CYCLE_STATS)
651 gk20a_free_cycle_stats_buffer(ch);
652#endif
653
654 channel_gk20a_free_priv_cmdbuf(ch);
655
656 if (ch->sync) {
657 ch->sync->destroy(ch->sync);
658 ch->sync = NULL;
659 }
660
661 /* release channel binding to the as_share */
662 gk20a_as_release_share(ch_vm->as_share);
663
664unbind:
665 channel_gk20a_unbind(ch);
666 channel_gk20a_free_inst(g, ch);
667
668 ch->vpr = false;
669 ch->vm = NULL;
670 WARN_ON(ch->sync);
671
672 /* unlink all debug sessions */
673 mutex_lock(&ch->dbg_s_lock);
674
675 list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
676 dbg_s->ch = NULL;
677 list_del_init(&dbg_s->dbg_s_list_node);
678 }
679
680 mutex_unlock(&ch->dbg_s_lock);
681
682 /* ALWAYS last */
683 release_used_channel(f, ch);
684}
685
686int gk20a_channel_release(struct inode *inode, struct file *filp)
687{
688 struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
689 struct gk20a *g = ch->g;
690
691 trace_gk20a_channel_release(dev_name(&g->dev->dev));
692
693 gk20a_channel_busy(ch->g->dev);
694 gk20a_free_channel(ch, true);
695 gk20a_channel_idle(ch->g->dev);
696
697 gk20a_put_client(g);
698 filp->private_data = NULL;
699 return 0;
700}
701
702static struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
703{
704 struct fifo_gk20a *f = &g->fifo;
705 struct channel_gk20a *ch;
706
707 ch = acquire_unused_channel(f);
708 if (ch == NULL) {
709 /* TBD: we want to make this virtualizable */
710 gk20a_err(dev_from_gk20a(g), "out of hw chids");
711 return 0;
712 }
713
714 ch->g = g;
715
716 if (channel_gk20a_alloc_inst(g, ch)) {
717 ch->in_use = false;
718 gk20a_err(dev_from_gk20a(g),
719 "failed to open gk20a channel, out of inst mem");
720
721 return 0;
722 }
723 g->ops.fifo.bind_channel(ch);
724 ch->pid = current->pid;
725
726 /* reset timeout counter and update timestamp */
727 ch->timeout_accumulated_ms = 0;
728 ch->timeout_gpfifo_get = 0;
729 /* set gr host default timeout */
730 ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
731 ch->timeout_debug_dump = true;
732 ch->has_timedout = false;
733
734 /* The channel is *not* runnable at this point. It still needs to have
735 * an address space bound and a gpfifo and grctx allocated. */
736
737 init_waitqueue_head(&ch->notifier_wq);
738 init_waitqueue_head(&ch->semaphore_wq);
739 init_waitqueue_head(&ch->submit_wq);
740
741 return ch;
742}
743
744static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
745{
746 int err;
747 struct channel_gk20a *ch;
748
749 trace_gk20a_channel_open(dev_name(&g->dev->dev));
750
751 err = gk20a_get_client(g);
752 if (err) {
753 gk20a_err(dev_from_gk20a(g),
754 "failed to get client ref");
755 return err;
756 }
757
758 err = gk20a_channel_busy(g->dev);
759 if (err) {
760 gk20a_put_client(g);
761 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
762 return err;
763 }
764 ch = gk20a_open_new_channel(g);
765 gk20a_channel_idle(g->dev);
766 if (!ch) {
767 gk20a_put_client(g);
768 gk20a_err(dev_from_gk20a(g),
769 "failed to get f");
770 return -ENOMEM;
771 }
772
773 filp->private_data = ch;
774 return 0;
775}
776
777int gk20a_channel_open(struct inode *inode, struct file *filp)
778{
779 struct gk20a *g = container_of(inode->i_cdev,
780 struct gk20a, channel.cdev);
781 return __gk20a_channel_open(g, filp);
782}
783
784/* Allocate the private cmd buffer,
785 used for inserting commands before/after user-submitted buffers. */
786static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
787{
788 struct device *d = dev_from_gk20a(c->g);
789 struct vm_gk20a *ch_vm = c->vm;
790 struct priv_cmd_queue *q = &c->priv_cmd_q;
791 struct priv_cmd_entry *e;
792 u32 i = 0, size;
793 int err = 0;
794 struct sg_table *sgt;
795 dma_addr_t iova;
796
797 /* The kernel can insert gpfifos before and after user gpfifos.
798 Before user gpfifos, the kernel inserts fence_wait, which takes
799 syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
800 After user gpfifos, the kernel inserts fence_get, which takes
801 wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
802 = 6 dwords.
803 Worst case, if the kernel adds both of them for every user gpfifo,
804 the max size of the priv_cmdbuf is:
805 gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes */
806 size = roundup_pow_of_two(
807 c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
808
809 q->mem.base_cpuva = dma_alloc_coherent(d, size,
810 &iova,
811 GFP_KERNEL);
812 if (!q->mem.base_cpuva) {
813 gk20a_err(d, "%s: memory allocation failed\n", __func__);
814 err = -ENOMEM;
815 goto clean_up;
816 }
817
818 q->mem.base_iova = iova;
819 q->mem.size = size;
820
821 err = gk20a_get_sgtable(d, &sgt,
822 q->mem.base_cpuva, q->mem.base_iova, size);
823 if (err) {
824 gk20a_err(d, "%s: failed to create sg table\n", __func__);
825 goto clean_up;
826 }
827
828 memset(q->mem.base_cpuva, 0, size);
829
830 q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
831 size,
832 0, /* flags */
833 gk20a_mem_flag_none);
834 if (!q->base_gpuva) {
835 gk20a_err(d, "ch %d : failed to map gpu va"
836 "for priv cmd buffer", c->hw_chid);
837 err = -ENOMEM;
838 goto clean_up_sgt;
839 }
840
841 q->size = q->mem.size / sizeof (u32);
842
843 INIT_LIST_HEAD(&q->head);
844 INIT_LIST_HEAD(&q->free);
845
846 /* pre-alloc 25% of priv cmdbuf entries and put them on free list */
847 for (i = 0; i < q->size / 4; i++) {
848 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
849 if (!e) {
850 gk20a_err(d, "ch %d: fail to pre-alloc cmd entry",
851 c->hw_chid);
852 err = -ENOMEM;
853 goto clean_up_sgt;
854 }
855 e->pre_alloc = true;
856 list_add(&e->list, &q->free);
857 }
858
859 gk20a_free_sgtable(&sgt);
860
861 return 0;
862
863clean_up_sgt:
864 gk20a_free_sgtable(&sgt);
865clean_up:
866 channel_gk20a_free_priv_cmdbuf(c);
867 return err;
868}
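For illustration, a worked instance of the priv_cmdbuf sizing rule described in the comment above, computed for an arbitrary 1024-entry gpfifo. roundup_pow_of_two() is approximated locally so the snippet stands alone.

/* Worked example of the priv_cmdbuf size computation above. */
#include <stdio.h>
#include <stdint.h>

static uint32_t roundup_pow_of_two(uint32_t v)
{
	uint32_t r = 1;

	while (r < v)
		r <<= 1;
	return r;
}

int main(void)
{
	uint32_t entry_num = 1024;                                  /* example gpfifo size */
	uint32_t raw = entry_num * 2 * 10 * sizeof(uint32_t) / 3;   /* 27306 bytes */
	uint32_t size = roundup_pow_of_two(raw);                    /* 32768 bytes */

	printf("raw=%u rounded=%u\n", raw, size);
	return 0;
}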
869
870static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
871{
872 struct device *d = dev_from_gk20a(c->g);
873 struct vm_gk20a *ch_vm = c->vm;
874 struct priv_cmd_queue *q = &c->priv_cmd_q;
875 struct priv_cmd_entry *e;
876 struct list_head *pos, *tmp, *head;
877
878 if (q->size == 0)
879 return;
880
881 if (q->base_gpuva)
882 gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
883 q->mem.size, gk20a_mem_flag_none);
884 if (q->mem.base_cpuva)
885 dma_free_coherent(d, q->mem.size,
886 q->mem.base_cpuva, q->mem.base_iova);
887 q->mem.base_cpuva = NULL;
888 q->mem.base_iova = 0;
889
890 /* free used list */
891 head = &q->head;
892 list_for_each_safe(pos, tmp, head) {
893 e = container_of(pos, struct priv_cmd_entry, list);
894 free_priv_cmdbuf(c, e);
895 }
896
897 /* free free list */
898 head = &q->free;
899 list_for_each_safe(pos, tmp, head) {
900 e = container_of(pos, struct priv_cmd_entry, list);
901 e->pre_alloc = false;
902 free_priv_cmdbuf(c, e);
903 }
904
905 memset(q, 0, sizeof(struct priv_cmd_queue));
906}
907
908/* allocate a cmd buffer with given size. size is number of u32 entries */
909int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
910 struct priv_cmd_entry **entry)
911{
912 struct priv_cmd_queue *q = &c->priv_cmd_q;
913 struct priv_cmd_entry *e;
914 struct list_head *node;
915 u32 free_count;
916 u32 size = orig_size;
917 bool no_retry = false;
918
919 gk20a_dbg_fn("size %d", orig_size);
920
921 *entry = NULL;
922
923 /* if free space at the end is less than requested, increase the size
924 * so the real allocated space starts from the beginning. */
925 if (q->put + size > q->size)
926 size = orig_size + (q->size - q->put);
927
928 gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
929 c->hw_chid, q->get, q->put);
930
931TRY_AGAIN:
932 free_count = (q->size - (q->put - q->get) - 1) % q->size;
933
934 if (size > free_count) {
935 if (!no_retry) {
936 recycle_priv_cmdbuf(c);
937 no_retry = true;
938 goto TRY_AGAIN;
939 } else
940 return -EAGAIN;
941 }
942
943 if (unlikely(list_empty(&q->free))) {
944
945 gk20a_dbg_info("ch %d: run out of pre-alloc entries",
946 c->hw_chid);
947
948 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
949 if (!e) {
950 gk20a_err(dev_from_gk20a(c->g),
951 "ch %d: fail to allocate priv cmd entry",
952 c->hw_chid);
953 return -ENOMEM;
954 }
955 } else {
956 node = q->free.next;
957 list_del(node);
958 e = container_of(node, struct priv_cmd_entry, list);
959 }
960
961 e->size = orig_size;
962 e->gp_get = c->gpfifo.get;
963 e->gp_put = c->gpfifo.put;
964 e->gp_wrap = c->gpfifo.wrap;
965
966 /* if we have increased the size to skip free space at the end, set put
967 to the beginning of the cmd buffer (0) + size */
968 if (size != orig_size) {
969 e->ptr = q->mem.base_cpuva;
970 e->gva = q->base_gpuva;
971 q->put = orig_size;
972 } else {
973 e->ptr = q->mem.base_cpuva + q->put;
974 e->gva = q->base_gpuva + q->put * sizeof(u32);
975 q->put = (q->put + orig_size) & (q->size - 1);
976 }
977
978 /* we already handled q->put + size > q->size so BUG_ON this */
979 BUG_ON(q->put > q->size);
980
981 /* add new entry to head since we free from head */
982 list_add(&e->list, &q->head);
983
984 *entry = e;
985
986 gk20a_dbg_fn("done");
987
988 return 0;
989}
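For illustration, a minimal sketch of the put-pointer wrap used above: because the queue size is rounded up to a power of two, masking with (size - 1) behaves like a modulo. Values are arbitrary examples.

/* Power-of-two ring wrap: "& (size - 1)" is equivalent to "% size". */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t size = 8192; /* power-of-two queue size, in u32 entries */
	uint32_t put = 8000;
	uint32_t len = 300;

	uint32_t wrapped = (put + len) & (size - 1); /* 8300 & 8191 = 108 */
	uint32_t modulo  = (put + len) % size;       /* also 108 */

	printf("mask=%u mod=%u\n", wrapped, modulo);
	return 0;
}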
990
991/* Don't call this to free an explicit cmd entry.
992 * It doesn't update priv_cmd_queue get/put */
993static void free_priv_cmdbuf(struct channel_gk20a *c,
994 struct priv_cmd_entry *e)
995{
996 struct priv_cmd_queue *q = &c->priv_cmd_q;
997
998 if (!e)
999 return;
1000
1001 list_del(&e->list);
1002
1003 if (unlikely(!e->pre_alloc))
1004 kfree(e);
1005 else {
1006 memset(e, 0, sizeof(struct priv_cmd_entry));
1007 e->pre_alloc = true;
1008 list_add(&e->list, &q->free);
1009 }
1010}
1011
1012/* free entries if they're no longer being used */
1013static void recycle_priv_cmdbuf(struct channel_gk20a *c)
1014{
1015 struct priv_cmd_queue *q = &c->priv_cmd_q;
1016 struct priv_cmd_entry *e, *tmp;
1017 struct list_head *head = &q->head;
1018 bool wrap_around, found = false;
1019
1020 gk20a_dbg_fn("");
1021
1022 /* Find the most recent free entry. Free it and everything before it */
1023 list_for_each_entry(e, head, list) {
1024
1025 gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
1026 "curr get:put:wrap %d:%d:%d",
1027 c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
1028 c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
1029
1030 wrap_around = (c->gpfifo.wrap != e->gp_wrap);
1031 if (e->gp_get < e->gp_put) {
1032 if (c->gpfifo.get >= e->gp_put ||
1033 wrap_around) {
1034 found = true;
1035 break;
1036 } else
1037 e->gp_get = c->gpfifo.get;
1038 } else if (e->gp_get > e->gp_put) {
1039 if (wrap_around &&
1040 c->gpfifo.get >= e->gp_put) {
1041 found = true;
1042 break;
1043 } else
1044 e->gp_get = c->gpfifo.get;
1045 }
1046 }
1047
1048 if (found)
1049 q->get = (e->ptr - q->mem.base_cpuva) + e->size;
1050 else {
1051 gk20a_dbg_info("no free entry recycled");
1052 return;
1053 }
1054
1055 list_for_each_entry_safe_continue(e, tmp, head, list) {
1056 free_priv_cmdbuf(c, e);
1057 }
1058
1059 gk20a_dbg_fn("done");
1060}
1061
1062
1063static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1064 struct nvhost_alloc_gpfifo_args *args)
1065{
1066 struct gk20a *g = c->g;
1067 struct device *d = dev_from_gk20a(g);
1068 struct vm_gk20a *ch_vm;
1069 u32 gpfifo_size;
1070 int err = 0;
1071 struct sg_table *sgt;
1072 dma_addr_t iova;
1073
1074 /* The kernel can insert one extra gpfifo entry before user-submitted gpfifos
1075 and another one after them, for internal use. Triple the requested size. */
1076 gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
1077
1078 if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1079 c->vpr = true;
1080
1081 /* an address space needs to have been bound at this point. */
1082 if (!gk20a_channel_as_bound(c)) {
1083 gk20a_err(d,
1084 "not bound to an address space at time of gpfifo"
1085 " allocation. Attempting to create and bind to"
1086 " one...");
1087 return -EINVAL;
1088 }
1089 ch_vm = c->vm;
1090
1091 c->cmds_pending = false;
1092 c->last_submit_fence.valid = false;
1093
1094 c->ramfc.offset = 0;
1095 c->ramfc.size = ram_in_ramfc_s() / 8;
1096
1097 if (c->gpfifo.cpu_va) {
1098 gk20a_err(d, "channel %d: "
1099 "gpfifo already allocated", c->hw_chid);
1100 return -EEXIST;
1101 }
1102
1103 c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
1104 c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
1105 c->gpfifo.size,
1106 &iova,
1107 GFP_KERNEL);
1108 if (!c->gpfifo.cpu_va) {
1109 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1110 err = -ENOMEM;
1111 goto clean_up;
1112 }
1113
1114 c->gpfifo.iova = iova;
1115 c->gpfifo.entry_num = gpfifo_size;
1116
1117 c->gpfifo.get = c->gpfifo.put = 0;
1118
1119 err = gk20a_get_sgtable(d, &sgt,
1120 c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
1121 if (err) {
1122 gk20a_err(d, "%s: failed to allocate sg table\n", __func__);
1123 goto clean_up;
1124 }
1125
1126 c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
1127 &sgt,
1128 c->gpfifo.size,
1129 0, /* flags */
1130 gk20a_mem_flag_none);
1131 if (!c->gpfifo.gpu_va) {
1132 gk20a_err(d, "channel %d : failed to map"
1133 " gpu_va for gpfifo", c->hw_chid);
1134 err = -ENOMEM;
1135 goto clean_up_sgt;
1136 }
1137
1138 gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1139 c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1140
1141 channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1142
1143 channel_gk20a_setup_userd(c);
1144 channel_gk20a_commit_userd(c);
1145
1146 gk20a_mm_l2_invalidate(c->g);
1147
1148 /* TBD: setup engine contexts */
1149
1150 err = channel_gk20a_alloc_priv_cmdbuf(c);
1151 if (err)
1152 goto clean_up_unmap;
1153
1154 err = channel_gk20a_update_runlist(c, true);
1155 if (err)
1156 goto clean_up_unmap;
1157
1158 gk20a_free_sgtable(&sgt);
1159
1160 gk20a_dbg_fn("done");
1161 return 0;
1162
1163clean_up_unmap:
1164 gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
1165 c->gpfifo.size, gk20a_mem_flag_none);
1166clean_up_sgt:
1167 gk20a_free_sgtable(&sgt);
1168clean_up:
1169 dma_free_coherent(d, c->gpfifo.size,
1170 c->gpfifo.cpu_va, c->gpfifo.iova);
1171 c->gpfifo.cpu_va = NULL;
1172 c->gpfifo.iova = 0;
1173 memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1174 gk20a_err(d, "fail");
1175 return err;
1176}
1177
1178static inline int wfi_cmd_size(void)
1179{
1180 return 2;
1181}
1182void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
1183{
1184 /* wfi */
1185 cmd->ptr[(*i)++] = 0x2001001E;
1186 /* handle, ignored */
1187 cmd->ptr[(*i)++] = 0x00000000;
1188}
1189
1190static inline bool check_gp_put(struct gk20a *g,
1191 struct channel_gk20a *c)
1192{
1193 u32 put;
1194 /* gp_put changed unexpectedly since last update? */
1195 put = gk20a_bar1_readl(g,
1196 c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1197 if (c->gpfifo.put != put) {
1198 /*TBD: BUG_ON/teardown on this*/
1199 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1200 "since last update");
1201 c->gpfifo.put = put;
1202 return false; /* surprise! */
1203 }
1204 return true; /* checked out ok */
1205}
1206
1207/* Call this periodically to track how the gpfifo is draining. */
1208static inline u32 update_gp_get(struct gk20a *g,
1209 struct channel_gk20a *c)
1210{
1211 u32 new_get = gk20a_bar1_readl(g,
1212 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
1213 if (new_get < c->gpfifo.get)
1214 c->gpfifo.wrap = !c->gpfifo.wrap;
1215 c->gpfifo.get = new_get;
1216 return new_get;
1217}
1218
1219static inline u32 gp_free_count(struct channel_gk20a *c)
1220{
1221 return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1222 c->gpfifo.entry_num;
1223}
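For illustration, a stand-alone sketch of the gpfifo free-count formula above: the "- 1" keeps one entry unused so a full ring can be distinguished from an empty one, and unsigned arithmetic handles the case where put has wrapped below get. Numbers are arbitrary examples.

/* Free-count formula of a ring buffer with one reserved slot. */
#include <stdio.h>
#include <stdint.h>

static uint32_t free_count(uint32_t entry_num, uint32_t put, uint32_t get)
{
	return (entry_num - (put - get) - 1) % entry_num;
}

int main(void)
{
	/* empty ring: everything but the reserved slot is free */
	printf("%u\n", free_count(1024, 100, 100)); /* 1023 */
	/* put has wrapped around behind get */
	printf("%u\n", free_count(1024, 10, 1000)); /* 989 */
	return 0;
}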
1224
1225bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1226 u32 timeout_delta_ms)
1227{
1228 u32 gpfifo_get = update_gp_get(ch->g, ch);
1229 /* Count consecutive timeout ISRs */
1230 if (gpfifo_get == ch->timeout_gpfifo_get) {
1231 /* we didn't advance since previous channel timeout check */
1232 ch->timeout_accumulated_ms += timeout_delta_ms;
1233 } else {
1234 /* first timeout isr encountered */
1235 ch->timeout_accumulated_ms = timeout_delta_ms;
1236 }
1237
1238 ch->timeout_gpfifo_get = gpfifo_get;
1239
1240 return ch->g->timeouts_enabled &&
1241 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1242}
1243
1244
1245/* Issue a syncpoint increment *preceded* by a wait-for-idle
1246 * command. All commands on the channel will have been
1247 * consumed at the time the fence syncpoint increment occurs.
1248 */
1249static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
1250{
1251 struct priv_cmd_entry *cmd = NULL;
1252 struct gk20a *g = c->g;
1253 u32 free_count;
1254 int err;
1255
1256 if (c->has_timedout)
1257 return -ETIMEDOUT;
1258
1259 if (!c->sync) {
1260 c->sync = gk20a_channel_sync_create(c);
1261 if (!c->sync)
1262 return -ENOMEM;
1263 }
1264
1265 update_gp_get(g, c);
1266 free_count = gp_free_count(c);
1267 if (unlikely(!free_count)) {
1268 gk20a_err(dev_from_gk20a(g),
1269 "not enough gpfifo space");
1270 return -EAGAIN;
1271 }
1272
1273 err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence);
1274 if (unlikely(err))
1275 return err;
1276
1277 WARN_ON(!c->last_submit_fence.wfi);
1278
1279 c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
1280 c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
1281 pbdma_gp_entry1_length_f(cmd->size);
1282
1283 c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
1284
1285 /* save gp_put */
1286 cmd->gp_put = c->gpfifo.put;
1287
1288 gk20a_bar1_writel(g,
1289 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1290 c->gpfifo.put);
1291
1292 gk20a_dbg_info("post-submit put %d, get %d, size %d",
1293 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1294
1295 return 0;
1296}
1297
1298static u32 get_gp_free_count(struct channel_gk20a *c)
1299{
1300 update_gp_get(c->g, c);
1301 return gp_free_count(c);
1302}
1303
1304static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
1305{
1306 void *mem = NULL;
1307 unsigned int words;
1308 u64 offset;
1309 struct dma_buf *dmabuf = NULL;
1310
1311 if (gk20a_debug_trace_cmdbuf) {
1312 u64 gpu_va = (u64)g->entry0 |
1313 (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1314 int err;
1315
1316 words = pbdma_gp_entry1_length_v(g->entry1);
1317 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1318 if (!err)
1319 mem = dma_buf_vmap(dmabuf);
1320 }
1321
1322 if (mem) {
1323 u32 i;
1324 /*
1325 * Write in batches of 128 as there seems to be a limit
1326 * of how much you can output to ftrace at once.
1327 */
1328 for (i = 0; i < words; i += 128U) {
1329 trace_gk20a_push_cmdbuf(
1330 c->g->dev->name,
1331 0,
1332 min(words - i, 128U),
1333 offset + i * sizeof(u32),
1334 mem);
1335 }
1336 dma_buf_vunmap(dmabuf, mem);
1337 }
1338}
1339
1340static int gk20a_channel_add_job(struct channel_gk20a *c,
1341 struct gk20a_channel_fence *fence)
1342{
1343 struct vm_gk20a *vm = c->vm;
1344 struct channel_gk20a_job *job = NULL;
1345 struct mapped_buffer_node **mapped_buffers = NULL;
1346 int err = 0, num_mapped_buffers;
1347
1348 /* job needs reference to this vm */
1349 gk20a_vm_get(vm);
1350
1351 err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
1352 if (err) {
1353 gk20a_vm_put(vm);
1354 return err;
1355 }
1356
1357 job = kzalloc(sizeof(*job), GFP_KERNEL);
1358 if (!job) {
1359 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1360 gk20a_vm_put(vm);
1361 return -ENOMEM;
1362 }
1363
1364 job->num_mapped_buffers = num_mapped_buffers;
1365 job->mapped_buffers = mapped_buffers;
1366 job->fence = *fence;
1367
1368 mutex_lock(&c->jobs_lock);
1369 list_add_tail(&job->list, &c->jobs);
1370 mutex_unlock(&c->jobs_lock);
1371
1372 return 0;
1373}
1374
1375void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1376{
1377 struct gk20a *g = c->g;
1378 struct vm_gk20a *vm = c->vm;
1379 struct channel_gk20a_job *job, *n;
1380 int i;
1381
1382 wake_up(&c->submit_wq);
1383
1384 mutex_lock(&c->jobs_lock);
1385 list_for_each_entry_safe(job, n, &c->jobs, list) {
1386 bool completed = WARN_ON(!c->sync) ||
1387 c->sync->is_expired(c->sync, &job->fence);
1388 if (!completed)
1389 break;
1390
1391 gk20a_vm_put_buffers(vm, job->mapped_buffers,
1392 job->num_mapped_buffers);
1393
1394 /* job is done. release its reference to vm */
1395 gk20a_vm_put(vm);
1396
1397 list_del_init(&job->list);
1398 kfree(job);
1399 gk20a_channel_idle(g->dev);
1400 }
1401 mutex_unlock(&c->jobs_lock);
1402
1403 for (i = 0; i < nr_completed; i++)
1404 gk20a_channel_idle(c->g->dev);
1405}
1406
1407static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1408 struct nvhost_gpfifo *gpfifo,
1409 u32 num_entries,
1410 struct nvhost_fence *fence,
1411 u32 flags)
1412{
1413 struct gk20a *g = c->g;
1414 struct device *d = dev_from_gk20a(g);
1415 u32 err = 0;
1416 int i;
1417 struct priv_cmd_entry *wait_cmd = NULL;
1418 struct priv_cmd_entry *incr_cmd = NULL;
1419 /* we might need two extra gpfifo entries - one for pre fence
1420 * and one for post fence. */
1421 const int extra_entries = 2;
1422
1423 if (c->has_timedout)
1424 return -ETIMEDOUT;
1425
1426 if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1427 NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1428 !fence)
1429 return -EINVAL;
1430
1431 if (!c->sync) {
1432 c->sync = gk20a_channel_sync_create(c);
1433 if (!c->sync)
1434 return -ENOMEM;
1435 }
1436
1437#ifdef CONFIG_DEBUG_FS
1438 /* update debug settings */
1439 if (g->ops.ltc.sync_debugfs)
1440 g->ops.ltc.sync_debugfs(g);
1441#endif
1442
1443 gk20a_dbg_info("channel %d", c->hw_chid);
1444
1445 /* gk20a_channel_update releases this ref. */
1446 gk20a_channel_busy(g->dev);
1447
1448 trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1449 c->hw_chid,
1450 num_entries,
1451 flags,
1452 fence->syncpt_id, fence->value);
1453 check_gp_put(g, c);
1454 update_gp_get(g, c);
1455
1456 gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1457 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1458
1459 /* Invalidate tlb if it's dirty... */
1460 /* TBD: this should be done in the cmd stream, not with PRIs. */
1461 /* We don't know what context is currently running... */
1462 /* Note also: there can be more than one context associated with the */
1463 /* address space (vm). */
1464 gk20a_mm_tlb_invalidate(c->vm);
1465
1466 /* Make sure we have enough space for gpfifo entries. If not,
1467 * wait for signals from completed submits */
1468 if (gp_free_count(c) < num_entries + extra_entries) {
1469 err = wait_event_interruptible(c->submit_wq,
1470 get_gp_free_count(c) >= num_entries + extra_entries ||
1471 c->has_timedout);
1472 }
1473
1474 if (c->has_timedout) {
1475 err = -ETIMEDOUT;
1476 goto clean_up;
1477 }
1478
1479 if (err) {
1480 gk20a_err(d, "not enough gpfifo space");
1481 err = -EAGAIN;
1482 goto clean_up;
1483 }
1484
1485	/*
1486	 * Optionally insert a syncpt wait at the beginning of the gpfifo
1487	 * submission when the user requested one and the wait hasn't expired.
1488	 * Validate that the id makes sense and elide the wait if it doesn't;
1489	 * the only reason an invalid id isn't rejected outright is to keep
1490	 * running some tests which trigger this condition.
1491	 */
1492 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1493 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
1494 err = c->sync->wait_fd(c->sync, fence->syncpt_id,
1495 &wait_cmd);
1496 else
1497 err = c->sync->wait_syncpt(c->sync, fence->syncpt_id,
1498 fence->value, &wait_cmd);
1499 }
1500 if (err)
1501 goto clean_up;
1502
1503
1504 /* always insert syncpt increment at end of gpfifo submission
1505 to keep track of method completion for idle railgating */
1506 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
1507 flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
1508 err = c->sync->incr_user_fd(c->sync, &incr_cmd,
1509 &c->last_submit_fence,
1510 &fence->syncpt_id);
1511 else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1512 err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
1513 &c->last_submit_fence,
1514 &fence->syncpt_id,
1515 &fence->value);
1516 else
1517 err = c->sync->incr(c->sync, &incr_cmd,
1518 &c->last_submit_fence);
1519 if (err)
1520 goto clean_up;
1521
1522 if (wait_cmd) {
1523 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1524 u64_lo32(wait_cmd->gva);
1525 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1526 u64_hi32(wait_cmd->gva) |
1527 pbdma_gp_entry1_length_f(wait_cmd->size);
1528 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1529
1530 c->gpfifo.put = (c->gpfifo.put + 1) &
1531 (c->gpfifo.entry_num - 1);
1532
1533 /* save gp_put */
1534 wait_cmd->gp_put = c->gpfifo.put;
1535 }
1536
1537 for (i = 0; i < num_entries; i++) {
1538 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1539 gpfifo[i].entry0; /* cmd buf va low 32 */
1540 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1541 gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
1542 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1543 c->gpfifo.put = (c->gpfifo.put + 1) &
1544 (c->gpfifo.entry_num - 1);
1545 }
1546
1547 if (incr_cmd) {
1548 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1549 u64_lo32(incr_cmd->gva);
1550 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1551 u64_hi32(incr_cmd->gva) |
1552 pbdma_gp_entry1_length_f(incr_cmd->size);
1553 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1554
1555 c->gpfifo.put = (c->gpfifo.put + 1) &
1556 (c->gpfifo.entry_num - 1);
1557
1558 /* save gp_put */
1559 incr_cmd->gp_put = c->gpfifo.put;
1560 }
1561
1562 /* Invalidate tlb if it's dirty... */
1563 /* TBD: this should be done in the cmd stream, not with PRIs. */
1564 /* We don't know what context is currently running... */
1565 /* Note also: there can be more than one context associated with the */
1566 /* address space (vm). */
1567 gk20a_mm_tlb_invalidate(c->vm);
1568
1569 trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
1570 c->hw_chid,
1571 num_entries,
1572 flags,
1573 fence->syncpt_id, fence->value);
1574
1575 /* TODO! Check for errors... */
1576 gk20a_channel_add_job(c, &c->last_submit_fence);
1577
1578 c->cmds_pending = true;
1579 gk20a_bar1_writel(g,
1580 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1581 c->gpfifo.put);
1582
1583 gk20a_dbg_info("post-submit put %d, get %d, size %d",
1584 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1585
1586 gk20a_dbg_fn("done");
1587 return err;
1588
1589clean_up:
1590 gk20a_err(d, "fail");
1591 free_priv_cmdbuf(c, wait_cmd);
1592 free_priv_cmdbuf(c, incr_cmd);
1593 gk20a_channel_idle(g->dev);
1594 return err;
1595}
1596
1597void gk20a_remove_channel_support(struct channel_gk20a *c)
1598{
1599
1600}
1601
1602int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1603{
1604 struct channel_gk20a *c = g->fifo.channel+chid;
1605 c->g = g;
1606 c->in_use = false;
1607 c->hw_chid = chid;
1608 c->bound = false;
1609 c->remove_support = gk20a_remove_channel_support;
1610 mutex_init(&c->jobs_lock);
1611 INIT_LIST_HEAD(&c->jobs);
1612#if defined(CONFIG_GK20A_CYCLE_STATS)
1613 mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1614#endif
1615 INIT_LIST_HEAD(&c->dbg_s_list);
1616 mutex_init(&c->dbg_s_lock);
1617
1618 return 0;
1619}
1620
1621int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
1622{
1623 int err = 0;
1624
1625 if (!ch->cmds_pending)
1626 return 0;
1627
1628 /* Do not wait for a timedout channel */
1629 if (ch->has_timedout)
1630 return -ETIMEDOUT;
1631
1632 if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
1633 gk20a_dbg_fn("issuing wfi, incr to finish the channel");
1634 err = gk20a_channel_submit_wfi(ch);
1635 }
1636 if (err)
1637 return err;
1638
1639 BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));
1640
1641 gk20a_dbg_fn("waiting for channel to finish thresh:%d",
1642 ch->last_submit_fence.thresh);
1643
1644 err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence, timeout);
1645 if (WARN_ON(err))
1646 dev_warn(dev_from_gk20a(ch->g),
1647 "timed out waiting for gk20a channel to finish");
1648 else
1649 ch->cmds_pending = false;
1650
1651 return err;
1652}
1653
1654static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
1655 ulong id, u32 offset,
1656 u32 payload, long timeout)
1657{
1658 struct platform_device *pdev = ch->g->dev;
1659 struct dma_buf *dmabuf;
1660 void *data;
1661 u32 *semaphore;
1662 int ret = 0;
1663 long remain;
1664
1665 /* do not wait if channel has timed out */
1666 if (ch->has_timedout)
1667 return -ETIMEDOUT;
1668
1669 dmabuf = dma_buf_get(id);
1670 if (IS_ERR(dmabuf)) {
1671 gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
1672 id);
1673 return -EINVAL;
1674 }
1675
1676 data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
1677 if (!data) {
1678 gk20a_err(&pdev->dev, "failed to map notifier memory");
1679 ret = -EINVAL;
1680 goto cleanup_put;
1681 }
1682
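	/* kmap maps the page containing the semaphore; add the offset within
	 * that page to reach the 32-bit semaphore word itself. */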
1683 semaphore = data + (offset & ~PAGE_MASK);
1684
1685 remain = wait_event_interruptible_timeout(
1686 ch->semaphore_wq,
1687 *semaphore == payload || ch->has_timedout,
1688 timeout);
1689
1690 if (remain == 0 && *semaphore != payload)
1691 ret = -ETIMEDOUT;
1692 else if (remain < 0)
1693 ret = remain;
1694
1695 dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
1696cleanup_put:
1697 dma_buf_put(dmabuf);
1698 return ret;
1699}
1700
1701static int gk20a_channel_wait(struct channel_gk20a *ch,
1702 struct nvhost_wait_args *args)
1703{
1704 struct device *d = dev_from_gk20a(ch->g);
1705 struct dma_buf *dmabuf;
1706 struct notification *notif;
1707 struct timespec tv;
1708 u64 jiffies;
1709 ulong id;
1710 u32 offset;
1711 unsigned long timeout;
1712 int remain, ret = 0;
1713
1714 gk20a_dbg_fn("");
1715
1716 if (ch->has_timedout)
1717 return -ETIMEDOUT;
1718
1719 if (args->timeout == NVHOST_NO_TIMEOUT)
1720 timeout = MAX_SCHEDULE_TIMEOUT;
1721 else
1722 timeout = (u32)msecs_to_jiffies(args->timeout);
1723
1724 switch (args->type) {
1725 case NVHOST_WAIT_TYPE_NOTIFIER:
1726 id = args->condition.notifier.nvmap_handle;
1727 offset = args->condition.notifier.offset;
1728
1729 dmabuf = dma_buf_get(id);
1730 if (IS_ERR(dmabuf)) {
1731 gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
1732 id);
1733 return -EINVAL;
1734 }
1735
1736 notif = dma_buf_vmap(dmabuf);
1737 if (!notif) {
1738 gk20a_err(d, "failed to map notifier memory");
1739 return -ENOMEM;
1740 }
1741
1742 notif = (struct notification *)((uintptr_t)notif + offset);
1743
1744 /* user should set status pending before
1745 * calling this ioctl */
1746 remain = wait_event_interruptible_timeout(
1747 ch->notifier_wq,
1748 notif->status == 0 || ch->has_timedout,
1749 timeout);
1750
1751 if (remain == 0 && notif->status != 0) {
1752 ret = -ETIMEDOUT;
1753 goto notif_clean_up;
1754 } else if (remain < 0) {
1755 ret = -EINTR;
1756 goto notif_clean_up;
1757 }
1758
1759 /* TBD: fill in correct information */
1760 jiffies = get_jiffies_64();
1761 jiffies_to_timespec(jiffies, &tv);
1762 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
1763 notif->timestamp.nanoseconds[1] = tv.tv_sec;
1764 notif->info32 = 0xDEADBEEF; /* should be object name */
1765 notif->info16 = ch->hw_chid; /* should be method offset */
1766
1767notif_clean_up:
1768 dma_buf_vunmap(dmabuf, notif);
1769 return ret;
1770
1771 case NVHOST_WAIT_TYPE_SEMAPHORE:
1772 ret = gk20a_channel_wait_semaphore(ch,
1773 args->condition.semaphore.nvmap_handle,
1774 args->condition.semaphore.offset,
1775 args->condition.semaphore.payload,
1776 timeout);
1777
1778 break;
1779
1780 default:
1781 ret = -EINVAL;
1782 break;
1783 }
1784
1785 return ret;
1786}
1787
1788static int gk20a_channel_set_priority(struct channel_gk20a *ch,
1789 u32 priority)
1790{
1791 u32 timeslice_timeout;
1792 /* set priority of graphics channel */
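	/* The raw timeslice value is scaled by 8 (value << 3) to yield the
	 * slice length in microseconds, as the per-case comments note. */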
1793 switch (priority) {
1794 case NVHOST_PRIORITY_LOW:
1795 /* 64 << 3 = 512us */
1796 timeslice_timeout = 64;
1797 break;
1798 case NVHOST_PRIORITY_MEDIUM:
1799 /* 128 << 3 = 1024us */
1800 timeslice_timeout = 128;
1801 break;
1802 case NVHOST_PRIORITY_HIGH:
1803		/* 255 << 3 = 2040us */
1804 timeslice_timeout = 255;
1805 break;
1806 default:
1807 pr_err("Unsupported priority");
1808 return -EINVAL;
1809 }
1810 channel_gk20a_set_schedule_params(ch,
1811 timeslice_timeout);
1812 return 0;
1813}
1814
1815static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
1816 struct nvhost_zcull_bind_args *args)
1817{
1818 struct gk20a *g = ch->g;
1819 struct gr_gk20a *gr = &g->gr;
1820
1821 gk20a_dbg_fn("");
1822
1823 return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
1824 args->gpu_va, args->mode);
1825}
1826
1827/* in this context the "channel" is the host1x channel which
1828 * maps to *all* gk20a channels */
1829int gk20a_channel_suspend(struct gk20a *g)
1830{
1831 struct fifo_gk20a *f = &g->fifo;
1832 u32 chid;
1833 bool channels_in_use = false;
1834 struct device *d = dev_from_gk20a(g);
1835 int err;
1836
1837 gk20a_dbg_fn("");
1838
1839 /* idle the engine by submitting WFI on non-KEPLER_C channel */
1840 for (chid = 0; chid < f->num_channels; chid++) {
1841 struct channel_gk20a *c = &f->channel[chid];
1842 if (c->in_use && c->obj_class != KEPLER_C) {
1843 err = gk20a_channel_submit_wfi(c);
1844 if (err) {
1845 gk20a_err(d, "cannot idle channel %d\n",
1846 chid);
1847 return err;
1848 }
1849
1850 c->sync->wait_cpu(c->sync, &c->last_submit_fence,
1851 500000);
1852 break;
1853 }
1854 }
1855
1856 for (chid = 0; chid < f->num_channels; chid++) {
1857 if (f->channel[chid].in_use) {
1858
1859 gk20a_dbg_info("suspend channel %d", chid);
1860 /* disable channel */
1861 gk20a_writel(g, ccsr_channel_r(chid),
1862 gk20a_readl(g, ccsr_channel_r(chid)) |
1863 ccsr_channel_enable_clr_true_f());
1864 /* preempt the channel */
1865 gk20a_fifo_preempt_channel(g, chid);
1866
1867 channels_in_use = true;
1868 }
1869 }
1870
1871 if (channels_in_use) {
1872 gk20a_fifo_update_runlist(g, 0, ~0, false, true);
1873
1874 for (chid = 0; chid < f->num_channels; chid++) {
1875 if (f->channel[chid].in_use)
1876 channel_gk20a_unbind(&f->channel[chid]);
1877 }
1878 }
1879
1880 gk20a_dbg_fn("done");
1881 return 0;
1882}
1883
1884/* in this context the "channel" is the host1x channel which
1885 * maps to *all* gk20a channels */
1886int gk20a_channel_resume(struct gk20a *g)
1887{
1888 struct fifo_gk20a *f = &g->fifo;
1889 u32 chid;
1890 bool channels_in_use = false;
1891
1892 gk20a_dbg_fn("");
1893
1894 for (chid = 0; chid < f->num_channels; chid++) {
1895 if (f->channel[chid].in_use) {
1896 gk20a_dbg_info("resume channel %d", chid);
1897 g->ops.fifo.bind_channel(&f->channel[chid]);
1898 channels_in_use = true;
1899 }
1900 }
1901
1902 if (channels_in_use)
1903 gk20a_fifo_update_runlist(g, 0, ~0, true, true);
1904
1905 gk20a_dbg_fn("done");
1906 return 0;
1907}
1908
1909void gk20a_channel_semaphore_wakeup(struct gk20a *g)
1910{
1911 struct fifo_gk20a *f = &g->fifo;
1912 u32 chid;
1913
1914 gk20a_dbg_fn("");
1915
1916 for (chid = 0; chid < f->num_channels; chid++) {
1917 struct channel_gk20a *c = g->fifo.channel+chid;
1918 if (c->in_use)
1919 wake_up_interruptible_all(&c->semaphore_wq);
1920 }
1921}
1922
1923static int gk20a_ioctl_channel_submit_gpfifo(
1924 struct channel_gk20a *ch,
1925 struct nvhost_submit_gpfifo_args *args)
1926{
1927 void *gpfifo;
1928 u32 size;
1929 int ret = 0;
1930
1931 gk20a_dbg_fn("");
1932
1933 if (ch->has_timedout)
1934 return -ETIMEDOUT;
1935
1936 size = args->num_entries * sizeof(struct nvhost_gpfifo);
1937
1938 gpfifo = kzalloc(size, GFP_KERNEL);
1939 if (!gpfifo)
1940 return -ENOMEM;
1941
1942 if (copy_from_user(gpfifo,
1943 (void __user *)(uintptr_t)args->gpfifo, size)) {
1944 ret = -EINVAL;
1945 goto clean_up;
1946 }
1947
1948 ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
1949 &args->fence, args->flags);
1950
1951clean_up:
1952 kfree(gpfifo);
1953 return ret;
1954}
1955
1956void gk20a_init_fifo(struct gpu_ops *gops)
1957{
1958 gops->fifo.bind_channel = channel_gk20a_bind;
1959}
1960
1961long gk20a_channel_ioctl(struct file *filp,
1962 unsigned int cmd, unsigned long arg)
1963{
1964 struct channel_gk20a *ch = filp->private_data;
1965 struct platform_device *dev = ch->g->dev;
1966 u8 buf[NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE];
1967 int err = 0;
1968
1969 if ((_IOC_TYPE(cmd) != NVHOST_IOCTL_MAGIC) ||
1970 (_IOC_NR(cmd) == 0) ||
1971 (_IOC_NR(cmd) > NVHOST_IOCTL_CHANNEL_LAST) ||
1972 (_IOC_SIZE(cmd) > NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE))
1973 return -EFAULT;
1974
1975 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1976 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
1977 return -EFAULT;
1978 }
1979
1980 switch (cmd) {
1981 case NVHOST_IOCTL_CHANNEL_OPEN:
1982 {
1983 int fd;
1984 struct file *file;
1985 char *name;
1986
1987 err = get_unused_fd_flags(O_RDWR);
1988 if (err < 0)
1989 break;
1990 fd = err;
1991
1992 name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
1993 dev_name(&dev->dev), fd);
1994 if (!name) {
1995 err = -ENOMEM;
1996 put_unused_fd(fd);
1997 break;
1998 }
1999
2000 file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR);
2001 kfree(name);
2002 if (IS_ERR(file)) {
2003 err = PTR_ERR(file);
2004 put_unused_fd(fd);
2005 break;
2006 }
2007 fd_install(fd, file);
2008
2009 err = __gk20a_channel_open(ch->g, file);
2010 if (err) {
2011 put_unused_fd(fd);
2012 fput(file);
2013 break;
2014 }
2015
2016 ((struct nvhost_channel_open_args *)buf)->channel_fd = fd;
2017 break;
2018 }
2019 case NVHOST_IOCTL_CHANNEL_SET_NVMAP_FD:
2020 break;
2021 case NVHOST_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2022 gk20a_channel_busy(dev);
2023 err = gk20a_alloc_obj_ctx(ch,
2024 (struct nvhost_alloc_obj_ctx_args *)buf);
2025 gk20a_channel_idle(dev);
2026 break;
2027 case NVHOST_IOCTL_CHANNEL_FREE_OBJ_CTX:
2028 gk20a_channel_busy(dev);
2029 err = gk20a_free_obj_ctx(ch,
2030 (struct nvhost_free_obj_ctx_args *)buf);
2031 gk20a_channel_idle(dev);
2032 break;
2033 case NVHOST_IOCTL_CHANNEL_ALLOC_GPFIFO:
2034 gk20a_channel_busy(dev);
2035 err = gk20a_alloc_channel_gpfifo(ch,
2036 (struct nvhost_alloc_gpfifo_args *)buf);
2037 gk20a_channel_idle(dev);
2038 break;
2039 case NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2040 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2041 (struct nvhost_submit_gpfifo_args *)buf);
2042 break;
2043 case NVHOST_IOCTL_CHANNEL_WAIT:
2044 gk20a_channel_busy(dev);
2045 err = gk20a_channel_wait(ch,
2046 (struct nvhost_wait_args *)buf);
2047 gk20a_channel_idle(dev);
2048 break;
2049 case NVHOST_IOCTL_CHANNEL_ZCULL_BIND:
2050 gk20a_channel_busy(dev);
2051 err = gk20a_channel_zcull_bind(ch,
2052 (struct nvhost_zcull_bind_args *)buf);
2053 gk20a_channel_idle(dev);
2054 break;
2055 case NVHOST_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2056 gk20a_channel_busy(dev);
2057 err = gk20a_init_error_notifier(ch,
2058 (struct nvhost_set_error_notifier *)buf);
2059 gk20a_channel_idle(dev);
2060 break;
2061#ifdef CONFIG_GK20A_CYCLE_STATS
2062 case NVHOST_IOCTL_CHANNEL_CYCLE_STATS:
2063 gk20a_channel_busy(dev);
2064 err = gk20a_channel_cycle_stats(ch,
2065 (struct nvhost_cycle_stats_args *)buf);
2066 gk20a_channel_idle(dev);
2067 break;
2068#endif
2069 case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT:
2070 {
2071 u32 timeout =
2072 (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
2073 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2074 timeout, ch->hw_chid);
2075 ch->timeout_ms_max = timeout;
2076 break;
2077 }
2078 case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2079 {
2080 u32 timeout =
2081 (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
2082 bool timeout_debug_dump = !((u32)
2083 ((struct nvhost_set_timeout_ex_args *)buf)->flags &
2084 (1 << NVHOST_TIMEOUT_FLAG_DISABLE_DUMP));
2085 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2086 timeout, ch->hw_chid);
2087 ch->timeout_ms_max = timeout;
2088 ch->timeout_debug_dump = timeout_debug_dump;
2089 break;
2090 }
2091 case NVHOST_IOCTL_CHANNEL_GET_TIMEDOUT:
2092 ((struct nvhost_get_param_args *)buf)->value =
2093 ch->has_timedout;
2094 break;
2095 case NVHOST_IOCTL_CHANNEL_SET_PRIORITY:
2096 gk20a_channel_busy(dev);
2097 gk20a_channel_set_priority(ch,
2098 ((struct nvhost_set_priority_args *)buf)->priority);
2099 gk20a_channel_idle(dev);
2100 break;
2101 default:
2102 dev_err(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2103 err = -ENOTTY;
2104 break;
2105 }
2106
2107 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2108 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
2109
2110 return err;
2111}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
new file mode 100644
index 00000000..429db85d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -0,0 +1,172 @@
1/*
2 * drivers/video/tegra/host/gk20a/channel_gk20a.h
3 *
4 * GK20A graphics channel
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#ifndef __CHANNEL_GK20A_H__
22#define __CHANNEL_GK20A_H__
23
24#include <linux/log2.h>
25#include <linux/slab.h>
26#include <linux/wait.h>
27#include <linux/mutex.h>
28#include <linux/nvhost_ioctl.h>
29struct gk20a;
30struct gr_gk20a;
31struct dbg_session_gk20a;
32
33#include "channel_sync_gk20a.h"
34
35#include "mm_gk20a.h"
36#include "gr_gk20a.h"
37
38struct gpfifo {
39 u32 entry0;
40 u32 entry1;
41};
42
43struct notification {
44 struct {
45 u32 nanoseconds[2];
46 } timestamp;
47 u32 info32;
48 u16 info16;
49 u16 status;
50};
51
52struct fence {
53 u32 hw_chid;
54 u32 syncpt_val;
55};
56
57/* contexts associated with a channel */
58struct channel_ctx_gk20a {
59 struct gr_ctx_desc gr_ctx;
60 struct pm_ctx_desc pm_ctx;
61 struct patch_desc patch_ctx;
62 struct zcull_ctx_desc zcull_ctx;
63 u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
64 u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
65 bool global_ctx_buffer_mapped;
66};
67
68struct channel_gk20a_job {
69 struct mapped_buffer_node **mapped_buffers;
70 int num_mapped_buffers;
71 struct gk20a_channel_fence fence;
72 struct list_head list;
73};
74
75/* this is the priv element of struct nvhost_channel */
76struct channel_gk20a {
77 struct gk20a *g;
78 bool in_use;
79 int hw_chid;
80 bool bound;
81 bool first_init;
82 bool vpr;
83 pid_t pid;
84
85 struct list_head jobs;
86 struct mutex jobs_lock;
87
88 struct vm_gk20a *vm;
89
90 struct gpfifo_desc gpfifo;
91
92 struct channel_ctx_gk20a ch_ctx;
93
94 struct inst_desc inst_block;
95 struct mem_desc_sub ramfc;
96
97 void *userd_cpu_va;
98 u64 userd_iova;
99 u64 userd_gpu_va;
100
101 s32 num_objects;
102 u32 obj_class; /* we support only one obj per channel */
103
104 struct priv_cmd_queue priv_cmd_q;
105
106 wait_queue_head_t notifier_wq;
107 wait_queue_head_t semaphore_wq;
108 wait_queue_head_t submit_wq;
109
110 u32 timeout_accumulated_ms;
111 u32 timeout_gpfifo_get;
112
113 bool cmds_pending;
114 struct gk20a_channel_fence last_submit_fence;
115
116 void (*remove_support)(struct channel_gk20a *);
117#if defined(CONFIG_GK20A_CYCLE_STATS)
118 struct {
119 void *cyclestate_buffer;
120 u32 cyclestate_buffer_size;
121 struct dma_buf *cyclestate_buffer_handler;
122 struct mutex cyclestate_buffer_mutex;
123 } cyclestate;
124#endif
125 struct mutex dbg_s_lock;
126 struct list_head dbg_s_list;
127
128 bool has_timedout;
129 u32 timeout_ms_max;
130 bool timeout_debug_dump;
131
132 struct dma_buf *error_notifier_ref;
133 struct nvhost_notification *error_notifier;
134 void *error_notifier_va;
135
136 struct gk20a_channel_sync *sync;
137};
138
139static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
140{
141 return !!ch->vm;
142}
143int channel_gk20a_commit_va(struct channel_gk20a *c);
144int gk20a_init_channel_support(struct gk20a *, u32 chid);
145void gk20a_free_channel(struct channel_gk20a *ch, bool finish);
146bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
147 u32 timeout_delta_ms);
148void gk20a_disable_channel(struct channel_gk20a *ch,
149 bool wait_for_finish,
150 unsigned long finish_timeout);
151void gk20a_disable_channel_no_update(struct channel_gk20a *ch);
152int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout);
153void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error);
154void gk20a_channel_semaphore_wakeup(struct gk20a *g);
155int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
156 struct priv_cmd_entry **entry);
157
158int gk20a_channel_suspend(struct gk20a *g);
159int gk20a_channel_resume(struct gk20a *g);
160
161/* Channel file operations */
162int gk20a_channel_open(struct inode *inode, struct file *filp);
163long gk20a_channel_ioctl(struct file *filp,
164 unsigned int cmd,
165 unsigned long arg);
166int gk20a_channel_release(struct inode *inode, struct file *filp);
167struct channel_gk20a *gk20a_get_channel_from_file(int fd);
168void gk20a_channel_update(struct channel_gk20a *c, int nr_completed);
169
170void gk20a_init_fifo(struct gpu_ops *gops);
171
172#endif /*__CHANNEL_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
new file mode 100644
index 00000000..9f9c3ba7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -0,0 +1,356 @@
1/*
2 * drivers/video/tegra/host/gk20a/channel_sync_gk20a.c
3 *
4 * GK20A Channel Synchronization Abstraction
5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#include <linux/gk20a.h>
19
20#include "channel_sync_gk20a.h"
21#include "gk20a.h"
22
23#ifdef CONFIG_SYNC
24#include "../../../staging/android/sync.h"
25#endif
26
27#ifdef CONFIG_TEGRA_GK20A
28#include <linux/nvhost.h>
29#endif
30
31#ifdef CONFIG_TEGRA_GK20A
32
33struct gk20a_channel_syncpt {
34 struct gk20a_channel_sync ops;
35 struct channel_gk20a *c;
36 struct platform_device *host1x_pdev;
37 u32 id;
38};
39
40static void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
41{
42 /* syncpoint_a */
43 ptr[0] = 0x2001001C;
44 /* payload */
45 ptr[1] = thresh;
46 /* syncpoint_b */
47 ptr[2] = 0x2001001D;
48 /* syncpt_id, switch_en, wait */
49 ptr[3] = (id << 8) | 0x10;
50}
51
52int gk20a_channel_syncpt_wait_cpu(struct gk20a_channel_sync *s,
53 struct gk20a_channel_fence *fence,
54 int timeout)
55{
56 struct gk20a_channel_syncpt *sp =
57 container_of(s, struct gk20a_channel_syncpt, ops);
58 if (!fence->valid)
59 return 0;
60 return nvhost_syncpt_wait_timeout_ext(
61 sp->host1x_pdev, sp->id, fence->thresh,
62 timeout, NULL, NULL);
63}
64
65bool gk20a_channel_syncpt_is_expired(struct gk20a_channel_sync *s,
66 struct gk20a_channel_fence *fence)
67{
68 struct gk20a_channel_syncpt *sp =
69 container_of(s, struct gk20a_channel_syncpt, ops);
70 if (!fence->valid)
71 return true;
72 return nvhost_syncpt_is_expired_ext(sp->host1x_pdev, sp->id,
73 fence->thresh);
74}
75
76int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id,
77 u32 thresh, struct priv_cmd_entry **entry)
78{
79 struct gk20a_channel_syncpt *sp =
80 container_of(s, struct gk20a_channel_syncpt, ops);
81 struct priv_cmd_entry *wait_cmd = NULL;
82
83 if (id >= nvhost_syncpt_nb_pts_ext(sp->host1x_pdev)) {
84 dev_warn(dev_from_gk20a(sp->c->g),
85 "invalid wait id in gpfifo submit, elided");
86 return 0;
87 }
88
89 if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))
90 return 0;
91
92 gk20a_channel_alloc_priv_cmdbuf(sp->c, 4, &wait_cmd);
93 if (wait_cmd == NULL) {
94 gk20a_err(dev_from_gk20a(sp->c->g),
95 "not enough priv cmd buffer space");
96 return -EAGAIN;
97 }
98
99 add_wait_cmd(&wait_cmd->ptr[0], id, thresh);
100
101 *entry = wait_cmd;
102 return 0;
103}
104
105int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
106 struct priv_cmd_entry **entry)
107{
108#ifdef CONFIG_SYNC
109 int i;
110 int num_wait_cmds;
111 struct sync_pt *pt;
112 struct sync_fence *sync_fence;
113 struct priv_cmd_entry *wait_cmd = NULL;
114 struct gk20a_channel_syncpt *sp =
115 container_of(s, struct gk20a_channel_syncpt, ops);
116 struct channel_gk20a *c = sp->c;
117
118 sync_fence = nvhost_sync_fdget(fd);
119 if (!sync_fence)
120 return -EINVAL;
121
122 num_wait_cmds = nvhost_sync_num_pts(sync_fence);
123 gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd);
124 if (wait_cmd == NULL) {
125 gk20a_err(dev_from_gk20a(c->g),
126 "not enough priv cmd buffer space");
127 sync_fence_put(sync_fence);
128 return -EAGAIN;
129 }
130
131 i = 0;
132 list_for_each_entry(pt, &sync_fence->pt_list_head, pt_list) {
133 u32 wait_id = nvhost_sync_pt_id(pt);
134 u32 wait_value = nvhost_sync_pt_thresh(pt);
135
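		/* For an already-signalled point, fill its slot with zero
		 * words so the preallocated 4-words-per-point layout of the
		 * wait command buffer is preserved. */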
136 if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev,
137 wait_id, wait_value)) {
138 wait_cmd->ptr[i * 4 + 0] = 0;
139 wait_cmd->ptr[i * 4 + 1] = 0;
140 wait_cmd->ptr[i * 4 + 2] = 0;
141 wait_cmd->ptr[i * 4 + 3] = 0;
142 } else
143 add_wait_cmd(&wait_cmd->ptr[i * 4], wait_id,
144 wait_value);
145 i++;
146 }
147 WARN_ON(i != num_wait_cmds);
148 sync_fence_put(sync_fence);
149
150 *entry = wait_cmd;
151 return 0;
152#else
153 return -ENODEV;
154#endif
155}
156
157static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
158{
159 struct channel_gk20a *ch20a = priv;
160 gk20a_channel_update(ch20a, nr_completed);
161}
162
163static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
164 bool gfx_class, bool wfi_cmd,
165 struct priv_cmd_entry **entry,
166 struct gk20a_channel_fence *fence)
167{
168 u32 thresh;
169 int incr_cmd_size;
170 int j = 0;
171 int err;
172 struct priv_cmd_entry *incr_cmd = NULL;
173 struct gk20a_channel_syncpt *sp =
174 container_of(s, struct gk20a_channel_syncpt, ops);
175 struct channel_gk20a *c = sp->c;
176
177 /* nvhost action_gpfifo_submit_complete releases this ref. */
178 err = gk20a_channel_busy(c->g->dev);
179 if (err)
180 return err;
181
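	/* 4 words cover the syncpoint_a/b increment sequence; the wfi method
	 * and its (ignored) handle add 2 more words when requested. */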
182 incr_cmd_size = 4;
183 if (wfi_cmd)
184 incr_cmd_size += 2;
185
186 gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
187 if (incr_cmd == NULL) {
188 gk20a_channel_idle(c->g->dev);
189 gk20a_err(dev_from_gk20a(c->g),
190 "not enough priv cmd buffer space");
191 return -EAGAIN;
192 }
193
194 if (gfx_class) {
195 WARN_ON(wfi_cmd); /* No sense to use gfx class + wfi. */
196 /* setobject KEPLER_C */
197 incr_cmd->ptr[j++] = 0x20010000;
198 incr_cmd->ptr[j++] = KEPLER_C;
199 /* syncpt incr */
200 incr_cmd->ptr[j++] = 0x200100B2;
201 incr_cmd->ptr[j++] = sp->id |
202 (0x1 << 20) | (0x1 << 16);
203 } else {
204 if (wfi_cmd) {
205 /* wfi */
206 incr_cmd->ptr[j++] = 0x2001001E;
207 /* handle, ignored */
208 incr_cmd->ptr[j++] = 0x00000000;
209 }
210 /* syncpoint_a */
211 incr_cmd->ptr[j++] = 0x2001001C;
212 /* payload, ignored */
213 incr_cmd->ptr[j++] = 0;
214 /* syncpoint_b */
215 incr_cmd->ptr[j++] = 0x2001001D;
216 /* syncpt_id, incr */
217 incr_cmd->ptr[j++] = (sp->id << 8) | 0x1;
218 }
219 WARN_ON(j != incr_cmd_size);
220
221 thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 1);
222
223 err = nvhost_intr_register_notifier(sp->host1x_pdev, sp->id, thresh,
224 gk20a_channel_syncpt_update, c);
225
226	/* Adding the interrupt action should never fail. Proper error handling
227	 * here would require us to decrement the syncpt max back to its
228	 * original value. */
229 if (WARN(err, "failed to set submit complete interrupt")) {
230 gk20a_channel_idle(c->g->dev);
231 err = 0; /* Ignore this error. */
232 }
233
234 fence->thresh = thresh;
235 fence->valid = true;
236 fence->wfi = wfi_cmd;
237 *entry = incr_cmd;
238 return 0;
239}
240
241int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
242 struct priv_cmd_entry **entry,
243 struct gk20a_channel_fence *fence)
244{
245 return __gk20a_channel_syncpt_incr(s,
246 false /* use host class */,
247 true /* wfi */,
248 entry, fence);
249}
250
251int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
252 struct priv_cmd_entry **entry,
253 struct gk20a_channel_fence *fence)
254{
255 struct gk20a_channel_syncpt *sp =
256 container_of(s, struct gk20a_channel_syncpt, ops);
257	/* Don't add a wfi cmd to this one since we're not returning
258	 * a fence to user space. */
259 return __gk20a_channel_syncpt_incr(s,
260 sp->c->obj_class == KEPLER_C /* may use gfx class */,
261 false /* no wfi */,
262 entry, fence);
263}
264
265int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s,
266 struct priv_cmd_entry **entry,
267 struct gk20a_channel_fence *fence,
268 u32 *id, u32 *thresh)
269{
270 struct gk20a_channel_syncpt *sp =
271 container_of(s, struct gk20a_channel_syncpt, ops);
272 /* Need to do 'host incr + wfi' or 'gfx incr' since we return the fence
273 * to user space. */
274 int err = __gk20a_channel_syncpt_incr(s,
275 sp->c->obj_class == KEPLER_C /* use gfx class? */,
276 sp->c->obj_class != KEPLER_C /* wfi if host class */,
277 entry, fence);
278 if (err)
279 return err;
280 *id = sp->id;
281 *thresh = fence->thresh;
282 return 0;
283}
284
285int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s,
286 struct priv_cmd_entry **entry,
287 struct gk20a_channel_fence *fence,
288 int *fd)
289{
290#ifdef CONFIG_SYNC
291 int err;
292 struct nvhost_ctrl_sync_fence_info pt;
293 struct gk20a_channel_syncpt *sp =
294 container_of(s, struct gk20a_channel_syncpt, ops);
295 err = gk20a_channel_syncpt_incr_user_syncpt(s, entry, fence,
296 &pt.id, &pt.thresh);
297 if (err)
298 return err;
299 return nvhost_sync_create_fence_fd(sp->host1x_pdev, &pt, 1,
300 "fence", fd);
301#else
302 return -ENODEV;
303#endif
304}
305
306void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
307{
308 struct gk20a_channel_syncpt *sp =
309 container_of(s, struct gk20a_channel_syncpt, ops);
310 nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
311}
312
313static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
314{
315 struct gk20a_channel_syncpt *sp =
316 container_of(s, struct gk20a_channel_syncpt, ops);
317 nvhost_free_syncpt(sp->id);
318 kfree(sp);
319}
320
321static struct gk20a_channel_sync *
322gk20a_channel_syncpt_create(struct channel_gk20a *c)
323{
324 struct gk20a_channel_syncpt *sp;
325
326 sp = kzalloc(sizeof(*sp), GFP_KERNEL);
327 if (!sp)
328 return NULL;
329
330 sp->c = c;
331 sp->host1x_pdev = to_platform_device(c->g->dev->dev.parent);
332 sp->id = nvhost_get_syncpt_host_managed(sp->host1x_pdev, c->hw_chid);
333
334 sp->ops.wait_cpu = gk20a_channel_syncpt_wait_cpu;
335 sp->ops.is_expired = gk20a_channel_syncpt_is_expired;
336 sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt;
337 sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd;
338 sp->ops.incr = gk20a_channel_syncpt_incr;
339 sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi;
340 sp->ops.incr_user_syncpt = gk20a_channel_syncpt_incr_user_syncpt;
341 sp->ops.incr_user_fd = gk20a_channel_syncpt_incr_user_fd;
342 sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max;
343 sp->ops.destroy = gk20a_channel_syncpt_destroy;
344 return &sp->ops;
345}
346#endif /* CONFIG_TEGRA_GK20A */
347
348struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
349{
350#ifdef CONFIG_TEGRA_GK20A
351 if (gk20a_platform_has_syncpoints(c->g->dev))
352 return gk20a_channel_syncpt_create(c);
353#endif
354 WARN_ON(1);
355 return NULL;
356}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
new file mode 100644
index 00000000..69feb89f
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -0,0 +1,102 @@
1/*
2 * drivers/video/tegra/host/gk20a/channel_sync_gk20a.h
3 *
4 * GK20A Channel Synchronization Abstraction
5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#ifndef _GK20A_CHANNEL_SYNC_H_
19#define _GK20A_CHANNEL_SYNC_H_
20
21#include <linux/types.h>
22
23struct gk20a_channel_sync;
24struct priv_cmd_entry;
25struct channel_gk20a;
26
27struct gk20a_channel_fence {
28 bool valid;
29 bool wfi; /* was issued with preceding wfi */
30 u32 thresh; /* either semaphore or syncpoint value */
31};
32
33struct gk20a_channel_sync {
34 /* CPU wait for a fence returned by incr_syncpt() or incr_fd(). */
35 int (*wait_cpu)(struct gk20a_channel_sync *s,
36 struct gk20a_channel_fence *fence,
37 int timeout);
38
39 /* Test whether a fence returned by incr_syncpt() or incr_fd() is
40 * expired. */
41 bool (*is_expired)(struct gk20a_channel_sync *s,
42 struct gk20a_channel_fence *fence);
43
44 /* Generate a gpu wait cmdbuf from syncpoint. */
45 int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh,
46 struct priv_cmd_entry **entry);
47
48 /* Generate a gpu wait cmdbuf from sync fd. */
49 int (*wait_fd)(struct gk20a_channel_sync *s, int fd,
50 struct priv_cmd_entry **entry);
51
52 /* Increment syncpoint/semaphore.
53 * Returns
54 * - a gpu cmdbuf that performs the increment when executed,
55 * - a fence that can be passed to wait_cpu() and is_expired().
56 */
57 int (*incr)(struct gk20a_channel_sync *s,
58 struct priv_cmd_entry **entry,
59 struct gk20a_channel_fence *fence);
60
61 /* Increment syncpoint/semaphore, preceded by a wfi.
62 * Returns
63 * - a gpu cmdbuf that performs the increment when executed,
64 * - a fence that can be passed to wait_cpu() and is_expired().
65 */
66 int (*incr_wfi)(struct gk20a_channel_sync *s,
67 struct priv_cmd_entry **entry,
68 struct gk20a_channel_fence *fence);
69
70 /* Increment syncpoint, so that the returned fence represents
71 * work completion (may need wfi) and can be returned to user space.
72 * Returns
73 * - a gpu cmdbuf that performs the increment when executed,
74 * - a fence that can be passed to wait_cpu() and is_expired(),
75 * - a syncpoint id/value pair that can be returned to user space.
76 */
77 int (*incr_user_syncpt)(struct gk20a_channel_sync *s,
78 struct priv_cmd_entry **entry,
79 struct gk20a_channel_fence *fence,
80 u32 *id, u32 *thresh);
81
82 /* Increment syncpoint/semaphore, so that the returned fence represents
83 * work completion (may need wfi) and can be returned to user space.
84 * Returns
85 * - a gpu cmdbuf that performs the increment when executed,
86 * - a fence that can be passed to wait_cpu() and is_expired(),
87 * - a sync fd that can be returned to user space.
88 */
89 int (*incr_user_fd)(struct gk20a_channel_sync *s,
90 struct priv_cmd_entry **entry,
91 struct gk20a_channel_fence *fence,
92 int *fd);
93
94 /* Reset the channel syncpoint/semaphore. */
95 void (*set_min_eq_max)(struct gk20a_channel_sync *s);
96
97 /* Free the resources allocated by gk20a_channel_sync_create. */
98 void (*destroy)(struct gk20a_channel_sync *s);
99};
100
101struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
102#endif
diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.c b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c
new file mode 100644
index 00000000..151a332b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c
@@ -0,0 +1,865 @@
1/*
2 * drivers/video/tegra/host/gk20a/clk_gk20a.c
3 *
4 * GK20A Clocks
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#include <linux/clk.h>
22#include <linux/delay.h> /* for mdelay */
23#include <linux/module.h>
24#include <linux/debugfs.h>
25#include <linux/clk/tegra.h>
26#include <mach/thermal.h>
27
28#include "gk20a.h"
29#include "hw_trim_gk20a.h"
30#include "hw_timer_gk20a.h"
31
32#define gk20a_dbg_clk(fmt, arg...) \
33 gk20a_dbg(gpu_dbg_clk, fmt, ##arg)
34
35/* from vbios PLL info table */
36struct pll_parms gpc_pll_params = {
37 144, 2064, /* freq */
38 1000, 2064, /* vco */
39 12, 38, /* u */
40 1, 255, /* M */
41 8, 255, /* N */
42 1, 32, /* PL */
43};
44
45static int num_gpu_cooling_freq;
46static struct gpufreq_table_data *gpu_cooling_freq;
47
48struct gpufreq_table_data *tegra_gpufreq_table_get(void)
49{
50 return gpu_cooling_freq;
51}
52
53unsigned int tegra_gpufreq_table_size_get(void)
54{
55 return num_gpu_cooling_freq;
56}
57
58static u8 pl_to_div[] = {
59/* PL: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 */
60/* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32 };
61
62/* Calculate and update M/N/PL as well as pll->freq
63 ref_clk_f = clk_in_f / src_div = clk_in_f; (src_div = 1 on gk20a)
64 u_f = ref_clk_f / M;
65 PLL output = vco_f = u_f * N = ref_clk_f * N / M;
66 gpc2clk = target clock frequency = vco_f / PL;
67 gpcclk = gpc2clk / 2; */
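/* For illustration only: with clk_in = 12 MHz, M = 1, N = 172 and a PL
   divider of 2, vco_f = 12 * 172 / 1 = 2064 MHz, so gpc2clk = 1032 MHz
   and gpcclk = 516 MHz. */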
68static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll,
69 struct pll_parms *pll_params, u32 *target_freq, bool best_fit)
70{
71 u32 min_vco_f, max_vco_f;
72 u32 best_M, best_N;
73 u32 low_PL, high_PL, best_PL;
74 u32 m, n, n2;
75 u32 target_vco_f, vco_f;
76 u32 ref_clk_f, target_clk_f, u_f;
77 u32 delta, lwv, best_delta = ~0;
78 int pl;
79
80 BUG_ON(target_freq == NULL);
81
82 gk20a_dbg_fn("request target freq %d MHz", *target_freq);
83
84 ref_clk_f = pll->clk_in;
85 target_clk_f = *target_freq;
86 max_vco_f = pll_params->max_vco;
87 min_vco_f = pll_params->min_vco;
88 best_M = pll_params->max_M;
89 best_N = pll_params->min_N;
90 best_PL = pll_params->min_PL;
91
92 target_vco_f = target_clk_f + target_clk_f / 50;
93 if (max_vco_f < target_vco_f)
94 max_vco_f = target_vco_f;
95
96 high_PL = (max_vco_f + target_vco_f - 1) / target_vco_f;
97 high_PL = min(high_PL, pll_params->max_PL);
98 high_PL = max(high_PL, pll_params->min_PL);
99
100 low_PL = min_vco_f / target_vco_f;
101 low_PL = min(low_PL, pll_params->max_PL);
102 low_PL = max(low_PL, pll_params->min_PL);
103
104 /* Find Indices of high_PL and low_PL */
105 for (pl = 0; pl < 14; pl++) {
106 if (pl_to_div[pl] >= low_PL) {
107 low_PL = pl;
108 break;
109 }
110 }
111 for (pl = 0; pl < 14; pl++) {
112 if (pl_to_div[pl] >= high_PL) {
113 high_PL = pl;
114 break;
115 }
116 }
117 gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)",
118 low_PL, pl_to_div[low_PL], high_PL, pl_to_div[high_PL]);
119
120 for (pl = low_PL; pl <= high_PL; pl++) {
121 target_vco_f = target_clk_f * pl_to_div[pl];
122
123 for (m = pll_params->min_M; m <= pll_params->max_M; m++) {
124 u_f = ref_clk_f / m;
125
126 if (u_f < pll_params->min_u)
127 break;
128 if (u_f > pll_params->max_u)
129 continue;
130
131 n = (target_vco_f * m) / ref_clk_f;
132 n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f;
133
134 if (n > pll_params->max_N)
135 break;
136
137 for (; n <= n2; n++) {
138 if (n < pll_params->min_N)
139 continue;
140 if (n > pll_params->max_N)
141 break;
142
143 vco_f = ref_clk_f * n / m;
144
145 if (vco_f >= min_vco_f && vco_f <= max_vco_f) {
146 lwv = (vco_f + (pl_to_div[pl] / 2))
147 / pl_to_div[pl];
148 delta = abs(lwv - target_clk_f);
149
150 if (delta < best_delta) {
151 best_delta = delta;
152 best_M = m;
153 best_N = n;
154 best_PL = pl;
155
156 if (best_delta == 0 ||
157 /* 0.45% for non best fit */
158 (!best_fit && (vco_f / best_delta > 218))) {
159 goto found_match;
160 }
161
162 gk20a_dbg_info("delta %d @ M %d, N %d, PL %d",
163 delta, m, n, pl);
164 }
165 }
166 }
167 }
168 }
169
170found_match:
171 BUG_ON(best_delta == ~0);
172
173 if (best_fit && best_delta != 0)
174 gk20a_dbg_clk("no best match for target @ %dMHz on gpc_pll",
175 target_clk_f);
176
177 pll->M = best_M;
178 pll->N = best_N;
179 pll->PL = best_PL;
180
181 /* save current frequency */
182 pll->freq = ref_clk_f * pll->N / (pll->M * pl_to_div[pll->PL]);
183
184 *target_freq = pll->freq;
185
186 gk20a_dbg_clk("actual target freq %d MHz, M %d, N %d, PL %d(div%d)",
187 *target_freq, pll->M, pll->N, pll->PL, pl_to_div[pll->PL]);
188
189 gk20a_dbg_fn("done");
190
191 return 0;
192}
193
194static int clk_slide_gpc_pll(struct gk20a *g, u32 n)
195{
196 u32 data, coeff;
197 u32 nold;
198 int ramp_timeout = 500;
199
200 /* get old coefficients */
201 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
202 nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);
203
204 /* do nothing if NDIV is same */
205 if (n == nold)
206 return 0;
207
208 /* setup */
209 data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
210 data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
211 trim_sys_gpcpll_cfg2_pll_stepa_f(0x2b));
212 gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
213 data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
214 data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
215 trim_sys_gpcpll_cfg3_pll_stepb_f(0xb));
216 gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);
217
218 /* pll slowdown mode */
219 data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
220 data = set_field(data,
221 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
222 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
223 gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
224
225 /* new ndiv ready for ramp */
226 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
227 coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
228 trim_sys_gpcpll_coeff_ndiv_f(n));
229 udelay(1);
230 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
231
232 /* dynamic ramp to new ndiv */
233 data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
234 data = set_field(data,
235 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
236 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
237 udelay(1);
238 gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
239
240 do {
241 udelay(1);
242 ramp_timeout--;
243 data = gk20a_readl(
244 g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
245 if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
246 break;
247 } while (ramp_timeout > 0);
248
249 /* exit slowdown mode */
250 data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
251 data = set_field(data,
252 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
253 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
254 data = set_field(data,
255 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
256 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
257 gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
258 gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
259
260 if (ramp_timeout <= 0) {
261 gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout");
262 return -ETIMEDOUT;
263 }
264 return 0;
265}
266
267static int clk_program_gpc_pll(struct gk20a *g, struct clk_gk20a *clk,
268 int allow_slide)
269{
270 u32 data, cfg, coeff, timeout;
271 u32 m, n, pl;
272 u32 nlo;
273
274 gk20a_dbg_fn("");
275
276 if (!tegra_platform_is_silicon())
277 return 0;
278
279 /* get old coefficients */
280 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
281 m = trim_sys_gpcpll_coeff_mdiv_v(coeff);
282 n = trim_sys_gpcpll_coeff_ndiv_v(coeff);
283 pl = trim_sys_gpcpll_coeff_pldiv_v(coeff);
284
285 /* do NDIV slide if there is no change in M and PL */
286 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
287 if (allow_slide && clk->gpc_pll.M == m && clk->gpc_pll.PL == pl
288 && trim_sys_gpcpll_cfg_enable_v(cfg)) {
289 return clk_slide_gpc_pll(g, clk->gpc_pll.N);
290 }
291
292 /* slide down to NDIV_LO */
293 nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco, clk->gpc_pll.clk_in);
294 if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
295 int ret = clk_slide_gpc_pll(g, nlo);
296 if (ret)
297 return ret;
298 }
299
300	/* split FO-to-bypass jump in halves by setting out divider 1:2 */
301 data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
302 data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
303 trim_sys_gpc2clk_out_vcodiv_f(2));
304 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
305
306 /* put PLL in bypass before programming it */
307 data = gk20a_readl(g, trim_sys_sel_vco_r());
308 data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
309 trim_sys_sel_vco_gpc2clk_out_bypass_f());
310 udelay(2);
311 gk20a_writel(g, trim_sys_sel_vco_r(), data);
312
313 /* get out from IDDQ */
314 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
315 if (trim_sys_gpcpll_cfg_iddq_v(cfg)) {
316 cfg = set_field(cfg, trim_sys_gpcpll_cfg_iddq_m(),
317 trim_sys_gpcpll_cfg_iddq_power_on_v());
318 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
319 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
320 udelay(2);
321 }
322
323 /* disable PLL before changing coefficients */
324 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
325 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
326 trim_sys_gpcpll_cfg_enable_no_f());
327 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
328 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
329
330 /* change coefficients */
331 nlo = DIV_ROUND_UP(clk->gpc_pll.M * gpc_pll_params.min_vco,
332 clk->gpc_pll.clk_in);
333 coeff = trim_sys_gpcpll_coeff_mdiv_f(clk->gpc_pll.M) |
334 trim_sys_gpcpll_coeff_ndiv_f(allow_slide ?
335 nlo : clk->gpc_pll.N) |
336 trim_sys_gpcpll_coeff_pldiv_f(clk->gpc_pll.PL);
337 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
338
339 /* enable PLL after changing coefficients */
340 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
341 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
342 trim_sys_gpcpll_cfg_enable_yes_f());
343 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
344
345 /* lock pll */
346 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
347 if (cfg & trim_sys_gpcpll_cfg_enb_lckdet_power_off_f()){
348 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enb_lckdet_m(),
349 trim_sys_gpcpll_cfg_enb_lckdet_power_on_f());
350 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
351 }
352
353 /* wait pll lock */
354 timeout = clk->pll_delay / 2 + 1;
355 do {
356 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
357 if (cfg & trim_sys_gpcpll_cfg_pll_lock_true_f())
358 goto pll_locked;
359 udelay(2);
360 } while (--timeout > 0);
361
362 /* PLL is messed up. What can we do here? */
363 BUG();
364 return -EBUSY;
365
366pll_locked:
367 /* put PLL back on vco */
368 data = gk20a_readl(g, trim_sys_sel_vco_r());
369 data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
370 trim_sys_sel_vco_gpc2clk_out_vco_f());
371 gk20a_writel(g, trim_sys_sel_vco_r(), data);
372 clk->gpc_pll.enabled = true;
373
374 /* restore out divider 1:1 */
375 data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
376 data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
377 trim_sys_gpc2clk_out_vcodiv_by1_f());
378 udelay(2);
379 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
380
381 /* slide up to target NDIV */
382 return clk_slide_gpc_pll(g, clk->gpc_pll.N);
383}
384
385static int clk_disable_gpcpll(struct gk20a *g, int allow_slide)
386{
387 u32 cfg, coeff, m, nlo;
388 struct clk_gk20a *clk = &g->clk;
389
390 /* slide to VCO min */
391 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
392 if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
393 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
394 m = trim_sys_gpcpll_coeff_mdiv_v(coeff);
395 nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco,
396 clk->gpc_pll.clk_in);
397 clk_slide_gpc_pll(g, nlo);
398 }
399
400 /* put PLL in bypass before disabling it */
401 cfg = gk20a_readl(g, trim_sys_sel_vco_r());
402 cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(),
403 trim_sys_sel_vco_gpc2clk_out_bypass_f());
404 gk20a_writel(g, trim_sys_sel_vco_r(), cfg);
405
406 /* disable PLL */
407 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
408 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
409 trim_sys_gpcpll_cfg_enable_no_f());
410 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
411 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
412
413 clk->gpc_pll.enabled = false;
414 return 0;
415}
416
417static int gk20a_init_clk_reset_enable_hw(struct gk20a *g)
418{
419 gk20a_dbg_fn("");
420 return 0;
421}
422
423struct clk *gk20a_clk_get(struct gk20a *g)
424{
425 if (!g->clk.tegra_clk) {
426 struct clk *clk;
427
428 clk = clk_get_sys("tegra_gk20a", "gpu");
429 if (IS_ERR(clk)) {
430 gk20a_err(dev_from_gk20a(g),
431 "fail to get tegra gpu clk tegra_gk20a/gpu");
432 return NULL;
433 }
434 g->clk.tegra_clk = clk;
435 }
436
437 return g->clk.tegra_clk;
438}
439
440static int gk20a_init_clk_setup_sw(struct gk20a *g)
441{
442 struct clk_gk20a *clk = &g->clk;
443 static int initialized;
444 unsigned long *freqs;
445 int err, num_freqs;
446 struct clk *ref;
447 unsigned long ref_rate;
448
449 gk20a_dbg_fn("");
450
451 if (clk->sw_ready) {
452 gk20a_dbg_fn("skip init");
453 return 0;
454 }
455
456 if (!gk20a_clk_get(g))
457 return -EINVAL;
458
459 ref = clk_get_parent(clk_get_parent(clk->tegra_clk));
460 if (IS_ERR(ref)) {
461 gk20a_err(dev_from_gk20a(g),
462 "failed to get GPCPLL reference clock");
463 return -EINVAL;
464 }
465 ref_rate = clk_get_rate(ref);
466
467 clk->pll_delay = 300; /* usec */
468
469 clk->gpc_pll.id = GK20A_GPC_PLL;
470 clk->gpc_pll.clk_in = ref_rate / 1000000; /* MHz */
471
472 /* Decide initial frequency */
473 if (!initialized) {
474 initialized = 1;
475 clk->gpc_pll.M = 1;
476 clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
477 clk->gpc_pll.clk_in);
478 clk->gpc_pll.PL = 1;
479 clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
480 clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL];
481 }
482
483 err = tegra_dvfs_get_freqs(clk_get_parent(clk->tegra_clk),
484 &freqs, &num_freqs);
485 if (!err) {
486 int i, j;
487
488 /* init j for inverse traversal of frequencies */
489 j = num_freqs - 1;
490
491 gpu_cooling_freq = kzalloc(
492 (1 + num_freqs) * sizeof(*gpu_cooling_freq),
493 GFP_KERNEL);
494
495 /* store frequencies in inverse order */
496 for (i = 0; i < num_freqs; ++i, --j) {
497 gpu_cooling_freq[i].index = i;
498 gpu_cooling_freq[i].frequency = freqs[j];
499 }
500
501 /* add 'end of table' marker */
502 gpu_cooling_freq[i].index = i;
503 gpu_cooling_freq[i].frequency = GPUFREQ_TABLE_END;
504
505 /* store number of frequencies */
506 num_gpu_cooling_freq = num_freqs + 1;
507 }
508
509 mutex_init(&clk->clk_mutex);
510
511 clk->sw_ready = true;
512
513 gk20a_dbg_fn("done");
514 return 0;
515}
516
517static int gk20a_init_clk_setup_hw(struct gk20a *g)
518{
519 u32 data;
520
521 gk20a_dbg_fn("");
522
523 data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
524 data = set_field(data,
525 trim_sys_gpc2clk_out_sdiv14_m() |
526 trim_sys_gpc2clk_out_vcodiv_m() |
527 trim_sys_gpc2clk_out_bypdiv_m(),
528 trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() |
529 trim_sys_gpc2clk_out_vcodiv_by1_f() |
530 trim_sys_gpc2clk_out_bypdiv_f(0));
531 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
532
533 return 0;
534}
535
536static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq)
537{
538 struct clk_gk20a *clk = &g->clk;
539
540 if (freq > gpc_pll_params.max_freq)
541 freq = gpc_pll_params.max_freq;
542 else if (freq < gpc_pll_params.min_freq)
543 freq = gpc_pll_params.min_freq;
544
545 if (freq != old_freq) {
546 /* gpc_pll.freq is changed to new value here */
547 if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params,
548 &freq, true)) {
549 gk20a_err(dev_from_gk20a(g),
550 "failed to set pll target for %d", freq);
551 return -EINVAL;
552 }
553 }
554 return 0;
555}
556
557static int set_pll_freq(struct gk20a *g, u32 freq, u32 old_freq)
558{
559 struct clk_gk20a *clk = &g->clk;
560 int err = 0;
561
562 gk20a_dbg_fn("curr freq: %dMHz, target freq %dMHz", old_freq, freq);
563
564 if ((freq == old_freq) && clk->gpc_pll.enabled)
565 return 0;
566
567 /* change frequency only if power is on */
568 if (g->clk.clk_hw_on) {
569 err = clk_program_gpc_pll(g, clk, 1);
570 if (err)
571 err = clk_program_gpc_pll(g, clk, 0);
572 }
573
574	/* Just report the error but do not restore the PLL, since dvfs could have
575	   already changed the voltage even when it returns an error. */
576 if (err)
577 gk20a_err(dev_from_gk20a(g),
578 "failed to set pll to %d", freq);
579 return err;
580}
581
582static int gk20a_clk_export_set_rate(void *data, unsigned long *rate)
583{
584 u32 old_freq;
585 int ret = -ENODATA;
586 struct gk20a *g = data;
587 struct clk_gk20a *clk = &g->clk;
588
589 if (rate) {
590 mutex_lock(&clk->clk_mutex);
591 old_freq = clk->gpc_pll.freq;
592 ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq);
593 if (!ret && clk->gpc_pll.enabled)
594 ret = set_pll_freq(g, clk->gpc_pll.freq, old_freq);
595 if (!ret)
596 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
597 mutex_unlock(&clk->clk_mutex);
598 }
599 return ret;
600}
601
602static int gk20a_clk_export_enable(void *data)
603{
604 int ret;
605 struct gk20a *g = data;
606 struct clk_gk20a *clk = &g->clk;
607
608 mutex_lock(&clk->clk_mutex);
609 ret = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq);
610 mutex_unlock(&clk->clk_mutex);
611 return ret;
612}
613
614static void gk20a_clk_export_disable(void *data)
615{
616 struct gk20a *g = data;
617 struct clk_gk20a *clk = &g->clk;
618
619 mutex_lock(&clk->clk_mutex);
620 if (g->clk.clk_hw_on)
621 clk_disable_gpcpll(g, 1);
622 mutex_unlock(&clk->clk_mutex);
623}
624
625static void gk20a_clk_export_init(void *data, unsigned long *rate, bool *state)
626{
627 struct gk20a *g = data;
628 struct clk_gk20a *clk = &g->clk;
629
630 mutex_lock(&clk->clk_mutex);
631 if (state)
632 *state = clk->gpc_pll.enabled;
633 if (rate)
634 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
635 mutex_unlock(&clk->clk_mutex);
636}
637
638static struct tegra_clk_export_ops gk20a_clk_export_ops = {
639 .init = gk20a_clk_export_init,
640 .enable = gk20a_clk_export_enable,
641 .disable = gk20a_clk_export_disable,
642 .set_rate = gk20a_clk_export_set_rate,
643};
644
645static int gk20a_clk_register_export_ops(struct gk20a *g)
646{
647 int ret;
648 struct clk *c;
649
650 if (gk20a_clk_export_ops.data)
651 return 0;
652
653 gk20a_clk_export_ops.data = (void *)g;
654 c = g->clk.tegra_clk;
655 if (!c || !clk_get_parent(c))
656 return -ENOSYS;
657
658 ret = tegra_clk_register_export_ops(clk_get_parent(c),
659 &gk20a_clk_export_ops);
660
661 return ret;
662}
663
664int gk20a_init_clk_support(struct gk20a *g)
665{
666 struct clk_gk20a *clk = &g->clk;
667	int err;
668
669 gk20a_dbg_fn("");
670
671 clk->g = g;
672
673 err = gk20a_init_clk_reset_enable_hw(g);
674 if (err)
675 return err;
676
677 err = gk20a_init_clk_setup_sw(g);
678 if (err)
679 return err;
680
681 mutex_lock(&clk->clk_mutex);
682 clk->clk_hw_on = true;
683
684 err = gk20a_init_clk_setup_hw(g);
685 mutex_unlock(&clk->clk_mutex);
686 if (err)
687 return err;
688
689 err = gk20a_clk_register_export_ops(g);
690 if (err)
691 return err;
692
693 /* FIXME: this effectively prevents host level clock gating */
694 err = clk_enable(g->clk.tegra_clk);
695 if (err)
696 return err;
697
698 /* The prev call may not enable PLL if gbus is unbalanced - force it */
699 mutex_lock(&clk->clk_mutex);
700 err = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq);
701 mutex_unlock(&clk->clk_mutex);
702 if (err)
703 return err;
704
705 return err;
706}
707
708unsigned long gk20a_clk_get_rate(struct gk20a *g)
709{
710 struct clk_gk20a *clk = &g->clk;
711 return rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
712}
713
714long gk20a_clk_round_rate(struct gk20a *g, unsigned long rate)
715{
716 /* make sure the clock is available */
717 if (!gk20a_clk_get(g))
718 return rate;
719
720 return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate);
721}
722
723int gk20a_clk_set_rate(struct gk20a *g, unsigned long rate)
724{
725 return clk_set_rate(g->clk.tegra_clk, rate);
726}
727
728int gk20a_suspend_clk_support(struct gk20a *g)
729{
730 int ret;
731
732 clk_disable(g->clk.tegra_clk);
733
734 /* The prev call may not disable PLL if gbus is unbalanced - force it */
735 mutex_lock(&g->clk.clk_mutex);
736 ret = clk_disable_gpcpll(g, 1);
737 g->clk.clk_hw_on = false;
738 mutex_unlock(&g->clk.clk_mutex);
739 return ret;
740}
741
742#ifdef CONFIG_DEBUG_FS
743
744static int rate_get(void *data, u64 *val)
745{
746 struct gk20a *g = (struct gk20a *)data;
747 *val = (u64)gk20a_clk_get_rate(g);
748 return 0;
749}
750static int rate_set(void *data, u64 val)
751{
752 struct gk20a *g = (struct gk20a *)data;
753 return gk20a_clk_set_rate(g, (u32)val);
754}
755DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n");
756
757static int pll_reg_show(struct seq_file *s, void *data)
758{
759 struct gk20a *g = s->private;
760 u32 reg, m, n, pl, f;
761
762 mutex_lock(&g->clk.clk_mutex);
763 if (!g->clk.clk_hw_on) {
764 seq_printf(s, "gk20a powered down - no access to registers\n");
765 mutex_unlock(&g->clk.clk_mutex);
766 return 0;
767 }
768
769 reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
770 seq_printf(s, "cfg = 0x%x : %s : %s\n", reg,
771 trim_sys_gpcpll_cfg_enable_v(reg) ? "enabled" : "disabled",
772 trim_sys_gpcpll_cfg_pll_lock_v(reg) ? "locked" : "unlocked");
773
774 reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
775 m = trim_sys_gpcpll_coeff_mdiv_v(reg);
776 n = trim_sys_gpcpll_coeff_ndiv_v(reg);
777 pl = trim_sys_gpcpll_coeff_pldiv_v(reg);
778 f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div[pl]);
779 seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
780 seq_printf(s, " : pll_f(gpu_f) = %u(%u) MHz\n", f, f/2);
781 mutex_unlock(&g->clk.clk_mutex);
782 return 0;
783}
784
785static int pll_reg_open(struct inode *inode, struct file *file)
786{
787 return single_open(file, pll_reg_show, inode->i_private);
788}
789
790static const struct file_operations pll_reg_fops = {
791 .open = pll_reg_open,
792 .read = seq_read,
793 .llseek = seq_lseek,
794 .release = single_release,
795};
796
797static int monitor_get(void *data, u64 *val)
798{
799 struct gk20a *g = (struct gk20a *)data;
800 struct clk_gk20a *clk = &g->clk;
801 int err;
802
803 u32 ncycle = 100; /* count GPCCLK for ncycle of clkin */
804 u32 clkin = clk->gpc_pll.clk_in;
805 u32 count1, count2;
806
807 err = gk20a_busy(g->dev);
808 if (err)
809 return err;
810
811 gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
812 trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f());
813 gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
814 trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() |
815 trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() |
816 trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle));
817 /* start */
818
819	/* It should take about 8us to count 100 cycles of the 12MHz input,
820	   but a delay well above 100us is required here. */
821 gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0));
822 udelay(2000);
823
824 count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
825 udelay(100);
826 count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
827 *val = (u64)(trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2) * clkin / ncycle);
828 gk20a_idle(g->dev);
829
830 if (count1 != count2)
831 return -EBUSY;
832 return 0;
833}
834DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n");
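
To make the counter arithmetic in monitor_get() concrete: the hardware counts GPCCLK edges over ncycle cycles of the reference clock, so the measured rate is count * clkin / ncycle. A back-of-the-envelope sketch with made-up numbers (both the 12 MHz reference and the 6000-edge count are illustrative only):

#include <stdio.h>

int main(void)
{
	unsigned int clkin  = 12;	/* MHz reference, assumed */
	unsigned int ncycle = 100;	/* reference cycles in the count window */
	unsigned int count  = 6000;	/* hypothetical GPCCLK edges counted */

	/* edges per reference cycle, scaled back to MHz */
	printf("gpcclk ~= %u MHz\n", count * clkin / ncycle);	/* 720 MHz */
	return 0;
}
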
835
836int clk_gk20a_debugfs_init(struct platform_device *dev)
837{
838 struct dentry *d;
839 struct gk20a_platform *platform = platform_get_drvdata(dev);
840 struct gk20a *g = get_gk20a(dev);
841
842 d = debugfs_create_file(
843 "rate", S_IRUGO|S_IWUSR, platform->debugfs, g, &rate_fops);
844 if (!d)
845 goto err_out;
846
847 d = debugfs_create_file(
848 "pll_reg", S_IRUGO, platform->debugfs, g, &pll_reg_fops);
849 if (!d)
850 goto err_out;
851
852 d = debugfs_create_file(
853 "monitor", S_IRUGO, platform->debugfs, g, &monitor_fops);
854 if (!d)
855 goto err_out;
856
857 return 0;
858
859err_out:
860 pr_err("%s: Failed to make debugfs node\n", __func__);
861 debugfs_remove_recursive(platform->debugfs);
862 return -ENOMEM;
863}
864
865#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.h b/drivers/gpu/nvgpu/gk20a/clk_gk20a.h
new file mode 100644
index 00000000..d2665259
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.h
@@ -0,0 +1,94 @@
1/*
2 * drivers/video/tegra/host/gk20a/clk_gk20a.h
3 *
4 * GK20A Graphics
5 *
6 * Copyright (c) 2011 - 2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#ifndef _NVHOST_CLK_GK20A_H_
22#define _NVHOST_CLK_GK20A_H_
23
24#include <linux/mutex.h>
25
26#define GPUFREQ_TABLE_END ~(u32)1
27enum {
28 /* only one PLL for gk20a */
29 GK20A_GPC_PLL = 0,
30};
31
32struct pll {
33 u32 id;
34 u32 clk_in; /* MHz */
35 u32 M;
36 u32 N;
37 u32 PL;
38 u32 freq; /* MHz */
39 bool enabled;
40};
41
42struct pll_parms {
43 u32 min_freq, max_freq; /* MHz */
44 u32 min_vco, max_vco; /* MHz */
45 u32 min_u, max_u; /* MHz */
46 u32 min_M, max_M;
47 u32 min_N, max_N;
48 u32 min_PL, max_PL;
49};
50
51struct clk_gk20a {
52 struct gk20a *g;
53 struct clk *tegra_clk;
54 struct pll gpc_pll;
55 u32 pll_delay; /* default PLL settle time */
56 struct mutex clk_mutex;
57 bool sw_ready;
58 bool clk_hw_on;
59};
60
61struct gpufreq_table_data {
62 unsigned int index;
63 unsigned int frequency; /* MHz */
64};
65
66struct gpufreq_table_data *tegra_gpufreq_table_get(void);
67
68unsigned int tegra_gpufreq_table_size_get(void);
69
70int gk20a_init_clk_support(struct gk20a *g);
71
72unsigned long gk20a_clk_get_rate(struct gk20a *g);
73int gk20a_clk_set_rate(struct gk20a *g, unsigned long rate);
74int gk20a_suspend_clk_support(struct gk20a *g);
75struct clk *gk20a_clk_get(struct gk20a *g);
76long gk20a_clk_round_rate(struct gk20a *g, unsigned long rate);
77
78extern struct pll_parms gpc_pll_params;
79
80#define KHZ 1000
81#define MHZ 1000000
82
83static inline unsigned long rate_gpc2clk_to_gpu(unsigned long rate)
84{
85 /* convert the MHz gpc2clk frequency to Hz gpcpll frequency */
86 return (rate * MHZ) / 2;
87}
88static inline unsigned long rate_gpu_to_gpc2clk(unsigned long rate)
89{
90 /* convert the Hz gpcpll frequency to MHz gpc2clk frequency */
91 return (rate * 2) / MHZ;
92}
93
94#endif /* _NVHOST_CLK_GK20A_H_ */
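
A quick user-space round trip through the two conversion helpers above; gpc2clk runs at twice the GPU clock, so the helpers halve or double the value while switching between MHz and Hz. The 504 MHz input is an arbitrary example.

#include <stdio.h>

#define MHZ 1000000UL

static unsigned long rate_gpc2clk_to_gpu(unsigned long rate)
{
	return (rate * MHZ) / 2;	/* MHz gpc2clk -> Hz gpu */
}

static unsigned long rate_gpu_to_gpc2clk(unsigned long rate)
{
	return (rate * 2) / MHZ;	/* Hz gpu -> MHz gpc2clk */
}

int main(void)
{
	unsigned long gpc2clk_mhz = 504;	/* example value only */
	unsigned long gpu_hz = rate_gpc2clk_to_gpu(gpc2clk_mhz);

	printf("gpc2clk %lu MHz -> gpu %lu Hz -> gpc2clk %lu MHz\n",
	       gpc2clk_mhz, gpu_hz, rate_gpu_to_gpc2clk(gpu_hz));
	return 0;
}
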
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
new file mode 100644
index 00000000..9128959f
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -0,0 +1,240 @@
1/*
2 * GK20A Ctrl
3 *
4 * Copyright (c) 2011-2014, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/highmem.h>
20#include <linux/cdev.h>
21#include <linux/nvhost_gpu_ioctl.h>
22
23#include "gk20a.h"
24
25int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
26{
27 int err;
28 struct gk20a *g;
29
30 gk20a_dbg_fn("");
31
32 g = container_of(inode->i_cdev,
33 struct gk20a, ctrl.cdev);
34
35 filp->private_data = g->dev;
36
37 err = gk20a_get_client(g);
38 if (err) {
39		gk20a_dbg_fn("failed to get client!");
40 return err;
41 }
42
43 return 0;
44}
45
46int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
47{
48 struct platform_device *dev = filp->private_data;
49
50 gk20a_dbg_fn("");
51
52 gk20a_put_client(get_gk20a(dev));
53 return 0;
54}
55
56static long
57gk20a_ctrl_ioctl_gpu_characteristics(
58 struct gk20a *g,
59 struct nvhost_gpu_get_characteristics *request)
60{
61 struct nvhost_gpu_characteristics *pgpu = &g->gpu_characteristics;
62 long err = 0;
63
64 if (request->gpu_characteristics_buf_size > 0) {
65 size_t write_size = sizeof(*pgpu);
66
67 if (write_size > request->gpu_characteristics_buf_size)
68 write_size = request->gpu_characteristics_buf_size;
69
70 err = copy_to_user((void __user *)(uintptr_t)
71 request->gpu_characteristics_buf_addr,
72 pgpu, write_size);
73 }
74
75 if (err == 0)
76 request->gpu_characteristics_buf_size = sizeof(*pgpu);
77
78 return err;
79}
80
81long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
82{
83 struct platform_device *dev = filp->private_data;
84 struct gk20a *g = get_gk20a(dev);
85 struct nvhost_gpu_zcull_get_ctx_size_args *get_ctx_size_args;
86 struct nvhost_gpu_zcull_get_info_args *get_info_args;
87 struct nvhost_gpu_zbc_set_table_args *set_table_args;
88 struct nvhost_gpu_zbc_query_table_args *query_table_args;
89 u8 buf[NVHOST_GPU_IOCTL_MAX_ARG_SIZE];
90 struct gr_zcull_info *zcull_info;
91 struct zbc_entry *zbc_val;
92 struct zbc_query_params *zbc_tbl;
93 int i, err = 0;
94
95 gk20a_dbg_fn("");
96
97 if ((_IOC_TYPE(cmd) != NVHOST_GPU_IOCTL_MAGIC) ||
98 (_IOC_NR(cmd) == 0) ||
99 (_IOC_NR(cmd) > NVHOST_GPU_IOCTL_LAST))
100 return -EFAULT;
101
102 BUG_ON(_IOC_SIZE(cmd) > NVHOST_GPU_IOCTL_MAX_ARG_SIZE);
103
104 if (_IOC_DIR(cmd) & _IOC_WRITE) {
105 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
106 return -EFAULT;
107 }
108
109 if (!g->gr.sw_ready) {
110 err = gk20a_busy(g->dev);
111 if (err)
112 return err;
113
114 gk20a_idle(g->dev);
115 }
116
117 switch (cmd) {
118 case NVHOST_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
119 get_ctx_size_args = (struct nvhost_gpu_zcull_get_ctx_size_args *)buf;
120
121 get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr);
122
123 break;
124 case NVHOST_GPU_IOCTL_ZCULL_GET_INFO:
125 get_info_args = (struct nvhost_gpu_zcull_get_info_args *)buf;
126
127 memset(get_info_args, 0, sizeof(struct nvhost_gpu_zcull_get_info_args));
128
129 zcull_info = kzalloc(sizeof(struct gr_zcull_info), GFP_KERNEL);
130 if (zcull_info == NULL)
131 return -ENOMEM;
132
133 err = gr_gk20a_get_zcull_info(g, &g->gr, zcull_info);
134 if (err) {
135 kfree(zcull_info);
136 break;
137 }
138
139 get_info_args->width_align_pixels = zcull_info->width_align_pixels;
140 get_info_args->height_align_pixels = zcull_info->height_align_pixels;
141 get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots;
142 get_info_args->aliquot_total = zcull_info->aliquot_total;
143 get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier;
144 get_info_args->region_header_size = zcull_info->region_header_size;
145 get_info_args->subregion_header_size = zcull_info->subregion_header_size;
146 get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels;
147 get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels;
148 get_info_args->subregion_count = zcull_info->subregion_count;
149
150 kfree(zcull_info);
151 break;
152 case NVHOST_GPU_IOCTL_ZBC_SET_TABLE:
153 set_table_args = (struct nvhost_gpu_zbc_set_table_args *)buf;
154
155 zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL);
156 if (zbc_val == NULL)
157 return -ENOMEM;
158
159 zbc_val->format = set_table_args->format;
160 zbc_val->type = set_table_args->type;
161
162 switch (zbc_val->type) {
163 case GK20A_ZBC_TYPE_COLOR:
164 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
165 zbc_val->color_ds[i] = set_table_args->color_ds[i];
166 zbc_val->color_l2[i] = set_table_args->color_l2[i];
167 }
168 break;
169 case GK20A_ZBC_TYPE_DEPTH:
170 zbc_val->depth = set_table_args->depth;
171 break;
172 default:
173 err = -EINVAL;
174 }
175
176 if (!err) {
177 gk20a_busy(dev);
178 err = gk20a_gr_zbc_set_table(g, &g->gr, zbc_val);
179 gk20a_idle(dev);
180 }
181
182 if (zbc_val)
183 kfree(zbc_val);
184 break;
185 case NVHOST_GPU_IOCTL_ZBC_QUERY_TABLE:
186 query_table_args = (struct nvhost_gpu_zbc_query_table_args *)buf;
187
188 zbc_tbl = kzalloc(sizeof(struct zbc_query_params), GFP_KERNEL);
189 if (zbc_tbl == NULL)
190 return -ENOMEM;
191
192 zbc_tbl->type = query_table_args->type;
193 zbc_tbl->index_size = query_table_args->index_size;
194
195 err = gr_gk20a_query_zbc(g, &g->gr, zbc_tbl);
196
197 if (!err) {
198 switch (zbc_tbl->type) {
199 case GK20A_ZBC_TYPE_COLOR:
200 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
201 query_table_args->color_ds[i] = zbc_tbl->color_ds[i];
202 query_table_args->color_l2[i] = zbc_tbl->color_l2[i];
203 }
204 break;
205 case GK20A_ZBC_TYPE_DEPTH:
206 query_table_args->depth = zbc_tbl->depth;
207 break;
208 case GK20A_ZBC_TYPE_INVALID:
209 query_table_args->index_size = zbc_tbl->index_size;
210 break;
211 default:
212 err = -EINVAL;
213 }
214 if (!err) {
215 query_table_args->format = zbc_tbl->format;
216 query_table_args->ref_cnt = zbc_tbl->ref_cnt;
217 }
218 }
219
220 if (zbc_tbl)
221 kfree(zbc_tbl);
222 break;
223
224 case NVHOST_GPU_IOCTL_GET_CHARACTERISTICS:
225 err = gk20a_ctrl_ioctl_gpu_characteristics(
226 g, (struct nvhost_gpu_get_characteristics *)buf);
227 break;
228
229 default:
230 gk20a_err(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
231 err = -ENOTTY;
232 break;
233 }
234
235 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
236 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
237
238 return err;
239}
240
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h
new file mode 100644
index 00000000..ac9c253e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h
@@ -0,0 +1,28 @@
1/*
2 * drivers/video/tegra/host/gk20a/gk20a_ctrl.h
3 *
4 * GK20A Ctrl
5 *
6 * Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#ifndef _NVHOST_GK20A_CTRL_H_
22#define _NVHOST_GK20A_CTRL_H_
23
24int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp);
25int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp);
26long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
27
28#endif /* _NVHOST_GK20A_CTRL_H_ */
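
A hedged user-space sketch of the GET_CHARACTERISTICS handshake implemented by gk20a_ctrl_ioctl_gpu_characteristics() above: a first call with gpu_characteristics_buf_size set to zero only reports the required size, and a second call fills the caller's buffer. The /dev/nvhost-ctrl-gpu node path is an assumption; the ioctl number and argument struct come from <linux/nvhost_gpu_ioctl.h>.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvhost_gpu_ioctl.h>

int main(void)
{
	struct nvhost_gpu_get_characteristics req = { 0 };
	void *buf;
	int fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);	/* node path assumed */

	if (fd < 0)
		return 1;

	/* first pass: buf_size == 0, the driver just reports the size it wants */
	if (ioctl(fd, NVHOST_GPU_IOCTL_GET_CHARACTERISTICS, &req) < 0)
		goto out;

	buf = calloc(1, req.gpu_characteristics_buf_size);
	if (!buf)
		goto out;

	/* second pass: the driver copies up to buf_size bytes into our buffer */
	req.gpu_characteristics_buf_addr = (uintptr_t)buf;
	if (ioctl(fd, NVHOST_GPU_IOCTL_GET_CHARACTERISTICS, &req) == 0)
		printf("got %llu bytes of GPU characteristics\n",
		       (unsigned long long)req.gpu_characteristics_buf_size);

	free(buf);
out:
	close(fd);
	return 0;
}
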
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
new file mode 100644
index 00000000..da7d733e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -0,0 +1,699 @@
1/*
2 * Tegra GK20A GPU Debugger/Profiler Driver
3 *
4 * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/fs.h>
20#include <linux/file.h>
21#include <linux/cdev.h>
22#include <linux/uaccess.h>
23#include <linux/nvhost.h>
24#include <linux/nvhost_dbg_gpu_ioctl.h>
25
26#include "gk20a.h"
27#include "gr_gk20a.h"
28#include "dbg_gpu_gk20a.h"
29#include "regops_gk20a.h"
30#include "hw_therm_gk20a.h"
31
32struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = {
33 .exec_reg_ops = exec_regops_gk20a,
34};
35
36/* silly allocator - just increment session id */
37static atomic_t session_id = ATOMIC_INIT(0);
38static int generate_session_id(void)
39{
40 return atomic_add_return(1, &session_id);
41}
42
43static int alloc_session(struct dbg_session_gk20a **_dbg_s)
44{
45 struct dbg_session_gk20a *dbg_s;
46 *_dbg_s = NULL;
47
48 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
49
50 dbg_s = kzalloc(sizeof(*dbg_s), GFP_KERNEL);
51 if (!dbg_s)
52 return -ENOMEM;
53
54 dbg_s->id = generate_session_id();
55 dbg_s->ops = &dbg_gpu_session_ops_gk20a;
56 *_dbg_s = dbg_s;
57 return 0;
58}
59
60int gk20a_dbg_gpu_do_dev_open(struct inode *inode, struct file *filp, bool is_profiler)
61{
62 struct dbg_session_gk20a *dbg_session;
63 struct gk20a *g;
64
65 struct platform_device *pdev;
66 struct device *dev;
67
68 int err;
69
70 if (!is_profiler)
71 g = container_of(inode->i_cdev,
72 struct gk20a, dbg.cdev);
73 else
74 g = container_of(inode->i_cdev,
75 struct gk20a, prof.cdev);
76 pdev = g->dev;
77 dev = &pdev->dev;
78
79 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", dev_name(dev));
80
81 err = alloc_session(&dbg_session);
82 if (err)
83 return err;
84
85 filp->private_data = dbg_session;
86 dbg_session->pdev = pdev;
87 dbg_session->dev = dev;
88 dbg_session->g = g;
89 dbg_session->is_profiler = is_profiler;
90 dbg_session->is_pg_disabled = false;
91
92 INIT_LIST_HEAD(&dbg_session->dbg_s_list_node);
93 init_waitqueue_head(&dbg_session->dbg_events.wait_queue);
94 dbg_session->dbg_events.events_enabled = false;
95 dbg_session->dbg_events.num_pending_events = 0;
96
97 return 0;
98}
99
100/* used in scenarios where the debugger session can take just the per-channel
101 * dbg_s lock for performance, but the profiler session must take the per-gpu
102 * lock since it might not have an associated channel. */
103static void gk20a_dbg_session_mutex_lock(struct dbg_session_gk20a *dbg_s)
104{
105 if (dbg_s->is_profiler)
106 mutex_lock(&dbg_s->g->dbg_sessions_lock);
107 else
108 mutex_lock(&dbg_s->ch->dbg_s_lock);
109}
110
111static void gk20a_dbg_session_mutex_unlock(struct dbg_session_gk20a *dbg_s)
112{
113 if (dbg_s->is_profiler)
114 mutex_unlock(&dbg_s->g->dbg_sessions_lock);
115 else
116 mutex_unlock(&dbg_s->ch->dbg_s_lock);
117}
118
119static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s)
120{
121 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
122
123 gk20a_dbg_session_mutex_lock(dbg_s);
124
125 dbg_s->dbg_events.events_enabled = true;
126 dbg_s->dbg_events.num_pending_events = 0;
127
128 gk20a_dbg_session_mutex_unlock(dbg_s);
129}
130
131static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s)
132{
133 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
134
135 gk20a_dbg_session_mutex_lock(dbg_s);
136
137 dbg_s->dbg_events.events_enabled = false;
138 dbg_s->dbg_events.num_pending_events = 0;
139
140 gk20a_dbg_session_mutex_unlock(dbg_s);
141}
142
143static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s)
144{
145 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
146
147 gk20a_dbg_session_mutex_lock(dbg_s);
148
149 if (dbg_s->dbg_events.events_enabled &&
150 dbg_s->dbg_events.num_pending_events > 0)
151 dbg_s->dbg_events.num_pending_events--;
152
153 gk20a_dbg_session_mutex_unlock(dbg_s);
154}
155
156static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
157 struct nvhost_dbg_gpu_events_ctrl_args *args)
158{
159 int ret = 0;
160
161 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd);
162
163 if (!dbg_s->ch) {
164 gk20a_err(dev_from_gk20a(dbg_s->g),
165 "no channel bound to dbg session\n");
166 return -EINVAL;
167 }
168
169 switch (args->cmd) {
170 case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
171 gk20a_dbg_gpu_events_enable(dbg_s);
172 break;
173
174 case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_DISABLE:
175 gk20a_dbg_gpu_events_disable(dbg_s);
176 break;
177
178 case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_CLEAR:
179 gk20a_dbg_gpu_events_clear(dbg_s);
180 break;
181
182 default:
183 gk20a_err(dev_from_gk20a(dbg_s->g),
184 "unrecognized dbg gpu events ctrl cmd: 0x%x",
185 args->cmd);
186 ret = -EINVAL;
187 break;
188 }
189
190 return ret;
191}
192
193unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
194{
195 unsigned int mask = 0;
196 struct dbg_session_gk20a *dbg_s = filep->private_data;
197
198 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
199
200 poll_wait(filep, &dbg_s->dbg_events.wait_queue, wait);
201
202 gk20a_dbg_session_mutex_lock(dbg_s);
203
204 if (dbg_s->dbg_events.events_enabled &&
205 dbg_s->dbg_events.num_pending_events > 0) {
206 gk20a_dbg(gpu_dbg_gpu_dbg, "found pending event on session id %d",
207 dbg_s->id);
208 gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending",
209 dbg_s->dbg_events.num_pending_events);
210 mask = (POLLPRI | POLLIN);
211 }
212
213 gk20a_dbg_session_mutex_unlock(dbg_s);
214
215 return mask;
216}
217
218int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp)
219{
220 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
221 return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */);
222}
223
224int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp)
225{
226 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
227 return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */);
228}
229
230void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch)
231{
232 struct dbg_session_gk20a *dbg_s;
233
234 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
235
236 /* guard against the session list being modified */
237 mutex_lock(&ch->dbg_s_lock);
238
239 list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
240 if (dbg_s->dbg_events.events_enabled) {
241 gk20a_dbg(gpu_dbg_gpu_dbg, "posting event on session id %d",
242 dbg_s->id);
243 gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending",
244 dbg_s->dbg_events.num_pending_events);
245
246 dbg_s->dbg_events.num_pending_events++;
247
248 wake_up_interruptible_all(&dbg_s->dbg_events.wait_queue);
249 }
250 }
251
252 mutex_unlock(&ch->dbg_s_lock);
253}
254
255
256static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
257 __u32 powermode);
258
259static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s)
260{
261 struct channel_gk20a *ch_gk20a = dbg_s->ch;
262 struct gk20a *g = dbg_s->g;
263
264 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
265
266 /* wasn't bound to start with ? */
267 if (!ch_gk20a) {
268 gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "not bound already?");
269 return -ENODEV;
270 }
271
272 mutex_lock(&g->dbg_sessions_lock);
273 mutex_lock(&ch_gk20a->dbg_s_lock);
274
275 --g->dbg_sessions;
276
277	/* Powergate enable is called here because a dbg_session that issued
278	 * the powergate disable ioctl may be killed without ever issuing the
279	 * matching powergate enable ioctl
280	 */
281 dbg_set_powergate(dbg_s, NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE);
282
283 dbg_s->ch = NULL;
284 fput(dbg_s->ch_f);
285 dbg_s->ch_f = NULL;
286
287 list_del_init(&dbg_s->dbg_s_list_node);
288
289 mutex_unlock(&ch_gk20a->dbg_s_lock);
290 mutex_unlock(&g->dbg_sessions_lock);
291
292 return 0;
293}
294
295int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
296{
297 struct dbg_session_gk20a *dbg_s = filp->private_data;
298
299 gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", dev_name(dbg_s->dev));
300
301 /* unbind if it was bound */
302 if (!dbg_s->ch)
303 return 0;
304 dbg_unbind_channel_gk20a(dbg_s);
305
306 kfree(dbg_s);
307 return 0;
308}
309
310static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
311 struct nvhost_dbg_gpu_bind_channel_args *args)
312{
313 struct file *f;
314 struct gk20a *g;
315 struct channel_gk20a *ch;
316
317 gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
318 dev_name(dbg_s->dev), args->channel_fd);
319
320 if (args->channel_fd == ~0)
321 return dbg_unbind_channel_gk20a(dbg_s);
322
323	/* even though the channel lookup below takes and drops its own file
324	 * reference, hold one here as well so the channel can't disappear
325	 * while the debugger session is bound to it */
326 f = fget(args->channel_fd);
327 if (!f)
328 return -ENODEV;
329
330 ch = gk20a_get_channel_from_file(args->channel_fd);
331 if (!ch) {
332 gk20a_dbg_fn("no channel found for fd");
333 fput(f);
334 return -EINVAL;
335 }
336
337 g = dbg_s->g;
338 gk20a_dbg_fn("%s hwchid=%d", dev_name(dbg_s->dev), ch->hw_chid);
339
340 mutex_lock(&g->dbg_sessions_lock);
341 mutex_lock(&ch->dbg_s_lock);
342
343 dbg_s->ch_f = f;
344 dbg_s->ch = ch;
345 list_add(&dbg_s->dbg_s_list_node, &dbg_s->ch->dbg_s_list);
346
347 g->dbg_sessions++;
348
349 mutex_unlock(&ch->dbg_s_lock);
350 mutex_unlock(&g->dbg_sessions_lock);
351 return 0;
352}
353
354static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
355 struct nvhost_dbg_gpu_exec_reg_ops_args *args);
356
357static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
358 struct nvhost_dbg_gpu_powergate_args *args);
359
360static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
361 struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args);
362
363long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
364 unsigned long arg)
365{
366 struct dbg_session_gk20a *dbg_s = filp->private_data;
367 struct gk20a *g = get_gk20a(dbg_s->pdev);
368 u8 buf[NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE];
369 int err = 0;
370
371 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
372
373 if ((_IOC_TYPE(cmd) != NVHOST_DBG_GPU_IOCTL_MAGIC) ||
374 (_IOC_NR(cmd) == 0) ||
375 (_IOC_NR(cmd) > NVHOST_DBG_GPU_IOCTL_LAST))
376 return -EFAULT;
377
378 BUG_ON(_IOC_SIZE(cmd) > NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE);
379
380 if (_IOC_DIR(cmd) & _IOC_WRITE) {
381 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
382 return -EFAULT;
383 }
384
385 if (!g->gr.sw_ready) {
386 err = gk20a_busy(g->dev);
387 if (err)
388 return err;
389
390 gk20a_idle(g->dev);
391 }
392
393 switch (cmd) {
394 case NVHOST_DBG_GPU_IOCTL_BIND_CHANNEL:
395 err = dbg_bind_channel_gk20a(dbg_s,
396 (struct nvhost_dbg_gpu_bind_channel_args *)buf);
397 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
398 break;
399
400 case NVHOST_DBG_GPU_IOCTL_REG_OPS:
401 err = nvhost_ioctl_channel_reg_ops(dbg_s,
402 (struct nvhost_dbg_gpu_exec_reg_ops_args *)buf);
403 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
404 break;
405
406 case NVHOST_DBG_GPU_IOCTL_POWERGATE:
407 err = nvhost_ioctl_powergate_gk20a(dbg_s,
408 (struct nvhost_dbg_gpu_powergate_args *)buf);
409 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
410 break;
411
412 case NVHOST_DBG_GPU_IOCTL_EVENTS_CTRL:
413 err = gk20a_dbg_gpu_events_ctrl(dbg_s,
414 (struct nvhost_dbg_gpu_events_ctrl_args *)buf);
415 break;
416
417 case NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE:
418 err = nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s,
419 (struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *)buf);
420 break;
421
422 default:
423 gk20a_err(dev_from_gk20a(g),
424 "unrecognized dbg gpu ioctl cmd: 0x%x",
425 cmd);
426 err = -ENOTTY;
427 break;
428 }
429
430 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
431 err = copy_to_user((void __user *)arg,
432 buf, _IOC_SIZE(cmd));
433
434 return err;
435}
436
437/* In order to perform a context relative op the context has
438 * to be created already... which would imply that the
439 * context switch mechanism has already been put in place.
440 * So by the time we perform such an operation it should always
441 * be possible to query for the appropriate context offsets, etc.
442 *
443 * But note: while the dbg_gpu bind requires a channel fd,
444 * it doesn't require an allocated gr/compute obj at that point...
445 */
446static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s,
447 struct gr_gk20a *gr)
448{
449 int err;
450
451 mutex_lock(&gr->ctx_mutex);
452 err = !gr->ctx_vars.golden_image_initialized;
453 mutex_unlock(&gr->ctx_mutex);
454 if (err)
455 return false;
456 return true;
457
458}
459
460static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
461 struct nvhost_dbg_gpu_exec_reg_ops_args *args)
462{
463 int err;
464 struct device *dev = dbg_s->dev;
465 struct gk20a *g = get_gk20a(dbg_s->pdev);
466 struct nvhost_dbg_gpu_reg_op *ops;
467 u64 ops_size = sizeof(ops[0]) * args->num_ops;
468
469 gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size);
470
471 if (!dbg_s->ops) {
472 gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
473 return -EINVAL;
474 }
475
476 if (!dbg_s->is_profiler && !dbg_s->ch) {
477 gk20a_err(dev, "bind a channel before regops for a debugging session");
478 return -EINVAL;
479 }
480
481 /* be sure that ctx info is in place */
482 if (!gr_context_info_available(dbg_s, &g->gr)) {
483 gk20a_err(dev, "gr context data not available\n");
484 return -ENODEV;
485 }
486
487 ops = kzalloc(ops_size, GFP_KERNEL);
488 if (!ops) {
489 gk20a_err(dev, "Allocating memory failed!");
490 return -ENOMEM;
491 }
492
493 gk20a_dbg_fn("Copying regops from userspace");
494
495 if (copy_from_user(ops, (void *)(uintptr_t)args->ops, ops_size)) {
496 dev_err(dev, "copy_from_user failed!");
497 err = -EFAULT;
498 goto clean_up;
499 }
500
501 /* since exec_reg_ops sends methods to the ucode, it must take the
502 * global gpu lock to protect against mixing methods from debug sessions
503 * on other channels */
504 mutex_lock(&g->dbg_sessions_lock);
505
506 err = dbg_s->ops->exec_reg_ops(dbg_s, ops, args->num_ops);
507
508 mutex_unlock(&g->dbg_sessions_lock);
509
510 if (err) {
511 gk20a_err(dev, "dbg regops failed");
512 goto clean_up;
513 }
514
515 gk20a_dbg_fn("Copying result to userspace");
516
517 if (copy_to_user((void *)(uintptr_t)args->ops, ops, ops_size)) {
518 dev_err(dev, "copy_to_user failed!");
519 err = -EFAULT;
520 goto clean_up;
521 }
522 return 0;
523 clean_up:
524 kfree(ops);
525 return err;
526}
527
528static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
529 __u32 powermode)
530{
531 int err = 0;
532 struct gk20a *g = get_gk20a(dbg_s->pdev);
533
534 /* This function must be called with g->dbg_sessions_lock held */
535
536 gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %d",
537 dev_name(dbg_s->dev), powermode);
538
539 switch (powermode) {
540 case NVHOST_DBG_GPU_POWERGATE_MODE_DISABLE:
541 /* save off current powergate, clk state.
542 * set gpu module's can_powergate = 0.
543 * set gpu module's clk to max.
544 * while *a* debug session is active there will be no power or
545 * clocking state changes allowed from mainline code (but they
546 * should be saved).
547 */
548		/* Actually disable powergating only if this dbg_session has not
549		 * already requested it and the global
550		 * powergating_disabled_refcount is making its 0 -> 1 transition
551		 */
552
553 if ((dbg_s->is_pg_disabled == false) &&
554 (g->dbg_powergating_disabled_refcount++ == 0)) {
555
556 gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module busy");
557 gk20a_busy(g->dev);
558 gk20a_channel_busy(dbg_s->pdev);
559
560 g->ops.clock_gating.slcg_gr_load_gating_prod(g,
561 false);
562 g->ops.clock_gating.slcg_perf_load_gating_prod(g,
563 false);
564 gr_gk20a_init_blcg_mode(g, BLCG_RUN, ENGINE_GR_GK20A);
565
566 g->elcg_enabled = false;
567 gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A);
568 gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A);
569
570 gk20a_pmu_disable_elpg(g);
571 }
572
573 dbg_s->is_pg_disabled = true;
574 break;
575
576 case NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE:
577 /* restore (can) powergate, clk state */
578 /* release pending exceptions to fault/be handled as usual */
579 /*TBD: ordering of these? */
580
581		/* Re-enable powergating only when this dbg_session had requested
582		 * the disable through the ioctl and no other session still wants
583		 * powergating disabled (i.e. the refcount drops back to zero)
584		 */
585 if (dbg_s->is_pg_disabled &&
586 --g->dbg_powergating_disabled_refcount == 0) {
587
588 g->elcg_enabled = true;
589 gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
590 gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
591 gr_gk20a_init_blcg_mode(g, BLCG_AUTO, ENGINE_GR_GK20A);
592
593 g->ops.clock_gating.slcg_gr_load_gating_prod(g,
594 g->slcg_enabled);
595 g->ops.clock_gating.slcg_perf_load_gating_prod(g,
596 g->slcg_enabled);
597
598 gk20a_pmu_enable_elpg(g);
599
600 gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle");
601 gk20a_channel_idle(dbg_s->pdev);
602 gk20a_idle(g->dev);
603 }
604
605 dbg_s->is_pg_disabled = false;
606 break;
607
608 default:
609 gk20a_err(dev_from_gk20a(g),
610 "unrecognized dbg gpu powergate mode: 0x%x",
611 powermode);
612 err = -ENOTTY;
613 break;
614 }
615
616 return err;
617}
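
The two branches above only touch hardware on the edges of g->dbg_powergating_disabled_refcount; every other request just moves the count. A standalone illustration of that rule (not driver code, names are made up):

#include <stdio.h>

static int disable_refcount;	/* stands in for dbg_powergating_disabled_refcount */

static void request_disable(void)
{
	if (disable_refcount++ == 0)
		printf("0 -> 1: really disable power gating\n");
}

static void request_enable(void)
{
	if (--disable_refcount == 0)
		printf("1 -> 0: really re-enable power gating\n");
}

int main(void)
{
	request_disable();	/* session A: hardware actually changes */
	request_disable();	/* session B: refcount only */
	request_enable();	/* session B: refcount only */
	request_enable();	/* session A: hardware actually changes */
	return 0;
}
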
618
619static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
620 struct nvhost_dbg_gpu_powergate_args *args)
621{
622 int err;
623 struct gk20a *g = get_gk20a(dbg_s->pdev);
624 gk20a_dbg_fn("%s powergate mode = %d",
625 dev_name(dbg_s->dev), args->mode);
626
627 mutex_lock(&g->dbg_sessions_lock);
628 err = dbg_set_powergate(dbg_s, args->mode);
629 mutex_unlock(&g->dbg_sessions_lock);
630 return err;
631}
632
633static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
634 struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args)
635{
636 int err;
637 struct gk20a *g = get_gk20a(dbg_s->pdev);
638 struct channel_gk20a *ch_gk20a;
639
640 gk20a_dbg_fn("%s smpc ctxsw mode = %d",
641 dev_name(dbg_s->dev), args->mode);
642
643 /* Take the global lock, since we'll be doing global regops */
644 mutex_lock(&g->dbg_sessions_lock);
645
646 ch_gk20a = dbg_s->ch;
647
648 if (!ch_gk20a) {
649 gk20a_err(dev_from_gk20a(dbg_s->g),
650 "no bound channel for smpc ctxsw mode update\n");
651 err = -EINVAL;
652 goto clean_up;
653 }
654
655 err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a,
656 args->mode == NVHOST_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
657 if (err) {
658 gk20a_err(dev_from_gk20a(dbg_s->g),
659 "error (%d) during smpc ctxsw mode update\n", err);
660 goto clean_up;
661 }
662	/* The following regops are a hack/workaround to make up for the fact
663	 * that we just scribbled into the ctxsw image without really knowing
664	 * whether it has already been swapped in/out once or not, etc.
665	 */
666 {
667 struct nvhost_dbg_gpu_reg_op ops[4];
668 int i;
669 for (i = 0; i < ARRAY_SIZE(ops); i++) {
670 ops[i].op = NVHOST_DBG_GPU_REG_OP_WRITE_32;
671 ops[i].type = NVHOST_DBG_GPU_REG_OP_TYPE_GR_CTX;
672 ops[i].status = NVHOST_DBG_GPU_REG_OP_STATUS_SUCCESS;
673 ops[i].value_hi = 0;
674 ops[i].and_n_mask_lo = 0;
675 ops[i].and_n_mask_hi = 0;
676 }
677 /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control_sel1_r();*/
678 ops[0].offset = 0x00419e08;
679 ops[0].value_lo = 0x1d;
680
681 /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control5_r(); */
682 ops[1].offset = 0x00419e58;
683 ops[1].value_lo = 0x1;
684
685 /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control3_r(); */
686 ops[2].offset = 0x00419e68;
687 ops[2].value_lo = 0xaaaa;
688
689 /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter4_control_r(); */
690 ops[3].offset = 0x00419f40;
691 ops[3].value_lo = 0x18;
692
693 err = dbg_s->ops->exec_reg_ops(dbg_s, ops, ARRAY_SIZE(ops));
694 }
695
696 clean_up:
697 mutex_unlock(&g->dbg_sessions_lock);
698 return err;
699}
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
new file mode 100644
index 00000000..49827608
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
@@ -0,0 +1,83 @@
1/*
2 * Tegra GK20A GPU Debugger Driver
3 *
4 * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18#ifndef __DBG_GPU_GK20A_H_
19#define __DBG_GPU_GK20A_H_
20#include <linux/poll.h>
21
22/* module debug driver interface */
23int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp);
24int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp);
25long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
26unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait);
27
28/* used by profiler driver interface */
29int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp);
30
31/* used by the interrupt handler to post events */
32void gk20a_dbg_gpu_post_events(struct channel_gk20a *fault_ch);
33
34struct dbg_gpu_session_ops {
35 int (*exec_reg_ops)(struct dbg_session_gk20a *dbg_s,
36 struct nvhost_dbg_gpu_reg_op *ops,
37 u64 num_ops);
38};
39
40struct dbg_gpu_session_events {
41 wait_queue_head_t wait_queue;
42 bool events_enabled;
43 int num_pending_events;
44};
45
46struct dbg_session_gk20a {
47 /* dbg session id used for trace/prints */
48 int id;
49
50 /* profiler session, if any */
51 bool is_profiler;
52
53	/* has this session disabled powergating? */
54 bool is_pg_disabled;
55
56 /*
57 * There can be different versions of the whitelists
58 * between both global and per-context sets; as well
59 * as between debugger and profiler interfaces.
60 */
61 struct regops_whitelist *global;
62 struct regops_whitelist *per_context;
63
64 /* gpu module vagaries */
65 struct device *dev;
66 struct platform_device *pdev;
67 struct gk20a *g;
68
69 /* bound channel, if any */
70 struct file *ch_f;
71 struct channel_gk20a *ch;
72
73 /* session operations */
74 struct dbg_gpu_session_ops *ops;
75
76 /* event support */
77 struct dbg_gpu_session_events dbg_events;
78 struct list_head dbg_s_list_node;
79};
80
81extern struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a;
82
83#endif /* __DBG_GPU_GK20A_H_ */
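
For context, a hedged user-space sketch of how the event interface above is typically consumed: enable events on a debug session, then poll() until gk20a_dbg_gpu_post_events() wakes the queue and the poll handler reports POLLPRI | POLLIN. The /dev/nvhost-dbg-gpu node path is an assumption, and the required channel bind (NVHOST_DBG_GPU_IOCTL_BIND_CHANNEL) is omitted for brevity.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <linux/nvhost_dbg_gpu_ioctl.h>

int main(void)
{
	struct nvhost_dbg_gpu_events_ctrl_args ctrl = {
		.cmd = NVHOST_DBG_GPU_EVENTS_CTRL_CMD_ENABLE,
	};
	struct pollfd pfd;
	int fd = open("/dev/nvhost-dbg-gpu", O_RDWR);	/* node path assumed */

	if (fd < 0)
		return 1;

	/* a channel must already be bound for EVENTS_CTRL to succeed */
	if (ioctl(fd, NVHOST_DBG_GPU_IOCTL_EVENTS_CTRL, &ctrl) == 0) {
		pfd.fd = fd;
		pfd.events = POLLPRI | POLLIN;	/* what the poll handler raises */
		if (poll(&pfd, 1, 1000) > 0)
			printf("debug event pending\n");
	}

	close(fd);
	return 0;
}
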
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
new file mode 100644
index 00000000..c5b6953c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -0,0 +1,295 @@
1/*
2 * drivers/video/tegra/host/t20/debug_gk20a.c
3 *
4 * Copyright (C) 2011-2014 NVIDIA Corporation. All rights reserved.
5 *
6 * This software is licensed under the terms of the GNU General Public
7 * License version 2, as published by the Free Software Foundation, and
8 * may be copied, distributed, and modified under those terms.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 */
16
17#include <linux/nvhost.h>
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21#include <linux/io.h>
22
23#include "gk20a.h"
24#include "debug_gk20a.h"
25
26#include "hw_ram_gk20a.h"
27#include "hw_fifo_gk20a.h"
28#include "hw_ccsr_gk20a.h"
29#include "hw_pbdma_gk20a.h"
30
31unsigned int gk20a_debug_trace_cmdbuf;
32struct platform_device *gk20a_device;
33
34struct gk20a_debug_output {
35 void (*fn)(void *ctx, const char *str, size_t len);
36 void *ctx;
37 char buf[256];
38};
39
40static const char * const ccsr_chan_status_str[] = {
41 "idle",
42 "pending",
43 "pending_ctx_reload",
44 "pending_acquire",
45 "pending_acq_ctx_reload",
46 "on_pbdma",
47 "on_pbdma_and_eng",
48 "on_eng",
49 "on_eng_pending_acquire",
50 "on_eng_pending",
51 "on_pbdma_ctx_reload",
52 "on_pbdma_and_eng_ctx_reload",
53 "on_eng_ctx_reload",
54 "on_eng_pending_ctx_reload",
55 "on_eng_pending_acq_ctx_reload",
56};
57
58static const char * const chan_status_str[] = {
59 "invalid",
60 "valid",
61 "chsw_load",
62 "chsw_save",
63 "chsw_switch",
64};
65
66static const char * const ctx_status_str[] = {
67 "invalid",
68 "valid",
69 NULL,
70 NULL,
71 NULL,
72 "ctxsw_load",
73 "ctxsw_save",
74 "ctxsw_switch",
75};
76
77static inline void gk20a_debug_write_printk(void *ctx, const char *str,
78 size_t len)
79{
80 pr_info("%s", str);
81}
82
83static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
84 size_t len)
85{
86 seq_write((struct seq_file *)ctx, str, len);
87}
88
89void gk20a_debug_output(struct gk20a_debug_output *o, const char *fmt, ...)
90{
91 va_list args;
92 int len;
93
94 va_start(args, fmt);
95 len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
96 va_end(args);
97 o->fn(o->ctx, o->buf, len);
98}
99
100static void gk20a_debug_show_channel(struct gk20a *g,
101 struct gk20a_debug_output *o,
102 struct channel_gk20a *ch)
103{
104 u32 channel = gk20a_readl(g, ccsr_channel_r(ch->hw_chid));
105 u32 status = ccsr_channel_status_v(channel);
106 u32 syncpointa, syncpointb;
107 void *inst_ptr;
108
109 inst_ptr = ch->inst_block.cpuva;
110 if (!inst_ptr)
111 return;
112
113 syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w());
114 syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w());
115
116 gk20a_debug_output(o, "%d-%s, pid %d: ", ch->hw_chid,
117 ch->g->dev->name,
118 ch->pid);
119 gk20a_debug_output(o, "%s in use %s %s\n",
120 ccsr_channel_enable_v(channel) ? "" : "not",
121 ccsr_chan_status_str[status],
122 ccsr_channel_busy_v(channel) ? "busy" : "not busy");
123 gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx "
124 "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n"
125 "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n",
126 (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_top_level_get_w()) +
127 ((u64)gk20a_mem_rd32(inst_ptr,
128 ram_fc_pb_top_level_get_hi_w()) << 32ULL),
129 (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_w()) +
130 ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()) << 32ULL),
131 (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_w()) +
132 ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()) << 32ULL),
133 (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_w()) +
134 ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()) << 32ULL),
135 gk20a_mem_rd32(inst_ptr, ram_fc_pb_header_w()),
136 gk20a_mem_rd32(inst_ptr, ram_fc_pb_count_w()),
137 syncpointa,
138 syncpointb,
139 gk20a_mem_rd32(inst_ptr, ram_fc_semaphorea_w()),
140 gk20a_mem_rd32(inst_ptr, ram_fc_semaphoreb_w()),
141 gk20a_mem_rd32(inst_ptr, ram_fc_semaphorec_w()),
142 gk20a_mem_rd32(inst_ptr, ram_fc_semaphored_w()));
143
144 if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v())
145 && (pbdma_syncpointb_wait_switch_v(syncpointb) ==
146 pbdma_syncpointb_wait_switch_en_v()))
147 gk20a_debug_output(o, "Waiting on syncpt %u (%s) val %u\n",
148 pbdma_syncpointb_syncpt_index_v(syncpointb),
149 nvhost_syncpt_get_name(
150 to_platform_device(g->dev->dev.parent),
151 pbdma_syncpointb_syncpt_index_v(syncpointb)),
152 pbdma_syncpointa_payload_v(syncpointa));
153
154 gk20a_debug_output(o, "\n");
155}
156
157void gk20a_debug_show_dump(struct platform_device *pdev,
158 struct gk20a_debug_output *o)
159{
160 struct gk20a_platform *platform = gk20a_get_platform(pdev);
161 struct gk20a *g = platform->g;
162 struct fifo_gk20a *f = &g->fifo;
163 u32 chid;
164 int i;
165
166 gk20a_busy(g->dev);
167 for (i = 0; i < fifo_pbdma_status__size_1_v(); i++) {
168 u32 status = gk20a_readl(g, fifo_pbdma_status_r(i));
169 u32 chan_status = fifo_pbdma_status_chan_status_v(status);
170
171 gk20a_debug_output(o, "%s pbdma %d: ", g->dev->name, i);
172 gk20a_debug_output(o,
173 "id: %d (%s), next_id: %d (%s) status: %s\n",
174 fifo_pbdma_status_id_v(status),
175 fifo_pbdma_status_id_type_v(status) ?
176 "tsg" : "channel",
177 fifo_pbdma_status_next_id_v(status),
178 fifo_pbdma_status_next_id_type_v(status) ?
179 "tsg" : "channel",
180 chan_status_str[chan_status]);
181 gk20a_debug_output(o, "PUT: %016llx GET: %016llx "
182 "FETCH: %08x HEADER: %08x\n",
183 (u64)gk20a_readl(g, pbdma_put_r(i)) +
184 ((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL),
185 (u64)gk20a_readl(g, pbdma_get_r(i)) +
186 ((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL),
187 gk20a_readl(g, pbdma_gp_fetch_r(i)),
188 gk20a_readl(g, pbdma_pb_header_r(i)));
189 }
190 gk20a_debug_output(o, "\n");
191
192 for (i = 0; i < fifo_engine_status__size_1_v(); i++) {
193 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
194 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
195
196 gk20a_debug_output(o, "%s eng %d: ", g->dev->name, i);
197 gk20a_debug_output(o,
198 "id: %d (%s), next_id: %d (%s), ctx: %s ",
199 fifo_engine_status_id_v(status),
200 fifo_engine_status_id_type_v(status) ?
201 "tsg" : "channel",
202 fifo_engine_status_next_id_v(status),
203 fifo_engine_status_next_id_type_v(status) ?
204 "tsg" : "channel",
205 ctx_status_str[ctx_status]);
206
207 if (fifo_engine_status_faulted_v(status))
208 gk20a_debug_output(o, "faulted ");
209 if (fifo_engine_status_engine_v(status))
210 gk20a_debug_output(o, "busy ");
211 gk20a_debug_output(o, "\n");
212 }
213 gk20a_debug_output(o, "\n");
214
215 for (chid = 0; chid < f->num_channels; chid++) {
216 if (f->channel[chid].in_use) {
217 struct channel_gk20a *gpu_ch = &f->channel[chid];
218 gk20a_debug_show_channel(g, o, gpu_ch);
219 }
220 }
221 gk20a_idle(g->dev);
222}
223
224void gk20a_debug_dump(struct platform_device *pdev)
225{
226 struct gk20a_platform *platform = gk20a_get_platform(pdev);
227 struct gk20a_debug_output o = {
228 .fn = gk20a_debug_write_printk
229 };
230
231 if (platform->dump_platform_dependencies)
232 platform->dump_platform_dependencies(pdev);
233
234 gk20a_debug_show_dump(pdev, &o);
235}
236
237void gk20a_debug_dump_device(struct platform_device *pdev)
238{
239 struct gk20a_debug_output o = {
240 .fn = gk20a_debug_write_printk
241 };
242
243 /* Dump the first device if no info is provided */
244 if (!pdev && gk20a_device)
245 pdev = gk20a_device;
246
247 gk20a_debug_show_dump(pdev, &o);
248}
249EXPORT_SYMBOL(gk20a_debug_dump_device);
250
251static int gk20a_debug_show(struct seq_file *s, void *unused)
252{
253 struct platform_device *pdev = s->private;
254 struct gk20a_debug_output o = {
255 .fn = gk20a_debug_write_to_seqfile,
256 .ctx = s,
257 };
258 gk20a_debug_show_dump(pdev, &o);
259 return 0;
260}
261
262static int gk20a_debug_open(struct inode *inode, struct file *file)
263{
264 return single_open(file, gk20a_debug_show, inode->i_private);
265}
266
267static const struct file_operations gk20a_debug_fops = {
268 .open = gk20a_debug_open,
269 .read = seq_read,
270 .llseek = seq_lseek,
271 .release = single_release,
272};
273
274void gk20a_debug_init(struct platform_device *pdev)
275{
276 struct gk20a_platform *platform = platform_get_drvdata(pdev);
277
278 /* Store the first device */
279 if (!gk20a_device)
280 gk20a_device = pdev;
281
282 platform->debugfs = debugfs_create_dir(pdev->name, NULL);
283
284 debugfs_create_file("status", S_IRUGO, platform->debugfs,
285 pdev, &gk20a_debug_fops);
286 debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, platform->debugfs,
287 &gk20a_debug_trace_cmdbuf);
288
289#if defined(GK20A_DEBUG)
290 debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR, platform->debugfs,
291 &gk20a_dbg_mask);
292 debugfs_create_u32("dbg_ftrace", S_IRUGO|S_IWUSR, platform->debugfs,
293 &gk20a_dbg_ftrace);
294#endif
295}
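
The gk20a_debug_output() helper above decouples formatting from the destination: each sink is just a callback plus a context pointer, which is how the same dump code feeds both printk and a debugfs seq_file. A miniature user-space re-creation of the pattern (all names here are hypothetical):

#include <stdio.h>
#include <stdarg.h>

/* miniature re-creation of the sink pattern; all names here are ours */
struct demo_output {
	void (*fn)(void *ctx, const char *str, size_t len);
	void *ctx;
	char buf[256];
};

static void demo_output(struct demo_output *o, const char *fmt, ...)
{
	va_list args;
	int len;

	va_start(args, fmt);
	len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
	va_end(args);
	if (len > (int)sizeof(o->buf) - 1)	/* guard against truncation */
		len = sizeof(o->buf) - 1;
	o->fn(o->ctx, o->buf, len);
}

static void sink_stdout(void *ctx, const char *str, size_t len)
{
	fwrite(str, 1, len, stdout);	/* stand-in for printk/seq_write */
}

int main(void)
{
	struct demo_output o = { .fn = sink_stdout };

	demo_output(&o, "channel %d: %s\n", 3, "idle");
	return 0;
}
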
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.h b/drivers/gpu/nvgpu/gk20a/debug_gk20a.h
new file mode 100644
index 00000000..cd2e09c3
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.h
@@ -0,0 +1,25 @@
1/*
2 * GK20A Debug functionality
3 *
4 * Copyright (C) 2011-2014 NVIDIA CORPORATION. All rights reserved.
5 *
6 * This software is licensed under the terms of the GNU General Public
7 * License version 2, as published by the Free Software Foundation, and
8 * may be copied, distributed, and modified under those terms.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 */
16
17#ifndef _DEBUG_GK20A_H_
18#define _DEBUG_GK20A_H_
19
20extern unsigned int gk20a_debug_trace_cmdbuf;
21
22void gk20a_debug_dump(struct platform_device *pdev);
23void gk20a_debug_init(struct platform_device *pdev);
24
25#endif
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
new file mode 100644
index 00000000..52f2db4d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
@@ -0,0 +1,37 @@
1/*
2 * GK20A memory interface
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/types.h>
17
18#include "gk20a.h"
19#include "kind_gk20a.h"
20#include "hw_mc_gk20a.h"
21
22static void fb_gk20a_reset(struct gk20a *g)
23{
24 gk20a_dbg_info("reset gk20a fb");
25
26 gk20a_reset(g, mc_enable_pfb_enabled_f()
27 | mc_enable_l2_enabled_f()
28 | mc_enable_xbar_enabled_f()
29 | mc_enable_hub_enabled_f());
30}
31
32void gk20a_init_fb(struct gpu_ops *gops)
33{
34 gops->fb.reset = fb_gk20a_reset;
35 gk20a_init_uncompressed_kind_map();
36 gk20a_init_kind_attr();
37}
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.h b/drivers/gpu/nvgpu/gk20a/fb_gk20a.h
new file mode 100644
index 00000000..34c21c9b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.h
@@ -0,0 +1,21 @@
1/*
2 * GK20A FB
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef _NVHOST_GK20A_FB
17#define _NVHOST_GK20A_FB
18struct gk20a;
19
20void gk20a_init_fb(struct gpu_ops *gops);
21#endif
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
new file mode 100644
index 00000000..5575b995
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -0,0 +1,1836 @@
1/*
2 * drivers/video/tegra/host/gk20a/fifo_gk20a.c
3 *
4 * GK20A Graphics FIFO (gr host)
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#include <linux/delay.h>
22#include <linux/slab.h>
23#include <linux/scatterlist.h>
24#include <trace/events/gk20a.h>
25#include <linux/dma-mapping.h>
26#include <linux/nvhost.h>
27
28#include "gk20a.h"
29#include "debug_gk20a.h"
30#include "hw_fifo_gk20a.h"
31#include "hw_pbdma_gk20a.h"
32#include "hw_ccsr_gk20a.h"
33#include "hw_ram_gk20a.h"
34#include "hw_proj_gk20a.h"
35#include "hw_top_gk20a.h"
36#include "hw_mc_gk20a.h"
37#include "hw_gr_gk20a.h"
38
39static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
40 u32 hw_chid, bool add,
41 bool wait_for_finish);
42static void gk20a_fifo_handle_mmu_fault_thread(struct work_struct *work);
43
44/*
45 * Link engine IDs to MMU IDs and vice versa.
46 */
47
48static inline u32 gk20a_engine_id_to_mmu_id(u32 engine_id)
49{
50 switch (engine_id) {
51 case ENGINE_GR_GK20A:
52 return 0x00;
53 case ENGINE_CE2_GK20A:
54 return 0x1b;
55 default:
56 return ~0;
57 }
58}
59
60static inline u32 gk20a_mmu_id_to_engine_id(u32 engine_id)
61{
62 switch (engine_id) {
63 case 0x00:
64 return ENGINE_GR_GK20A;
65 case 0x1b:
66 return ENGINE_CE2_GK20A;
67 default:
68 return ~0;
69 }
70}
71
72
73static int init_engine_info(struct fifo_gk20a *f)
74{
75 struct gk20a *g = f->g;
76 struct device *d = dev_from_gk20a(g);
77 struct fifo_engine_info_gk20a *gr_info;
78 const u32 gr_sw_id = ENGINE_GR_GK20A;
79 u32 i;
80 u32 max_info_entries = top_device_info__size_1_v();
81
82 gk20a_dbg_fn("");
83
84 /* all we really care about finding is the graphics entry */
85 /* especially early on in sim it probably thinks it has more */
86 f->num_engines = 1;
87
88 gr_info = f->engine_info + gr_sw_id;
89
90 gr_info->sw_id = gr_sw_id;
91 gr_info->name = "gr";
92 gr_info->dev_info_id = top_device_info_type_enum_graphics_v();
93 gr_info->mmu_fault_id = fifo_intr_mmu_fault_eng_id_graphics_v();
94 gr_info->runlist_id = ~0;
95 gr_info->pbdma_id = ~0;
96 gr_info->engine_id = ~0;
97
98 for (i = 0; i < max_info_entries; i++) {
99 u32 table_entry = gk20a_readl(f->g, top_device_info_r(i));
100 u32 entry = top_device_info_entry_v(table_entry);
101 u32 engine_enum = top_device_info_type_enum_v(table_entry);
102 u32 table_entry2 = 0;
103
104 if (entry == top_device_info_entry_not_valid_v())
105 continue;
106
107 if (top_device_info_chain_v(table_entry) ==
108 top_device_info_chain_enable_v()) {
109
110 table_entry2 = gk20a_readl(f->g,
111 top_device_info_r(++i));
112
113 engine_enum = top_device_info_type_enum_v(table_entry2);
114 }
115
116 /* we only care about GR engine here */
117 if (entry == top_device_info_entry_enum_v() &&
118 engine_enum == gr_info->dev_info_id) {
119 int pbdma_id;
120 u32 runlist_bit;
121
122 gr_info->runlist_id =
123 top_device_info_runlist_enum_v(table_entry);
124 gk20a_dbg_info("gr info: runlist_id %d", gr_info->runlist_id);
125
126 gr_info->engine_id =
127 top_device_info_engine_enum_v(table_entry);
128 gk20a_dbg_info("gr info: engine_id %d", gr_info->engine_id);
129
130 runlist_bit = 1 << gr_info->runlist_id;
131
132 for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) {
133 gk20a_dbg_info("gr info: pbdma_map[%d]=%d",
134 pbdma_id, f->pbdma_map[pbdma_id]);
135 if (f->pbdma_map[pbdma_id] & runlist_bit)
136 break;
137 }
138
139 if (pbdma_id == f->num_pbdma) {
140				gk20a_err(d, "busted pbdma map");
141 return -EINVAL;
142 }
143 gr_info->pbdma_id = pbdma_id;
144
145 break;
146 }
147 }
148
149 if (gr_info->runlist_id == ~0) {
150 gk20a_err(d, "busted device info");
151 return -EINVAL;
152 }
153
154 return 0;
155}
156
157void gk20a_remove_fifo_support(struct fifo_gk20a *f)
158{
159 struct gk20a *g = f->g;
160 struct device *d = dev_from_gk20a(g);
161 struct fifo_engine_info_gk20a *engine_info;
162 struct fifo_runlist_info_gk20a *runlist;
163 u32 runlist_id;
164 u32 i;
165
166 gk20a_dbg_fn("");
167
168 if (f->channel) {
169 int c;
170 for (c = 0; c < f->num_channels; c++) {
171 if (f->channel[c].remove_support)
172 f->channel[c].remove_support(f->channel+c);
173 }
174 kfree(f->channel);
175 }
176 if (f->userd.gpu_va)
177 gk20a_gmmu_unmap(&g->mm.bar1.vm,
178 f->userd.gpu_va,
179 f->userd.size,
180 gk20a_mem_flag_none);
181
182 if (f->userd.sgt)
183 gk20a_free_sgtable(&f->userd.sgt);
184
185 if (f->userd.cpuva)
186 dma_free_coherent(d,
187 f->userd_total_size,
188 f->userd.cpuva,
189 f->userd.iova);
190 f->userd.cpuva = NULL;
191 f->userd.iova = 0;
192
193 engine_info = f->engine_info + ENGINE_GR_GK20A;
194 runlist_id = engine_info->runlist_id;
195 runlist = &f->runlist_info[runlist_id];
196
197 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
198 if (runlist->mem[i].cpuva)
199 dma_free_coherent(d,
200 runlist->mem[i].size,
201 runlist->mem[i].cpuva,
202 runlist->mem[i].iova);
203 runlist->mem[i].cpuva = NULL;
204 runlist->mem[i].iova = 0;
205 }
206
207 kfree(runlist->active_channels);
208
209 kfree(f->runlist_info);
210 kfree(f->pbdma_map);
211 kfree(f->engine_info);
212}
213
214/* reads info from hardware and fills in pbdma exception info record */
215static inline void get_exception_pbdma_info(
216 struct gk20a *g,
217 struct fifo_engine_info_gk20a *eng_info)
218{
219 struct fifo_pbdma_exception_info_gk20a *e =
220 &eng_info->pbdma_exception_info;
221
222 u32 pbdma_status_r = e->status_r = gk20a_readl(g,
223 fifo_pbdma_status_r(eng_info->pbdma_id));
224 e->id = fifo_pbdma_status_id_v(pbdma_status_r); /* vs. id_hw_v()? */
225 e->id_is_chid = fifo_pbdma_status_id_type_v(pbdma_status_r) ==
226 fifo_pbdma_status_id_type_chid_v();
227 e->chan_status_v = fifo_pbdma_status_chan_status_v(pbdma_status_r);
228 e->next_id_is_chid =
229 fifo_pbdma_status_next_id_type_v(pbdma_status_r) ==
230 fifo_pbdma_status_next_id_type_chid_v();
231 e->next_id = fifo_pbdma_status_next_id_v(pbdma_status_r);
232 e->chsw_in_progress =
233 fifo_pbdma_status_chsw_v(pbdma_status_r) ==
234 fifo_pbdma_status_chsw_in_progress_v();
235}
236
237static void fifo_pbdma_exception_status(struct gk20a *g,
238 struct fifo_engine_info_gk20a *eng_info)
239{
240 struct fifo_pbdma_exception_info_gk20a *e;
241 get_exception_pbdma_info(g, eng_info);
242 e = &eng_info->pbdma_exception_info;
243
244 gk20a_dbg_fn("pbdma_id %d, "
245 "id_type %s, id %d, chan_status %d, "
246 "next_id_type %s, next_id %d, "
247 "chsw_in_progress %d",
248 eng_info->pbdma_id,
249 e->id_is_chid ? "chid" : "tsgid", e->id, e->chan_status_v,
250 e->next_id_is_chid ? "chid" : "tsgid", e->next_id,
251 e->chsw_in_progress);
252}
253
254/* reads info from hardware and fills in engine exception info record */
255static inline void get_exception_engine_info(
256 struct gk20a *g,
257 struct fifo_engine_info_gk20a *eng_info)
258{
259 struct fifo_engine_exception_info_gk20a *e =
260 &eng_info->engine_exception_info;
261 u32 engine_status_r = e->status_r =
262 gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
263 e->id = fifo_engine_status_id_v(engine_status_r); /* vs. id_hw_v()? */
264 e->id_is_chid = fifo_engine_status_id_type_v(engine_status_r) ==
265 fifo_engine_status_id_type_chid_v();
266 e->ctx_status_v = fifo_engine_status_ctx_status_v(engine_status_r);
267 e->faulted =
268 fifo_engine_status_faulted_v(engine_status_r) ==
269 fifo_engine_status_faulted_true_v();
270 e->idle =
271 fifo_engine_status_engine_v(engine_status_r) ==
272 fifo_engine_status_engine_idle_v();
273 e->ctxsw_in_progress =
274 fifo_engine_status_ctxsw_v(engine_status_r) ==
275 fifo_engine_status_ctxsw_in_progress_v();
276}
277
278static void fifo_engine_exception_status(struct gk20a *g,
279 struct fifo_engine_info_gk20a *eng_info)
280{
281 struct fifo_engine_exception_info_gk20a *e;
282 get_exception_engine_info(g, eng_info);
283 e = &eng_info->engine_exception_info;
284
285 gk20a_dbg_fn("engine_id %d, id_type %s, id %d, ctx_status %d, "
286 "faulted %d, idle %d, ctxsw_in_progress %d, ",
287 eng_info->engine_id, e->id_is_chid ? "chid" : "tsgid",
288 e->id, e->ctx_status_v,
289 e->faulted, e->idle, e->ctxsw_in_progress);
290}
291
292static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
293{
294 struct fifo_engine_info_gk20a *engine_info;
295 struct fifo_runlist_info_gk20a *runlist;
296 struct device *d = dev_from_gk20a(g);
297 u32 runlist_id;
298 u32 i;
299 u64 runlist_size;
300
301 gk20a_dbg_fn("");
302
303 f->max_runlists = fifo_eng_runlist_base__size_1_v();
304 f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
305 f->max_runlists, GFP_KERNEL);
306 if (!f->runlist_info)
307 goto clean_up;
308
309 engine_info = f->engine_info + ENGINE_GR_GK20A;
310 runlist_id = engine_info->runlist_id;
311 runlist = &f->runlist_info[runlist_id];
312
313 runlist->active_channels =
314 kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
315 GFP_KERNEL);
316 if (!runlist->active_channels)
317 goto clean_up_runlist_info;
318
319 runlist_size = ram_rl_entry_size_v() * f->num_channels;
320 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
321 dma_addr_t iova;
322
323 runlist->mem[i].cpuva =
324 dma_alloc_coherent(d,
325 runlist_size,
326 &iova,
327 GFP_KERNEL);
328 if (!runlist->mem[i].cpuva) {
329 dev_err(d, "memory allocation failed\n");
330 goto clean_up_runlist;
331 }
332 runlist->mem[i].iova = iova;
333 runlist->mem[i].size = runlist_size;
334 }
335 mutex_init(&runlist->mutex);
336 init_waitqueue_head(&runlist->runlist_wq);
337
338	/* None of the buffers is pinned if this value doesn't change.
339 Otherwise, one of them (cur_buffer) must have been pinned. */
340 runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
341
342 gk20a_dbg_fn("done");
343 return 0;
344
345clean_up_runlist:
346 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
347 if (runlist->mem[i].cpuva)
348 dma_free_coherent(d,
349 runlist->mem[i].size,
350 runlist->mem[i].cpuva,
351 runlist->mem[i].iova);
352 runlist->mem[i].cpuva = NULL;
353 runlist->mem[i].iova = 0;
354 }
355
356 kfree(runlist->active_channels);
357 runlist->active_channels = NULL;
358
359clean_up_runlist_info:
360 kfree(f->runlist_info);
361 f->runlist_info = NULL;
362
363clean_up:
364 gk20a_dbg_fn("fail");
365 return -ENOMEM;
366}
367
368#define GRFIFO_TIMEOUT_CHECK_PERIOD_US 100000
369
370int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
371{
372 u32 intr_stall;
373 u32 mask;
374 u32 timeout;
375 int i;
376
377 gk20a_dbg_fn("");
378 /* enable pmc pfifo */
379 gk20a_reset(g, mc_enable_pfifo_enabled_f()
380 | mc_enable_ce2_enabled_f());
381
382 /* enable pbdma */
383 mask = 0;
384 for (i = 0; i < proj_host_num_pbdma_v(); ++i)
385 mask |= mc_enable_pb_sel_f(mc_enable_pb_0_enabled_v(), i);
386 gk20a_writel(g, mc_enable_pb_r(), mask);
387
388 /* enable pfifo interrupt */
389 gk20a_writel(g, fifo_intr_0_r(), 0xFFFFFFFF);
390 gk20a_writel(g, fifo_intr_en_0_r(), 0x7FFFFFFF);
391 gk20a_writel(g, fifo_intr_en_1_r(), 0x80000000);
392
393 /* enable pbdma interrupt */
394 mask = 0;
395 for (i = 0; i < proj_host_num_pbdma_v(); i++) {
396 intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i));
397 intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f();
398 gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall);
399 gk20a_writel(g, pbdma_intr_0_r(i), 0xFFFFFFFF);
400 gk20a_writel(g, pbdma_intr_en_0_r(i),
401 (~0) & ~pbdma_intr_en_0_lbreq_enabled_f());
402 gk20a_writel(g, pbdma_intr_1_r(i), 0xFFFFFFFF);
403 gk20a_writel(g, pbdma_intr_en_1_r(i), 0xFFFFFFFF);
404 }
405
406 /* TBD: apply overrides */
407
408 /* TBD: BLCG prod */
409
410 /* reset runlist interrupts */
411 gk20a_writel(g, fifo_intr_runlist_r(), ~0);
412
413 /* TBD: do we need those? */
414 timeout = gk20a_readl(g, fifo_fb_timeout_r());
415 timeout = set_field(timeout, fifo_fb_timeout_period_m(),
416 fifo_fb_timeout_period_max_f());
417 gk20a_writel(g, fifo_fb_timeout_r(), timeout);
418
419 if (tegra_platform_is_silicon()) {
420 timeout = gk20a_readl(g, fifo_pb_timeout_r());
421 timeout &= ~fifo_pb_timeout_detection_enabled_f();
422 gk20a_writel(g, fifo_pb_timeout_r(), timeout);
423 }
424
425 timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US |
426 fifo_eng_timeout_detection_enabled_f();
427 gk20a_writel(g, fifo_eng_timeout_r(), timeout);
428
429 gk20a_dbg_fn("done");
430
431 return 0;
432}
433
434static void gk20a_init_fifo_pbdma_intr_descs(struct fifo_gk20a *f)
435{
436	/* These are all errors which indicate something really wrong is
437 * going on in the device. */
438 f->intr.pbdma.device_fatal_0 =
439 pbdma_intr_0_memreq_pending_f() |
440 pbdma_intr_0_memack_timeout_pending_f() |
441 pbdma_intr_0_memack_extra_pending_f() |
442 pbdma_intr_0_memdat_timeout_pending_f() |
443 pbdma_intr_0_memdat_extra_pending_f() |
444 pbdma_intr_0_memflush_pending_f() |
445 pbdma_intr_0_memop_pending_f() |
446 pbdma_intr_0_lbconnect_pending_f() |
447 pbdma_intr_0_lbreq_pending_f() |
448 pbdma_intr_0_lback_timeout_pending_f() |
449 pbdma_intr_0_lback_extra_pending_f() |
450 pbdma_intr_0_lbdat_timeout_pending_f() |
451 pbdma_intr_0_lbdat_extra_pending_f() |
452 pbdma_intr_0_xbarconnect_pending_f() |
453 pbdma_intr_0_pri_pending_f();
454
455	/* These are data parsing or framing errors, or others which can be
456	 * recovered from with intervention... or by just resetting the
457 * channel. */
458 f->intr.pbdma.channel_fatal_0 =
459 pbdma_intr_0_gpfifo_pending_f() |
460 pbdma_intr_0_gpptr_pending_f() |
461 pbdma_intr_0_gpentry_pending_f() |
462 pbdma_intr_0_gpcrc_pending_f() |
463 pbdma_intr_0_pbptr_pending_f() |
464 pbdma_intr_0_pbentry_pending_f() |
465 pbdma_intr_0_pbcrc_pending_f() |
466 pbdma_intr_0_method_pending_f() |
467 pbdma_intr_0_methodcrc_pending_f() |
468 pbdma_intr_0_pbseg_pending_f() |
469 pbdma_intr_0_signature_pending_f();
470
471 /* Can be used for sw-methods, or represents
472 * a recoverable timeout. */
473 f->intr.pbdma.restartable_0 =
474 pbdma_intr_0_device_pending_f() |
475 pbdma_intr_0_acquire_pending_f();
476}
477
478static int gk20a_init_fifo_setup_sw(struct gk20a *g)
479{
480 struct fifo_gk20a *f = &g->fifo;
481 struct device *d = dev_from_gk20a(g);
482 int chid, i, err = 0;
483 dma_addr_t iova;
484
485 gk20a_dbg_fn("");
486
487 if (f->sw_ready) {
488 gk20a_dbg_fn("skip init");
489 return 0;
490 }
491
492 f->g = g;
493
494 INIT_WORK(&f->fault_restore_thread,
495 gk20a_fifo_handle_mmu_fault_thread);
496 mutex_init(&f->intr.isr.mutex);
497 gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */
498
499 f->num_channels = ccsr_channel__size_1_v();
500 f->num_pbdma = proj_host_num_pbdma_v();
501 f->max_engines = ENGINE_INVAL_GK20A;
502
503 f->userd_entry_size = 1 << ram_userd_base_shift_v();
504 f->userd_total_size = f->userd_entry_size * f->num_channels;
505
506 f->userd.cpuva = dma_alloc_coherent(d,
507 f->userd_total_size,
508 &iova,
509 GFP_KERNEL);
510 if (!f->userd.cpuva) {
511 dev_err(d, "memory allocation failed\n");
512 goto clean_up;
513 }
514
515 f->userd.iova = iova;
516 err = gk20a_get_sgtable(d, &f->userd.sgt,
517 f->userd.cpuva, f->userd.iova,
518 f->userd_total_size);
519 if (err) {
520 dev_err(d, "failed to create sg table\n");
521 goto clean_up;
522 }
523
524 /* bar1 va */
525 f->userd.gpu_va = gk20a_gmmu_map(&g->mm.bar1.vm,
526 &f->userd.sgt,
527 f->userd_total_size,
528 0, /* flags */
529 gk20a_mem_flag_none);
530 if (!f->userd.gpu_va) {
531 dev_err(d, "gmmu mapping failed\n");
532 goto clean_up;
533 }
534
535 gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
536
537 f->userd.size = f->userd_total_size;
538
539 f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
540 GFP_KERNEL);
541 f->pbdma_map = kzalloc(f->num_pbdma * sizeof(*f->pbdma_map),
542 GFP_KERNEL);
543 f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
544 GFP_KERNEL);
545
546 if (!(f->channel && f->pbdma_map && f->engine_info)) {
547 err = -ENOMEM;
548 goto clean_up;
549 }
550
551 /* pbdma map needs to be in place before calling engine info init */
552 for (i = 0; i < f->num_pbdma; ++i)
553 f->pbdma_map[i] = gk20a_readl(g, fifo_pbdma_map_r(i));
554
555 init_engine_info(f);
556
557 init_runlist(g, f);
558
559 for (chid = 0; chid < f->num_channels; chid++) {
560 f->channel[chid].userd_cpu_va =
561 f->userd.cpuva + chid * f->userd_entry_size;
562 f->channel[chid].userd_iova =
563 NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova)
564 + chid * f->userd_entry_size;
565 f->channel[chid].userd_gpu_va =
566 f->userd.gpu_va + chid * f->userd_entry_size;
567
568 gk20a_init_channel_support(g, chid);
569 }
570 mutex_init(&f->ch_inuse_mutex);
571
572 f->remove_support = gk20a_remove_fifo_support;
573
574 f->deferred_reset_pending = false;
575 mutex_init(&f->deferred_reset_mutex);
576
577 f->sw_ready = true;
578
579 gk20a_dbg_fn("done");
580 return 0;
581
582clean_up:
583 gk20a_dbg_fn("fail");
584 if (f->userd.gpu_va)
585 gk20a_gmmu_unmap(&g->mm.bar1.vm,
586 f->userd.gpu_va,
587 f->userd.size,
588 gk20a_mem_flag_none);
589 if (f->userd.sgt)
590 gk20a_free_sgtable(&f->userd.sgt);
591 if (f->userd.cpuva)
592 dma_free_coherent(d,
593 f->userd_total_size,
594 f->userd.cpuva,
595 f->userd.iova);
596 f->userd.cpuva = NULL;
597 f->userd.iova = 0;
598
599 memset(&f->userd, 0, sizeof(struct userd_desc));
600
601 kfree(f->channel);
602 f->channel = NULL;
603 kfree(f->pbdma_map);
604 f->pbdma_map = NULL;
605 kfree(f->engine_info);
606 f->engine_info = NULL;
607
608 return err;
609}
610
611static void gk20a_fifo_handle_runlist_event(struct gk20a *g)
612{
613 struct fifo_gk20a *f = &g->fifo;
614 struct fifo_runlist_info_gk20a *runlist;
615 unsigned long runlist_event;
616 u32 runlist_id;
617
618 runlist_event = gk20a_readl(g, fifo_intr_runlist_r());
619 gk20a_writel(g, fifo_intr_runlist_r(), runlist_event);
620
621 for_each_set_bit(runlist_id, &runlist_event, f->max_runlists) {
622 runlist = &f->runlist_info[runlist_id];
623 wake_up(&runlist->runlist_wq);
624 }
625
626}
627
628static int gk20a_init_fifo_setup_hw(struct gk20a *g)
629{
630 struct fifo_gk20a *f = &g->fifo;
631
632 gk20a_dbg_fn("");
633
634 /* test write, read through bar1 @ userd region before
635 * turning on the snooping */
636 {
637 struct fifo_gk20a *f = &g->fifo;
638 u32 v, v1 = 0x33, v2 = 0x55;
639
640 u32 bar1_vaddr = f->userd.gpu_va;
641 volatile u32 *cpu_vaddr = f->userd.cpuva;
642
643 gk20a_dbg_info("test bar1 @ vaddr 0x%x",
644 bar1_vaddr);
645
646 v = gk20a_bar1_readl(g, bar1_vaddr);
647
648 *cpu_vaddr = v1;
649 smp_mb();
650
651 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
652 gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
653 return -EINVAL;
654 }
655
656 gk20a_bar1_writel(g, bar1_vaddr, v2);
657
658 if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
659 gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
660 return -EINVAL;
661 }
662
663 /* is it visible to the cpu? */
664 if (*cpu_vaddr != v2) {
665 gk20a_err(dev_from_gk20a(g),
666 "cpu didn't see bar1 write @ %p!",
667 cpu_vaddr);
668 }
669
670 /* put it back */
671 gk20a_bar1_writel(g, bar1_vaddr, v);
672 }
673
674 /*XXX all manner of flushes and caching worries, etc */
675
676 /* set the base for the userd region now */
677 gk20a_writel(g, fifo_bar1_base_r(),
678 fifo_bar1_base_ptr_f(f->userd.gpu_va >> 12) |
679 fifo_bar1_base_valid_true_f());
680
681 gk20a_dbg_fn("done");
682
683 return 0;
684}
685
686int gk20a_init_fifo_support(struct gk20a *g)
687{
688 u32 err;
689
690 err = gk20a_init_fifo_setup_sw(g);
691 if (err)
692 return err;
693
694 err = gk20a_init_fifo_setup_hw(g);
695 if (err)
696 return err;
697
698 return err;
699}
700
701static struct channel_gk20a *
702channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
703{
704 int ci;
705 if (unlikely(!f->channel))
706 return NULL;
707 for (ci = 0; ci < f->num_channels; ci++) {
708 struct channel_gk20a *c = f->channel+ci;
709 if (c->inst_block.cpuva &&
710 (inst_ptr == c->inst_block.cpu_pa))
711 return f->channel+ci;
712 }
713 return NULL;
714}
715
716/* fault info/descriptions.
717 * tbd: move to setup
718 */
719static const char * const fault_type_descs[] = {
720 "pde", /*fifo_intr_mmu_fault_info_type_pde_v() == 0 */
721 "pde size",
722 "pte",
723 "va limit viol",
724 "unbound inst",
725 "priv viol",
726 "ro viol",
727 "wo viol",
728 "pitch mask",
729 "work creation",
730 "bad aperture",
731 "compression failure",
732 "bad kind",
733 "region viol",
734 "dual ptes",
735 "poisoned",
736};
737/* engine descriptions */
738static const char * const engine_subid_descs[] = {
739 "gpc",
740 "hub",
741};
742
743static const char * const hub_client_descs[] = {
744 "vip", "ce0", "ce1", "dniso", "fe", "fecs", "host", "host cpu",
745 "host cpu nb", "iso", "mmu", "mspdec", "msppp", "msvld",
746 "niso", "p2p", "pd", "perf", "pmu", "raster twod", "scc",
747 "scc nb", "sec", "ssync", "gr copy", "ce2", "xv", "mmu nb",
748 "msenc", "d falcon", "sked", "a falcon", "n/a",
749};
750
751static const char * const gpc_client_descs[] = {
752 "l1 0", "t1 0", "pe 0",
753 "l1 1", "t1 1", "pe 1",
754 "l1 2", "t1 2", "pe 2",
755 "l1 3", "t1 3", "pe 3",
756 "rast", "gcc", "gpccs",
757 "prop 0", "prop 1", "prop 2", "prop 3",
758 "l1 4", "t1 4", "pe 4",
759 "l1 5", "t1 5", "pe 5",
760 "l1 6", "t1 6", "pe 6",
761 "l1 7", "t1 7", "pe 7",
762 "gpm",
763 "ltp utlb 0", "ltp utlb 1", "ltp utlb 2", "ltp utlb 3",
764 "rgg utlb",
765};
766
767/* reads info from hardware and fills in mmu fault info record */
768static inline void get_exception_mmu_fault_info(
769 struct gk20a *g, u32 engine_id,
770 struct fifo_mmu_fault_info_gk20a *f)
771{
772 u32 fault_info_v;
773
774 gk20a_dbg_fn("engine_id %d", engine_id);
775
776 memset(f, 0, sizeof(*f));
777
778 f->fault_info_v = fault_info_v = gk20a_readl(g,
779 fifo_intr_mmu_fault_info_r(engine_id));
780 f->fault_type_v =
781 fifo_intr_mmu_fault_info_type_v(fault_info_v);
782 f->engine_subid_v =
783 fifo_intr_mmu_fault_info_engine_subid_v(fault_info_v);
784 f->client_v = fifo_intr_mmu_fault_info_client_v(fault_info_v);
785
786 BUG_ON(f->fault_type_v >= ARRAY_SIZE(fault_type_descs));
787 f->fault_type_desc = fault_type_descs[f->fault_type_v];
788
789 BUG_ON(f->engine_subid_v >= ARRAY_SIZE(engine_subid_descs));
790 f->engine_subid_desc = engine_subid_descs[f->engine_subid_v];
791
792 if (f->engine_subid_v ==
793 fifo_intr_mmu_fault_info_engine_subid_hub_v()) {
794
795 BUG_ON(f->client_v >= ARRAY_SIZE(hub_client_descs));
796 f->client_desc = hub_client_descs[f->client_v];
797 } else if (f->engine_subid_v ==
798 fifo_intr_mmu_fault_info_engine_subid_gpc_v()) {
799 BUG_ON(f->client_v >= ARRAY_SIZE(gpc_client_descs));
800 f->client_desc = gpc_client_descs[f->client_v];
801 } else {
802 BUG_ON(1);
803 }
804
805 f->fault_hi_v = gk20a_readl(g, fifo_intr_mmu_fault_hi_r(engine_id));
806 f->fault_lo_v = gk20a_readl(g, fifo_intr_mmu_fault_lo_r(engine_id));
807	/* note: ignoring aperture on gk20a... */
808 f->inst_ptr = fifo_intr_mmu_fault_inst_ptr_v(
809 gk20a_readl(g, fifo_intr_mmu_fault_inst_r(engine_id)));
810 /* note: inst_ptr is a 40b phys addr. */
811 f->inst_ptr <<= fifo_intr_mmu_fault_inst_ptr_align_shift_v();
812}
813
814static void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
815{
816 gk20a_dbg_fn("");
817
818 if (engine_id == top_device_info_type_enum_graphics_v()) {
819 /* resetting engine using mc_enable_r() is not enough,
820 * we do full init sequence */
821 gk20a_gr_reset(g);
822 }
823 if (engine_id == top_device_info_type_enum_copy0_v())
824 gk20a_reset(g, mc_enable_ce2_m());
825}
826
827static void gk20a_fifo_handle_mmu_fault_thread(struct work_struct *work)
828{
829 struct fifo_gk20a *f = container_of(work, struct fifo_gk20a,
830 fault_restore_thread);
831 struct gk20a *g = f->g;
832 int i;
833
834 /* Reinitialise FECS and GR */
835 gk20a_init_pmu_setup_hw2(g);
836
837 /* It is safe to enable ELPG again. */
838 gk20a_pmu_enable_elpg(g);
839
840 /* Restore the runlist */
841 for (i = 0; i < g->fifo.max_runlists; i++)
842 gk20a_fifo_update_runlist_locked(g, i, ~0, true, true);
843
844 /* unlock all runlists */
845 for (i = 0; i < g->fifo.max_runlists; i++)
846 mutex_unlock(&g->fifo.runlist_info[i].mutex);
847
848}
849
850static void gk20a_fifo_handle_chsw_fault(struct gk20a *g)
851{
852 u32 intr;
853
854 intr = gk20a_readl(g, fifo_intr_chsw_error_r());
855 gk20a_err(dev_from_gk20a(g), "chsw: %08x\n", intr);
856 gk20a_fecs_dump_falcon_stats(g);
857 gk20a_writel(g, fifo_intr_chsw_error_r(), intr);
858}
859
860static void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g)
861{
862 struct device *dev = dev_from_gk20a(g);
863 u32 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
864 gk20a_err(dev, "dropped mmu fault (0x%08x)", fault_id);
865}
866
867static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
868 struct fifo_mmu_fault_info_gk20a *f, bool fake_fault)
869{
870 /* channel recovery is only deferred if an sm debugger
871	   is attached and MMU debug mode is enabled */
872 if (!gk20a_gr_sm_debugger_attached(g) ||
873 !gk20a_mm_mmu_debug_mode_enabled(g))
874 return false;
875
876 /* if this fault is fake (due to RC recovery), don't defer recovery */
877 if (fake_fault)
878 return false;
879
880 if (engine_id != ENGINE_GR_GK20A ||
881 f->engine_subid_v != fifo_intr_mmu_fault_info_engine_subid_gpc_v())
882 return false;
883
884 return true;
885}
886
887void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
888 unsigned long fault_id) {
889 u32 engine_mmu_id;
890 int i;
891
892 /* reset engines */
893 for_each_set_bit(engine_mmu_id, &fault_id, 32) {
894 u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id);
895 if (engine_id != ~0)
896 gk20a_fifo_reset_engine(g, engine_id);
897 }
898
899 /* CLEAR the runlists. Do not wait for runlist to start as
900 * some engines may not be available right now */
901 for (i = 0; i < g->fifo.max_runlists; i++)
902 gk20a_fifo_update_runlist_locked(g, i, ~0, false, false);
903
904 /* clear interrupt */
905 gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);
906
907 /* resume scheduler */
908 gk20a_writel(g, fifo_error_sched_disable_r(),
909 gk20a_readl(g, fifo_error_sched_disable_r()));
910
911 /* Spawn a work to enable PMU and restore runlists */
912 schedule_work(&g->fifo.fault_restore_thread);
913}
914
915static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
916 struct channel_gk20a *ch) {
917 bool verbose = true;
918 if (!ch)
919 return verbose;
920
921 gk20a_err(dev_from_gk20a(g),
922 "channel %d generated a mmu fault",
923 ch->hw_chid);
924 if (ch->error_notifier) {
925 u32 err = ch->error_notifier->info32;
926 if (ch->error_notifier->status == 0xffff) {
927 /* If error code is already set, this mmu fault
928 * was triggered as part of recovery from other
929 * error condition.
930 * Don't overwrite error flag. */
931 /* Fifo timeout debug spew is controlled by user */
932 if (err == NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT)
933 verbose = ch->timeout_debug_dump;
934 } else {
935 gk20a_set_error_notifier(ch,
936 NVHOST_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
937 }
938 }
939 /* mark channel as faulted */
940 ch->has_timedout = true;
941 wmb();
942 /* unblock pending waits */
943 wake_up(&ch->semaphore_wq);
944 wake_up(&ch->notifier_wq);
945 wake_up(&ch->submit_wq);
946 return verbose;
947}
948
949
950static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
951{
952 bool fake_fault;
953 unsigned long fault_id;
954 unsigned long engine_mmu_id;
955 int i;
956 bool verbose = true;
957 gk20a_dbg_fn("");
958
959 g->fifo.deferred_reset_pending = false;
960
961 /* Disable ELPG */
962 gk20a_pmu_disable_elpg(g);
963
964 /* If we have recovery in progress, MMU fault id is invalid */
965 if (g->fifo.mmu_fault_engines) {
966 fault_id = g->fifo.mmu_fault_engines;
967 g->fifo.mmu_fault_engines = 0;
968 fake_fault = true;
969 } else {
970 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
971 fake_fault = false;
972 gk20a_debug_dump(g->dev);
973 }
974
975	/* lock all runlists. Note that locks are released in
976 * gk20a_fifo_handle_mmu_fault_thread() */
977 for (i = 0; i < g->fifo.max_runlists; i++)
978 mutex_lock(&g->fifo.runlist_info[i].mutex);
979
980 /* go through all faulted engines */
981 for_each_set_bit(engine_mmu_id, &fault_id, 32) {
982 /* bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to
983 * engines. Convert engine_mmu_id to engine_id */
984 u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id);
985 struct fifo_runlist_info_gk20a *runlist = g->fifo.runlist_info;
986 struct fifo_mmu_fault_info_gk20a f;
987 struct channel_gk20a *ch = NULL;
988
989 get_exception_mmu_fault_info(g, engine_mmu_id, &f);
990 trace_gk20a_mmu_fault(f.fault_hi_v,
991 f.fault_lo_v,
992 f.fault_info_v,
993 f.inst_ptr,
994 engine_id,
995 f.engine_subid_desc,
996 f.client_desc,
997 f.fault_type_desc);
998 gk20a_err(dev_from_gk20a(g), "mmu fault on engine %d, "
999 "engine subid %d (%s), client %d (%s), "
1000			"addr 0x%08x:0x%08x, type %d (%s), info 0x%08x, "
1001 "inst_ptr 0x%llx\n",
1002 engine_id,
1003 f.engine_subid_v, f.engine_subid_desc,
1004 f.client_v, f.client_desc,
1005 f.fault_hi_v, f.fault_lo_v,
1006 f.fault_type_v, f.fault_type_desc,
1007 f.fault_info_v, f.inst_ptr);
1008
1009 /* get the channel */
1010 if (fake_fault) {
1011 /* read and parse engine status */
1012 u32 status = gk20a_readl(g,
1013 fifo_engine_status_r(engine_id));
1014 u32 ctx_status =
1015 fifo_engine_status_ctx_status_v(status);
1016 bool type_ch = fifo_pbdma_status_id_type_v(status) ==
1017 fifo_pbdma_status_id_type_chid_v();
1018
1019 /* use next_id if context load is failing */
1020 u32 id = (ctx_status ==
1021 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1022 fifo_engine_status_next_id_v(status) :
1023 fifo_engine_status_id_v(status);
1024
1025 if (type_ch) {
1026 ch = g->fifo.channel + id;
1027 } else {
1028 gk20a_err(dev_from_gk20a(g), "non-chid type not supported");
1029 WARN_ON(1);
1030 }
1031 } else {
1032 /* read channel based on instruction pointer */
1033 ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr);
1034 }
1035
1036 if (ch) {
1037 if (ch->in_use) {
1038 /* disable the channel from hw and increment
1039 * syncpoints */
1040 gk20a_disable_channel_no_update(ch);
1041
1042 /* remove the channel from runlist */
1043 clear_bit(ch->hw_chid,
1044 runlist->active_channels);
1045 }
1046
1047 /* check if engine reset should be deferred */
1048 if (gk20a_fifo_should_defer_engine_reset(g, engine_id, &f, fake_fault)) {
1049 g->fifo.mmu_fault_engines = fault_id;
1050
1051 /* handled during channel free */
1052 g->fifo.deferred_reset_pending = true;
1053 } else
1054 verbose = gk20a_fifo_set_ctx_mmu_error(g, ch);
1055
1056 } else if (f.inst_ptr ==
1057 g->mm.bar1.inst_block.cpu_pa) {
1058 gk20a_err(dev_from_gk20a(g), "mmu fault from bar1");
1059 } else if (f.inst_ptr ==
1060 g->mm.pmu.inst_block.cpu_pa) {
1061 gk20a_err(dev_from_gk20a(g), "mmu fault from pmu");
1062 } else
1063 gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault");
1064 }
1065
1066 if (g->fifo.deferred_reset_pending) {
1067 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "sm debugger attached,"
1068 " deferring channel recovery to channel free");
1069 /* clear interrupt */
1070 gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);
1071 return verbose;
1072 }
1073
1074 /* resetting the engines and clearing the runlists is done in
1075 a separate function to allow deferred reset. */
1076 fifo_gk20a_finish_mmu_fault_handling(g, fault_id);
1077 return verbose;
1078}
1079
1080static void gk20a_fifo_get_faulty_channel(struct gk20a *g, int engine_id,
1081 u32 *chid, bool *type_ch)
1082{
1083 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
1084 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
1085
1086 *type_ch = fifo_pbdma_status_id_type_v(status) ==
1087 fifo_pbdma_status_id_type_chid_v();
1088 /* use next_id if context load is failing */
1089 *chid = (ctx_status ==
1090 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1091 fifo_engine_status_next_id_v(status) :
1092 fifo_engine_status_id_v(status);
1093}
1094
1095void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1096 bool verbose)
1097{
1098 unsigned long end_jiffies = jiffies +
1099 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
1100 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
1101 unsigned long engine_id, i;
1102 unsigned long _engine_ids = __engine_ids;
1103 unsigned long engine_ids = 0;
1104 int ret;
1105
1106 if (verbose)
1107 gk20a_debug_dump(g->dev);
1108
1109 /* store faulted engines in advance */
1110 g->fifo.mmu_fault_engines = 0;
1111 for_each_set_bit(engine_id, &_engine_ids, 32) {
1112 bool ref_type_ch;
1113 int ref_chid;
1114 gk20a_fifo_get_faulty_channel(g, engine_id, &ref_chid,
1115 &ref_type_ch);
1116
1117 /* Reset *all* engines that use the
1118		 * same channel as the faulty engine */
1119 for (i = 0; i < g->fifo.max_engines; i++) {
1120 bool type_ch;
1121 u32 chid;
1122 gk20a_fifo_get_faulty_channel(g, i, &chid, &type_ch);
1123 if (ref_type_ch == type_ch && ref_chid == chid) {
1124 engine_ids |= BIT(i);
1125 g->fifo.mmu_fault_engines |=
1126 BIT(gk20a_engine_id_to_mmu_id(i));
1127 }
1128 }
1129
1130 }
1131
1132 /* trigger faults for all bad engines */
1133 for_each_set_bit(engine_id, &engine_ids, 32) {
1134 if (engine_id > g->fifo.max_engines) {
1135 WARN_ON(true);
1136 break;
1137 }
1138
1139 gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id),
1140 fifo_trigger_mmu_fault_id_f(
1141 gk20a_engine_id_to_mmu_id(engine_id)) |
1142 fifo_trigger_mmu_fault_enable_f(1));
1143 }
1144
1145 /* Wait for MMU fault to trigger */
1146 ret = -EBUSY;
1147 do {
1148 if (gk20a_readl(g, fifo_intr_0_r()) &
1149 fifo_intr_0_mmu_fault_pending_f()) {
1150 ret = 0;
1151 break;
1152 }
1153
1154 usleep_range(delay, delay * 2);
1155 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
1156 } while (time_before(jiffies, end_jiffies) ||
1157 !tegra_platform_is_silicon());
1158
1159 if (ret)
1160 gk20a_err(dev_from_gk20a(g), "mmu fault timeout");
1161
1162 /* release mmu fault trigger */
1163 for_each_set_bit(engine_id, &engine_ids, 32)
1164 gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), 0);
1165}
1166
1167
1168static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1169{
1170 u32 sched_error;
1171 u32 engine_id;
1172 int id = -1;
1173 bool non_chid = false;
1174
1175 /* read and reset the scheduler error register */
1176 sched_error = gk20a_readl(g, fifo_intr_sched_error_r());
1177 gk20a_writel(g, fifo_intr_0_r(), fifo_intr_0_sched_error_reset_f());
1178
1179 for (engine_id = 0; engine_id < g->fifo.max_engines; engine_id++) {
1180 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
1181 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
1182 bool failing_engine;
1183
1184 /* we are interested in busy engines */
1185 failing_engine = fifo_engine_status_engine_v(status) ==
1186 fifo_engine_status_engine_busy_v();
1187
1188 /* ..that are doing context switch */
1189 failing_engine = failing_engine &&
1190 (ctx_status ==
1191 fifo_engine_status_ctx_status_ctxsw_switch_v()
1192 || ctx_status ==
1193 fifo_engine_status_ctx_status_ctxsw_save_v()
1194 || ctx_status ==
1195 fifo_engine_status_ctx_status_ctxsw_load_v());
1196
1197 if (failing_engine) {
1198 id = (ctx_status ==
1199 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1200 fifo_engine_status_next_id_v(status) :
1201 fifo_engine_status_id_v(status);
1202 non_chid = fifo_pbdma_status_id_type_v(status) !=
1203 fifo_pbdma_status_id_type_chid_v();
1204 break;
1205 }
1206 }
1207
1208 /* could not find the engine - should never happen */
1209 if (unlikely(engine_id >= g->fifo.max_engines))
1210 goto err;
1211
1212 if (fifo_intr_sched_error_code_f(sched_error) ==
1213 fifo_intr_sched_error_code_ctxsw_timeout_v()) {
1214 struct fifo_gk20a *f = &g->fifo;
1215 struct channel_gk20a *ch = &f->channel[id];
1216
1217 if (non_chid) {
1218 gk20a_fifo_recover(g, BIT(engine_id), true);
1219 goto err;
1220 }
1221
1222 if (gk20a_channel_update_and_check_timeout(ch,
1223 GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000)) {
1224 gk20a_set_error_notifier(ch,
1225 NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
1226 gk20a_err(dev_from_gk20a(g),
1227				"fifo sched ctxsw timeout error: "
1228 "engine = %u, ch = %d", engine_id, id);
1229 gk20a_fifo_recover(g, BIT(engine_id),
1230 ch->timeout_debug_dump);
1231 } else {
1232 gk20a_warn(dev_from_gk20a(g),
1233				"fifo is waiting for ctx switch for %d ms, "
1234 "ch = %d\n",
1235 ch->timeout_accumulated_ms,
1236 id);
1237 }
1238 return ch->timeout_debug_dump;
1239 }
1240err:
1241	gk20a_err(dev_from_gk20a(g), "fifo sched error: 0x%08x, engine=%u, %s=%d",
1242 sched_error, engine_id, non_chid ? "non-ch" : "ch", id);
1243
1244 return true;
1245}
1246
1247static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
1248{
1249 bool print_channel_reset_log = false, reset_engine = false;
1250 struct device *dev = dev_from_gk20a(g);
1251 u32 handled = 0;
1252
1253 gk20a_dbg_fn("");
1254
1255 if (fifo_intr & fifo_intr_0_pio_error_pending_f()) {
1256 /* pio mode is unused. this shouldn't happen, ever. */
1257 /* should we clear it or just leave it pending? */
1258 gk20a_err(dev, "fifo pio error!\n");
1259 BUG_ON(1);
1260 }
1261
1262 if (fifo_intr & fifo_intr_0_bind_error_pending_f()) {
1263 u32 bind_error = gk20a_readl(g, fifo_intr_bind_error_r());
1264 gk20a_err(dev, "fifo bind error: 0x%08x", bind_error);
1265 print_channel_reset_log = true;
1266 handled |= fifo_intr_0_bind_error_pending_f();
1267 }
1268
1269 if (fifo_intr & fifo_intr_0_sched_error_pending_f()) {
1270 print_channel_reset_log = gk20a_fifo_handle_sched_error(g);
1271 handled |= fifo_intr_0_sched_error_pending_f();
1272 }
1273
1274 if (fifo_intr & fifo_intr_0_chsw_error_pending_f()) {
1275 gk20a_fifo_handle_chsw_fault(g);
1276 handled |= fifo_intr_0_chsw_error_pending_f();
1277 }
1278
1279 if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) {
1280 print_channel_reset_log = gk20a_fifo_handle_mmu_fault(g);
1281 reset_engine = true;
1282 handled |= fifo_intr_0_mmu_fault_pending_f();
1283 }
1284
1285 if (fifo_intr & fifo_intr_0_dropped_mmu_fault_pending_f()) {
1286 gk20a_fifo_handle_dropped_mmu_fault(g);
1287 handled |= fifo_intr_0_dropped_mmu_fault_pending_f();
1288 }
1289
1290 print_channel_reset_log = !g->fifo.deferred_reset_pending
1291 && print_channel_reset_log;
1292
1293 if (print_channel_reset_log) {
1294 int engine_id;
1295 gk20a_err(dev_from_gk20a(g),
1296			"channel reset initiated from %s", __func__);
1297 for (engine_id = 0;
1298 engine_id < g->fifo.max_engines;
1299 engine_id++) {
1300 gk20a_dbg_fn("enum:%d -> engine_id:%d", engine_id,
1301 g->fifo.engine_info[engine_id].engine_id);
1302 fifo_pbdma_exception_status(g,
1303 &g->fifo.engine_info[engine_id]);
1304 fifo_engine_exception_status(g,
1305 &g->fifo.engine_info[engine_id]);
1306 }
1307 }
1308
1309 return handled;
1310}
1311
1312
1313static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev,
1314 struct gk20a *g,
1315 struct fifo_gk20a *f,
1316 u32 pbdma_id)
1317{
1318 u32 pbdma_intr_0 = gk20a_readl(g, pbdma_intr_0_r(pbdma_id));
1319 u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
1320 u32 handled = 0;
1321 bool reset_device = false;
1322 bool reset_channel = false;
1323
1324 gk20a_dbg_fn("");
1325
1326 gk20a_dbg(gpu_dbg_intr, "pbdma id intr pending %d %08x %08x", pbdma_id,
1327 pbdma_intr_0, pbdma_intr_1);
1328 if (pbdma_intr_0) {
1329 if (f->intr.pbdma.device_fatal_0 & pbdma_intr_0) {
1330 dev_err(dev, "unrecoverable device error: "
1331 "pbdma_intr_0(%d):0x%08x", pbdma_id, pbdma_intr_0);
1332 reset_device = true;
1333 /* TODO: disable pbdma intrs */
1334 handled |= f->intr.pbdma.device_fatal_0 & pbdma_intr_0;
1335 }
1336 if (f->intr.pbdma.channel_fatal_0 & pbdma_intr_0) {
1337 dev_warn(dev, "channel error: "
1338 "pbdma_intr_0(%d):0x%08x", pbdma_id, pbdma_intr_0);
1339 reset_channel = true;
1340 /* TODO: clear pbdma channel errors */
1341 handled |= f->intr.pbdma.channel_fatal_0 & pbdma_intr_0;
1342 }
1343 if (f->intr.pbdma.restartable_0 & pbdma_intr_0) {
1344 dev_warn(dev, "sw method: %08x %08x",
1345 gk20a_readl(g, pbdma_method0_r(0)),
1346 gk20a_readl(g, pbdma_method0_r(0)+4));
1347 gk20a_writel(g, pbdma_method0_r(0), 0);
1348 gk20a_writel(g, pbdma_method0_r(0)+4, 0);
1349 handled |= f->intr.pbdma.restartable_0 & pbdma_intr_0;
1350 }
1351
1352 gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0);
1353 }
1354
1355 /* all intrs in _intr_1 are "host copy engine" related,
1356 * which gk20a doesn't have. for now just make them channel fatal. */
1357 if (pbdma_intr_1) {
1358 dev_err(dev, "channel hce error: pbdma_intr_1(%d): 0x%08x",
1359 pbdma_id, pbdma_intr_1);
1360 reset_channel = true;
1361 gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1);
1362 }
1363
1364
1365
1366 return handled;
1367}
1368
1369static u32 fifo_channel_isr(struct gk20a *g, u32 fifo_intr)
1370{
1371 gk20a_channel_semaphore_wakeup(g);
1372 return fifo_intr_0_channel_intr_pending_f();
1373}
1374
1375
1376static u32 fifo_pbdma_isr(struct gk20a *g, u32 fifo_intr)
1377{
1378 struct device *dev = dev_from_gk20a(g);
1379 struct fifo_gk20a *f = &g->fifo;
1380 u32 clear_intr = 0, i;
1381 u32 pbdma_pending = gk20a_readl(g, fifo_intr_pbdma_id_r());
1382
1383 for (i = 0; i < fifo_intr_pbdma_id_status__size_1_v(); i++) {
1384 if (fifo_intr_pbdma_id_status_f(pbdma_pending, i)) {
1385 gk20a_dbg(gpu_dbg_intr, "pbdma id %d intr pending", i);
1386 clear_intr |=
1387 gk20a_fifo_handle_pbdma_intr(dev, g, f, i);
1388 }
1389 }
1390 return fifo_intr_0_pbdma_intr_pending_f();
1391}
1392
1393void gk20a_fifo_isr(struct gk20a *g)
1394{
1395 u32 error_intr_mask =
1396 fifo_intr_0_bind_error_pending_f() |
1397 fifo_intr_0_sched_error_pending_f() |
1398 fifo_intr_0_chsw_error_pending_f() |
1399 fifo_intr_0_fb_flush_timeout_pending_f() |
1400 fifo_intr_0_dropped_mmu_fault_pending_f() |
1401 fifo_intr_0_mmu_fault_pending_f() |
1402 fifo_intr_0_lb_error_pending_f() |
1403 fifo_intr_0_pio_error_pending_f();
1404
1405 u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
1406 u32 clear_intr = 0;
1407
1408 /* note we're not actually in an "isr", but rather
1409 * in a threaded interrupt context... */
1410 mutex_lock(&g->fifo.intr.isr.mutex);
1411
1412 gk20a_dbg(gpu_dbg_intr, "fifo isr %08x\n", fifo_intr);
1413
1414 /* handle runlist update */
1415 if (fifo_intr & fifo_intr_0_runlist_event_pending_f()) {
1416 gk20a_fifo_handle_runlist_event(g);
1417 clear_intr |= fifo_intr_0_runlist_event_pending_f();
1418 }
1419 if (fifo_intr & fifo_intr_0_pbdma_intr_pending_f())
1420 clear_intr |= fifo_pbdma_isr(g, fifo_intr);
1421
1422 if (unlikely(fifo_intr & error_intr_mask))
1423 clear_intr = fifo_error_isr(g, fifo_intr);
1424
1425 gk20a_writel(g, fifo_intr_0_r(), clear_intr);
1426
1427 mutex_unlock(&g->fifo.intr.isr.mutex);
1428
1429 return;
1430}
1431
1432void gk20a_fifo_nonstall_isr(struct gk20a *g)
1433{
1434 u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
1435 u32 clear_intr = 0;
1436
1437 gk20a_dbg(gpu_dbg_intr, "fifo nonstall isr %08x\n", fifo_intr);
1438
1439 if (fifo_intr & fifo_intr_0_channel_intr_pending_f())
1440 clear_intr |= fifo_channel_isr(g, fifo_intr);
1441
1442 gk20a_writel(g, fifo_intr_0_r(), clear_intr);
1443
1444 return;
1445}
1446
1447int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
1448{
1449 struct fifo_gk20a *f = &g->fifo;
1450 unsigned long end_jiffies = jiffies
1451 + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
1452 u32 delay = GR_IDLE_CHECK_DEFAULT;
1453 u32 ret = 0;
1454 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
1455 u32 elpg_off = 0;
1456 u32 i;
1457
1458 gk20a_dbg_fn("%d", hw_chid);
1459
1460 /* we have no idea which runlist we are using. lock all */
1461 for (i = 0; i < g->fifo.max_runlists; i++)
1462 mutex_lock(&f->runlist_info[i].mutex);
1463
1464 /* disable elpg if failed to acquire pmu mutex */
1465 elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
1466 if (elpg_off)
1467 gk20a_pmu_disable_elpg(g);
1468
1469 /* issue preempt */
1470 gk20a_writel(g, fifo_preempt_r(),
1471 fifo_preempt_chid_f(hw_chid) |
1472 fifo_preempt_type_channel_f());
1473
1474 /* wait for preempt */
1475 ret = -EBUSY;
1476 do {
1477 if (!(gk20a_readl(g, fifo_preempt_r()) &
1478 fifo_preempt_pending_true_f())) {
1479 ret = 0;
1480 break;
1481 }
1482
1483 usleep_range(delay, delay * 2);
1484 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
1485 } while (time_before(jiffies, end_jiffies) ||
1486 !tegra_platform_is_silicon());
1487
1488 if (ret) {
1489 int i;
1490 u32 engines = 0;
1491 struct fifo_gk20a *f = &g->fifo;
1492 struct channel_gk20a *ch = &f->channel[hw_chid];
1493
1494 gk20a_err(dev_from_gk20a(g), "preempt channel %d timeout\n",
1495 hw_chid);
1496
1497 /* forcefully reset all busy engines using this channel */
1498 for (i = 0; i < g->fifo.max_engines; i++) {
1499 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
1500 u32 ctx_status =
1501 fifo_engine_status_ctx_status_v(status);
1502 bool type_ch = fifo_pbdma_status_id_type_v(status) ==
1503 fifo_pbdma_status_id_type_chid_v();
1504 bool busy = fifo_engine_status_engine_v(status) ==
1505 fifo_engine_status_engine_busy_v();
1506 u32 id = (ctx_status ==
1507 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
1508 fifo_engine_status_next_id_v(status) :
1509 fifo_engine_status_id_v(status);
1510
1511 if (type_ch && busy && id == hw_chid)
1512 engines |= BIT(i);
1513 }
1514 gk20a_set_error_notifier(ch,
1515 NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
1516 gk20a_fifo_recover(g, engines, true);
1517 }
1518
1519 /* re-enable elpg or release pmu mutex */
1520 if (elpg_off)
1521 gk20a_pmu_enable_elpg(g);
1522 else
1523 pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
1524
1525 for (i = 0; i < g->fifo.max_runlists; i++)
1526 mutex_unlock(&f->runlist_info[i].mutex);
1527
1528 return ret;
1529}
1530
1531int gk20a_fifo_enable_engine_activity(struct gk20a *g,
1532 struct fifo_engine_info_gk20a *eng_info)
1533{
1534 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
1535 u32 elpg_off;
1536 u32 enable;
1537
1538 gk20a_dbg_fn("");
1539
1540 /* disable elpg if failed to acquire pmu mutex */
1541 elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
1542 if (elpg_off)
1543 gk20a_pmu_disable_elpg(g);
1544
1545 enable = gk20a_readl(g, fifo_sched_disable_r());
1546 enable &= ~(fifo_sched_disable_true_v() >> eng_info->runlist_id);
1547 gk20a_writel(g, fifo_sched_disable_r(), enable);
1548
1549 /* re-enable elpg or release pmu mutex */
1550 if (elpg_off)
1551 gk20a_pmu_enable_elpg(g);
1552 else
1553 pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
1554
1555 gk20a_dbg_fn("done");
1556 return 0;
1557}
1558
1559int gk20a_fifo_disable_engine_activity(struct gk20a *g,
1560 struct fifo_engine_info_gk20a *eng_info,
1561 bool wait_for_idle)
1562{
1563 u32 gr_stat, pbdma_stat, chan_stat, eng_stat, ctx_stat;
1564 u32 pbdma_chid = ~0, engine_chid = ~0, disable;
1565 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
1566 u32 elpg_off;
1567 u32 err = 0;
1568
1569 gk20a_dbg_fn("");
1570
1571 gr_stat =
1572 gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
1573 if (fifo_engine_status_engine_v(gr_stat) ==
1574 fifo_engine_status_engine_busy_v() && !wait_for_idle)
1575 return -EBUSY;
1576
1577 /* disable elpg if failed to acquire pmu mutex */
1578 elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
1579 if (elpg_off)
1580 gk20a_pmu_disable_elpg(g);
1581
1582 disable = gk20a_readl(g, fifo_sched_disable_r());
1583 disable = set_field(disable,
1584 fifo_sched_disable_runlist_m(eng_info->runlist_id),
1585 fifo_sched_disable_runlist_f(fifo_sched_disable_true_v(),
1586 eng_info->runlist_id));
1587 gk20a_writel(g, fifo_sched_disable_r(), disable);
1588
1589 /* chid from pbdma status */
1590 pbdma_stat = gk20a_readl(g, fifo_pbdma_status_r(eng_info->pbdma_id));
1591 chan_stat = fifo_pbdma_status_chan_status_v(pbdma_stat);
1592 if (chan_stat == fifo_pbdma_status_chan_status_valid_v() ||
1593 chan_stat == fifo_pbdma_status_chan_status_chsw_save_v())
1594 pbdma_chid = fifo_pbdma_status_id_v(pbdma_stat);
1595 else if (chan_stat == fifo_pbdma_status_chan_status_chsw_load_v() ||
1596 chan_stat == fifo_pbdma_status_chan_status_chsw_switch_v())
1597 pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat);
1598
1599 if (pbdma_chid != ~0) {
1600 err = gk20a_fifo_preempt_channel(g, pbdma_chid);
1601 if (err)
1602 goto clean_up;
1603 }
1604
1605 /* chid from engine status */
1606 eng_stat = gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
1607 ctx_stat = fifo_engine_status_ctx_status_v(eng_stat);
1608 if (ctx_stat == fifo_engine_status_ctx_status_valid_v() ||
1609 ctx_stat == fifo_engine_status_ctx_status_ctxsw_save_v())
1610 engine_chid = fifo_engine_status_id_v(eng_stat);
1611 else if (ctx_stat == fifo_engine_status_ctx_status_ctxsw_load_v() ||
1612 ctx_stat == fifo_engine_status_ctx_status_ctxsw_switch_v())
1613 engine_chid = fifo_engine_status_next_id_v(eng_stat);
1614
1615 if (engine_chid != ~0 && engine_chid != pbdma_chid) {
1616 err = gk20a_fifo_preempt_channel(g, engine_chid);
1617 if (err)
1618 goto clean_up;
1619 }
1620
1621clean_up:
1622 /* re-enable elpg or release pmu mutex */
1623 if (elpg_off)
1624 gk20a_pmu_enable_elpg(g);
1625 else
1626 pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
1627
1628 if (err) {
1629 gk20a_dbg_fn("failed");
1630 if (gk20a_fifo_enable_engine_activity(g, eng_info))
1631 gk20a_err(dev_from_gk20a(g),
1632 "failed to enable gr engine activity\n");
1633 } else {
1634 gk20a_dbg_fn("done");
1635 }
1636 return err;
1637}
1638
1639static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id)
1640{
1641 struct fifo_gk20a *f = &g->fifo;
1642 u32 engines = 0;
1643 int i;
1644
1645 for (i = 0; i < f->max_engines; i++) {
1646 u32 status = gk20a_readl(g, fifo_engine_status_r(i));
1647 bool engine_busy = fifo_engine_status_engine_v(status) ==
1648 fifo_engine_status_engine_busy_v();
1649
1650 if (engine_busy &&
1651 (f->engine_info[i].runlist_id == runlist_id))
1652 engines |= BIT(i);
1653 }
1654 gk20a_fifo_recover(g, engines, true);
1655}
1656
1657static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
1658{
1659 struct fifo_runlist_info_gk20a *runlist;
1660 u32 remain;
1661 bool pending;
1662
1663 runlist = &g->fifo.runlist_info[runlist_id];
1664 remain = wait_event_timeout(runlist->runlist_wq,
1665 ((pending = gk20a_readl(g, fifo_eng_runlist_r(runlist_id)) &
1666 fifo_eng_runlist_pending_true_f()) == 0),
1667 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
1668
1669 if (remain == 0 && pending != 0)
1670 return -ETIMEDOUT;
1671
1672 return 0;
1673}
1674
1675static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
1676 u32 hw_chid, bool add,
1677 bool wait_for_finish)
1678{
1679 u32 ret = 0;
1680 struct device *d = dev_from_gk20a(g);
1681 struct fifo_gk20a *f = &g->fifo;
1682 struct fifo_runlist_info_gk20a *runlist = NULL;
1683 u32 *runlist_entry_base = NULL;
1684 u32 *runlist_entry = NULL;
1685 phys_addr_t runlist_pa;
1686 u32 old_buf, new_buf;
1687 u32 chid;
1688 u32 count = 0;
1689 runlist = &f->runlist_info[runlist_id];
1690
1691 /* valid channel, add/remove it from active list.
1692 Otherwise, keep active list untouched for suspend/resume. */
1693 if (hw_chid != ~0) {
1694 if (add) {
1695 if (test_and_set_bit(hw_chid,
1696 runlist->active_channels) == 1)
1697 return 0;
1698 } else {
1699 if (test_and_clear_bit(hw_chid,
1700 runlist->active_channels) == 0)
1701 return 0;
1702 }
1703 }
1704
1705 old_buf = runlist->cur_buffer;
1706 new_buf = !runlist->cur_buffer;
1707
1708 gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx",
1709 runlist_id, runlist->mem[new_buf].iova);
1710
1711 runlist_pa = gk20a_get_phys_from_iova(d, runlist->mem[new_buf].iova);
1712 if (!runlist_pa) {
1713 ret = -EINVAL;
1714 goto clean_up;
1715 }
1716
1717 runlist_entry_base = runlist->mem[new_buf].cpuva;
1718 if (!runlist_entry_base) {
1719 ret = -ENOMEM;
1720 goto clean_up;
1721 }
1722
1723 if (hw_chid != ~0 || /* add/remove a valid channel */
1724 add /* resume to add all channels back */) {
1725 runlist_entry = runlist_entry_base;
1726 for_each_set_bit(chid,
1727 runlist->active_channels, f->num_channels) {
1728 gk20a_dbg_info("add channel %d to runlist", chid);
1729 runlist_entry[0] = chid;
1730 runlist_entry[1] = 0;
1731 runlist_entry += 2;
1732 count++;
1733 }
1734 } else /* suspend to remove all channels */
1735 count = 0;
1736
1737 if (count != 0) {
1738 gk20a_writel(g, fifo_runlist_base_r(),
1739 fifo_runlist_base_ptr_f(u64_lo32(runlist_pa >> 12)) |
1740 fifo_runlist_base_target_vid_mem_f());
1741 }
1742
1743 gk20a_writel(g, fifo_runlist_r(),
1744 fifo_runlist_engine_f(runlist_id) |
1745 fifo_eng_runlist_length_f(count));
1746
1747 if (wait_for_finish) {
1748 ret = gk20a_fifo_runlist_wait_pending(g, runlist_id);
1749
1750 if (ret == -ETIMEDOUT) {
1751 gk20a_err(dev_from_gk20a(g),
1752 "runlist update timeout");
1753
1754 gk20a_fifo_runlist_reset_engines(g, runlist_id);
1755
1756 /* engine reset needs the lock. drop it */
1757 mutex_unlock(&runlist->mutex);
1758 /* wait until the runlist is active again */
1759 ret = gk20a_fifo_runlist_wait_pending(g, runlist_id);
1760 /* get the lock back. at this point everything should
1761			 * be fine */
1762 mutex_lock(&runlist->mutex);
1763
1764 if (ret)
1765 gk20a_err(dev_from_gk20a(g),
1766 "runlist update failed: %d", ret);
1767 } else if (ret == -EINTR)
1768 gk20a_err(dev_from_gk20a(g),
1769 "runlist update interrupted");
1770 }
1771
1772 runlist->cur_buffer = new_buf;
1773
1774clean_up:
1775 return ret;
1776}
1777
1778/* add/remove a channel from runlist
1779 special cases below: runlist->active_channels will NOT be changed.
1780 (hw_chid == ~0 && !add) means remove all active channels from runlist.
1781 (hw_chid == ~0 && add) means restore all active channels on runlist. */
1782int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid,
1783 bool add, bool wait_for_finish)
1784{
1785 struct fifo_runlist_info_gk20a *runlist = NULL;
1786 struct fifo_gk20a *f = &g->fifo;
1787 u32 token = PMU_INVALID_MUTEX_OWNER_ID;
1788 u32 elpg_off;
1789 u32 ret = 0;
1790
1791 runlist = &f->runlist_info[runlist_id];
1792
1793 mutex_lock(&runlist->mutex);
1794
1795 /* disable elpg if failed to acquire pmu mutex */
1796 elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
1797 if (elpg_off)
1798 gk20a_pmu_disable_elpg(g);
1799
1800 ret = gk20a_fifo_update_runlist_locked(g, runlist_id, hw_chid, add,
1801 wait_for_finish);
1802
1803 /* re-enable elpg or release pmu mutex */
1804 if (elpg_off)
1805 gk20a_pmu_enable_elpg(g);
1806 else
1807 pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
1808
1809 mutex_unlock(&runlist->mutex);
1810 return ret;
1811}
1812
1813int gk20a_fifo_suspend(struct gk20a *g)
1814{
1815 gk20a_dbg_fn("");
1816
1817 /* stop bar1 snooping */
1818 gk20a_writel(g, fifo_bar1_base_r(),
1819 fifo_bar1_base_valid_false_f());
1820
1821 /* disable fifo intr */
1822 gk20a_writel(g, fifo_intr_en_0_r(), 0);
1823 gk20a_writel(g, fifo_intr_en_1_r(), 0);
1824
1825 gk20a_dbg_fn("done");
1826 return 0;
1827}
1828
1829bool gk20a_fifo_mmu_fault_pending(struct gk20a *g)
1830{
1831 if (gk20a_readl(g, fifo_intr_0_r()) &
1832 fifo_intr_0_mmu_fault_pending_f())
1833 return true;
1834 else
1835 return false;
1836}
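
As the comment above gk20a_fifo_update_runlist() notes, hw_chid == ~0 acts as a wildcard for suspend/resume. For the single-channel case, a hedged caller sketch using only the interfaces defined in this file could look like the following; the helper name is hypothetical and not part of this patch, and chid/runlist_id are assumed to have been looked up from the channel and its engine info:

/*
 * Sketch only: take one channel off the hardware scheduler.
 */
static int example_remove_channel(struct gk20a *g, u32 runlist_id, u32 chid)
{
	int err;

	/* preempt the channel off the engines first */
	err = gk20a_fifo_preempt_channel(g, chid);
	if (err)
		return err;

	/* then drop it from the runlist and wait for the update to land */
	return gk20a_fifo_update_runlist(g, runlist_id, chid, false, true);
}
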
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
new file mode 100644
index 00000000..051acda2
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -0,0 +1,164 @@
1/*
2 * drivers/video/tegra/host/gk20a/fifo_gk20a.h
3 *
4 * GK20A graphics fifo (gr host)
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#ifndef __FIFO_GK20A_H__
22#define __FIFO_GK20A_H__
23
24#include "channel_gk20a.h"
25
26#define MAX_RUNLIST_BUFFERS 2
27
28/* generally corresponds to the "pbdma" engine */
29
30struct fifo_runlist_info_gk20a {
31 unsigned long *active_channels;
32 /* Each engine has its own SW and HW runlist buffer.*/
33 struct runlist_mem_desc mem[MAX_RUNLIST_BUFFERS];
34 u32 cur_buffer;
35 u32 total_entries;
36 bool stopped;
37 bool support_tsg;
38	struct mutex mutex; /* protect channel preempt and runlist update */
39 wait_queue_head_t runlist_wq;
40};
41
42/* so far gk20a has two engines: gr and ce2(gr_copy) */
43enum {
44 ENGINE_GR_GK20A = 0,
45 ENGINE_CE2_GK20A = 1,
46 ENGINE_INVAL_GK20A
47};
48
49struct fifo_pbdma_exception_info_gk20a {
50 u32 status_r; /* raw register value from hardware */
51 u32 id, next_id;
52 u32 chan_status_v; /* raw value from hardware */
53 bool id_is_chid, next_id_is_chid;
54 bool chsw_in_progress;
55};
56
57struct fifo_engine_exception_info_gk20a {
58 u32 status_r; /* raw register value from hardware */
59 u32 id, next_id;
60 u32 ctx_status_v; /* raw value from hardware */
61 bool id_is_chid, next_id_is_chid;
62 bool faulted, idle, ctxsw_in_progress;
63};
64
65struct fifo_mmu_fault_info_gk20a {
66 u32 fault_info_v;
67 u32 fault_type_v;
68 u32 engine_subid_v;
69 u32 client_v;
70 u32 fault_hi_v;
71 u32 fault_lo_v;
72 u64 inst_ptr;
73 const char *fault_type_desc;
74 const char *engine_subid_desc;
75 const char *client_desc;
76};
77
78struct fifo_engine_info_gk20a {
79 u32 sw_id;
80 const char *name;
81 u32 dev_info_id;
82 u32 engine_id;
83 u32 runlist_id;
84 u32 pbdma_id;
85 u32 mmu_fault_id;
86 u32 rc_mask;
87 struct fifo_pbdma_exception_info_gk20a pbdma_exception_info;
88 struct fifo_engine_exception_info_gk20a engine_exception_info;
89 struct fifo_mmu_fault_info_gk20a mmu_fault_info;
90
91};
92
93struct fifo_gk20a {
94 struct gk20a *g;
95 int num_channels;
96
97 int num_pbdma;
98 u32 *pbdma_map;
99
100 struct fifo_engine_info_gk20a *engine_info;
101 u32 max_engines;
102 u32 num_engines;
103
104 struct fifo_runlist_info_gk20a *runlist_info;
105 u32 max_runlists;
106
107 struct userd_desc userd;
108 u32 userd_entry_size;
109 u32 userd_total_size;
110
111 struct channel_gk20a *channel;
112 struct mutex ch_inuse_mutex; /* protect unused chid look up */
113
114 void (*remove_support)(struct fifo_gk20a *);
115 bool sw_ready;
116 struct {
117 /* share info between isrs and non-isr code */
118 struct {
119 struct mutex mutex;
120 } isr;
121 struct {
122 u32 device_fatal_0;
123 u32 channel_fatal_0;
124 u32 restartable_0;
125 } pbdma;
126 struct {
127
128 } engine;
129
130
131 } intr;
132
133 u32 mmu_fault_engines;
134 bool deferred_reset_pending;
135 struct mutex deferred_reset_mutex;
136
137 struct work_struct fault_restore_thread;
138};
139
140int gk20a_init_fifo_support(struct gk20a *g);
141
142void gk20a_fifo_isr(struct gk20a *g);
143void gk20a_fifo_nonstall_isr(struct gk20a *g);
144
145int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid);
146
147int gk20a_fifo_enable_engine_activity(struct gk20a *g,
148 struct fifo_engine_info_gk20a *eng_info);
149int gk20a_fifo_disable_engine_activity(struct gk20a *g,
150 struct fifo_engine_info_gk20a *eng_info,
151 bool wait_for_idle);
152
153int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 hw_chid,
154 bool add, bool wait_for_finish);
155
156int gk20a_fifo_suspend(struct gk20a *g);
157
158bool gk20a_fifo_mmu_fault_pending(struct gk20a *g);
159void gk20a_fifo_recover(struct gk20a *g, u32 engine_ids, bool verbose);
160int gk20a_init_fifo_reset_enable_hw(struct gk20a *g);
161
162void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
163 unsigned long fault_id);
164#endif /*__FIFO_GK20A_H__*/
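
As a rough, hypothetical sketch of how the fifo entry points declared above might be driven (none of this code is part of the patch; the function name and the choice of runlist are illustrative only), a bring-up path could look like:

static int example_fifo_bringup(struct gk20a *g, u32 hw_chid)
{
	int err;

	/* reset the fifo unit and re-enable its hardware */
	err = gk20a_init_fifo_reset_enable_hw(g);
	if (err)
		return err;

	/* initialise fifo software and hardware state */
	err = gk20a_init_fifo_support(g);
	if (err)
		return err;

	/* add hw_chid to the gr runlist (id 0 on gk20a) and wait for the update */
	return gk20a_fifo_update_runlist(g, ENGINE_GR_GK20A, hw_chid,
					 true, true);
}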
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
new file mode 100644
index 00000000..4cc500de
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -0,0 +1,1681 @@
1/*
2 * drivers/video/tegra/host/gk20a/gk20a.c
3 *
4 * GK20A Graphics
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#define CREATE_TRACE_POINTS
22#include <trace/events/gk20a.h>
23
24#include <linux/dma-mapping.h>
25#include <linux/highmem.h>
26#include <linux/string.h>
27#include <linux/cdev.h>
28#include <linux/delay.h>
29#include <linux/firmware.h>
30#include <linux/interrupt.h>
31#include <linux/irq.h>
32#include <linux/export.h>
33#include <linux/file.h>
34#include <linux/of.h>
35#include <linux/of_device.h>
36#include <linux/of_platform.h>
37#include <linux/pm_runtime.h>
38#include <linux/thermal.h>
39#include <asm/cacheflush.h>
40#include <linux/debugfs.h>
41#include <linux/spinlock.h>
42#include <linux/tegra-powergate.h>
43
44#include <linux/sched.h>
45#include <linux/input-cfboost.h>
46
47#include <mach/pm_domains.h>
48
49#include "gk20a.h"
50#include "debug_gk20a.h"
51#include "ctrl_gk20a.h"
52#include "hw_mc_gk20a.h"
53#include "hw_timer_gk20a.h"
54#include "hw_bus_gk20a.h"
55#include "hw_sim_gk20a.h"
56#include "hw_top_gk20a.h"
57#include "hw_ltc_gk20a.h"
58#include "gk20a_scale.h"
59#include "dbg_gpu_gk20a.h"
60#include "hal.h"
61
62#ifdef CONFIG_ARM64
63#define __cpuc_flush_dcache_area __flush_dcache_area
64#endif
65
66#define CLASS_NAME "nvidia-gpu"
67/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
68#define INTERFACE_NAME "nvhost%s-gpu"
69
70#define GK20A_NUM_CDEVS 5
71
72#if defined(GK20A_DEBUG)
73u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK;
74u32 gk20a_dbg_ftrace;
75#endif
76
77static int gk20a_pm_finalize_poweron(struct device *dev);
78static int gk20a_pm_prepare_poweroff(struct device *dev);
79
80static inline void set_gk20a(struct platform_device *dev, struct gk20a *gk20a)
81{
82 gk20a_get_platform(dev)->g = gk20a;
83}
84
85static const struct file_operations gk20a_channel_ops = {
86 .owner = THIS_MODULE,
87 .release = gk20a_channel_release,
88 .open = gk20a_channel_open,
89#ifdef CONFIG_COMPAT
90 .compat_ioctl = gk20a_channel_ioctl,
91#endif
92 .unlocked_ioctl = gk20a_channel_ioctl,
93};
94
95static const struct file_operations gk20a_ctrl_ops = {
96 .owner = THIS_MODULE,
97 .release = gk20a_ctrl_dev_release,
98 .open = gk20a_ctrl_dev_open,
99 .unlocked_ioctl = gk20a_ctrl_dev_ioctl,
100#ifdef CONFIG_COMPAT
101 .compat_ioctl = gk20a_ctrl_dev_ioctl,
102#endif
103};
104
105static const struct file_operations gk20a_dbg_ops = {
106 .owner = THIS_MODULE,
107 .release = gk20a_dbg_gpu_dev_release,
108 .open = gk20a_dbg_gpu_dev_open,
109 .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
110 .poll = gk20a_dbg_gpu_dev_poll,
111#ifdef CONFIG_COMPAT
112 .compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
113#endif
114};
115
116static const struct file_operations gk20a_as_ops = {
117 .owner = THIS_MODULE,
118 .release = gk20a_as_dev_release,
119 .open = gk20a_as_dev_open,
120#ifdef CONFIG_COMPAT
121 .compat_ioctl = gk20a_as_dev_ioctl,
122#endif
123 .unlocked_ioctl = gk20a_as_dev_ioctl,
124};
125
126/*
127 * Note: the profiler session uses a different 'open' to trigger its
128 * handling. Most of the code is shared with the debugger path; if handling
129 * both in the same path gets too tangled, the two can be separated
130 * cleanly later.
131 */
132static const struct file_operations gk20a_prof_ops = {
133 .owner = THIS_MODULE,
134 .release = gk20a_dbg_gpu_dev_release,
135 .open = gk20a_prof_gpu_dev_open,
136 .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
137 /* .mmap = gk20a_prof_gpu_dev_mmap,*/
138 /*int (*mmap) (struct file *, struct vm_area_struct *);*/
140#ifdef CONFIG_COMPAT
141 .compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
142#endif
143};
144
145static inline void sim_writel(struct gk20a *g, u32 r, u32 v)
146{
147 writel(v, g->sim.regs+r);
148}
149
150static inline u32 sim_readl(struct gk20a *g, u32 r)
151{
152 return readl(g->sim.regs+r);
153}
154
155static void kunmap_and_free_iopage(void **kvaddr, struct page **page)
156{
157 if (*kvaddr) {
158 kunmap(*kvaddr);
159 *kvaddr = 0;
160 }
161 if (*page) {
162 __free_page(*page);
163 *page = 0;
164 }
165}
166
167static void gk20a_free_sim_support(struct gk20a *g)
168{
169 /* free sim mappings, bfrs */
170 kunmap_and_free_iopage(&g->sim.send_bfr.kvaddr,
171 &g->sim.send_bfr.page);
172
173 kunmap_and_free_iopage(&g->sim.recv_bfr.kvaddr,
174 &g->sim.recv_bfr.page);
175
176 kunmap_and_free_iopage(&g->sim.msg_bfr.kvaddr,
177 &g->sim.msg_bfr.page);
178}
179
180static void gk20a_remove_sim_support(struct sim_gk20a *s)
181{
182 struct gk20a *g = s->g;
183 if (g->sim.regs)
184 sim_writel(g, sim_config_r(), sim_config_mode_disabled_v());
185 gk20a_free_sim_support(g);
186}
187
188static int alloc_and_kmap_iopage(struct device *d,
189 void **kvaddr,
190 phys_addr_t *phys,
191 struct page **page)
192{
193 int err = 0;
194 *page = alloc_page(GFP_KERNEL);
195
196 if (!*page) {
197 err = -ENOMEM;
198 dev_err(d, "couldn't allocate io page\n");
199 goto fail;
200 }
201
202 *kvaddr = kmap(*page);
203 if (!*kvaddr) {
204 err = -ENOMEM;
205 dev_err(d, "couldn't kmap io page\n");
206 goto fail;
207 }
208 *phys = page_to_phys(*page);
209 return 0;
210
211 fail:
212 kunmap_and_free_iopage(kvaddr, page);
213 return err;
214
215}
216
217static void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i,
218 struct resource **out)
219{
220 struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);
221 if (!r)
222 return NULL;
223 if (out)
224 *out = r;
225 return devm_request_and_ioremap(&dev->dev, r);
226}
227
228/* TBD: strip from released */
229static int gk20a_init_sim_support(struct platform_device *dev)
230{
231 int err = 0;
232 struct gk20a *g = get_gk20a(dev);
233 struct device *d = &dev->dev;
234 phys_addr_t phys;
235
236 g->sim.g = g;
237 g->sim.regs = gk20a_ioremap_resource(dev, GK20A_SIM_IORESOURCE_MEM,
238 &g->sim.reg_mem);
239 if (!g->sim.regs) {
240 dev_err(d, "failed to remap gk20a sim regs\n");
241 err = -ENXIO;
242 goto fail;
243 }
244
245 /* allocate sim event/msg buffers */
246 err = alloc_and_kmap_iopage(d, &g->sim.send_bfr.kvaddr,
247 &g->sim.send_bfr.phys,
248 &g->sim.send_bfr.page);
249
250 err = err || alloc_and_kmap_iopage(d, &g->sim.recv_bfr.kvaddr,
251 &g->sim.recv_bfr.phys,
252 &g->sim.recv_bfr.page);
253
254 err = err || alloc_and_kmap_iopage(d, &g->sim.msg_bfr.kvaddr,
255 &g->sim.msg_bfr.phys,
256 &g->sim.msg_bfr.page);
257
258 if (!(g->sim.send_bfr.kvaddr && g->sim.recv_bfr.kvaddr &&
259 g->sim.msg_bfr.kvaddr)) {
260 dev_err(d, "couldn't allocate all sim buffers\n");
261 goto fail;
262 }
263
264 /*mark send ring invalid*/
265 sim_writel(g, sim_send_ring_r(), sim_send_ring_status_invalid_f());
266
267 /*read get pointer and make equal to put*/
268 g->sim.send_ring_put = sim_readl(g, sim_send_get_r());
269 sim_writel(g, sim_send_put_r(), g->sim.send_ring_put);
270
271 /*write send ring address and make it valid*/
272 /*TBD: work for >32b physmem*/
273 phys = g->sim.send_bfr.phys;
274 sim_writel(g, sim_send_ring_hi_r(), 0);
275 sim_writel(g, sim_send_ring_r(),
276 sim_send_ring_status_valid_f() |
277 sim_send_ring_target_phys_pci_coherent_f() |
278 sim_send_ring_size_4kb_f() |
279 sim_send_ring_addr_lo_f(phys >> PAGE_SHIFT));
280
281 /*repeat for recv ring (but swap put,get as roles are opposite) */
282 sim_writel(g, sim_recv_ring_r(), sim_recv_ring_status_invalid_f());
283
284 /*read put pointer and make equal to get*/
285 g->sim.recv_ring_get = sim_readl(g, sim_recv_put_r());
286 sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get);
287
288	/*write recv ring address and make it valid*/
289 /*TBD: work for >32b physmem*/
290 phys = g->sim.recv_bfr.phys;
291 sim_writel(g, sim_recv_ring_hi_r(), 0);
292 sim_writel(g, sim_recv_ring_r(),
293 sim_recv_ring_status_valid_f() |
294 sim_recv_ring_target_phys_pci_coherent_f() |
295 sim_recv_ring_size_4kb_f() |
296 sim_recv_ring_addr_lo_f(phys >> PAGE_SHIFT));
297
298 g->sim.remove_support = gk20a_remove_sim_support;
299 return 0;
300
301 fail:
302 gk20a_free_sim_support(g);
303 return err;
304}
305
306static inline u32 sim_msg_header_size(void)
307{
308	return 24;/*TBD: fix the header to get this from NV_VGPU_MSG_HEADER*/
309}
310
311static inline u32 *sim_msg_bfr(struct gk20a *g, u32 byte_offset)
312{
313 return (u32 *)(g->sim.msg_bfr.kvaddr + byte_offset);
314}
315
316static inline u32 *sim_msg_hdr(struct gk20a *g, u32 byte_offset)
317{
318 return sim_msg_bfr(g, byte_offset); /*starts at 0*/
319}
320
321static inline u32 *sim_msg_param(struct gk20a *g, u32 byte_offset)
322{
323 /*starts after msg header/cmn*/
324 return sim_msg_bfr(g, byte_offset + sim_msg_header_size());
325}
326
327static inline void sim_write_hdr(struct gk20a *g, u32 func, u32 size)
328{
329 /*memset(g->sim.msg_bfr.kvaddr,0,min(PAGE_SIZE,size));*/
330 *sim_msg_hdr(g, sim_msg_signature_r()) = sim_msg_signature_valid_v();
331 *sim_msg_hdr(g, sim_msg_result_r()) = sim_msg_result_rpc_pending_v();
332 *sim_msg_hdr(g, sim_msg_spare_r()) = sim_msg_spare__init_v();
333 *sim_msg_hdr(g, sim_msg_function_r()) = func;
334 *sim_msg_hdr(g, sim_msg_length_r()) = size + sim_msg_header_size();
335}
336
337static inline u32 sim_escape_read_hdr_size(void)
338{
339 return 12; /*TBD: fix NV_VGPU_SIM_ESCAPE_READ_HEADER*/
340}
341
342static u32 *sim_send_ring_bfr(struct gk20a *g, u32 byte_offset)
343{
344 return (u32 *)(g->sim.send_bfr.kvaddr + byte_offset);
345}
346
347static int rpc_send_message(struct gk20a *g)
348{
349 /* calculations done in units of u32s */
350 u32 send_base = sim_send_put_pointer_v(g->sim.send_ring_put) * 2;
351 u32 dma_offset = send_base + sim_dma_r()/sizeof(u32);
352 u32 dma_hi_offset = send_base + sim_dma_hi_r()/sizeof(u32);
353
354 *sim_send_ring_bfr(g, dma_offset*sizeof(u32)) =
355 sim_dma_target_phys_pci_coherent_f() |
356 sim_dma_status_valid_f() |
357 sim_dma_size_4kb_f() |
358 sim_dma_addr_lo_f(g->sim.msg_bfr.phys >> PAGE_SHIFT);
359
360 *sim_send_ring_bfr(g, dma_hi_offset*sizeof(u32)) = 0; /*TBD >32b phys*/
361
362 *sim_msg_hdr(g, sim_msg_sequence_r()) = g->sim.sequence_base++;
363
364 g->sim.send_ring_put = (g->sim.send_ring_put + 2 * sizeof(u32)) %
365 PAGE_SIZE;
366
367 __cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE);
368 __cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE);
369 __cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE);
370
371 /* Update the put pointer. This will trap into the host. */
372 sim_writel(g, sim_send_put_r(), g->sim.send_ring_put);
373
374 return 0;
375}
376
377static inline u32 *sim_recv_ring_bfr(struct gk20a *g, u32 byte_offset)
378{
379 return (u32 *)(g->sim.recv_bfr.kvaddr + byte_offset);
380}
381
382static int rpc_recv_poll(struct gk20a *g)
383{
384 phys_addr_t recv_phys_addr;
385
386 /* XXX This read is not required (?) */
387 /*pVGpu->recv_ring_get = VGPU_REG_RD32(pGpu, NV_VGPU_RECV_GET);*/
388
389 /* Poll the recv ring get pointer in an infinite loop*/
390 do {
391 g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r());
392 } while (g->sim.recv_ring_put == g->sim.recv_ring_get);
393
394 /* process all replies */
395 while (g->sim.recv_ring_put != g->sim.recv_ring_get) {
396 /* these are in u32 offsets*/
397 u32 dma_lo_offset =
398 sim_recv_put_pointer_v(g->sim.recv_ring_get)*2 + 0;
399 /*u32 dma_hi_offset = dma_lo_offset + 1;*/
400 u32 recv_phys_addr_lo = sim_dma_addr_lo_v(*sim_recv_ring_bfr(g, dma_lo_offset*4));
401
402 /*u32 recv_phys_addr_hi = sim_dma_hi_addr_v(
403 (phys_addr_t)sim_recv_ring_bfr(g,dma_hi_offset*4));*/
404
405 /*TBD >32b phys addr */
406 recv_phys_addr = recv_phys_addr_lo << PAGE_SHIFT;
407
408 if (recv_phys_addr != g->sim.msg_bfr.phys) {
409 dev_err(dev_from_gk20a(g), "%s Error in RPC reply\n",
410 __func__);
411 return -1;
412 }
413
414 /* Update GET pointer */
415 g->sim.recv_ring_get = (g->sim.recv_ring_get + 2*sizeof(u32)) %
416 PAGE_SIZE;
417
418 __cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE);
419 __cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE);
420 __cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE);
421
422 sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get);
423
424 g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r());
425 }
426
427 return 0;
428}
429
430static int issue_rpc_and_wait(struct gk20a *g)
431{
432 int err;
433
434 err = rpc_send_message(g);
435 if (err) {
436 dev_err(dev_from_gk20a(g), "%s failed rpc_send_message\n",
437 __func__);
438 return err;
439 }
440
441 err = rpc_recv_poll(g);
442 if (err) {
443 dev_err(dev_from_gk20a(g), "%s failed rpc_recv_poll\n",
444 __func__);
445 return err;
446 }
447
448 /* Now check if RPC really succeeded */
449 if (*sim_msg_hdr(g, sim_msg_result_r()) != sim_msg_result_success_v()) {
450 dev_err(dev_from_gk20a(g), "%s received failed status!\n",
451 __func__);
452 return -(*sim_msg_hdr(g, sim_msg_result_r()));
453 }
454 return 0;
455}
456
457int gk20a_sim_esc_read(struct gk20a *g, char *path, u32 index, u32 count, u32 *data)
458{
459 int err;
460 size_t pathlen = strlen(path);
461 u32 data_offset;
462
463 sim_write_hdr(g, sim_msg_function_sim_escape_read_v(),
464 sim_escape_read_hdr_size());
465 *sim_msg_param(g, 0) = index;
466 *sim_msg_param(g, 4) = count;
467 data_offset = roundup(0xc + pathlen + 1, sizeof(u32));
468 *sim_msg_param(g, 8) = data_offset;
469 strcpy((char *)sim_msg_param(g, 0xc), path);
470
471 err = issue_rpc_and_wait(g);
472
473 if (!err)
474 memcpy(data, sim_msg_param(g, data_offset), count);
475 return err;
476}
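
A hypothetical use of the escape-read RPC above (not part of the patch; the escape path string below is a made-up placeholder, purely for illustration):

static void example_sim_esc_read(struct gk20a *g)
{
	u32 value = 0;
	int err;

	/* read one 32-bit word; "sim/example_reg" is a placeholder path */
	err = gk20a_sim_esc_read(g, "sim/example_reg", 0,
				 sizeof(value), &value);
	if (err)
		gk20a_err(dev_from_gk20a(g), "escape read failed (%d)", err);
	else
		gk20a_dbg_info("escape read returned 0x%08x", value);
}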
477
478static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id)
479{
480 struct gk20a *g = dev_id;
481 u32 mc_intr_0;
482
483 if (!g->power_on)
484 return IRQ_NONE;
485
486 /* not from gpu when sharing irq with others */
487 mc_intr_0 = gk20a_readl(g, mc_intr_0_r());
488 if (unlikely(!mc_intr_0))
489 return IRQ_NONE;
490
491 gk20a_writel(g, mc_intr_en_0_r(),
492 mc_intr_en_0_inta_disabled_f());
493
494 /* flush previous write */
495 gk20a_readl(g, mc_intr_en_0_r());
496
497 return IRQ_WAKE_THREAD;
498}
499
500static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id)
501{
502 struct gk20a *g = dev_id;
503 u32 mc_intr_1;
504
505 if (!g->power_on)
506 return IRQ_NONE;
507
508 /* not from gpu when sharing irq with others */
509 mc_intr_1 = gk20a_readl(g, mc_intr_1_r());
510 if (unlikely(!mc_intr_1))
511 return IRQ_NONE;
512
513 gk20a_writel(g, mc_intr_en_1_r(),
514 mc_intr_en_1_inta_disabled_f());
515
516 /* flush previous write */
517 gk20a_readl(g, mc_intr_en_1_r());
518
519 return IRQ_WAKE_THREAD;
520}
521
522static void gk20a_pbus_isr(struct gk20a *g)
523{
524 u32 val;
525 val = gk20a_readl(g, bus_intr_0_r());
526 if (val & (bus_intr_0_pri_squash_m() |
527 bus_intr_0_pri_fecserr_m() |
528 bus_intr_0_pri_timeout_m())) {
529 gk20a_err(dev_from_gk20a(g), "top_fs_status_r : 0x%x",
530 gk20a_readl(g, top_fs_status_r()));
531 gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
532 gk20a_readl(g, mc_enable_r()));
533 gk20a_err(&g->dev->dev,
534 "NV_PTIMER_PRI_TIMEOUT_SAVE_0: 0x%x\n",
535 gk20a_readl(g, timer_pri_timeout_save_0_r()));
536 gk20a_err(&g->dev->dev,
537 "NV_PTIMER_PRI_TIMEOUT_SAVE_1: 0x%x\n",
538 gk20a_readl(g, timer_pri_timeout_save_1_r()));
539 gk20a_err(&g->dev->dev,
540 "NV_PTIMER_PRI_TIMEOUT_FECS_ERRCODE: 0x%x\n",
541 gk20a_readl(g, timer_pri_timeout_fecs_errcode_r()));
542 }
543
544 if (val)
545 gk20a_err(&g->dev->dev,
546 "Unhandled pending pbus interrupt\n");
547
548 gk20a_writel(g, bus_intr_0_r(), val);
549}
550
551static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
552{
553 struct gk20a *g = dev_id;
554 u32 mc_intr_0;
555
556 gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
557
558 mc_intr_0 = gk20a_readl(g, mc_intr_0_r());
559
560 gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);
561
562 if (mc_intr_0 & mc_intr_0_pgraph_pending_f())
563 gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g));
564 if (mc_intr_0 & mc_intr_0_pfifo_pending_f())
565 gk20a_fifo_isr(g);
566 if (mc_intr_0 & mc_intr_0_pmu_pending_f())
567 gk20a_pmu_isr(g);
568 if (mc_intr_0 & mc_intr_0_priv_ring_pending_f())
569 gk20a_priv_ring_isr(g);
570 if (mc_intr_0 & mc_intr_0_ltc_pending_f())
571 gk20a_mm_ltc_isr(g);
572 if (mc_intr_0 & mc_intr_0_pbus_pending_f())
573 gk20a_pbus_isr(g);
574
575 gk20a_writel(g, mc_intr_en_0_r(),
576 mc_intr_en_0_inta_hardware_f());
577
578 /* flush previous write */
579 gk20a_readl(g, mc_intr_en_0_r());
580
581 return IRQ_HANDLED;
582}
583
584static irqreturn_t gk20a_intr_thread_nonstall(int irq, void *dev_id)
585{
586 struct gk20a *g = dev_id;
587 u32 mc_intr_1;
588
589 gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
590
591 mc_intr_1 = gk20a_readl(g, mc_intr_1_r());
592
593 gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1);
594
595 if (mc_intr_1 & mc_intr_0_pfifo_pending_f())
596 gk20a_fifo_nonstall_isr(g);
597 if (mc_intr_1 & mc_intr_0_pgraph_pending_f())
598 gk20a_gr_nonstall_isr(g);
599
600 gk20a_writel(g, mc_intr_en_1_r(),
601 mc_intr_en_1_inta_hardware_f());
602
603 /* flush previous write */
604 gk20a_readl(g, mc_intr_en_1_r());
605
606 return IRQ_HANDLED;
607}
608
609static void gk20a_remove_support(struct platform_device *dev)
610{
611 struct gk20a *g = get_gk20a(dev);
612
613 /* pmu support should already be removed when driver turns off
614	   gpu power rail in prepare_poweroff */
615 if (g->gk20a_cdev.gk20a_cooling_dev)
616 thermal_cooling_device_unregister(g->gk20a_cdev.gk20a_cooling_dev);
617
618 if (g->gr.remove_support)
619 g->gr.remove_support(&g->gr);
620
621 if (g->fifo.remove_support)
622 g->fifo.remove_support(&g->fifo);
623
624 if (g->mm.remove_support)
625 g->mm.remove_support(&g->mm);
626
627 if (g->sim.remove_support)
628 g->sim.remove_support(&g->sim);
629
630 release_firmware(g->pmu_fw);
631
632 if (g->irq_requested) {
633 free_irq(g->irq_stall, g);
634 free_irq(g->irq_nonstall, g);
635 g->irq_requested = false;
636 }
637
638 /* free mappings to registers, etc*/
639
640 if (g->regs) {
641 iounmap(g->regs);
642 g->regs = 0;
643 }
644 if (g->bar1) {
645 iounmap(g->bar1);
646 g->bar1 = 0;
647 }
648}
649
650static int gk20a_init_support(struct platform_device *dev)
651{
652 int err = 0;
653 struct gk20a *g = get_gk20a(dev);
654
655 g->regs = gk20a_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM,
656 &g->reg_mem);
657 if (!g->regs) {
658 dev_err(dev_from_gk20a(g), "failed to remap gk20a registers\n");
659 err = -ENXIO;
660 goto fail;
661 }
662
663 g->bar1 = gk20a_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM,
664 &g->bar1_mem);
665 if (!g->bar1) {
666 dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n");
667 err = -ENXIO;
668 goto fail;
669 }
670
671 /* Get interrupt numbers */
672 g->irq_stall = platform_get_irq(dev, 0);
673 g->irq_nonstall = platform_get_irq(dev, 1);
674 if (g->irq_stall < 0 || g->irq_nonstall < 0) {
675 err = -ENXIO;
676 goto fail;
677 }
678
679 if (tegra_cpu_is_asim()) {
680 err = gk20a_init_sim_support(dev);
681 if (err)
682 goto fail;
683 }
684
685 mutex_init(&g->dbg_sessions_lock);
686 mutex_init(&g->client_lock);
687
688 g->remove_support = gk20a_remove_support;
689 return 0;
690
691 fail:
692 gk20a_remove_support(dev);
693 return err;
694}
695
696static int gk20a_init_client(struct platform_device *dev)
697{
698 struct gk20a *g = get_gk20a(dev);
699 int err;
700
701 gk20a_dbg_fn("");
702
703#ifndef CONFIG_PM_RUNTIME
704 gk20a_pm_finalize_poweron(&dev->dev);
705#endif
706
707 err = gk20a_init_mm_setup_sw(g);
708 if (err)
709 return err;
710
711 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
712 gk20a_scale_hw_init(dev);
713 return 0;
714}
715
716static void gk20a_deinit_client(struct platform_device *dev)
717{
718 gk20a_dbg_fn("");
719#ifndef CONFIG_PM_RUNTIME
720 gk20a_pm_prepare_poweroff(&dev->dev);
721#endif
722}
723
724int gk20a_get_client(struct gk20a *g)
725{
726 int err = 0;
727
728 mutex_lock(&g->client_lock);
729 if (g->client_refcount == 0)
730 err = gk20a_init_client(g->dev);
731 if (!err)
732 g->client_refcount++;
733 mutex_unlock(&g->client_lock);
734 return err;
735}
736
737void gk20a_put_client(struct gk20a *g)
738{
739 mutex_lock(&g->client_lock);
740 if (g->client_refcount == 1)
741 gk20a_deinit_client(g->dev);
742 g->client_refcount--;
743 mutex_unlock(&g->client_lock);
744 WARN_ON(g->client_refcount < 0);
745}
746
747static int gk20a_pm_prepare_poweroff(struct device *_dev)
748{
749 struct platform_device *dev = to_platform_device(_dev);
750 struct gk20a *g = get_gk20a(dev);
751 int ret = 0;
752
753 gk20a_dbg_fn("");
754
755 if (!g->power_on)
756 return 0;
757
758 ret |= gk20a_channel_suspend(g);
759
760 /* disable elpg before gr or fifo suspend */
761 ret |= gk20a_pmu_destroy(g);
762 ret |= gk20a_gr_suspend(g);
763 ret |= gk20a_mm_suspend(g);
764 ret |= gk20a_fifo_suspend(g);
765
766 /*
767 * After this point, gk20a interrupts should not get
768 * serviced.
769 */
770 if (g->irq_requested) {
771 free_irq(g->irq_stall, g);
772 free_irq(g->irq_nonstall, g);
773 g->irq_requested = false;
774 }
775
776 /* Disable GPCPLL */
777 ret |= gk20a_suspend_clk_support(g);
778 g->power_on = false;
779
780 return ret;
781}
782
783static void gk20a_detect_chip(struct gk20a *g)
784{
785 struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics;
786
787 u32 mc_boot_0_value = gk20a_readl(g, mc_boot_0_r());
788 gpu->arch = mc_boot_0_architecture_v(mc_boot_0_value) <<
789 NVHOST_GPU_ARCHITECTURE_SHIFT;
790 gpu->impl = mc_boot_0_implementation_v(mc_boot_0_value);
791 gpu->rev =
792 (mc_boot_0_major_revision_v(mc_boot_0_value) << 4) |
793 mc_boot_0_minor_revision_v(mc_boot_0_value);
794
795 gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
796 g->gpu_characteristics.arch,
797 g->gpu_characteristics.impl,
798 g->gpu_characteristics.rev);
799}
800
801static int gk20a_pm_finalize_poweron(struct device *_dev)
802{
803 struct platform_device *dev = to_platform_device(_dev);
804 struct gk20a *g = get_gk20a(dev);
805 int err, nice_value;
806
807 gk20a_dbg_fn("");
808
809 if (g->power_on)
810 return 0;
811
812 nice_value = task_nice(current);
813 set_user_nice(current, -20);
814
815 if (!g->irq_requested) {
816 err = request_threaded_irq(g->irq_stall,
817 gk20a_intr_isr_stall,
818 gk20a_intr_thread_stall,
819 0, "gk20a_stall", g);
820 if (err) {
821 dev_err(dev_from_gk20a(g),
822 "failed to request stall intr irq @ %lld\n",
823 (u64)g->irq_stall);
824 goto done;
825 }
826 err = request_threaded_irq(g->irq_nonstall,
827 gk20a_intr_isr_nonstall,
828 gk20a_intr_thread_nonstall,
829 0, "gk20a_nonstall", g);
830 if (err) {
831 dev_err(dev_from_gk20a(g),
832 "failed to request non-stall intr irq @ %lld\n",
833 (u64)g->irq_nonstall);
834 goto done;
835 }
836 g->irq_requested = true;
837 }
838
839 g->power_on = true;
840
841 gk20a_writel(g, mc_intr_mask_1_r(),
842 mc_intr_0_pfifo_pending_f()
843 | mc_intr_0_pgraph_pending_f());
844 gk20a_writel(g, mc_intr_en_1_r(),
845 mc_intr_en_1_inta_hardware_f());
846
847 gk20a_writel(g, mc_intr_mask_0_r(),
848 mc_intr_0_pgraph_pending_f()
849 | mc_intr_0_pfifo_pending_f()
850 | mc_intr_0_priv_ring_pending_f()
851 | mc_intr_0_ltc_pending_f()
852 | mc_intr_0_pbus_pending_f());
853 gk20a_writel(g, mc_intr_en_0_r(),
854 mc_intr_en_0_inta_hardware_f());
855
856 if (!tegra_platform_is_silicon())
857 gk20a_writel(g, bus_intr_en_0_r(), 0x0);
858 else
859 gk20a_writel(g, bus_intr_en_0_r(),
860 bus_intr_en_0_pri_squash_m() |
861 bus_intr_en_0_pri_fecserr_m() |
862 bus_intr_en_0_pri_timeout_m());
863 gk20a_reset_priv_ring(g);
864
865 gk20a_detect_chip(g);
866 err = gpu_init_hal(g);
867 if (err)
868 goto done;
869
870 /* TBD: move this after graphics init in which blcg/slcg is enabled.
871 This function removes SlowdownOnBoot which applies 32x divider
872 on gpcpll bypass path. The purpose of slowdown is to save power
873 during boot but it also significantly slows down gk20a init on
874 simulation and emulation. We should remove SOB after graphics power
875 saving features (blcg/slcg) are enabled. For now, do it here. */
876 err = gk20a_init_clk_support(g);
877 if (err) {
878 gk20a_err(&dev->dev, "failed to init gk20a clk");
879 goto done;
880 }
881
882 /* enable pri timeout only on silicon */
883 if (tegra_platform_is_silicon()) {
884 gk20a_writel(g,
885 timer_pri_timeout_r(),
886 timer_pri_timeout_period_f(0x186A0) |
887 timer_pri_timeout_en_en_enabled_f());
888 } else {
889 gk20a_writel(g,
890 timer_pri_timeout_r(),
891 timer_pri_timeout_period_f(0x186A0) |
892 timer_pri_timeout_en_en_disabled_f());
893 }
894
895 err = gk20a_init_fifo_reset_enable_hw(g);
896 if (err) {
897 gk20a_err(&dev->dev, "failed to reset gk20a fifo");
898 goto done;
899 }
900
901 err = gk20a_init_mm_support(g);
902 if (err) {
903 gk20a_err(&dev->dev, "failed to init gk20a mm");
904 goto done;
905 }
906
907 err = gk20a_init_pmu_support(g);
908 if (err) {
909 gk20a_err(&dev->dev, "failed to init gk20a pmu");
910 goto done;
911 }
912
913 err = gk20a_init_fifo_support(g);
914 if (err) {
915 gk20a_err(&dev->dev, "failed to init gk20a fifo");
916 goto done;
917 }
918
919 err = gk20a_init_gr_support(g);
920 if (err) {
921 gk20a_err(&dev->dev, "failed to init gk20a gr");
922 goto done;
923 }
924
925 err = gk20a_init_pmu_setup_hw2(g);
926 if (err) {
927 gk20a_err(&dev->dev, "failed to init gk20a pmu_hw2");
928 goto done;
929 }
930
931 err = gk20a_init_therm_support(g);
932 if (err) {
933 gk20a_err(&dev->dev, "failed to init gk20a therm");
934 goto done;
935 }
936
937 err = gk20a_init_gpu_characteristics(g);
938 if (err) {
939 gk20a_err(&dev->dev, "failed to init gk20a gpu characteristics");
940 goto done;
941 }
942
943 gk20a_channel_resume(g);
944 set_user_nice(current, nice_value);
945
946done:
947 return err;
948}
949
950static struct of_device_id tegra_gk20a_of_match[] = {
951#ifdef CONFIG_TEGRA_GK20A
952 { .compatible = "nvidia,tegra124-gk20a",
953 .data = &gk20a_tegra_platform },
954#endif
955 { .compatible = "nvidia,generic-gk20a",
956 .data = &gk20a_generic_platform },
957 { },
958};
959
960int tegra_gpu_get_max_state(struct thermal_cooling_device *cdev,
961 unsigned long *max_state)
962{
963 struct cooling_device_gk20a *gk20a_gpufreq_device = cdev->devdata;
964
965 *max_state = gk20a_gpufreq_device->gk20a_freq_table_size - 1;
966 return 0;
967}
968
969int tegra_gpu_get_cur_state(struct thermal_cooling_device *cdev,
970 unsigned long *cur_state)
971{
972 struct cooling_device_gk20a *gk20a_gpufreq_device = cdev->devdata;
973
974 *cur_state = gk20a_gpufreq_device->gk20a_freq_state;
975 return 0;
976}
977
978int tegra_gpu_set_cur_state(struct thermal_cooling_device *c_dev,
979 unsigned long cur_state)
980{
981 u32 target_freq;
982 struct gk20a *g;
983 struct gpufreq_table_data *gpu_cooling_table;
984 struct cooling_device_gk20a *gk20a_gpufreq_device = c_dev->devdata;
985
986 BUG_ON(cur_state >= gk20a_gpufreq_device->gk20a_freq_table_size);
987
988 g = container_of(gk20a_gpufreq_device, struct gk20a, gk20a_cdev);
989
990 gpu_cooling_table = tegra_gpufreq_table_get();
991 target_freq = gpu_cooling_table[cur_state].frequency;
992
993 /* ensure a query for state will get the proper value */
994 gk20a_gpufreq_device->gk20a_freq_state = cur_state;
995
996 gk20a_clk_set_rate(g, target_freq);
997
998 return 0;
999}
1000
1001static struct thermal_cooling_device_ops tegra_gpu_cooling_ops = {
1002 .get_max_state = tegra_gpu_get_max_state,
1003 .get_cur_state = tegra_gpu_get_cur_state,
1004 .set_cur_state = tegra_gpu_set_cur_state,
1005};
1006
1007static int gk20a_create_device(
1008 struct platform_device *pdev, int devno, const char *cdev_name,
1009 struct cdev *cdev, struct device **out,
1010 const struct file_operations *ops)
1011{
1012 struct device *dev;
1013 int err;
1014 struct gk20a *g = get_gk20a(pdev);
1015
1016 gk20a_dbg_fn("");
1017
1018 cdev_init(cdev, ops);
1019 cdev->owner = THIS_MODULE;
1020
1021 err = cdev_add(cdev, devno, 1);
1022 if (err) {
1023 dev_err(&pdev->dev,
1024 "failed to add %s cdev\n", cdev_name);
1025 return err;
1026 }
1027
1028 dev = device_create(g->class, NULL, devno, NULL,
1029 (pdev->id <= 0) ? INTERFACE_NAME : INTERFACE_NAME ".%d",
1030 cdev_name, pdev->id);
1031
1032 if (IS_ERR(dev)) {
1033 err = PTR_ERR(dev);
1034 cdev_del(cdev);
1035 dev_err(&pdev->dev,
1036 "failed to create %s device for %s\n",
1037 cdev_name, pdev->name);
1038 return err;
1039 }
1040
1041 *out = dev;
1042 return 0;
1043}
1044
1045static void gk20a_user_deinit(struct platform_device *dev)
1046{
1047 struct gk20a *g = get_gk20a(dev);
1048
1049 if (g->channel.node) {
1050 device_destroy(g->class, g->channel.cdev.dev);
1051 cdev_del(&g->channel.cdev);
1052 }
1053
1054 if (g->as.node) {
1055 device_destroy(g->class, g->as.cdev.dev);
1056 cdev_del(&g->as.cdev);
1057 }
1058
1059 if (g->ctrl.node) {
1060 device_destroy(g->class, g->ctrl.cdev.dev);
1061 cdev_del(&g->ctrl.cdev);
1062 }
1063
1064 if (g->dbg.node) {
1065 device_destroy(g->class, g->dbg.cdev.dev);
1066 cdev_del(&g->dbg.cdev);
1067 }
1068
1069 if (g->prof.node) {
1070 device_destroy(g->class, g->prof.cdev.dev);
1071 cdev_del(&g->prof.cdev);
1072 }
1073
1074 if (g->cdev_region)
1075 unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS);
1076
1077 if (g->class)
1078 class_destroy(g->class);
1079}
1080
1081static int gk20a_user_init(struct platform_device *dev)
1082{
1083 int err;
1084 dev_t devno;
1085 struct gk20a *g = get_gk20a(dev);
1086
1087 g->class = class_create(THIS_MODULE, CLASS_NAME);
1088 if (IS_ERR(g->class)) {
1089 err = PTR_ERR(g->class);
1090 g->class = NULL;
1091 dev_err(&dev->dev,
1092 "failed to create " CLASS_NAME " class\n");
1093 goto fail;
1094 }
1095
1096 err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, CLASS_NAME);
1097 if (err) {
1098 dev_err(&dev->dev, "failed to allocate devno\n");
1099 goto fail;
1100 }
1101 g->cdev_region = devno;
1102
1103 err = gk20a_create_device(dev, devno++, "",
1104 &g->channel.cdev, &g->channel.node,
1105 &gk20a_channel_ops);
1106 if (err)
1107 goto fail;
1108
1109 err = gk20a_create_device(dev, devno++, "-as",
1110 &g->as.cdev, &g->as.node,
1111 &gk20a_as_ops);
1112 if (err)
1113 goto fail;
1114
1115 err = gk20a_create_device(dev, devno++, "-ctrl",
1116 &g->ctrl.cdev, &g->ctrl.node,
1117 &gk20a_ctrl_ops);
1118 if (err)
1119 goto fail;
1120
1121 err = gk20a_create_device(dev, devno++, "-dbg",
1122 &g->dbg.cdev, &g->dbg.node,
1123 &gk20a_dbg_ops);
1124 if (err)
1125 goto fail;
1126
1127 err = gk20a_create_device(dev, devno++, "-prof",
1128 &g->prof.cdev, &g->prof.node,
1129 &gk20a_prof_ops);
1130 if (err)
1131 goto fail;
1132
1133 return 0;
1134fail:
1135 gk20a_user_deinit(dev);
1136 return err;
1137}
1138
1139struct channel_gk20a *gk20a_get_channel_from_file(int fd)
1140{
1141 struct channel_gk20a *ch;
1142 struct file *f = fget(fd);
1143 if (!f)
1144 return 0;
1145
1146 if (f->f_op != &gk20a_channel_ops) {
1147 fput(f);
1148 return 0;
1149 }
1150
1151 ch = (struct channel_gk20a *)f->private_data;
1152 fput(f);
1153 return ch;
1154}
1155
1156static int gk20a_pm_enable_clk(struct device *dev)
1157{
1158 int index = 0;
1159 struct gk20a_platform *platform;
1160
1161 platform = dev_get_drvdata(dev);
1162 if (!platform)
1163 return -EINVAL;
1164
1165 for (index = 0; index < platform->num_clks; index++) {
1166 int err = clk_prepare_enable(platform->clk[index]);
1167 if (err)
1168 return -EINVAL;
1169 }
1170
1171 return 0;
1172}
1173
1174static int gk20a_pm_disable_clk(struct device *dev)
1175{
1176 int index = 0;
1177 struct gk20a_platform *platform;
1178
1179 platform = dev_get_drvdata(dev);
1180 if (!platform)
1181 return -EINVAL;
1182
1183 for (index = 0; index < platform->num_clks; index++)
1184 clk_disable_unprepare(platform->clk[index]);
1185
1186 return 0;
1187}
1188
1189#ifdef CONFIG_PM
1190const struct dev_pm_ops gk20a_pm_ops = {
1191#if defined(CONFIG_PM_RUNTIME) && !defined(CONFIG_PM_GENERIC_DOMAINS)
1192 .runtime_resume = gk20a_pm_enable_clk,
1193 .runtime_suspend = gk20a_pm_disable_clk,
1194#endif
1195};
1196#endif
1197
1198static int gk20a_pm_railgate(struct generic_pm_domain *domain)
1199{
1200 struct gk20a *g = container_of(domain, struct gk20a, pd);
1201 struct gk20a_platform *platform = platform_get_drvdata(g->dev);
1202 int ret = 0;
1203
1204 if (platform->railgate)
1205 ret = platform->railgate(platform->g->dev);
1206
1207 return ret;
1208}
1209
1210static int gk20a_pm_unrailgate(struct generic_pm_domain *domain)
1211{
1212 struct gk20a *g = container_of(domain, struct gk20a, pd);
1213 struct gk20a_platform *platform = platform_get_drvdata(g->dev);
1214 int ret = 0;
1215
1216 if (platform->unrailgate)
1217 ret = platform->unrailgate(platform->g->dev);
1218
1219 return ret;
1220}
1221
1222static int gk20a_pm_suspend(struct device *dev)
1223{
1224 struct gk20a_platform *platform = dev_get_drvdata(dev);
1225 int ret = 0;
1226
1227 if (atomic_read(&dev->power.usage_count) > 1)
1228 return -EBUSY;
1229
1230 ret = gk20a_pm_prepare_poweroff(dev);
1231 if (ret)
1232 return ret;
1233
1234 gk20a_scale_suspend(to_platform_device(dev));
1235
1236 if (platform->suspend)
1237 platform->suspend(dev);
1238
1239 return 0;
1240}
1241
1242static int gk20a_pm_resume(struct device *dev)
1243{
1244 int ret = 0;
1245
1246 ret = gk20a_pm_finalize_poweron(dev);
1247 if (ret)
1248 return ret;
1249
1250 gk20a_scale_resume(to_platform_device(dev));
1251
1252 return 0;
1253}
1254
1255static int gk20a_pm_initialise_domain(struct platform_device *pdev)
1256{
1257 struct gk20a_platform *platform = platform_get_drvdata(pdev);
1258 struct dev_power_governor *pm_domain_gov = NULL;
1259 struct generic_pm_domain *domain = &platform->g->pd;
1260 int ret = 0;
1261
1262 domain->name = kstrdup(pdev->name, GFP_KERNEL);
1263
1264 if (!platform->can_railgate)
1265 pm_domain_gov = &pm_domain_always_on_gov;
1266
1267 pm_genpd_init(domain, pm_domain_gov, true);
1268
1269 domain->power_off = gk20a_pm_railgate;
1270 domain->power_on = gk20a_pm_unrailgate;
1271 domain->dev_ops.start = gk20a_pm_enable_clk;
1272 domain->dev_ops.stop = gk20a_pm_disable_clk;
1273 domain->dev_ops.save_state = gk20a_pm_prepare_poweroff;
1274 domain->dev_ops.restore_state = gk20a_pm_finalize_poweron;
1275 domain->dev_ops.suspend = gk20a_pm_suspend;
1276 domain->dev_ops.resume = gk20a_pm_resume;
1277
1278 device_set_wakeup_capable(&pdev->dev, 0);
1279 ret = pm_genpd_add_device(domain, &pdev->dev);
1280
1281 if (platform->railgate_delay)
1282 pm_genpd_set_poweroff_delay(domain, platform->railgate_delay);
1283
1284 return ret;
1285}
1286
1287static int gk20a_pm_init(struct platform_device *dev)
1288{
1289 struct gk20a_platform *platform = platform_get_drvdata(dev);
1290 int err = 0;
1291
1292 /* Initialise pm runtime */
1293 if (platform->clockgate_delay) {
1294 pm_runtime_set_autosuspend_delay(&dev->dev,
1295 platform->clockgate_delay);
1296 pm_runtime_use_autosuspend(&dev->dev);
1297 }
1298
1299 pm_runtime_enable(&dev->dev);
1300 if (!pm_runtime_enabled(&dev->dev))
1301 gk20a_pm_enable_clk(&dev->dev);
1302
1303 /* Enable runtime railgating if possible. If not,
1304 * turn on the rail now. */
1305 if (platform->can_railgate && IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
1306 platform->railgate(dev);
1307 else
1308 platform->unrailgate(dev);
1309
1310 /* genpd will take care of runtime power management if it is enabled */
1311 if (IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
1312 err = gk20a_pm_initialise_domain(dev);
1313
1314 return err;
1315}
1316
1317static int gk20a_probe(struct platform_device *dev)
1318{
1319 struct gk20a *gk20a;
1320 int err;
1321 struct gk20a_platform *platform = NULL;
1322 struct cooling_device_gk20a *gpu_cdev = NULL;
1323
1324 if (dev->dev.of_node) {
1325 const struct of_device_id *match;
1326
1327 match = of_match_device(tegra_gk20a_of_match, &dev->dev);
1328 if (match)
1329 platform = (struct gk20a_platform *)match->data;
1330 } else
1331 platform = (struct gk20a_platform *)dev->dev.platform_data;
1332
1333 if (!platform) {
1334 dev_err(&dev->dev, "no platform data\n");
1335 return -ENODATA;
1336 }
1337
1338 gk20a_dbg_fn("");
1339
1340 platform_set_drvdata(dev, platform);
1341
1342 gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
1343 if (!gk20a) {
1344 dev_err(&dev->dev, "couldn't allocate gk20a support");
1345 return -ENOMEM;
1346 }
1347
1348 set_gk20a(dev, gk20a);
1349 gk20a->dev = dev;
1350
1351 err = gk20a_user_init(dev);
1352 if (err)
1353 return err;
1354
1355 gk20a_init_support(dev);
1356
1357 spin_lock_init(&gk20a->mc_enable_lock);
1358
1359 /* Initialize the platform interface. */
1360 err = platform->probe(dev);
1361 if (err) {
1362 dev_err(&dev->dev, "platform probe failed");
1363 return err;
1364 }
1365
1366 err = gk20a_pm_init(dev);
1367 if (err) {
1368 dev_err(&dev->dev, "pm init failed");
1369 return err;
1370 }
1371
1372 /* Initialise scaling */
1373 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
1374 gk20a_scale_init(dev);
1375
1376 if (platform->late_probe) {
1377 err = platform->late_probe(dev);
1378 if (err) {
1379 dev_err(&dev->dev, "late probe failed");
1380 return err;
1381 }
1382 }
1383
1384 gk20a_debug_init(dev);
1385
1386 /* Set DMA parameters to allow larger sgt lists */
1387 dev->dev.dma_parms = &gk20a->dma_parms;
1388 dma_set_max_seg_size(&dev->dev, UINT_MAX);
1389
1390 gpu_cdev = &gk20a->gk20a_cdev;
1391 gpu_cdev->gk20a_freq_table_size = tegra_gpufreq_table_size_get();
1392 gpu_cdev->gk20a_freq_state = 0;
1393 gpu_cdev->g = gk20a;
1394 gpu_cdev->gk20a_cooling_dev = thermal_cooling_device_register("gk20a_cdev", gpu_cdev,
1395 &tegra_gpu_cooling_ops);
1396
1397 gk20a->gr_idle_timeout_default =
1398 CONFIG_GK20A_DEFAULT_TIMEOUT;
1399 gk20a->timeouts_enabled = true;
1400
1401 /* Set up initial clock gating settings */
1402 if (tegra_platform_is_silicon()) {
1403 gk20a->slcg_enabled = true;
1404 gk20a->blcg_enabled = true;
1405 gk20a->elcg_enabled = true;
1406 gk20a->elpg_enabled = true;
1407 gk20a->aelpg_enabled = true;
1408 }
1409
1410 gk20a_create_sysfs(dev);
1411
1412#ifdef CONFIG_DEBUG_FS
1413 clk_gk20a_debugfs_init(dev);
1414
1415 spin_lock_init(&gk20a->debugfs_lock);
1416 gk20a->mm.ltc_enabled = true;
1417 gk20a->mm.ltc_enabled_debug = true;
1418 gk20a->debugfs_ltc_enabled =
1419 debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
1420 platform->debugfs,
1421 &gk20a->mm.ltc_enabled_debug);
1422 gk20a->mm.ltc_enabled_debug = true;
1423 gk20a->debugfs_gr_idle_timeout_default =
1424 debugfs_create_u32("gr_idle_timeout_default_us",
1425 S_IRUGO|S_IWUSR, platform->debugfs,
1426 &gk20a->gr_idle_timeout_default);
1427 gk20a->debugfs_timeouts_enabled =
1428 debugfs_create_bool("timeouts_enabled",
1429 S_IRUGO|S_IWUSR,
1430 platform->debugfs,
1431 &gk20a->timeouts_enabled);
1432 gk20a_pmu_debugfs_init(dev);
1433#endif
1434
1435#ifdef CONFIG_INPUT_CFBOOST
1436 cfb_add_device(&dev->dev);
1437#endif
1438
1439 return 0;
1440}
1441
1442static int __exit gk20a_remove(struct platform_device *dev)
1443{
1444 struct gk20a *g = get_gk20a(dev);
1445 gk20a_dbg_fn("");
1446
1447#ifdef CONFIG_INPUT_CFBOOST
1448 cfb_remove_device(&dev->dev);
1449#endif
1450
1451 if (g->remove_support)
1452 g->remove_support(dev);
1453
1454 gk20a_user_deinit(dev);
1455
1456 set_gk20a(dev, 0);
1457#ifdef CONFIG_DEBUG_FS
1458 debugfs_remove(g->debugfs_ltc_enabled);
1459 debugfs_remove(g->debugfs_gr_idle_timeout_default);
1460 debugfs_remove(g->debugfs_timeouts_enabled);
1461#endif
1462
1463 kfree(g);
1464
1465#ifdef CONFIG_PM_RUNTIME
1466 pm_runtime_put(&dev->dev);
1467 pm_runtime_disable(&dev->dev);
1468#else
1469 nvhost_module_disable_clk(&dev->dev);
1470#endif
1471
1472 return 0;
1473}
1474
1475static struct platform_driver gk20a_driver = {
1476 .probe = gk20a_probe,
1477 .remove = __exit_p(gk20a_remove),
1478 .driver = {
1479 .owner = THIS_MODULE,
1480 .name = "gk20a",
1481#ifdef CONFIG_OF
1482 .of_match_table = tegra_gk20a_of_match,
1483#endif
1484#ifdef CONFIG_PM
1485 .pm = &gk20a_pm_ops,
1486#endif
1487 }
1488};
1489
1490static int __init gk20a_init(void)
1491{
1492 return platform_driver_register(&gk20a_driver);
1493}
1494
1495static void __exit gk20a_exit(void)
1496{
1497 platform_driver_unregister(&gk20a_driver);
1498}
1499
1500bool is_gk20a_module(struct platform_device *dev)
1501{
1502 return &gk20a_driver.driver == dev->dev.driver;
1503}
1504
1505void gk20a_busy_noresume(struct platform_device *pdev)
1506{
1507 pm_runtime_get_noresume(&pdev->dev);
1508}
1509
1510int gk20a_channel_busy(struct platform_device *pdev)
1511{
1512 int ret = 0;
1513
1514 ret = gk20a_platform_channel_busy(pdev);
1515 if (ret)
1516 return ret;
1517
1518 ret = gk20a_busy(pdev);
1519 if (ret)
1520 gk20a_platform_channel_idle(pdev);
1521
1522 return ret;
1523}
1524
1525void gk20a_channel_idle(struct platform_device *pdev)
1526{
1527 gk20a_idle(pdev);
1528 gk20a_platform_channel_idle(pdev);
1529}
1530
1531int gk20a_busy(struct platform_device *pdev)
1532{
1533 int ret = 0;
1534
1535#ifdef CONFIG_PM_RUNTIME
1536 ret = pm_runtime_get_sync(&pdev->dev);
1537#endif
1538 gk20a_scale_notify_busy(pdev);
1539
1540 return ret < 0 ? ret : 0;
1541}
1542
1543void gk20a_idle(struct platform_device *pdev)
1544{
1545#ifdef CONFIG_PM_RUNTIME
1546 if (atomic_read(&pdev->dev.power.usage_count) == 1)
1547 gk20a_scale_notify_idle(pdev);
1548 pm_runtime_mark_last_busy(&pdev->dev);
1549 pm_runtime_put_sync_autosuspend(&pdev->dev);
1550#else
1551 gk20a_scale_notify_idle(pdev);
1552#endif
1553}
1554
1555void gk20a_disable(struct gk20a *g, u32 units)
1556{
1557 u32 pmc;
1558
1559 gk20a_dbg(gpu_dbg_info, "pmc disable: %08x\n", units);
1560
1561 spin_lock(&g->mc_enable_lock);
1562 pmc = gk20a_readl(g, mc_enable_r());
1563 pmc &= ~units;
1564 gk20a_writel(g, mc_enable_r(), pmc);
1565 spin_unlock(&g->mc_enable_lock);
1566}
1567
1568void gk20a_enable(struct gk20a *g, u32 units)
1569{
1570 u32 pmc;
1571
1572 gk20a_dbg(gpu_dbg_info, "pmc enable: %08x\n", units);
1573
1574 spin_lock(&g->mc_enable_lock);
1575 pmc = gk20a_readl(g, mc_enable_r());
1576 pmc |= units;
1577 gk20a_writel(g, mc_enable_r(), pmc);
1578 spin_unlock(&g->mc_enable_lock);
1579 gk20a_readl(g, mc_enable_r());
1580
1581 udelay(20);
1582}
1583
1584void gk20a_reset(struct gk20a *g, u32 units)
1585{
1586 gk20a_disable(g, units);
1587 udelay(20);
1588 gk20a_enable(g, units);
1589}
1590
1591int gk20a_init_gpu_characteristics(struct gk20a *g)
1592{
1593 struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics;
1594
1595 gpu->L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g);
1596 gpu->on_board_video_memory_size = 0; /* integrated GPU */
1597
1598 gpu->num_gpc = g->gr.gpc_count;
1599 gpu->num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count;
1600
1601 gpu->bus_type = NVHOST_GPU_BUS_TYPE_AXI; /* always AXI for now */
1602
1603 gpu->big_page_size = g->mm.big_page_size;
1604 gpu->compression_page_size = g->mm.compression_page_size;
1605
1606 return 0;
1607}
1608
1609int nvhost_vpr_info_fetch(void)
1610{
1611 struct gk20a *g = get_gk20a(to_platform_device(
1612 bus_find_device_by_name(&platform_bus_type,
1613 NULL, "gk20a.0")));
1614
1615 if (!g) {
1616		pr_info("gk20a isn't ready yet\n");
1617 return 0;
1618 }
1619
1620 return gk20a_mm_mmu_vpr_info_fetch(g);
1621}
1622
1623static const struct firmware *
1624do_request_firmware(struct device *dev, const char *prefix, const char *fw_name)
1625{
1626 const struct firmware *fw;
1627 char *fw_path = NULL;
1628 int path_len, err;
1629
1630 if (prefix) {
1631 path_len = strlen(prefix) + strlen(fw_name);
1632 path_len += 2; /* for the path separator and zero terminator*/
1633
1634 fw_path = kzalloc(sizeof(*fw_path) * path_len, GFP_KERNEL);
1635 if (!fw_path)
1636 return NULL;
1637
1638 sprintf(fw_path, "%s/%s", prefix, fw_name);
1639 fw_name = fw_path;
1640 }
1641
1642 err = request_firmware(&fw, fw_name, dev);
1643 kfree(fw_path);
1644 if (err)
1645 return NULL;
1646 return fw;
1647}
1648
1649/* This is a simple wrapper around request_firmware that takes 'fw_name' and
1650 * applies an IP specific relative path prefix to it. The caller is
1651 * responsible for calling release_firmware later. */
1652const struct firmware *
1653gk20a_request_firmware(struct gk20a *g, const char *fw_name)
1654{
1655 struct device *dev = &g->dev->dev;
1656 const struct firmware *fw;
1657
1658 /* current->fs is NULL when calling from SYS_EXIT.
1659 Add a check here to prevent crash in request_firmware */
1660 if (!current->fs || !fw_name)
1661 return NULL;
1662
1663 BUG_ON(!g->ops.name);
1664 fw = do_request_firmware(dev, g->ops.name, fw_name);
1665
1666#ifdef CONFIG_TEGRA_GK20A
1667 /* TO BE REMOVED - Support loading from legacy SOC specific path. */
1668 if (!fw)
1669 fw = nvhost_client_request_firmware(g->dev, fw_name);
1670#endif
1671
1672 if (!fw) {
1673 dev_err(dev, "failed to get firmware\n");
1674 return NULL;
1675 }
1676
1677 return fw;
1678}
1679
1680module_init(gk20a_init);
1681module_exit(gk20a_exit);
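
The gk20a_busy()/gk20a_idle() helpers above define the runtime-PM contract for touching GPU registers; a hypothetical caller sketch (not part of the patch) that keeps the GPU powered across a register read could look like:

static int example_read_pmc_enable(struct platform_device *pdev, u32 *val)
{
	struct gk20a *g = get_gk20a(pdev);
	int err;

	err = gk20a_busy(pdev);		/* takes a runtime-PM reference */
	if (err)
		return err;

	*val = gk20a_readl(g, mc_enable_r());

	gk20a_idle(pdev);		/* drops the reference, may autosuspend */
	return 0;
}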
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
new file mode 100644
index 00000000..a9081a9d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -0,0 +1,559 @@
1/*
2 * drivers/video/tegra/host/gk20a/gk20a.h
3 *
4 * GK20A Graphics
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#ifndef _NVHOST_GK20A_H_
22#define _NVHOST_GK20A_H_
23
24
25struct gk20a;
26struct fifo_gk20a;
27struct channel_gk20a;
28struct gr_gk20a;
29struct sim_gk20a;
30
31#include <linux/sched.h>
32#include <linux/spinlock.h>
33#include <linux/nvhost_gpu_ioctl.h>
34#include <linux/tegra-soc.h>
35
36#include "../../../arch/arm/mach-tegra/iomap.h"
37
38#include "as_gk20a.h"
39#include "clk_gk20a.h"
40#include "fifo_gk20a.h"
41#include "gr_gk20a.h"
42#include "sim_gk20a.h"
43#include "pmu_gk20a.h"
44#include "priv_ring_gk20a.h"
45#include "therm_gk20a.h"
46#include "platform_gk20a.h"
47
48extern struct platform_device tegra_gk20a_device;
49
50bool is_gk20a_module(struct platform_device *dev);
51
52struct cooling_device_gk20a {
53 struct thermal_cooling_device *gk20a_cooling_dev;
54 unsigned int gk20a_freq_state;
55 unsigned int gk20a_freq_table_size;
56 struct gk20a *g;
57};
58
59struct gpu_ops {
60 struct {
61 int (*determine_L2_size_bytes)(struct gk20a *gk20a);
62 void (*set_max_ways_evict_last)(struct gk20a *g, u32 max_ways);
63 int (*init_comptags)(struct gk20a *g, struct gr_gk20a *gr);
64 int (*clear_comptags)(struct gk20a *g, u32 min, u32 max);
65 void (*set_zbc_color_entry)(struct gk20a *g,
66 struct zbc_entry *color_val,
67 u32 index);
68 void (*set_zbc_depth_entry)(struct gk20a *g,
69 struct zbc_entry *depth_val,
70 u32 index);
71 void (*clear_zbc_color_entry)(struct gk20a *g, u32 index);
72 void (*clear_zbc_depth_entry)(struct gk20a *g, u32 index);
73 int (*init_zbc)(struct gk20a *g, struct gr_gk20a *gr);
74 void (*init_cbc)(struct gk20a *g, struct gr_gk20a *gr);
75 void (*sync_debugfs)(struct gk20a *g);
76 void (*elpg_flush)(struct gk20a *g);
77 } ltc;
78 struct {
79 int (*init_fs_state)(struct gk20a *g);
80 void (*access_smpc_reg)(struct gk20a *g, u32 quad, u32 offset);
81 void (*bundle_cb_defaults)(struct gk20a *g);
82 void (*cb_size_default)(struct gk20a *g);
83 int (*calc_global_ctx_buffer_size)(struct gk20a *g);
84 void (*commit_global_attrib_cb)(struct gk20a *g,
85 struct channel_ctx_gk20a *ch_ctx,
86 u64 addr, bool patch);
87 void (*commit_global_bundle_cb)(struct gk20a *g,
88 struct channel_ctx_gk20a *ch_ctx,
89 u64 addr, u64 size, bool patch);
90 int (*commit_global_cb_manager)(struct gk20a *g,
91 struct channel_gk20a *ch,
92 bool patch);
93 void (*commit_global_pagepool)(struct gk20a *g,
94 struct channel_ctx_gk20a *ch_ctx,
95 u64 addr, u32 size, bool patch);
96 void (*init_gpc_mmu)(struct gk20a *g);
97 int (*handle_sw_method)(struct gk20a *g, u32 addr,
98 u32 class_num, u32 offset, u32 data);
99 void (*set_alpha_circular_buffer_size)(struct gk20a *g,
100 u32 data);
101 void (*set_circular_buffer_size)(struct gk20a *g, u32 data);
102 void (*enable_hww_exceptions)(struct gk20a *g);
103 bool (*is_valid_class)(struct gk20a *g, u32 class_num);
104 void (*get_sm_dsm_perf_regs)(struct gk20a *g,
105 u32 *num_sm_dsm_perf_regs,
106 u32 **sm_dsm_perf_regs,
107 u32 *perf_register_stride);
108 void (*get_sm_dsm_perf_ctrl_regs)(struct gk20a *g,
109 u32 *num_sm_dsm_perf_regs,
110 u32 **sm_dsm_perf_regs,
111 u32 *perf_register_stride);
112 void (*set_hww_esr_report_mask)(struct gk20a *g);
113 int (*setup_alpha_beta_tables)(struct gk20a *g,
114 struct gr_gk20a *gr);
115 } gr;
116 const char *name;
117 struct {
118 void (*init_fs_state)(struct gk20a *g);
119 void (*reset)(struct gk20a *g);
120 void (*init_uncompressed_kind_map)(struct gk20a *g);
121 void (*init_kind_attr)(struct gk20a *g);
122 } fb;
123 struct {
124 void (*slcg_gr_load_gating_prod)(struct gk20a *g, bool prod);
125 void (*slcg_perf_load_gating_prod)(struct gk20a *g, bool prod);
126 void (*blcg_gr_load_gating_prod)(struct gk20a *g, bool prod);
127 void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod);
128 void (*slcg_therm_load_gating_prod)(struct gk20a *g, bool prod);
129 } clock_gating;
130 struct {
131 void (*bind_channel)(struct channel_gk20a *ch_gk20a);
132 } fifo;
133 struct pmu_v {
134 /*used for change of enum zbc update cmd id from ver 0 to ver1*/
135 u32 cmd_id_zbc_table_update;
136 u32 (*get_pmu_cmdline_args_size)(struct pmu_gk20a *pmu);
137 void (*set_pmu_cmdline_args_cpu_freq)(struct pmu_gk20a *pmu,
138 u32 freq);
139 void * (*get_pmu_cmdline_args_ptr)(struct pmu_gk20a *pmu);
140 u32 (*get_pmu_allocation_struct_size)(struct pmu_gk20a *pmu);
141 void (*set_pmu_allocation_ptr)(struct pmu_gk20a *pmu,
142 void **pmu_alloc_ptr, void *assign_ptr);
143 void (*pmu_allocation_set_dmem_size)(struct pmu_gk20a *pmu,
144 void *pmu_alloc_ptr, u16 size);
145 u16 (*pmu_allocation_get_dmem_size)(struct pmu_gk20a *pmu,
146 void *pmu_alloc_ptr);
147 u32 (*pmu_allocation_get_dmem_offset)(struct pmu_gk20a *pmu,
148 void *pmu_alloc_ptr);
149 u32 * (*pmu_allocation_get_dmem_offset_addr)(
150 struct pmu_gk20a *pmu, void *pmu_alloc_ptr);
151 void (*pmu_allocation_set_dmem_offset)(struct pmu_gk20a *pmu,
152 void *pmu_alloc_ptr, u32 offset);
153 void (*get_pmu_init_msg_pmu_queue_params)(
154 struct pmu_queue *queue, u32 id,
155 void *pmu_init_msg);
156 void *(*get_pmu_msg_pmu_init_msg_ptr)(
157 struct pmu_init_msg *init);
158 u16 (*get_pmu_init_msg_pmu_sw_mg_off)(
159 union pmu_init_msg_pmu *init_msg);
160 u16 (*get_pmu_init_msg_pmu_sw_mg_size)(
161 union pmu_init_msg_pmu *init_msg);
162 u32 (*get_pmu_perfmon_cmd_start_size)(void);
163 int (*get_perfmon_cmd_start_offsetofvar)(
164 enum pmu_perfmon_cmd_start_fields field);
165 void (*perfmon_start_set_cmd_type)(struct pmu_perfmon_cmd *pc,
166 u8 value);
167 void (*perfmon_start_set_group_id)(struct pmu_perfmon_cmd *pc,
168 u8 value);
169 void (*perfmon_start_set_state_id)(struct pmu_perfmon_cmd *pc,
170 u8 value);
171 void (*perfmon_start_set_flags)(struct pmu_perfmon_cmd *pc,
172 u8 value);
173 u8 (*perfmon_start_get_flags)(struct pmu_perfmon_cmd *pc);
174 u32 (*get_pmu_perfmon_cmd_init_size)(void);
175 int (*get_perfmon_cmd_init_offsetofvar)(
176 enum pmu_perfmon_cmd_start_fields field);
177 void (*perfmon_cmd_init_set_sample_buffer)(
178 struct pmu_perfmon_cmd *pc, u16 value);
179 void (*perfmon_cmd_init_set_dec_cnt)(
180 struct pmu_perfmon_cmd *pc, u8 value);
181 void (*perfmon_cmd_init_set_base_cnt_id)(
182 struct pmu_perfmon_cmd *pc, u8 value);
183 void (*perfmon_cmd_init_set_samp_period_us)(
184 struct pmu_perfmon_cmd *pc, u32 value);
185 void (*perfmon_cmd_init_set_num_cnt)(struct pmu_perfmon_cmd *pc,
186 u8 value);
187 void (*perfmon_cmd_init_set_mov_avg)(struct pmu_perfmon_cmd *pc,
188 u8 value);
189 void *(*get_pmu_seq_in_a_ptr)(
190 struct pmu_sequence *seq);
191 void *(*get_pmu_seq_out_a_ptr)(
192 struct pmu_sequence *seq);
193 } pmu_ver;
194};
195
196struct gk20a {
197 struct platform_device *dev;
198
199 struct resource *reg_mem;
200 void __iomem *regs;
201
202 struct resource *bar1_mem;
203 void __iomem *bar1;
204
205 bool power_on;
206 bool irq_requested;
207
208 struct clk_gk20a clk;
209 struct fifo_gk20a fifo;
210 struct gr_gk20a gr;
211 struct sim_gk20a sim;
212 struct mm_gk20a mm;
213 struct pmu_gk20a pmu;
214 struct cooling_device_gk20a gk20a_cdev;
215
216	/* Save pmu fw here so that it lives across suspend/resume.
217 pmu suspend destroys all pmu sw/hw states. Loading pmu
218 fw in resume crashes when the resume is from sys_exit. */
219 const struct firmware *pmu_fw;
220
221 u32 gr_idle_timeout_default;
222 u32 timeouts_enabled;
223
224 bool slcg_enabled;
225 bool blcg_enabled;
226 bool elcg_enabled;
227 bool elpg_enabled;
228 bool aelpg_enabled;
229
230#ifdef CONFIG_DEBUG_FS
231 spinlock_t debugfs_lock;
232 struct dentry *debugfs_ltc_enabled;
233 struct dentry *debugfs_timeouts_enabled;
234 struct dentry *debugfs_gr_idle_timeout_default;
235#endif
236 struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
237
238 /* held while manipulating # of debug/profiler sessions present */
239 /* also prevents debug sessions from attaching until released */
240 struct mutex dbg_sessions_lock;
241 int dbg_sessions; /* number attached */
242 int dbg_powergating_disabled_refcount; /*refcount for pg disable */
243
244 void (*remove_support)(struct platform_device *);
245
246 u64 pg_ingating_time_us;
247 u64 pg_ungating_time_us;
248 u32 pg_gating_cnt;
249
250 spinlock_t mc_enable_lock;
251
252 struct nvhost_gpu_characteristics gpu_characteristics;
253
254 struct {
255 struct cdev cdev;
256 struct device *node;
257 } channel;
258
259 struct gk20a_as as;
260
261 struct {
262 struct cdev cdev;
263 struct device *node;
264 } ctrl;
265
266 struct {
267 struct cdev cdev;
268 struct device *node;
269 } dbg;
270
271 struct {
272 struct cdev cdev;
273 struct device *node;
274 } prof;
275
276 struct mutex client_lock;
277 int client_refcount; /* open channels and ctrl nodes */
278
279 dev_t cdev_region;
280 struct class *class;
281
282 struct gpu_ops ops;
283
284 int irq_stall;
285 int irq_nonstall;
286
287 struct generic_pm_domain pd;
288
289 struct devfreq *devfreq;
290
291 struct gk20a_scale_profile *scale_profile;
292
293 struct device_dma_parameters dma_parms;
294};
295
296static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
297{
298 return g->timeouts_enabled ?
299 g->gr_idle_timeout_default : MAX_SCHEDULE_TIMEOUT;
300}
301
302static inline struct gk20a *get_gk20a(struct platform_device *dev)
303{
304 return gk20a_get_platform(dev)->g;
305}
306
307enum BAR0_DEBUG_OPERATION {
308 BARO_ZERO_NOP = 0,
309 OP_END = 'DONE',
310 BAR0_READ32 = '0R32',
311 BAR0_WRITE32 = '0W32',
312};
313
314struct share_buffer_head {
315 enum BAR0_DEBUG_OPERATION operation;
316/* size of the operation item */
317 u32 size;
318 u32 completed;
319 u32 failed;
320 u64 context;
321 u64 completion_callback;
322};
323
324struct gk20a_cyclestate_buffer_elem {
325 struct share_buffer_head head;
326/* in */
327 u64 p_data;
328 u64 p_done;
329 u32 offset_bar0;
330 u16 first_bit;
331 u16 last_bit;
332/* out */
333/* keep 64 bits to be consistent */
334 u64 data;
335};
336
337/* debug accessories */
338
339#ifdef CONFIG_DEBUG_FS
340 /* debug info, default is compiled-in but effectively disabled (0 mask) */
341 #define GK20A_DEBUG
342 /* e.g.: echo 1 > /d/tegra_host/dbg_mask */
343 #define GK20A_DEFAULT_DBG_MASK 0
344#else
345 /* manually enable by defining GK20A_DEBUG and setting the mask */
346 /*#define GK20A_DEBUG*/
347 #define GK20A_DEFAULT_DBG_MASK (gpu_dbg_info)
348#endif
349
350enum gk20a_dbg_categories {
351 gpu_dbg_info = BIT(0), /* lightly verbose info */
352 gpu_dbg_fn = BIT(2), /* fn name tracing */
353 gpu_dbg_reg = BIT(3), /* register accesses, very verbose */
354 gpu_dbg_pte = BIT(4), /* gmmu ptes */
355 gpu_dbg_intr = BIT(5), /* interrupts */
356 gpu_dbg_pmu = BIT(6), /* gk20a pmu */
357 gpu_dbg_clk = BIT(7), /* gk20a clk */
358 gpu_dbg_map = BIT(8), /* mem mappings */
359 gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */
360 gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
361};
362
363#if defined(GK20A_DEBUG)
364extern u32 gk20a_dbg_mask;
365extern u32 gk20a_dbg_ftrace;
366#define gk20a_dbg(dbg_mask, format, arg...) \
367do { \
368 if (unlikely((dbg_mask) & gk20a_dbg_mask)) { \
369 if (gk20a_dbg_ftrace) \
370 trace_printk(format "\n", ##arg); \
371 else \
372 pr_info("gk20a %s: " format "\n", \
373 __func__, ##arg); \
374 } \
375} while (0)
376
377#else /* GK20A_DEBUG */
378#define gk20a_dbg(dbg_mask, format, arg...) \
379do { \
380 if (0) \
381 pr_info("gk20a %s: " format "\n", __func__, ##arg);\
382} while (0)
383
384#endif
385
386#define gk20a_err(d, fmt, arg...) \
387 dev_err(d, "%s: " fmt "\n", __func__, ##arg)
388
389#define gk20a_warn(d, fmt, arg...) \
390 dev_warn(d, "%s: " fmt "\n", __func__, ##arg)
391
392#define gk20a_dbg_fn(fmt, arg...) \
393 gk20a_dbg(gpu_dbg_fn, fmt, ##arg)
394
395#define gk20a_dbg_info(fmt, arg...) \
396 gk20a_dbg(gpu_dbg_info, fmt, ##arg)
397
398/* mem access with dbg_mem logging */
399static inline u8 gk20a_mem_rd08(void *ptr, int b)
400{
401 u8 _b = ((const u8 *)ptr)[b];
402#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
403 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, _b);
404#endif
405 return _b;
406}
407static inline u16 gk20a_mem_rd16(void *ptr, int s)
408{
409 u16 _s = ((const u16 *)ptr)[s];
410#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
411 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, _s);
412#endif
413 return _s;
414}
415static inline u32 gk20a_mem_rd32(void *ptr, int w)
416{
417 u32 _w = ((const u32 *)ptr)[w];
418#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
419 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + sizeof(u32)*w, _w);
420#endif
421 return _w;
422}
423static inline void gk20a_mem_wr08(void *ptr, int b, u8 data)
424{
425#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
426 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, data);
427#endif
428 ((u8 *)ptr)[b] = data;
429}
430static inline void gk20a_mem_wr16(void *ptr, int s, u16 data)
431{
432#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
433 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, data);
434#endif
435 ((u16 *)ptr)[s] = data;
436}
437static inline void gk20a_mem_wr32(void *ptr, int w, u32 data)
438{
439#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
440 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u32)*w, data);
441#endif
442 ((u32 *)ptr)[w] = data;
443}
444
445/* register accessors */
446static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v)
447{
448 gk20a_dbg(gpu_dbg_reg, " r=0x%x v=0x%x", r, v);
449 writel(v, g->regs + r);
450}
451static inline u32 gk20a_readl(struct gk20a *g, u32 r)
452{
453 u32 v = readl(g->regs + r);
454 gk20a_dbg(gpu_dbg_reg, " r=0x%x v=0x%x", r, v);
455 return v;
456}
457
458static inline void gk20a_bar1_writel(struct gk20a *g, u32 b, u32 v)
459{
460 gk20a_dbg(gpu_dbg_reg, " b=0x%x v=0x%x", b, v);
461 writel(v, g->bar1 + b);
462}
463
464static inline u32 gk20a_bar1_readl(struct gk20a *g, u32 b)
465{
466 u32 v = readl(g->bar1 + b);
467 gk20a_dbg(gpu_dbg_reg, " b=0x%x v=0x%x", b, v);
468 return v;
469}
470
471/* convenience */
472static inline struct device *dev_from_gk20a(struct gk20a *g)
473{
474 return &g->dev->dev;
475}
476static inline struct gk20a *gk20a_from_as(struct gk20a_as *as)
477{
478 return container_of(as, struct gk20a, as);
479}
480static inline u32 u64_hi32(u64 n)
481{
482 return (u32)((n >> 32) & ~(u32)0);
483}
484
485static inline u32 u64_lo32(u64 n)
486{
487 return (u32)(n & ~(u32)0);
488}
489
490static inline u32 set_field(u32 val, u32 mask, u32 field)
491{
492 return ((val & ~mask) | field);
493}
494
495/* invalidate channel lookup tlb */
496static inline void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr)
497{
498 spin_lock(&gr->ch_tlb_lock);
499 memset(gr->chid_tlb, 0,
500 sizeof(struct gr_channel_map_tlb_entry) *
501 GR_CHANNEL_MAP_TLB_SIZE);
502 spin_unlock(&gr->ch_tlb_lock);
503}
504
505/* classes that the device supports */
506/* TBD: get these from an open-sourced SDK? */
507enum {
508 KEPLER_C = 0xA297,
509 FERMI_TWOD_A = 0x902D,
510 KEPLER_COMPUTE_A = 0xA0C0,
511 KEPLER_INLINE_TO_MEMORY_A = 0xA040,
512 KEPLER_DMA_COPY_A = 0xA0B5, /*not sure about this one*/
513};
514
515#if defined(CONFIG_GK20A_PMU)
516static inline int support_gk20a_pmu(void)
517{
518 return 1;
519}
520#else
521static inline int support_gk20a_pmu(void){return 0;}
522#endif
523
524void gk20a_create_sysfs(struct platform_device *dev);
525
526#ifdef CONFIG_DEBUG_FS
527int clk_gk20a_debugfs_init(struct platform_device *dev);
528#endif
529
530#define GK20A_BAR0_IORESOURCE_MEM 0
531#define GK20A_BAR1_IORESOURCE_MEM 1
532#define GK20A_SIM_IORESOURCE_MEM 2
533
534void gk20a_busy_noresume(struct platform_device *pdev);
535int gk20a_busy(struct platform_device *pdev);
536void gk20a_idle(struct platform_device *pdev);
537int gk20a_channel_busy(struct platform_device *pdev);
538void gk20a_channel_idle(struct platform_device *pdev);
539void gk20a_disable(struct gk20a *g, u32 units);
540void gk20a_enable(struct gk20a *g, u32 units);
541void gk20a_reset(struct gk20a *g, u32 units);
542int gk20a_get_client(struct gk20a *g);
543void gk20a_put_client(struct gk20a *g);
544
545const struct firmware *
546gk20a_request_firmware(struct gk20a *g, const char *fw_name);
547
548#define NVHOST_GPU_ARCHITECTURE_SHIFT 4
549
550/* constructs unique and compact GPUID from nvhost_gpu_characteristics
551 * arch/impl fields */
552#define GK20A_GPUID(arch, impl) ((u32) ((arch) | (impl)))
553
554#define GK20A_GPUID_GK20A \
555 GK20A_GPUID(NVHOST_GPU_ARCH_GK100, NVHOST_GPU_IMPL_GK20A)
556
557int gk20a_init_gpu_characteristics(struct gk20a *g);
558
559#endif /* _NVHOST_GK20A_H_ */
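The register accessors and set_field() above are the building blocks for read-modify-write sequences throughout the driver. Below is a minimal sketch of that pattern; the EXAMPLE_* macros are illustrative placeholders only (real code uses the generated hw_*_gk20a.h field helpers), not definitions from this change.

/* illustrative placeholders -- not real gk20a register/field definitions */
#define EXAMPLE_REG_R		0x00000200U	/* hypothetical register offset */
#define EXAMPLE_FIELD_M		0x0000000fU	/* hypothetical field mask */
#define EXAMPLE_FIELD_F(v)	((v) & 0xfU)	/* hypothetical field value */

static void example_reg_rmw(struct gk20a *g, u32 val)
{
	/* read, update one field, write back -- gk20a_readl/gk20a_writel log
	   each access under gpu_dbg_reg when the debug mask enables it */
	u32 reg = gk20a_readl(g, EXAMPLE_REG_R);

	reg = set_field(reg, EXAMPLE_FIELD_M, EXAMPLE_FIELD_F(val));
	gk20a_writel(g, EXAMPLE_REG_R, reg);
}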
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
new file mode 100644
index 00000000..32c003b6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
@@ -0,0 +1,1247 @@
1/*
2 * gk20a allocator
3 *
4 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include "gk20a_allocator.h"
20
21static inline void link_block_list(struct gk20a_allocator *allocator,
22 struct gk20a_alloc_block *block,
23 struct gk20a_alloc_block *prev,
24 struct rb_node *rb_parent);
25static inline void link_block_rb(struct gk20a_allocator *allocator,
26 struct gk20a_alloc_block *block,
27 struct rb_node **rb_link,
28 struct rb_node *rb_parent);
29static void link_block(struct gk20a_allocator *allocator,
30 struct gk20a_alloc_block *block,
31 struct gk20a_alloc_block *prev, struct rb_node **rb_link,
32 struct rb_node *rb_parent);
33static void insert_block(struct gk20a_allocator *allocator,
34 struct gk20a_alloc_block *block);
35
36static void unlink_block(struct gk20a_allocator *allocator,
37 struct gk20a_alloc_block *block,
38 struct gk20a_alloc_block *prev);
39static struct gk20a_alloc_block *unlink_blocks(
40 struct gk20a_allocator *allocator,
41 struct gk20a_alloc_block *block,
42 struct gk20a_alloc_block *prev, u32 end);
43
44static struct gk20a_alloc_block *find_block(
45 struct gk20a_allocator *allocator, u32 addr);
46static struct gk20a_alloc_block *find_block_prev(
47 struct gk20a_allocator *allocator, u32 addr,
48 struct gk20a_alloc_block **pprev);
49static struct gk20a_alloc_block *find_block_prepare(
50 struct gk20a_allocator *allocator, u32 addr,
51 struct gk20a_alloc_block **pprev, struct rb_node ***rb_link,
52 struct rb_node **rb_parent);
53
54static u32 check_free_space(u32 addr, u32 limit, u32 len, u32 align);
55static void update_free_addr_cache(struct gk20a_allocator *allocator,
56 struct gk20a_alloc_block *block,
57 u32 addr, u32 len, bool free);
58static int find_free_area(struct gk20a_allocator *allocator,
59 u32 *addr, u32 len);
60static int find_free_area_nc(struct gk20a_allocator *allocator,
61 u32 *addr, u32 *len);
62
63static void adjust_block(struct gk20a_alloc_block *block,
64 u32 start, u32 end,
65 struct gk20a_alloc_block *insert);
66static struct gk20a_alloc_block *merge_block(
67 struct gk20a_allocator *allocator,
68 struct gk20a_alloc_block *block, u32 addr, u32 end);
69static int split_block(struct gk20a_allocator *allocator,
70 struct gk20a_alloc_block *block,
71 u32 addr, int new_below);
72
73static int block_alloc_single_locked(struct gk20a_allocator *allocator,
74 u32 *addr, u32 len);
75static int block_alloc_list_locked(struct gk20a_allocator *allocator,
76 u32 *addr, u32 len,
77 struct gk20a_alloc_block **pblock);
78static int block_free_locked(struct gk20a_allocator *allocator,
79 u32 addr, u32 len);
80static void block_free_list_locked(struct gk20a_allocator *allocator,
81 struct gk20a_alloc_block *list);
82
83/* link a block into allocator block list */
84static inline void link_block_list(struct gk20a_allocator *allocator,
85 struct gk20a_alloc_block *block,
86 struct gk20a_alloc_block *prev,
87 struct rb_node *rb_parent)
88{
89 struct gk20a_alloc_block *next;
90
91 block->prev = prev;
92 if (prev) {
93 next = prev->next;
94 prev->next = block;
95 } else {
96 allocator->block_first = block;
97 if (rb_parent)
98 next = rb_entry(rb_parent,
99 struct gk20a_alloc_block, rb);
100 else
101 next = NULL;
102 }
103 block->next = next;
104 if (next)
105 next->prev = block;
106}
107
108/* link a block into allocator rb tree */
109static inline void link_block_rb(struct gk20a_allocator *allocator,
110 struct gk20a_alloc_block *block, struct rb_node **rb_link,
111 struct rb_node *rb_parent)
112{
113 rb_link_node(&block->rb, rb_parent, rb_link);
114 rb_insert_color(&block->rb, &allocator->rb_root);
115}
116
117/* add a block to allocator with known location */
118static void link_block(struct gk20a_allocator *allocator,
119 struct gk20a_alloc_block *block,
120 struct gk20a_alloc_block *prev, struct rb_node **rb_link,
121 struct rb_node *rb_parent)
122{
123 struct gk20a_alloc_block *next;
124
125 link_block_list(allocator, block, prev, rb_parent);
126 link_block_rb(allocator, block, rb_link, rb_parent);
127 allocator->block_count++;
128
129 next = block->next;
130 allocator_dbg(allocator, "link new block %d:%d between block %d:%d and block %d:%d",
131 block->start, block->end,
132 prev ? prev->start : -1, prev ? prev->end : -1,
133 next ? next->start : -1, next ? next->end : -1);
134}
135
136/* add a block to allocator */
137static void insert_block(struct gk20a_allocator *allocator,
138 struct gk20a_alloc_block *block)
139{
140 struct gk20a_alloc_block *prev;
141 struct rb_node **rb_link, *rb_parent;
142
143 find_block_prepare(allocator, block->start,
144 &prev, &rb_link, &rb_parent);
145 link_block(allocator, block, prev, rb_link, rb_parent);
146}
147
148/* remove a block from allocator */
149static void unlink_block(struct gk20a_allocator *allocator,
150 struct gk20a_alloc_block *block,
151 struct gk20a_alloc_block *prev)
152{
153 struct gk20a_alloc_block *next = block->next;
154
155 allocator_dbg(allocator, "unlink block %d:%d between block %d:%d and block %d:%d",
156 block->start, block->end,
157 prev ? prev->start : -1, prev ? prev->end : -1,
158 next ? next->start : -1, next ? next->end : -1);
159
160 BUG_ON(block->start < allocator->base);
161 BUG_ON(block->end > allocator->limit);
162
163 if (prev)
164 prev->next = next;
165 else
166 allocator->block_first = next;
167
168 if (next)
169 next->prev = prev;
170 rb_erase(&block->rb, &allocator->rb_root);
171 if (allocator->block_recent == block)
172 allocator->block_recent = prev;
173
174 allocator->block_count--;
175}
176
177/* remove a list of blocks from the allocator. the list can contain both
178 regular blocks and non-contiguous blocks. skip the non-contiguous
179 blocks, move the regular blocks onto a separate list, and return its head */
180static struct gk20a_alloc_block *
181unlink_blocks(struct gk20a_allocator *allocator,
182 struct gk20a_alloc_block *block,
183 struct gk20a_alloc_block *prev,
184 u32 end)
185{
186 struct gk20a_alloc_block **insertion_point;
187 struct gk20a_alloc_block *last_unfreed_block = prev;
188 struct gk20a_alloc_block *last_freed_block = NULL;
189 struct gk20a_alloc_block *first_freed_block = NULL;
190
191 insertion_point = (prev ? &prev->next : &allocator->block_first);
192 *insertion_point = NULL;
193
194 do {
195 if (!block->nc_block) {
196 allocator_dbg(allocator, "unlink block %d:%d",
197 block->start, block->end);
198 if (last_freed_block)
199 last_freed_block->next = block;
200 block->prev = last_freed_block;
201 rb_erase(&block->rb, &allocator->rb_root);
202 last_freed_block = block;
203 allocator->block_count--;
204 if (!first_freed_block)
205 first_freed_block = block;
206 } else {
207 allocator_dbg(allocator, "skip nc block %d:%d",
208 block->start, block->end);
209 if (!*insertion_point)
210 *insertion_point = block;
211 if (last_unfreed_block)
212 last_unfreed_block->next = block;
213 block->prev = last_unfreed_block;
214 last_unfreed_block = block;
215 }
216 block = block->next;
217 } while (block && block->start < end);
218
219 if (!*insertion_point)
220 *insertion_point = block;
221
222 if (block)
223 block->prev = last_unfreed_block;
224 if (last_unfreed_block)
225 last_unfreed_block->next = block;
226 if (last_freed_block)
227 last_freed_block->next = NULL;
228
229 allocator->block_recent = NULL;
230
231 return first_freed_block;
232}
233
234/* Look up the first block which satisfies addr < block->end,
235 NULL if none */
236static struct gk20a_alloc_block *
237find_block(struct gk20a_allocator *allocator, u32 addr)
238{
239 struct gk20a_alloc_block *block = allocator->block_recent;
240
241 if (!(block && block->end > addr && block->start <= addr)) {
242 struct rb_node *rb_node;
243
244 rb_node = allocator->rb_root.rb_node;
245 block = NULL;
246
247 while (rb_node) {
248 struct gk20a_alloc_block *block_tmp;
249
250 block_tmp = rb_entry(rb_node,
251 struct gk20a_alloc_block, rb);
252
253 if (block_tmp->end > addr) {
254 block = block_tmp;
255 if (block_tmp->start <= addr)
256 break;
257 rb_node = rb_node->rb_left;
258 } else
259 rb_node = rb_node->rb_right;
260 if (block)
261 allocator->block_recent = block;
262 }
263 }
264 return block;
265}
266
267/* Same as find_block, but also return a pointer to the previous block */
268static struct gk20a_alloc_block *
269find_block_prev(struct gk20a_allocator *allocator, u32 addr,
270 struct gk20a_alloc_block **pprev)
271{
272 struct gk20a_alloc_block *block = NULL, *prev = NULL;
273 struct rb_node *rb_node;
274 if (!allocator)
275 goto out;
276
277 block = allocator->block_first;
278
279 rb_node = allocator->rb_root.rb_node;
280
281 while (rb_node) {
282 struct gk20a_alloc_block *block_tmp;
283 block_tmp = rb_entry(rb_node, struct gk20a_alloc_block, rb);
284
285 if (addr < block_tmp->end)
286 rb_node = rb_node->rb_left;
287 else {
288 prev = block_tmp;
289 if (!prev->next || addr < prev->next->end)
290 break;
291 rb_node = rb_node->rb_right;
292 }
293 }
294
295out:
296 *pprev = prev;
297 return prev ? prev->next : block;
298}
299
300/* Same as find_block, but also return a pointer to the previous block
301 and return rb_node to prepare for rbtree insertion */
302static struct gk20a_alloc_block *
303find_block_prepare(struct gk20a_allocator *allocator, u32 addr,
304 struct gk20a_alloc_block **pprev, struct rb_node ***rb_link,
305 struct rb_node **rb_parent)
306{
307 struct gk20a_alloc_block *block;
308 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
309
310 __rb_link = &allocator->rb_root.rb_node;
311 rb_prev = __rb_parent = NULL;
312 block = NULL;
313
314 while (*__rb_link) {
315 struct gk20a_alloc_block *block_tmp;
316
317 __rb_parent = *__rb_link;
318 block_tmp = rb_entry(__rb_parent,
319 struct gk20a_alloc_block, rb);
320
321 if (block_tmp->end > addr) {
322 block = block_tmp;
323 if (block_tmp->start <= addr)
324 break;
325 __rb_link = &__rb_parent->rb_left;
326 } else {
327 rb_prev = __rb_parent;
328 __rb_link = &__rb_parent->rb_right;
329 }
330 }
331
332 *pprev = NULL;
333 if (rb_prev)
334 *pprev = rb_entry(rb_prev, struct gk20a_alloc_block, rb);
335 *rb_link = __rb_link;
336 *rb_parent = __rb_parent;
337 return block;
338}
339
340/* return available space */
341static u32 check_free_space(u32 addr, u32 limit, u32 len, u32 align)
342{
343 if (addr >= limit)
344 return 0;
345 if (addr + len <= limit)
346 return len;
347 return (limit - addr) & ~(align - 1);
348}
349
350/* update first_free_addr/last_free_addr based on a newly freed or
351 allocated address range; called from both the free and alloc paths */
352static void update_free_addr_cache(struct gk20a_allocator *allocator,
353 struct gk20a_alloc_block *next,
354 u32 addr, u32 len, bool free)
355{
356 /* update from block free */
357 if (free) {
358 if (allocator->first_free_addr > addr)
359 allocator->first_free_addr = addr;
360 } else { /* update from block alloc */
361 if (allocator->last_free_addr < addr + len)
362 allocator->last_free_addr = addr + len;
363 if (allocator->first_free_addr == addr) {
364 if (!next || next->start > addr + len)
365 allocator->first_free_addr = addr + len;
366 else
367 allocator->first_free_addr = next->end;
368 }
369 }
370
371 if (allocator->first_free_addr > allocator->last_free_addr)
372 allocator->first_free_addr = allocator->last_free_addr;
373}
374
375/* find a free address range for a fixed len */
376static int find_free_area(struct gk20a_allocator *allocator,
377 u32 *addr, u32 len)
378{
379 struct gk20a_alloc_block *block;
380 u32 start_addr, search_base, search_limit;
381
382 /* fixed addr allocation */
383 /* note: constraints for fixed are handled by caller */
384 if (*addr) {
385 block = find_block(allocator, *addr);
386 if (allocator->limit - len >= *addr &&
387 (!block || *addr + len <= block->start)) {
388 update_free_addr_cache(allocator, block,
389 *addr, len, false);
390 return 0;
391 } else
392 return -ENOMEM;
393 }
394
395 if (!allocator->constraint.enable) {
396 search_base = allocator->base;
397 search_limit = allocator->limit;
398 } else {
399 start_addr = *addr = allocator->constraint.base;
400 search_base = allocator->constraint.base;
401 search_limit = allocator->constraint.limit;
402 }
403
404 /* cached_hole_size has max free space up to last_free_addr */
405 if (len > allocator->cached_hole_size)
406 start_addr = *addr = allocator->last_free_addr;
407 else {
408 start_addr = *addr = allocator->base;
409 allocator->cached_hole_size = 0;
410 }
411
412 allocator_dbg(allocator, "start search addr : %d", start_addr);
413
414full_search:
415 for (block = find_block(allocator, *addr);; block = block->next) {
416 if (search_limit - len < *addr) {
417 /* start a new search in case we missed any hole */
418 if (start_addr != search_base) {
419 start_addr = *addr = search_base;
420 allocator->cached_hole_size = 0;
421 allocator_dbg(allocator, "start a new search from base");
422 goto full_search;
423 }
424 return -ENOMEM;
425 }
426 if (!block || *addr + len <= block->start) {
427 update_free_addr_cache(allocator, block,
428 *addr, len, false);
429 allocator_dbg(allocator, "free space from %d, len %d",
430 *addr, len);
431 allocator_dbg(allocator, "next free addr: %d",
432 allocator->last_free_addr);
433 return 0;
434 }
435 if (*addr + allocator->cached_hole_size < block->start)
436 allocator->cached_hole_size = block->start - *addr;
437 *addr = block->end;
438 }
439}
440
441/* find a free address range; it may be shorter than len as long as it meets the alignment requirement */
442static int find_free_area_nc(struct gk20a_allocator *allocator,
443 u32 *addr, u32 *len)
444{
445 struct gk20a_alloc_block *block;
446 u32 start_addr;
447 u32 avail_len;
448
449 /* fixed addr allocation */
450 if (*addr) {
451 block = find_block(allocator, *addr);
452 if (allocator->limit - *len >= *addr) {
453 if (!block)
454 return 0;
455
456 avail_len = check_free_space(*addr, block->start,
457 *len, allocator->align);
458 if (avail_len != 0) {
459 update_free_addr_cache(allocator, block,
460 *addr, avail_len, false);
461 allocator_dbg(allocator,
462 "free space between %d, %d, len %d",
463 *addr, block->start, avail_len);
464 allocator_dbg(allocator, "next free addr: %d",
465 allocator->last_free_addr);
466 *len = avail_len;
467 return 0;
468 } else
469 return -ENOMEM;
470 } else
471 return -ENOMEM;
472 }
473
474 start_addr = *addr = allocator->first_free_addr;
475
476 allocator_dbg(allocator, "start search addr : %d", start_addr);
477
478 for (block = find_block(allocator, *addr);; block = block->next) {
479 if (allocator->limit - *len < *addr)
480 return -ENOMEM;
481 if (!block) {
482 update_free_addr_cache(allocator, block,
483 *addr, *len, false);
484 allocator_dbg(allocator, "free space from %d, len %d",
485 *addr, *len);
486 allocator_dbg(allocator, "next free addr: %d",
487 allocator->first_free_addr);
488 return 0;
489 }
490
491 avail_len = check_free_space(*addr, block->start,
492 *len, allocator->align);
493 if (avail_len != 0) {
494 update_free_addr_cache(allocator, block,
495 *addr, avail_len, false);
496 allocator_dbg(allocator, "free space between %d, %d, len %d",
497 *addr, block->start, avail_len);
498 allocator_dbg(allocator, "next free addr: %d",
499 allocator->first_free_addr);
500 *len = avail_len;
501 return 0;
502 }
503 if (*addr + allocator->cached_hole_size < block->start)
504 allocator->cached_hole_size = block->start - *addr;
505 *addr = block->end;
506 }
507}
508
509/* expand or shrink a block to a new start and end;
510 for a shrink, split_block() provides the block to insert */
511static void adjust_block(struct gk20a_alloc_block *block,
512 u32 start, u32 end, struct gk20a_alloc_block *insert)
513{
514 struct gk20a_allocator *allocator = block->allocator;
515
516 allocator_dbg(allocator, "curr block %d:%d, new start %d, new end %d",
517 block->start, block->end, start, end);
518
519 /* expand */
520 if (!insert) {
521 if (start == block->end) {
522 struct gk20a_alloc_block *next = block->next;
523
524 if (next && end == next->start) {
525 /* ....AAAA.... */
526 /* PPPP....NNNN */
527 /* PPPPPPPPPPPP */
528 unlink_block(allocator, next, block);
529 block->end = next->end;
530 kmem_cache_free(allocator->block_cache, next);
531 } else {
532 /* ....AAAA.... */
533 /* PPPP........ */
534 /* PPPPPPPP.... */
535 block->end = end;
536 }
537 }
538
539 if (end == block->start) {
540 /* ....AAAA.... */
541 /* ........NNNN */
542 /* PP..NNNNNNNN ....NNNNNNNN */
543 block->start = start;
544 }
545 } else { /* shrink */
546 /* BBBBBBBB -> BBBBIIII OR BBBBBBBB -> IIIIBBBB */
547 block->start = start;
548 block->end = end;
549 insert_block(allocator, insert);
550 }
551}
552
553/* given a range [addr, end], merge it with blocks before or after or both
554 if they can be combined into a contiguous block */
555static struct gk20a_alloc_block *
556merge_block(struct gk20a_allocator *allocator,
557 struct gk20a_alloc_block *prev, u32 addr, u32 end)
558{
559 struct gk20a_alloc_block *next;
560
561 if (prev)
562 next = prev->next;
563 else
564 next = allocator->block_first;
565
566 allocator_dbg(allocator, "curr block %d:%d", addr, end);
567 if (prev)
568 allocator_dbg(allocator, "prev block %d:%d",
569 prev->start, prev->end);
570 if (next)
571 allocator_dbg(allocator, "next block %d:%d",
572 next->start, next->end);
573
574 /* don't merge with non-contiguous allocation block */
575 if (prev && prev->end == addr && !prev->nc_block) {
576 adjust_block(prev, addr, end, NULL);
577 return prev;
578 }
579
580 /* don't merge with non-contiguous allocation block */
581 if (next && end == next->start && !next->nc_block) {
582 adjust_block(next, addr, end, NULL);
583 return next;
584 }
585
586 return NULL;
587}
588
589/* split a block based on addr. addr must be within (start, end).
590 if new_below == 1, link new block before adjusted current block */
591static int split_block(struct gk20a_allocator *allocator,
592 struct gk20a_alloc_block *block, u32 addr, int new_below)
593{
594 struct gk20a_alloc_block *new_block;
595
596 allocator_dbg(allocator, "start %d, split %d, end %d, new_below %d",
597 block->start, addr, block->end, new_below);
598
599 BUG_ON(!(addr > block->start && addr < block->end));
600
601 new_block = kmem_cache_alloc(allocator->block_cache, GFP_KERNEL);
602 if (!new_block)
603 return -ENOMEM;
604
605 *new_block = *block;
606
607 if (new_below)
608 new_block->end = addr;
609 else
610 new_block->start = addr;
611
612 if (new_below)
613 adjust_block(block, addr, block->end, new_block);
614 else
615 adjust_block(block, block->start, addr, new_block);
616
617 return 0;
618}
619
620/* free a list of blocks */
621static void free_blocks(struct gk20a_allocator *allocator,
622 struct gk20a_alloc_block *block)
623{
624 struct gk20a_alloc_block *curr_block;
625 while (block) {
626 curr_block = block;
627 block = block->next;
628 kmem_cache_free(allocator->block_cache, curr_block);
629 }
630}
631
632/* called with rw_sema acquired */
633static int block_alloc_single_locked(struct gk20a_allocator *allocator,
634 u32 *addr_req, u32 len)
635{
636 struct gk20a_alloc_block *block, *prev;
637 struct rb_node **rb_link, *rb_parent;
638 u32 addr = *addr_req;
639 int err;
640
641 *addr_req = ~0;
642
643 err = find_free_area(allocator, &addr, len);
644 if (err)
645 return err;
646
647 find_block_prepare(allocator, addr, &prev, &rb_link, &rb_parent);
648
649 /* merge requested free space with existing block(s)
650 if they can be combined into one contiguous block */
651 block = merge_block(allocator, prev, addr, addr + len);
652 if (block) {
653 *addr_req = addr;
654 return 0;
655 }
656
657 /* create a new block if cannot merge */
658 block = kmem_cache_zalloc(allocator->block_cache, GFP_KERNEL);
659 if (!block)
660 return -ENOMEM;
661
662 block->allocator = allocator;
663 block->start = addr;
664 block->end = addr + len;
665
666 link_block(allocator, block, prev, rb_link, rb_parent);
667
668 *addr_req = addr;
669
670 return 0;
671}
672
673static int block_alloc_list_locked(struct gk20a_allocator *allocator,
674 u32 *addr_req, u32 nc_len, struct gk20a_alloc_block **pblock)
675{
676 struct gk20a_alloc_block *block;
677 struct gk20a_alloc_block *nc_head = NULL, *nc_prev = NULL;
678 u32 addr = *addr_req, len = nc_len;
679 int err = 0;
680
681 *addr_req = ~0;
682
683 while (nc_len > 0) {
684 err = find_free_area_nc(allocator, &addr, &len);
685 if (err) {
686 allocator_dbg(allocator, "not enough free space");
687 goto clean_up;
688 }
689
690 /* never merge non-contiguous allocation block,
691 just create a new block */
692 block = kmem_cache_zalloc(allocator->block_cache,
693 GFP_KERNEL);
694 if (!block) {
695 err = -ENOMEM;
696 goto clean_up;
697 }
698
699 block->allocator = allocator;
700 block->start = addr;
701 block->end = addr + len;
702
703 insert_block(allocator, block);
704
705 block->nc_prev = nc_prev;
706 if (nc_prev)
707 nc_prev->nc_next = block;
708 nc_prev = block;
709 block->nc_block = true;
710
711 if (!nc_head)
712 nc_head = block;
713
714 if (*addr_req == ~0)
715 *addr_req = addr;
716
717 addr = 0;
718 nc_len -= len;
719 len = nc_len;
720 allocator_dbg(allocator, "remaining length %d", nc_len);
721 }
722
723clean_up:
724 if (err) {
725 while (nc_head) {
726 unlink_block(allocator, nc_head, nc_head->prev);
727 nc_prev = nc_head;
728 nc_head = nc_head->nc_next;
729 kmem_cache_free(allocator->block_cache, nc_prev);
730 }
731 *pblock = NULL;
732 *addr_req = ~0;
733 } else {
734 *pblock = nc_head;
735 }
736
737 return err;
738}
739
740/* called with rw_sema acquired */
741static int block_free_locked(struct gk20a_allocator *allocator,
742 u32 addr, u32 len)
743{
744 struct gk20a_alloc_block *block, *prev, *last;
745 u32 end;
746 int err;
747
748 /* no block has block->end > addr, already free */
749 block = find_block_prev(allocator, addr, &prev);
750 if (!block)
751 return 0;
752
753 allocator_dbg(allocator, "first block in free range %d:%d",
754 block->start, block->end);
755
756 end = addr + len;
757 /* not in any block, already free */
758 if (block->start >= end)
759 return 0;
760
761 /* don't touch nc_block in range free */
762 if (addr > block->start && !block->nc_block) {
763 int err = split_block(allocator, block, addr, 0);
764 if (err)
765 return err;
766 prev = block;
767 }
768
769 last = find_block(allocator, end);
770 if (last && end > last->start && !last->nc_block) {
771
772 allocator_dbg(allocator, "last block in free range %d:%d",
773 last->start, last->end);
774
775 err = split_block(allocator, last, end, 1);
776 if (err)
777 return err;
778 }
779
780 block = prev ? prev->next : allocator->block_first;
781
782 allocator_dbg(allocator, "first block for free %d:%d",
783 block->start, block->end);
784
785 /* remove blocks between [addr, addr + len) from rb tree
786 and put them in a list */
787 block = unlink_blocks(allocator, block, prev, end);
788 free_blocks(allocator, block);
789
790 update_free_addr_cache(allocator, NULL, addr, len, true);
791
792 return 0;
793}
794
795/* called with rw_sema acquired */
796static void block_free_list_locked(struct gk20a_allocator *allocator,
797 struct gk20a_alloc_block *list)
798{
799 struct gk20a_alloc_block *block;
800 u32 len;
801
802 update_free_addr_cache(allocator, NULL,
803 list->start, list->end - list->start, true);
804
805 while (list) {
806 block = list;
807 unlink_block(allocator, block, block->prev);
808
809 len = block->end - block->start;
810 if (allocator->cached_hole_size < len)
811 allocator->cached_hole_size = len;
812
813 list = block->nc_next;
814 kmem_cache_free(allocator->block_cache, block);
815 }
816}
817
818static int
819gk20a_allocator_constrain(struct gk20a_allocator *a,
820 bool enable, u32 base, u32 limit)
821{
822 if (enable) {
823 a->constraint.enable = (base >= a->base &&
824 limit <= a->limit);
825 if (!a->constraint.enable)
826 return -EINVAL;
827 a->constraint.base = base;
828 a->constraint.limit = limit;
829 a->first_free_addr = a->last_free_addr = base;
830
831 } else {
832 a->constraint.enable = false;
833 a->first_free_addr = a->last_free_addr = a->base;
834 }
835
836 a->cached_hole_size = 0;
837
838 return 0;
839}
840
841/* init allocator struct */
842int gk20a_allocator_init(struct gk20a_allocator *allocator,
843 const char *name, u32 start, u32 len, u32 align)
844{
845 memset(allocator, 0, sizeof(struct gk20a_allocator));
846
847 strncpy(allocator->name, name, 32);
848
849 allocator->block_cache =
850 kmem_cache_create(allocator->name,
851 sizeof(struct gk20a_alloc_block), 0,
852 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
853 if (!allocator->block_cache)
854 return -ENOMEM;
855
856 allocator->rb_root = RB_ROOT;
857
858 allocator->base = start;
859 allocator->limit = start + len - 1;
860 allocator->align = align;
861
862 allocator_dbg(allocator, "%s : base %d, limit %d, align %d",
863 allocator->name, allocator->base,
864 allocator->limit, allocator->align);
865
866 allocator->first_free_addr = allocator->last_free_addr = start;
867 allocator->cached_hole_size = len;
868
869 init_rwsem(&allocator->rw_sema);
870
871 allocator->alloc = gk20a_allocator_block_alloc;
872 allocator->alloc_nc = gk20a_allocator_block_alloc_nc;
873 allocator->free = gk20a_allocator_block_free;
874 allocator->free_nc = gk20a_allocator_block_free_nc;
875 allocator->constrain = gk20a_allocator_constrain;
876
877 return 0;
878}
879
880/* destroy allocator, free all remaining blocks if any */
881void gk20a_allocator_destroy(struct gk20a_allocator *allocator)
882{
883 struct gk20a_alloc_block *block, *next;
884 u32 free_count = 0;
885
886 down_write(&allocator->rw_sema);
887
888 for (block = allocator->block_first; block; ) {
889 allocator_dbg(allocator, "free remaining block %d:%d",
890 block->start, block->end);
891 next = block->next;
892 kmem_cache_free(allocator->block_cache, block);
893 free_count++;
894 block = next;
895 }
896
897 up_write(&allocator->rw_sema);
898
899 /* BUG if block_count doesn't match the real number of blocks */
900 BUG_ON(free_count != allocator->block_count);
901
902 kmem_cache_destroy(allocator->block_cache);
903
904 memset(allocator, 0, sizeof(struct gk20a_allocator));
905}
906
907/*
908 * Pass a non-zero *addr for a fixed-address allocation. If *addr == 0,
909 * the allocator chooses the range and returns its base address in *addr.
910 *
911 * Contiguous allocation: the request is satisfied with a single block of
912 * contiguous addresses.
913*/
914int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
915 u32 *addr, u32 len)
916{
917 int ret;
918#if defined(ALLOCATOR_DEBUG)
919 struct gk20a_alloc_block *block;
920 bool should_fail = false;
921#endif
922
923 allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);
924
925 if (*addr + len > allocator->limit || /* check addr range */
926 *addr & (allocator->align - 1) || /* check addr alignment */
927 len == 0) /* check len */
928 return -EINVAL;
929
930 if (allocator->constraint.enable &&
931 (*addr + len > allocator->constraint.limit ||
932 *addr > allocator->constraint.base))
933 return -EINVAL;
934
935 len = ALIGN(len, allocator->align);
936 if (!len)
937 return -ENOMEM;
938
939 down_write(&allocator->rw_sema);
940
941#if defined(ALLOCATOR_DEBUG)
942 if (*addr) {
943 for (block = allocator->block_first;
944 block; block = block->next) {
945 if (block->end > *addr && block->start < *addr + len) {
946 should_fail = true;
947 break;
948 }
949 }
950 }
951#endif
952
953 ret = block_alloc_single_locked(allocator, addr, len);
954
955#if defined(ALLOCATOR_DEBUG)
956 if (!ret) {
957 bool allocated = false;
958 BUG_ON(should_fail);
959 BUG_ON(*addr < allocator->base);
960 BUG_ON(*addr + len > allocator->limit);
961 for (block = allocator->block_first;
962 block; block = block->next) {
963 if (!block->nc_block &&
964 block->start <= *addr &&
965 block->end >= *addr + len) {
966 allocated = true;
967 break;
968 }
969 }
970 BUG_ON(!allocated);
971 }
972#endif
973
974 up_write(&allocator->rw_sema);
975
976 allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);
977
978 return ret;
979}
980
981/*
982 * Pass a non-zero *addr for a fixed-address allocation. If *addr == 0,
983 * the allocator chooses the ranges and returns the first block's base in *addr.
984 *
985 * non-contiguous allocation, which returns a list of blocks with aggregated
986 * size == len. Individual block size must meet alignment requirement.
987 */
988int gk20a_allocator_block_alloc_nc(struct gk20a_allocator *allocator,
989 u32 *addr, u32 len, struct gk20a_alloc_block **pblock)
990{
991 int ret;
992
993 allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);
994
995 BUG_ON(pblock == NULL);
996 *pblock = NULL;
997
998 if (*addr + len > allocator->limit || /* check addr range */
999 *addr & (allocator->align - 1) || /* check addr alignment */
1000 len == 0) /* check len */
1001 return -EINVAL;
1002
1003 len = ALIGN(len, allocator->align);
1004 if (!len)
1005 return -ENOMEM;
1006
1007 down_write(&allocator->rw_sema);
1008
1009 ret = block_alloc_list_locked(allocator, addr, len, pblock);
1010
1011#if defined(ALLOCATOR_DEBUG)
1012 if (!ret) {
1013 struct gk20a_alloc_block *block = *pblock;
1014 BUG_ON(!block);
1015 BUG_ON(block->start < allocator->base);
1016 while (block->nc_next) {
1017 BUG_ON(block->end > block->nc_next->start);
1018 block = block->nc_next;
1019 }
1020 BUG_ON(block->end > allocator->limit);
1021 }
1022#endif
1023
1024 up_write(&allocator->rw_sema);
1025
1026 allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);
1027
1028 return ret;
1029}
1030
1031/* free all blocks between start and end */
1032int gk20a_allocator_block_free(struct gk20a_allocator *allocator,
1033 u32 addr, u32 len)
1034{
1035 int ret;
1036
1037 allocator_dbg(allocator, "[in] addr %d, len %d", addr, len);
1038
1039 if (addr + len > allocator->limit || /* check addr range */
1040 addr < allocator->base ||
1041 addr & (allocator->align - 1)) /* check addr alignment */
1042 return -EINVAL;
1043
1044 len = ALIGN(len, allocator->align);
1045 if (!len)
1046 return -EINVAL;
1047
1048 down_write(&allocator->rw_sema);
1049
1050 ret = block_free_locked(allocator, addr, len);
1051
1052#if defined(ALLOCATOR_DEBUG)
1053 if (!ret) {
1054 struct gk20a_alloc_block *block;
1055 for (block = allocator->block_first;
1056 block; block = block->next) {
1057 if (!block->nc_block)
1058 BUG_ON(block->start >= addr &&
1059 block->end <= addr + len);
1060 }
1061 }
1062#endif
1063 up_write(&allocator->rw_sema);
1064
1065 allocator_dbg(allocator, "[out] addr %d, len %d", addr, len);
1066
1067 return ret;
1068}
1069
1070/* free non-contiguous allocation block list */
1071void gk20a_allocator_block_free_nc(struct gk20a_allocator *allocator,
1072 struct gk20a_alloc_block *block)
1073{
1074 /* nothing to free */
1075 if (!block)
1076 return;
1077
1078 down_write(&allocator->rw_sema);
1079 block_free_list_locked(allocator, block);
1080 up_write(&allocator->rw_sema);
1081}
1082
1083#if defined(ALLOCATOR_DEBUG)
1084
1085#include <linux/random.h>
1086
1087/* test suite */
1088void gk20a_allocator_test(void)
1089{
1090 struct gk20a_allocator allocator;
1091 struct gk20a_alloc_block *list[5];
1092 u32 addr, len;
1093 u32 count;
1094 int n;
1095
1096 gk20a_allocator_init(&allocator, "test", 0, 10, 1);
1097
1098 /* alloc/free a single block in the beginning */
1099 addr = 0;
1100 gk20a_allocator_block_alloc(&allocator, &addr, 2);
1101 gk20a_allocator_dump(&allocator);
1102 gk20a_allocator_block_free(&allocator, addr, 2);
1103 gk20a_allocator_dump(&allocator);
1104 /* alloc/free a single block in the middle */
1105 addr = 4;
1106 gk20a_allocator_block_alloc(&allocator, &addr, 2);
1107 gk20a_allocator_dump(&allocator);
1108 gk20a_allocator_block_free(&allocator, addr, 2);
1109 gk20a_allocator_dump(&allocator);
1110 /* alloc/free a single block in the end */
1111 addr = 8;
1112 gk20a_allocator_block_alloc(&allocator, &addr, 2);
1113 gk20a_allocator_dump(&allocator);
1114 gk20a_allocator_block_free(&allocator, addr, 2);
1115 gk20a_allocator_dump(&allocator);
1116
1117 /* allocate contiguous blocks */
1118 addr = 0;
1119 gk20a_allocator_block_alloc(&allocator, &addr, 2);
1120 gk20a_allocator_dump(&allocator);
1121 addr = 0;
1122 gk20a_allocator_block_alloc(&allocator, &addr, 4);
1123 gk20a_allocator_dump(&allocator);
1124 addr = 0;
1125 gk20a_allocator_block_alloc(&allocator, &addr, 4);
1126 gk20a_allocator_dump(&allocator);
1127
1128 /* no free space */
1129 addr = 0;
1130 gk20a_allocator_block_alloc(&allocator, &addr, 2);
1131 gk20a_allocator_dump(&allocator);
1132
1133 /* free in the end */
1134 gk20a_allocator_block_free(&allocator, 8, 2);
1135 gk20a_allocator_dump(&allocator);
1136 /* free in the beginning */
1137 gk20a_allocator_block_free(&allocator, 0, 2);
1138 gk20a_allocator_dump(&allocator);
1139 /* free in the middle */
1140 gk20a_allocator_block_free(&allocator, 4, 2);
1141 gk20a_allocator_dump(&allocator);
1142
1143 /* merge case PPPPAAAANNNN */
1144 addr = 4;
1145 gk20a_allocator_block_alloc(&allocator, &addr, 2);
1146 gk20a_allocator_dump(&allocator);
1147 /* merge case ....AAAANNNN */
1148 addr = 0;
1149 gk20a_allocator_block_alloc(&allocator, &addr, 2);
1150 gk20a_allocator_dump(&allocator);
1151 /* merge case PPPPAAAA.... */
1152 addr = 8;
1153 gk20a_allocator_block_alloc(&allocator, &addr, 2);
1154 gk20a_allocator_dump(&allocator);
1155
1156 /* test free across multiple blocks and split */
1157 gk20a_allocator_block_free(&allocator, 2, 2);
1158 gk20a_allocator_dump(&allocator);
1159 gk20a_allocator_block_free(&allocator, 6, 2);
1160 gk20a_allocator_dump(&allocator);
1161 gk20a_allocator_block_free(&allocator, 1, 8);
1162 gk20a_allocator_dump(&allocator);
1163
1164 /* test non-contiguous allocation */
1165 addr = 4;
1166 gk20a_allocator_block_alloc(&allocator, &addr, 2);
1167 gk20a_allocator_dump(&allocator);
1168 addr = 0;
1169 gk20a_allocator_block_alloc_nc(&allocator, &addr, 5, &list[0]);
1170 gk20a_allocator_dump(&allocator);
1171 gk20a_allocator_dump_nc_list(&allocator, list[0]);
1172
1173 /* test free a range overlapping non-contiguous blocks */
1174 gk20a_allocator_block_free(&allocator, 2, 6);
1175 gk20a_allocator_dump(&allocator);
1176
1177 /* test non-contiguous free */
1178 gk20a_allocator_block_free_nc(&allocator, list[0]);
1179 gk20a_allocator_dump(&allocator);
1180
1181 gk20a_allocator_destroy(&allocator);
1182
1183 /* random stress test */
1184 gk20a_allocator_init(&allocator, "test", 4096, 4096 * 1024, 4096);
1185 for (;;) {
1186 pr_debug("alloc tests...\n");
1187 for (count = 0; count < 50; count++) {
1188 addr = 0;
1189 len = random32() % (4096 * 1024 / 16);
1190 gk20a_allocator_block_alloc(&allocator, &addr, len);
1191 gk20a_allocator_dump(&allocator);
1192 }
1193
1194 pr_debug("free tests...\n");
1195 for (count = 0; count < 30; count++) {
1196 addr = (random32() % (4096 * 1024)) & ~(4096 - 1);
1197 len = random32() % (4096 * 1024 / 16);
1198 gk20a_allocator_block_free(&allocator, addr, len);
1199 gk20a_allocator_dump(&allocator);
1200 }
1201
1202 pr_debug("non-contiguous alloc tests...\n");
1203 for (n = 0; n < 5; n++) {
1204 addr = 0;
1205 len = random32() % (4096 * 1024 / 8);
1206 gk20a_allocator_block_alloc_nc(&allocator, &addr,
1207 len, &list[n]);
1208 gk20a_allocator_dump(&allocator);
1209 gk20a_allocator_dump_nc_list(&allocator, list[n]);
1210 }
1211
1212 pr_debug("free tests...\n");
1213 for (count = 0; count < 10; count++) {
1214 addr = (random32() % (4096 * 1024)) & ~(4096 - 1);
1215 len = random32() % (4096 * 1024 / 16);
1216 gk20a_allocator_block_free(&allocator, addr, len);
1217 gk20a_allocator_dump(&allocator);
1218 }
1219
1220 pr_debug("non-contiguous free tests...\n");
1221 for (n = 4; n >= 0; n--) {
1222 gk20a_allocator_dump_nc_list(&allocator, list[n]);
1223 gk20a_allocator_block_free_nc(&allocator, list[n]);
1224 gk20a_allocator_dump(&allocator);
1225 }
1226
1227 pr_debug("fixed addr alloc tests...\n");
1228 for (count = 0; count < 10; count++) {
1229 addr = (random32() % (4096 * 1024)) & ~(4096 - 1);
1230 len = random32() % (4096 * 1024 / 32);
1231 gk20a_allocator_block_alloc(&allocator, &addr, len);
1232 gk20a_allocator_dump(&allocator);
1233 }
1234
1235 pr_debug("free tests...\n");
1236 for (count = 0; count < 10; count++) {
1237 addr = (random32() % (4096 * 1024)) & ~(4096 - 1);
1238 len = random32() % (4096 * 1024 / 16);
1239 gk20a_allocator_block_free(&allocator, addr, len);
1240 gk20a_allocator_dump(&allocator);
1241 }
1242 }
1243 gk20a_allocator_destroy(&allocator);
1244}
1245
1246#endif /* ALLOCATOR_DEBUG */
1247
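For reference, a hedged usage sketch of the contiguous allocation API exercised by the test suite above; the function name and the sizes are illustrative, while the calls and signatures are the ones defined in this file.

static int example_allocator_usage(void)
{
	struct gk20a_allocator allocator;
	u32 addr = 0;	/* 0 lets the allocator pick the address */
	int err;

	/* 4 KB-aligned space of 64 pages starting at 4096 */
	err = gk20a_allocator_init(&allocator, "example", 4096, 64 * 4096, 4096);
	if (err)
		return err;

	/* one contiguous 8-page block; addr receives its base on success */
	err = gk20a_allocator_block_alloc(&allocator, &addr, 8 * 4096);
	if (!err)
		err = gk20a_allocator_block_free(&allocator, addr, 8 * 4096);

	gk20a_allocator_destroy(&allocator);
	return err;
}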
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
new file mode 100644
index 00000000..dba397e2
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -0,0 +1,177 @@
1/*
2 * gk20a allocator
3 *
4 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef __NVHOST_ALLOCATOR_H__
20#define __NVHOST_ALLOCATOR_H__
21
22#include <linux/rbtree.h>
23#include <linux/rwsem.h>
24#include <linux/slab.h>
25
26/* #define ALLOCATOR_DEBUG */
27
28struct allocator_block;
29
30/* main struct */
31struct gk20a_allocator {
32
33 char name[32]; /* name for allocator */
34 struct rb_root rb_root; /* rb tree root for blocks */
35
36 u32 base; /* min value of this linear space */
37 u32 limit; /* max value = limit - 1 */
38 u32 align; /* alignment size, power of 2 */
39
40 struct gk20a_alloc_block *block_first; /* first block in list */
41 struct gk20a_alloc_block *block_recent; /* last visited block */
42
43 u32 first_free_addr; /* first free addr, non-contiguous
44 allocation preferred start,
45 in order to pick up small holes */
46 u32 last_free_addr; /* last free addr, contiguous
47 allocation preferred start */
48 u32 cached_hole_size; /* max free hole size up to
49 last_free_addr */
50 u32 block_count; /* number of blocks */
51
52 struct rw_semaphore rw_sema; /* lock */
53 struct kmem_cache *block_cache; /* slab cache */
54
55 /* if enabled, constrain to [base, limit) */
56 struct {
57 bool enable;
58 u32 base;
59 u32 limit;
60 } constraint;
61
62 int (*alloc)(struct gk20a_allocator *allocator,
63 u32 *addr, u32 len);
64 int (*alloc_nc)(struct gk20a_allocator *allocator,
65 u32 *addr, u32 len,
66 struct gk20a_alloc_block **pblock);
67 int (*free)(struct gk20a_allocator *allocator,
68 u32 addr, u32 len);
69 void (*free_nc)(struct gk20a_allocator *allocator,
70 struct gk20a_alloc_block *block);
71
72 int (*constrain)(struct gk20a_allocator *a,
73 bool enable,
74 u32 base, u32 limit);
75};
76
77/* a block of linear space range [start, end) */
78struct gk20a_alloc_block {
79 struct gk20a_allocator *allocator; /* parent allocator */
80 struct rb_node rb; /* rb tree node */
81
82 u32 start; /* linear space range
83 [start, end) */
84 u32 end;
85
86 void *priv; /* backing structure for this
87 linear space block
88 page table, comp tag, etc */
89
90 struct gk20a_alloc_block *prev; /* prev block with lower address */
91 struct gk20a_alloc_block *next; /* next block with higher address */
92
93 bool nc_block;
94 struct gk20a_alloc_block *nc_prev; /* prev block for
95 non-contiguous allocation */
96 struct gk20a_alloc_block *nc_next; /* next block for
97 non-contiguous allocation */
98};
99
100int gk20a_allocator_init(struct gk20a_allocator *allocator,
101 const char *name, u32 base, u32 size, u32 align);
102void gk20a_allocator_destroy(struct gk20a_allocator *allocator);
103
104int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
105 u32 *addr, u32 len);
106int gk20a_allocator_block_alloc_nc(struct gk20a_allocator *allocator,
107 u32 *addr, u32 len,
108 struct gk20a_alloc_block **pblock);
109
110int gk20a_allocator_block_free(struct gk20a_allocator *allocator,
111 u32 addr, u32 len);
112void gk20a_allocator_block_free_nc(struct gk20a_allocator *allocator,
113 struct gk20a_alloc_block *block);
114
115#if defined(ALLOCATOR_DEBUG)
116
117#define allocator_dbg(allocator, format, arg...) \
118do { \
119 if (1) \
120 pr_debug("gk20a_allocator (%s) %s: " format "\n",\
121 allocator->name, __func__, ##arg);\
122} while (0)
123
124static inline void
125gk20a_allocator_dump(struct gk20a_allocator *allocator) {
126 struct gk20a_alloc_block *block;
127 u32 count = 0;
128
129 down_read(&allocator->rw_sema);
130 for (block = allocator->block_first; block; block = block->next) {
131 allocator_dbg(allocator, "block %d - %d:%d, nc %d",
132 count++, block->start, block->end, block->nc_block);
133
134 if (block->prev)
135 BUG_ON(block->prev->end > block->start);
136 if (block->next)
137 BUG_ON(block->next->start < block->end);
138 }
139 allocator_dbg(allocator, "tracked count %d, actual count %d",
140 allocator->block_count, count);
141 allocator_dbg(allocator, "first block %d:%d",
142 allocator->block_first ? allocator->block_first->start : -1,
143 allocator->block_first ? allocator->block_first->end : -1);
144 allocator_dbg(allocator, "first free addr %d",
145 allocator->first_free_addr);
146 allocator_dbg(allocator, "last free addr %d",
147 allocator->last_free_addr);
148 allocator_dbg(allocator, "cached hole size %d",
149 allocator->cached_hole_size);
150 up_read(&allocator->rw_sema);
151
152 BUG_ON(count != allocator->block_count);
153}
154
155static inline void
156gk20a_allocator_dump_nc_list(
157 struct gk20a_allocator *allocator,
158 struct gk20a_alloc_block *block)
159{
160 down_read(&allocator->rw_sema);
161 while (block) {
162 pr_debug("non-contiguous block %d:%d\n",
163 block->start, block->end);
164 block = block->nc_next;
165 }
166 up_read(&allocator->rw_sema);
167}
168
169void gk20a_allocator_test(void);
170
171#else /* ALLOCATOR_DEBUG */
172
173#define allocator_dbg(format, arg...)
174
175#endif /* ALLOCATOR_DEBUG */
176
177#endif /*__NVHOST_ALLOCATOR_H__ */
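A non-contiguous request can be satisfied from several holes. A hedged sketch of that path follows, using only the declarations above (the helper name is illustrative):

static int example_nc_alloc(struct gk20a_allocator *allocator, u32 len)
{
	struct gk20a_alloc_block *list = NULL;
	struct gk20a_alloc_block *block;
	u32 addr = 0;	/* 0 lets the allocator pick the ranges */
	int err;

	/* len is rounded up to allocator->align and may be split across
	   several blocks chained through nc_next */
	err = gk20a_allocator_block_alloc_nc(allocator, &addr, len, &list);
	if (err)
		return err;

	for (block = list; block; block = block->nc_next) {
		/* program each [block->start, block->end) range here */
	}

	/* freeing takes the list head rather than an address/length pair */
	gk20a_allocator_block_free_nc(allocator, list);
	return 0;
}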
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c
new file mode 100644
index 00000000..c6478a5e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c
@@ -0,0 +1,374 @@
1/*
2 * Copyright (c) 2012-2014, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 *
17 * This file is autogenerated. Do not edit.
18 */
19
20#ifndef __gk20a_gating_reglist_h__
21#define __gk20a_gating_reglist_h__
22
23#include <linux/types.h>
24#include "gk20a_gating_reglist.h"
25
26struct gating_desc {
27 u32 addr;
28 u32 prod;
29 u32 disable;
30};
31/* slcg gr */
32const struct gating_desc gk20a_slcg_gr[] = {
33 {.addr = 0x004041f4, .prod = 0x00000000, .disable = 0x03fffffe},
34 {.addr = 0x00409894, .prod = 0x00000040, .disable = 0x0003fffe},
35 {.addr = 0x004078c4, .prod = 0x00000000, .disable = 0x000001fe},
36 {.addr = 0x00406004, .prod = 0x00000000, .disable = 0x0001fffe},
37 {.addr = 0x00405864, .prod = 0x00000000, .disable = 0x000001fe},
38 {.addr = 0x00405910, .prod = 0x00000000, .disable = 0xfffffffe},
39 {.addr = 0x00408044, .prod = 0x00000000, .disable = 0x000007fe},
40 {.addr = 0x00407004, .prod = 0x00000000, .disable = 0x0000001e},
41 {.addr = 0x0041a894, .prod = 0x00000000, .disable = 0x0003fffe},
42 {.addr = 0x00418504, .prod = 0x00000000, .disable = 0x0001fffe},
43 {.addr = 0x0041860c, .prod = 0x00000000, .disable = 0x000001fe},
44 {.addr = 0x0041868c, .prod = 0x00000000, .disable = 0x0000001e},
45 {.addr = 0x0041871c, .prod = 0x00000000, .disable = 0x0000003e},
46 {.addr = 0x00418388, .prod = 0x00000000, .disable = 0x00000001},
47 {.addr = 0x0041882c, .prod = 0x00000000, .disable = 0x0001fffe},
48 {.addr = 0x00418bc0, .prod = 0x00000000, .disable = 0x000001fe},
49 {.addr = 0x00418974, .prod = 0x00000000, .disable = 0x0001fffe},
50 {.addr = 0x00418c74, .prod = 0x00000000, .disable = 0xfffffffe},
51 {.addr = 0x00418cf4, .prod = 0x00000000, .disable = 0xfffffffe},
52 {.addr = 0x00418d74, .prod = 0x00000000, .disable = 0xfffffffe},
53 {.addr = 0x00418f10, .prod = 0x00000000, .disable = 0xfffffffe},
54 {.addr = 0x00418e10, .prod = 0x00000000, .disable = 0xfffffffe},
55 {.addr = 0x00419024, .prod = 0x00000000, .disable = 0x000001fe},
56 {.addr = 0x00419a44, .prod = 0x00000000, .disable = 0x0000000e},
57 {.addr = 0x00419a4c, .prod = 0x00000000, .disable = 0x000001fe},
58 {.addr = 0x00419a54, .prod = 0x00000000, .disable = 0x0000003e},
59 {.addr = 0x00419a5c, .prod = 0x00000000, .disable = 0x0000000e},
60 {.addr = 0x00419a64, .prod = 0x00000000, .disable = 0x000001fe},
61 {.addr = 0x00419a6c, .prod = 0x00000000, .disable = 0x0000000e},
62 {.addr = 0x00419a74, .prod = 0x00000000, .disable = 0x0000000e},
63 {.addr = 0x00419a7c, .prod = 0x00000000, .disable = 0x0000003e},
64 {.addr = 0x00419a84, .prod = 0x00000000, .disable = 0x0000000e},
65 {.addr = 0x00419ad0, .prod = 0x00000000, .disable = 0x0000000e},
66 {.addr = 0x0041986c, .prod = 0x0000dfc0, .disable = 0x00fffffe},
67 {.addr = 0x00419cd8, .prod = 0x00000000, .disable = 0x001ffffe},
68 {.addr = 0x00419ce0, .prod = 0x00000000, .disable = 0x001ffffe},
69 {.addr = 0x00419c74, .prod = 0x00000000, .disable = 0x0000001e},
70 {.addr = 0x00419fd4, .prod = 0x00000000, .disable = 0x0003fffe},
71 {.addr = 0x00419fdc, .prod = 0x00000000, .disable = 0xfffffffe},
72 {.addr = 0x00419fe4, .prod = 0x00000000, .disable = 0x0000000e},
73 {.addr = 0x00419ff4, .prod = 0x00000000, .disable = 0x00003ffe},
74 {.addr = 0x00419ffc, .prod = 0x00000000, .disable = 0x0001fffe},
75 {.addr = 0x0041be2c, .prod = 0x020bbfc0, .disable = 0xfffffffe},
76 {.addr = 0x0041bfec, .prod = 0x00000000, .disable = 0xfffffffe},
77 {.addr = 0x0041bed4, .prod = 0x00000000, .disable = 0xfffffffe},
78 {.addr = 0x00408814, .prod = 0x00000000, .disable = 0x0001fffe},
79 {.addr = 0x0040881c, .prod = 0x00000000, .disable = 0x0001fffe},
80 {.addr = 0x00408a84, .prod = 0x00000000, .disable = 0x0001fffe},
81 {.addr = 0x00408a8c, .prod = 0x00000000, .disable = 0x0001fffe},
82 {.addr = 0x00408a94, .prod = 0x00000000, .disable = 0x0001fffe},
83 {.addr = 0x00408a9c, .prod = 0x00000000, .disable = 0x0001fffe},
84 {.addr = 0x00408aa4, .prod = 0x00000000, .disable = 0x0001fffe},
85 {.addr = 0x00408aac, .prod = 0x00000000, .disable = 0x0001fffe},
86 {.addr = 0x004089ac, .prod = 0x00000000, .disable = 0x0001fffe},
87 {.addr = 0x00408a24, .prod = 0x00000000, .disable = 0x000001ff},
88 {.addr = 0x0017e050, .prod = 0x00000000, .disable = 0x00fffffe},
89 {.addr = 0x001200a8, .prod = 0x00000000, .disable = 0x00000001},
90 {.addr = 0x0010e48c, .prod = 0x00000000, .disable = 0x0000003e},
91 {.addr = 0x00001c04, .prod = 0x00000000, .disable = 0x000000fe},
92 {.addr = 0x00106f28, .prod = 0x00000040, .disable = 0x000007fe},
93 {.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f},
94 {.addr = 0x0017ea98, .prod = 0x00000000, .disable = 0xfffffffe},
95 {.addr = 0x00106f28, .prod = 0x00000040, .disable = 0x000007fe},
96 {.addr = 0x00120048, .prod = 0x00000000, .disable = 0x00000049},
97};
98
99/* slcg perf */
100const struct gating_desc gk20a_slcg_perf[] = {
101 {.addr = 0x001be018, .prod = 0x000001ff, .disable = 0x00000000},
102 {.addr = 0x001bc018, .prod = 0x000001ff, .disable = 0x00000000},
103 {.addr = 0x001b8018, .prod = 0x000001ff, .disable = 0x00000000},
104 {.addr = 0x001b4124, .prod = 0x00000001, .disable = 0x00000000},
105};
106
107/* blcg gr */
108const struct gating_desc gk20a_blcg_gr[] = {
109 {.addr = 0x004041f0, .prod = 0x00004046, .disable = 0x00000000},
110 {.addr = 0x00409890, .prod = 0x0000007f, .disable = 0x00000000},
111 {.addr = 0x004098b0, .prod = 0x0000007f, .disable = 0x00000000},
112 {.addr = 0x004078c0, .prod = 0x00000042, .disable = 0x00000000},
113 {.addr = 0x00406000, .prod = 0x00004044, .disable = 0x00000000},
114 {.addr = 0x00405860, .prod = 0x00004042, .disable = 0x00000000},
115 {.addr = 0x0040590c, .prod = 0x00004044, .disable = 0x00000000},
116 {.addr = 0x00408040, .prod = 0x00004044, .disable = 0x00000000},
117 {.addr = 0x00407000, .prod = 0x00004041, .disable = 0x00000000},
118 {.addr = 0x00405bf0, .prod = 0x00004044, .disable = 0x00000000},
119 {.addr = 0x0041a890, .prod = 0x0000007f, .disable = 0x00000000},
120 {.addr = 0x0041a8b0, .prod = 0x0000007f, .disable = 0x00000000},
121 {.addr = 0x00418500, .prod = 0x00004044, .disable = 0x00000000},
122 {.addr = 0x00418608, .prod = 0x00004042, .disable = 0x00000000},
123 {.addr = 0x00418688, .prod = 0x00004042, .disable = 0x00000000},
124 {.addr = 0x00418718, .prod = 0x00000042, .disable = 0x00000000},
125 {.addr = 0x00418828, .prod = 0x00000044, .disable = 0x00000000},
126 {.addr = 0x00418bbc, .prod = 0x00004042, .disable = 0x00000000},
127 {.addr = 0x00418970, .prod = 0x00004042, .disable = 0x00000000},
128 {.addr = 0x00418c70, .prod = 0x00004044, .disable = 0x00000000},
129 {.addr = 0x00418cf0, .prod = 0x00004044, .disable = 0x00000000},
130 {.addr = 0x00418d70, .prod = 0x00004044, .disable = 0x00000000},
131 {.addr = 0x00418f0c, .prod = 0x00004044, .disable = 0x00000000},
132 {.addr = 0x00418e0c, .prod = 0x00004044, .disable = 0x00000000},
133 {.addr = 0x00419020, .prod = 0x00004042, .disable = 0x00000000},
134 {.addr = 0x00419038, .prod = 0x00000042, .disable = 0x00000000},
135 {.addr = 0x00419a40, .prod = 0x00004042, .disable = 0x00000000},
136 {.addr = 0x00419a48, .prod = 0x00004042, .disable = 0x00000000},
137 {.addr = 0x00419a50, .prod = 0x00004042, .disable = 0x00000000},
138 {.addr = 0x00419a58, .prod = 0x00004042, .disable = 0x00000000},
139 {.addr = 0x00419a60, .prod = 0x00004042, .disable = 0x00000000},
140 {.addr = 0x00419a68, .prod = 0x00004042, .disable = 0x00000000},
141 {.addr = 0x00419a70, .prod = 0x00004042, .disable = 0x00000000},
142 {.addr = 0x00419a78, .prod = 0x00004042, .disable = 0x00000000},
143 {.addr = 0x00419a80, .prod = 0x00004042, .disable = 0x00000000},
144 {.addr = 0x00419acc, .prod = 0x00004047, .disable = 0x00000000},
145 {.addr = 0x00419868, .prod = 0x00000043, .disable = 0x00000000},
146 {.addr = 0x00419cd4, .prod = 0x00004042, .disable = 0x00000000},
147 {.addr = 0x00419cdc, .prod = 0x00004042, .disable = 0x00000000},
148 {.addr = 0x00419c70, .prod = 0x00004045, .disable = 0x00000000},
149 {.addr = 0x00419fd0, .prod = 0x00004043, .disable = 0x00000000},
150 {.addr = 0x00419fd8, .prod = 0x00004045, .disable = 0x00000000},
151 {.addr = 0x00419fe0, .prod = 0x00004042, .disable = 0x00000000},
152 {.addr = 0x00419fe8, .prod = 0x00004042, .disable = 0x00000000},
153 {.addr = 0x00419ff0, .prod = 0x00004044, .disable = 0x00000000},
154 {.addr = 0x00419ff8, .prod = 0x00004042, .disable = 0x00000000},
155 {.addr = 0x00419f90, .prod = 0x00004042, .disable = 0x00000000},
156 {.addr = 0x0041be28, .prod = 0x00000042, .disable = 0x00000000},
157 {.addr = 0x0041bfe8, .prod = 0x00004044, .disable = 0x00000000},
158 {.addr = 0x0041bed0, .prod = 0x00004044, .disable = 0x00000000},
159 {.addr = 0x00408810, .prod = 0x00004042, .disable = 0x00000000},
160 {.addr = 0x00408818, .prod = 0x00004042, .disable = 0x00000000},
161 {.addr = 0x00408a80, .prod = 0x00004042, .disable = 0x00000000},
162 {.addr = 0x00408a88, .prod = 0x00004042, .disable = 0x00000000},
163 {.addr = 0x00408a90, .prod = 0x00004042, .disable = 0x00000000},
164 {.addr = 0x00408a98, .prod = 0x00004042, .disable = 0x00000000},
165 {.addr = 0x00408aa0, .prod = 0x00004042, .disable = 0x00000000},
166 {.addr = 0x00408aa8, .prod = 0x00004042, .disable = 0x00000000},
167 {.addr = 0x004089a8, .prod = 0x00004042, .disable = 0x00000000},
168 {.addr = 0x004089b0, .prod = 0x00000042, .disable = 0x00000000},
169 {.addr = 0x004089b8, .prod = 0x00004042, .disable = 0x00000000},
170 {.addr = 0x0017ea60, .prod = 0x00000044, .disable = 0x00000000},
171 {.addr = 0x0017ea68, .prod = 0x00000044, .disable = 0x00000000},
172 {.addr = 0x00100d30, .prod = 0x0000c242, .disable = 0x00000000},
173 {.addr = 0x00100d48, .prod = 0x0000c242, .disable = 0x00000000},
174 {.addr = 0x00100d3c, .prod = 0x00000242, .disable = 0x00000000},
175 {.addr = 0x0017ea78, .prod = 0x00000044, .disable = 0x00000000},
176 {.addr = 0x0017e040, .prod = 0x00000044, .disable = 0x00000000},
177 {.addr = 0x00100d1c, .prod = 0x00000042, .disable = 0x00000000},
178 {.addr = 0x00106f24, .prod = 0x0000c242, .disable = 0x00000000},
179 {.addr = 0x0041be00, .prod = 0x00000004, .disable = 0x00000007},
180 {.addr = 0x00100d10, .prod = 0x0000c242, .disable = 0x00000000},
181 {.addr = 0x0017ea70, .prod = 0x00000044, .disable = 0x00000000},
182 {.addr = 0x00001c00, .prod = 0x00000042, .disable = 0x00000000},
183 {.addr = 0x00100c98, .prod = 0x00000242, .disable = 0x00000000},
184 {.addr = 0x0017e030, .prod = 0x00000044, .disable = 0x00000000},
185};
186
187/* pg gr */
188const struct gating_desc gk20a_pg_gr[] = {
189 {.addr = 0x004041f8, .prod = 0x10940000, .disable = 0x00000000},
190 {.addr = 0x004041fc, .prod = 0xff00a725, .disable = 0x00000000},
191 {.addr = 0x00409898, .prod = 0x10140000, .disable = 0x00000000},
192 {.addr = 0x0040989c, .prod = 0xff00000a, .disable = 0x00000000},
193 {.addr = 0x004078c8, .prod = 0x10940000, .disable = 0x00000000},
194 {.addr = 0x004078cc, .prod = 0xff00a725, .disable = 0x00000000},
195 {.addr = 0x00406008, .prod = 0x10940000, .disable = 0x00000000},
196 {.addr = 0x0040600c, .prod = 0xff00a725, .disable = 0x00000000},
197 {.addr = 0x00405868, .prod = 0x10940000, .disable = 0x00000000},
198 {.addr = 0x0040586c, .prod = 0xff00a725, .disable = 0x00000000},
199 {.addr = 0x00405914, .prod = 0x10940000, .disable = 0x00000000},
200 {.addr = 0x00405924, .prod = 0xff00a725, .disable = 0x00000000},
201 {.addr = 0x00408048, .prod = 0x10940000, .disable = 0x00000000},
202 {.addr = 0x0040804c, .prod = 0xff00a725, .disable = 0x00000000},
203 {.addr = 0x00407008, .prod = 0x10140000, .disable = 0x00000000},
204 {.addr = 0x0040700c, .prod = 0xff00000a, .disable = 0x00000000},
205 {.addr = 0x00405bf8, .prod = 0x10940000, .disable = 0x00000000},
206 {.addr = 0x00405bfc, .prod = 0xff00a725, .disable = 0x00000000},
207 {.addr = 0x0041a898, .prod = 0x10140000, .disable = 0x00000000},
208 {.addr = 0x0041a89c, .prod = 0xff00000a, .disable = 0x00000000},
209 {.addr = 0x00418510, .prod = 0x10940000, .disable = 0x00000000},
210 {.addr = 0x00418514, .prod = 0xff00a725, .disable = 0x00000000},
211 {.addr = 0x00418610, .prod = 0x10940000, .disable = 0x00000000},
212 {.addr = 0x00418614, .prod = 0xff00a725, .disable = 0x00000000},
213 {.addr = 0x00418690, .prod = 0x10940000, .disable = 0x00000000},
214 {.addr = 0x00418694, .prod = 0xff00a725, .disable = 0x00000000},
215 {.addr = 0x00418720, .prod = 0x10940000, .disable = 0x00000000},
216 {.addr = 0x00418724, .prod = 0xff00a725, .disable = 0x00000000},
217 {.addr = 0x00418840, .prod = 0x10940000, .disable = 0x00000000},
218 {.addr = 0x00418844, .prod = 0xff00a725, .disable = 0x00000000},
219 {.addr = 0x00418bc4, .prod = 0x10940000, .disable = 0x00000000},
220 {.addr = 0x00418bc8, .prod = 0xff00a725, .disable = 0x00000000},
221 {.addr = 0x00418978, .prod = 0x10940000, .disable = 0x00000000},
222 {.addr = 0x0041897c, .prod = 0xff00a725, .disable = 0x00000000},
223 {.addr = 0x00418c78, .prod = 0x10940000, .disable = 0x00000000},
224 {.addr = 0x00418c7c, .prod = 0xff00a725, .disable = 0x00000000},
225 {.addr = 0x00418cf8, .prod = 0x10940000, .disable = 0x00000000},
226 {.addr = 0x00418cfc, .prod = 0xff00a725, .disable = 0x00000000},
227 {.addr = 0x00418d78, .prod = 0x10940000, .disable = 0x00000000},
228 {.addr = 0x00418d7c, .prod = 0xff00a725, .disable = 0x00000000},
229 {.addr = 0x00418f14, .prod = 0x10940000, .disable = 0x00000000},
230 {.addr = 0x00418f18, .prod = 0xff00a725, .disable = 0x00000000},
231 {.addr = 0x00418e14, .prod = 0x10940000, .disable = 0x00000000},
232 {.addr = 0x00418e18, .prod = 0xff00a725, .disable = 0x00000000},
233 {.addr = 0x00419030, .prod = 0x10940000, .disable = 0x00000000},
234 {.addr = 0x00419050, .prod = 0xff00a725, .disable = 0x00000000},
235 {.addr = 0x00419a88, .prod = 0x10940000, .disable = 0x00000000},
236 {.addr = 0x00419a8c, .prod = 0xff00a725, .disable = 0x00000000},
237 {.addr = 0x00419a90, .prod = 0x10940000, .disable = 0x00000000},
238 {.addr = 0x00419a94, .prod = 0xff00a725, .disable = 0x00000000},
239 {.addr = 0x00419a98, .prod = 0x10940000, .disable = 0x00000000},
240 {.addr = 0x00419a9c, .prod = 0xff00a725, .disable = 0x00000000},
241 {.addr = 0x00419aa0, .prod = 0x10940000, .disable = 0x00000000},
242 {.addr = 0x00419aa4, .prod = 0xff00a725, .disable = 0x00000000},
243 {.addr = 0x00419ad4, .prod = 0x10940000, .disable = 0x00000000},
244 {.addr = 0x00419ad8, .prod = 0xff00a725, .disable = 0x00000000},
245 {.addr = 0x00419870, .prod = 0x10940000, .disable = 0x00000000},
246 {.addr = 0x00419874, .prod = 0xff00a725, .disable = 0x00000000},
247 {.addr = 0x00419ce4, .prod = 0x10940000, .disable = 0x00000000},
248 {.addr = 0x00419cf0, .prod = 0xff00a725, .disable = 0x00000000},
249 {.addr = 0x00419c78, .prod = 0x10940000, .disable = 0x00000000},
250 {.addr = 0x00419c7c, .prod = 0xff00a725, .disable = 0x00000000},
251 {.addr = 0x00419fa0, .prod = 0x10940000, .disable = 0x00000000},
252 {.addr = 0x00419fa4, .prod = 0xff00a725, .disable = 0x00000000},
253 {.addr = 0x00419fa8, .prod = 0x10940000, .disable = 0x00000000},
254 {.addr = 0x00419fac, .prod = 0xff00a725, .disable = 0x00000000},
255 {.addr = 0x00419fb0, .prod = 0x10940000, .disable = 0x00000000},
256 {.addr = 0x00419fb4, .prod = 0xff00a725, .disable = 0x00000000},
257 {.addr = 0x00419fb8, .prod = 0x10940000, .disable = 0x00000000},
258 {.addr = 0x00419fbc, .prod = 0xff00a725, .disable = 0x00000000},
259 {.addr = 0x00419fc0, .prod = 0x10940000, .disable = 0x00000000},
260 {.addr = 0x00419fc4, .prod = 0xff00a725, .disable = 0x00000000},
261 {.addr = 0x00419fc8, .prod = 0x10940000, .disable = 0x00000000},
262 {.addr = 0x00419fcc, .prod = 0xff00a725, .disable = 0x00000000},
263 {.addr = 0x0041be30, .prod = 0x10940000, .disable = 0x00000000},
264 {.addr = 0x0041be34, .prod = 0xff00a725, .disable = 0x00000000},
265 {.addr = 0x0041bff0, .prod = 0x10747c00, .disable = 0x00000000},
266 {.addr = 0x0041bff4, .prod = 0xff00000a, .disable = 0x00000000},
267 {.addr = 0x0041bed8, .prod = 0x10240a00, .disable = 0x00000000},
268 {.addr = 0x0041bee0, .prod = 0xff00000a, .disable = 0x00000000},
269 {.addr = 0x00408820, .prod = 0x10940000, .disable = 0x00000000},
270 {.addr = 0x00408824, .prod = 0xff00a725, .disable = 0x00000000},
271 {.addr = 0x00408828, .prod = 0x10940000, .disable = 0x00000000},
272 {.addr = 0x0040882c, .prod = 0xff00a725, .disable = 0x00000000},
273 {.addr = 0x00408ac0, .prod = 0x10940000, .disable = 0x00000000},
274 {.addr = 0x00408ac4, .prod = 0xff00a725, .disable = 0x00000000},
275 {.addr = 0x00408ac8, .prod = 0x10940000, .disable = 0x00000000},
276 {.addr = 0x00408acc, .prod = 0xff00a725, .disable = 0x00000000},
277 {.addr = 0x00408ad0, .prod = 0x10940000, .disable = 0x00000000},
278 {.addr = 0x00408ad4, .prod = 0xff00a725, .disable = 0x00000000},
279 {.addr = 0x00408ad8, .prod = 0x10940000, .disable = 0x00000000},
280 {.addr = 0x00408adc, .prod = 0xff00a725, .disable = 0x00000000},
281 {.addr = 0x00408ae0, .prod = 0x10940000, .disable = 0x00000000},
282 {.addr = 0x00408ae4, .prod = 0xff00a725, .disable = 0x00000000},
283 {.addr = 0x00408ae8, .prod = 0x10940000, .disable = 0x00000000},
284 {.addr = 0x00408aec, .prod = 0xff00a725, .disable = 0x00000000},
285 {.addr = 0x004089c0, .prod = 0x10940000, .disable = 0x00000000},
286 {.addr = 0x004089c4, .prod = 0xff00a725, .disable = 0x00000000},
287 {.addr = 0x004089c8, .prod = 0x10940000, .disable = 0x00000000},
288 {.addr = 0x004089cc, .prod = 0xff00a725, .disable = 0x00000000},
289 {.addr = 0x004089d0, .prod = 0x10940000, .disable = 0x00000000},
290 {.addr = 0x004089d4, .prod = 0xff00a725, .disable = 0x00000000},
291};
292
293/* slcg therm */
294const struct gating_desc gk20a_slcg_therm[] = {
295 {.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f},
296};
297
298/* gating reglist load functions */
299void gr_gk20a_slcg_gr_load_gating_prod(struct gk20a *g,
300 bool prod)
301{
302 u32 i;
303 u32 size = sizeof(gk20a_slcg_gr) / sizeof(struct gating_desc);
304 for (i = 0; i < size; i++) {
305 if (prod)
306 gk20a_writel(g, gk20a_slcg_gr[i].addr,
307 gk20a_slcg_gr[i].prod);
308 else
309 gk20a_writel(g, gk20a_slcg_gr[i].addr,
310 gk20a_slcg_gr[i].disable);
311 }
312}
313
314void gr_gk20a_slcg_perf_load_gating_prod(struct gk20a *g,
315 bool prod)
316{
317 u32 i;
318 u32 size = sizeof(gk20a_slcg_perf) / sizeof(struct gating_desc);
319 for (i = 0; i < size; i++) {
320 if (prod)
321 gk20a_writel(g, gk20a_slcg_perf[i].addr,
322 gk20a_slcg_perf[i].prod);
323 else
324 gk20a_writel(g, gk20a_slcg_perf[i].addr,
325 gk20a_slcg_perf[i].disable);
326 }
327}
328
329void gr_gk20a_blcg_gr_load_gating_prod(struct gk20a *g,
330 bool prod)
331{
332 u32 i;
333 u32 size = sizeof(gk20a_blcg_gr) / sizeof(struct gating_desc);
334 for (i = 0; i < size; i++) {
335 if (prod)
336 gk20a_writel(g, gk20a_blcg_gr[i].addr,
337 gk20a_blcg_gr[i].prod);
338 else
339 gk20a_writel(g, gk20a_blcg_gr[i].addr,
340 gk20a_blcg_gr[i].disable);
341 }
342}
343
344void gr_gk20a_pg_gr_load_gating_prod(struct gk20a *g,
345 bool prod)
346{
347 u32 i;
348 u32 size = sizeof(gk20a_pg_gr) / sizeof(struct gating_desc);
349 for (i = 0; i < size; i++) {
350 if (prod)
351 gk20a_writel(g, gk20a_pg_gr[i].addr,
352 gk20a_pg_gr[i].prod);
353 else
354 gk20a_writel(g, gk20a_pg_gr[i].addr,
355 gk20a_pg_gr[i].disable);
356 }
357}
358
359void gr_gk20a_slcg_therm_load_gating_prod(struct gk20a *g,
360 bool prod)
361{
362 u32 i;
363 u32 size = sizeof(gk20a_slcg_therm) / sizeof(struct gating_desc);
364 for (i = 0; i < size; i++) {
365 if (prod)
366 gk20a_writel(g, gk20a_slcg_therm[i].addr,
367 gk20a_slcg_therm[i].prod);
368 else
369 gk20a_writel(g, gk20a_slcg_therm[i].addr,
370 gk20a_slcg_therm[i].disable);
371 }
372}
373
374#endif /* __gk20a_gating_reglist_h__ */
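Editor's note: each of the load_gating_prod() helpers above walks its descriptor table the same way, writing the .prod value when production gating is requested and the .disable value otherwise. Below is a minimal sketch of a shared walker, assuming the struct gating_desc and gk20a_writel() definitions above are in scope; the helper name itself is hypothetical and not part of the driver.

static void gk20a_load_gating_table(struct gk20a *g,
		const struct gating_desc *descs, u32 count, bool prod)
{
	u32 i;

	/* Write either the production or the disable value of every entry. */
	for (i = 0; i < count; i++)
		gk20a_writel(g, descs[i].addr,
			     prod ? descs[i].prod : descs[i].disable);
}

/* With such a helper, e.g. the SLCG perf routine would reduce to: */
void gr_gk20a_slcg_perf_load_gating_prod(struct gk20a *g, bool prod)
{
	gk20a_load_gating_table(g, gk20a_slcg_perf,
			sizeof(gk20a_slcg_perf) / sizeof(struct gating_desc),
			prod);
}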
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h
new file mode 100644
index 00000000..40a6c545
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h
@@ -0,0 +1,39 @@
1/*
2 * drivers/video/tegra/host/gk20a/gk20a_gating_reglist.h
3 *
4 * Copyright (c) 2012, NVIDIA Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 *
19 * This file is autogenerated. Do not edit.
20 */
21
22#include "gk20a.h"
23
24void gr_gk20a_slcg_gr_load_gating_prod(struct gk20a *g,
25 bool prod);
26
27void gr_gk20a_slcg_perf_load_gating_prod(struct gk20a *g,
28 bool prod);
29
30void gr_gk20a_blcg_gr_load_gating_prod(struct gk20a *g,
31 bool prod);
32
33void gr_gk20a_pg_gr_load_gating_prod(struct gk20a *g,
34 bool prod);
35
36void gr_gk20a_slcg_therm_load_gating_prod(struct gk20a *g,
37 bool prod);
38
39
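Editor's note: these declarations are consumed through the per-GPU HAL; gk20a_sysfs.c further below toggles gating via g->ops.clock_gating.*. A sketch of how the gk20a HAL could wire the functions up follows; the exact layout of the ops structure is an assumption here, inferred only from those call sites.

/* Sketch only: the clock_gating ops field names are assumed from the
 * g->ops.clock_gating.* calls in gk20a_sysfs.c. */
void gk20a_init_clock_gating_ops(struct gpu_ops *gops)
{
	gops->clock_gating.slcg_gr_load_gating_prod =
		gr_gk20a_slcg_gr_load_gating_prod;
	gops->clock_gating.slcg_perf_load_gating_prod =
		gr_gk20a_slcg_perf_load_gating_prod;
	gops->clock_gating.blcg_gr_load_gating_prod =
		gr_gk20a_blcg_gr_load_gating_prod;
}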
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
new file mode 100644
index 00000000..d1fd71fe
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
@@ -0,0 +1,358 @@
1/*
2 * gk20a clock scaling profile
3 *
4 * Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/devfreq.h>
20#include <linux/debugfs.h>
21#include <linux/types.h>
22#include <linux/clk.h>
23#include <linux/export.h>
24#include <linux/slab.h>
25#include <linux/clk/tegra.h>
26#include <linux/tegra-soc.h>
27#include <linux/platform_data/tegra_edp.h>
28#include <linux/pm_qos.h>
29
30#include <governor.h>
31
32#include "gk20a.h"
33#include "pmu_gk20a.h"
34#include "clk_gk20a.h"
35#include "gk20a_scale.h"
36
37static ssize_t gk20a_scale_load_show(struct device *dev,
38 struct device_attribute *attr,
39 char *buf)
40{
41 struct platform_device *pdev = to_platform_device(dev);
42 struct gk20a *g = get_gk20a(pdev);
43 u32 busy_time;
44 ssize_t res;
45
46 if (!g->power_on) {
47 busy_time = 0;
48 } else {
49 gk20a_busy(g->dev);
50 gk20a_pmu_load_norm(g, &busy_time);
51 gk20a_idle(g->dev);
52 }
53
54 res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time);
55
56 return res;
57}
58
59static DEVICE_ATTR(load, S_IRUGO, gk20a_scale_load_show, NULL);
60
61/*
62 * gk20a_scale_qos_notify()
63 *
64 * This function is called when the minimum QoS requirement for the device
 65 * has changed. The function calls the postscaling callback if one is defined.
66 */
67
68static int gk20a_scale_qos_notify(struct notifier_block *nb,
69 unsigned long n, void *p)
70{
71 struct gk20a_scale_profile *profile =
72 container_of(nb, struct gk20a_scale_profile,
73 qos_notify_block);
74 struct gk20a_platform *platform = platform_get_drvdata(profile->pdev);
75 struct gk20a *g = get_gk20a(profile->pdev);
76 unsigned long freq;
77
78 if (!platform->postscale)
79 return NOTIFY_OK;
80
 81	/* get the frequency requirement. if devfreq is enabled, check whether
 82	 * it has a higher demand than qos */
83 freq = gk20a_clk_round_rate(g, pm_qos_request(platform->qos_id));
84 if (g->devfreq)
85 freq = max(g->devfreq->previous_freq, freq);
86
87 platform->postscale(profile->pdev, freq);
88
89 return NOTIFY_OK;
90}
91
92/*
93 * gk20a_scale_make_freq_table(profile)
94 *
95 * This function initialises the frequency table for the given device profile
96 */
97
98static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile)
99{
100 struct gk20a *g = get_gk20a(profile->pdev);
101 unsigned long *freqs;
102 int num_freqs, err;
103
104 /* make sure the clock is available */
105 if (!gk20a_clk_get(g))
106 return -ENOSYS;
107
108 /* get gpu dvfs table */
109 err = tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk),
110 &freqs, &num_freqs);
111 if (err)
112 return -ENOSYS;
113
114 profile->devfreq_profile.freq_table = (unsigned long *)freqs;
115 profile->devfreq_profile.max_state = num_freqs;
116
117 return 0;
118}
119
120/*
121 * gk20a_scale_target(dev, *freq, flags)
122 *
123 * This function scales the clock
124 */
125
126static int gk20a_scale_target(struct device *dev, unsigned long *freq,
127 u32 flags)
128{
129 struct gk20a *g = get_gk20a(to_platform_device(dev));
130 struct gk20a_platform *platform = dev_get_drvdata(dev);
131 struct gk20a_scale_profile *profile = g->scale_profile;
132 unsigned long rounded_rate = gk20a_clk_round_rate(g, *freq);
133
134 if (gk20a_clk_get_rate(g) == rounded_rate) {
135 *freq = rounded_rate;
136 return 0;
137 }
138
139 gk20a_clk_set_rate(g, rounded_rate);
140 if (platform->postscale)
141 platform->postscale(profile->pdev, rounded_rate);
142 *freq = gk20a_clk_get_rate(g);
143
144 return 0;
145}
146
147/*
148 * update_load_estimate_gpmu(profile)
149 *
150 * Update load estimate using gpmu. The gpmu value is normalised
151 * based on the time it was asked last time.
152 */
153
154static void update_load_estimate_gpmu(struct platform_device *pdev)
155{
156 struct gk20a *g = get_gk20a(pdev);
157 struct gk20a_scale_profile *profile = g->scale_profile;
158 unsigned long dt;
159 u32 busy_time;
160 ktime_t t;
161
162 t = ktime_get();
163 dt = ktime_us_delta(t, profile->last_event_time);
164
165 profile->dev_stat.total_time = dt;
166 profile->last_event_time = t;
167 gk20a_pmu_load_norm(g, &busy_time);
168 profile->dev_stat.busy_time = (busy_time * dt) / 1000;
169}
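Editor's note: to make the normalisation concrete, gk20a_pmu_load_norm() appears to report load on a per-mille scale (hence the division by 1000), so a reading of 650 taken 20000 us after the previous event yields busy_time = 650 * 20000 / 1000 = 13000 us against total_time = 20000 us, i.e. 65% utilisation for the interval. A tiny sketch of the same arithmetic with made-up numbers:

/* Worked example of the interval load estimate; the per-mille scale of
 * the PMU value is an assumption based on the division by 1000 above. */
static unsigned long example_busy_estimate(void)
{
	unsigned int pmu_load = 650;	/* hypothetical PMU reading */
	unsigned long dt = 20000;	/* us since the last event */

	return (pmu_load * dt) / 1000;	/* 13000 us busy out of 20000 us */
}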
170
171/*
172 * gk20a_scale_suspend(pdev)
173 *
174 * This function informs devfreq of suspend
175 */
176
177void gk20a_scale_suspend(struct platform_device *pdev)
178{
179 struct gk20a *g = get_gk20a(pdev);
180 struct devfreq *devfreq = g->devfreq;
181
182 if (!devfreq)
183 return;
184
185 devfreq_suspend_device(devfreq);
186}
187
188/*
189 * gk20a_scale_resume(pdev)
190 *
191 * This function informs devfreq of resume
192 */
193
194void gk20a_scale_resume(struct platform_device *pdev)
195{
196 struct gk20a *g = get_gk20a(pdev);
197 struct devfreq *devfreq = g->devfreq;
198
199 if (!devfreq)
200 return;
201
202 devfreq_resume_device(devfreq);
203}
204
205/*
206 * gk20a_scale_notify(pdev, busy)
207 *
208 * Calling this function informs the scaling mechanism that the device is
209 * idling (or busy). This data is used to estimate the current load.
210 */
211
212static void gk20a_scale_notify(struct platform_device *pdev, bool busy)
213{
214 struct gk20a_platform *platform = platform_get_drvdata(pdev);
215 struct gk20a *g = get_gk20a(pdev);
216 struct gk20a_scale_profile *profile = g->scale_profile;
217 struct devfreq *devfreq = g->devfreq;
218
219 /* inform edp about new constraint */
220 if (platform->prescale)
221 platform->prescale(pdev);
222
223 /* Is the device profile initialised? */
224 if (!(profile && devfreq))
225 return;
226
227 mutex_lock(&devfreq->lock);
228 profile->dev_stat.busy = busy;
229 update_devfreq(devfreq);
230 mutex_unlock(&devfreq->lock);
231}
232
233void gk20a_scale_notify_idle(struct platform_device *pdev)
234{
235 gk20a_scale_notify(pdev, false);
236
237}
238
239void gk20a_scale_notify_busy(struct platform_device *pdev)
240{
241 gk20a_scale_notify(pdev, true);
242}
243
244/*
245 * gk20a_scale_get_dev_status(dev, *stat)
246 *
247 * This function queries the current device status.
248 */
249
250static int gk20a_scale_get_dev_status(struct device *dev,
251 struct devfreq_dev_status *stat)
252{
253 struct gk20a *g = get_gk20a(to_platform_device(dev));
254 struct gk20a_scale_profile *profile = g->scale_profile;
255
256 /* Make sure there are correct values for the current frequency */
257 profile->dev_stat.current_frequency = gk20a_clk_get_rate(g);
258
259 /* Update load estimate */
260 update_load_estimate_gpmu(to_platform_device(dev));
261
262 /* Copy the contents of the current device status */
263 *stat = profile->dev_stat;
264
265 /* Finally, clear out the local values */
266 profile->dev_stat.total_time = 0;
267 profile->dev_stat.busy_time = 0;
268
269 return 0;
270}
271
272/*
273 * gk20a_scale_init(pdev)
274 */
275
276void gk20a_scale_init(struct platform_device *pdev)
277{
278 struct gk20a_platform *platform = platform_get_drvdata(pdev);
279 struct gk20a *g = platform->g;
280 struct gk20a_scale_profile *profile;
281 int err;
282
283 if (g->scale_profile)
284 return;
285
286 profile = kzalloc(sizeof(*profile), GFP_KERNEL);
287
288 profile->pdev = pdev;
289 profile->dev_stat.busy = false;
290
291 /* Create frequency table */
292 err = gk20a_scale_make_freq_table(profile);
293 if (err || !profile->devfreq_profile.max_state)
294 goto err_get_freqs;
295
296 if (device_create_file(&pdev->dev, &dev_attr_load))
297 goto err_create_sysfs_entry;
298
299	/* Store the device profile so it can be accessed if the devfreq
300	 * governor init needs it */
301 g->scale_profile = profile;
302
303 if (platform->devfreq_governor) {
304 struct devfreq *devfreq;
305
306 profile->devfreq_profile.initial_freq =
307 profile->devfreq_profile.freq_table[0];
308 profile->devfreq_profile.target = gk20a_scale_target;
309 profile->devfreq_profile.get_dev_status =
310 gk20a_scale_get_dev_status;
311
312 devfreq = devfreq_add_device(&pdev->dev,
313 &profile->devfreq_profile,
314 platform->devfreq_governor, NULL);
315
316 if (IS_ERR(devfreq))
317 devfreq = NULL;
318
319 g->devfreq = devfreq;
320 }
321
322 /* Should we register QoS callback for this device? */
323 if (platform->qos_id < PM_QOS_NUM_CLASSES &&
324 platform->qos_id != PM_QOS_RESERVED &&
325 platform->postscale) {
326 profile->qos_notify_block.notifier_call =
327 &gk20a_scale_qos_notify;
328 pm_qos_add_notifier(platform->qos_id,
329 &profile->qos_notify_block);
330 }
331
332 return;
333
334err_get_freqs:
335 device_remove_file(&pdev->dev, &dev_attr_load);
336err_create_sysfs_entry:
337 kfree(g->scale_profile);
338 g->scale_profile = NULL;
339}
340
341/*
342 * gk20a_scale_hw_init(dev)
343 *
344 * Initialize hardware portion of the device
345 */
346
347void gk20a_scale_hw_init(struct platform_device *pdev)
348{
349 struct gk20a_platform *platform = platform_get_drvdata(pdev);
350 struct gk20a_scale_profile *profile = platform->g->scale_profile;
351
352	/* make sure that scaling has been initialised */
353 if (!profile)
354 return;
355
356 profile->dev_stat.total_time = 0;
357 profile->last_event_time = ktime_get();
358}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.h b/drivers/gpu/nvgpu/gk20a/gk20a_scale.h
new file mode 100644
index 00000000..e76b1662
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.h
@@ -0,0 +1,51 @@
1/*
2 * gk20a clock scaling profile
3 *
4 * Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef GK20A_SCALE_H
20#define GK20A_SCALE_H
21
22#include <linux/nvhost.h>
23#include <linux/devfreq.h>
24
25struct platform_device;
26struct clk;
27
28struct gk20a_scale_profile {
29 struct platform_device *pdev;
30 ktime_t last_event_time;
31 struct devfreq_dev_profile devfreq_profile;
32 struct devfreq_dev_status dev_stat;
33 struct notifier_block qos_notify_block;
34 void *private_data;
35};
36
37/* Initialization and de-initialization for module */
38void gk20a_scale_init(struct platform_device *);
39void gk20a_scale_hw_init(struct platform_device *pdev);
40
41/*
42 * Call when performing a submit to notify the scaling mechanism that the
43 * module is in use.
44 */
45void gk20a_scale_notify_busy(struct platform_device *);
46void gk20a_scale_notify_idle(struct platform_device *);
47
48void gk20a_scale_suspend(struct platform_device *);
49void gk20a_scale_resume(struct platform_device *);
50
51#endif
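Editor's note: the busy/idle notifiers above are meant to bracket periods of GPU activity so devfreq sees an up-to-date load. A hedged sketch of a caller follows; the submit function and its surroundings are made up for illustration only.

/* Hypothetical submit path: mark the device busy before pushing work
 * and idle once the work has been handed off. */
static int example_submit(struct platform_device *pdev)
{
	gk20a_scale_notify_busy(pdev);

	/* ... push the job to the channel here ... */

	gk20a_scale_notify_idle(pdev);
	return 0;
}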
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
new file mode 100644
index 00000000..f6b43f50
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
@@ -0,0 +1,335 @@
1/*
2 * drivers/video/tegra/host/gk20a/gk20a_sysfs.c
3 *
4 * GK20A Graphics
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#include <linux/platform_device.h>
22#include <linux/pm_runtime.h>
23#include <linux/kernel.h>
24#include <linux/fb.h>
25
26#include <mach/clk.h>
27
28#include "gk20a.h"
29#include "gr_gk20a.h"
30#include "fifo_gk20a.h"
31
32
33#define PTIMER_FP_FACTOR 1000000
34/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. 32 ns is
35 the resolution of ptimer. */
36#define PTIMER_REF_FREQ_HZ 31250000
37
38
39static ssize_t elcg_enable_store(struct device *device,
40 struct device_attribute *attr, const char *buf, size_t count)
41{
42 struct platform_device *ndev = to_platform_device(device);
43 struct gk20a *g = get_gk20a(ndev);
44 unsigned long val = 0;
45
46 if (kstrtoul(buf, 10, &val) < 0)
47 return -EINVAL;
48
49 gk20a_busy(g->dev);
50 if (val) {
51 g->elcg_enabled = true;
52 gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
53 gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
54 } else {
55 g->elcg_enabled = false;
56 gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A);
57 gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A);
58 }
59 gk20a_idle(g->dev);
60
61 dev_info(device, "ELCG is %s.\n", g->elcg_enabled ? "enabled" :
62 "disabled");
63
64 return count;
65}
66
67static ssize_t elcg_enable_read(struct device *device,
68 struct device_attribute *attr, char *buf)
69{
70 struct platform_device *ndev = to_platform_device(device);
71 struct gk20a *g = get_gk20a(ndev);
72
73 return sprintf(buf, "%d\n", g->elcg_enabled ? 1 : 0);
74}
75
76static DEVICE_ATTR(elcg_enable, S_IRWXUGO, elcg_enable_read, elcg_enable_store);
77
78static ssize_t blcg_enable_store(struct device *device,
79 struct device_attribute *attr, const char *buf, size_t count)
80{
81 struct platform_device *ndev = to_platform_device(device);
82 struct gk20a *g = get_gk20a(ndev);
83 unsigned long val = 0;
84
85 if (kstrtoul(buf, 10, &val) < 0)
86 return -EINVAL;
87
88 if (val)
89 g->blcg_enabled = true;
90 else
91 g->blcg_enabled = false;
92
93 gk20a_busy(g->dev);
94 g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled);
95 gk20a_idle(g->dev);
96
97 dev_info(device, "BLCG is %s.\n", g->blcg_enabled ? "enabled" :
98 "disabled");
99
100 return count;
101}
102
103static ssize_t blcg_enable_read(struct device *device,
104 struct device_attribute *attr, char *buf)
105{
106 struct platform_device *ndev = to_platform_device(device);
107 struct gk20a *g = get_gk20a(ndev);
108
109 return sprintf(buf, "%d\n", g->blcg_enabled ? 1 : 0);
110}
111
112static DEVICE_ATTR(blcg_enable, S_IRWXUGO, blcg_enable_read, blcg_enable_store);
113
114static ssize_t slcg_enable_store(struct device *device,
115 struct device_attribute *attr, const char *buf, size_t count)
116{
117 struct platform_device *ndev = to_platform_device(device);
118 struct gk20a *g = get_gk20a(ndev);
119 unsigned long val = 0;
120
121 if (kstrtoul(buf, 10, &val) < 0)
122 return -EINVAL;
123
124 if (val)
125 g->slcg_enabled = true;
126 else
127 g->slcg_enabled = false;
128
129 /*
130 * TODO: slcg_therm_load_gating is not enabled anywhere during
131 * init. Therefore, it would be incongruous to add it here. Once
132 * it is added to init, we should add it here too.
133 */
134 gk20a_busy(g->dev);
135 g->ops.clock_gating.slcg_gr_load_gating_prod(g, g->slcg_enabled);
136 g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled);
137 gk20a_idle(g->dev);
138
139 dev_info(device, "SLCG is %s.\n", g->slcg_enabled ? "enabled" :
140 "disabled");
141
142 return count;
143}
144
145static ssize_t slcg_enable_read(struct device *device,
146 struct device_attribute *attr, char *buf)
147{
148 struct platform_device *ndev = to_platform_device(device);
149 struct gk20a *g = get_gk20a(ndev);
150
151 return sprintf(buf, "%d\n", g->slcg_enabled ? 1 : 0);
152}
153
154static DEVICE_ATTR(slcg_enable, S_IRWXUGO, slcg_enable_read, slcg_enable_store);
155
156static ssize_t ptimer_scale_factor_show(struct device *dev,
157 struct device_attribute *attr,
158 char *buf)
159{
160 u32 tsc_freq_hz = clk_get_rate(clk_get_sys(NULL, "clk_m"));
161 u32 scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) /
162 ((u32)(tsc_freq_hz) /
163 (u32)(PTIMER_FP_FACTOR));
164 ssize_t res = snprintf(buf,
165 PAGE_SIZE,
166 "%u.%u\n",
167 scaling_factor_fp / PTIMER_FP_FACTOR,
168 scaling_factor_fp % PTIMER_FP_FACTOR);
169
170 return res;
171}
172
173static DEVICE_ATTR(ptimer_scale_factor,
174 S_IRUGO,
175 ptimer_scale_factor_show,
176 NULL);
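Editor's note: as a sanity check on the fixed-point arithmetic in ptimer_scale_factor_show(): with clk_m at 12 MHz (an example rate, not a claim about any board), scaling_factor_fp = 31250000 / (12000000 / 1000000) = 2604166, which the attribute prints as "2.604166". A small sketch using the macros defined above:

/* Worked example of the ptimer scale factor; the 12 MHz clk_m rate is
 * assumed purely for illustration. */
static u32 example_ptimer_scale_fp(void)
{
	u32 tsc_freq_hz = 12000000;

	/* 31250000 / (12000000 / 1000000) == 2604166 -> "2.604166" */
	return PTIMER_REF_FREQ_HZ / (tsc_freq_hz / PTIMER_FP_FACTOR);
}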
177
178static ssize_t railgate_delay_store(struct device *dev,
179 struct device_attribute *attr,
180 const char *buf, size_t count)
181{
182 struct gk20a_platform *platform = dev_get_drvdata(dev);
183 int railgate_delay = 0, ret = 0;
184
185 if (!platform->can_railgate) {
186 dev_info(dev, "does not support power-gating\n");
187 return count;
188 }
189
190 ret = sscanf(buf, "%d", &railgate_delay);
191 if (ret == 1 && railgate_delay >= 0) {
192 struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain);
193 platform->railgate_delay = railgate_delay;
194 pm_genpd_set_poweroff_delay(genpd, platform->railgate_delay);
195 } else
196 dev_err(dev, "Invalid powergate delay\n");
197
198 return count;
199}
200static ssize_t railgate_delay_show(struct device *dev,
201 struct device_attribute *attr, char *buf)
202{
203 struct gk20a_platform *platform = dev_get_drvdata(dev);
204 return snprintf(buf, PAGE_SIZE, "%d\n", platform->railgate_delay);
205}
206static DEVICE_ATTR(railgate_delay, S_IRWXUGO, railgate_delay_show,
207 railgate_delay_store);
208
209static ssize_t clockgate_delay_store(struct device *dev,
210 struct device_attribute *attr,
211 const char *buf, size_t count)
212{
213 struct gk20a_platform *platform = dev_get_drvdata(dev);
214 int clockgate_delay = 0, ret = 0;
215
216 ret = sscanf(buf, "%d", &clockgate_delay);
217 if (ret == 1 && clockgate_delay >= 0) {
218 platform->clockgate_delay = clockgate_delay;
219 pm_runtime_set_autosuspend_delay(dev,
220 platform->clockgate_delay);
221 } else
222 dev_err(dev, "Invalid clockgate delay\n");
223
224 return count;
225}
226static ssize_t clockgate_delay_show(struct device *dev,
227 struct device_attribute *attr, char *buf)
228{
229 struct gk20a_platform *platform = dev_get_drvdata(dev);
230 return snprintf(buf, PAGE_SIZE, "%d\n", platform->clockgate_delay);
231}
232static DEVICE_ATTR(clockgate_delay, S_IRWXUGO, clockgate_delay_show,
233 clockgate_delay_store);
234
235static ssize_t counters_show(struct device *dev,
236 struct device_attribute *attr, char *buf)
237{
238 struct platform_device *pdev = to_platform_device(dev);
239 struct gk20a *g = get_gk20a(pdev);
240 u32 busy_cycles, total_cycles;
241 ssize_t res;
242
243 gk20a_pmu_get_load_counters(g, &busy_cycles, &total_cycles);
244
245 res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles);
246
247 return res;
248}
249
250static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL);
251static ssize_t counters_show_reset(struct device *dev,
252 struct device_attribute *attr, char *buf)
253{
254 ssize_t res = counters_show(dev, attr, buf);
255 struct platform_device *pdev = to_platform_device(dev);
256 struct gk20a *g = get_gk20a(pdev);
257
258 gk20a_pmu_reset_load_counters(g);
259
260 return res;
261}
262
263static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL);
264
265static ssize_t elpg_enable_store(struct device *device,
266 struct device_attribute *attr, const char *buf, size_t count)
267{
268 struct platform_device *ndev = to_platform_device(device);
269 struct gk20a *g = get_gk20a(ndev);
270 unsigned long val = 0;
271
272 if (kstrtoul(buf, 10, &val) < 0)
273 return -EINVAL;
274
275 /*
276	 * Since elpg is refcounted, we should not call enable/disable
277	 * again when it is already in the requested state.
278 */
279 gk20a_channel_busy(g->dev);
280 if (val && !g->elpg_enabled) {
281 g->elpg_enabled = true;
282 gk20a_pmu_enable_elpg(g);
283 } else if (!val && g->elpg_enabled) {
284 g->elpg_enabled = false;
285 gk20a_pmu_disable_elpg(g);
286 }
287 gk20a_channel_idle(g->dev);
288
289 dev_info(device, "ELPG is %s.\n", g->elpg_enabled ? "enabled" :
290 "disabled");
291
292 return count;
293}
294
295static ssize_t elpg_enable_read(struct device *device,
296 struct device_attribute *attr, char *buf)
297{
298 struct platform_device *ndev = to_platform_device(device);
299 struct gk20a *g = get_gk20a(ndev);
300
301 return sprintf(buf, "%d\n", g->elpg_enabled ? 1 : 0);
302}
303
304static DEVICE_ATTR(elpg_enable, S_IRWXUGO, elpg_enable_read, elpg_enable_store);
305
306void gk20a_remove_sysfs(struct device *dev)
307{
308 device_remove_file(dev, &dev_attr_elcg_enable);
309 device_remove_file(dev, &dev_attr_blcg_enable);
310 device_remove_file(dev, &dev_attr_slcg_enable);
311 device_remove_file(dev, &dev_attr_ptimer_scale_factor);
312 device_remove_file(dev, &dev_attr_elpg_enable);
313 device_remove_file(dev, &dev_attr_counters);
314 device_remove_file(dev, &dev_attr_counters_reset);
315 device_remove_file(dev, &dev_attr_railgate_delay);
316 device_remove_file(dev, &dev_attr_clockgate_delay);
317}
318
319void gk20a_create_sysfs(struct platform_device *dev)
320{
321 int error = 0;
322
323 error |= device_create_file(&dev->dev, &dev_attr_elcg_enable);
324 error |= device_create_file(&dev->dev, &dev_attr_blcg_enable);
325 error |= device_create_file(&dev->dev, &dev_attr_slcg_enable);
326 error |= device_create_file(&dev->dev, &dev_attr_ptimer_scale_factor);
327 error |= device_create_file(&dev->dev, &dev_attr_elpg_enable);
328 error |= device_create_file(&dev->dev, &dev_attr_counters);
329 error |= device_create_file(&dev->dev, &dev_attr_counters_reset);
330 error |= device_create_file(&dev->dev, &dev_attr_railgate_delay);
331 error |= device_create_file(&dev->dev, &dev_attr_clockgate_delay);
332
333 if (error)
334 dev_err(&dev->dev, "Failed to create sysfs attributes!\n");
335}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c
new file mode 100644
index 00000000..59404f1d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c
@@ -0,0 +1,333 @@
1/*
2 * drivers/video/tegra/host/gk20a/gr_ctx_gk20a.c
3 *
4 * GK20A Graphics Context
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#include <linux/firmware.h>
23
24#include "gk20a.h"
25#include "gr_ctx_gk20a.h"
26#include "hw_gr_gk20a.h"
27
28static int gr_gk20a_alloc_load_netlist_u32(u32 *src, u32 len,
29 struct u32_list_gk20a *u32_list)
30{
31 u32_list->count = (len + sizeof(u32) - 1) / sizeof(u32);
32 if (!alloc_u32_list_gk20a(u32_list))
33 return -ENOMEM;
34
35 memcpy(u32_list->l, src, len);
36
37 return 0;
38}
39
40static int gr_gk20a_alloc_load_netlist_av(u32 *src, u32 len,
41 struct av_list_gk20a *av_list)
42{
43 av_list->count = len / sizeof(struct av_gk20a);
44 if (!alloc_av_list_gk20a(av_list))
45 return -ENOMEM;
46
47 memcpy(av_list->l, src, len);
48
49 return 0;
50}
51
52static int gr_gk20a_alloc_load_netlist_aiv(u32 *src, u32 len,
53 struct aiv_list_gk20a *aiv_list)
54{
55 aiv_list->count = len / sizeof(struct aiv_gk20a);
56 if (!alloc_aiv_list_gk20a(aiv_list))
57 return -ENOMEM;
58
59 memcpy(aiv_list->l, src, len);
60
61 return 0;
62}
63
64static int gr_gk20a_get_netlist_name(int index, char *name)
65{
66 switch (index) {
67#ifdef GK20A_NETLIST_IMAGE_FW_NAME
68 case NETLIST_FINAL:
69 sprintf(name, GK20A_NETLIST_IMAGE_FW_NAME);
70 return 0;
71#endif
72#ifdef GK20A_NETLIST_IMAGE_A
73 case NETLIST_SLOT_A:
74 sprintf(name, GK20A_NETLIST_IMAGE_A);
75 return 0;
76#endif
77#ifdef GK20A_NETLIST_IMAGE_B
78 case NETLIST_SLOT_B:
79 sprintf(name, GK20A_NETLIST_IMAGE_B);
80 return 0;
81#endif
82#ifdef GK20A_NETLIST_IMAGE_C
83 case NETLIST_SLOT_C:
84 sprintf(name, GK20A_NETLIST_IMAGE_C);
85 return 0;
86#endif
87#ifdef GK20A_NETLIST_IMAGE_D
88 case NETLIST_SLOT_D:
89 sprintf(name, GK20A_NETLIST_IMAGE_D);
90 return 0;
91#endif
92 default:
93 return -1;
94 }
95
96 return -1;
97}
98
99static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr)
100{
101 struct device *d = dev_from_gk20a(g);
102 const struct firmware *netlist_fw;
103 struct netlist_image *netlist = NULL;
104 char name[MAX_NETLIST_NAME];
105 u32 i, major_v = ~0, major_v_hw, netlist_num;
106 int net, max, err = -ENOENT;
107
108 gk20a_dbg_fn("");
109
110#ifdef GK20A_NETLIST_IMAGE_FW_NAME
111 net = NETLIST_FINAL;
112 max = 0;
113 major_v_hw = ~0;
114 g->gr.ctx_vars.dynamic = false;
115#else
116 net = NETLIST_SLOT_A;
117 max = MAX_NETLIST;
118 major_v_hw = gk20a_readl(g, gr_fecs_ctx_state_store_major_rev_id_r());
119 g->gr.ctx_vars.dynamic = true;
120#endif
121
122 for (; net < max; net++) {
123
124 if (gr_gk20a_get_netlist_name(net, name) != 0) {
125 gk20a_warn(d, "invalid netlist index %d", net);
126 continue;
127 }
128
129 netlist_fw = gk20a_request_firmware(g, name);
130 if (!netlist_fw) {
131 gk20a_warn(d, "failed to load netlist %s", name);
132 continue;
133 }
134
135 netlist = (struct netlist_image *)netlist_fw->data;
136
137 for (i = 0; i < netlist->header.regions; i++) {
138 u32 *src = (u32 *)((u8 *)netlist + netlist->regions[i].data_offset);
139 u32 size = netlist->regions[i].data_size;
140
141 switch (netlist->regions[i].region_id) {
142 case NETLIST_REGIONID_FECS_UCODE_DATA:
143 gk20a_dbg_info("NETLIST_REGIONID_FECS_UCODE_DATA");
144 err = gr_gk20a_alloc_load_netlist_u32(
145 src, size, &g->gr.ctx_vars.ucode.fecs.data);
146 if (err)
147 goto clean_up;
148 break;
149 case NETLIST_REGIONID_FECS_UCODE_INST:
150 gk20a_dbg_info("NETLIST_REGIONID_FECS_UCODE_INST");
151 err = gr_gk20a_alloc_load_netlist_u32(
152 src, size, &g->gr.ctx_vars.ucode.fecs.inst);
153 if (err)
154 goto clean_up;
155 break;
156 case NETLIST_REGIONID_GPCCS_UCODE_DATA:
157 gk20a_dbg_info("NETLIST_REGIONID_GPCCS_UCODE_DATA");
158 err = gr_gk20a_alloc_load_netlist_u32(
159 src, size, &g->gr.ctx_vars.ucode.gpccs.data);
160 if (err)
161 goto clean_up;
162 break;
163 case NETLIST_REGIONID_GPCCS_UCODE_INST:
164 gk20a_dbg_info("NETLIST_REGIONID_GPCCS_UCODE_INST");
165 err = gr_gk20a_alloc_load_netlist_u32(
166 src, size, &g->gr.ctx_vars.ucode.gpccs.inst);
167 if (err)
168 goto clean_up;
169 break;
170 case NETLIST_REGIONID_SW_BUNDLE_INIT:
171 gk20a_dbg_info("NETLIST_REGIONID_SW_BUNDLE_INIT");
172 err = gr_gk20a_alloc_load_netlist_av(
173 src, size, &g->gr.ctx_vars.sw_bundle_init);
174 if (err)
175 goto clean_up;
176 break;
177 case NETLIST_REGIONID_SW_METHOD_INIT:
178 gk20a_dbg_info("NETLIST_REGIONID_SW_METHOD_INIT");
179 err = gr_gk20a_alloc_load_netlist_av(
180 src, size, &g->gr.ctx_vars.sw_method_init);
181 if (err)
182 goto clean_up;
183 break;
184 case NETLIST_REGIONID_SW_CTX_LOAD:
185 gk20a_dbg_info("NETLIST_REGIONID_SW_CTX_LOAD");
186 err = gr_gk20a_alloc_load_netlist_aiv(
187 src, size, &g->gr.ctx_vars.sw_ctx_load);
188 if (err)
189 goto clean_up;
190 break;
191 case NETLIST_REGIONID_SW_NON_CTX_LOAD:
192 gk20a_dbg_info("NETLIST_REGIONID_SW_NON_CTX_LOAD");
193 err = gr_gk20a_alloc_load_netlist_av(
194 src, size, &g->gr.ctx_vars.sw_non_ctx_load);
195 if (err)
196 goto clean_up;
197 break;
198 case NETLIST_REGIONID_CTXREG_SYS:
199 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_SYS");
200 err = gr_gk20a_alloc_load_netlist_aiv(
201 src, size, &g->gr.ctx_vars.ctxsw_regs.sys);
202 if (err)
203 goto clean_up;
204 break;
205 case NETLIST_REGIONID_CTXREG_GPC:
206 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_GPC");
207 err = gr_gk20a_alloc_load_netlist_aiv(
208 src, size, &g->gr.ctx_vars.ctxsw_regs.gpc);
209 if (err)
210 goto clean_up;
211 break;
212 case NETLIST_REGIONID_CTXREG_TPC:
213 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_TPC");
214 err = gr_gk20a_alloc_load_netlist_aiv(
215 src, size, &g->gr.ctx_vars.ctxsw_regs.tpc);
216 if (err)
217 goto clean_up;
218 break;
219 case NETLIST_REGIONID_CTXREG_ZCULL_GPC:
220 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_ZCULL_GPC");
221 err = gr_gk20a_alloc_load_netlist_aiv(
222 src, size, &g->gr.ctx_vars.ctxsw_regs.zcull_gpc);
223 if (err)
224 goto clean_up;
225 break;
226 case NETLIST_REGIONID_CTXREG_PPC:
227 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PPC");
228 err = gr_gk20a_alloc_load_netlist_aiv(
229 src, size, &g->gr.ctx_vars.ctxsw_regs.ppc);
230 if (err)
231 goto clean_up;
232 break;
233 case NETLIST_REGIONID_CTXREG_PM_SYS:
234 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PM_SYS");
235 err = gr_gk20a_alloc_load_netlist_aiv(
236 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_sys);
237 if (err)
238 goto clean_up;
239 break;
240 case NETLIST_REGIONID_CTXREG_PM_GPC:
241 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PM_GPC");
242 err = gr_gk20a_alloc_load_netlist_aiv(
243 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_gpc);
244 if (err)
245 goto clean_up;
246 break;
247 case NETLIST_REGIONID_CTXREG_PM_TPC:
248 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PM_TPC");
249 err = gr_gk20a_alloc_load_netlist_aiv(
250 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_tpc);
251 if (err)
252 goto clean_up;
253 break;
254 case NETLIST_REGIONID_BUFFER_SIZE:
255 g->gr.ctx_vars.buffer_size = *src;
256 gk20a_dbg_info("NETLIST_REGIONID_BUFFER_SIZE : %d",
257 g->gr.ctx_vars.buffer_size);
258 break;
259 case NETLIST_REGIONID_CTXSW_REG_BASE_INDEX:
260 g->gr.ctx_vars.regs_base_index = *src;
261 gk20a_dbg_info("NETLIST_REGIONID_CTXSW_REG_BASE_INDEX : %d",
262 g->gr.ctx_vars.regs_base_index);
263 break;
264 case NETLIST_REGIONID_MAJORV:
265 major_v = *src;
266 gk20a_dbg_info("NETLIST_REGIONID_MAJORV : %d",
267 major_v);
268 break;
269 case NETLIST_REGIONID_NETLIST_NUM:
270 netlist_num = *src;
271 gk20a_dbg_info("NETLIST_REGIONID_NETLIST_NUM : %d",
272 netlist_num);
273 break;
274 case NETLIST_REGIONID_CTXREG_PMPPC:
275 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMPPC skipped");
276 break;
277 default:
278 gk20a_warn(d, "unrecognized region %d skipped", i);
279 break;
280 }
281 }
282
283 if (net != NETLIST_FINAL && major_v != major_v_hw) {
284 gk20a_dbg_info("skip %s: major_v 0x%08x doesn't match hw 0x%08x",
285 name, major_v, major_v_hw);
286 goto clean_up;
287 }
288
289 g->gr.ctx_vars.valid = true;
290 g->gr.netlist = net;
291
292 release_firmware(netlist_fw);
293 gk20a_dbg_fn("done");
294 goto done;
295
296clean_up:
297 kfree(g->gr.ctx_vars.ucode.fecs.inst.l);
298 kfree(g->gr.ctx_vars.ucode.fecs.data.l);
299 kfree(g->gr.ctx_vars.ucode.gpccs.inst.l);
300 kfree(g->gr.ctx_vars.ucode.gpccs.data.l);
301 kfree(g->gr.ctx_vars.sw_bundle_init.l);
302 kfree(g->gr.ctx_vars.sw_method_init.l);
303 kfree(g->gr.ctx_vars.sw_ctx_load.l);
304 kfree(g->gr.ctx_vars.sw_non_ctx_load.l);
305 kfree(g->gr.ctx_vars.ctxsw_regs.sys.l);
306 kfree(g->gr.ctx_vars.ctxsw_regs.gpc.l);
307 kfree(g->gr.ctx_vars.ctxsw_regs.tpc.l);
308 kfree(g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l);
309 kfree(g->gr.ctx_vars.ctxsw_regs.ppc.l);
310 kfree(g->gr.ctx_vars.ctxsw_regs.pm_sys.l);
311 kfree(g->gr.ctx_vars.ctxsw_regs.pm_gpc.l);
312 kfree(g->gr.ctx_vars.ctxsw_regs.pm_tpc.l);
313 release_firmware(netlist_fw);
314 err = -ENOENT;
315 }
316
317done:
318 if (g->gr.ctx_vars.valid) {
319 gk20a_dbg_info("netlist image %s loaded", name);
320 return 0;
321 } else {
322 gk20a_err(d, "failed to load netlist image!!");
323 return err;
324 }
325}
326
327int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr)
328{
329 if (tegra_platform_is_linsim())
330 return gr_gk20a_init_ctx_vars_sim(g, gr);
331 else
332 return gr_gk20a_init_ctx_vars_fw(g, gr);
333}
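Editor's note: the firmware blob parsed above is self-describing: a netlist_image_header followed by an array of netlist_region entries whose data_offset/data_size fields point back into the blob (the structures are defined in gr_ctx_gk20a.h below). A minimal sketch of the same region walk, stripped of the gk20a bookkeeping; the callback shape is made up for illustration.

/* Sketch: hand every region of a raw netlist blob to a callback.
 * Mirrors the loop in gr_gk20a_init_ctx_vars_fw() above. */
static void netlist_for_each_region(const struct firmware *fw,
		void (*cb)(u32 region_id, const u32 *data, u32 size))
{
	const struct netlist_image *img =
		(const struct netlist_image *)fw->data;
	u32 i;

	for (i = 0; i < img->header.regions; i++) {
		const u32 *src = (const u32 *)((const u8 *)img +
				img->regions[i].data_offset);

		cb(img->regions[i].region_id, src,
		   img->regions[i].data_size);
	}
}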
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h
new file mode 100644
index 00000000..909a166a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h
@@ -0,0 +1,149 @@
1/*
2 * GK20A Graphics Context
3 *
4 * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18#ifndef __GR_CTX_GK20A_H__
19#define __GR_CTX_GK20A_H__
20
21
22/* production netlist, one and only one from below */
23/*#undef GK20A_NETLIST_IMAGE_FW_NAME*/
24#define GK20A_NETLIST_IMAGE_FW_NAME GK20A_NETLIST_IMAGE_B
25/* emulation netlists, match majorV with HW */
26#define GK20A_NETLIST_IMAGE_A "NETA_img.bin"
27#define GK20A_NETLIST_IMAGE_B "NETB_img.bin"
28#define GK20A_NETLIST_IMAGE_C "NETC_img.bin"
29#define GK20A_NETLIST_IMAGE_D "NETD_img.bin"
30
31union __max_name {
32#ifdef GK20A_NETLIST_IMAGE_A
33 char __name_a[sizeof(GK20A_NETLIST_IMAGE_A)];
34#endif
35#ifdef GK20A_NETLIST_IMAGE_B
36 char __name_b[sizeof(GK20A_NETLIST_IMAGE_B)];
37#endif
38#ifdef GK20A_NETLIST_IMAGE_C
39 char __name_c[sizeof(GK20A_NETLIST_IMAGE_C)];
40#endif
41#ifdef GK20A_NETLIST_IMAGE_D
42 char __name_d[sizeof(GK20A_NETLIST_IMAGE_D)];
43#endif
44};
45
46#define MAX_NETLIST_NAME sizeof(union __max_name)
47
48/* index for emulation netlists */
49#define NETLIST_FINAL -1
50#define NETLIST_SLOT_A 0
51#define NETLIST_SLOT_B 1
52#define NETLIST_SLOT_C 2
53#define NETLIST_SLOT_D 3
54#define MAX_NETLIST 4
55
56/* netlist regions */
57#define NETLIST_REGIONID_FECS_UCODE_DATA 0
58#define NETLIST_REGIONID_FECS_UCODE_INST 1
59#define NETLIST_REGIONID_GPCCS_UCODE_DATA 2
60#define NETLIST_REGIONID_GPCCS_UCODE_INST 3
61#define NETLIST_REGIONID_SW_BUNDLE_INIT 4
62#define NETLIST_REGIONID_SW_CTX_LOAD 5
63#define NETLIST_REGIONID_SW_NON_CTX_LOAD 6
64#define NETLIST_REGIONID_SW_METHOD_INIT 7
65#define NETLIST_REGIONID_CTXREG_SYS 8
66#define NETLIST_REGIONID_CTXREG_GPC 9
67#define NETLIST_REGIONID_CTXREG_TPC 10
68#define NETLIST_REGIONID_CTXREG_ZCULL_GPC 11
69#define NETLIST_REGIONID_CTXREG_PM_SYS 12
70#define NETLIST_REGIONID_CTXREG_PM_GPC 13
71#define NETLIST_REGIONID_CTXREG_PM_TPC 14
72#define NETLIST_REGIONID_MAJORV 15
73#define NETLIST_REGIONID_BUFFER_SIZE 16
74#define NETLIST_REGIONID_CTXSW_REG_BASE_INDEX 17
75#define NETLIST_REGIONID_NETLIST_NUM 18
76#define NETLIST_REGIONID_CTXREG_PPC 19
77#define NETLIST_REGIONID_CTXREG_PMPPC 20
78
79struct netlist_region {
80 u32 region_id;
81 u32 data_size;
82 u32 data_offset;
83};
84
85struct netlist_image_header {
86 u32 version;
87 u32 regions;
88};
89
90struct netlist_image {
91 struct netlist_image_header header;
92 struct netlist_region regions[1];
93};
94
95struct av_gk20a {
96 u32 addr;
97 u32 value;
98};
99struct aiv_gk20a {
100 u32 addr;
101 u32 index;
102 u32 value;
103};
104struct aiv_list_gk20a {
105 struct aiv_gk20a *l;
106 u32 count;
107};
108struct av_list_gk20a {
109 struct av_gk20a *l;
110 u32 count;
111};
112struct u32_list_gk20a {
113 u32 *l;
114 u32 count;
115};
116
117static inline
118struct av_gk20a *alloc_av_list_gk20a(struct av_list_gk20a *avl)
119{
120 avl->l = kzalloc(avl->count * sizeof(*avl->l), GFP_KERNEL);
121 return avl->l;
122}
123
124static inline
125struct aiv_gk20a *alloc_aiv_list_gk20a(struct aiv_list_gk20a *aivl)
126{
127 aivl->l = kzalloc(aivl->count * sizeof(*aivl->l), GFP_KERNEL);
128 return aivl->l;
129}
130
131static inline
132u32 *alloc_u32_list_gk20a(struct u32_list_gk20a *u32l)
133{
134 u32l->l = kzalloc(u32l->count * sizeof(*u32l->l), GFP_KERNEL);
135 return u32l->l;
136}
137
138struct gr_ucode_gk20a {
139 struct {
140 struct u32_list_gk20a inst;
141 struct u32_list_gk20a data;
142 } gpccs, fecs;
143};
144
145/* main entry for grctx loading */
146int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr);
147int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr);
148
149#endif /*__GR_CTX_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c
new file mode 100644
index 00000000..12bba1fd
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c
@@ -0,0 +1,256 @@
1/*
2 * drivers/video/tegra/host/gk20a/gr_ctx_sim_gk20a.c
3 *
4 * GK20A Graphics Context for Simulation
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#include "gk20a.h"
23#include "gr_ctx_gk20a.h"
24
25int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr)
26{
27 int err = 0;
28 u32 i, temp;
29 char *size_path = NULL;
30 char *reg_path = NULL;
31 char *value_path = NULL;
32
33 gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
34 "querying grctx info from chiplib");
35
36 g->gr.ctx_vars.dynamic = true;
37 g->gr.netlist = GR_NETLIST_DYNAMIC;
38
39 /* query sizes and counts */
40 gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS_COUNT", 0,
41 &g->gr.ctx_vars.ucode.fecs.inst.count);
42 gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS_COUNT", 0,
43 &g->gr.ctx_vars.ucode.fecs.data.count);
44 gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS_COUNT", 0,
45 &g->gr.ctx_vars.ucode.gpccs.inst.count);
46 gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS_COUNT", 0,
47 &g->gr.ctx_vars.ucode.gpccs.data.count);
48 gk20a_sim_esc_readl(g, "GRCTX_ALL_CTX_TOTAL_WORDS", 0, &temp);
49 g->gr.ctx_vars.buffer_size = temp << 2;
50 gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT_SIZE", 0,
51 &g->gr.ctx_vars.sw_bundle_init.count);
52 gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT_SIZE", 0,
53 &g->gr.ctx_vars.sw_method_init.count);
54 gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD_SIZE", 0,
55 &g->gr.ctx_vars.sw_ctx_load.count);
56
57 switch (0) { /*g->gr.ctx_vars.reg_init_override)*/
58#if 0
59 case NV_REG_STR_RM_GR_REG_INIT_OVERRIDE_PROD_DIFF:
60		size_path = "GRCTX_NONCTXSW_PROD_DIFF_REG_SIZE";
61		reg_path = "GRCTX_NONCTXSW_PROD_DIFF_REG:REG";
62		value_path = "GRCTX_NONCTXSW_PROD_DIFF_REG:VALUE";
63 break;
64#endif
65 default:
66 size_path = "GRCTX_NONCTXSW_REG_SIZE";
67 reg_path = "GRCTX_NONCTXSW_REG:REG";
68 value_path = "GRCTX_NONCTXSW_REG:VALUE";
69 break;
70 }
71
72 gk20a_sim_esc_readl(g, size_path, 0,
73 &g->gr.ctx_vars.sw_non_ctx_load.count);
74
75 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS_COUNT", 0,
76 &g->gr.ctx_vars.ctxsw_regs.sys.count);
77 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC_COUNT", 0,
78 &g->gr.ctx_vars.ctxsw_regs.gpc.count);
79 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC_COUNT", 0,
80 &g->gr.ctx_vars.ctxsw_regs.tpc.count);
81#if 0
82 /* looks to be unused, actually chokes the sim */
83 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0,
84 &g->gr.ctx_vars.ctxsw_regs.ppc.count);
85#endif
86 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC_COUNT", 0,
87 &g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count);
88 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS_COUNT", 0,
89 &g->gr.ctx_vars.ctxsw_regs.pm_sys.count);
90 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC_COUNT", 0,
91 &g->gr.ctx_vars.ctxsw_regs.pm_gpc.count);
92 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC_COUNT", 0,
93 &g->gr.ctx_vars.ctxsw_regs.pm_tpc.count);
94
95 err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.inst);
96 err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.data);
97 err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.inst);
98 err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.data);
99 err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_bundle_init);
100 err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_method_init);
101 err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.sw_ctx_load);
102 err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_non_ctx_load);
103 err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.sys);
104 err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.gpc);
105 err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.tpc);
106 err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.zcull_gpc);
107 err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.ppc);
108 err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_sys);
109 err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_gpc);
110 err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_tpc);
111
112 if (err)
113 goto fail;
114
115 for (i = 0; i < g->gr.ctx_vars.ucode.fecs.inst.count; i++)
116 gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS",
117 i, &g->gr.ctx_vars.ucode.fecs.inst.l[i]);
118
119 for (i = 0; i < g->gr.ctx_vars.ucode.fecs.data.count; i++)
120 gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS",
121 i, &g->gr.ctx_vars.ucode.fecs.data.l[i]);
122
123 for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.inst.count; i++)
124 gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS",
125 i, &g->gr.ctx_vars.ucode.gpccs.inst.l[i]);
126
127 for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.data.count; i++)
128 gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS",
129 i, &g->gr.ctx_vars.ucode.gpccs.data.l[i]);
130
131 for (i = 0; i < g->gr.ctx_vars.sw_bundle_init.count; i++) {
132 struct av_gk20a *l = g->gr.ctx_vars.sw_bundle_init.l;
133 gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:ADDR",
134 i, &l[i].addr);
135 gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:VALUE",
136 i, &l[i].value);
137 }
138
139 for (i = 0; i < g->gr.ctx_vars.sw_method_init.count; i++) {
140 struct av_gk20a *l = g->gr.ctx_vars.sw_method_init.l;
141 gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:ADDR",
142 i, &l[i].addr);
143 gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:VALUE",
144 i, &l[i].value);
145 }
146
147 for (i = 0; i < g->gr.ctx_vars.sw_ctx_load.count; i++) {
148 struct aiv_gk20a *l = g->gr.ctx_vars.sw_ctx_load.l;
149 gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:ADDR",
150 i, &l[i].addr);
151 gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:INDEX",
152 i, &l[i].index);
153 gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:VALUE",
154 i, &l[i].value);
155 }
156
157 for (i = 0; i < g->gr.ctx_vars.sw_non_ctx_load.count; i++) {
158 struct av_gk20a *l = g->gr.ctx_vars.sw_non_ctx_load.l;
159 gk20a_sim_esc_readl(g, reg_path, i, &l[i].addr);
160 gk20a_sim_esc_readl(g, value_path, i, &l[i].value);
161 }
162
163 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
164 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.sys.l;
165 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:ADDR",
166 i, &l[i].addr);
167 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:INDEX",
168 i, &l[i].index);
169 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:VALUE",
170 i, &l[i].value);
171 }
172
173 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
174 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.gpc.l;
175 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:ADDR",
176 i, &l[i].addr);
177 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:INDEX",
178 i, &l[i].index);
179 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:VALUE",
180 i, &l[i].value);
181 }
182
183 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
184 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.tpc.l;
185 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:ADDR",
186 i, &l[i].addr);
187 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:INDEX",
188 i, &l[i].index);
189 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:VALUE",
190 i, &l[i].value);
191 }
192
193 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
194 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.ppc.l;
195 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:ADDR",
196 i, &l[i].addr);
197 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:INDEX",
198 i, &l[i].index);
199 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:VALUE",
200 i, &l[i].value);
201 }
202
203 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count; i++) {
204 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l;
205 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:ADDR",
206 i, &l[i].addr);
207 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:INDEX",
208 i, &l[i].index);
209 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:VALUE",
210 i, &l[i].value);
211 }
212
213 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_sys.count; i++) {
214 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_sys.l;
215 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:ADDR",
216 i, &l[i].addr);
217 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:INDEX",
218 i, &l[i].index);
219 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:VALUE",
220 i, &l[i].value);
221 }
222
223 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_gpc.count; i++) {
224 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_gpc.l;
225 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:ADDR",
226 i, &l[i].addr);
227 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:INDEX",
228 i, &l[i].index);
229 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:VALUE",
230 i, &l[i].value);
231 }
232
233 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_tpc.count; i++) {
234 struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_tpc.l;
235 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:ADDR",
236 i, &l[i].addr);
237 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:INDEX",
238 i, &l[i].index);
239 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:VALUE",
240 i, &l[i].value);
241 }
242
243 g->gr.ctx_vars.valid = true;
244
245 gk20a_sim_esc_readl(g, "GRCTX_GEN_CTX_REGS_BASE_INDEX", 0,
246 &g->gr.ctx_vars.regs_base_index);
247
248 gk20a_dbg(gpu_dbg_info | gpu_dbg_fn, "finished querying grctx info from chiplib");
249 return 0;
250fail:
251 gk20a_err(dev_from_gk20a(g),
252 "failed querying grctx info from chiplib");
253 return err;
254
255}
256
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
new file mode 100644
index 00000000..0f93940b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -0,0 +1,6747 @@
1/*
2 * GK20A Graphics
3 *
4 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20#include <linux/delay.h> /* for udelay */
21#include <linux/mm.h> /* for totalram_pages */
22#include <linux/scatterlist.h>
23#include <linux/tegra-soc.h>
24#include <linux/nvhost_dbg_gpu_ioctl.h>
25#include <linux/vmalloc.h>
26#include <linux/dma-mapping.h>
27#include <linux/firmware.h>
28#include <linux/nvhost.h>
29
30#include "gk20a.h"
31#include "kind_gk20a.h"
32#include "gr_ctx_gk20a.h"
33
34#include "hw_ccsr_gk20a.h"
35#include "hw_ctxsw_prog_gk20a.h"
36#include "hw_fifo_gk20a.h"
37#include "hw_gr_gk20a.h"
38#include "hw_gmmu_gk20a.h"
39#include "hw_mc_gk20a.h"
40#include "hw_ram_gk20a.h"
41#include "hw_pri_ringmaster_gk20a.h"
42#include "hw_pri_ringstation_sys_gk20a.h"
43#include "hw_pri_ringstation_gpc_gk20a.h"
44#include "hw_pri_ringstation_fbp_gk20a.h"
45#include "hw_proj_gk20a.h"
46#include "hw_top_gk20a.h"
47#include "hw_ltc_gk20a.h"
48#include "hw_fb_gk20a.h"
49#include "hw_therm_gk20a.h"
50#include "hw_pbdma_gk20a.h"
51#include "gr_pri_gk20a.h"
52#include "regops_gk20a.h"
53#include "dbg_gpu_gk20a.h"
54
55#define BLK_SIZE (256)
56
57static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
58
59/* global ctx buffer */
60static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
61static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g);
62static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
63 struct channel_gk20a *c);
64static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c);
65
66/* channel gr ctx buffer */
67static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
68 struct channel_gk20a *c);
69static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c);
70
71/* channel patch ctx buffer */
72static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
73 struct channel_gk20a *c);
74static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c);
75
76/* golden ctx image */
77static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
78 struct channel_gk20a *c);
79static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
80 struct channel_gk20a *c);
81
82void gk20a_fecs_dump_falcon_stats(struct gk20a *g)
83{
84 int i;
85
86 gk20a_err(dev_from_gk20a(g), "gr_fecs_os_r : %d",
87 gk20a_readl(g, gr_fecs_os_r()));
88 gk20a_err(dev_from_gk20a(g), "gr_fecs_cpuctl_r : 0x%x",
89 gk20a_readl(g, gr_fecs_cpuctl_r()));
90 gk20a_err(dev_from_gk20a(g), "gr_fecs_idlestate_r : 0x%x",
91 gk20a_readl(g, gr_fecs_idlestate_r()));
92 gk20a_err(dev_from_gk20a(g), "gr_fecs_mailbox0_r : 0x%x",
93 gk20a_readl(g, gr_fecs_mailbox0_r()));
94 gk20a_err(dev_from_gk20a(g), "gr_fecs_mailbox1_r : 0x%x",
95 gk20a_readl(g, gr_fecs_mailbox1_r()));
96 gk20a_err(dev_from_gk20a(g), "gr_fecs_irqstat_r : 0x%x",
97 gk20a_readl(g, gr_fecs_irqstat_r()));
98 gk20a_err(dev_from_gk20a(g), "gr_fecs_irqmode_r : 0x%x",
99 gk20a_readl(g, gr_fecs_irqmode_r()));
100 gk20a_err(dev_from_gk20a(g), "gr_fecs_irqmask_r : 0x%x",
101 gk20a_readl(g, gr_fecs_irqmask_r()));
102 gk20a_err(dev_from_gk20a(g), "gr_fecs_irqdest_r : 0x%x",
103 gk20a_readl(g, gr_fecs_irqdest_r()));
104 gk20a_err(dev_from_gk20a(g), "gr_fecs_debug1_r : 0x%x",
105 gk20a_readl(g, gr_fecs_debug1_r()));
106 gk20a_err(dev_from_gk20a(g), "gr_fecs_debuginfo_r : 0x%x",
107 gk20a_readl(g, gr_fecs_debuginfo_r()));
108
109 for (i = 0; i < gr_fecs_ctxsw_mailbox__size_1_v(); i++)
110 gk20a_err(dev_from_gk20a(g), "gr_fecs_ctxsw_mailbox_r(%d) : 0x%x",
111 i, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(i)));
112
113 gk20a_err(dev_from_gk20a(g), "gr_fecs_engctl_r : 0x%x",
114 gk20a_readl(g, gr_fecs_engctl_r()));
115 gk20a_err(dev_from_gk20a(g), "gr_fecs_curctx_r : 0x%x",
116 gk20a_readl(g, gr_fecs_curctx_r()));
117 gk20a_err(dev_from_gk20a(g), "gr_fecs_nxtctx_r : 0x%x",
118 gk20a_readl(g, gr_fecs_nxtctx_r()));
119
120 gk20a_writel(g, gr_fecs_icd_cmd_r(),
121 gr_fecs_icd_cmd_opc_rreg_f() |
122 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
123 gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_IMB : 0x%x",
124 gk20a_readl(g, gr_fecs_icd_rdata_r()));
125
126 gk20a_writel(g, gr_fecs_icd_cmd_r(),
127 gr_fecs_icd_cmd_opc_rreg_f() |
128 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
129 gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_DMB : 0x%x",
130 gk20a_readl(g, gr_fecs_icd_rdata_r()));
131
132 gk20a_writel(g, gr_fecs_icd_cmd_r(),
133 gr_fecs_icd_cmd_opc_rreg_f() |
134 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
135 gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_CSW : 0x%x",
136 gk20a_readl(g, gr_fecs_icd_rdata_r()));
137
138 gk20a_writel(g, gr_fecs_icd_cmd_r(),
139 gr_fecs_icd_cmd_opc_rreg_f() |
140 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
141 gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_CTX : 0x%x",
142 gk20a_readl(g, gr_fecs_icd_rdata_r()));
143
144 gk20a_writel(g, gr_fecs_icd_cmd_r(),
145 gr_fecs_icd_cmd_opc_rreg_f() |
146 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
147 gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_EXCI : 0x%x",
148 gk20a_readl(g, gr_fecs_icd_rdata_r()));
149
150 for (i = 0; i < 4; i++) {
151 gk20a_writel(g, gr_fecs_icd_cmd_r(),
152 gr_fecs_icd_cmd_opc_rreg_f() |
153 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_PC));
154 gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_PC : 0x%x",
155 gk20a_readl(g, gr_fecs_icd_rdata_r()));
156
157 gk20a_writel(g, gr_fecs_icd_cmd_r(),
158 gr_fecs_icd_cmd_opc_rreg_f() |
159 gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_SP));
160 gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_SP : 0x%x",
161 gk20a_readl(g, gr_fecs_icd_rdata_r()));
162 }
163}
164
165static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
166{
167 u32 i, ucode_u32_size;
168 const u32 *ucode_u32_data;
169 u32 checksum;
170
171 gk20a_dbg_fn("");
172
173 gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
174 gr_gpccs_dmemc_blk_f(0) |
175 gr_gpccs_dmemc_aincw_f(1)));
176
177 ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count;
178 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l;
179
180 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
181 gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
182 checksum += ucode_u32_data[i];
183 }
184
185 gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
186 gr_fecs_dmemc_blk_f(0) |
187 gr_fecs_dmemc_aincw_f(1)));
188
189 ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count;
190 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l;
191
192 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
193 gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
194 checksum += ucode_u32_data[i];
195 }
196 gk20a_dbg_fn("done");
197}
198
199static void gr_gk20a_load_falcon_imem(struct gk20a *g)
200{
201 u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
202 const u32 *ucode_u32_data;
203 u32 tag, i, pad_start, pad_end;
204 u32 checksum;
205
206 gk20a_dbg_fn("");
207
208 cfg = gk20a_readl(g, gr_fecs_cfg_r());
209 fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
210
211 cfg = gk20a_readl(g, gr_gpc0_cfg_r());
212 gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
213
214 /* Use the broadcast address to access all of the GPCCS units. */
215 gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
216 gr_gpccs_imemc_blk_f(0) |
217 gr_gpccs_imemc_aincw_f(1)));
218
219 /* Setup the tags for the instruction memory. */
220 tag = 0;
221 gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
222
223 ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count;
224 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l;
225
226 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
227 if (i && ((i % (256/sizeof(u32))) == 0)) {
228 tag++;
229 gk20a_writel(g, gr_gpccs_imemt_r(0),
230 gr_gpccs_imemt_tag_f(tag));
231 }
232 gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
233 checksum += ucode_u32_data[i];
234 }
235
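	/*
	 * Pad to the end of the current 256-byte IMEM block plus one extra
	 * block with zeroes, bumping the tag at every 256-byte boundary,
	 * without writing past the IMEM size reported by the config register.
	 */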
236 pad_start = i*4;
237 pad_end = pad_start+(256-pad_start%256)+256;
238 for (i = pad_start;
239 (i < gpccs_imem_size * 256) && (i < pad_end);
240 i += 4) {
241 if (i && ((i % 256) == 0)) {
242 tag++;
243 gk20a_writel(g, gr_gpccs_imemt_r(0),
244 gr_gpccs_imemt_tag_f(tag));
245 }
246 gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
247 }
248
249 gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
250 gr_fecs_imemc_blk_f(0) |
251 gr_fecs_imemc_aincw_f(1)));
252
253 /* Setup the tags for the instruction memory. */
254 tag = 0;
255 gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
256
257 ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count;
258 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l;
259
260 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
261 if (i && ((i % (256/sizeof(u32))) == 0)) {
262 tag++;
263 gk20a_writel(g, gr_fecs_imemt_r(0),
264 gr_fecs_imemt_tag_f(tag));
265 }
266 gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
267 checksum += ucode_u32_data[i];
268 }
269
270 pad_start = i*4;
271 pad_end = pad_start+(256-pad_start%256)+256;
272 for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) {
273 if (i && ((i % 256) == 0)) {
274 tag++;
275 gk20a_writel(g, gr_fecs_imemt_r(0),
276 gr_fecs_imemt_tag_f(tag));
277 }
278 gk20a_writel(g, gr_fecs_imemd_r(0), 0);
279 }
280}
281
282static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies,
283 u32 expect_delay)
284{
285 u32 delay = expect_delay;
286 bool gr_enabled;
287 bool ctxsw_active;
288 bool gr_busy;
289
290 gk20a_dbg_fn("");
291
292 do {
293 /* fmodel: host gets fifo_engine_status(gr) from gr
294 only when gr_status is read */
295 gk20a_readl(g, gr_status_r());
296
297 gr_enabled = gk20a_readl(g, mc_enable_r()) &
298 mc_enable_pgraph_enabled_f();
299
300 ctxsw_active = gk20a_readl(g,
301 fifo_engine_status_r(ENGINE_GR_GK20A)) &
302 fifo_engine_status_ctxsw_in_progress_f();
303
304 gr_busy = gk20a_readl(g, gr_engine_status_r()) &
305 gr_engine_status_value_busy_f();
306
307 if (!gr_enabled || (!gr_busy && !ctxsw_active)) {
308 gk20a_dbg_fn("done");
309 return 0;
310 }
311
312 usleep_range(delay, delay * 2);
313 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
314
315 } while (time_before(jiffies, end_jiffies)
316 || !tegra_platform_is_silicon());
317
318 gk20a_err(dev_from_gk20a(g),
319 "timeout, ctxsw busy : %d, gr busy : %d",
320 ctxsw_active, gr_busy);
321
322 return -EAGAIN;
323}
324
325static int gr_gk20a_ctx_reset(struct gk20a *g, u32 rst_mask)
326{
327 u32 delay = GR_IDLE_CHECK_DEFAULT;
328 unsigned long end_jiffies = jiffies +
329 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
330 u32 reg;
331
332 gk20a_dbg_fn("");
333
334 if (!tegra_platform_is_linsim()) {
335 /* Force clocks on */
336 gk20a_writel(g, gr_fe_pwr_mode_r(),
337 gr_fe_pwr_mode_req_send_f() |
338 gr_fe_pwr_mode_mode_force_on_f());
339
340 /* Wait for the clocks to indicate that they are on */
341 do {
342 reg = gk20a_readl(g, gr_fe_pwr_mode_r());
343
344 if (gr_fe_pwr_mode_req_v(reg) ==
345 gr_fe_pwr_mode_req_done_v())
346 break;
347
348 usleep_range(delay, delay * 2);
349 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
350
351 } while (time_before(jiffies, end_jiffies));
352
353 if (!time_before(jiffies, end_jiffies)) {
354 gk20a_err(dev_from_gk20a(g),
355 "failed to force the clocks on\n");
356 WARN_ON(1);
357 }
358 }
359 if (rst_mask) {
360 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(), rst_mask);
361 } else {
362 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
363 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
364 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
365 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
366 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
367 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
368 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
369 gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f() |
370 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f() |
371 gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f());
372 }
373
374 /* we need to read the reset register *and* wait for a moment to ensure
375 * reset propagation */
376
377 gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
378 udelay(20);
379
380 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
381 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
382 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
383 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
384 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
385 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
386 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
387 gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f() |
388 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f() |
389 gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f());
390
 391	/* read back the reset register and then wait a small moment after that */
392 gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
393 udelay(20);
394
395 if (!tegra_platform_is_linsim()) {
396 /* Set power mode back to auto */
397 gk20a_writel(g, gr_fe_pwr_mode_r(),
398 gr_fe_pwr_mode_req_send_f() |
399 gr_fe_pwr_mode_mode_auto_f());
400
401 /* Wait for the request to complete */
402 end_jiffies = jiffies +
403 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
404 do {
405 reg = gk20a_readl(g, gr_fe_pwr_mode_r());
406
407 if (gr_fe_pwr_mode_req_v(reg) ==
408 gr_fe_pwr_mode_req_done_v())
409 break;
410
411 usleep_range(delay, delay * 2);
412 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
413
414 } while (time_before(jiffies, end_jiffies));
415
416 if (!time_before(jiffies, end_jiffies))
417 gk20a_warn(dev_from_gk20a(g),
418 "failed to set power mode to auto\n");
419 }
420
421 return 0;
422}
423
424static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
425 u32 *mailbox_ret, u32 opc_success,
426 u32 mailbox_ok, u32 opc_fail,
427 u32 mailbox_fail)
428{
429 unsigned long end_jiffies = jiffies +
430 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
431 u32 delay = GR_IDLE_CHECK_DEFAULT;
432 u32 check = WAIT_UCODE_LOOP;
433 u32 reg;
434
435 gk20a_dbg_fn("");
436
437 while (check == WAIT_UCODE_LOOP) {
438 if (!time_before(jiffies, end_jiffies) &&
439 tegra_platform_is_silicon())
440 check = WAIT_UCODE_TIMEOUT;
441
442 reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id));
443
444 if (mailbox_ret)
445 *mailbox_ret = reg;
446
447 switch (opc_success) {
448 case GR_IS_UCODE_OP_EQUAL:
449 if (reg == mailbox_ok)
450 check = WAIT_UCODE_OK;
451 break;
452 case GR_IS_UCODE_OP_NOT_EQUAL:
453 if (reg != mailbox_ok)
454 check = WAIT_UCODE_OK;
455 break;
456 case GR_IS_UCODE_OP_AND:
457 if (reg & mailbox_ok)
458 check = WAIT_UCODE_OK;
459 break;
460 case GR_IS_UCODE_OP_LESSER:
461 if (reg < mailbox_ok)
462 check = WAIT_UCODE_OK;
463 break;
464 case GR_IS_UCODE_OP_LESSER_EQUAL:
465 if (reg <= mailbox_ok)
466 check = WAIT_UCODE_OK;
467 break;
468 case GR_IS_UCODE_OP_SKIP:
 469			/* no success check */
470 break;
471 default:
472 gk20a_err(dev_from_gk20a(g),
473 "invalid success opcode 0x%x", opc_success);
474
475 check = WAIT_UCODE_ERROR;
476 break;
477 }
478
479 switch (opc_fail) {
480 case GR_IS_UCODE_OP_EQUAL:
481 if (reg == mailbox_fail)
482 check = WAIT_UCODE_ERROR;
483 break;
484 case GR_IS_UCODE_OP_NOT_EQUAL:
485 if (reg != mailbox_fail)
486 check = WAIT_UCODE_ERROR;
487 break;
488 case GR_IS_UCODE_OP_AND:
489 if (reg & mailbox_fail)
490 check = WAIT_UCODE_ERROR;
491 break;
492 case GR_IS_UCODE_OP_LESSER:
493 if (reg < mailbox_fail)
494 check = WAIT_UCODE_ERROR;
495 break;
496 case GR_IS_UCODE_OP_LESSER_EQUAL:
497 if (reg <= mailbox_fail)
498 check = WAIT_UCODE_ERROR;
499 break;
500 case GR_IS_UCODE_OP_SKIP:
 501			/* no check on fail */
502 break;
503 default:
504 gk20a_err(dev_from_gk20a(g),
505 "invalid fail opcode 0x%x", opc_fail);
506 check = WAIT_UCODE_ERROR;
507 break;
508 }
509
510 usleep_range(delay, delay * 2);
511 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
512 }
513
514 if (check == WAIT_UCODE_TIMEOUT) {
515 gk20a_err(dev_from_gk20a(g),
516 "timeout waiting on ucode response");
517 gk20a_fecs_dump_falcon_stats(g);
518 return -1;
519 } else if (check == WAIT_UCODE_ERROR) {
520 gk20a_err(dev_from_gk20a(g),
521 "ucode method failed on mailbox=%d value=0x%08x",
522 mailbox_id, reg);
523 gk20a_fecs_dump_falcon_stats(g);
524 return -1;
525 }
526
527 gk20a_dbg_fn("done");
528 return 0;
529}
530
 531/* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...).
 532 * We should replace most, if not all, FECS method calls with this instead. */
533struct fecs_method_op_gk20a {
534 struct {
535 u32 addr;
536 u32 data;
537 } method;
538
539 struct {
540 u32 id;
541 u32 data;
542 u32 clr;
543 u32 *ret;
544 u32 ok;
545 u32 fail;
546 } mailbox;
547
548 struct {
549 u32 ok;
550 u32 fail;
551 } cond;
552
553};
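/*
 * Usage sketch (mirrors gr_gk20a_fecs_ctx_bind_channel() below); all names
 * come from this file:
 *
 *	ret = gr_gk20a_submit_fecs_method_op(g,
 *		(struct fecs_method_op_gk20a) {
 *			.method.addr = gr_fecs_method_push_adr_bind_pointer_v(),
 *			.method.data = gr_fecs_current_ctx_ptr_f(inst_base_ptr),
 *			.mailbox = { .id = 0, .data = 0, .clr = 0x30,
 *				     .ret = NULL, .ok = 0x10, .fail = 0x20, },
 *			.cond.ok = GR_IS_UCODE_OP_AND,
 *			.cond.fail = GR_IS_UCODE_OP_AND });
 */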
554
555int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
556 struct fecs_method_op_gk20a op)
557{
558 struct gr_gk20a *gr = &g->gr;
559 int ret;
560
561 mutex_lock(&gr->fecs_mutex);
562
563 if (op.mailbox.id != 0)
564 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id),
565 op.mailbox.data);
566
567 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
568 gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr));
569
570 gk20a_writel(g, gr_fecs_method_data_r(), op.method.data);
571 gk20a_writel(g, gr_fecs_method_push_r(),
572 gr_fecs_method_push_adr_f(op.method.addr));
573
 574	/* op.mb.id == 4 cases report completion on mailbox 0, so wait
 575	 * there (op.mb.id == 0) instead */
576 if (op.mailbox.id == 4)
577 op.mailbox.id = 0;
578
579 ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret,
580 op.cond.ok, op.mailbox.ok,
581 op.cond.fail, op.mailbox.fail);
582
583 mutex_unlock(&gr->fecs_mutex);
584
585 return ret;
586}
587
588int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret)
589{
590 return gr_gk20a_submit_fecs_method_op(g,
591 (struct fecs_method_op_gk20a) {
592 .method.addr = fecs_method,
593 .method.data = ~0,
594 .mailbox = { .id = 1, /*sideband?*/
595 .data = ~0, .clr = ~0, .ret = ret,
596 .ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
597 .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), },
598 .cond.ok = GR_IS_UCODE_OP_EQUAL,
599 .cond.fail = GR_IS_UCODE_OP_EQUAL });
600}
601
602/* Stop processing (stall) context switches at FECS.
 603 * The caller must hold the dbg_sessions_lock, else if multiple stop methods
604 * are sent to the ucode in sequence, it can get into an undefined state. */
605int gr_gk20a_disable_ctxsw(struct gk20a *g)
606{
607 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
608 return gr_gk20a_ctrl_ctxsw(g, gr_fecs_method_push_adr_stop_ctxsw_v(), 0);
609}
610
611/* Start processing (continue) context switches at FECS */
612int gr_gk20a_enable_ctxsw(struct gk20a *g)
613{
614 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
615 return gr_gk20a_ctrl_ctxsw(g, gr_fecs_method_push_adr_start_ctxsw_v(), 0);
616}
617
618
619static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
620{
621 u32 addr_lo;
622 u32 addr_hi;
623 void *inst_ptr = NULL;
624
625 gk20a_dbg_fn("");
626
627 /* flush gpu_va before commit */
628 gk20a_mm_fb_flush(c->g);
629 gk20a_mm_l2_flush(c->g, true);
630
631 inst_ptr = c->inst_block.cpuva;
632 if (!inst_ptr)
633 return -ENOMEM;
634
635 addr_lo = u64_lo32(gpu_va) >> 12;
636 addr_hi = u64_hi32(gpu_va);
637
638 gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(),
639 ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() |
640 ram_in_gr_wfi_ptr_lo_f(addr_lo));
641
642 gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(),
643 ram_in_gr_wfi_ptr_hi_f(addr_hi));
644
645 gk20a_mm_l2_invalidate(c->g);
646
647 return 0;
648}
649
650/*
651 * Context state can be written directly or "patched" at times.
 652 * So that code can be used in either situation, it is written
 653 * using a series of _ctx_patch_write(..., patch) statements.
 654 * However, any necessary cpu map/unmap and gpu l2 invalidates
 655 * should be minimized (to avoid doing them once per patch write).
 656 * Before a sequence of these, set up with "_ctx_patch_write_begin"
 657 * and close with "_ctx_patch_write_end."
658 */
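/*
 * Typical sequence (sketch, as used by the commit_global_* helpers below):
 *
 *	err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
 *	if (err)
 *		return err;
 *	gr_gk20a_ctx_patch_write(g, ch_ctx, addr, data, true);
 *	...
 *	gr_gk20a_ctx_patch_write_end(g, ch_ctx);
 */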
659int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
660 struct channel_ctx_gk20a *ch_ctx)
661{
662 /* being defensive still... */
663 if (ch_ctx->patch_ctx.cpu_va) {
664 gk20a_err(dev_from_gk20a(g), "nested ctx patch begin?");
665 return -EBUSY;
666 }
667
668 ch_ctx->patch_ctx.cpu_va = vmap(ch_ctx->patch_ctx.pages,
669 PAGE_ALIGN(ch_ctx->patch_ctx.size) >> PAGE_SHIFT,
670 0, pgprot_dmacoherent(PAGE_KERNEL));
671
672 if (!ch_ctx->patch_ctx.cpu_va)
673 return -ENOMEM;
674
675 return 0;
676}
677
678int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
679 struct channel_ctx_gk20a *ch_ctx)
680{
681 /* being defensive still... */
682 if (!ch_ctx->patch_ctx.cpu_va) {
683 gk20a_err(dev_from_gk20a(g), "dangling ctx patch end?");
684 return -EINVAL;
685 }
686
687 vunmap(ch_ctx->patch_ctx.cpu_va);
688 ch_ctx->patch_ctx.cpu_va = NULL;
689
690 gk20a_mm_l2_invalidate(g);
691 return 0;
692}
693
694int gr_gk20a_ctx_patch_write(struct gk20a *g,
695 struct channel_ctx_gk20a *ch_ctx,
696 u32 addr, u32 data, bool patch)
697{
698 u32 patch_slot = 0;
699 void *patch_ptr = NULL;
700 bool mapped_here = false;
701
702 BUG_ON(patch != 0 && ch_ctx == NULL);
703
704 if (patch) {
705 if (!ch_ctx)
706 return -EINVAL;
707 /* we added an optimization prolog, epilog
708 * to get rid of unnecessary maps and l2 invals.
709 * but be defensive still... */
710 if (!ch_ctx->patch_ctx.cpu_va) {
711 int err;
712 gk20a_err(dev_from_gk20a(g),
713 "per-write ctx patch begin?");
714 /* yes, gr_gk20a_ctx_patch_smpc causes this one */
715 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
716 if (err)
717 return err;
718 mapped_here = true;
719 } else
720 mapped_here = false;
721
722 patch_ptr = ch_ctx->patch_ctx.cpu_va;
723 patch_slot = ch_ctx->patch_ctx.data_count * 2;
724
725 gk20a_mem_wr32(patch_ptr, patch_slot++, addr);
726 gk20a_mem_wr32(patch_ptr, patch_slot++, data);
727
728 ch_ctx->patch_ctx.data_count++;
729
730 if (mapped_here)
731 gr_gk20a_ctx_patch_write_end(g, ch_ctx);
732
733 } else
734 gk20a_writel(g, addr, data);
735
736 return 0;
737}
738
739static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
740 struct channel_gk20a *c)
741{
742 u32 inst_base_ptr = u64_lo32(c->inst_block.cpu_pa
743 >> ram_in_base_shift_v());
744 u32 ret;
745
746 gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
747 c->hw_chid, inst_base_ptr);
748
749 ret = gr_gk20a_submit_fecs_method_op(g,
750 (struct fecs_method_op_gk20a) {
751 .method.addr = gr_fecs_method_push_adr_bind_pointer_v(),
752 .method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
753 gr_fecs_current_ctx_target_vid_mem_f() |
754 gr_fecs_current_ctx_valid_f(1)),
755 .mailbox = { .id = 0, .data = 0,
756 .clr = 0x30,
757 .ret = NULL,
758 .ok = 0x10,
759 .fail = 0x20, },
760 .cond.ok = GR_IS_UCODE_OP_AND,
761 .cond.fail = GR_IS_UCODE_OP_AND});
762 if (ret)
763 gk20a_err(dev_from_gk20a(g),
764 "bind channel instance failed");
765
766 return ret;
767}
768
769static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
770 bool disable_fifo)
771{
772 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
773 struct fifo_gk20a *f = &g->fifo;
774 struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
775 u32 va_lo, va_hi, va;
776 int ret = 0;
777 void *ctx_ptr = NULL;
778
779 gk20a_dbg_fn("");
780
781 ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
782 PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
783 0, pgprot_dmacoherent(PAGE_KERNEL));
784 if (!ctx_ptr)
785 return -ENOMEM;
786
787 if (ch_ctx->zcull_ctx.gpu_va == 0 &&
788 ch_ctx->zcull_ctx.ctx_sw_mode ==
789 ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
790 ret = -EINVAL;
791 goto clean_up;
792 }
793
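	/*
	 * The zcull buffer VA is 256-byte aligned; pack VA bits 39:8 into a
	 * single 32-bit word for the ctxsw header write below.
	 */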
794 va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va);
795 va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va);
796 va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
797
798 if (disable_fifo) {
799 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
800 if (ret) {
801 gk20a_err(dev_from_gk20a(g),
802 "failed to disable gr engine activity\n");
803 goto clean_up;
804 }
805 }
806
807 /* Channel gr_ctx buffer is gpu cacheable.
808 Flush and invalidate before cpu update. */
809 gk20a_mm_fb_flush(g);
810 gk20a_mm_l2_flush(g, true);
811
812 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0,
813 ch_ctx->zcull_ctx.ctx_sw_mode);
814
815 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, va);
816
817 if (disable_fifo) {
818 ret = gk20a_fifo_enable_engine_activity(g, gr_info);
819 if (ret) {
820 gk20a_err(dev_from_gk20a(g),
821 "failed to enable gr engine activity\n");
822 goto clean_up;
823 }
824 }
825 gk20a_mm_l2_invalidate(g);
826
827clean_up:
828 vunmap(ctx_ptr);
829
830 return ret;
831}
832
833static int gr_gk20a_commit_global_cb_manager(struct gk20a *g,
834 struct channel_gk20a *c, bool patch)
835{
836 struct gr_gk20a *gr = &g->gr;
837 struct channel_ctx_gk20a *ch_ctx = NULL;
838 u32 attrib_offset_in_chunk = 0;
839 u32 alpha_offset_in_chunk = 0;
840 u32 pd_ab_max_output;
841 u32 gpc_index, ppc_index;
842 u32 temp;
843 u32 cbm_cfg_size1, cbm_cfg_size2;
844
845 gk20a_dbg_fn("");
846
847 if (patch) {
848 int err;
849 ch_ctx = &c->ch_ctx;
850 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
851 if (err)
852 return err;
853 }
854
855 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(),
856 gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
857 gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
858 patch);
859
860 pd_ab_max_output = (gr->alpha_cb_default_size *
861 gr_gpc0_ppc0_cbm_cfg_size_granularity_v()) /
862 gr_pd_ab_dist_cfg1_max_output_granularity_v();
863
864 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
865 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
866 gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
867
868 alpha_offset_in_chunk = attrib_offset_in_chunk +
869 gr->tpc_count * gr->attrib_cb_size;
870
871 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
872 temp = proj_gpc_stride_v() * gpc_index;
873 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
874 ppc_index++) {
875 cbm_cfg_size1 = gr->attrib_cb_default_size *
876 gr->pes_tpc_count[ppc_index][gpc_index];
877 cbm_cfg_size2 = gr->alpha_cb_default_size *
878 gr->pes_tpc_count[ppc_index][gpc_index];
879
880 gr_gk20a_ctx_patch_write(g, ch_ctx,
881 gr_gpc0_ppc0_cbm_cfg_r() + temp +
882 proj_ppc_in_gpc_stride_v() * ppc_index,
883 gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) |
884 gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) |
885 gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch);
886
887 attrib_offset_in_chunk += gr->attrib_cb_size *
888 gr->pes_tpc_count[ppc_index][gpc_index];
889
890 gr_gk20a_ctx_patch_write(g, ch_ctx,
891 gr_gpc0_ppc0_cbm_cfg2_r() + temp +
892 proj_ppc_in_gpc_stride_v() * ppc_index,
893 gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) |
894 gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch);
895
896 alpha_offset_in_chunk += gr->alpha_cb_size *
897 gr->pes_tpc_count[ppc_index][gpc_index];
898 }
899 }
900
901 if (patch)
902 gr_gk20a_ctx_patch_write_end(g, ch_ctx);
903
904 return 0;
905}
906
907static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
908 struct channel_gk20a *c, bool patch)
909{
910 struct gr_gk20a *gr = &g->gr;
911 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
912 u64 addr;
913 u32 size;
914
915 gk20a_dbg_fn("");
916 if (patch) {
917 int err;
918 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
919 if (err)
920 return err;
921 }
922
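	/* Each global buffer VA below is packed into 32 bits holding VA bits
	 * 39:N, where N is that buffer's alignment (8 or 12 bits). */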
923 /* global pagepool buffer */
924 addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >>
925 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
926 (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) <<
927 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
928
929 size = gr->global_ctx_buffer[PAGEPOOL].size /
930 gr_scc_pagepool_total_pages_byte_granularity_v();
931
932 if (size == gr_scc_pagepool_total_pages_hwmax_value_v())
933 size = gr_scc_pagepool_total_pages_hwmax_v();
934
935 gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d",
936 addr, size);
937
938 g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, patch);
939
940 /* global bundle cb */
941 addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >>
942 gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) |
943 (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) <<
944 (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v()));
945
946 size = gr->bundle_cb_default_size;
947
948 gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d",
949 addr, size);
950
951 g->ops.gr.commit_global_bundle_cb(g, ch_ctx, addr, size, patch);
952
953 /* global attrib cb */
954 addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >>
955 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
956 (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) <<
957 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
958
959 gk20a_dbg_info("attrib cb addr : 0x%016llx", addr);
960 g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, patch);
961
962 if (patch)
963 gr_gk20a_ctx_patch_write_end(g, ch_ctx);
964
965 return 0;
966}
967
968static void gr_gk20a_commit_global_attrib_cb(struct gk20a *g,
969 struct channel_ctx_gk20a *ch_ctx,
970 u64 addr, bool patch)
971{
972 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(),
973 gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) |
974 gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch);
975
976 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(),
977 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) |
978 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch);
979}
980
981static void gr_gk20a_commit_global_bundle_cb(struct gk20a *g,
982 struct channel_ctx_gk20a *ch_ctx,
983 u64 addr, u64 size, bool patch)
984{
985 u32 data;
986
987 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
988 gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
989
990 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
991 gr_scc_bundle_cb_size_div_256b_f(size) |
992 gr_scc_bundle_cb_size_valid_true_f(), patch);
993
994 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_base_r(),
995 gr_gpcs_setup_bundle_cb_base_addr_39_8_f(addr), patch);
996
997 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_size_r(),
998 gr_gpcs_setup_bundle_cb_size_div_256b_f(size) |
999 gr_gpcs_setup_bundle_cb_size_valid_true_f(), patch);
1000
1001 /* data for state_limit */
1002 data = (g->gr.bundle_cb_default_size *
1003 gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
1004 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
1005
1006 data = min_t(u32, data, g->gr.min_gpm_fifo_depth);
1007
1008 gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
1009 g->gr.bundle_cb_token_limit, data);
1010
1011 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
1012 gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
1013 gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
1014
1015}
1016
1017static int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, bool patch)
1018{
1019 struct gr_gk20a *gr = &g->gr;
1020 struct channel_ctx_gk20a *ch_ctx = NULL;
1021 u32 gpm_pd_cfg;
1022 u32 pd_ab_dist_cfg0;
1023 u32 ds_debug;
1024 u32 mpc_vtg_debug;
1025 u32 pe_vaf;
1026 u32 pe_vsc_vpc;
1027
1028 gk20a_dbg_fn("");
1029
1030 gpm_pd_cfg = gk20a_readl(g, gr_gpcs_gpm_pd_cfg_r());
1031 pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r());
1032 ds_debug = gk20a_readl(g, gr_ds_debug_r());
1033 mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());
1034
1035 if (patch) {
1036 int err;
1037 ch_ctx = &c->ch_ctx;
1038 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
1039 if (err)
1040 return err;
1041 }
1042
1043 if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) {
1044 pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
1045 pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());
1046
1047 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f() | gpm_pd_cfg;
1048 pe_vaf = gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f() | pe_vaf;
1049 pe_vsc_vpc = gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f() | pe_vsc_vpc;
1050 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_en_f() | pd_ab_dist_cfg0;
1051 ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
1052 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
1053
1054 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
1055 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, patch);
1056 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, patch);
1057 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
1058 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
1059 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
1060 } else {
1061 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
1062 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
1063 ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
1064 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
1065
1066 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
1067 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
1068 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
1069 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
1070 }
1071
1072 if (patch)
1073 gr_gk20a_ctx_patch_write_end(g, ch_ctx);
1074
1075 return 0;
1076}
1077
1078int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
1079{
1080 u32 norm_entries, norm_shift;
1081 u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod;
1082 u32 map0, map1, map2, map3, map4, map5;
1083
1084 if (!gr->map_tiles)
1085 return -1;
1086
1087 gk20a_dbg_fn("");
1088
1089 gk20a_writel(g, gr_crstr_map_table_cfg_r(),
1090 gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) |
1091 gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count));
1092
1093 map0 = gr_crstr_gpc_map0_tile0_f(gr->map_tiles[0]) |
1094 gr_crstr_gpc_map0_tile1_f(gr->map_tiles[1]) |
1095 gr_crstr_gpc_map0_tile2_f(gr->map_tiles[2]) |
1096 gr_crstr_gpc_map0_tile3_f(gr->map_tiles[3]) |
1097 gr_crstr_gpc_map0_tile4_f(gr->map_tiles[4]) |
1098 gr_crstr_gpc_map0_tile5_f(gr->map_tiles[5]);
1099
1100 map1 = gr_crstr_gpc_map1_tile6_f(gr->map_tiles[6]) |
1101 gr_crstr_gpc_map1_tile7_f(gr->map_tiles[7]) |
1102 gr_crstr_gpc_map1_tile8_f(gr->map_tiles[8]) |
1103 gr_crstr_gpc_map1_tile9_f(gr->map_tiles[9]) |
1104 gr_crstr_gpc_map1_tile10_f(gr->map_tiles[10]) |
1105 gr_crstr_gpc_map1_tile11_f(gr->map_tiles[11]);
1106
1107 map2 = gr_crstr_gpc_map2_tile12_f(gr->map_tiles[12]) |
1108 gr_crstr_gpc_map2_tile13_f(gr->map_tiles[13]) |
1109 gr_crstr_gpc_map2_tile14_f(gr->map_tiles[14]) |
1110 gr_crstr_gpc_map2_tile15_f(gr->map_tiles[15]) |
1111 gr_crstr_gpc_map2_tile16_f(gr->map_tiles[16]) |
1112 gr_crstr_gpc_map2_tile17_f(gr->map_tiles[17]);
1113
1114 map3 = gr_crstr_gpc_map3_tile18_f(gr->map_tiles[18]) |
1115 gr_crstr_gpc_map3_tile19_f(gr->map_tiles[19]) |
1116 gr_crstr_gpc_map3_tile20_f(gr->map_tiles[20]) |
1117 gr_crstr_gpc_map3_tile21_f(gr->map_tiles[21]) |
1118 gr_crstr_gpc_map3_tile22_f(gr->map_tiles[22]) |
1119 gr_crstr_gpc_map3_tile23_f(gr->map_tiles[23]);
1120
1121 map4 = gr_crstr_gpc_map4_tile24_f(gr->map_tiles[24]) |
1122 gr_crstr_gpc_map4_tile25_f(gr->map_tiles[25]) |
1123 gr_crstr_gpc_map4_tile26_f(gr->map_tiles[26]) |
1124 gr_crstr_gpc_map4_tile27_f(gr->map_tiles[27]) |
1125 gr_crstr_gpc_map4_tile28_f(gr->map_tiles[28]) |
1126 gr_crstr_gpc_map4_tile29_f(gr->map_tiles[29]);
1127
1128 map5 = gr_crstr_gpc_map5_tile30_f(gr->map_tiles[30]) |
1129 gr_crstr_gpc_map5_tile31_f(gr->map_tiles[31]) |
1130 gr_crstr_gpc_map5_tile32_f(0) |
1131 gr_crstr_gpc_map5_tile33_f(0) |
1132 gr_crstr_gpc_map5_tile34_f(0) |
1133 gr_crstr_gpc_map5_tile35_f(0);
1134
1135 gk20a_writel(g, gr_crstr_gpc_map0_r(), map0);
1136 gk20a_writel(g, gr_crstr_gpc_map1_r(), map1);
1137 gk20a_writel(g, gr_crstr_gpc_map2_r(), map2);
1138 gk20a_writel(g, gr_crstr_gpc_map3_r(), map3);
1139 gk20a_writel(g, gr_crstr_gpc_map4_r(), map4);
1140 gk20a_writel(g, gr_crstr_gpc_map5_r(), map5);
1141
1142 switch (gr->tpc_count) {
1143 case 1:
1144 norm_shift = 4;
1145 break;
1146 case 2:
1147 case 3:
1148 norm_shift = 3;
1149 break;
1150 case 4:
1151 case 5:
1152 case 6:
1153 case 7:
1154 norm_shift = 2;
1155 break;
1156 case 8:
1157 case 9:
1158 case 10:
1159 case 11:
1160 case 12:
1161 case 13:
1162 case 14:
1163 case 15:
1164 norm_shift = 1;
1165 break;
1166 default:
1167 norm_shift = 0;
1168 break;
1169 }
1170
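	/*
	 * Precompute 2^5 .. 2^11 modulo the normalized entry count; these
	 * coefficients feed the WWDX map table config registers below.
	 */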
1171 norm_entries = gr->tpc_count << norm_shift;
1172 coeff5_mod = (1 << 5) % norm_entries;
1173 coeff6_mod = (1 << 6) % norm_entries;
1174 coeff7_mod = (1 << 7) % norm_entries;
1175 coeff8_mod = (1 << 8) % norm_entries;
1176 coeff9_mod = (1 << 9) % norm_entries;
1177 coeff10_mod = (1 << 10) % norm_entries;
1178 coeff11_mod = (1 << 11) % norm_entries;
1179
1180 gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
1181 gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) |
1182 gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
1183 gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
1184 gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
1185 gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count));
1186
1187 gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
1188 gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
1189 gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
1190 gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
1191 gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
1192 gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
1193 gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));
1194
1195 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
1196 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
1197 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
1198 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
1199 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
1200 gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);
1201
1202 gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
1203 gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) |
1204 gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count));
1205
1206 gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0);
1207 gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1);
1208 gk20a_writel(g, gr_rstr2d_gpc_map2_r(), map2);
1209 gk20a_writel(g, gr_rstr2d_gpc_map3_r(), map3);
1210 gk20a_writel(g, gr_rstr2d_gpc_map4_r(), map4);
1211 gk20a_writel(g, gr_rstr2d_gpc_map5_r(), map5);
1212
1213 return 0;
1214}
1215
1216static inline u32 count_bits(u32 mask)
1217{
1218 u32 temp = mask;
1219 u32 count;
1220 for (count = 0; temp != 0; count++)
1221 temp &= temp - 1;
1222
1223 return count;
1224}
1225
1226static inline u32 clear_count_bits(u32 num, u32 clear_count)
1227{
1228 u32 count = clear_count;
1229 for (; (num != 0) && (count != 0); count--)
1230 num &= num - 1;
1231
1232 return num;
1233}
1234
1235static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g,
1236 struct gr_gk20a *gr)
1237{
1238 u32 table_index_bits = 5;
1239 u32 rows = (1 << table_index_bits);
1240 u32 row_stride = gr_pd_alpha_ratio_table__size_1_v() / rows;
1241
1242 u32 row;
1243 u32 index;
1244 u32 gpc_index;
1245 u32 gpcs_per_reg = 4;
1246 u32 pes_index;
1247 u32 tpc_count_pes;
1248 u32 num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
1249
1250 u32 alpha_target, beta_target;
1251 u32 alpha_bits, beta_bits;
1252 u32 alpha_mask, beta_mask, partial_mask;
1253 u32 reg_offset;
1254 bool assign_alpha;
1255
1256 u32 map_alpha[gr_pd_alpha_ratio_table__size_1_v()];
1257 u32 map_beta[gr_pd_alpha_ratio_table__size_1_v()];
1258 u32 map_reg_used[gr_pd_alpha_ratio_table__size_1_v()];
1259
1260 gk20a_dbg_fn("");
1261
1262 memset(map_alpha, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
1263 memset(map_beta, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
1264 memset(map_reg_used, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
1265
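	/*
	 * Each of the 32 table rows corresponds to one alpha:beta split of
	 * the TPCs. For every row, walk the PES TPC masks of each GPC and
	 * partition them into an alpha mask and a beta mask, alternating
	 * which side takes the next PES so the split stays roughly balanced.
	 */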
1266 for (row = 0; row < rows; ++row) {
1267 alpha_target = max_t(u32, gr->tpc_count * row / rows, 1);
1268 beta_target = gr->tpc_count - alpha_target;
1269
1270 assign_alpha = (alpha_target < beta_target);
1271
1272 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
1273 reg_offset = (row * row_stride) + (gpc_index / gpcs_per_reg);
1274 alpha_mask = beta_mask = 0;
1275
1276 for (pes_index = 0; pes_index < num_pes_per_gpc; pes_index++) {
1277 tpc_count_pes = gr->pes_tpc_count[pes_index][gpc_index];
1278
1279 if (assign_alpha) {
1280 alpha_bits = (alpha_target == 0) ? 0 : tpc_count_pes;
1281 beta_bits = tpc_count_pes - alpha_bits;
1282 } else {
1283 beta_bits = (beta_target == 0) ? 0 : tpc_count_pes;
1284 alpha_bits = tpc_count_pes - beta_bits;
1285 }
1286
1287 partial_mask = gr->pes_tpc_mask[pes_index][gpc_index];
1288 partial_mask = clear_count_bits(partial_mask, tpc_count_pes - alpha_bits);
1289 alpha_mask |= partial_mask;
1290
1291 partial_mask = gr->pes_tpc_mask[pes_index][gpc_index] ^ partial_mask;
1292 beta_mask |= partial_mask;
1293
1294 alpha_target -= min(alpha_bits, alpha_target);
1295 beta_target -= min(beta_bits, beta_target);
1296
1297 if ((alpha_bits > 0) || (beta_bits > 0))
1298 assign_alpha = !assign_alpha;
1299 }
1300
1301 switch (gpc_index % gpcs_per_reg) {
1302 case 0:
1303 map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n0_mask_f(alpha_mask);
1304 map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n0_mask_f(beta_mask);
1305 break;
1306 case 1:
1307 map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n1_mask_f(alpha_mask);
1308 map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n1_mask_f(beta_mask);
1309 break;
1310 case 2:
1311 map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n2_mask_f(alpha_mask);
1312 map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n2_mask_f(beta_mask);
1313 break;
1314 case 3:
1315 map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n3_mask_f(alpha_mask);
1316 map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n3_mask_f(beta_mask);
1317 break;
1318 }
1319 map_reg_used[reg_offset] = true;
1320 }
1321 }
1322
1323 for (index = 0; index < gr_pd_alpha_ratio_table__size_1_v(); index++) {
1324 if (map_reg_used[index]) {
1325 gk20a_writel(g, gr_pd_alpha_ratio_table_r(index), map_alpha[index]);
1326 gk20a_writel(g, gr_pd_beta_ratio_table_r(index), map_beta[index]);
1327 }
1328 }
1329
1330 return 0;
1331}
1332
1333static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
1334{
1335 struct gr_gk20a *gr = &g->gr;
1336 u32 tpc_index, gpc_index;
1337 u32 tpc_offset, gpc_offset;
1338 u32 sm_id = 0, gpc_id = 0;
1339 u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()];
1340 u32 tpc_per_gpc;
1341 u32 max_ways_evict = INVALID_MAX_WAYS;
1342 u32 l1c_dbg_reg_val;
1343
1344 gk20a_dbg_fn("");
1345
1346 for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) {
1347 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
1348 gpc_offset = proj_gpc_stride_v() * gpc_index;
1349 if (tpc_index < gr->gpc_tpc_count[gpc_index]) {
1350 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index;
1351
1352 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
1353 gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id));
1354 gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset,
1355 gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id));
1356 gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset,
1357 gr_gpc0_gpm_pd_sm_id_id_f(sm_id));
1358 gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
1359 gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
1360
1361 sm_id_to_gpc_id[sm_id] = gpc_index;
1362 sm_id++;
1363 }
1364
1365 gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset,
1366 gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
1367 gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset,
1368 gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
1369 }
1370 }
1371
1372 for (tpc_index = 0, gpc_id = 0;
1373 tpc_index < gr_pd_num_tpc_per_gpc__size_1_v();
1374 tpc_index++, gpc_id += 8) {
1375
1376 if (gpc_id >= gr->gpc_count)
1377 gpc_id = 0;
1378
1379 tpc_per_gpc =
1380 gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) |
1381 gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) |
1382 gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) |
1383 gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) |
1384 gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) |
1385 gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) |
1386 gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) |
1387 gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]);
1388
1389 gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
1390 gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
1391 }
1392
1393 /* gr__setup_pd_mapping stubbed for gk20a */
1394 gr_gk20a_setup_rop_mapping(g, gr);
1395 if (g->ops.gr.setup_alpha_beta_tables)
1396 g->ops.gr.setup_alpha_beta_tables(g, gr);
1397
1398 if (gr->num_fbps == 1)
1399 max_ways_evict = 9;
1400
1401 if (max_ways_evict != INVALID_MAX_WAYS)
1402 g->ops.ltc.set_max_ways_evict_last(g, max_ways_evict);
1403
1404 for (gpc_index = 0;
1405 gpc_index < gr_pd_dist_skip_table__size_1_v() * 4;
1406 gpc_index += 4) {
1407
1408 gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4),
 1409		gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) |
 1410		gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) |
 1411		gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) |
 1412		gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]));
1413 }
1414
1415 gk20a_writel(g, gr_cwd_fs_r(),
1416 gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
1417 gr_cwd_fs_num_tpcs_f(gr->tpc_count));
1418
1419 gk20a_writel(g, gr_bes_zrop_settings_r(),
1420 gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps));
1421 gk20a_writel(g, gr_bes_crop_settings_r(),
1422 gr_bes_crop_settings_num_active_fbps_f(gr->num_fbps));
1423
1424 /* turn on cya15 bit for a default val that missed the cut */
1425 l1c_dbg_reg_val = gk20a_readl(g, gr_gpc0_tpc0_l1c_dbg_r());
1426 l1c_dbg_reg_val |= gr_gpc0_tpc0_l1c_dbg_cya15_en_f();
1427 gk20a_writel(g, gr_gpc0_tpc0_l1c_dbg_r(), l1c_dbg_reg_val);
1428
1429 return 0;
1430}
1431
1432static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
1433{
1434 struct gk20a *g = c->g;
1435 int ret;
1436
1437 u32 inst_base_ptr =
1438 u64_lo32(c->inst_block.cpu_pa
1439 >> ram_in_base_shift_v());
1440
1441
1442 gk20a_dbg_fn("");
1443
1444 ret = gr_gk20a_submit_fecs_method_op(g,
1445 (struct fecs_method_op_gk20a) {
1446 .method.addr = save_type,
1447 .method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
1448 gr_fecs_current_ctx_target_vid_mem_f() |
1449 gr_fecs_current_ctx_valid_f(1)),
1450 .mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL,
1451 .ok = 1, .fail = 2,
1452 },
1453 .cond.ok = GR_IS_UCODE_OP_AND,
1454 .cond.fail = GR_IS_UCODE_OP_AND,
1455 });
1456
1457 if (ret)
1458 gk20a_err(dev_from_gk20a(g), "save context image failed");
1459
1460 return ret;
1461}
1462
1463static u32 gk20a_init_sw_bundle(struct gk20a *g)
1464{
1465 struct av_list_gk20a *sw_bundle_init = &g->gr.ctx_vars.sw_bundle_init;
1466 u32 last_bundle_data = 0;
1467 u32 err = 0;
1468 int i;
1469 unsigned long end_jiffies = jiffies +
1470 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
1471
1472 /* enable pipe mode override */
1473 gk20a_writel(g, gr_pipe_bundle_config_r(),
1474 gr_pipe_bundle_config_override_pipe_mode_enabled_f());
1475
1476 /* load bundle init */
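	/* Write each (addr, value) pair from the netlist, skipping redundant
	 * writes of a repeated data value and waiting for idle whenever the
	 * GO_IDLE bundle address is issued. */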
1477 for (i = 0; i < sw_bundle_init->count; i++) {
1478
1479 if (i == 0 || last_bundle_data != sw_bundle_init->l[i].value) {
1480 gk20a_writel(g, gr_pipe_bundle_data_r(),
1481 sw_bundle_init->l[i].value);
1482 last_bundle_data = sw_bundle_init->l[i].value;
1483 }
1484
1485 gk20a_writel(g, gr_pipe_bundle_address_r(),
1486 sw_bundle_init->l[i].addr);
1487
1488 if (gr_pipe_bundle_address_value_v(sw_bundle_init->l[i].addr) ==
1489 GR_GO_IDLE_BUNDLE)
1490 err |= gr_gk20a_wait_idle(g, end_jiffies,
1491 GR_IDLE_CHECK_DEFAULT);
1492 }
1493
1494 /* disable pipe mode override */
1495 gk20a_writel(g, gr_pipe_bundle_config_r(),
1496 gr_pipe_bundle_config_override_pipe_mode_disabled_f());
1497
1498 return err;
1499}
1500
1501/* init global golden image from a fresh gr_ctx in channel ctx.
1502 save a copy in local_golden_image in ctx_vars */
1503static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1504 struct channel_gk20a *c)
1505{
1506 struct gr_gk20a *gr = &g->gr;
1507 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1508 u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
1509 u32 ctx_header_words;
1510 u32 i;
1511 u32 data;
1512 void *ctx_ptr = NULL;
1513 void *gold_ptr = NULL;
1514 u32 err = 0;
1515
1516 gk20a_dbg_fn("");
1517
 1518	/* golden ctx is global to all channels. Although only the first
 1519	   channel initializes the golden image, the driver needs to prevent
 1520	   multiple channels from initializing golden ctx at the same time */
1521 mutex_lock(&gr->ctx_mutex);
1522
1523 if (gr->ctx_vars.golden_image_initialized)
1524 goto clean_up;
1525
1526 err = gr_gk20a_fecs_ctx_bind_channel(g, c);
1527 if (err)
1528 goto clean_up;
1529
1530 err = gk20a_init_sw_bundle(g);
1531 if (err)
1532 goto clean_up;
1533
1534 err = gr_gk20a_elpg_protected_call(g,
1535 gr_gk20a_commit_global_ctx_buffers(g, c, false));
1536 if (err)
1537 goto clean_up;
1538
1539 gold_ptr = vmap(gr->global_ctx_buffer[GOLDEN_CTX].pages,
1540 PAGE_ALIGN(gr->global_ctx_buffer[GOLDEN_CTX].size) >>
1541 PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL));
1542 if (!gold_ptr)
1543 goto clean_up;
1544
1545 ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
1546 PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
1547 0, pgprot_dmacoherent(PAGE_KERNEL));
1548 if (!ctx_ptr)
1549 goto clean_up;
1550
1551 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
1552 ctx_header_words >>= 2;
1553
1554 /* Channel gr_ctx buffer is gpu cacheable.
1555 Flush before cpu read. */
1556 gk20a_mm_fb_flush(g);
1557 gk20a_mm_l2_flush(g, false);
1558
1559 for (i = 0; i < ctx_header_words; i++) {
1560 data = gk20a_mem_rd32(ctx_ptr, i);
1561 gk20a_mem_wr32(gold_ptr, i, data);
1562 }
1563
1564 gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_o(), 0,
1565 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
1566
1567 gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, 0);
1568
1569 gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
1570
1571 gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v());
1572
1573 if (gr->ctx_vars.local_golden_image == NULL) {
1574
1575 gr->ctx_vars.local_golden_image =
1576 kzalloc(gr->ctx_vars.golden_image_size, GFP_KERNEL);
1577
1578 if (gr->ctx_vars.local_golden_image == NULL) {
1579 err = -ENOMEM;
1580 goto clean_up;
1581 }
1582
1583 for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
1584 gr->ctx_vars.local_golden_image[i] =
1585 gk20a_mem_rd32(gold_ptr, i);
1586 }
1587
1588 gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
1589
1590 gr->ctx_vars.golden_image_initialized = true;
1591
1592 gk20a_mm_l2_invalidate(g);
1593
1594 gk20a_writel(g, gr_fecs_current_ctx_r(),
1595 gr_fecs_current_ctx_valid_false_f());
1596
1597clean_up:
1598 if (err)
1599 gk20a_err(dev_from_gk20a(g), "fail");
1600 else
1601 gk20a_dbg_fn("done");
1602
1603 if (gold_ptr)
1604 vunmap(gold_ptr);
1605 if (ctx_ptr)
1606 vunmap(ctx_ptr);
1607
1608 mutex_unlock(&gr->ctx_mutex);
1609 return err;
1610}
1611
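/* Toggle the SMPC mode bit in the channel's gr_ctx image so that SM
 * performance counter state is (or is not) saved and restored on context
 * switch. The gr_ctx is flushed from FB/L2 before the CPU update. */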
1612int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1613 struct channel_gk20a *c,
1614 bool enable_smpc_ctxsw)
1615{
1616 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1617 void *ctx_ptr = NULL;
1618 u32 data;
1619
1620 /*XXX caller responsible for making sure the channel is quiesced? */
1621
1622 /* Channel gr_ctx buffer is gpu cacheable.
1623 Flush and invalidate before cpu update. */
1624 gk20a_mm_fb_flush(g);
1625 gk20a_mm_l2_flush(g, true);
1626
1627 ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
1628 PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
1629 0, pgprot_dmacoherent(PAGE_KERNEL));
1630 if (!ctx_ptr)
1631 return -ENOMEM;
1632
1633 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
1634 data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
1635 data |= enable_smpc_ctxsw ?
1636 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
1637 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
1638 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0,
1639 data);
1640
1641 vunmap(ctx_ptr);
1642
1643 gk20a_mm_l2_invalidate(g);
1644
1645 return 0;
1646}
1647
1648/* load the saved copy of the golden image into the channel gr_ctx */
1649static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1650 struct channel_gk20a *c)
1651{
1652 struct gr_gk20a *gr = &g->gr;
1653 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1654 u32 virt_addr_lo;
1655 u32 virt_addr_hi;
1656 u32 i, v, data;
1657 int ret = 0;
1658 void *ctx_ptr = NULL;
1659
1660 gk20a_dbg_fn("");
1661
1662 if (gr->ctx_vars.local_golden_image == NULL)
1663 return -1;
1664
1665 /* Channel gr_ctx buffer is gpu cacheable.
1666 Flush and invalidate before cpu update. */
1667 gk20a_mm_fb_flush(g);
1668 gk20a_mm_l2_flush(g, true);
1669
1670 ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
1671 PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
1672 0, pgprot_dmacoherent(PAGE_KERNEL));
1673 if (!ctx_ptr)
1674 return -ENOMEM;
1675
1676 for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
1677 gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]);
1678
1679 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0);
1680 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0);
1681
1682 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
1683 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
1684
1685 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0,
1686 ch_ctx->patch_ctx.data_count);
1687 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_o(), 0,
1688 virt_addr_lo);
1689 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0,
1690 virt_addr_hi);
1691
1692	/* no users of the client-managed performance counter ctx */
1693 ch_ctx->pm_ctx.ctx_sw_mode =
1694 ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
1695 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
1696 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1697 data |= ch_ctx->pm_ctx.ctx_sw_mode;
1698 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0,
1699 data);
1700
1701 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, 0);
1702
1703 /* set priv access map */
1704 virt_addr_lo =
1705 u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
1706 virt_addr_hi =
1707 u64_hi32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
1708
1709 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_config_o(), 0,
1710 ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f());
1711 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 0,
1712 virt_addr_lo);
1713 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 0,
1714 virt_addr_hi);
1715 /* disable verif features */
1716 v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0);
1717 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
1718 v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
1719 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v);
1720
1721
1722 vunmap(ctx_ptr);
1723
1724 gk20a_mm_l2_invalidate(g);
1725
1726 if (tegra_platform_is_linsim()) {
1727 u32 inst_base_ptr =
1728 u64_lo32(c->inst_block.cpu_pa
1729 >> ram_in_base_shift_v());
1730
1731 ret = gr_gk20a_submit_fecs_method_op(g,
1732 (struct fecs_method_op_gk20a) {
1733 .method.data =
1734 (gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
1735 gr_fecs_current_ctx_target_vid_mem_f() |
1736 gr_fecs_current_ctx_valid_f(1)),
1737 .method.addr =
1738 gr_fecs_method_push_adr_restore_golden_v(),
1739 .mailbox = {
1740 .id = 0, .data = 0,
1741 .clr = ~0, .ret = NULL,
1742 .ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
1743 .fail = 0},
1744 .cond.ok = GR_IS_UCODE_OP_EQUAL,
1745 .cond.fail = GR_IS_UCODE_OP_SKIP});
1746
1747 if (ret)
1748 gk20a_err(dev_from_gk20a(g),
1749 "restore context image failed");
1750 }
1751
1752 return ret;
1753}
1754
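/* Start the FECS and GPCCS falcons: clear the FECS mailbox, drop the
 * require-ctx bit on both DMA controllers and start both falcon CPUs. */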
1755static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
1756{
1757 gk20a_dbg_fn("");
1758
1759 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
1760 gr_fecs_ctxsw_mailbox_clear_value_f(~0));
1761
1762 gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0));
1763 gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0));
1764
1765 gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1));
1766 gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1));
1767
1768 gk20a_dbg_fn("done");
1769}
1770
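/* Build the instance block used while bootstrapping the ctxsw falcons:
 * allocate it, program the VA limit and page directory base from the PMU
 * VM, and map the ucode surface read-only into that VM. */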
1771static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
1772{
1773 struct mm_gk20a *mm = &g->mm;
1774 struct vm_gk20a *vm = &mm->pmu.vm;
1775 struct device *d = dev_from_gk20a(g);
1776 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
1777 void *inst_ptr;
1778 u32 pde_addr_lo;
1779 u32 pde_addr_hi;
1780 u64 pde_addr;
1781 dma_addr_t iova;
1782
1783	/* Allocate memory for the instance block */
1784 ucode_info->inst_blk_desc.size = ram_in_alloc_size_v();
1785 ucode_info->inst_blk_desc.cpuva = dma_alloc_coherent(d,
1786 ucode_info->inst_blk_desc.size,
1787 &iova,
1788 GFP_KERNEL);
1789 if (!ucode_info->inst_blk_desc.cpuva) {
1790 gk20a_err(d, "failed to allocate memory\n");
1791 return -ENOMEM;
1792 }
1793
1794 ucode_info->inst_blk_desc.iova = iova;
1795 ucode_info->inst_blk_desc.cpu_pa = gk20a_get_phys_from_iova(d,
1796 ucode_info->inst_blk_desc.iova);
1797
1798 inst_ptr = ucode_info->inst_blk_desc.cpuva;
1799
1800 /* Set inst block */
1801 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
1802 u64_lo32(vm->va_limit) | 0xFFF);
1803 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
1804 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
1805
1806 pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
1807 pde_addr_lo = u64_lo32(pde_addr >> 12);
1808 pde_addr_hi = u64_hi32(pde_addr);
1809 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
1810 ram_in_page_dir_base_target_vid_mem_f() |
1811 ram_in_page_dir_base_vol_true_f() |
1812 ram_in_page_dir_base_lo_f(pde_addr_lo));
1813 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
1814 ram_in_page_dir_base_hi_f(pde_addr_hi));
1815
1816 /* Map ucode surface to GMMU */
1817 ucode_info->ucode_gpuva = gk20a_gmmu_map(vm,
1818 &ucode_info->surface_desc.sgt,
1819 ucode_info->surface_desc.size,
1820 0, /* flags */
1821 gk20a_mem_flag_read_only);
1822 if (!ucode_info->ucode_gpuva) {
1823 gk20a_err(d, "failed to update gmmu ptes\n");
1824 return -ENOMEM;
1825 }
1826
1827 return 0;
1828}
1829
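/* Place one ucode segment at the current offset and advance the offset to
 * the next BLK_SIZE-aligned boundary. */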
1830static void gr_gk20a_init_ctxsw_ucode_segment(
1831 struct gk20a_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size)
1832{
1833 p_seg->offset = *offset;
1834 p_seg->size = size;
1835 *offset = ALIGN(*offset + size, BLK_SIZE);
1836}
1837
1838static void gr_gk20a_init_ctxsw_ucode_segments(
1839 struct gk20a_ctxsw_ucode_segments *segments, u32 *offset,
1840 struct gk20a_ctxsw_bootloader_desc *bootdesc,
1841 u32 code_size, u32 data_size)
1842{
1843 u32 boot_size = ALIGN(bootdesc->size, sizeof(u32));
1844 segments->boot_entry = bootdesc->entry_point;
1845 segments->boot_imem_offset = bootdesc->imem_offset;
1846 gr_gk20a_init_ctxsw_ucode_segment(&segments->boot, offset, boot_size);
1847 gr_gk20a_init_ctxsw_ucode_segment(&segments->code, offset, code_size);
1848 gr_gk20a_init_ctxsw_ucode_segment(&segments->data, offset, data_size);
1849}
1850
1851static int gr_gk20a_copy_ctxsw_ucode_segments(
1852 u8 *buf,
1853 struct gk20a_ctxsw_ucode_segments *segments,
1854 u32 *bootimage,
1855 u32 *code, u32 *data)
1856{
1857 memcpy(buf + segments->boot.offset, bootimage, segments->boot.size);
1858 memcpy(buf + segments->code.offset, code, segments->code.size);
1859 memcpy(buf + segments->data.offset, data, segments->data.size);
1860 return 0;
1861}
1862
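/* Load the FECS and GPCCS firmware blobs, lay out their boot/code/data
 * segments in a single DMA surface, copy the images into it and map the
 * surface into the ucode virtual address space. */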
1863static int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
1864{
1865 struct device *d = dev_from_gk20a(g);
1866 struct mm_gk20a *mm = &g->mm;
1867 struct vm_gk20a *vm = &mm->pmu.vm;
1868 struct gk20a_ctxsw_bootloader_desc *fecs_boot_desc;
1869 struct gk20a_ctxsw_bootloader_desc *gpccs_boot_desc;
1870 const struct firmware *fecs_fw;
1871 const struct firmware *gpccs_fw;
1872 u32 *fecs_boot_image;
1873 u32 *gpccs_boot_image;
1874 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
1875 u8 *buf;
1876 u32 ucode_size;
1877 int err = 0;
1878 dma_addr_t iova;
1879 DEFINE_DMA_ATTRS(attrs);
1880
1881 fecs_fw = gk20a_request_firmware(g, GK20A_FECS_UCODE_IMAGE);
1882 if (!fecs_fw) {
1883 gk20a_err(d, "failed to load fecs ucode!!");
1884 return -ENOENT;
1885 }
1886
1887 fecs_boot_desc = (void *)fecs_fw->data;
1888 fecs_boot_image = (void *)(fecs_fw->data +
1889 sizeof(struct gk20a_ctxsw_bootloader_desc));
1890
1891 gpccs_fw = gk20a_request_firmware(g, GK20A_GPCCS_UCODE_IMAGE);
1892 if (!gpccs_fw) {
1893 release_firmware(fecs_fw);
1894 gk20a_err(d, "failed to load gpccs ucode!!");
1895 return -ENOENT;
1896 }
1897
1898 gpccs_boot_desc = (void *)gpccs_fw->data;
1899 gpccs_boot_image = (void *)(gpccs_fw->data +
1900 sizeof(struct gk20a_ctxsw_bootloader_desc));
1901
1902 ucode_size = 0;
1903 gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->fecs, &ucode_size,
1904 fecs_boot_desc,
1905 g->gr.ctx_vars.ucode.fecs.inst.count * sizeof(u32),
1906 g->gr.ctx_vars.ucode.fecs.data.count * sizeof(u32));
1907 gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->gpccs, &ucode_size,
1908 gpccs_boot_desc,
1909 g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32),
1910 g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32));
1911
1912 ucode_info->surface_desc.size = ucode_size;
1913 dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
1914 ucode_info->surface_desc.cpuva = dma_alloc_attrs(d,
1915 ucode_info->surface_desc.size,
1916 &iova,
1917 GFP_KERNEL,
1918 &attrs);
1919 if (!ucode_info->surface_desc.cpuva) {
1920 gk20a_err(d, "memory allocation failed\n");
1921 err = -ENOMEM;
1922 goto clean_up;
1923 }
1924
1925 ucode_info->surface_desc.iova = iova;
1926 err = gk20a_get_sgtable(d, &ucode_info->surface_desc.sgt,
1927 ucode_info->surface_desc.cpuva,
1928 ucode_info->surface_desc.iova,
1929 ucode_info->surface_desc.size);
1930 if (err) {
1931 gk20a_err(d, "failed to create sg table\n");
1932 goto clean_up;
1933 }
1934
1935 buf = (u8 *)ucode_info->surface_desc.cpuva;
1936 if (!buf) {
1937 gk20a_err(d, "failed to map surface desc buffer");
1938 err = -ENOMEM;
1939 goto clean_up;
1940 }
1941
1942 gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->fecs,
1943 fecs_boot_image,
1944 g->gr.ctx_vars.ucode.fecs.inst.l,
1945 g->gr.ctx_vars.ucode.fecs.data.l);
1946
1947 release_firmware(fecs_fw);
1948 fecs_fw = NULL;
1949
1950 gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->gpccs,
1951 gpccs_boot_image,
1952 g->gr.ctx_vars.ucode.gpccs.inst.l,
1953 g->gr.ctx_vars.ucode.gpccs.data.l);
1954
1955 release_firmware(gpccs_fw);
1956 gpccs_fw = NULL;
1957
1958 err = gr_gk20a_init_ctxsw_ucode_vaspace(g);
1959 if (err)
1960 goto clean_up;
1961
1962 gk20a_free_sgtable(&ucode_info->surface_desc.sgt);
1963
1964 return 0;
1965
1966 clean_up:
1967 if (ucode_info->ucode_gpuva)
1968 gk20a_gmmu_unmap(vm, ucode_info->ucode_gpuva,
1969 ucode_info->surface_desc.size, gk20a_mem_flag_none);
1970 if (ucode_info->surface_desc.sgt)
1971 gk20a_free_sgtable(&ucode_info->surface_desc.sgt);
1972 if (ucode_info->surface_desc.cpuva)
1973 dma_free_attrs(d, ucode_info->surface_desc.size,
1974 ucode_info->surface_desc.cpuva,
1975 ucode_info->surface_desc.iova,
1976 &attrs);
1977 ucode_info->surface_desc.cpuva = NULL;
1978 ucode_info->surface_desc.iova = 0;
1979
1980 release_firmware(gpccs_fw);
1981 gpccs_fw = NULL;
1982 release_firmware(fecs_fw);
1983 fecs_fw = NULL;
1984
1985 return err;
1986}
1987
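/* Bind the ctxsw ucode instance block to the FECS context arbiter: wait
 * for the arbiter to go idle, program the new/current context pointers and
 * issue arbiter commands, polling each one for completion. */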
1988static void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
1989{
1990 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
1991 int retries = 20;
1992 phys_addr_t inst_ptr;
1993 u32 val;
1994
1995 while ((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
1996 gr_fecs_ctxsw_status_1_arb_busy_m()) && retries) {
1997 udelay(2);
1998 retries--;
1999 }
2000 if (!retries)
2001 gk20a_err(dev_from_gk20a(g), "arbiter idle timeout");
2002
2003 gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
2004
2005 inst_ptr = ucode_info->inst_blk_desc.cpu_pa;
2006 gk20a_writel(g, gr_fecs_new_ctx_r(),
2007 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
2008 gr_fecs_new_ctx_target_m() |
2009 gr_fecs_new_ctx_valid_m());
2010
2011 gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
2012 gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
2013 gr_fecs_arb_ctx_ptr_target_m());
2014
2015 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
2016
2017 /* Wait for arbiter command to complete */
2018 retries = 20;
2019 val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
2020 while (gr_fecs_arb_ctx_cmd_cmd_v(val) && retries) {
2021 udelay(2);
2022 retries--;
2023 val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
2024 }
2025 if (!retries)
2026 gk20a_err(dev_from_gk20a(g), "arbiter complete timeout");
2027
2028 gk20a_writel(g, gr_fecs_current_ctx_r(),
2029 gr_fecs_current_ctx_ptr_f(inst_ptr >> 12) |
2030 gr_fecs_current_ctx_target_m() |
2031 gr_fecs_current_ctx_valid_m());
2032 /* Send command to arbiter to flush */
2033 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s());
2034
2035 retries = 20;
2036 val = (gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()));
2037 while (gr_fecs_arb_ctx_cmd_cmd_v(val) && retries) {
2038 udelay(2);
2039 retries--;
2040 val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
2041 }
2042 if (!retries)
2043 gk20a_err(dev_from_gk20a(g), "arbiter complete timeout");
2044}
2045
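/* Bootstrap one ctxsw falcon: write the bootloader header into DMEM through
 * the auto-incrementing port, DMA the boot image into IMEM in 256-byte
 * blocks, program the boot vector and start the falcon CPU. */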
2046static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
2047 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
2048{
2049 u32 addr_code32;
2050 u32 addr_data32;
2051 u32 addr_load32;
2052 u32 dst = 0;
2053 u32 blocks;
2054 u32 b;
2055
2056 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
2057 addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);
2058 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
2059
2060 gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
2061 gr_fecs_dmactl_require_ctx_f(0));
2062
2063 /*
2064 * Copy falcon bootloader header into dmem at offset 0.
2065 * Configure dmem port 0 for auto-incrementing writes starting at dmem
2066 * offset 0.
2067 */
2068 gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0),
2069 gr_fecs_dmemc_offs_f(0) |
2070 gr_fecs_dmemc_blk_f(0) |
2071 gr_fecs_dmemc_aincw_f(1));
2072
2073 /* Write out the actual data */
2074 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2075 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32);
2076 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2077 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->code.size);
2078 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2079 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_data32);
2080 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->data.size);
2081 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32);
2082 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2083 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2084
2085 blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8;
2086
2087 /*
2088 * Set the base FB address for the DMA transfer. Subtract off the 256
2089 * byte IMEM block offset such that the relative FB and IMEM offsets
2090 * match, allowing the IMEM tags to be properly created.
2091 */
2092
2093 dst = segments->boot_imem_offset;
2094 gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
2095 (addr_load32 - (dst >> 8)));
2096
2097 for (b = 0; b < blocks; b++) {
2098 /* Setup destination IMEM offset */
2099 gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
2100 dst + (b << 8));
2101
2102 /* Setup source offset (relative to BASE) */
2103 gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
2104 dst + (b << 8));
2105
2106 gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
2107 gr_fecs_dmatrfcmd_imem_f(0x01) |
2108 gr_fecs_dmatrfcmd_write_f(0x00) |
2109 gr_fecs_dmatrfcmd_size_f(0x06) |
2110 gr_fecs_dmatrfcmd_ctxdma_f(0));
2111 }
2112
2113 /* Specify the falcon boot vector */
2114 gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
2115 gr_fecs_bootvec_vec_f(segments->boot_entry));
2116
2117 /* Write to CPUCTL to start the falcon */
2118 gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
2119 gr_fecs_cpuctl_startcpu_f(0x01));
2120
2121 return 0;
2122}
2123
2124static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
2125{
2126 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2127 u64 addr_base = ucode_info->ucode_gpuva;
2128
2129 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0);
2130
2131 gr_gk20a_load_falcon_bind_instblk(g);
2132
2133 gr_gk20a_load_ctxsw_ucode_segments(g, addr_base,
2134 &g->ctxsw_ucode_info.fecs, 0);
2135
2136 gr_gk20a_load_ctxsw_ucode_segments(g, addr_base,
2137 &g->ctxsw_ucode_info.gpccs,
2138 gr_gpcs_gpccs_falcon_hwcfg_r() -
2139 gr_fecs_falcon_hwcfg_r());
2140}
2141
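/* Top-level ctxsw ucode load: use the legacy DMEM/IMEM path when the PMU is
 * not in use, otherwise the bootloader-based path, then wait for the
 * init-complete handshake from the ucode. */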
2142static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr)
2143{
2144 u32 ret;
2145
2146 gk20a_dbg_fn("");
2147
2148 if (tegra_platform_is_linsim()) {
2149 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
2150 gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
2151 gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
2152 gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
2153 }
2154
2155 /*
2156 * In case the gPMU falcon is not being used, revert to the old way of
2157 * loading gr ucode, without the faster bootstrap routine.
2158 */
2159 if (!support_gk20a_pmu()) {
2160 gr_gk20a_load_falcon_dmem(g);
2161 gr_gk20a_load_falcon_imem(g);
2162 gr_gk20a_start_falcon_ucode(g);
2163 } else {
2164 if (!gr->skip_ucode_init)
2165 gr_gk20a_init_ctxsw_ucode(g);
2166 gr_gk20a_load_falcon_with_bootloader(g);
2167 gr->skip_ucode_init = true;
2168 }
2169
2170 ret = gr_gk20a_ctx_wait_ucode(g, 0, 0,
2171 GR_IS_UCODE_OP_EQUAL,
2172 eUcodeHandshakeInitComplete,
2173 GR_IS_UCODE_OP_SKIP, 0);
2174 if (ret) {
2175 gk20a_err(dev_from_gk20a(g), "falcon ucode init timeout");
2176 return ret;
2177 }
2178
2179 if (support_gk20a_pmu())
2180 gk20a_writel(g, gr_fecs_current_ctx_r(),
2181 gr_fecs_current_ctx_valid_false_f());
2182
2183 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff);
2184 gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff);
2185 gk20a_writel(g, gr_fecs_method_push_r(),
2186 gr_fecs_method_push_adr_set_watchdog_timeout_f());
2187
2188 gk20a_dbg_fn("done");
2189 return 0;
2190}
2191
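/* Query the golden, zcull and PM context image sizes from the FECS ucode;
 * the golden and zcull sizes are cached in gr.ctx_vars and must not change
 * across railgating. */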
2192static int gr_gk20a_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr)
2193{
2194 u32 golden_ctx_image_size = 0;
2195 u32 zcull_ctx_image_size = 0;
2196 u32 pm_ctx_image_size = 0;
2197 u32 ret;
2198 struct fecs_method_op_gk20a op = {
2199 .mailbox = { .id = 0, .data = 0,
2200 .clr = ~0, .ok = 0, .fail = 0},
2201 .method.data = 0,
2202 .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
2203 .cond.fail = GR_IS_UCODE_OP_SKIP,
2204 };
2205
2206 gk20a_dbg_fn("");
2207 op.method.addr = gr_fecs_method_push_adr_discover_image_size_v();
2208 op.mailbox.ret = &golden_ctx_image_size;
2209 ret = gr_gk20a_submit_fecs_method_op(g, op);
2210 if (ret) {
2211 gk20a_err(dev_from_gk20a(g),
2212 "query golden image size failed");
2213 return ret;
2214 }
2215 op.method.addr = gr_fecs_method_push_adr_discover_zcull_image_size_v();
2216 op.mailbox.ret = &zcull_ctx_image_size;
2217 ret = gr_gk20a_submit_fecs_method_op(g, op);
2218 if (ret) {
2219 gk20a_err(dev_from_gk20a(g),
2220 "query zcull ctx image size failed");
2221 return ret;
2222 }
2223 op.method.addr = gr_fecs_method_push_adr_discover_pm_image_size_v();
2224 op.mailbox.ret = &pm_ctx_image_size;
2225 ret = gr_gk20a_submit_fecs_method_op(g, op);
2226 if (ret) {
2227 gk20a_err(dev_from_gk20a(g),
2228 "query pm ctx image size failed");
2229 return ret;
2230 }
2231
2232 if (!g->gr.ctx_vars.golden_image_size &&
2233 !g->gr.ctx_vars.zcull_ctxsw_image_size) {
2234 g->gr.ctx_vars.golden_image_size = golden_ctx_image_size;
2235 g->gr.ctx_vars.zcull_ctxsw_image_size = zcull_ctx_image_size;
2236 } else {
2237 /* hw is different after railgating? */
2238 BUG_ON(g->gr.ctx_vars.golden_image_size != golden_ctx_image_size);
2239 BUG_ON(g->gr.ctx_vars.zcull_ctxsw_image_size != zcull_ctx_image_size);
2240 }
2241
2242 g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
2243
2244 gk20a_dbg_fn("done");
2245 return 0;
2246}
2247
2248static void gk20a_gr_destroy_ctx_buffer(struct platform_device *pdev,
2249 struct gr_ctx_buffer_desc *desc)
2250{
2251 struct device *dev = &pdev->dev;
2252 gk20a_free_sgtable(&desc->sgt);
2253 dma_free_attrs(dev, desc->size, desc->pages,
2254 desc->iova, &desc->attrs);
2255}
2256
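/* Allocate one global context buffer without a kernel mapping and build an
 * sg_table for it so it can later be mapped through the GMMU. */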
2257static int gk20a_gr_alloc_ctx_buffer(struct platform_device *pdev,
2258 struct gr_ctx_buffer_desc *desc,
2259 size_t size)
2260{
2261 struct device *dev = &pdev->dev;
2262 DEFINE_DMA_ATTRS(attrs);
2263 dma_addr_t iova;
2264 int err = 0;
2265
2266 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
2267
2268 desc->pages = dma_alloc_attrs(&pdev->dev, size, &iova,
2269 GFP_KERNEL, &attrs);
2270 if (!desc->pages)
2271 return -ENOMEM;
2272
2273 desc->iova = iova;
2274 desc->size = size;
2275 desc->attrs = attrs;
2276 desc->destroy = gk20a_gr_destroy_ctx_buffer;
2277 err = gk20a_get_sgtable_from_pages(&pdev->dev, &desc->sgt, desc->pages,
2278 desc->iova, desc->size);
2279 if (err) {
2280 dma_free_attrs(dev, desc->size, desc->pages,
2281 desc->iova, &desc->attrs);
2282 memset(desc, 0, sizeof(*desc));
2283 }
2284
2285 return err;
2286}
2287
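/* Allocate the global context buffers shared by all channels: circular
 * buffer, pagepool and attribute buffer (plus VPR copies when a secure
 * allocator is available), golden context and priv access map. */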
2288static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
2289{
2290 struct gk20a_platform *platform = platform_get_drvdata(g->dev);
2291 struct gr_gk20a *gr = &g->gr;
2292 int i, attr_buffer_size, err;
2293 struct platform_device *pdev = g->dev;
2294
2295 u32 cb_buffer_size = gr->bundle_cb_default_size *
2296 gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
2297
2298 u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() *
2299 gr_scc_pagepool_total_pages_byte_granularity_v();
2300
2301 gk20a_dbg_fn("");
2302
2303 attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g);
2304
2305 gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size);
2306
2307 err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[CIRCULAR],
2308 cb_buffer_size);
2309 if (err)
2310 goto clean_up;
2311
2312 if (platform->secure_alloc)
2313 platform->secure_alloc(pdev,
2314 &gr->global_ctx_buffer[CIRCULAR_VPR],
2315 cb_buffer_size);
2316
2317 gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);
2318
2319 err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[PAGEPOOL],
2320 pagepool_buffer_size);
2321 if (err)
2322 goto clean_up;
2323
2324 if (platform->secure_alloc)
2325 platform->secure_alloc(pdev,
2326 &gr->global_ctx_buffer[PAGEPOOL_VPR],
2327 pagepool_buffer_size);
2328
2329 gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size);
2330
2331 err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[ATTRIBUTE],
2332 attr_buffer_size);
2333 if (err)
2334 goto clean_up;
2335
2336 if (platform->secure_alloc)
2337 platform->secure_alloc(pdev,
2338 &gr->global_ctx_buffer[ATTRIBUTE_VPR],
2339 attr_buffer_size);
2340
2341 gk20a_dbg_info("golden_image_size : %d",
2342 gr->ctx_vars.golden_image_size);
2343
2344 err = gk20a_gr_alloc_ctx_buffer(pdev,
2345 &gr->global_ctx_buffer[GOLDEN_CTX],
2346 gr->ctx_vars.golden_image_size);
2347 if (err)
2348 goto clean_up;
2349
2350 gk20a_dbg_info("priv_access_map_size : %d",
2351 gr->ctx_vars.priv_access_map_size);
2352
2353 err = gk20a_gr_alloc_ctx_buffer(pdev,
2354 &gr->global_ctx_buffer[PRIV_ACCESS_MAP],
2355 gr->ctx_vars.priv_access_map_size);
2356
2357 if (err)
2358 goto clean_up;
2359
2360 gk20a_dbg_fn("done");
2361 return 0;
2362
2363 clean_up:
2364 gk20a_err(dev_from_gk20a(g), "fail");
2365 for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
2366 if (gr->global_ctx_buffer[i].destroy) {
2367 gr->global_ctx_buffer[i].destroy(pdev,
2368 &gr->global_ctx_buffer[i]);
2369 }
2370 }
2371 return -ENOMEM;
2372}
2373
2374static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g)
2375{
2376 struct platform_device *pdev = g->dev;
2377 struct gr_gk20a *gr = &g->gr;
2378 DEFINE_DMA_ATTRS(attrs);
2379 u32 i;
2380
2381 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
2382
2383 for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
2384 gr->global_ctx_buffer[i].destroy(pdev,
2385 &gr->global_ctx_buffer[i]);
2386 }
2387
2388 gk20a_dbg_fn("done");
2389}
2390
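/* Map the global context buffers into this channel's GPU VA space,
 * preferring the VPR copies for VPR channels when they exist. */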
2391static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2392 struct channel_gk20a *c)
2393{
2394 struct vm_gk20a *ch_vm = c->vm;
2395 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
2396 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
2397 struct gr_gk20a *gr = &g->gr;
2398 struct sg_table *sgt;
2399 u64 size;
2400 u64 gpu_va;
2401 u32 i;
2402 gk20a_dbg_fn("");
2403
2404 /* Circular Buffer */
2405 if (!c->vpr || (gr->global_ctx_buffer[CIRCULAR_VPR].sgt == NULL)) {
2406 sgt = gr->global_ctx_buffer[CIRCULAR].sgt;
2407 size = gr->global_ctx_buffer[CIRCULAR].size;
2408 } else {
2409 sgt = gr->global_ctx_buffer[CIRCULAR_VPR].sgt;
2410 size = gr->global_ctx_buffer[CIRCULAR_VPR].size;
2411 }
2412
2413 gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
2414 NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
2415 gk20a_mem_flag_none);
2416 if (!gpu_va)
2417 goto clean_up;
2418 g_bfr_va[CIRCULAR_VA] = gpu_va;
2419 g_bfr_size[CIRCULAR_VA] = size;
2420
2421 /* Attribute Buffer */
2422 if (!c->vpr || (gr->global_ctx_buffer[ATTRIBUTE_VPR].sgt == NULL)) {
2423 sgt = gr->global_ctx_buffer[ATTRIBUTE].sgt;
2424 size = gr->global_ctx_buffer[ATTRIBUTE].size;
2425 } else {
2426 sgt = gr->global_ctx_buffer[ATTRIBUTE_VPR].sgt;
2427 size = gr->global_ctx_buffer[ATTRIBUTE_VPR].size;
2428 }
2429
2430 gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
2431 NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
2432 gk20a_mem_flag_none);
2433 if (!gpu_va)
2434 goto clean_up;
2435 g_bfr_va[ATTRIBUTE_VA] = gpu_va;
2436 g_bfr_size[ATTRIBUTE_VA] = size;
2437
2438 /* Page Pool */
2439 if (!c->vpr || (gr->global_ctx_buffer[PAGEPOOL_VPR].sgt == NULL)) {
2440 sgt = gr->global_ctx_buffer[PAGEPOOL].sgt;
2441 size = gr->global_ctx_buffer[PAGEPOOL].size;
2442 } else {
2443 sgt = gr->global_ctx_buffer[PAGEPOOL_VPR].sgt;
2444 size = gr->global_ctx_buffer[PAGEPOOL_VPR].size;
2445 }
2446
2447 gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
2448 NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
2449 gk20a_mem_flag_none);
2450 if (!gpu_va)
2451 goto clean_up;
2452 g_bfr_va[PAGEPOOL_VA] = gpu_va;
2453 g_bfr_size[PAGEPOOL_VA] = size;
2454
2455 /* Golden Image */
2456 sgt = gr->global_ctx_buffer[GOLDEN_CTX].sgt;
2457 size = gr->global_ctx_buffer[GOLDEN_CTX].size;
2458 gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0,
2459 gk20a_mem_flag_none);
2460 if (!gpu_va)
2461 goto clean_up;
2462 g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
2463 g_bfr_size[GOLDEN_CTX_VA] = size;
2464
2465 /* Priv register Access Map */
2466 sgt = gr->global_ctx_buffer[PRIV_ACCESS_MAP].sgt;
2467 size = gr->global_ctx_buffer[PRIV_ACCESS_MAP].size;
2468 gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0,
2469 gk20a_mem_flag_none);
2470 if (!gpu_va)
2471 goto clean_up;
2472 g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
2473 g_bfr_size[PRIV_ACCESS_MAP_VA] = size;
2474
2475 c->ch_ctx.global_ctx_buffer_mapped = true;
2476 return 0;
2477
2478 clean_up:
2479 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
2480 if (g_bfr_va[i]) {
2481 gk20a_gmmu_unmap(ch_vm, g_bfr_va[i],
2482 gr->global_ctx_buffer[i].size,
2483 gk20a_mem_flag_none);
2484 g_bfr_va[i] = 0;
2485 }
2486 }
2487 return -ENOMEM;
2488}
2489
2490static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
2491{
2492 struct vm_gk20a *ch_vm = c->vm;
2493 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
2494 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
2495 u32 i;
2496
2497 gk20a_dbg_fn("");
2498
2499 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
2500 if (g_bfr_va[i]) {
2501 gk20a_gmmu_unmap(ch_vm, g_bfr_va[i],
2502 g_bfr_size[i],
2503 gk20a_mem_flag_none);
2504 g_bfr_va[i] = 0;
2505 g_bfr_size[i] = 0;
2506 }
2507 }
2508 c->ch_ctx.global_ctx_buffer_mapped = false;
2509}
2510
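/* Allocate the per-channel gr_ctx backing store (sized to the golden image)
 * and map it cacheable into the channel VM. */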
2511static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
2512 struct channel_gk20a *c)
2513{
2514 struct gr_gk20a *gr = &g->gr;
2515 struct gr_ctx_desc *gr_ctx = &c->ch_ctx.gr_ctx;
2516 struct vm_gk20a *ch_vm = c->vm;
2517 struct device *d = dev_from_gk20a(g);
2518 struct sg_table *sgt;
2519 DEFINE_DMA_ATTRS(attrs);
2520 int err = 0;
2521 dma_addr_t iova;
2522
2523 gk20a_dbg_fn("");
2524
2525 if (gr->ctx_vars.buffer_size == 0)
2526 return 0;
2527
2528 /* alloc channel gr ctx buffer */
2529 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
2530 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
2531
2532 gr_ctx->size = gr->ctx_vars.buffer_total_size;
2533 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
2534 gr_ctx->pages = dma_alloc_attrs(d, gr_ctx->size,
2535 &iova, GFP_KERNEL, &attrs);
2536 if (!gr_ctx->pages)
2537 return -ENOMEM;
2538
2539 gr_ctx->iova = iova;
2540 err = gk20a_get_sgtable_from_pages(d, &sgt, gr_ctx->pages,
2541 gr_ctx->iova, gr_ctx->size);
2542 if (err)
2543 goto err_free;
2544
2545 gr_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, gr_ctx->size,
2546 NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
2547 gk20a_mem_flag_none);
2548 if (!gr_ctx->gpu_va)
2549 goto err_free_sgt;
2550
2551 gk20a_free_sgtable(&sgt);
2552
2553 return 0;
2554
2555 err_free_sgt:
2556 gk20a_free_sgtable(&sgt);
2557 err_free:
2558 dma_free_attrs(d, gr_ctx->size,
2559 gr_ctx->pages, gr_ctx->iova, &attrs);
2560 gr_ctx->pages = NULL;
2561 gr_ctx->iova = 0;
2562
2563 return err;
2564}
2565
2566static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
2567{
2568 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
2569 struct vm_gk20a *ch_vm = c->vm;
2570 struct gk20a *g = c->g;
2571 struct device *d = dev_from_gk20a(g);
2572 DEFINE_DMA_ATTRS(attrs);
2573
2574 gk20a_dbg_fn("");
2575
2576 if (!ch_ctx->gr_ctx.gpu_va)
2577 return;
2578
2579 gk20a_gmmu_unmap(ch_vm, ch_ctx->gr_ctx.gpu_va,
2580 ch_ctx->gr_ctx.size, gk20a_mem_flag_none);
2581 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
2582 dma_free_attrs(d, ch_ctx->gr_ctx.size,
2583 ch_ctx->gr_ctx.pages, ch_ctx->gr_ctx.iova, &attrs);
2584 ch_ctx->gr_ctx.pages = NULL;
2585 ch_ctx->gr_ctx.iova = 0;
2586}
2587
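/* Allocate the small (128-word) per-channel patch context buffer and map it
 * into the channel VM. */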
2588static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
2589 struct channel_gk20a *c)
2590{
2591 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
2592 struct device *d = dev_from_gk20a(g);
2593 struct vm_gk20a *ch_vm = c->vm;
2594 DEFINE_DMA_ATTRS(attrs);
2595 struct sg_table *sgt;
2596 int err = 0;
2597 dma_addr_t iova;
2598
2599 gk20a_dbg_fn("");
2600
2601 patch_ctx->size = 128 * sizeof(u32);
2602 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
2603 patch_ctx->pages = dma_alloc_attrs(d, patch_ctx->size,
2604 &iova, GFP_KERNEL,
2605 &attrs);
2606 if (!patch_ctx->pages)
2607 return -ENOMEM;
2608
2609 patch_ctx->iova = iova;
2610 err = gk20a_get_sgtable_from_pages(d, &sgt, patch_ctx->pages,
2611 patch_ctx->iova, patch_ctx->size);
2612 if (err)
2613 goto err_free;
2614
2615 patch_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, patch_ctx->size,
2616 0, gk20a_mem_flag_none);
2617 if (!patch_ctx->gpu_va)
2618 goto err_free_sgtable;
2619
2620 gk20a_free_sgtable(&sgt);
2621
2622 gk20a_dbg_fn("done");
2623 return 0;
2624
2625 err_free_sgtable:
2626 gk20a_free_sgtable(&sgt);
2627 err_free:
2628 dma_free_attrs(d, patch_ctx->size,
2629 patch_ctx->pages, patch_ctx->iova, &attrs);
2630 patch_ctx->pages = NULL;
2631 patch_ctx->iova = 0;
2632 gk20a_err(dev_from_gk20a(g), "fail");
2633 return err;
2634}
2635
2636static void gr_gk20a_unmap_channel_patch_ctx(struct channel_gk20a *c)
2637{
2638 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
2639 struct vm_gk20a *ch_vm = c->vm;
2640
2641 gk20a_dbg_fn("");
2642
2643 if (patch_ctx->gpu_va)
2644 gk20a_gmmu_unmap(ch_vm, patch_ctx->gpu_va,
2645 patch_ctx->size, gk20a_mem_flag_none);
2646 patch_ctx->gpu_va = 0;
2647 patch_ctx->data_count = 0;
2648}
2649
2650static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
2651{
2652 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
2653 struct gk20a *g = c->g;
2654 struct device *d = dev_from_gk20a(g);
2655 DEFINE_DMA_ATTRS(attrs);
2656
2657 gk20a_dbg_fn("");
2658
2659 gr_gk20a_unmap_channel_patch_ctx(c);
2660
2661 if (patch_ctx->pages) {
2662 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
2663 dma_free_attrs(d, patch_ctx->size,
2664 patch_ctx->pages, patch_ctx->iova, &attrs);
2665 patch_ctx->pages = NULL;
2666 patch_ctx->iova = 0;
2667 }
2668}
2669
2670void gk20a_free_channel_ctx(struct channel_gk20a *c)
2671{
2672 gr_gk20a_unmap_global_ctx_buffers(c);
2673 gr_gk20a_free_channel_patch_ctx(c);
2674 gr_gk20a_free_channel_gr_ctx(c);
2675
2676 /* zcull_ctx, pm_ctx */
2677
2678 memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
2679
2680 c->num_objects = 0;
2681 c->first_init = false;
2682}
2683
2684static bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num)
2685{
2686 bool valid = false;
2687
2688 switch (class_num) {
2689 case KEPLER_COMPUTE_A:
2690 case KEPLER_C:
2691 case FERMI_TWOD_A:
2692 case KEPLER_DMA_COPY_A:
2693 valid = true;
2694 break;
2695
2696 default:
2697 break;
2698 }
2699
2700 return valid;
2701}
2702
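/* Allocate and commit everything a channel needs to run the given class:
 * gr_ctx and patch buffers, global buffer mappings, and the golden image
 * (initialized once globally, then loaded into this channel's gr_ctx). */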
2703int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
2704 struct nvhost_alloc_obj_ctx_args *args)
2705{
2706 struct gk20a *g = c->g;
2707 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
2708 int err = 0;
2709
2710 gk20a_dbg_fn("");
2711
2712	/* an address space needs to have been bound at this point. */
2713 if (!gk20a_channel_as_bound(c)) {
2714 gk20a_err(dev_from_gk20a(g),
2715 "not bound to address space at time"
2716 " of grctx allocation");
2717 return -EINVAL;
2718 }
2719
2720 if (!g->ops.gr.is_valid_class(g, args->class_num)) {
2721 gk20a_err(dev_from_gk20a(g),
2722 "invalid obj class 0x%x", args->class_num);
2723 err = -EINVAL;
2724 goto out;
2725 }
2726
2727 /* allocate gr ctx buffer */
2728 if (ch_ctx->gr_ctx.pages == NULL) {
2729 err = gr_gk20a_alloc_channel_gr_ctx(g, c);
2730 if (err) {
2731 gk20a_err(dev_from_gk20a(g),
2732 "fail to allocate gr ctx buffer");
2733 goto out;
2734 }
2735 c->obj_class = args->class_num;
2736 } else {
2737		/* TBD: needs to be more subtle about which object is being allocated,
2738		 * as some are allowed to be allocated along the same channel */
2739 gk20a_err(dev_from_gk20a(g),
2740 "too many classes alloc'd on same channel");
2741 err = -EINVAL;
2742 goto out;
2743 }
2744
2745 /* commit gr ctx buffer */
2746 err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
2747 if (err) {
2748 gk20a_err(dev_from_gk20a(g),
2749 "fail to commit gr ctx buffer");
2750 goto out;
2751 }
2752
2753 /* allocate patch buffer */
2754 if (ch_ctx->patch_ctx.pages == NULL) {
2755 err = gr_gk20a_alloc_channel_patch_ctx(g, c);
2756 if (err) {
2757 gk20a_err(dev_from_gk20a(g),
2758 "fail to allocate patch buffer");
2759 goto out;
2760 }
2761 }
2762
2763 /* map global buffer to channel gpu_va and commit */
2764 if (!ch_ctx->global_ctx_buffer_mapped) {
2765 err = gr_gk20a_map_global_ctx_buffers(g, c);
2766 if (err) {
2767 gk20a_err(dev_from_gk20a(g),
2768 "fail to map global ctx buffer");
2769 goto out;
2770 }
2771 gr_gk20a_elpg_protected_call(g,
2772 gr_gk20a_commit_global_ctx_buffers(g, c, true));
2773 }
2774
2775 /* init golden image, ELPG enabled after this is done */
2776 err = gr_gk20a_init_golden_ctx_image(g, c);
2777 if (err) {
2778 gk20a_err(dev_from_gk20a(g),
2779 "fail to init golden ctx image");
2780 goto out;
2781 }
2782
2783 /* load golden image */
2784 if (!c->first_init) {
2785 err = gr_gk20a_elpg_protected_call(g,
2786 gr_gk20a_load_golden_ctx_image(g, c));
2787 if (err) {
2788 gk20a_err(dev_from_gk20a(g),
2789 "fail to load golden ctx image");
2790 goto out;
2791 }
2792 c->first_init = true;
2793 }
2794 gk20a_mm_l2_invalidate(g);
2795
2796 c->num_objects++;
2797
2798 gk20a_dbg_fn("done");
2799 return 0;
2800out:
2801	/* 1. gr_ctx, patch_ctx and the global ctx buffer mappings
2802	   can be reused, so there is no need to release them.
2803	   2. golden image init and load are one-time operations, so once
2804	   they pass there is nothing to undo. */
2805 gk20a_err(dev_from_gk20a(g), "fail");
2806 return err;
2807}
2808
2809int gk20a_free_obj_ctx(struct channel_gk20a *c,
2810 struct nvhost_free_obj_ctx_args *args)
2811{
2812 unsigned long timeout = gk20a_get_gr_idle_timeout(c->g);
2813
2814 gk20a_dbg_fn("");
2815
2816 if (c->num_objects == 0)
2817 return 0;
2818
2819 c->num_objects--;
2820
2821 if (c->num_objects == 0) {
2822 c->first_init = false;
2823 gk20a_disable_channel(c,
2824 !c->has_timedout,
2825 timeout);
2826 gr_gk20a_unmap_channel_patch_ctx(c);
2827 }
2828
2829 return 0;
2830}
2831
2832static void gk20a_remove_gr_support(struct gr_gk20a *gr)
2833{
2834 struct gk20a *g = gr->g;
2835 struct device *d = dev_from_gk20a(g);
2836 DEFINE_DMA_ATTRS(attrs);
2837
2838 gk20a_dbg_fn("");
2839
2840 gr_gk20a_free_global_ctx_buffers(g);
2841
2842 dma_free_coherent(d, gr->mmu_wr_mem.size,
2843 gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova);
2844 gr->mmu_wr_mem.cpuva = NULL;
2845 gr->mmu_wr_mem.iova = 0;
2846 dma_free_coherent(d, gr->mmu_rd_mem.size,
2847 gr->mmu_rd_mem.cpuva, gr->mmu_rd_mem.iova);
2848 gr->mmu_rd_mem.cpuva = NULL;
2849 gr->mmu_rd_mem.iova = 0;
2850
2851 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
2852 dma_free_attrs(d, gr->compbit_store.size, gr->compbit_store.pages,
2853 gr->compbit_store.base_iova, &attrs);
2854
2855 memset(&gr->mmu_wr_mem, 0, sizeof(struct mmu_desc));
2856 memset(&gr->mmu_rd_mem, 0, sizeof(struct mmu_desc));
2857 memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
2858
2859 kfree(gr->gpc_tpc_count);
2860 kfree(gr->gpc_zcb_count);
2861 kfree(gr->gpc_ppc_count);
2862 kfree(gr->pes_tpc_count[0]);
2863 kfree(gr->pes_tpc_count[1]);
2864 kfree(gr->pes_tpc_mask[0]);
2865 kfree(gr->pes_tpc_mask[1]);
2866 kfree(gr->gpc_skip_mask);
2867 kfree(gr->map_tiles);
2868 gr->gpc_tpc_count = NULL;
2869 gr->gpc_zcb_count = NULL;
2870 gr->gpc_ppc_count = NULL;
2871 gr->pes_tpc_count[0] = NULL;
2872 gr->pes_tpc_count[1] = NULL;
2873 gr->pes_tpc_mask[0] = NULL;
2874 gr->pes_tpc_mask[1] = NULL;
2875 gr->gpc_skip_mask = NULL;
2876 gr->map_tiles = NULL;
2877
2878 kfree(gr->ctx_vars.ucode.fecs.inst.l);
2879 kfree(gr->ctx_vars.ucode.fecs.data.l);
2880 kfree(gr->ctx_vars.ucode.gpccs.inst.l);
2881 kfree(gr->ctx_vars.ucode.gpccs.data.l);
2882 kfree(gr->ctx_vars.sw_bundle_init.l);
2883 kfree(gr->ctx_vars.sw_method_init.l);
2884 kfree(gr->ctx_vars.sw_ctx_load.l);
2885 kfree(gr->ctx_vars.sw_non_ctx_load.l);
2886 kfree(gr->ctx_vars.ctxsw_regs.sys.l);
2887 kfree(gr->ctx_vars.ctxsw_regs.gpc.l);
2888 kfree(gr->ctx_vars.ctxsw_regs.tpc.l);
2889 kfree(gr->ctx_vars.ctxsw_regs.zcull_gpc.l);
2890 kfree(gr->ctx_vars.ctxsw_regs.ppc.l);
2891 kfree(gr->ctx_vars.ctxsw_regs.pm_sys.l);
2892 kfree(gr->ctx_vars.ctxsw_regs.pm_gpc.l);
2893 kfree(gr->ctx_vars.ctxsw_regs.pm_tpc.l);
2894
2895 kfree(gr->ctx_vars.local_golden_image);
2896 gr->ctx_vars.local_golden_image = NULL;
2897
2898 gk20a_allocator_destroy(&gr->comp_tags);
2899}
2900
2901static void gr_gk20a_bundle_cb_defaults(struct gk20a *g)
2902{
2903 struct gr_gk20a *gr = &g->gr;
2904
2905 gr->bundle_cb_default_size =
2906 gr_scc_bundle_cb_size_div_256b__prod_v();
2907 gr->min_gpm_fifo_depth =
2908 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
2909 gr->bundle_cb_token_limit =
2910 gr_pd_ab_dist_cfg2_token_limit_init_v();
2911}
2912
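/* Read the GR unit configuration (FBP/GPC/TPC/zcull counts and per-PES TPC
 * masks) from priv registers, allocate the per-GPC bookkeeping arrays and
 * derive the per-GPC skip masks. */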
2913static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
2914{
2915 u32 gpc_index, pes_index;
2916 u32 pes_tpc_mask;
2917 u32 pes_tpc_count;
2918 u32 pes_heavy_index;
2919 u32 gpc_new_skip_mask;
2920 u32 tmp;
2921
2922 tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r());
2923 gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp);
2924
2925 tmp = gk20a_readl(g, top_num_gpcs_r());
2926 gr->max_gpc_count = top_num_gpcs_value_v(tmp);
2927
2928 tmp = gk20a_readl(g, top_num_fbps_r());
2929 gr->max_fbps_count = top_num_fbps_value_v(tmp);
2930
2931 tmp = gk20a_readl(g, top_tpc_per_gpc_r());
2932 gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
2933
2934 gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
2935
2936 tmp = gk20a_readl(g, top_num_fbps_r());
2937 gr->sys_count = top_num_fbps_value_v(tmp);
2938
2939 tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r());
2940 gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
2941
2942 gr->pe_count_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
2943 gr->max_zcull_per_gpc_count = proj_scal_litter_num_zcull_banks_v();
2944
2945 if (!gr->gpc_count) {
2946 gk20a_err(dev_from_gk20a(g), "gpc_count==0!");
2947 goto clean_up;
2948 }
2949
2950 gr->gpc_tpc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2951 gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2952 gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2953 gr->pes_tpc_count[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2954 gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2955 gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2956 gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2957 gr->gpc_skip_mask =
2958 kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
2959 GFP_KERNEL);
2960
2961 if (!gr->gpc_tpc_count || !gr->gpc_zcb_count || !gr->gpc_ppc_count ||
2962 !gr->pes_tpc_count[0] || !gr->pes_tpc_count[1] ||
2963 !gr->pes_tpc_mask[0] || !gr->pes_tpc_mask[1] || !gr->gpc_skip_mask)
2964 goto clean_up;
2965
2966 gr->ppc_count = 0;
2967 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2968 tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r());
2969
2970 gr->gpc_tpc_count[gpc_index] =
2971 gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
2972 gr->tpc_count += gr->gpc_tpc_count[gpc_index];
2973
2974 gr->gpc_zcb_count[gpc_index] =
2975 gr_gpc0_fs_gpc_num_available_zculls_v(tmp);
2976 gr->zcb_count += gr->gpc_zcb_count[gpc_index];
2977
2978 gr->gpc_ppc_count[gpc_index] = gr->pe_count_per_gpc;
2979 gr->ppc_count += gr->gpc_ppc_count[gpc_index];
2980 for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {
2981
2982 tmp = gk20a_readl(g,
2983 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
2984 gpc_index * proj_gpc_stride_v());
2985
2986 pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp);
2987 pes_tpc_count = count_bits(pes_tpc_mask);
2988
2989 gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
2990 gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
2991 }
2992
2993 gpc_new_skip_mask = 0;
2994 if (gr->pes_tpc_count[0][gpc_index] +
2995 gr->pes_tpc_count[1][gpc_index] == 5) {
2996 pes_heavy_index =
2997 gr->pes_tpc_count[0][gpc_index] >
2998 gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
2999
3000 gpc_new_skip_mask =
3001 gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
3002 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
3003 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
3004
3005 } else if ((gr->pes_tpc_count[0][gpc_index] +
3006 gr->pes_tpc_count[1][gpc_index] == 4) &&
3007 (gr->pes_tpc_count[0][gpc_index] !=
3008 gr->pes_tpc_count[1][gpc_index])) {
3009 pes_heavy_index =
3010 gr->pes_tpc_count[0][gpc_index] >
3011 gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
3012
3013 gpc_new_skip_mask =
3014 gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
3015 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
3016 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
3017 }
3018 gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
3019 }
3020
3021 gk20a_dbg_info("fbps: %d", gr->num_fbps);
3022 gk20a_dbg_info("max_gpc_count: %d", gr->max_gpc_count);
3023 gk20a_dbg_info("max_fbps_count: %d", gr->max_fbps_count);
3024 gk20a_dbg_info("max_tpc_per_gpc_count: %d", gr->max_tpc_per_gpc_count);
3025 gk20a_dbg_info("max_zcull_per_gpc_count: %d", gr->max_zcull_per_gpc_count);
3026 gk20a_dbg_info("max_tpc_count: %d", gr->max_tpc_count);
3027 gk20a_dbg_info("sys_count: %d", gr->sys_count);
3028 gk20a_dbg_info("gpc_count: %d", gr->gpc_count);
3029 gk20a_dbg_info("pe_count_per_gpc: %d", gr->pe_count_per_gpc);
3030 gk20a_dbg_info("tpc_count: %d", gr->tpc_count);
3031 gk20a_dbg_info("ppc_count: %d", gr->ppc_count);
3032
3033 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
3034 gk20a_dbg_info("gpc_tpc_count[%d] : %d",
3035 gpc_index, gr->gpc_tpc_count[gpc_index]);
3036 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
3037 gk20a_dbg_info("gpc_zcb_count[%d] : %d",
3038 gpc_index, gr->gpc_zcb_count[gpc_index]);
3039 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
3040 gk20a_dbg_info("gpc_ppc_count[%d] : %d",
3041 gpc_index, gr->gpc_ppc_count[gpc_index]);
3042 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
3043 gk20a_dbg_info("gpc_skip_mask[%d] : %d",
3044 gpc_index, gr->gpc_skip_mask[gpc_index]);
3045 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
3046 for (pes_index = 0;
3047 pes_index < gr->pe_count_per_gpc;
3048 pes_index++)
3049 gk20a_dbg_info("pes_tpc_count[%d][%d] : %d",
3050 pes_index, gpc_index,
3051 gr->pes_tpc_count[pes_index][gpc_index]);
3052
3053 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
3054 for (pes_index = 0;
3055 pes_index < gr->pe_count_per_gpc;
3056 pes_index++)
3057 gk20a_dbg_info("pes_tpc_mask[%d][%d] : %d",
3058 pes_index, gpc_index,
3059 gr->pes_tpc_mask[pes_index][gpc_index]);
3060
3061 g->ops.gr.bundle_cb_defaults(g);
3062 g->ops.gr.cb_size_default(g);
3063 g->ops.gr.calc_global_ctx_buffer_size(g);
3064 gr->timeslice_mode = gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v();
3065
3066 gk20a_dbg_info("bundle_cb_default_size: %d",
3067 gr->bundle_cb_default_size);
3068 gk20a_dbg_info("min_gpm_fifo_depth: %d", gr->min_gpm_fifo_depth);
3069 gk20a_dbg_info("bundle_cb_token_limit: %d", gr->bundle_cb_token_limit);
3070 gk20a_dbg_info("attrib_cb_default_size: %d",
3071 gr->attrib_cb_default_size);
3072 gk20a_dbg_info("attrib_cb_size: %d", gr->attrib_cb_size);
3073 gk20a_dbg_info("alpha_cb_default_size: %d", gr->alpha_cb_default_size);
3074 gk20a_dbg_info("alpha_cb_size: %d", gr->alpha_cb_size);
3075 gk20a_dbg_info("timeslice_mode: %d", gr->timeslice_mode);
3076
3077 return 0;
3078
3079clean_up:
3080 return -ENOMEM;
3081}
3082
3083static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr)
3084{
3085 struct device *d = dev_from_gk20a(g);
3086 dma_addr_t iova;
3087
3088 gr->mmu_wr_mem_size = gr->mmu_rd_mem_size = 0x1000;
3089
3090 gr->mmu_wr_mem.size = gr->mmu_wr_mem_size;
3091 gr->mmu_wr_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_wr_mem_size,
3092 &iova, GFP_KERNEL);
3093 if (!gr->mmu_wr_mem.cpuva)
3094 goto err;
3095
3096 gr->mmu_wr_mem.iova = iova;
3097
3098 gr->mmu_rd_mem.size = gr->mmu_rd_mem_size;
3099 gr->mmu_rd_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_rd_mem_size,
3100 &iova, GFP_KERNEL);
3101 if (!gr->mmu_rd_mem.cpuva)
3102 goto err_free_wr_mem;
3103
3104 gr->mmu_rd_mem.iova = iova;
3105 return 0;
3106
3107 err_free_wr_mem:
3108 dma_free_coherent(d, gr->mmu_wr_mem.size,
3109 gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova);
3110 gr->mmu_wr_mem.cpuva = NULL;
3111 gr->mmu_wr_mem.iova = 0;
3112 err:
3113 return -ENOMEM;
3114}
3115
3116static u32 prime_set[18] = {
3117 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
3118
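/* Build the screen-tile to TPC map: choose a map row offset (a small prime
 * that does not divide the TPC count, with special cases for some counts)
 * and distribute tiles across the GPCs, sorted by TPC count, using an
 * error-accumulation scheme. */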
3119static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
3120{
3121 s32 comm_denom;
3122 s32 mul_factor;
3123 s32 *init_frac = NULL;
3124 s32 *init_err = NULL;
3125 s32 *run_err = NULL;
3126 s32 *sorted_num_tpcs = NULL;
3127 s32 *sorted_to_unsorted_gpc_map = NULL;
3128 u32 gpc_index;
3129 u32 gpc_mark = 0;
3130 u32 num_tpc;
3131 u32 max_tpc_count = 0;
3132 u32 swap;
3133 u32 tile_count;
3134 u32 index;
3135 bool delete_map = false;
3136 bool gpc_sorted;
3137 int ret = 0;
3138
3139 init_frac = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
3140 init_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
3141 run_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
3142 sorted_num_tpcs =
3143 kzalloc(proj_scal_max_gpcs_v() *
3144 proj_scal_max_tpc_per_gpc_v() * sizeof(s32),
3145 GFP_KERNEL);
3146 sorted_to_unsorted_gpc_map =
3147 kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
3148
3149 if (!(init_frac && init_err && run_err && sorted_num_tpcs &&
3150 sorted_to_unsorted_gpc_map)) {
3151 ret = -ENOMEM;
3152 goto clean_up;
3153 }
3154
3155 gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET;
3156
3157 if (gr->tpc_count == 3)
3158 gr->map_row_offset = 2;
3159 else if (gr->tpc_count < 3)
3160 gr->map_row_offset = 1;
3161 else {
3162 gr->map_row_offset = 3;
3163
3164 for (index = 1; index < 18; index++) {
3165 u32 prime = prime_set[index];
3166 if ((gr->tpc_count % prime) != 0) {
3167 gr->map_row_offset = prime;
3168 break;
3169 }
3170 }
3171 }
3172
3173 switch (gr->tpc_count) {
3174 case 15:
3175 gr->map_row_offset = 6;
3176 break;
3177 case 14:
3178 gr->map_row_offset = 5;
3179 break;
3180 case 13:
3181 gr->map_row_offset = 2;
3182 break;
3183 case 11:
3184 gr->map_row_offset = 7;
3185 break;
3186 case 10:
3187 gr->map_row_offset = 6;
3188 break;
3189 case 7:
3190 case 5:
3191 gr->map_row_offset = 1;
3192 break;
3193 default:
3194 break;
3195 }
3196
3197 if (gr->map_tiles) {
3198 if (gr->map_tile_count != gr->tpc_count)
3199 delete_map = true;
3200
3201 for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) {
3202 if ((u32)gr->map_tiles[tile_count] >= gr->tpc_count)
3203 delete_map = true;
3204 }
3205
3206 if (delete_map) {
3207 kfree(gr->map_tiles);
3208 gr->map_tiles = NULL;
3209 gr->map_tile_count = 0;
3210 }
3211 }
3212
3213 if (gr->map_tiles == NULL) {
3214 gr->map_tile_count = proj_scal_max_gpcs_v();
3215
3216 gr->map_tiles = kzalloc(proj_scal_max_gpcs_v() * sizeof(u8), GFP_KERNEL);
3217 if (gr->map_tiles == NULL) {
3218 ret = -ENOMEM;
3219 goto clean_up;
3220 }
3221
3222 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3223 sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index];
3224 sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
3225 }
3226
3227 gpc_sorted = false;
3228 while (!gpc_sorted) {
3229 gpc_sorted = true;
3230 for (gpc_index = 0; gpc_index < gr->gpc_count - 1; gpc_index++) {
3231 if (sorted_num_tpcs[gpc_index + 1] > sorted_num_tpcs[gpc_index]) {
3232 gpc_sorted = false;
3233 swap = sorted_num_tpcs[gpc_index];
3234 sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1];
3235 sorted_num_tpcs[gpc_index + 1] = swap;
3236 swap = sorted_to_unsorted_gpc_map[gpc_index];
3237 sorted_to_unsorted_gpc_map[gpc_index] =
3238 sorted_to_unsorted_gpc_map[gpc_index + 1];
3239 sorted_to_unsorted_gpc_map[gpc_index + 1] = swap;
3240 }
3241 }
3242 }
3243
3244 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
3245 if (gr->gpc_tpc_count[gpc_index] > max_tpc_count)
3246 max_tpc_count = gr->gpc_tpc_count[gpc_index];
3247
3248 mul_factor = gr->gpc_count * max_tpc_count;
3249 if (mul_factor & 0x1)
3250 mul_factor = 2;
3251 else
3252 mul_factor = 1;
3253
3254 comm_denom = gr->gpc_count * max_tpc_count * mul_factor;
3255
3256 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3257 num_tpc = sorted_num_tpcs[gpc_index];
3258
3259 init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor;
3260
3261 if (num_tpc != 0)
3262 init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2;
3263 else
3264 init_err[gpc_index] = 0;
3265
3266 run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
3267 }
3268
3269 while (gpc_mark < gr->tpc_count) {
3270 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3271 if ((run_err[gpc_index] * 2) >= comm_denom) {
3272 gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
3273 run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
3274 } else
3275 run_err[gpc_index] += init_frac[gpc_index];
3276 }
3277 }
3278 }
3279
3280clean_up:
3281 kfree(init_frac);
3282 kfree(init_err);
3283 kfree(run_err);
3284 kfree(sorted_num_tpcs);
3285 kfree(sorted_to_unsorted_gpc_map);
3286
3287 if (ret)
3288 gk20a_err(dev_from_gk20a(g), "fail");
3289 else
3290 gk20a_dbg_fn("done");
3291
3292 return ret;
3293}
3294
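/* Derive the zcull aliquot geometry and alignment requirements from the TPC
 * and zcull-bank counts and read the total aliquot count from hardware. */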
3295static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr)
3296{
3297 struct gr_zcull_gk20a *zcull = &gr->zcull;
3298
3299 zcull->aliquot_width = gr->tpc_count * 16;
3300 zcull->aliquot_height = 16;
3301
3302 zcull->width_align_pixels = gr->tpc_count * 16;
3303 zcull->height_align_pixels = 32;
3304
3305 zcull->aliquot_size =
3306 zcull->aliquot_width * zcull->aliquot_height;
3307
3308 /* assume no floor sweeping since we only have 1 tpc in 1 gpc */
3309 zcull->pixel_squares_by_aliquots =
3310 gr->zcb_count * 16 * 16 * gr->tpc_count /
3311 (gr->gpc_count * gr->gpc_tpc_count[0]);
3312
3313 zcull->total_aliquots =
3314 gr_gpc0_zcull_total_ram_size_num_aliquots_f(
3315 gk20a_readl(g, gr_gpc0_zcull_total_ram_size_r()));
3316
3317 return 0;
3318}
3319
3320u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr)
3321{
3322 /* assuming gr has already been initialized */
3323 return gr->ctx_vars.zcull_ctxsw_image_size;
3324}
3325
3326int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
3327 struct channel_gk20a *c, u64 zcull_va, u32 mode)
3328{
3329 struct zcull_ctx_desc *zcull_ctx = &c->ch_ctx.zcull_ctx;
3330
3331 zcull_ctx->ctx_sw_mode = mode;
3332 zcull_ctx->gpu_va = zcull_va;
3333
3334 /* TBD: don't disable channel in sw method processing */
3335 return gr_gk20a_ctx_zcull_setup(g, c, true);
3336}
3337
3338int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
3339 struct gr_zcull_info *zcull_params)
3340{
3341 struct gr_zcull_gk20a *zcull = &gr->zcull;
3342
3343 zcull_params->width_align_pixels = zcull->width_align_pixels;
3344 zcull_params->height_align_pixels = zcull->height_align_pixels;
3345 zcull_params->pixel_squares_by_aliquots =
3346 zcull->pixel_squares_by_aliquots;
3347 zcull_params->aliquot_total = zcull->total_aliquots;
3348
3349 zcull_params->region_byte_multiplier =
3350 gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v();
3351 zcull_params->region_header_size =
3352 proj_scal_litter_num_gpcs_v() *
3353 gr_zcull_save_restore_header_bytes_per_gpc_v();
3354
3355 zcull_params->subregion_header_size =
3356 proj_scal_litter_num_gpcs_v() *
3357 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v();
3358
3359 zcull_params->subregion_width_align_pixels =
3360 gr->tpc_count * gr_gpc0_zcull_zcsize_width_subregion__multiple_v();
3361 zcull_params->subregion_height_align_pixels =
3362 gr_gpc0_zcull_zcsize_height_subregion__multiple_v();
3363 zcull_params->subregion_count = gr_zcull_subregion_qty_v();
3364
3365 return 0;
3366}
3367
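/* Program one ZBC color table entry: quiesce the GR engine, update the L2
 * and DS copies of the entry, then re-enable engine activity and mirror the
 * values into the SW table. */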
3368static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
3369 struct zbc_entry *color_val, u32 index)
3370{
3371 struct fifo_gk20a *f = &g->fifo;
3372 struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
3373 u32 i;
3374 unsigned long end_jiffies = jiffies +
3375 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
3376 u32 ret;
3377
3378 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
3379 if (ret) {
3380 gk20a_err(dev_from_gk20a(g),
3381 "failed to disable gr engine activity\n");
3382 return ret;
3383 }
3384
3385 ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
3386 if (ret) {
3387 gk20a_err(dev_from_gk20a(g),
3388 "failed to idle graphics\n");
3389 goto clean_up;
3390 }
3391
3392 /* update l2 table */
3393 g->ops.ltc.set_zbc_color_entry(g, color_val, index);
3394
3395 /* update ds table */
3396 gk20a_writel(g, gr_ds_zbc_color_r_r(),
3397 gr_ds_zbc_color_r_val_f(color_val->color_ds[0]));
3398 gk20a_writel(g, gr_ds_zbc_color_g_r(),
3399 gr_ds_zbc_color_g_val_f(color_val->color_ds[1]));
3400 gk20a_writel(g, gr_ds_zbc_color_b_r(),
3401 gr_ds_zbc_color_b_val_f(color_val->color_ds[2]));
3402 gk20a_writel(g, gr_ds_zbc_color_a_r(),
3403 gr_ds_zbc_color_a_val_f(color_val->color_ds[3]));
3404
3405 gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
3406 gr_ds_zbc_color_fmt_val_f(color_val->format));
3407
3408 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
3409 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
3410
3411 /* trigger the write */
3412 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
3413 gr_ds_zbc_tbl_ld_select_c_f() |
3414 gr_ds_zbc_tbl_ld_action_write_f() |
3415 gr_ds_zbc_tbl_ld_trigger_active_f());
3416
3417 /* update local copy */
3418 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3419 gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i];
3420 gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i];
3421 }
3422 gr->zbc_col_tbl[index].format = color_val->format;
3423 gr->zbc_col_tbl[index].ref_cnt++;
3424
3425clean_up:
3426 ret = gk20a_fifo_enable_engine_activity(g, gr_info);
3427 if (ret) {
3428 gk20a_err(dev_from_gk20a(g),
3429 "failed to enable gr engine activity\n");
3430 }
3431
3432 return ret;
3433}
3434
3435static int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
3436 struct zbc_entry *depth_val, u32 index)
3437{
3438 struct fifo_gk20a *f = &g->fifo;
3439 struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
3440 unsigned long end_jiffies = jiffies +
3441 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
3442 u32 ret;
3443
3444 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
3445 if (ret) {
3446 gk20a_err(dev_from_gk20a(g),
3447 "failed to disable gr engine activity\n");
3448 return ret;
3449 }
3450
3451 ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
3452 if (ret) {
3453 gk20a_err(dev_from_gk20a(g),
3454 "failed to idle graphics\n");
3455 goto clean_up;
3456 }
3457
3458 /* update l2 table */
3459 g->ops.ltc.set_zbc_depth_entry(g, depth_val, index);
3460
3461 /* update ds table */
3462 gk20a_writel(g, gr_ds_zbc_z_r(),
3463 gr_ds_zbc_z_val_f(depth_val->depth));
3464
3465 gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
3466 gr_ds_zbc_z_fmt_val_f(depth_val->format));
3467
3468 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
3469 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
3470
3471 /* trigger the write */
3472 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
3473 gr_ds_zbc_tbl_ld_select_z_f() |
3474 gr_ds_zbc_tbl_ld_action_write_f() |
3475 gr_ds_zbc_tbl_ld_trigger_active_f());
3476
3477 /* update local copy */
3478 gr->zbc_dep_tbl[index].depth = depth_val->depth;
3479 gr->zbc_dep_tbl[index].format = depth_val->format;
3480 gr->zbc_dep_tbl[index].ref_cnt++;
3481
3482clean_up:
3483 ret = gk20a_fifo_enable_engine_activity(g, gr_info);
3484 if (ret) {
3485 gk20a_err(dev_from_gk20a(g),
3486 "failed to enable gr engine activity\n");
3487 }
3488
3489 return ret;
3490}
3491
3492int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
3493 struct zbc_entry *zbc_val)
3494{
3495 struct zbc_color_table *c_tbl;
3496 struct zbc_depth_table *d_tbl;
3497 u32 i, ret = -ENOMEM;
3498 bool added = false;
3499 u32 entries;
3500
3501 /* no endian swap ? */
3502
3503 switch (zbc_val->type) {
3504 case GK20A_ZBC_TYPE_COLOR:
3505 /* search existing tables */
3506 for (i = 0; i < gr->max_used_color_index; i++) {
3507
3508 c_tbl = &gr->zbc_col_tbl[i];
3509
3510 if (c_tbl->ref_cnt && c_tbl->format == zbc_val->format &&
3511 memcmp(c_tbl->color_ds, zbc_val->color_ds,
3512 sizeof(zbc_val->color_ds)) == 0) {
3513
3514 if (memcmp(c_tbl->color_l2, zbc_val->color_l2,
3515 sizeof(zbc_val->color_l2))) {
3516 gk20a_err(dev_from_gk20a(g),
3517 					"zbc l2 and ds colors don't match existing entries");
3518 return -EINVAL;
3519 }
3520 added = true;
3521 c_tbl->ref_cnt++;
3522 ret = 0;
3523 break;
3524 }
3525 }
3526 /* add new table */
3527 if (!added &&
3528 gr->max_used_color_index < GK20A_ZBC_TABLE_SIZE) {
3529
3530 c_tbl =
3531 &gr->zbc_col_tbl[gr->max_used_color_index];
3532 WARN_ON(c_tbl->ref_cnt != 0);
3533
3534 ret = gr_gk20a_add_zbc_color(g, gr,
3535 zbc_val, gr->max_used_color_index);
3536
3537 if (!ret)
3538 gr->max_used_color_index++;
3539 }
3540 break;
3541 case GK20A_ZBC_TYPE_DEPTH:
3542 /* search existing tables */
3543 for (i = 0; i < gr->max_used_depth_index; i++) {
3544
3545 d_tbl = &gr->zbc_dep_tbl[i];
3546
3547 if (d_tbl->ref_cnt &&
3548 d_tbl->depth == zbc_val->depth &&
3549 d_tbl->format == zbc_val->format) {
3550 added = true;
3551 d_tbl->ref_cnt++;
3552 ret = 0;
3553 break;
3554 }
3555 }
3556 /* add new table */
3557 if (!added &&
3558 gr->max_used_depth_index < GK20A_ZBC_TABLE_SIZE) {
3559
3560 d_tbl =
3561 &gr->zbc_dep_tbl[gr->max_used_depth_index];
3562 WARN_ON(d_tbl->ref_cnt != 0);
3563
3564 ret = gr_gk20a_add_zbc_depth(g, gr,
3565 zbc_val, gr->max_used_depth_index);
3566
3567 if (!ret)
3568 gr->max_used_depth_index++;
3569 }
3570 break;
3571 default:
3572 gk20a_err(dev_from_gk20a(g),
3573 "invalid zbc table type %d", zbc_val->type);
3574 return -EINVAL;
3575 }
3576
3577 if (!added && ret == 0) {
3578 /* update zbc for elpg only when new entry is added */
3579 entries = max(gr->max_used_color_index,
3580 gr->max_used_depth_index);
3581 gk20a_pmu_save_zbc(g, entries);
3582 }
3583
3584 return ret;
3585}
3586
3587int gr_gk20a_clear_zbc_table(struct gk20a *g, struct gr_gk20a *gr)
3588{
3589 struct fifo_gk20a *f = &g->fifo;
3590 struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
3591 u32 i, j;
3592 unsigned long end_jiffies = jiffies +
3593 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
3594 u32 ret;
3595
3596 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
3597 if (ret) {
3598 gk20a_err(dev_from_gk20a(g),
3599 "failed to disable gr engine activity\n");
3600 return ret;
3601 }
3602
3603 ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
3604 if (ret) {
3605 gk20a_err(dev_from_gk20a(g),
3606 "failed to idle graphics\n");
3607 goto clean_up;
3608 }
3609
3610 for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) {
3611 gr->zbc_col_tbl[i].format = 0;
3612 gr->zbc_col_tbl[i].ref_cnt = 0;
3613
3614 gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
3615 gr_ds_zbc_color_fmt_val_invalid_f());
3616 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
3617 gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE));
3618
3619 /* trigger the write */
3620 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
3621 gr_ds_zbc_tbl_ld_select_c_f() |
3622 gr_ds_zbc_tbl_ld_action_write_f() |
3623 gr_ds_zbc_tbl_ld_trigger_active_f());
3624
3625 /* clear l2 table */
3626 g->ops.ltc.clear_zbc_color_entry(g, i);
3627
3628 for (j = 0; j < GK20A_ZBC_COLOR_VALUE_SIZE; j++) {
3629 gr->zbc_col_tbl[i].color_l2[j] = 0;
3630 gr->zbc_col_tbl[i].color_ds[j] = 0;
3631 }
3632 }
3633 gr->max_used_color_index = 0;
3634 gr->max_default_color_index = 0;
3635
3636 for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) {
3637 gr->zbc_dep_tbl[i].depth = 0;
3638 gr->zbc_dep_tbl[i].format = 0;
3639 gr->zbc_dep_tbl[i].ref_cnt = 0;
3640
3641 gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
3642 gr_ds_zbc_z_fmt_val_invalid_f());
3643 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
3644 gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE));
3645
3646 /* trigger the write */
3647 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
3648 gr_ds_zbc_tbl_ld_select_z_f() |
3649 gr_ds_zbc_tbl_ld_action_write_f() |
3650 gr_ds_zbc_tbl_ld_trigger_active_f());
3651
3652 /* clear l2 table */
3653 g->ops.ltc.clear_zbc_depth_entry(g, i);
3654 }
3655 gr->max_used_depth_index = 0;
3656 gr->max_default_depth_index = 0;
3657
3658clean_up:
3659 ret = gk20a_fifo_enable_engine_activity(g, gr_info);
3660 if (ret) {
3661 gk20a_err(dev_from_gk20a(g),
3662 "failed to enable gr engine activity\n");
3663 }
3664
3665 /* elpg stuff */
3666
3667 return ret;
3668}
3669
3670/* get a zbc table entry specified by index
3671 * return table size when type is invalid */
3672int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
3673 struct zbc_query_params *query_params)
3674{
3675 u32 index = query_params->index_size;
3676 u32 i;
3677
3678 switch (query_params->type) {
3679 case GK20A_ZBC_TYPE_INVALID:
3680 query_params->index_size = GK20A_ZBC_TABLE_SIZE;
3681 break;
3682 case GK20A_ZBC_TYPE_COLOR:
3683 if (index >= GK20A_ZBC_TABLE_SIZE) {
3684 gk20a_err(dev_from_gk20a(g),
3685 "invalid zbc color table index\n");
3686 return -EINVAL;
3687 }
3688 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3689 query_params->color_l2[i] =
3690 gr->zbc_col_tbl[index].color_l2[i];
3691 query_params->color_ds[i] =
3692 gr->zbc_col_tbl[index].color_ds[i];
3693 }
3694 query_params->format = gr->zbc_col_tbl[index].format;
3695 query_params->ref_cnt = gr->zbc_col_tbl[index].ref_cnt;
3696 break;
3697 case GK20A_ZBC_TYPE_DEPTH:
3698 if (index >= GK20A_ZBC_TABLE_SIZE) {
3699 gk20a_err(dev_from_gk20a(g),
3700 "invalid zbc depth table index\n");
3701 return -EINVAL;
3702 }
3703 query_params->depth = gr->zbc_dep_tbl[index].depth;
3704 query_params->format = gr->zbc_dep_tbl[index].format;
3705 query_params->ref_cnt = gr->zbc_dep_tbl[index].ref_cnt;
3706 break;
3707 default:
3708 gk20a_err(dev_from_gk20a(g),
3709 "invalid zbc table type\n");
3710 return -EINVAL;
3711 }
3712
3713 return 0;
3714}
3715
3716int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr)
3717{
3718 struct zbc_entry zbc_val;
3719 u32 i, err;
3720
3721 /* load default color table */
3722 zbc_val.type = GK20A_ZBC_TYPE_COLOR;
3723
3724 zbc_val.format = gr_ds_zbc_color_fmt_val_zero_v();
3725 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3726 zbc_val.color_ds[i] = 0;
3727 zbc_val.color_l2[i] = 0;
3728 }
3729 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
3730
3731 zbc_val.format = gr_ds_zbc_color_fmt_val_unorm_one_v();
3732 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3733 zbc_val.color_ds[i] = 0xffffffff;
3734 zbc_val.color_l2[i] = 0x3f800000;
3735 }
3736 err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3737
3738 zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v();
3739 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3740 zbc_val.color_ds[i] = 0;
3741 zbc_val.color_l2[i] = 0;
3742 }
3743 err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3744
3745 zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v();
3746 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3747 zbc_val.color_ds[i] = 0x3f800000;
3748 zbc_val.color_l2[i] = 0x3f800000;
3749 }
3750 err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3751
3752 if (!err)
3753 gr->max_default_color_index = 4;
3754 else {
3755 gk20a_err(dev_from_gk20a(g),
3756 "fail to load default zbc color table\n");
3757 return err;
3758 }
3759
3760 /* load default depth table */
3761 zbc_val.type = GK20A_ZBC_TYPE_DEPTH;
3762
3763 zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
3764 zbc_val.depth = 0;
3765 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
3766
3767 zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
3768 zbc_val.depth = 0x3f800000;
3769 err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3770
3771 if (!err)
3772 gr->max_default_depth_index = 2;
3773 else {
3774 gk20a_err(dev_from_gk20a(g),
3775 "fail to load default zbc depth table\n");
3776 return err;
3777 }
3778
3779 return 0;
3780}
3781
3782int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
3783 struct zbc_entry *zbc_val)
3784{
3785 gk20a_dbg_fn("");
3786
3787 return gr_gk20a_elpg_protected_call(g,
3788 gr_gk20a_add_zbc(g, gr, zbc_val));
3789}
3790
3791void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine)
3792{
3793 u32 gate_ctrl;
3794
3795 gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine));
3796
3797 switch (mode) {
3798 case BLCG_RUN:
3799 gate_ctrl = set_field(gate_ctrl,
3800 therm_gate_ctrl_blk_clk_m(),
3801 therm_gate_ctrl_blk_clk_run_f());
3802 break;
3803 case BLCG_AUTO:
3804 gate_ctrl = set_field(gate_ctrl,
3805 therm_gate_ctrl_blk_clk_m(),
3806 therm_gate_ctrl_blk_clk_auto_f());
3807 break;
3808 default:
3809 gk20a_err(dev_from_gk20a(g),
3810 "invalid blcg mode %d", mode);
3811 return;
3812 }
3813
3814 gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl);
3815}
3816
3817void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine)
3818{
3819 u32 gate_ctrl, idle_filter;
3820
3821 gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine));
3822
3823 switch (mode) {
3824 case ELCG_RUN:
3825 gate_ctrl = set_field(gate_ctrl,
3826 therm_gate_ctrl_eng_clk_m(),
3827 therm_gate_ctrl_eng_clk_run_f());
3828 gate_ctrl = set_field(gate_ctrl,
3829 therm_gate_ctrl_eng_pwr_m(),
3830 /* set elpg to auto to meet hw expectation */
3831 therm_gate_ctrl_eng_pwr_auto_f());
3832 break;
3833 case ELCG_STOP:
3834 gate_ctrl = set_field(gate_ctrl,
3835 therm_gate_ctrl_eng_clk_m(),
3836 therm_gate_ctrl_eng_clk_stop_f());
3837 break;
3838 case ELCG_AUTO:
3839 gate_ctrl = set_field(gate_ctrl,
3840 therm_gate_ctrl_eng_clk_m(),
3841 therm_gate_ctrl_eng_clk_auto_f());
3842 break;
3843 default:
3844 gk20a_err(dev_from_gk20a(g),
3845 "invalid elcg mode %d", mode);
3846 }
3847
3848 if (tegra_platform_is_linsim()) {
3849 gate_ctrl = set_field(gate_ctrl,
3850 therm_gate_ctrl_eng_delay_after_m(),
3851 therm_gate_ctrl_eng_delay_after_f(4));
3852 }
3853
3854 /* 2 * (1 << 9) = 1024 clks */
3855 gate_ctrl = set_field(gate_ctrl,
3856 therm_gate_ctrl_eng_idle_filt_exp_m(),
3857 therm_gate_ctrl_eng_idle_filt_exp_f(9));
3858 gate_ctrl = set_field(gate_ctrl,
3859 therm_gate_ctrl_eng_idle_filt_mant_m(),
3860 therm_gate_ctrl_eng_idle_filt_mant_f(2));
3861 gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl);
3862
3863 /* default fecs_idle_filter to 0 */
3864 idle_filter = gk20a_readl(g, therm_fecs_idle_filter_r());
3865 idle_filter &= ~therm_fecs_idle_filter_value_m();
3866 gk20a_writel(g, therm_fecs_idle_filter_r(), idle_filter);
3867 /* default hubmmu_idle_filter to 0 */
3868 idle_filter = gk20a_readl(g, therm_hubmmu_idle_filter_r());
3869 idle_filter &= ~therm_hubmmu_idle_filter_value_m();
3870 gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter);
3871}
3872
3873static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
3874{
3875 u32 gpc_index, gpc_tpc_count, gpc_zcull_count;
3876 u32 *zcull_map_tiles, *zcull_bank_counters;
3877 u32 map_counter;
3878 u32 rcp_conserv;
3879 u32 offset;
3880 bool floorsweep = false;
3881
3882 if (!gr->map_tiles)
3883 return -1;
3884
3885 zcull_map_tiles = kzalloc(proj_scal_max_gpcs_v() *
3886 proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
3887 if (!zcull_map_tiles) {
3888 gk20a_err(dev_from_gk20a(g),
3889 "failed to allocate zcull temp buffers");
3890 return -ENOMEM;
3891 }
3892 zcull_bank_counters = kzalloc(proj_scal_max_gpcs_v() *
3893 proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
3894
3895 if (!zcull_bank_counters) {
3896 gk20a_err(dev_from_gk20a(g),
3897 "failed to allocate zcull temp buffers");
3898 kfree(zcull_map_tiles);
3899 return -ENOMEM;
3900 }
3901
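	/*
	 * Turn the gpc-ordered tile map into per-gpc zcull bank indices: the
	 * bank counter tracks how many tiles have already gone to each gpc,
	 * so each tile's entry becomes its local bank number within that gpc.
	 */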
3902 for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) {
3903 zcull_map_tiles[map_counter] =
3904 zcull_bank_counters[gr->map_tiles[map_counter]];
3905 zcull_bank_counters[gr->map_tiles[map_counter]]++;
3906 }
3907
3908 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(),
3909 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(zcull_map_tiles[0]) |
3910 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(zcull_map_tiles[1]) |
3911 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(zcull_map_tiles[2]) |
3912 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(zcull_map_tiles[3]) |
3913 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(zcull_map_tiles[4]) |
3914 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(zcull_map_tiles[5]) |
3915 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(zcull_map_tiles[6]) |
3916 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(zcull_map_tiles[7]));
3917
3918 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(),
3919 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(zcull_map_tiles[8]) |
3920 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(zcull_map_tiles[9]) |
3921 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(zcull_map_tiles[10]) |
3922 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(zcull_map_tiles[11]) |
3923 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(zcull_map_tiles[12]) |
3924 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(zcull_map_tiles[13]) |
3925 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(zcull_map_tiles[14]) |
3926 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(zcull_map_tiles[15]));
3927
3928 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(),
3929 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(zcull_map_tiles[16]) |
3930 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(zcull_map_tiles[17]) |
3931 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(zcull_map_tiles[18]) |
3932 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(zcull_map_tiles[19]) |
3933 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(zcull_map_tiles[20]) |
3934 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(zcull_map_tiles[21]) |
3935 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(zcull_map_tiles[22]) |
3936 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(zcull_map_tiles[23]));
3937
3938 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(),
3939 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(zcull_map_tiles[24]) |
3940 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(zcull_map_tiles[25]) |
3941 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(zcull_map_tiles[26]) |
3942 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(zcull_map_tiles[27]) |
3943 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(zcull_map_tiles[28]) |
3944 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(zcull_map_tiles[29]) |
3945 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(zcull_map_tiles[30]) |
3946 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(zcull_map_tiles[31]));
3947
3948 kfree(zcull_map_tiles);
3949 kfree(zcull_bank_counters);
3950
3951 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3952 gpc_tpc_count = gr->gpc_tpc_count[gpc_index];
3953 gpc_zcull_count = gr->gpc_zcb_count[gpc_index];
3954
3955 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
3956 gpc_zcull_count < gpc_tpc_count) {
3957 gk20a_err(dev_from_gk20a(g),
3958 "zcull_banks (%d) less than tpcs (%d) for gpc (%d)",
3959 gpc_zcull_count, gpc_tpc_count, gpc_index);
3960 return -EINVAL;
3961 }
3962 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
3963 gpc_zcull_count != 0)
3964 floorsweep = true;
3965 }
3966
3967 /* 1.0f / 1.0f * gr_gpc0_zcull_sm_num_rcp_conservative__max_v() */
3968 rcp_conserv = gr_gpc0_zcull_sm_num_rcp_conservative__max_v();
3969
3970 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3971 offset = gpc_index * proj_gpc_stride_v();
3972
3973 if (floorsweep) {
3974 gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
3975 gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
3976 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
3977 gr->max_zcull_per_gpc_count));
3978 } else {
3979 gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
3980 gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
3981 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
3982 gr->gpc_tpc_count[gpc_index]));
3983 }
3984
3985 gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset,
3986 gr_gpc0_zcull_fs_num_active_banks_f(gr->gpc_zcb_count[gpc_index]) |
3987 gr_gpc0_zcull_fs_num_sms_f(gr->tpc_count));
3988
3989 gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset,
3990 gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv));
3991 }
3992
3993 gk20a_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(),
3994 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv));
3995
3996 return 0;
3997}
3998
3999static void gk20a_gr_enable_gpc_exceptions(struct gk20a *g)
4000{
4001 /* enable tpc exception forwarding */
4002 gk20a_writel(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r(),
4003 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f());
4004
4005 /* enable gpc exception forwarding */
4006 gk20a_writel(g, gr_gpc0_gpccs_gpc_exception_en_r(),
4007 gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f());
4008}
4009
4010void gr_gk20a_enable_hww_exceptions(struct gk20a *g)
4011{
4012 /* enable exceptions */
4013 gk20a_writel(g, gr_fe_hww_esr_r(),
4014 gr_fe_hww_esr_en_enable_f() |
4015 gr_fe_hww_esr_reset_active_f());
4016 gk20a_writel(g, gr_memfmt_hww_esr_r(),
4017 gr_memfmt_hww_esr_en_enable_f() |
4018 gr_memfmt_hww_esr_reset_active_f());
4019 gk20a_writel(g, gr_scc_hww_esr_r(),
4020 gr_scc_hww_esr_en_enable_f() |
4021 gr_scc_hww_esr_reset_active_f());
4022 gk20a_writel(g, gr_mme_hww_esr_r(),
4023 gr_mme_hww_esr_en_enable_f() |
4024 gr_mme_hww_esr_reset_active_f());
4025 gk20a_writel(g, gr_pd_hww_esr_r(),
4026 gr_pd_hww_esr_en_enable_f() |
4027 gr_pd_hww_esr_reset_active_f());
4028 gk20a_writel(g, gr_sked_hww_esr_r(), /* enabled by default */
4029 gr_sked_hww_esr_reset_active_f());
4030 gk20a_writel(g, gr_ds_hww_esr_r(),
4031 gr_ds_hww_esr_en_enabled_f() |
4032 gr_ds_hww_esr_reset_task_f());
4033 gk20a_writel(g, gr_ds_hww_report_mask_r(),
4034 gr_ds_hww_report_mask_sph0_err_report_f() |
4035 gr_ds_hww_report_mask_sph1_err_report_f() |
4036 gr_ds_hww_report_mask_sph2_err_report_f() |
4037 gr_ds_hww_report_mask_sph3_err_report_f() |
4038 gr_ds_hww_report_mask_sph4_err_report_f() |
4039 gr_ds_hww_report_mask_sph5_err_report_f() |
4040 gr_ds_hww_report_mask_sph6_err_report_f() |
4041 gr_ds_hww_report_mask_sph7_err_report_f() |
4042 gr_ds_hww_report_mask_sph8_err_report_f() |
4043 gr_ds_hww_report_mask_sph9_err_report_f() |
4044 gr_ds_hww_report_mask_sph10_err_report_f() |
4045 gr_ds_hww_report_mask_sph11_err_report_f() |
4046 gr_ds_hww_report_mask_sph12_err_report_f() |
4047 gr_ds_hww_report_mask_sph13_err_report_f() |
4048 gr_ds_hww_report_mask_sph14_err_report_f() |
4049 gr_ds_hww_report_mask_sph15_err_report_f() |
4050 gr_ds_hww_report_mask_sph16_err_report_f() |
4051 gr_ds_hww_report_mask_sph17_err_report_f() |
4052 gr_ds_hww_report_mask_sph18_err_report_f() |
4053 gr_ds_hww_report_mask_sph19_err_report_f() |
4054 gr_ds_hww_report_mask_sph20_err_report_f() |
4055 gr_ds_hww_report_mask_sph21_err_report_f() |
4056 gr_ds_hww_report_mask_sph22_err_report_f() |
4057 gr_ds_hww_report_mask_sph23_err_report_f());
4058}
4059
4060static void gr_gk20a_set_hww_esr_report_mask(struct gk20a *g)
4061{
4062 /* setup sm warp esr report masks */
4063 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
4064 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
4065 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
4066 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
4067 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
4068 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
4069 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
4070 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
4071 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
4072 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
4073 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
4074 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
4075 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
4076 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
4077 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
4078 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
4079 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
4080 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
4081 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
4082 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
4083 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());
4084
4085 /* setup sm global esr report mask */
4086 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
4087 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
4088 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() |
4089 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() |
4090 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() |
4091 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() |
4092 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() |
4093 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f());
4094}
4095
4096static int gk20a_init_gr_setup_hw(struct gk20a *g)
4097{
4098 struct gr_gk20a *gr = &g->gr;
4099 struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
4100 struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
4101 u32 data;
4102 u32 addr_lo, addr_hi;
4103 u64 addr;
4104 unsigned long end_jiffies = jiffies +
4105 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
4106 u32 fe_go_idle_timeout_save;
4107 u32 last_method_data = 0;
4108 u32 i, err;
4109
4110 gk20a_dbg_fn("");
4111
4112 /* slcg prod values */
4113 g->ops.clock_gating.slcg_gr_load_gating_prod(g, g->slcg_enabled);
4114 g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled);
4115
4116 /* init mmu debug buffer */
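	/*
	 * The debug buffer address is programmed as a packed field: the low
	 * word is shifted down by the register's alignment and the high word
	 * is spliced in above it.
	 */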
4117 addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_wr_mem.iova);
4118 addr_lo = u64_lo32(addr);
4119 addr_hi = u64_hi32(addr);
4120 addr = (addr_lo >> fb_mmu_debug_wr_addr_alignment_v()) |
4121 (addr_hi << (32 - fb_mmu_debug_wr_addr_alignment_v()));
4122
4123 gk20a_writel(g, fb_mmu_debug_wr_r(),
4124 fb_mmu_debug_wr_aperture_vid_mem_f() |
4125 fb_mmu_debug_wr_vol_false_f() |
4126 fb_mmu_debug_wr_addr_v(addr));
4127
4128 addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_rd_mem.iova);
4129 addr_lo = u64_lo32(addr);
4130 addr_hi = u64_hi32(addr);
4131 addr = (addr_lo >> fb_mmu_debug_rd_addr_alignment_v()) |
4132 (addr_hi << (32 - fb_mmu_debug_rd_addr_alignment_v()));
4133
4134 gk20a_writel(g, fb_mmu_debug_rd_r(),
4135 fb_mmu_debug_rd_aperture_vid_mem_f() |
4136 fb_mmu_debug_rd_vol_false_f() |
4137 fb_mmu_debug_rd_addr_v(addr));
4138
4139 /* load gr floorsweeping registers */
4140 data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r());
4141 data = set_field(data, gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(),
4142 gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f());
4143 gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data);
4144
4145 gr_gk20a_zcull_init_hw(g, gr);
4146
4147 g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled);
4148 g->ops.clock_gating.pg_gr_load_gating_prod(g, true);
4149
4150 if (g->elcg_enabled) {
4151 gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
4152 gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
4153 } else {
4154 gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A);
4155 gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A);
4156 }
4157
4158 /* Bug 1340570: increase the clock timeout to avoid potential
4159 * operation failure at high gpcclk rate. Default values are 0x400.
4160 */
4161 gk20a_writel(g, pri_ringstation_sys_master_config_r(0x15), 0x800);
4162 gk20a_writel(g, pri_ringstation_gpc_master_config_r(0xa), 0x800);
4163 gk20a_writel(g, pri_ringstation_fbp_master_config_r(0x8), 0x800);
4164
4165 /* enable fifo access */
4166 gk20a_writel(g, gr_gpfifo_ctl_r(),
4167 gr_gpfifo_ctl_access_enabled_f() |
4168 gr_gpfifo_ctl_semaphore_access_enabled_f());
4169
4170 /* TBD: reload gr ucode when needed */
4171
4172 /* enable interrupts */
4173 gk20a_writel(g, gr_intr_r(), 0xFFFFFFFF);
4174 gk20a_writel(g, gr_intr_en_r(), 0xFFFFFFFF);
4175
4176 /* enable fecs error interrupts */
4177 gk20a_writel(g, gr_fecs_host_int_enable_r(),
4178 gr_fecs_host_int_enable_fault_during_ctxsw_enable_f() |
4179 gr_fecs_host_int_enable_umimp_firmware_method_enable_f() |
4180 gr_fecs_host_int_enable_umimp_illegal_method_enable_f() |
4181 gr_fecs_host_int_enable_watchdog_enable_f());
4182
4183 g->ops.gr.enable_hww_exceptions(g);
4184 g->ops.gr.set_hww_esr_report_mask(g);
4185
4186 /* enable per GPC exceptions */
4187 gk20a_gr_enable_gpc_exceptions(g);
4188
4189 /* TBD: ECC for L1/SM */
4190 /* TBD: enable per BE exceptions */
4191
4192 /* reset and enable all exceptions */
4193 gk20a_writel(g, gr_exception_r(), 0xFFFFFFFF);
4194 gk20a_writel(g, gr_exception_en_r(), 0xFFFFFFFF);
4195 gk20a_writel(g, gr_exception1_r(), 0xFFFFFFFF);
4196 gk20a_writel(g, gr_exception1_en_r(), 0xFFFFFFFF);
4197 gk20a_writel(g, gr_exception2_r(), 0xFFFFFFFF);
4198 gk20a_writel(g, gr_exception2_en_r(), 0xFFFFFFFF);
4199
4200 /* ignore status from some units */
4201 data = gk20a_readl(g, gr_status_mask_r());
4202 gk20a_writel(g, gr_status_mask_r(), data & gr->status_disable_mask);
4203
4204 g->ops.ltc.init_zbc(g, gr);
4205 g->ops.ltc.init_cbc(g, gr);
4206
4207 /* load ctx init */
4208 for (i = 0; i < sw_ctx_load->count; i++)
4209 gk20a_writel(g, sw_ctx_load->l[i].addr,
4210 sw_ctx_load->l[i].value);
4211
4212 err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
4213 if (err)
4214 goto out;
4215
4216 /* save and disable fe_go_idle */
4217 fe_go_idle_timeout_save =
4218 gk20a_readl(g, gr_fe_go_idle_timeout_r());
4219 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
4220 (fe_go_idle_timeout_save & gr_fe_go_idle_timeout_count_f(0)) |
4221 gr_fe_go_idle_timeout_count_disabled_f());
4222
4223 /* override a few ctx state registers */
4224 g->ops.gr.commit_global_cb_manager(g, NULL, false);
4225 gr_gk20a_commit_global_timeslice(g, NULL, false);
4226
4227 /* floorsweep anything left */
4228 g->ops.gr.init_fs_state(g);
4229
4230 err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
4231 if (err)
4232 goto restore_fe_go_idle;
4233
4234restore_fe_go_idle:
4235 /* restore fe_go_idle */
4236 gk20a_writel(g, gr_fe_go_idle_timeout_r(), fe_go_idle_timeout_save);
4237
4238 if (err || gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT))
4239 goto out;
4240
4241 /* load method init */
4242 if (sw_method_init->count) {
4243 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
4244 sw_method_init->l[0].value);
4245 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
4246 gr_pri_mme_shadow_raw_index_write_trigger_f() |
4247 sw_method_init->l[0].addr);
4248 last_method_data = sw_method_init->l[0].value;
4249 }
4250 for (i = 1; i < sw_method_init->count; i++) {
4251 if (sw_method_init->l[i].value != last_method_data) {
4252 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
4253 sw_method_init->l[i].value);
4254 last_method_data = sw_method_init->l[i].value;
4255 }
4256 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
4257 gr_pri_mme_shadow_raw_index_write_trigger_f() |
4258 sw_method_init->l[i].addr);
4259 }
4260
4261 gk20a_mm_l2_invalidate(g);
4262
4263 err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
4264 if (err)
4265 goto out;
4266
4267out:
4268 gk20a_dbg_fn("done");
4269 	return err;
4270}
4271
4272static int gk20a_init_gr_prepare(struct gk20a *g)
4273{
4274 u32 gpfifo_ctrl, pmc_en;
4275 u32 err = 0;
4276
4277 /* disable fifo access */
4278 pmc_en = gk20a_readl(g, mc_enable_r());
4279 if (pmc_en & mc_enable_pgraph_enabled_f()) {
4280 gpfifo_ctrl = gk20a_readl(g, gr_gpfifo_ctl_r());
4281 gpfifo_ctrl &= ~gr_gpfifo_ctl_access_enabled_f();
4282 gk20a_writel(g, gr_gpfifo_ctl_r(), gpfifo_ctrl);
4283 }
4284
4285 /* reset gr engine */
4286 gk20a_reset(g, mc_enable_pgraph_enabled_f()
4287 | mc_enable_blg_enabled_f()
4288 | mc_enable_perfmon_enabled_f());
4289
4290 /* enable fifo access */
4291 gk20a_writel(g, gr_gpfifo_ctl_r(),
4292 gr_gpfifo_ctl_access_enabled_f() |
4293 gr_gpfifo_ctl_semaphore_access_enabled_f());
4294
4295 if (!g->gr.ctx_vars.valid) {
4296 err = gr_gk20a_init_ctx_vars(g, &g->gr);
4297 if (err)
4298 gk20a_err(dev_from_gk20a(g),
4299 "fail to load gr init ctx");
4300 }
4301 return err;
4302}
4303
4304static int gr_gk20a_wait_mem_scrubbing(struct gk20a *g)
4305{
4306 int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
4307 bool fecs_scrubbing;
4308 bool gpccs_scrubbing;
4309
4310 gk20a_dbg_fn("");
4311
4312 do {
4313 fecs_scrubbing = gk20a_readl(g, gr_fecs_dmactl_r()) &
4314 (gr_fecs_dmactl_imem_scrubbing_m() |
4315 gr_fecs_dmactl_dmem_scrubbing_m());
4316
4317 gpccs_scrubbing = gk20a_readl(g, gr_gpccs_dmactl_r()) &
4318 (gr_gpccs_dmactl_imem_scrubbing_m() |
4319 			 gr_gpccs_dmactl_dmem_scrubbing_m());
4320
4321 if (!fecs_scrubbing && !gpccs_scrubbing) {
4322 gk20a_dbg_fn("done");
4323 return 0;
4324 }
4325
4326 udelay(GR_IDLE_CHECK_DEFAULT);
4327 } while (--retries || !tegra_platform_is_silicon());
4328
4329 gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");
4330 return -ETIMEDOUT;
4331}
4332
4333static int gk20a_init_gr_reset_enable_hw(struct gk20a *g)
4334{
4335 struct gr_gk20a *gr = &g->gr;
4336 struct av_list_gk20a *sw_non_ctx_load = &g->gr.ctx_vars.sw_non_ctx_load;
4337 unsigned long end_jiffies = jiffies +
4338 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
4339 u32 i, err = 0;
4340
4341 gk20a_dbg_fn("");
4342
4343 /* enable interrupts */
4344 gk20a_writel(g, gr_intr_r(), ~0);
4345 gk20a_writel(g, gr_intr_en_r(), ~0);
4346
4347 /* reset ctx switch state */
4348 gr_gk20a_ctx_reset(g, 0);
4349
4350 /* clear scc ram */
4351 gk20a_writel(g, gr_scc_init_r(),
4352 gr_scc_init_ram_trigger_f());
4353
4354 /* load non_ctx init */
4355 for (i = 0; i < sw_non_ctx_load->count; i++)
4356 gk20a_writel(g, sw_non_ctx_load->l[i].addr,
4357 sw_non_ctx_load->l[i].value);
4358
4359 err = gr_gk20a_wait_mem_scrubbing(g);
4360 if (err)
4361 goto out;
4362
4363 err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
4364 if (err)
4365 goto out;
4366
4367 err = gr_gk20a_load_ctxsw_ucode(g, gr);
4368 if (err)
4369 goto out;
4370
4371 	/* this appears to query sw state, but fecs actually initializes the
4372 	   ramchain etc., so this is really hw init */
4373 err = gr_gk20a_init_ctx_state(g, gr);
4374 if (err)
4375 goto out;
4376
4377out:
4378 if (err)
4379 gk20a_err(dev_from_gk20a(g), "fail");
4380 else
4381 gk20a_dbg_fn("done");
4382
4383 	return err;
4384}
4385
4386/*
4387 * XXX Merge this list with the debugger/profiler
4388 * session regops whitelists?
4389 */
4390static u32 wl_addr_gk20a[] = {
4391 /* this list must be sorted (low to high) */
4392 0x404468, /* gr_pri_mme_max_instructions */
4393 0x418800, /* gr_pri_gpcs_setup_debug */
4394 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */
4395 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */
4396 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */
4397 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */
4398};
4399
4400static int gr_gk20a_init_access_map(struct gk20a *g)
4401{
4402 struct gr_gk20a *gr = &g->gr;
4403 void *data;
4404 int err = 0;
4405 u32 w, nr_pages =
4406 DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size,
4407 PAGE_SIZE);
4408
4409 data = vmap(gr->global_ctx_buffer[PRIV_ACCESS_MAP].pages,
4410 PAGE_ALIGN(gr->global_ctx_buffer[PRIV_ACCESS_MAP].size) >>
4411 PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL));
4412 if (!data) {
4413 gk20a_err(dev_from_gk20a(g),
4414 "failed to map priv access map memory");
4415 err = -ENOMEM;
4416 goto clean_up;
4417 }
4418
4419 memset(data, 0x0, PAGE_SIZE * nr_pages);
4420
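	/*
	 * Each whitelisted priv register (4-byte aligned) maps to a single
	 * bit in the access map: word index = addr >> 2, then split into a
	 * byte offset and a bit position within that byte.
	 */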
4421 for (w = 0; w < ARRAY_SIZE(wl_addr_gk20a); w++) {
4422 u32 map_bit, map_byte, map_shift;
4423 map_bit = wl_addr_gk20a[w] >> 2;
4424 map_byte = map_bit >> 3;
4425 map_shift = map_bit & 0x7; /* i.e. 0-7 */
4426 gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d",
4427 wl_addr_gk20a[w], map_byte, map_shift);
4428 ((u8 *)data)[map_byte] |= 1 << map_shift;
4429 }
4430
4431clean_up:
4432 if (data)
4433 vunmap(data);
4434 	return err;
4435}
4436
4437static int gk20a_init_gr_setup_sw(struct gk20a *g)
4438{
4439 struct gr_gk20a *gr = &g->gr;
4440 int err;
4441
4442 gk20a_dbg_fn("");
4443
4444 if (gr->sw_ready) {
4445 gk20a_dbg_fn("skip init");
4446 return 0;
4447 }
4448
4449 gr->g = g;
4450
4451 err = gr_gk20a_init_gr_config(g, gr);
4452 if (err)
4453 goto clean_up;
4454
4455 err = gr_gk20a_init_mmu_sw(g, gr);
4456 if (err)
4457 goto clean_up;
4458
4459 err = gr_gk20a_init_map_tiles(g, gr);
4460 if (err)
4461 goto clean_up;
4462
4463 if (tegra_cpu_is_asim())
4464 gr->max_comptag_mem = 1; /* MBs worth of comptag coverage */
4465 else {
4466 gk20a_dbg_info("total ram pages : %lu", totalram_pages);
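		/* totalram_pages is in PAGE_SIZE units; the shift converts
		 * it to MBs of system memory worth of comptag coverage */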
4467 gr->max_comptag_mem = totalram_pages
4468 >> (10 - (PAGE_SHIFT - 10));
4469 }
4470 err = g->ops.ltc.init_comptags(g, gr);
4471 if (err)
4472 goto clean_up;
4473
4474 err = gr_gk20a_init_zcull(g, gr);
4475 if (err)
4476 goto clean_up;
4477
4478 err = gr_gk20a_alloc_global_ctx_buffers(g);
4479 if (err)
4480 goto clean_up;
4481
4482 err = gr_gk20a_init_access_map(g);
4483 if (err)
4484 goto clean_up;
4485
4486 mutex_init(&gr->ctx_mutex);
4487 spin_lock_init(&gr->ch_tlb_lock);
4488
4489 gr->remove_support = gk20a_remove_gr_support;
4490 gr->sw_ready = true;
4491
4492 gk20a_dbg_fn("done");
4493 return 0;
4494
4495clean_up:
4496 gk20a_err(dev_from_gk20a(g), "fail");
4497 gk20a_remove_gr_support(gr);
4498 return err;
4499}
4500
4501int gk20a_init_gr_support(struct gk20a *g)
4502{
4503 u32 err;
4504
4505 gk20a_dbg_fn("");
4506
4507 err = gk20a_init_gr_prepare(g);
4508 if (err)
4509 return err;
4510
4511 /* this is required before gr_gk20a_init_ctx_state */
4512 mutex_init(&g->gr.fecs_mutex);
4513
4514 err = gk20a_init_gr_reset_enable_hw(g);
4515 if (err)
4516 return err;
4517
4518 err = gk20a_init_gr_setup_sw(g);
4519 if (err)
4520 return err;
4521
4522 err = gk20a_init_gr_setup_hw(g);
4523 if (err)
4524 return err;
4525
4526 return 0;
4527}
4528
4529#define NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc
4530#define NVA297_SET_CIRCULAR_BUFFER_SIZE 0x1280
4531#define NVA297_SET_SHADER_EXCEPTIONS 0x1528
4532#define NVA0C0_SET_SHADER_EXCEPTIONS 0x1528
4533
4534#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
4535
4536struct gr_isr_data {
4537 u32 addr;
4538 u32 data_lo;
4539 u32 data_hi;
4540 u32 curr_ctx;
4541 u32 chid;
4542 u32 offset;
4543 u32 sub_chan;
4544 u32 class_num;
4545};
4546
4547void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data)
4548{
4549 gk20a_dbg_fn("");
4550
4551 if (data == NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE) {
4552 gk20a_writel(g,
4553 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), 0);
4554 gk20a_writel(g,
4555 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(), 0);
4556 } else {
4557 /* setup sm warp esr report masks */
4558 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
4559 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
4560 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
4561 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
4562 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
4563 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
4564 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
4565 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
4566 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
4567 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
4568 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
4569 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
4570 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
4571 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
4572 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
4573 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
4574 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
4575 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
4576 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
4577 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
4578 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());
4579
4580 /* setup sm global esr report mask */
4581 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
4582 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
4583 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() |
4584 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() |
4585 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() |
4586 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() |
4587 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() |
4588 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f());
4589 }
4590}
4591
4592static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data)
4593{
4594 struct gr_gk20a *gr = &g->gr;
4595 u32 gpc_index, ppc_index, stride, val, offset;
4596 u32 cb_size = data * 4;
4597
4598 gk20a_dbg_fn("");
4599
4600 if (cb_size > gr->attrib_cb_size)
4601 cb_size = gr->attrib_cb_size;
4602
4603 gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
4604 (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
4605 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
4606 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
4607
4608 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
4609 stride = proj_gpc_stride_v() * gpc_index;
4610
4611 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
4612 ppc_index++) {
4613
4614 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() +
4615 stride +
4616 proj_ppc_in_gpc_stride_v() * ppc_index);
4617
4618 offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val);
4619
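			/* write the new beta cb size with start_offset bumped
			 * by one, then restore the original offset; this
			 * appears to be how the cbm config is latched */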
4620 val = set_field(val,
4621 gr_gpc0_ppc0_cbm_cfg_size_m(),
4622 gr_gpc0_ppc0_cbm_cfg_size_f(cb_size *
4623 gr->pes_tpc_count[ppc_index][gpc_index]));
4624 val = set_field(val,
4625 gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
4626 (offset + 1));
4627
4628 gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
4629 stride +
4630 proj_ppc_in_gpc_stride_v() * ppc_index, val);
4631
4632 val = set_field(val,
4633 gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
4634 offset);
4635
4636 gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
4637 stride +
4638 proj_ppc_in_gpc_stride_v() * ppc_index, val);
4639 }
4640 }
4641}
4642
4643static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
4644{
4645 struct gr_gk20a *gr = &g->gr;
4646 u32 gpc_index, ppc_index, stride, val;
4647 u32 pd_ab_max_output;
4648 u32 alpha_cb_size = data * 4;
4649
4650 gk20a_dbg_fn("");
4651 /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
4652 return; */
4653
4654 if (alpha_cb_size > gr->alpha_cb_size)
4655 alpha_cb_size = gr->alpha_cb_size;
4656
4657 gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
4658 (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
4659 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
4660 gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));
4661
4662 pd_ab_max_output = alpha_cb_size *
4663 gr_gpc0_ppc0_cbm_cfg_size_granularity_v() /
4664 gr_pd_ab_dist_cfg1_max_output_granularity_v();
4665
4666 gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
4667 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output));
4668
4669 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
4670 stride = proj_gpc_stride_v() * gpc_index;
4671
4672 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
4673 ppc_index++) {
4674
4675 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() +
4676 stride +
4677 proj_ppc_in_gpc_stride_v() * ppc_index);
4678
4679 val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(),
4680 gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size *
4681 gr->pes_tpc_count[ppc_index][gpc_index]));
4682
4683 gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() +
4684 stride +
4685 proj_ppc_in_gpc_stride_v() * ppc_index, val);
4686 }
4687 }
4688}
4689
4690void gk20a_gr_reset(struct gk20a *g)
4691{
4692 int err;
4693 err = gk20a_init_gr_prepare(g);
4694 BUG_ON(err);
4695 err = gk20a_init_gr_reset_enable_hw(g);
4696 BUG_ON(err);
4697 err = gk20a_init_gr_setup_hw(g);
4698 BUG_ON(err);
4699}
4700
4701static int gr_gk20a_handle_sw_method(struct gk20a *g, u32 addr,
4702 u32 class_num, u32 offset, u32 data)
4703{
4704 gk20a_dbg_fn("");
4705
4706 if (class_num == KEPLER_COMPUTE_A) {
4707 switch (offset << 2) {
4708 case NVA0C0_SET_SHADER_EXCEPTIONS:
4709 gk20a_gr_set_shader_exceptions(g, data);
4710 break;
4711 default:
4712 goto fail;
4713 }
4714 }
4715
4716 if (class_num == KEPLER_C) {
4717 switch (offset << 2) {
4718 case NVA297_SET_SHADER_EXCEPTIONS:
4719 gk20a_gr_set_shader_exceptions(g, data);
4720 break;
4721 case NVA297_SET_CIRCULAR_BUFFER_SIZE:
4722 g->ops.gr.set_circular_buffer_size(g, data);
4723 break;
4724 case NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
4725 g->ops.gr.set_alpha_circular_buffer_size(g, data);
4726 break;
4727 default:
4728 goto fail;
4729 }
4730 }
4731 return 0;
4732
4733fail:
4734 return -EINVAL;
4735}
4736
4737static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g,
4738 struct gr_isr_data *isr_data)
4739{
4740 struct fifo_gk20a *f = &g->fifo;
4741 struct channel_gk20a *ch = &f->channel[isr_data->chid];
4742 gk20a_dbg_fn("");
4743 gk20a_set_error_notifier(ch,
4744 NVHOST_CHANNEL_GR_SEMAPHORE_TIMEOUT);
4745 gk20a_err(dev_from_gk20a(g),
4746 "gr semaphore timeout\n");
4747 return -EINVAL;
4748}
4749
4750static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g,
4751 struct gr_isr_data *isr_data)
4752{
4753 struct fifo_gk20a *f = &g->fifo;
4754 struct channel_gk20a *ch = &f->channel[isr_data->chid];
4755 gk20a_dbg_fn("");
4756 gk20a_set_error_notifier(ch,
4757 NVHOST_CHANNEL_GR_ILLEGAL_NOTIFY);
4758 /* This is an unrecoverable error, reset is needed */
4759 gk20a_err(dev_from_gk20a(g),
4760 		"gr illegal notify pending\n");
4761 return -EINVAL;
4762}
4763
4764static int gk20a_gr_handle_illegal_method(struct gk20a *g,
4765 struct gr_isr_data *isr_data)
4766{
4767 int ret = g->ops.gr.handle_sw_method(g, isr_data->addr,
4768 isr_data->class_num, isr_data->offset,
4769 isr_data->data_lo);
4770 if (ret)
4771 gk20a_err(dev_from_gk20a(g), "invalid method class 0x%08x"
4772 ", offset 0x%08x address 0x%08x\n",
4773 isr_data->class_num, isr_data->offset, isr_data->addr);
4774
4775 return ret;
4776}
4777
4778static int gk20a_gr_handle_illegal_class(struct gk20a *g,
4779 struct gr_isr_data *isr_data)
4780{
4781 struct fifo_gk20a *f = &g->fifo;
4782 struct channel_gk20a *ch = &f->channel[isr_data->chid];
4783 gk20a_dbg_fn("");
4784 gk20a_set_error_notifier(ch,
4785 NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY);
4786 gk20a_err(dev_from_gk20a(g),
4787 "invalid class 0x%08x, offset 0x%08x",
4788 isr_data->class_num, isr_data->offset);
4789 return -EINVAL;
4790}
4791
4792static int gk20a_gr_handle_class_error(struct gk20a *g,
4793 struct gr_isr_data *isr_data)
4794{
4795 struct fifo_gk20a *f = &g->fifo;
4796 struct channel_gk20a *ch = &f->channel[isr_data->chid];
4797 gk20a_dbg_fn("");
4798
4799 gk20a_set_error_notifier(ch,
4800 NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY);
4801 gk20a_err(dev_from_gk20a(g),
4802 "class error 0x%08x, offset 0x%08x",
4803 isr_data->class_num, isr_data->offset);
4804 return -EINVAL;
4805}
4806
4807static int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
4808 struct gr_isr_data *isr_data)
4809{
4810 struct fifo_gk20a *f = &g->fifo;
4811 struct channel_gk20a *ch = &f->channel[isr_data->chid];
4812
4813 wake_up(&ch->semaphore_wq);
4814
4815 return 0;
4816}
4817
4818#if defined(CONFIG_GK20A_CYCLE_STATS)
4819static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g,
4820 u32 offset)
4821{
4822 /* support only 24-bit 4-byte aligned offsets */
4823 bool valid = !(offset & 0xFF000003);
4824 /* whitelist check */
4825 valid = valid &&
4826 is_bar0_global_offset_whitelisted_gk20a(offset);
4827 /* resource size check in case there was a problem
4828 * with allocating the assumed size of bar0 */
4829 valid = valid &&
4830 offset < resource_size(g->reg_mem);
4831 return valid;
4832}
4833#endif
4834
4835static int gk20a_gr_handle_notify_pending(struct gk20a *g,
4836 struct gr_isr_data *isr_data)
4837{
4838 struct fifo_gk20a *f = &g->fifo;
4839 struct channel_gk20a *ch = &f->channel[isr_data->chid];
4840
4841#if defined(CONFIG_GK20A_CYCLE_STATS)
4842 void *virtual_address;
4843 u32 buffer_size;
4844 u32 offset;
4845 u32 new_offset;
4846 bool exit;
4847 struct share_buffer_head *sh_hdr;
4848 u32 raw_reg;
4849 u64 mask_orig;
4850 u64 v = 0;
4851 struct gk20a_cyclestate_buffer_elem *op_elem;
4852 /* GL will never use payload 0 for cycle state */
4853 if ((ch->cyclestate.cyclestate_buffer == NULL) || (isr_data->data_lo == 0))
4854 return 0;
4855
4856 mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
4857
4858 virtual_address = ch->cyclestate.cyclestate_buffer;
4859 buffer_size = ch->cyclestate.cyclestate_buffer_size;
4860 offset = isr_data->data_lo;
4861 exit = false;
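	/* walk the shared cyclestats buffer, executing each BAR0 read/write
	 * request in place until an OP_END header is reached */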
4862 while (!exit) {
4863 if (offset >= buffer_size) {
4864 WARN_ON(1);
4865 break;
4866 }
4867
4868 sh_hdr = (struct share_buffer_head *)
4869 ((char *)virtual_address + offset);
4870
4871 if (sh_hdr->size < sizeof(struct share_buffer_head)) {
4872 WARN_ON(1);
4873 break;
4874 }
4875 new_offset = offset + sh_hdr->size;
4876
4877 switch (sh_hdr->operation) {
4878 case OP_END:
4879 exit = true;
4880 break;
4881
4882 case BAR0_READ32:
4883 case BAR0_WRITE32:
4884 {
4885 bool valid;
4886 op_elem =
4887 (struct gk20a_cyclestate_buffer_elem *)
4888 sh_hdr;
4889 valid = is_valid_cyclestats_bar0_offset_gk20a(g,
4890 op_elem->offset_bar0);
4891 if (!valid) {
4892 gk20a_err(dev_from_gk20a(g),
4893 					"invalid cyclestats op offset: 0x%x\n",
4894 op_elem->offset_bar0);
4895
4896 sh_hdr->failed = exit = true;
4897 break;
4898 }
4899
4900
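			/* build a mask covering bits first_bit..last_bit
			 * (inclusive) of the target register */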
4901 			mask_orig =
4902 				((1ULL <<
4903 				  (op_elem->last_bit + 1)) - 1) &
4904 				~((1ULL <<
4905 				   op_elem->first_bit) - 1);
4906
4907 raw_reg =
4908 gk20a_readl(g,
4909 op_elem->offset_bar0);
4910
4911 switch (sh_hdr->operation) {
4912 case BAR0_READ32:
4913 op_elem->data =
4914 (raw_reg & mask_orig)
4915 >> op_elem->first_bit;
4916 break;
4917
4918 case BAR0_WRITE32:
4919 v = 0;
4920 if ((unsigned int)mask_orig !=
4921 (unsigned int)~0) {
4922 v = (unsigned int)
4923 (raw_reg & ~mask_orig);
4924 }
4925
4926 v |= ((op_elem->data
4927 << op_elem->first_bit)
4928 & mask_orig);
4929
4930 gk20a_writel(g,
4931 op_elem->offset_bar0,
4932 (unsigned int)v);
4933 break;
4934 default:
4935 				/* nop ok? */
4936 break;
4937 }
4938 }
4939 break;
4940
4941 default:
4942 /* no operation content case */
4943 exit = true;
4944 break;
4945 }
4946 sh_hdr->completed = true;
4947 offset = new_offset;
4948 }
4949 mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
4950#endif
4951 gk20a_dbg_fn("");
4952 wake_up(&ch->notifier_wq);
4953 return 0;
4954}
4955
4956/* Used by sw interrupt thread to translate current ctx to chid.
4957 * For performance, we don't want to go through 128 channels every time.
4958 * A small tlb is used here to cache translation */
4959static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
4960{
4961 struct fifo_gk20a *f = &g->fifo;
4962 struct gr_gk20a *gr = &g->gr;
4963 u32 chid = -1;
4964 u32 i;
4965
4966 spin_lock(&gr->ch_tlb_lock);
4967
4968 /* check cache first */
4969 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
4970 if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
4971 chid = gr->chid_tlb[i].hw_chid;
4972 goto unlock;
4973 }
4974 }
4975
4976 /* slow path */
4977 for (chid = 0; chid < f->num_channels; chid++)
4978 if (f->channel[chid].in_use) {
4979 if ((u32)(f->channel[chid].inst_block.cpu_pa >>
4980 ram_in_base_shift_v()) ==
4981 gr_fecs_current_ctx_ptr_v(curr_ctx))
4982 break;
4983 }
4984
4985 if (chid >= f->num_channels) {
4986 chid = -1;
4987 goto unlock;
4988 }
4989
4990 /* add to free tlb entry */
4991 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
4992 if (gr->chid_tlb[i].curr_ctx == 0) {
4993 gr->chid_tlb[i].curr_ctx = curr_ctx;
4994 gr->chid_tlb[i].hw_chid = chid;
4995 goto unlock;
4996 }
4997 }
4998
4999 /* no free entry, flush one */
5000 gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
5001 gr->chid_tlb[gr->channel_tlb_flush_index].hw_chid = chid;
5002
5003 gr->channel_tlb_flush_index =
5004 (gr->channel_tlb_flush_index + 1) &
5005 (GR_CHANNEL_MAP_TLB_SIZE - 1);
5006
5007unlock:
5008 spin_unlock(&gr->ch_tlb_lock);
5009 return chid;
5010}
5011
5012static int gk20a_gr_lock_down_sm(struct gk20a *g, u32 global_esr_mask)
5013{
5014 unsigned long end_jiffies = jiffies +
5015 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
5016 u32 delay = GR_IDLE_CHECK_DEFAULT;
5017 bool mmu_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled(g);
5018 u32 dbgr_control0;
5019
5020 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "locking down SM");
5021
5022 /* assert stop trigger */
5023 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
5024 dbgr_control0 |= gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
5025 gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r(), dbgr_control0);
5026
5027 /* wait for the sm to lock down */
5028 do {
5029 u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
5030 u32 warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r());
5031 u32 dbgr_status0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_status0_r());
5032 bool locked_down =
5033 (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
5034 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
5035 bool error_pending =
5036 (gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) !=
5037 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) ||
5038 ((global_esr & ~global_esr_mask) != 0);
5039
5040 if (locked_down || !error_pending) {
5041 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "locked down SM");
5042
5043 /* de-assert stop trigger */
5044 dbgr_control0 &= ~gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
5045 gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r(), dbgr_control0);
5046
5047 return 0;
5048 }
5049
5050 /* if an mmu fault is pending and mmu debug mode is not
5051 * enabled, the sm will never lock down. */
5052 if (!mmu_debug_mode_enabled && gk20a_fifo_mmu_fault_pending(g)) {
5053 gk20a_err(dev_from_gk20a(g), "mmu fault pending, sm will"
5054 " never lock down!");
5055 return -EFAULT;
5056 }
5057
5058 usleep_range(delay, delay * 2);
5059 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
5060
5061 } while (time_before(jiffies, end_jiffies)
5062 || !tegra_platform_is_silicon());
5063
5064 gk20a_err(dev_from_gk20a(g), "timed out while trying to lock down SM");
5065
5066 return -EAGAIN;
5067}
5068
5069bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
5070{
5071 u32 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
5072
5073 /* check if an sm debugger is attached */
5074 if (gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(dbgr_control0) ==
5075 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v())
5076 return true;
5077
5078 return false;
5079}
5080
5081static void gk20a_gr_clear_sm_hww(struct gk20a *g, u32 global_esr)
5082{
5083 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r(), global_esr);
5084
5085 /* clear the warp hww */
5086 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r(),
5087 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f());
5088}
5089
5090static struct channel_gk20a *
5091channel_from_hw_chid(struct gk20a *g, u32 hw_chid)
5092{
5093 return g->fifo.channel+hw_chid;
5094}
5095
5096static int gk20a_gr_handle_sm_exception(struct gk20a *g,
5097 struct gr_isr_data *isr_data)
5098{
5099 int ret = 0;
5100 bool do_warp_sync = false;
5101 /* these three interrupts don't require locking down the SM. They can
5102 * be handled by usermode clients as they aren't fatal. Additionally,
5103 * usermode clients may wish to allow some warps to execute while others
5104 * are at breakpoints, as opposed to fatal errors where all warps should
5105 * halt. */
5106 u32 global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
5107 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
5108 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
5109 u32 global_esr, warp_esr;
5110 bool sm_debugger_attached = gk20a_gr_sm_debugger_attached(g);
5111 struct channel_gk20a *fault_ch;
5112
5113 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
5114
5115 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
5116 warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r());
5117
5118 /* if an sm debugger is attached, disable forwarding of tpc exceptions.
5119 * the debugger will reenable exceptions after servicing them. */
5120 if (sm_debugger_attached) {
5121 u32 tpc_exception_en = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r());
5122 tpc_exception_en &= ~gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f();
5123 gk20a_writel(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r(), tpc_exception_en);
5124 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM debugger attached");
5125 }
5126
5127 /* if a debugger is present and an error has occurred, do a warp sync */
5128 if (sm_debugger_attached && ((warp_esr != 0) || ((global_esr & ~global_mask) != 0))) {
5129 gk20a_dbg(gpu_dbg_intr, "warp sync needed");
5130 do_warp_sync = true;
5131 }
5132
5133 if (do_warp_sync) {
5134 ret = gk20a_gr_lock_down_sm(g, global_mask);
5135 if (ret) {
5136 gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
5137 return ret;
5138 }
5139 }
5140
5141 /* finally, signal any client waiting on an event */
5142 fault_ch = channel_from_hw_chid(g, isr_data->chid);
5143 if (fault_ch)
5144 gk20a_dbg_gpu_post_events(fault_ch);
5145
5146 return ret;
5147}
5148
5149static int gk20a_gr_handle_tpc_exception(struct gk20a *g,
5150 struct gr_isr_data *isr_data)
5151{
5152 int ret = 0;
5153 u32 tpc_exception = gk20a_readl(g, gr_gpcs_tpcs_tpccs_tpc_exception_r());
5154
5155 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
5156
5157	/* check if an sm exception is pending */
5158 if (gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(tpc_exception) ==
5159 gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v()) {
5160 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM exception pending");
5161 ret = gk20a_gr_handle_sm_exception(g, isr_data);
5162 }
5163
5164 return ret;
5165}
5166
5167static int gk20a_gr_handle_gpc_exception(struct gk20a *g,
5168 struct gr_isr_data *isr_data)
5169{
5170 int ret = 0;
5171 u32 gpc_exception = gk20a_readl(g, gr_gpcs_gpccs_gpc_exception_r());
5172
5173 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
5174
5175 /* check if tpc 0 has an exception */
5176 if (gr_gpcs_gpccs_gpc_exception_tpc_v(gpc_exception) ==
5177 gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v()) {
5178 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "TPC exception pending");
5179 ret = gk20a_gr_handle_tpc_exception(g, isr_data);
5180 }
5181
5182 return ret;
5183}
5184
5185int gk20a_gr_isr(struct gk20a *g)
5186{
5187 struct gr_isr_data isr_data;
5188 u32 grfifo_ctl;
5189 u32 obj_table;
5190 int need_reset = 0;
5191 u32 gr_intr = gk20a_readl(g, gr_intr_r());
5192
5193 gk20a_dbg_fn("");
5194 gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr);
5195
5196 if (!gr_intr)
5197 return 0;
5198
5199 grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
5200 grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
5201 grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);
5202
5203 gk20a_writel(g, gr_gpfifo_ctl_r(),
5204 grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
5205 gr_gpfifo_ctl_semaphore_access_f(0));
5206
5207 isr_data.addr = gk20a_readl(g, gr_trapped_addr_r());
5208 isr_data.data_lo = gk20a_readl(g, gr_trapped_data_lo_r());
5209 isr_data.data_hi = gk20a_readl(g, gr_trapped_data_hi_r());
5210 isr_data.curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
5211 isr_data.offset = gr_trapped_addr_mthd_v(isr_data.addr);
5212 isr_data.sub_chan = gr_trapped_addr_subch_v(isr_data.addr);
5213 obj_table = gk20a_readl(g,
5214 gr_fe_object_table_r(isr_data.sub_chan));
5215 isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
5216
5217 isr_data.chid =
5218 gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx);
5219 if (isr_data.chid == -1) {
5220 gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
5221 isr_data.curr_ctx);
5222 goto clean_up;
5223 }
5224
5225 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
5226 "channel %d: addr 0x%08x, "
5227		   "data 0x%08x 0x%08x, "
5228 "ctx 0x%08x, offset 0x%08x, "
5229 "subchannel 0x%08x, class 0x%08x",
5230 isr_data.chid, isr_data.addr,
5231 isr_data.data_hi, isr_data.data_lo,
5232 isr_data.curr_ctx, isr_data.offset,
5233 isr_data.sub_chan, isr_data.class_num);
5234
5235 if (gr_intr & gr_intr_notify_pending_f()) {
5236 gk20a_gr_handle_notify_pending(g, &isr_data);
5237 gk20a_writel(g, gr_intr_r(),
5238 gr_intr_notify_reset_f());
5239 gr_intr &= ~gr_intr_notify_pending_f();
5240 }
5241
5242 if (gr_intr & gr_intr_semaphore_pending_f()) {
5243 gk20a_gr_handle_semaphore_pending(g, &isr_data);
5244 gk20a_writel(g, gr_intr_r(),
5245 gr_intr_semaphore_reset_f());
5246 gr_intr &= ~gr_intr_semaphore_pending_f();
5247 }
5248
5249 if (gr_intr & gr_intr_semaphore_timeout_pending_f()) {
5250 need_reset |= gk20a_gr_handle_semaphore_timeout_pending(g,
5251 &isr_data);
5252 gk20a_writel(g, gr_intr_r(),
5253 gr_intr_semaphore_reset_f());
5254		gr_intr &= ~gr_intr_semaphore_timeout_pending_f();
5255 }
5256
5257 if (gr_intr & gr_intr_illegal_notify_pending_f()) {
5258 need_reset |= gk20a_gr_intr_illegal_notify_pending(g,
5259 &isr_data);
5260 gk20a_writel(g, gr_intr_r(),
5261 gr_intr_illegal_notify_reset_f());
5262 gr_intr &= ~gr_intr_illegal_notify_pending_f();
5263 }
5264
5265 if (gr_intr & gr_intr_illegal_method_pending_f()) {
5266 need_reset |= gk20a_gr_handle_illegal_method(g, &isr_data);
5267 gk20a_writel(g, gr_intr_r(),
5268 gr_intr_illegal_method_reset_f());
5269 gr_intr &= ~gr_intr_illegal_method_pending_f();
5270 }
5271
5272 if (gr_intr & gr_intr_illegal_class_pending_f()) {
5273 need_reset |= gk20a_gr_handle_illegal_class(g, &isr_data);
5274 gk20a_writel(g, gr_intr_r(),
5275 gr_intr_illegal_class_reset_f());
5276 gr_intr &= ~gr_intr_illegal_class_pending_f();
5277 }
5278
5279 if (gr_intr & gr_intr_class_error_pending_f()) {
5280 need_reset |= gk20a_gr_handle_class_error(g, &isr_data);
5281 gk20a_writel(g, gr_intr_r(),
5282 gr_intr_class_error_reset_f());
5283 gr_intr &= ~gr_intr_class_error_pending_f();
5284 }
5285
5286 /* this one happens if someone tries to hit a non-whitelisted
5287 * register using set_falcon[4] */
5288 if (gr_intr & gr_intr_firmware_method_pending_f()) {
5289 need_reset |= true;
5290 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "firmware method intr pending\n");
5291 gk20a_writel(g, gr_intr_r(),
5292 gr_intr_firmware_method_reset_f());
5293 gr_intr &= ~gr_intr_firmware_method_pending_f();
5294 }
5295
5296 if (gr_intr & gr_intr_exception_pending_f()) {
5297 u32 exception = gk20a_readl(g, gr_exception_r());
5298 struct fifo_gk20a *f = &g->fifo;
5299 struct channel_gk20a *ch = &f->channel[isr_data.chid];
5300
5301 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception);
5302
5303 if (exception & gr_exception_fe_m()) {
5304 u32 fe = gk20a_readl(g, gr_fe_hww_esr_r());
5305 gk20a_dbg(gpu_dbg_intr, "fe warning %08x\n", fe);
5306 gk20a_writel(g, gr_fe_hww_esr_r(), fe);
5307 }
5308
5309 /* check if a gpc exception has occurred */
5310 if (exception & gr_exception_gpc_m() && need_reset == 0) {
5311 u32 exception1 = gk20a_readl(g, gr_exception1_r());
5312 u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
5313
5314 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "GPC exception pending");
5315
5316 /* if no sm debugger is present, clean up the channel */
5317 if (!gk20a_gr_sm_debugger_attached(g)) {
5318 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
5319 "SM debugger not attached, clearing interrupt");
5320 need_reset |= -EFAULT;
5321 } else {
5322 /* check if gpc 0 has an exception */
5323 if (exception1 & gr_exception1_gpc_0_pending_f())
5324 need_reset |= gk20a_gr_handle_gpc_exception(g, &isr_data);
5325 /* clear the hwws, also causes tpc and gpc
5326 * exceptions to be cleared */
5327 gk20a_gr_clear_sm_hww(g, global_esr);
5328 }
5329
5330 if (need_reset)
5331 gk20a_set_error_notifier(ch,
5332 NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY);
5333 }
5334
5335 gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f());
5336 gr_intr &= ~gr_intr_exception_pending_f();
5337 }
5338
5339 if (need_reset)
5340 gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), true);
5341
5342clean_up:
5343 gk20a_writel(g, gr_gpfifo_ctl_r(),
5344 grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
5345 gr_gpfifo_ctl_semaphore_access_f(1));
5346
5347 if (gr_intr)
5348 gk20a_err(dev_from_gk20a(g),
5349 "unhandled gr interrupt 0x%08x", gr_intr);
5350
5351 return 0;
5352}
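
#if 0
/*
 * Editorial sketch, not part of the original driver: every interrupt branch
 * in gk20a_gr_isr() follows the same acknowledge pattern, which a
 * hypothetical helper could capture as:
 */
static void gk20a_gr_ack_intr(struct gk20a *g, u32 *gr_intr,
			      u32 pending_f, u32 reset_f)
{
	gk20a_writel(g, gr_intr_r(), reset_f);	/* ack the bit in hardware */
	*gr_intr &= ~pending_f;			/* drop it from the local copy */
}

/* e.g. the notify branch would then read:
 *	gk20a_gr_handle_notify_pending(g, &isr_data);
 *	gk20a_gr_ack_intr(g, &gr_intr, gr_intr_notify_pending_f(),
 *			  gr_intr_notify_reset_f());
 */
#endif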
5353
5354int gk20a_gr_nonstall_isr(struct gk20a *g)
5355{
5356 u32 gr_intr = gk20a_readl(g, gr_intr_nonstall_r());
5357 u32 clear_intr = 0;
5358
5359 gk20a_dbg(gpu_dbg_intr, "pgraph nonstall intr %08x", gr_intr);
5360
5361 if (gr_intr & gr_intr_nonstall_trap_pending_f()) {
5362 gk20a_channel_semaphore_wakeup(g);
5363 clear_intr |= gr_intr_nonstall_trap_pending_f();
5364 }
5365
5366 gk20a_writel(g, gr_intr_nonstall_r(), clear_intr);
5367
5368 return 0;
5369}
5370
5371int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
5372{
5373 BUG_ON(size == NULL);
5374 return gr_gk20a_submit_fecs_method_op(g,
5375 (struct fecs_method_op_gk20a) {
5376 .mailbox.id = 0,
5377 .mailbox.data = 0,
5378 .mailbox.clr = ~0,
5379 .method.data = 1,
5380 .method.addr = gr_fecs_method_push_adr_discover_reglist_image_size_v(),
5381 .mailbox.ret = size,
5382 .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
5383 .mailbox.ok = 0,
5384 .cond.fail = GR_IS_UCODE_OP_SKIP,
5385 .mailbox.fail = 0});
5386}
5387
5388int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr)
5389{
5390 return gr_gk20a_submit_fecs_method_op(g,
5391 (struct fecs_method_op_gk20a){
5392 .mailbox.id = 4,
5393 .mailbox.data = (gr_fecs_current_ctx_ptr_f(addr >> 12) |
5394 gr_fecs_current_ctx_valid_f(1) |
5395 gr_fecs_current_ctx_target_vid_mem_f()),
5396 .mailbox.clr = ~0,
5397 .method.data = 1,
5398 .method.addr = gr_fecs_method_push_adr_set_reglist_bind_instance_v(),
5399 .mailbox.ret = NULL,
5400 .cond.ok = GR_IS_UCODE_OP_EQUAL,
5401 .mailbox.ok = 1,
5402 .cond.fail = GR_IS_UCODE_OP_SKIP,
5403 .mailbox.fail = 0});
5404}
5405
5406int gr_gk20a_fecs_set_reglist_virual_addr(struct gk20a *g, u64 pmu_va)
5407{
5408 return gr_gk20a_submit_fecs_method_op(g,
5409 (struct fecs_method_op_gk20a) {
5410 .mailbox.id = 4,
5411 .mailbox.data = u64_lo32(pmu_va >> 8),
5412 .mailbox.clr = ~0,
5413 .method.data = 1,
5414 .method.addr = gr_fecs_method_push_adr_set_reglist_virtual_address_v(),
5415 .mailbox.ret = NULL,
5416 .cond.ok = GR_IS_UCODE_OP_EQUAL,
5417 .mailbox.ok = 1,
5418 .cond.fail = GR_IS_UCODE_OP_SKIP,
5419 .mailbox.fail = 0});
5420}
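
#if 0
/*
 * Editorial usage sketch, not part of the original source: the three FECS
 * reglist methods above are typically used together.  'inst_block_pa' and
 * 'pmu_va' are placeholder names for the reglist instance block physical
 * address and its PMU virtual address.
 */
static int example_bind_reglist(struct gk20a *g, phys_addr_t inst_block_pa,
				u64 pmu_va)
{
	u32 reglist_size;	/* would size the PMU-side buffer backing pmu_va */
	int err;

	err = gr_gk20a_fecs_get_reglist_img_size(g, &reglist_size);
	if (!err)
		err = gr_gk20a_fecs_set_reglist_bind_inst(g, inst_block_pa);
	if (!err)
		err = gr_gk20a_fecs_set_reglist_virual_addr(g, pmu_va);
	return err;
}
#endif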
5421
5422int gk20a_gr_suspend(struct gk20a *g)
5423{
5424 unsigned long end_jiffies = jiffies +
5425 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
5426 u32 ret = 0;
5427
5428 gk20a_dbg_fn("");
5429
5430 ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
5431 if (ret)
5432 return ret;
5433
5434 gk20a_writel(g, gr_gpfifo_ctl_r(),
5435 gr_gpfifo_ctl_access_disabled_f());
5436
5437 /* disable gr intr */
5438 gk20a_writel(g, gr_intr_r(), 0);
5439 gk20a_writel(g, gr_intr_en_r(), 0);
5440
5441 /* disable all exceptions */
5442 gk20a_writel(g, gr_exception_r(), 0);
5443 gk20a_writel(g, gr_exception_en_r(), 0);
5444 gk20a_writel(g, gr_exception1_r(), 0);
5445 gk20a_writel(g, gr_exception1_en_r(), 0);
5446 gk20a_writel(g, gr_exception2_r(), 0);
5447 gk20a_writel(g, gr_exception2_en_r(), 0);
5448
5449 gk20a_gr_flush_channel_tlb(&g->gr);
5450
5451 gk20a_dbg_fn("done");
5452 return ret;
5453}
5454
5455static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
5456 u32 addr,
5457 bool is_quad, u32 quad,
5458 u32 *context_buffer,
5459 u32 context_buffer_size,
5460 u32 *priv_offset);
5461
5462/* This function will decode a priv address and return the partition type and numbers. */
5463int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
5464 int *addr_type, /* enum ctxsw_addr_type */
5465 u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
5466 u32 *broadcast_flags)
5467{
5468 u32 gpc_addr;
5469 u32 ppc_address;
5470 u32 ppc_broadcast_addr;
5471
5472 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
5473
5474 /* setup defaults */
5475 ppc_address = 0;
5476 ppc_broadcast_addr = 0;
5477 *addr_type = CTXSW_ADDR_TYPE_SYS;
5478 *broadcast_flags = PRI_BROADCAST_FLAGS_NONE;
5479 *gpc_num = 0;
5480 *tpc_num = 0;
5481 *ppc_num = 0;
5482 *be_num = 0;
5483
5484 if (pri_is_gpc_addr(addr)) {
5485 *addr_type = CTXSW_ADDR_TYPE_GPC;
5486 gpc_addr = pri_gpccs_addr_mask(addr);
5487 if (pri_is_gpc_addr_shared(addr)) {
5488 *addr_type = CTXSW_ADDR_TYPE_GPC;
5489 *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC;
5490 } else
5491 *gpc_num = pri_get_gpc_num(addr);
5492
5493 if (pri_is_tpc_addr(gpc_addr)) {
5494 *addr_type = CTXSW_ADDR_TYPE_TPC;
5495 if (pri_is_tpc_addr_shared(gpc_addr)) {
5496 *broadcast_flags |= PRI_BROADCAST_FLAGS_TPC;
5497 return 0;
5498 }
5499 *tpc_num = pri_get_tpc_num(gpc_addr);
5500 }
5501 return 0;
5502 } else if (pri_is_be_addr(addr)) {
5503 *addr_type = CTXSW_ADDR_TYPE_BE;
5504 if (pri_is_be_addr_shared(addr)) {
5505 *broadcast_flags |= PRI_BROADCAST_FLAGS_BE;
5506 return 0;
5507 }
5508 *be_num = pri_get_be_num(addr);
5509 return 0;
5510 } else {
5511 *addr_type = CTXSW_ADDR_TYPE_SYS;
5512 return 0;
5513 }
5514 /* PPC!?!?!?! */
5515
5516 /*NOTREACHED*/
5517 return -EINVAL;
5518}
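
/*
 * Editorial example (the numeric values are illustrative assumptions, not
 * quoted from hw_proj_gk20a.h): suppose proj_gpc_base_v() == 0x00500000,
 * proj_gpc_stride_v() == 0x8000, proj_tpc_in_gpc_base_v() == 0x4000 and
 * proj_tpc_in_gpc_stride_v() == 0x800.  A unicast address such as
 * 0x0050C500 then decodes as:
 *
 *   gpc_num  = (0x0050C500 - 0x00500000) / 0x8000 = 1
 *   gpc_addr = pri_gpccs_addr_mask(addr)          = 0x4500
 *   tpc_num  = (0x4500 - 0x4000) / 0x800          = 0
 *
 * giving addr_type = CTXSW_ADDR_TYPE_TPC, gpc 1, tpc 0, and no broadcast
 * flags.  A gr_gpcs_* or gr_gpcs_tpcs_* broadcast address instead sets
 * PRI_BROADCAST_FLAGS_GPC and/or PRI_BROADCAST_FLAGS_TPC.
 */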
5519
5520static int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
5521 u32 gpc_num,
5522 u32 *priv_addr_table, u32 *t)
5523{
5524 u32 ppc_num;
5525
5526 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
5527
5528 for (ppc_num = 0; ppc_num < g->gr.pe_count_per_gpc; ppc_num++)
5529 priv_addr_table[(*t)++] = pri_ppc_addr(pri_ppccs_addr_mask(addr),
5530 gpc_num, ppc_num);
5531
5532 return 0;
5533}
5534
5535/*
5536 * The context buffer is indexed using BE broadcast addresses and GPC/TPC
5537 * unicast addresses. This function will convert a BE unicast address to a BE
5538 * broadcast address and split a GPC/TPC broadcast address into a table of
5539 * GPC/TPC addresses. The addresses generated by this function can be
5540 * successfully processed by gr_gk20a_find_priv_offset_in_buffer
5541 */
5542static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
5543 u32 addr,
5544 u32 *priv_addr_table,
5545 u32 *num_registers)
5546{
5547 int addr_type; /*enum ctxsw_addr_type */
5548 u32 gpc_num, tpc_num, ppc_num, be_num;
5549 u32 broadcast_flags;
5550 u32 t;
5551 int err;
5552
5553 t = 0;
5554 *num_registers = 0;
5555
5556 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
5557
5558 err = gr_gk20a_decode_priv_addr(g, addr, &addr_type,
5559 &gpc_num, &tpc_num, &ppc_num, &be_num,
5560 &broadcast_flags);
5561 gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type = %d", addr_type);
5562 if (err)
5563 return err;
5564
5565 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
5566 (addr_type == CTXSW_ADDR_TYPE_BE)) {
5567 /* The BE broadcast registers are included in the compressed PRI
5568 * table. Convert a BE unicast address to a broadcast address
5569 * so that we can look up the offset. */
5570 if ((addr_type == CTXSW_ADDR_TYPE_BE) &&
5571 !(broadcast_flags & PRI_BROADCAST_FLAGS_BE))
5572 priv_addr_table[t++] = pri_be_shared_addr(addr);
5573 else
5574 priv_addr_table[t++] = addr;
5575
5576 *num_registers = t;
5577 return 0;
5578 }
5579
5580 /* The GPC/TPC unicast registers are included in the compressed PRI
5581 * tables. Convert a GPC/TPC broadcast address to unicast addresses so
5582 * that we can look up the offsets. */
5583 if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) {
5584 for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
5585
5586 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
5587 for (tpc_num = 0;
5588 tpc_num < g->gr.gpc_tpc_count[gpc_num];
5589 tpc_num++)
5590 priv_addr_table[t++] =
5591 pri_tpc_addr(pri_tpccs_addr_mask(addr),
5592 gpc_num, tpc_num);
5593
5594 else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
5595 err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num,
5596 priv_addr_table, &t);
5597 if (err)
5598 return err;
5599 } else
5600 priv_addr_table[t++] =
5601 pri_gpc_addr(pri_gpccs_addr_mask(addr),
5602 gpc_num);
5603 }
5604 } else {
5605 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
5606 for (tpc_num = 0;
5607 tpc_num < g->gr.gpc_tpc_count[gpc_num];
5608 tpc_num++)
5609 priv_addr_table[t++] =
5610 pri_tpc_addr(pri_tpccs_addr_mask(addr),
5611 gpc_num, tpc_num);
5612 else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC)
5613 err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num,
5614 priv_addr_table, &t);
5615 else
5616 priv_addr_table[t++] = addr;
5617 }
5618
5619 *num_registers = t;
5620 return 0;
5621}
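
/*
 * Editorial example (not part of the original source): with
 * g->gr.gpc_count == 1 and g->gr.gpc_tpc_count[0] == 2, passing a
 * gr_gpcs_tpcs_* broadcast register to gr_gk20a_create_priv_addr_table()
 * sets PRI_BROADCAST_FLAGS_GPC | PRI_BROADCAST_FLAGS_TPC and produces
 *
 *   priv_addr_table[0] = pri_tpc_addr(pri_tpccs_addr_mask(addr), 0, 0);
 *   priv_addr_table[1] = pri_tpc_addr(pri_tpccs_addr_mask(addr), 0, 1);
 *   *num_registers     = 2;
 *
 * i.e. one unicast address per TPC, which is the form that
 * gr_gk20a_find_priv_offset_in_buffer() expects.
 */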
5622
5623int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
5624 u32 addr,
5625 u32 max_offsets,
5626 u32 *offsets, u32 *offset_addrs,
5627 u32 *num_offsets,
5628 bool is_quad, u32 quad)
5629{
5630 u32 i;
5631 u32 priv_offset = 0;
5632 u32 *priv_registers;
5633 u32 num_registers = 0;
5634 int err = 0;
5635 u32 potential_offsets = proj_scal_litter_num_gpcs_v() *
5636 proj_scal_litter_num_tpc_per_gpc_v();
5637
5638 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
5639
5640 /* implementation is crossed-up if either of these happen */
5641 if (max_offsets > potential_offsets)
5642 return -EINVAL;
5643
5644 if (!g->gr.ctx_vars.golden_image_initialized)
5645 return -ENODEV;
5646
5647 priv_registers = kzalloc(sizeof(u32) * potential_offsets, GFP_KERNEL);
5648	if (!priv_registers) {
5649 gk20a_dbg_fn("failed alloc for potential_offsets=%d", potential_offsets);
5650		err = -ENOMEM;
5651 goto cleanup;
5652 }
5653 memset(offsets, 0, sizeof(u32) * max_offsets);
5654 memset(offset_addrs, 0, sizeof(u32) * max_offsets);
5655 *num_offsets = 0;
5656
5657 gr_gk20a_create_priv_addr_table(g, addr, &priv_registers[0], &num_registers);
5658
5659 if ((max_offsets > 1) && (num_registers > max_offsets)) {
5660 err = -EINVAL;
5661 goto cleanup;
5662 }
5663
5664 if ((max_offsets == 1) && (num_registers > 1))
5665 num_registers = 1;
5666
5667 if (!g->gr.ctx_vars.local_golden_image) {
5668 gk20a_dbg_fn("no context switch header info to work with");
5669 err = -EINVAL;
5670 goto cleanup;
5671 }
5672
5673 for (i = 0; i < num_registers; i++) {
5674 err = gr_gk20a_find_priv_offset_in_buffer(g,
5675 priv_registers[i],
5676 is_quad, quad,
5677 g->gr.ctx_vars.local_golden_image,
5678 g->gr.ctx_vars.golden_image_size,
5679 &priv_offset);
5680 if (err) {
5681 gk20a_dbg_fn("Could not determine priv_offset for addr:0x%x",
5682 addr); /*, grPriRegStr(addr)));*/
5683 goto cleanup;
5684 }
5685
5686 offsets[i] = priv_offset;
5687 offset_addrs[i] = priv_registers[i];
5688 }
5689
5690 *num_offsets = num_registers;
5691
5692 cleanup:
5693
5694 if (!IS_ERR_OR_NULL(priv_registers))
5695 kfree(priv_registers);
5696
5697 return err;
5698}
5699
5700/* Setup some register tables. This looks hacky; our
5701 * register/offset functions are just that, functions.
5702 * So they can't be used as initializers... TBD: fix to
5703 * generate consts at least on an as-needed basis.
5704 */
5705static const u32 _num_ovr_perf_regs = 17;
5706static u32 _ovr_perf_regs[17] = { 0, };
5707/* Following are the blocks of registers that the ucode
5708 * stores in the extended region. */
5709/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */
5710static const u32 _num_sm_dsm_perf_regs = 5;
5711/* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/
5712static const u32 _num_sm_dsm_perf_ctrl_regs = 4;
5713static u32 _sm_dsm_perf_regs[5];
5714static u32 _sm_dsm_perf_ctrl_regs[4];
5715
5716static void init_sm_dsm_reg_info(void)
5717{
5718 if (_ovr_perf_regs[0] != 0)
5719 return;
5720
5721 _ovr_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r();
5722 _ovr_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r();
5723 _ovr_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r();
5724 _ovr_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
5725 _ovr_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r();
5726 _ovr_perf_regs[5] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r();
5727 _ovr_perf_regs[6] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r();
5728 _ovr_perf_regs[7] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r();
5729 _ovr_perf_regs[8] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r();
5730 _ovr_perf_regs[9] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r();
5731 _ovr_perf_regs[10] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r();
5732 _ovr_perf_regs[11] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r();
5733 _ovr_perf_regs[12] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r();
5734 _ovr_perf_regs[13] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_r();
5735 _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r();
5736 _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r();
5737 _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r();
5738
5739
5740 _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r();
5741 _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r();
5742 _sm_dsm_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_r();
5743 _sm_dsm_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_r();
5744 _sm_dsm_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_r();
5745
5746 _sm_dsm_perf_ctrl_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r();
5747 _sm_dsm_perf_ctrl_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r();
5748 _sm_dsm_perf_ctrl_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r();
5749 _sm_dsm_perf_ctrl_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r();
5750
5751}
5752
5753/* TBD: would like to handle this elsewhere, at a higher level.
5754 * these are currently constructed in a "test-then-write" style
5755 * which makes it impossible to know externally whether a ctx
5756 * write will actually occur. so later we should put a lazy,
5757 * map-and-hold system in the patch write state */
5758int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
5759 struct channel_ctx_gk20a *ch_ctx,
5760 u32 addr, u32 data,
5761 u8 *context)
5762{
5763 u32 num_gpc = g->gr.gpc_count;
5764 u32 num_tpc;
5765 u32 tpc, gpc, reg;
5766 u32 chk_addr;
5767 u32 vaddr_lo;
5768 u32 vaddr_hi;
5769 u32 tmp;
5770
5771 init_sm_dsm_reg_info();
5772
5773 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
5774
5775 for (reg = 0; reg < _num_ovr_perf_regs; reg++) {
5776 for (gpc = 0; gpc < num_gpc; gpc++) {
5777 num_tpc = g->gr.gpc_tpc_count[gpc];
5778 for (tpc = 0; tpc < num_tpc; tpc++) {
5779 chk_addr = ((proj_gpc_stride_v() * gpc) +
5780 (proj_tpc_in_gpc_stride_v() * tpc) +
5781 _ovr_perf_regs[reg]);
5782 if (chk_addr != addr)
5783 continue;
5784 /* reset the patch count from previous
5785				   runs, if ucode has already processed
5786 it */
5787 tmp = gk20a_mem_rd32(context +
5788 ctxsw_prog_main_image_patch_count_o(), 0);
5789
5790 if (!tmp)
5791 ch_ctx->patch_ctx.data_count = 0;
5792
5793 gr_gk20a_ctx_patch_write(g, ch_ctx,
5794 addr, data, true);
5795
5796 vaddr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
5797 vaddr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
5798
5799 gk20a_mem_wr32(context +
5800 ctxsw_prog_main_image_patch_count_o(),
5801 0, ch_ctx->patch_ctx.data_count);
5802 gk20a_mem_wr32(context +
5803 ctxsw_prog_main_image_patch_adr_lo_o(),
5804 0, vaddr_lo);
5805 gk20a_mem_wr32(context +
5806 ctxsw_prog_main_image_patch_adr_hi_o(),
5807 0, vaddr_hi);
5808
5809 /* we're not caching these on cpu side,
5810 but later watch for it */
5811
5812 /* the l2 invalidate in the patch_write
5813 * would be too early for this? */
5814 gk20a_mm_l2_invalidate(g);
5815 return 0;
5816 }
5817 }
5818 }
5819
5820 return 0;
5821}
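
/*
 * Editorial note on the address match above (stride values are illustrative
 * assumptions): _ovr_perf_regs[] holds full gpc0/tpc0 register addresses, so
 * the PRI bases are already included and only the strides are added.  With a
 * GPC stride of 0x8000 and a TPC-in-GPC stride of 0x800, the gpc 0 / tpc 1
 * instance of a given entry is matched when
 *
 *   addr == _ovr_perf_regs[reg] + (0x8000 * 0) + (0x800 * 1)
 *
 * at which point the patch count, the patch buffer address and the new value
 * are written back into the context image.
 */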
5822
5823static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset)
5824{
5825 u32 reg;
5826 u32 quad_ctrl;
5827 u32 half_ctrl;
5828 u32 tpc, gpc;
5829 u32 gpc_tpc_addr;
5830 u32 gpc_tpc_stride;
5831
5832 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset);
5833
5834 gpc = pri_get_gpc_num(offset);
5835 gpc_tpc_addr = pri_gpccs_addr_mask(offset);
5836 tpc = pri_get_tpc_num(gpc_tpc_addr);
5837
5838 quad_ctrl = quad & 0x1; /* first bit tells us quad */
5839 half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */
5840
5841 gpc_tpc_stride = gpc * proj_gpc_stride_v() +
5842 tpc * proj_tpc_in_gpc_stride_v();
5843 gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride;
5844
5845 reg = gk20a_readl(g, gpc_tpc_addr);
5846 reg = set_field(reg,
5847 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(),
5848 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(quad_ctrl));
5849
5850 gk20a_writel(g, gpc_tpc_addr, reg);
5851
5852 gpc_tpc_addr = gr_gpc0_tpc0_sm_debug_sfe_control_r() + gpc_tpc_stride;
5853 reg = gk20a_readl(g, gpc_tpc_addr);
5854 reg = set_field(reg,
5855 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(),
5856 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(half_ctrl));
5857 gk20a_writel(g, gpc_tpc_addr, reg);
5858}
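
/*
 * Editorial example: the 'quad' argument above packs two selector bits, so
 *
 *   quad = 0 -> quad_ctrl = 0, half_ctrl = 0
 *   quad = 1 -> quad_ctrl = 1, half_ctrl = 0
 *   quad = 2 -> quad_ctrl = 0, half_ctrl = 1
 *   quad = 3 -> quad_ctrl = 1, half_ctrl = 1
 *
 * quad_ctrl is written into the per-TPC halfctl_ctrl register and half_ctrl
 * into debug_sfe_control before the SMPC register is accessed.
 */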
5859
5860#define ILLEGAL_ID (~0)
5861
5862static inline bool check_main_image_header_magic(void *context)
5863{
5864 u32 magic = gk20a_mem_rd32(context +
5865 ctxsw_prog_main_image_magic_value_o(), 0);
5866 gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic);
5867 return magic == ctxsw_prog_main_image_magic_value_v_value_v();
5868}
5869static inline bool check_local_header_magic(void *context)
5870{
5871 u32 magic = gk20a_mem_rd32(context +
5872 ctxsw_prog_local_magic_value_o(), 0);
5873 gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic);
5874 return magic == ctxsw_prog_local_magic_value_v_value_v();
5875
5876}
5877
5878/* most likely dupe of ctxsw_gpccs_header__size_1_v() */
5879static inline int ctxsw_prog_ucode_header_size_in_bytes(void)
5880{
5881 return 256;
5882}
5883
5884void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g,
5885 u32 *num_sm_dsm_perf_regs,
5886 u32 **sm_dsm_perf_regs,
5887 u32 *perf_register_stride)
5888{
5889 *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs;
5890 *sm_dsm_perf_regs = _sm_dsm_perf_regs;
5891 *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v();
5892}
5893
5894void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
5895 u32 *num_sm_dsm_perf_ctrl_regs,
5896 u32 **sm_dsm_perf_ctrl_regs,
5897 u32 *ctrl_register_stride)
5898{
5899 *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs;
5900 *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;
5901 *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
5902}
5903
5904static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
5905 u32 addr,
5906 bool is_quad, u32 quad,
5907 u32 *context_buffer,
5908 u32 context_buffer_size,
5909 u32 *priv_offset)
5910{
5911 u32 i, data32;
5912 u32 gpc_num, tpc_num;
5913 u32 num_gpcs, num_tpcs;
5914 u32 chk_addr;
5915 u32 ext_priv_offset, ext_priv_size;
5916 void *context;
5917 u32 offset_to_segment, offset_to_segment_end;
5918 u32 sm_dsm_perf_reg_id = ILLEGAL_ID;
5919 u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID;
5920 u32 num_ext_gpccs_ext_buffer_segments;
5921 u32 inter_seg_offset;
5922 u32 tpc_gpc_mask = (proj_tpc_in_gpc_stride_v() - 1);
5923 u32 max_tpc_count;
5924 u32 *sm_dsm_perf_ctrl_regs = NULL;
5925 u32 num_sm_dsm_perf_ctrl_regs = 0;
5926 u32 *sm_dsm_perf_regs = NULL;
5927 u32 num_sm_dsm_perf_regs = 0;
5928 u32 buffer_segments_size = 0;
5929 u32 marker_size = 0;
5930 u32 control_register_stride = 0;
5931 u32 perf_register_stride = 0;
5932
5933 /* Only have TPC registers in extended region, so if not a TPC reg,
5934 then return error so caller can look elsewhere. */
5935 if (pri_is_gpc_addr(addr)) {
5936 u32 gpc_addr = 0;
5937 gpc_num = pri_get_gpc_num(addr);
5938 gpc_addr = pri_gpccs_addr_mask(addr);
5939 if (pri_is_tpc_addr(gpc_addr))
5940 tpc_num = pri_get_tpc_num(gpc_addr);
5941 else
5942 return -EINVAL;
5943
5944 gk20a_dbg_info(" gpc = %d tpc = %d",
5945 gpc_num, tpc_num);
5946 } else
5947 return -EINVAL;
5948
5949 buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
5950 /* note below is in words/num_registers */
5951 marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2;
5952
5953 context = context_buffer;
5954 /* sanity check main header */
5955 if (!check_main_image_header_magic(context)) {
5956 gk20a_err(dev_from_gk20a(g),
5957 "Invalid main header: magic value");
5958 return -EINVAL;
5959 }
5960 num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0);
5961 if (gpc_num >= num_gpcs) {
5962 gk20a_err(dev_from_gk20a(g),
5963 "GPC 0x%08x is greater than total count 0x%08x!\n",
5964 gpc_num, num_gpcs);
5965 return -EINVAL;
5966 }
5967
5968 data32 = gk20a_mem_rd32(context + ctxsw_prog_main_extended_buffer_ctl_o(), 0);
5969 ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32);
5970 if (0 == ext_priv_size) {
5971 gk20a_dbg_info(" No extended memory in context buffer");
5972 return -EINVAL;
5973 }
5974 ext_priv_offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data32);
5975
5976 offset_to_segment = ext_priv_offset * ctxsw_prog_ucode_header_size_in_bytes();
5977 offset_to_segment_end = offset_to_segment +
5978 (ext_priv_size * buffer_segments_size);
5979
5980 /* check local header magic */
5981 context += ctxsw_prog_ucode_header_size_in_bytes();
5982 if (!check_local_header_magic(context)) {
5983 gk20a_err(dev_from_gk20a(g),
5984 "Invalid local header: magic value\n");
5985 return -EINVAL;
5986 }
5987
5988 /*
5989 * See if the incoming register address is in the first table of
5990 * registers. We check this by decoding only the TPC addr portion.
5991 * If we get a hit on the TPC bit, we then double check the address
5992 * by computing it from the base gpc/tpc strides. Then make sure
5993 * it is a real match.
5994 */
5995 g->ops.gr.get_sm_dsm_perf_regs(g, &num_sm_dsm_perf_regs,
5996 &sm_dsm_perf_regs,
5997 &perf_register_stride);
5998
5999 init_sm_dsm_reg_info();
6000
6001 for (i = 0; i < num_sm_dsm_perf_regs; i++) {
6002 if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) {
6003 sm_dsm_perf_reg_id = i;
6004
6005 gk20a_dbg_info("register match: 0x%08x",
6006 sm_dsm_perf_regs[i]);
6007
6008 chk_addr = (proj_gpc_base_v() +
6009 (proj_gpc_stride_v() * gpc_num) +
6010 proj_tpc_in_gpc_base_v() +
6011 (proj_tpc_in_gpc_stride_v() * tpc_num) +
6012 (sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask));
6013
6014 if (chk_addr != addr) {
6015 gk20a_err(dev_from_gk20a(g),
6016 "Oops addr miss-match! : 0x%08x != 0x%08x\n",
6017 addr, chk_addr);
6018 return -EINVAL;
6019 }
6020 break;
6021 }
6022 }
6023
6024 /* Didn't find reg in supported group 1.
6025 * so try the second group now */
6026 g->ops.gr.get_sm_dsm_perf_ctrl_regs(g, &num_sm_dsm_perf_ctrl_regs,
6027 &sm_dsm_perf_ctrl_regs,
6028 &control_register_stride);
6029
6030 if (ILLEGAL_ID == sm_dsm_perf_reg_id) {
6031 for (i = 0; i < num_sm_dsm_perf_ctrl_regs; i++) {
6032 if ((addr & tpc_gpc_mask) ==
6033 (sm_dsm_perf_ctrl_regs[i] & tpc_gpc_mask)) {
6034 sm_dsm_perf_ctrl_reg_id = i;
6035
6036 gk20a_dbg_info("register match: 0x%08x",
6037 sm_dsm_perf_ctrl_regs[i]);
6038
6039 chk_addr = (proj_gpc_base_v() +
6040 (proj_gpc_stride_v() * gpc_num) +
6041 proj_tpc_in_gpc_base_v() +
6042 (proj_tpc_in_gpc_stride_v() * tpc_num) +
6043 (sm_dsm_perf_ctrl_regs[sm_dsm_perf_ctrl_reg_id] &
6044 tpc_gpc_mask));
6045
6046 if (chk_addr != addr) {
6047 gk20a_err(dev_from_gk20a(g),
6048 "Oops addr miss-match! : 0x%08x != 0x%08x\n",
6049 addr, chk_addr);
6050 return -EINVAL;
6051
6052 }
6053
6054 break;
6055 }
6056 }
6057 }
6058
6059 if ((ILLEGAL_ID == sm_dsm_perf_ctrl_reg_id) &&
6060 (ILLEGAL_ID == sm_dsm_perf_reg_id))
6061 return -EINVAL;
6062
6063 /* Skip the FECS extended header, nothing there for us now. */
6064 offset_to_segment += buffer_segments_size;
6065
6066 /* skip through the GPCCS extended headers until we get to the data for
6067 * our GPC. The size of each gpc extended segment is enough to hold the
6068	 * max tpc count for the gpcs, in 256b chunks.
6069 */
6070
6071 max_tpc_count = proj_scal_litter_num_tpc_per_gpc_v();
6072
6073 num_ext_gpccs_ext_buffer_segments = (u32)((max_tpc_count + 1) / 2);
6074
6075 offset_to_segment += (num_ext_gpccs_ext_buffer_segments *
6076 buffer_segments_size * gpc_num);
6077
6078 num_tpcs = g->gr.gpc_tpc_count[gpc_num];
6079
6080 /* skip the head marker to start with */
6081 inter_seg_offset = marker_size;
6082
6083 if (ILLEGAL_ID != sm_dsm_perf_ctrl_reg_id) {
6084 /* skip over control regs of TPC's before the one we want.
6085 * then skip to the register in this tpc */
6086 inter_seg_offset = inter_seg_offset +
6087 (tpc_num * control_register_stride) +
6088 sm_dsm_perf_ctrl_reg_id;
6089 } else {
6090 /* skip all the control registers */
6091 inter_seg_offset = inter_seg_offset +
6092 (num_tpcs * control_register_stride);
6093
6094 /* skip the marker between control and counter segments */
6095 inter_seg_offset += marker_size;
6096
6097 /* skip over counter regs of TPCs before the one we want */
6098 inter_seg_offset = inter_seg_offset +
6099 (tpc_num * perf_register_stride) *
6100 ctxsw_prog_extended_num_smpc_quadrants_v();
6101
6102 /* skip over the register for the quadrants we do not want.
6103 * then skip to the register in this tpc */
6104 inter_seg_offset = inter_seg_offset +
6105 (perf_register_stride * quad) +
6106 sm_dsm_perf_reg_id;
6107 }
6108
6109 /* set the offset to the segment offset plus the inter segment offset to
6110 * our register */
6111 offset_to_segment += (inter_seg_offset * 4);
6112
6113 /* last sanity check: did we somehow compute an offset outside the
6114 * extended buffer? */
6115 if (offset_to_segment > offset_to_segment_end) {
6116 gk20a_err(dev_from_gk20a(g),
6117 "Overflow ctxsw buffer! 0x%08x > 0x%08x\n",
6118 offset_to_segment, offset_to_segment_end);
6119 return -EINVAL;
6120 }
6121
6122 *priv_offset = offset_to_segment;
6123
6124 return 0;
6125}
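
/*
 * Editorial summary of the offset computation above (not from the original
 * source).  For a perf-counter register (sm_dsm_perf_reg_id valid) the final
 * byte offset inside the context buffer is, in effect:
 *
 *   offset = ext_priv_offset * 256                        FECS-relative start
 *          + buffer_segments_size                         skip FECS ext header
 *          + num_ext_gpccs_ext_buffer_segments
 *            * buffer_segments_size * gpc_num             skip earlier GPCs
 *          + 4 * ( marker_size                            head marker
 *                + num_tpcs * control_register_stride     all ctrl regs
 *                + marker_size                            ctrl/counter marker
 *                + tpc_num * perf_register_stride * num_quadrants
 *                + perf_register_stride * quad
 *                + sm_dsm_perf_reg_id )
 *
 * When the address matches a control register instead, the word offset is
 * simply marker_size + tpc_num * control_register_stride +
 * sm_dsm_perf_ctrl_reg_id.  The result is then range-checked against
 * offset_to_segment_end.
 */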
6126
6127
6128static int
6129gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
6130 int addr_type,/* enum ctxsw_addr_type */
6131 u32 pri_addr,
6132 u32 gpc_num, u32 num_tpcs,
6133 u32 num_ppcs, u32 ppc_mask,
6134 u32 *priv_offset)
6135{
6136 u32 i;
6137 u32 address, base_address;
6138 u32 sys_offset, gpc_offset, tpc_offset, ppc_offset;
6139 u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr;
6140 struct aiv_gk20a *reg;
6141
6142 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr);
6143
6144 if (!g->gr.ctx_vars.valid)
6145 return -EINVAL;
6146
6147 /* Process the SYS/BE segment. */
6148 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
6149 (addr_type == CTXSW_ADDR_TYPE_BE)) {
6150 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
6151 reg = &g->gr.ctx_vars.ctxsw_regs.sys.l[i];
6152 address = reg->addr;
6153 sys_offset = reg->index;
6154
6155 if (pri_addr == address) {
6156 *priv_offset = sys_offset;
6157 return 0;
6158 }
6159 }
6160 }
6161
6162 /* Process the TPC segment. */
6163 if (addr_type == CTXSW_ADDR_TYPE_TPC) {
6164 for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
6165 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
6166 reg = &g->gr.ctx_vars.ctxsw_regs.tpc.l[i];
6167 address = reg->addr;
6168 tpc_addr = pri_tpccs_addr_mask(address);
6169 base_address = proj_gpc_base_v() +
6170 (gpc_num * proj_gpc_stride_v()) +
6171 proj_tpc_in_gpc_base_v() +
6172 (tpc_num * proj_tpc_in_gpc_stride_v());
6173 address = base_address + tpc_addr;
6174 /*
6175 * The data for the TPCs is interleaved in the context buffer.
6176 * Example with num_tpcs = 2
6177 * 0 1 2 3 4 5 6 7 8 9 10 11 ...
6178 * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
6179 */
6180 tpc_offset = (reg->index * num_tpcs) + (tpc_num * 4);
6181
6182 if (pri_addr == address) {
6183 *priv_offset = tpc_offset;
6184 return 0;
6185 }
6186 }
6187 }
6188 }
6189
6190 /* Process the PPC segment. */
6191 if (addr_type == CTXSW_ADDR_TYPE_PPC) {
6192 for (ppc_num = 0; ppc_num < num_ppcs; ppc_num++) {
6193 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
6194 reg = &g->gr.ctx_vars.ctxsw_regs.ppc.l[i];
6195 address = reg->addr;
6196 ppc_addr = pri_ppccs_addr_mask(address);
6197 base_address = proj_gpc_base_v() +
6198 (gpc_num * proj_gpc_stride_v()) +
6199 proj_ppc_in_gpc_base_v() +
6200 (ppc_num * proj_ppc_in_gpc_stride_v());
6201 address = base_address + ppc_addr;
6202 /*
6203 * The data for the PPCs is interleaved in the context buffer.
6204 * Example with numPpcs = 2
6205 * 0 1 2 3 4 5 6 7 8 9 10 11 ...
6206 * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
6207 */
6208 ppc_offset = (reg->index * num_ppcs) + (ppc_num * 4);
6209
6210 if (pri_addr == address) {
6211 *priv_offset = ppc_offset;
6212 return 0;
6213 }
6214 }
6215 }
6216 }
6217
6218
6219 /* Process the GPC segment. */
6220 if (addr_type == CTXSW_ADDR_TYPE_GPC) {
6221 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
6222 reg = &g->gr.ctx_vars.ctxsw_regs.gpc.l[i];
6223
6224 address = reg->addr;
6225 gpc_addr = pri_gpccs_addr_mask(address);
6226 gpc_offset = reg->index;
6227
6228 base_address = proj_gpc_base_v() +
6229 (gpc_num * proj_gpc_stride_v());
6230 address = base_address + gpc_addr;
6231
6232 if (pri_addr == address) {
6233 *priv_offset = gpc_offset;
6234 return 0;
6235 }
6236 }
6237 }
6238
6239 return -EINVAL;
6240}
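
/*
 * Editorial example of the TPC interleave formula above (assuming reg->index
 * is a byte offset, as the "* 4" term suggests): with num_tpcs = 2, the
 * register at index 0 lands at byte 0 for tpc 0 and byte 4 for tpc 1, the
 * register at index 4 at bytes 8 and 12, and in general
 *
 *   tpc_offset = reg->index * num_tpcs + tpc_num * 4
 *
 * which reproduces the "0-0 1-0 0-1 1-1 ..." layout shown in the comment.
 * The PPC segment uses the same scheme with num_ppcs in place of num_tpcs.
 */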
6241
6242static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
6243 void *context,
6244 u32 *num_ppcs, u32 *ppc_mask,
6245 u32 *reg_ppc_count)
6246{
6247 u32 data32;
6248 u32 litter_num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
6249
6250 /*
6251 * if there is only 1 PES_PER_GPC, then we put the PES registers
6252 * in the GPC reglist, so we can't error out if ppc.count == 0
6253 */
6254 if ((!g->gr.ctx_vars.valid) ||
6255 ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) &&
6256 (litter_num_pes_per_gpc > 1)))
6257 return -EINVAL;
6258
6259 data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0);
6260
6261 *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32);
6262 *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32);
6263
6264 *reg_ppc_count = g->gr.ctx_vars.ctxsw_regs.ppc.count;
6265
6266 return 0;
6267}
6268
6269
6270
6271/*
6272 * This function will return the 32 bit offset for a priv register if it is
6273 * present in the context buffer.
6274 */
6275static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
6276 u32 addr,
6277 bool is_quad, u32 quad,
6278 u32 *context_buffer,
6279 u32 context_buffer_size,
6280 u32 *priv_offset)
6281{
6282 struct gr_gk20a *gr = &g->gr;
6283 u32 i, data32;
6284 int err;
6285 int addr_type; /*enum ctxsw_addr_type */
6286 u32 broadcast_flags;
6287 u32 gpc_num, tpc_num, ppc_num, be_num;
6288 u32 num_gpcs, num_tpcs, num_ppcs;
6289 u32 offset;
6290 u32 sys_priv_offset, gpc_priv_offset;
6291 u32 ppc_mask, reg_list_ppc_count;
6292 void *context;
6293 u32 offset_to_segment;
6294
6295 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6296
6297 err = gr_gk20a_decode_priv_addr(g, addr, &addr_type,
6298 &gpc_num, &tpc_num, &ppc_num, &be_num,
6299 &broadcast_flags);
6300 if (err)
6301 return err;
6302
6303 context = context_buffer;
6304 if (!check_main_image_header_magic(context)) {
6305 gk20a_err(dev_from_gk20a(g),
6306 "Invalid main header: magic value");
6307 return -EINVAL;
6308 }
6309 num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0);
6310
6311 /* Parse the FECS local header. */
6312 context += ctxsw_prog_ucode_header_size_in_bytes();
6313 if (!check_local_header_magic(context)) {
6314 gk20a_err(dev_from_gk20a(g),
6315 "Invalid FECS local header: magic value\n");
6316 return -EINVAL;
6317 }
6318 data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0);
6319 sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
6320
6321 /* If found in Ext buffer, ok.
6322 * If it failed and we expected to find it there (quad offset)
6323 * then return the error. Otherwise continue on.
6324 */
6325 err = gr_gk20a_find_priv_offset_in_ext_buffer(g,
6326 addr, is_quad, quad, context_buffer,
6327 context_buffer_size, priv_offset);
6328 if (!err || (err && is_quad))
6329 return err;
6330
6331 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
6332 (addr_type == CTXSW_ADDR_TYPE_BE)) {
6333 /* Find the offset in the FECS segment. */
6334 offset_to_segment = sys_priv_offset *
6335 ctxsw_prog_ucode_header_size_in_bytes();
6336
6337 err = gr_gk20a_process_context_buffer_priv_segment(g,
6338 addr_type, addr,
6339 0, 0, 0, 0,
6340 &offset);
6341 if (err)
6342 return err;
6343
6344 *priv_offset = (offset_to_segment + offset);
6345 return 0;
6346 }
6347
6348 if ((gpc_num + 1) > num_gpcs) {
6349 gk20a_err(dev_from_gk20a(g),
6350 "GPC %d not in this context buffer.\n",
6351 gpc_num);
6352 return -EINVAL;
6353 }
6354
6355 /* Parse the GPCCS local header(s).*/
6356 for (i = 0; i < num_gpcs; i++) {
6357 context += ctxsw_prog_ucode_header_size_in_bytes();
6358 if (!check_local_header_magic(context)) {
6359 gk20a_err(dev_from_gk20a(g),
6360 "Invalid GPCCS local header: magic value\n");
6361 return -EINVAL;
6362
6363 }
6364 data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0);
6365 gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
6366
6367 err = gr_gk20a_determine_ppc_configuration(g, context,
6368 &num_ppcs, &ppc_mask,
6369 &reg_list_ppc_count);
6370 if (err)
6371 return err;
6372
6373 num_tpcs = gk20a_mem_rd32(context + ctxsw_prog_local_image_num_tpcs_o(), 0);
6374
6375 if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) {
6376 gk20a_err(dev_from_gk20a(g),
6377 "GPC %d TPC %d not in this context buffer.\n",
6378 gpc_num, tpc_num);
6379 return -EINVAL;
6380 }
6381
6382 /* Find the offset in the GPCCS segment.*/
6383 if (i == gpc_num) {
6384 offset_to_segment = gpc_priv_offset *
6385 ctxsw_prog_ucode_header_size_in_bytes();
6386
6387 if (addr_type == CTXSW_ADDR_TYPE_TPC) {
6388 /*reg = gr->ctx_vars.ctxsw_regs.tpc.l;*/
6389 } else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
6390 /* The ucode stores TPC data before PPC data.
6391 * Advance offset past TPC data to PPC data. */
6392 offset_to_segment +=
6393 ((gr->ctx_vars.ctxsw_regs.tpc.count *
6394 num_tpcs) << 2);
6395 } else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
6396 /* The ucode stores TPC/PPC data before GPC data.
6397 * Advance offset past TPC/PPC data to GPC data. */
6398 /* note 1 PES_PER_GPC case */
6399 u32 litter_num_pes_per_gpc =
6400 proj_scal_litter_num_pes_per_gpc_v();
6401 if (litter_num_pes_per_gpc > 1) {
6402 offset_to_segment +=
6403 (((gr->ctx_vars.ctxsw_regs.tpc.count *
6404 num_tpcs) << 2) +
6405 ((reg_list_ppc_count * num_ppcs) << 2));
6406 } else {
6407 offset_to_segment +=
6408 ((gr->ctx_vars.ctxsw_regs.tpc.count *
6409 num_tpcs) << 2);
6410 }
6411 } else {
6412 gk20a_err(dev_from_gk20a(g),
6413 " Unknown address type.\n");
6414 return -EINVAL;
6415 }
6416 err = gr_gk20a_process_context_buffer_priv_segment(g,
6417 addr_type, addr,
6418 i, num_tpcs,
6419 num_ppcs, ppc_mask,
6420 &offset);
6421 if (err)
6422 return -EINVAL;
6423
6424 *priv_offset = offset_to_segment + offset;
6425 return 0;
6426 }
6427 }
6428
6429 return -EINVAL;
6430}
6431
6432
6433int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
6434 struct nvhost_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
6435 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
6436{
6437 struct gk20a *g = ch->g;
6438 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
6439 void *ctx_ptr = NULL;
6440 int curr_gr_chid, curr_gr_ctx;
6441 bool ch_is_curr_ctx, restart_gr_ctxsw = false;
6442 u32 i, j, offset, v;
6443 u32 max_offsets = proj_scal_litter_num_gpcs_v() *
6444 proj_scal_litter_num_tpc_per_gpc_v();
6445 u32 *offsets = NULL;
6446 u32 *offset_addrs = NULL;
6447 u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops};
6448 int err, pass;
6449
6450 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
6451 num_ctx_wr_ops, num_ctx_rd_ops);
6452
6453 /* disable channel switching.
6454 * at that point the hardware state can be inspected to
6455 * determine if the context we're interested in is current.
6456 */
6457 err = gr_gk20a_disable_ctxsw(g);
6458 if (err) {
6459 gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw");
6460 /* this should probably be ctx-fatal... */
6461 goto cleanup;
6462 }
6463
6464 restart_gr_ctxsw = true;
6465
6466 curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
6467 curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx);
6468 ch_is_curr_ctx = (curr_gr_chid != -1) && (ch->hw_chid == curr_gr_chid);
6469
6470 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx);
6471 if (ch_is_curr_ctx) {
6472 for (pass = 0; pass < 2; pass++) {
6473 ctx_op_nr = 0;
6474 for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) {
6475 /* only do ctx ops and only on the right pass */
6476 if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
6477 (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
6478 ((pass == 1) && !reg_op_is_read(ctx_ops[i].op))))
6479 continue;
6480
6481 /* if this is a quad access, setup for special access*/
6482 if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD)
6483 && g->ops.gr.access_smpc_reg)
6484 g->ops.gr.access_smpc_reg(g,
6485 ctx_ops[i].quad,
6486 ctx_ops[i].offset);
6487 offset = ctx_ops[i].offset;
6488
6489 if (pass == 0) { /* write pass */
6490 v = gk20a_readl(g, offset);
6491 v &= ~ctx_ops[i].and_n_mask_lo;
6492 v |= ctx_ops[i].value_lo;
6493 gk20a_writel(g, offset, v);
6494
6495 gk20a_dbg(gpu_dbg_gpu_dbg,
6496 "direct wr: offset=0x%x v=0x%x",
6497 offset, v);
6498
6499 if (ctx_ops[i].op == REGOP(WRITE_64)) {
6500 v = gk20a_readl(g, offset + 4);
6501 v &= ~ctx_ops[i].and_n_mask_hi;
6502 v |= ctx_ops[i].value_hi;
6503 gk20a_writel(g, offset + 4, v);
6504
6505 gk20a_dbg(gpu_dbg_gpu_dbg,
6506 "direct wr: offset=0x%x v=0x%x",
6507 offset + 4, v);
6508 }
6509
6510 } else { /* read pass */
6511 ctx_ops[i].value_lo =
6512 gk20a_readl(g, offset);
6513
6514 gk20a_dbg(gpu_dbg_gpu_dbg,
6515 "direct rd: offset=0x%x v=0x%x",
6516 offset, ctx_ops[i].value_lo);
6517
6518 if (ctx_ops[i].op == REGOP(READ_64)) {
6519 ctx_ops[i].value_hi =
6520 gk20a_readl(g, offset + 4);
6521
6522 gk20a_dbg(gpu_dbg_gpu_dbg,
6523 "direct rd: offset=0x%x v=0x%x",
6524						   offset + 4, ctx_ops[i].value_hi);
6525 } else
6526 ctx_ops[i].value_hi = 0;
6527 }
6528 ctx_op_nr++;
6529 }
6530 }
6531 goto cleanup;
6532 }
6533
6534 /* they're the same size, so just use one alloc for both */
6535 offsets = kzalloc(2 * sizeof(u32) * max_offsets, GFP_KERNEL);
6536 if (!offsets) {
6537 err = -ENOMEM;
6538 goto cleanup;
6539 }
6540 offset_addrs = offsets + max_offsets;
6541
6542 /* would have been a variant of gr_gk20a_apply_instmem_overrides */
6543 /* recoded in-place instead.*/
6544 ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
6545 PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
6546 0, pgprot_dmacoherent(PAGE_KERNEL));
6547 if (!ctx_ptr) {
6548 err = -ENOMEM;
6549 goto cleanup;
6550 }
6551
6552 /* Channel gr_ctx buffer is gpu cacheable; so flush and invalidate.
6553 * There should be no on-going/in-flight references by the gpu now. */
6554 gk20a_mm_fb_flush(g);
6555 gk20a_mm_l2_flush(g, true);
6556
6557 /* write to appropriate place in context image,
6558 * first have to figure out where that really is */
6559
6560 /* first pass is writes, second reads */
6561 for (pass = 0; pass < 2; pass++) {
6562 ctx_op_nr = 0;
6563 for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) {
6564 u32 num_offsets;
6565
6566 /* only do ctx ops and only on the right pass */
6567 if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
6568 (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
6569 ((pass == 1) && !reg_op_is_read(ctx_ops[i].op))))
6570 continue;
6571
6572 err = gr_gk20a_get_ctx_buffer_offsets(g,
6573 ctx_ops[i].offset,
6574 max_offsets,
6575 offsets, offset_addrs,
6576 &num_offsets,
6577 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
6578 ctx_ops[i].quad);
6579 if (err) {
6580 gk20a_dbg(gpu_dbg_gpu_dbg,
6581 "ctx op invalid offset: offset=0x%x",
6582 ctx_ops[i].offset);
6583 ctx_ops[i].status =
6584 NVHOST_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET;
6585 continue;
6586 }
6587
6588 /* if this is a quad access, setup for special access*/
6589 if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) &&
6590 g->ops.gr.access_smpc_reg)
6591 g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad,
6592 ctx_ops[i].offset);
6593
6594 for (j = 0; j < num_offsets; j++) {
6595 /* sanity check, don't write outside, worst case */
6596 if (offsets[j] >= g->gr.ctx_vars.golden_image_size)
6597 continue;
6598 if (pass == 0) { /* write pass */
6599 v = gk20a_mem_rd32(ctx_ptr + offsets[j], 0);
6600 v &= ~ctx_ops[i].and_n_mask_lo;
6601 v |= ctx_ops[i].value_lo;
6602 gk20a_mem_wr32(ctx_ptr + offsets[j], 0, v);
6603
6604 gk20a_dbg(gpu_dbg_gpu_dbg,
6605 "context wr: offset=0x%x v=0x%x",
6606 offsets[j], v);
6607
6608 if (ctx_ops[i].op == REGOP(WRITE_64)) {
6609 v = gk20a_mem_rd32(ctx_ptr + offsets[j] + 4, 0);
6610 v &= ~ctx_ops[i].and_n_mask_hi;
6611 v |= ctx_ops[i].value_hi;
6612 gk20a_mem_wr32(ctx_ptr + offsets[j] + 4, 0, v);
6613
6614 gk20a_dbg(gpu_dbg_gpu_dbg,
6615 "context wr: offset=0x%x v=0x%x",
6616 offsets[j] + 4, v);
6617 }
6618
6619 /* check to see if we need to add a special WAR
6620 for some of the SMPC perf regs */
6621 gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j],
6622 v, ctx_ptr);
6623
6624 } else { /* read pass */
6625 ctx_ops[i].value_lo =
6626 gk20a_mem_rd32(ctx_ptr + offsets[0], 0);
6627
6628 gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x",
6629 offsets[0], ctx_ops[i].value_lo);
6630
6631 if (ctx_ops[i].op == REGOP(READ_64)) {
6632 ctx_ops[i].value_hi =
6633 gk20a_mem_rd32(ctx_ptr + offsets[0] + 4, 0);
6634
6635 gk20a_dbg(gpu_dbg_gpu_dbg,
6636 "context rd: offset=0x%x v=0x%x",
6637 offsets[0] + 4, ctx_ops[i].value_hi);
6638 } else
6639 ctx_ops[i].value_hi = 0;
6640 }
6641 }
6642 ctx_op_nr++;
6643 }
6644 }
6645#if 0
6646 /* flush cpu caches for the ctx buffer? only if cpu cached, of course.
6647 * they aren't, yet */
6648 if (cached) {
6649 FLUSH_CPU_DCACHE(ctx_ptr,
6650 sg_phys(ch_ctx->gr_ctx.mem.ref), size);
6651 }
6652#endif
6653
6654 cleanup:
6655 if (offsets)
6656 kfree(offsets);
6657
6658 if (ctx_ptr)
6659 vunmap(ctx_ptr);
6660
6661 if (restart_gr_ctxsw) {
6662 int tmp_err = gr_gk20a_enable_ctxsw(g);
6663 if (tmp_err) {
6664 gk20a_err(dev_from_gk20a(g), "unable to restart ctxsw!\n");
6665 err = tmp_err;
6666 }
6667 }
6668
6669 return err;
6670}
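
#if 0
/*
 * Editorial usage sketch (hypothetical; the READ_32/TYPE_GR_CTX spellings are
 * assumptions -- only READ_64, TYPE_GR_CTX_QUAD and TYPE_GLOBAL appear
 * above): a single 32-bit read of a context register via
 * gr_gk20a_exec_ctx_ops() could look like this.
 */
static int example_ctx_read(struct channel_gk20a *ch, u32 reg, u32 *value)
{
	struct nvhost_dbg_gpu_reg_op op = {
		.op     = REGOP(READ_32),
		.type   = REGOP(TYPE_GR_CTX),
		.offset = reg,
	};
	int err = gr_gk20a_exec_ctx_ops(ch, &op, 1, 0, 1);

	if (!err)
		*value = op.value_lo;	/* read live or from the saved ctx image */
	return err;
}
#endif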
6671
6672static void gr_gk20a_cb_size_default(struct gk20a *g)
6673{
6674 struct gr_gk20a *gr = &g->gr;
6675
6676 gr->attrib_cb_default_size =
6677 gr_gpc0_ppc0_cbm_cfg_size_default_v();
6678 gr->alpha_cb_default_size =
6679 gr_gpc0_ppc0_cbm_cfg2_size_default_v();
6680}
6681
6682static int gr_gk20a_calc_global_ctx_buffer_size(struct gk20a *g)
6683{
6684 struct gr_gk20a *gr = &g->gr;
6685 int size;
6686
6687 gr->attrib_cb_size = gr->attrib_cb_default_size;
6688 gr->alpha_cb_size = gr->alpha_cb_default_size
6689 + (gr->alpha_cb_default_size >> 1);
6690
6691 size = gr->attrib_cb_size *
6692 gr_gpc0_ppc0_cbm_cfg_size_granularity_v() *
6693 gr->max_tpc_count;
6694
6695 size += gr->alpha_cb_size *
6696 gr_gpc0_ppc0_cbm_cfg2_size_granularity_v() *
6697 gr->max_tpc_count;
6698
6699 return size;
6700}
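
/*
 * Editorial example with illustrative numbers (the real defaults come from
 * the hw_gr_gk20a.h *_default_v()/_granularity_v() functions): if
 * attrib_cb_default_size were 0x240 and alpha_cb_default_size 0x800, then
 *
 *   attrib_cb_size = 0x240
 *   alpha_cb_size  = 0x800 + 0x800/2 = 0xC00
 *   size           = 0x240 * attrib_granularity * max_tpc_count
 *                  + 0xC00 * alpha_granularity  * max_tpc_count
 *
 * i.e. both circular buffers scale linearly with the TPC count, and the
 * alpha buffer is provisioned at 1.5x its default size.
 */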
6701
6702void gr_gk20a_commit_global_pagepool(struct gk20a *g,
6703 struct channel_ctx_gk20a *ch_ctx,
6704 u64 addr, u32 size, bool patch)
6705{
6706 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(),
6707 gr_scc_pagepool_base_addr_39_8_f(addr), patch);
6708
6709 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(),
6710 gr_scc_pagepool_total_pages_f(size) |
6711 gr_scc_pagepool_valid_true_f(), patch);
6712
6713 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(),
6714 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
6715
6716 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(),
6717 gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
6718
6719 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_pagepool_r(),
6720 gr_pd_pagepool_total_pages_f(size) |
6721 gr_pd_pagepool_valid_true_f(), patch);
6722}
6723
6724void gk20a_init_gr(struct gpu_ops *gops)
6725{
6726 gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
6727 gops->gr.bundle_cb_defaults = gr_gk20a_bundle_cb_defaults;
6728 gops->gr.cb_size_default = gr_gk20a_cb_size_default;
6729 gops->gr.calc_global_ctx_buffer_size =
6730 gr_gk20a_calc_global_ctx_buffer_size;
6731 gops->gr.commit_global_attrib_cb = gr_gk20a_commit_global_attrib_cb;
6732 gops->gr.commit_global_bundle_cb = gr_gk20a_commit_global_bundle_cb;
6733 gops->gr.commit_global_cb_manager = gr_gk20a_commit_global_cb_manager;
6734 gops->gr.commit_global_pagepool = gr_gk20a_commit_global_pagepool;
6735 gops->gr.handle_sw_method = gr_gk20a_handle_sw_method;
6736	gops->gr.set_alpha_circular_buffer_size =
6737		gk20a_gr_set_alpha_circular_buffer_size;
6738	gops->gr.set_circular_buffer_size =
6739		gk20a_gr_set_circular_buffer_size;
6740 gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions;
6741 gops->gr.is_valid_class = gr_gk20a_is_valid_class;
6742 gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs;
6743 gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs;
6744 gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep;
6745 gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask;
6746 gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables;
6747}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
new file mode 100644
index 00000000..7eb2923a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -0,0 +1,406 @@
1/*
2 * GK20A Graphics Engine
3 *
4 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18#ifndef __GR_GK20A_H__
19#define __GR_GK20A_H__
20
21#include <linux/slab.h>
22
23#include "gr_ctx_gk20a.h"
24
25#define GR_IDLE_CHECK_DEFAULT 100 /* usec */
26#define GR_IDLE_CHECK_MAX 5000 /* usec */
27
28#define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF
29#define INVALID_MAX_WAYS 0xFFFFFFFF
30
31#define GK20A_FECS_UCODE_IMAGE "fecs.bin"
32#define GK20A_GPCCS_UCODE_IMAGE "gpccs.bin"
33
34enum /* global_ctx_buffer */ {
35 CIRCULAR = 0,
36 PAGEPOOL = 1,
37 ATTRIBUTE = 2,
38 CIRCULAR_VPR = 3,
39 PAGEPOOL_VPR = 4,
40 ATTRIBUTE_VPR = 5,
41 GOLDEN_CTX = 6,
42 PRIV_ACCESS_MAP = 7,
43 NR_GLOBAL_CTX_BUF = 8
44};
45
46/* either ATTRIBUTE or ATTRIBUTE_VPR maps to ATTRIBUTE_VA */
47enum /*global_ctx_buffer_va */ {
48 CIRCULAR_VA = 0,
49 PAGEPOOL_VA = 1,
50 ATTRIBUTE_VA = 2,
51 GOLDEN_CTX_VA = 3,
52 PRIV_ACCESS_MAP_VA = 4,
53 NR_GLOBAL_CTX_BUF_VA = 5
54};
55
56enum {
57 WAIT_UCODE_LOOP,
58 WAIT_UCODE_TIMEOUT,
59 WAIT_UCODE_ERROR,
60 WAIT_UCODE_OK
61};
62
63enum {
64 GR_IS_UCODE_OP_EQUAL,
65 GR_IS_UCODE_OP_NOT_EQUAL,
66 GR_IS_UCODE_OP_AND,
67 GR_IS_UCODE_OP_LESSER,
68 GR_IS_UCODE_OP_LESSER_EQUAL,
69 GR_IS_UCODE_OP_SKIP
70};
71
72enum {
73 eUcodeHandshakeInitComplete = 1,
74 eUcodeHandshakeMethodFinished
75};
76
77enum {
78 ELCG_RUN, /* clk always run, i.e. disable elcg */
79 ELCG_STOP, /* clk is stopped */
80 ELCG_AUTO /* clk will run when non-idle, standard elcg mode */
81};
82
83enum {
84 BLCG_RUN, /* clk always run, i.e. disable blcg */
85 BLCG_AUTO /* clk will run when non-idle, standard blcg mode */
86};
87
88#ifndef GR_GO_IDLE_BUNDLE
89#define GR_GO_IDLE_BUNDLE 0x0000e100 /* --V-B */
90#endif
91
92struct gr_channel_map_tlb_entry {
93 u32 curr_ctx;
94 u32 hw_chid;
95};
96
97struct gr_zcull_gk20a {
98 u32 aliquot_width;
99 u32 aliquot_height;
100 u32 aliquot_size;
101 u32 total_aliquots;
102
103 u32 width_align_pixels;
104 u32 height_align_pixels;
105 u32 pixel_squares_by_aliquots;
106};
107
108struct gr_zcull_info {
109 u32 width_align_pixels;
110 u32 height_align_pixels;
111 u32 pixel_squares_by_aliquots;
112 u32 aliquot_total;
113 u32 region_byte_multiplier;
114 u32 region_header_size;
115 u32 subregion_header_size;
116 u32 subregion_width_align_pixels;
117 u32 subregion_height_align_pixels;
118 u32 subregion_count;
119};
120
121#define GK20A_ZBC_COLOR_VALUE_SIZE 4 /* RGBA */
122
123#define GK20A_STARTOF_ZBC_TABLE 1 /* index zero reserved to indicate "not ZBCd" */
124#define GK20A_SIZEOF_ZBC_TABLE 16 /* match ltcs_ltss_dstg_zbc_index_address width (4) */
125#define GK20A_ZBC_TABLE_SIZE (16 - 1)
126
127#define GK20A_ZBC_TYPE_INVALID 0
128#define GK20A_ZBC_TYPE_COLOR 1
129#define GK20A_ZBC_TYPE_DEPTH 2
130
131struct zbc_color_table {
132 u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
133 u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
134 u32 format;
135 u32 ref_cnt;
136};
137
138struct zbc_depth_table {
139 u32 depth;
140 u32 format;
141 u32 ref_cnt;
142};
143
144struct zbc_entry {
145 u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
146 u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
147 u32 depth;
148 u32 type; /* color or depth */
149 u32 format;
150};
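/*
 * Illustrative sketch of filling a color entry before handing it to
 * gr_gk20a_add_zbc() (declared below). The variables g, gr, fmt, ds[],
 * l2[], i and err are caller-supplied placeholders, not names defined
 * in this header:
 *
 *	struct zbc_entry val = { 0 };
 *	val.type = GK20A_ZBC_TYPE_COLOR;
 *	val.format = fmt;			// hw color format chosen by the caller
 *	for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
 *		val.color_ds[i] = ds[i];	// value seen by the DS unit
 *		val.color_l2[i] = l2[i];	// value written to L2
 *	}
 *	err = gr_gk20a_add_zbc(g, gr, &val);
 */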
151
152struct zbc_query_params {
153 u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
154 u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
155 u32 depth;
156 u32 ref_cnt;
157 u32 format;
158 u32 type; /* color or depth */
159 u32 index_size; /* [out] size, [in] index */
160};
161
162struct gr_gk20a {
163 struct gk20a *g;
164 struct {
165 bool dynamic;
166
167 u32 buffer_size;
168 u32 buffer_total_size;
169
170 bool golden_image_initialized;
171 u32 golden_image_size;
172 u32 *local_golden_image;
173
174 u32 zcull_ctxsw_image_size;
175
176 u32 buffer_header_size;
177
178 u32 priv_access_map_size;
179
180 struct gr_ucode_gk20a ucode;
181
182 struct av_list_gk20a sw_bundle_init;
183 struct av_list_gk20a sw_method_init;
184 struct aiv_list_gk20a sw_ctx_load;
185 struct av_list_gk20a sw_non_ctx_load;
186 struct {
187 struct aiv_list_gk20a sys;
188 struct aiv_list_gk20a gpc;
189 struct aiv_list_gk20a tpc;
190 struct aiv_list_gk20a zcull_gpc;
191 struct aiv_list_gk20a ppc;
192 struct aiv_list_gk20a pm_sys;
193 struct aiv_list_gk20a pm_gpc;
194 struct aiv_list_gk20a pm_tpc;
195 } ctxsw_regs;
196 int regs_base_index;
197 bool valid;
198 } ctx_vars;
199
200 struct mutex ctx_mutex; /* protect golden ctx init */
201 struct mutex fecs_mutex; /* protect fecs method */
202
203#define GR_NETLIST_DYNAMIC -1
204#define GR_NETLIST_STATIC_A 'A'
205 int netlist;
206
207 int initialized;
208 u32 num_fbps;
209
210 u32 max_gpc_count;
211 u32 max_fbps_count;
212 u32 max_tpc_per_gpc_count;
213 u32 max_zcull_per_gpc_count;
214 u32 max_tpc_count;
215
216 u32 sys_count;
217 u32 gpc_count;
218 u32 pe_count_per_gpc;
219 u32 ppc_count;
220 u32 *gpc_ppc_count;
221 u32 tpc_count;
222 u32 *gpc_tpc_count;
223 u32 zcb_count;
224 u32 *gpc_zcb_count;
225 u32 *pes_tpc_count[2];
226 u32 *pes_tpc_mask[2];
227 u32 *gpc_skip_mask;
228
229 u32 bundle_cb_default_size;
230 u32 min_gpm_fifo_depth;
231 u32 bundle_cb_token_limit;
232 u32 attrib_cb_default_size;
233 u32 attrib_cb_size;
234 u32 alpha_cb_default_size;
235 u32 alpha_cb_size;
236 u32 timeslice_mode;
237
238 struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
239
240 struct mmu_desc mmu_wr_mem;
241 u32 mmu_wr_mem_size;
242 struct mmu_desc mmu_rd_mem;
243 u32 mmu_rd_mem_size;
244
245 u8 *map_tiles;
246 u32 map_tile_count;
247 u32 map_row_offset;
248
249#define COMP_TAG_LINE_SIZE_SHIFT (17) /* one tag covers 128K */
250#define COMP_TAG_LINE_SIZE (1 << COMP_TAG_LINE_SIZE_SHIFT)
251
252 u32 max_comptag_mem; /* max memory size (MB) for comptag */
253 struct compbit_store_desc compbit_store;
254 struct gk20a_allocator comp_tags;
255
256 struct gr_zcull_gk20a zcull;
257
258 struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE];
259 struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE];
260
261 s32 max_default_color_index;
262 s32 max_default_depth_index;
263
264 s32 max_used_color_index;
265 s32 max_used_depth_index;
266
267 u32 status_disable_mask;
268
269#define GR_CHANNEL_MAP_TLB_SIZE 2 /* must be a power of 2 */
270 struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
271 u32 channel_tlb_flush_index;
272 spinlock_t ch_tlb_lock;
273
274 void (*remove_support)(struct gr_gk20a *gr);
275 bool sw_ready;
276 bool skip_ucode_init;
277};
278
279void gk20a_fecs_dump_falcon_stats(struct gk20a *g);
280
281struct gk20a_ctxsw_ucode_segment {
282 u32 offset;
283 u32 size;
284};
285
286struct gk20a_ctxsw_ucode_segments {
287 u32 boot_entry;
288 u32 boot_imem_offset;
289 struct gk20a_ctxsw_ucode_segment boot;
290 struct gk20a_ctxsw_ucode_segment code;
291 struct gk20a_ctxsw_ucode_segment data;
292};
293
294struct gk20a_ctxsw_ucode_info {
295 u64 *p_va;
296 struct inst_desc inst_blk_desc;
297 struct surface_mem_desc surface_desc;
298 u64 ucode_gpuva;
299 struct gk20a_ctxsw_ucode_segments fecs;
300 struct gk20a_ctxsw_ucode_segments gpccs;
301};
302
303struct gk20a_ctxsw_bootloader_desc {
304 u32 start_offset;
305 u32 size;
306 u32 imem_offset;
307 u32 entry_point;
308};
309
310struct gpu_ops;
311void gk20a_init_gr(struct gpu_ops *gops);
312int gk20a_init_gr_support(struct gk20a *g);
313void gk20a_gr_reset(struct gk20a *g);
314
315int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a);
316
317int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr);
318
319struct nvhost_alloc_obj_ctx_args;
320struct nvhost_free_obj_ctx_args;
321
322int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
323 struct nvhost_alloc_obj_ctx_args *args);
324int gk20a_free_obj_ctx(struct channel_gk20a *c,
325 struct nvhost_free_obj_ctx_args *args);
326void gk20a_free_channel_ctx(struct channel_gk20a *c);
327
328int gk20a_gr_isr(struct gk20a *g);
329int gk20a_gr_nonstall_isr(struct gk20a *g);
330
331/* zcull */
332u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr);
333int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
334 struct channel_gk20a *c, u64 zcull_va, u32 mode);
335int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
336 struct gr_zcull_info *zcull_params);
337/* zbc */
338int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
339 struct zbc_entry *zbc_val);
340int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
341 struct zbc_query_params *query_params);
342int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
343 struct zbc_entry *zbc_val);
344int gr_gk20a_clear_zbc_table(struct gk20a *g, struct gr_gk20a *gr);
345int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr);
346
347/* pmu */
348int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size);
349int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr);
350int gr_gk20a_fecs_set_reglist_virual_addr(struct gk20a *g, u64 pmu_va);
351
352void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine);
353void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine);
354
355/* sm */
356bool gk20a_gr_sm_debugger_attached(struct gk20a *g);
357
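/*
 * Run 'func' with engine-level power gating (ELPG) disabled around the
 * call, so the graphics engine cannot be power-gated mid-operation.
 * Illustrative use (all names other than the macro are placeholders):
 *
 *	err = gr_gk20a_elpg_protected_call(g,
 *			gr_gk20a_exec_ctx_ops(ch, ops, n, n_wr, n_rd));
 */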
358#define gr_gk20a_elpg_protected_call(g, func) \
359 ({ \
360 int err; \
361 if (support_gk20a_pmu()) \
362 gk20a_pmu_disable_elpg(g); \
363 err = func; \
364 if (support_gk20a_pmu()) \
365 gk20a_pmu_enable_elpg(g); \
366 err; \
367 })
368
369int gk20a_gr_suspend(struct gk20a *g);
370
371struct nvhost_dbg_gpu_reg_op;
372int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
373 struct nvhost_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
374 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops);
375int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
376 u32 addr,
377 u32 max_offsets,
378 u32 *offsets, u32 *offset_addrs,
379 u32 *num_offsets,
380 bool is_quad, u32 quad);
381int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
382 struct channel_gk20a *c,
383 bool enable_smpc_ctxsw);
384
385struct channel_ctx_gk20a;
386int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx,
387 u32 addr, u32 data, bool patch);
388int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
389 struct channel_ctx_gk20a *ch_ctx);
390int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
391 struct channel_ctx_gk20a *ch_ctx);
392void gr_gk20a_commit_global_pagepool(struct gk20a *g,
393 struct channel_ctx_gk20a *ch_ctx,
394 u64 addr, u32 size, bool patch);
395void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
396void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
397void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g,
398 u32 *num_sm_dsm_perf_regs,
399 u32 **sm_dsm_perf_regs,
400 u32 *perf_register_stride);
401void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
402 u32 *num_sm_dsm_perf_regs,
403 u32 **sm_dsm_perf_regs,
404 u32 *perf_register_stride);
405int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
406#endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
new file mode 100644
index 00000000..a82a1ee7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
@@ -0,0 +1,179 @@
1/*
2 * GK20A Graphics Context Pri Register Addressing
3 *
4 * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18#ifndef _NVHOST_GR_PRI_GK20A_H_
19#define _NVHOST_GR_PRI_GK20A_H_
20
21/*
22 * These convenience macros are generally for use in the management/modification
23 * of the context state store for gr/compute contexts.
24 */
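/*
 * Illustrative example (variable names are placeholders): to turn an
 * offset-style address 'addr' into the unicast address of TPC 't' in
 * GPC 'g', mask off the per-TPC offset and rebase it:
 *
 *	u32 unicast = pri_tpc_addr(pri_tpccs_addr_mask(addr), g, t);
 *
 * pri_is_gpc_addr()/pri_is_tpc_addr() below can be used to classify an
 * incoming address before such a conversion.
 */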
25
26/*
27 * GPC pri addressing
28 */
29static inline u32 pri_gpccs_addr_width(void)
30{
31 return 15; /* from where? */
32}
33static inline u32 pri_gpccs_addr_mask(u32 addr)
34{
35 return addr & ((1 << pri_gpccs_addr_width()) - 1);
36}
37static inline u32 pri_gpc_addr(u32 addr, u32 gpc)
38{
39 return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + addr;
40}
41static inline bool pri_is_gpc_addr_shared(u32 addr)
42{
43 return (addr >= proj_gpc_shared_base_v()) &&
44 (addr < proj_gpc_shared_base_v() + proj_gpc_stride_v());
45}
46static inline bool pri_is_gpc_addr(u32 addr)
47{
48 return ((addr >= proj_gpc_base_v()) &&
49 (addr < proj_gpc_base_v() +
50 proj_scal_litter_num_gpcs_v() * proj_gpc_stride_v())) ||
51 pri_is_gpc_addr_shared(addr);
52}
53static inline u32 pri_get_gpc_num(u32 addr)
54{
55 u32 i, start;
56 u32 num_gpcs = proj_scal_litter_num_gpcs_v();
57
58 for (i = 0; i < num_gpcs; i++) {
59 start = proj_gpc_base_v() + (i * proj_gpc_stride_v());
60 if ((addr >= start) && (addr < (start + proj_gpc_stride_v())))
61 return i;
62 }
63 return 0;
64}
65/*
66 * TPC pri addressing
67 */
68static inline u32 pri_tpccs_addr_width(void)
69{
70 return 11; /* from where? */
71}
72static inline u32 pri_tpccs_addr_mask(u32 addr)
73{
74 return addr & ((1 << pri_tpccs_addr_width()) - 1);
75}
76static inline u32 pri_tpc_addr(u32 addr, u32 gpc, u32 tpc)
77{
78 return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) +
79 proj_tpc_in_gpc_base_v() + (tpc * proj_tpc_in_gpc_stride_v()) +
80 addr;
81}
82static inline bool pri_is_tpc_addr_shared(u32 addr)
83{
84 return (addr >= proj_tpc_in_gpc_shared_base_v()) &&
85 (addr < (proj_tpc_in_gpc_shared_base_v() +
86 proj_tpc_in_gpc_stride_v()));
87}
88static inline bool pri_is_tpc_addr(u32 addr)
89{
90 return ((addr >= proj_tpc_in_gpc_base_v()) &&
91 (addr < proj_tpc_in_gpc_base_v() + (proj_scal_litter_num_tpc_per_gpc_v() *
92 proj_tpc_in_gpc_stride_v())))
93 ||
94 pri_is_tpc_addr_shared(addr);
95}
96static inline u32 pri_get_tpc_num(u32 addr)
97{
98 u32 i, start;
99 u32 num_tpcs = proj_scal_litter_num_tpc_per_gpc_v();
100
101 for (i = 0; i < num_tpcs; i++) {
102 start = proj_tpc_in_gpc_base_v() + (i * proj_tpc_in_gpc_stride_v());
103 if ((addr >= start) && (addr < (start + proj_tpc_in_gpc_stride_v())))
104 return i;
105 }
106 return 0;
107}
108
109/*
110 * BE pri addressing
111 */
112static inline u32 pri_becs_addr_width(void)
113{
114 return 10; /* from where? */
115}
116static inline u32 pri_becs_addr_mask(u32 addr)
117{
118 return addr & ((1 << pri_becs_addr_width()) - 1);
119}
120static inline bool pri_is_be_addr_shared(u32 addr)
121{
122 return (addr >= proj_rop_shared_base_v()) &&
123 (addr < proj_rop_shared_base_v() + proj_rop_stride_v());
124}
125static inline u32 pri_be_shared_addr(u32 addr)
126{
127 return proj_rop_shared_base_v() + pri_becs_addr_mask(addr);
128}
129static inline bool pri_is_be_addr(u32 addr)
130{
131 return ((addr >= proj_rop_base_v()) &&
132 (addr < proj_rop_base_v() + proj_scal_litter_num_fbps_v() * proj_rop_stride_v())) ||
133 pri_is_be_addr_shared(addr);
134}
135
136static inline u32 pri_get_be_num(u32 addr)
137{
138 u32 i, start;
139 u32 num_fbps = proj_scal_litter_num_fbps_v();
140 for (i = 0; i < num_fbps; i++) {
141 start = proj_rop_base_v() + (i * proj_rop_stride_v());
142 if ((addr >= start) && (addr < (start + proj_rop_stride_v())))
143 return i;
144 }
145 return 0;
146}
147
148/*
149 * PPC pri addressing
150 */
151static inline u32 pri_ppccs_addr_width(void)
152{
153 return 9; /* from where? */
154}
155static inline u32 pri_ppccs_addr_mask(u32 addr)
156{
157 return addr & ((1 << pri_ppccs_addr_width()) - 1);
158}
159static inline u32 pri_ppc_addr(u32 addr, u32 gpc, u32 ppc)
160{
161 return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) +
162 proj_ppc_in_gpc_base_v() + (ppc * proj_ppc_in_gpc_stride_v()) + addr;
163}
164
165enum ctxsw_addr_type {
166 CTXSW_ADDR_TYPE_SYS = 0,
167 CTXSW_ADDR_TYPE_GPC = 1,
168 CTXSW_ADDR_TYPE_TPC = 2,
169 CTXSW_ADDR_TYPE_BE = 3,
170 CTXSW_ADDR_TYPE_PPC = 4
171};
172
173#define PRI_BROADCAST_FLAGS_NONE 0
174#define PRI_BROADCAST_FLAGS_GPC BIT(0)
175#define PRI_BROADCAST_FLAGS_TPC BIT(1)
176#define PRI_BROADCAST_FLAGS_BE BIT(2)
177#define PRI_BROADCAST_FLAGS_PPC BIT(3)
178
179#endif /*_NVHOST_GR_PRI_GK20A_H_ */
diff --git a/drivers/gpu/nvgpu/gk20a/hal.c b/drivers/gpu/nvgpu/gk20a/hal.c
new file mode 100644
index 00000000..dea740c2
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hal.c
@@ -0,0 +1,33 @@
1/*
2 * NVIDIA GPU HAL interface.
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include "gk20a.h"
17#include "hal_gk20a.h"
18
19int gpu_init_hal(struct gk20a *g)
20{
21 u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
22 switch (ver) {
23 case GK20A_GPUID_GK20A:
24 gk20a_dbg_info("gk20a detected");
25 gk20a_init_hal(&g->ops);
26 break;
27 default:
28 gk20a_err(&g->dev->dev, "no support for %x", ver);
29 return -ENODEV;
30 }
31
32 return 0;
33}
diff --git a/drivers/gpu/nvgpu/gk20a/hal.h b/drivers/gpu/nvgpu/gk20a/hal.h
new file mode 100644
index 00000000..da02cf5f
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hal.h
@@ -0,0 +1,25 @@
1/*
2 * NVIDIA GPU Hardware Abstraction Layer functions definitions.
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef __HAL_GPU__
17#define __HAL_GPU__
18
19#include <linux/kernel.h>
20
21struct gk20a;
22
23int gpu_init_hal(struct gk20a *g);
24
25#endif /* __HAL_GPU__ */
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
new file mode 100644
index 00000000..b3e9b0e6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -0,0 +1,50 @@
1/*
2 * drivers/video/tegra/host/gk20a/hal_gk20a.c
3 *
4 * GK20A Tegra HAL interface.
5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#include "hal_gk20a.h"
19#include "ltc_gk20a.h"
20#include "fb_gk20a.h"
21#include "gk20a.h"
22#include "gk20a_gating_reglist.h"
23#include "channel_gk20a.h"
24
25struct gpu_ops gk20a_ops = {
26 .clock_gating = {
27 .slcg_gr_load_gating_prod =
28 gr_gk20a_slcg_gr_load_gating_prod,
29 .slcg_perf_load_gating_prod =
30 gr_gk20a_slcg_perf_load_gating_prod,
31 .blcg_gr_load_gating_prod =
32 gr_gk20a_blcg_gr_load_gating_prod,
33 .pg_gr_load_gating_prod =
34 gr_gk20a_pg_gr_load_gating_prod,
35 .slcg_therm_load_gating_prod =
36 gr_gk20a_slcg_therm_load_gating_prod,
37 }
38};
39
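/*
 * Copy the static clock-gating ops above into the caller's table, then
 * let each engine (LTC, GR, FB, FIFO) fill in its own function pointers.
 */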
40int gk20a_init_hal(struct gpu_ops *gops)
41{
42 *gops = gk20a_ops;
43 gk20a_init_ltc(gops);
44 gk20a_init_gr(gops);
45 gk20a_init_fb(gops);
46 gk20a_init_fifo(gops);
47 gops->name = "gk20a";
48
49 return 0;
50}
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.h b/drivers/gpu/nvgpu/gk20a/hal_gk20a.h
new file mode 100644
index 00000000..db77a4a7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.h
@@ -0,0 +1,28 @@
1/*
2 * drivers/video/tegra/host/gk20a/hal_gk20a.h
3 *
4 * GK20A Hardware Abstraction Layer functions definitions.
5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#ifndef __HAL_GK20A__
19#define __HAL_GK20A__
20
21#include <linux/kernel.h>
22
23struct gpu_ops;
24struct gk20a;
25
26int gk20a_init_hal(struct gpu_ops *gops);
27
28#endif /* __HAL_GK20A__ */
diff --git a/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h
new file mode 100644
index 00000000..ebf8a873
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h
@@ -0,0 +1,105 @@
1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
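/*
 * Illustrative example ('inst_ptr' and the register-write step are
 * assumptions, not defined in this file): a full bus_bar1_block value is
 * built by OR'ing the field accessors below, e.g.
 *
 *	u32 v = bus_bar1_block_target_vid_mem_f() |
 *		bus_bar1_block_mode_virtual_f() |
 *		bus_bar1_block_ptr_f(inst_ptr >> bus_bar1_block_ptr_shift_v());
 *
 * and then written to the offset returned by bus_bar1_block_r().
 */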
50#ifndef _hw_bus_gk20a_h_
51#define _hw_bus_gk20a_h_
52
53static inline u32 bus_bar1_block_r(void)
54{
55 return 0x00001704;
56}
57static inline u32 bus_bar1_block_ptr_f(u32 v)
58{
59 return (v & 0xfffffff) << 0;
60}
61static inline u32 bus_bar1_block_target_vid_mem_f(void)
62{
63 return 0x0;
64}
65static inline u32 bus_bar1_block_mode_virtual_f(void)
66{
67 return 0x80000000;
68}
69static inline u32 bus_bar1_block_ptr_shift_v(void)
70{
71 return 0x0000000c;
72}
73static inline u32 bus_intr_0_r(void)
74{
75 return 0x00001100;
76}
77static inline u32 bus_intr_0_pri_squash_m(void)
78{
79 return 0x1 << 1;
80}
81static inline u32 bus_intr_0_pri_fecserr_m(void)
82{
83 return 0x1 << 2;
84}
85static inline u32 bus_intr_0_pri_timeout_m(void)
86{
87 return 0x1 << 3;
88}
89static inline u32 bus_intr_en_0_r(void)
90{
91 return 0x00001140;
92}
93static inline u32 bus_intr_en_0_pri_squash_m(void)
94{
95 return 0x1 << 1;
96}
97static inline u32 bus_intr_en_0_pri_fecserr_m(void)
98{
99 return 0x1 << 2;
100}
101static inline u32 bus_intr_en_0_pri_timeout_m(void)
102{
103 return 0x1 << 3;
104}
105#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h
new file mode 100644
index 00000000..573329f1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h
@@ -0,0 +1,113 @@
1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
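/*
 * Illustrative example ('reg' and the read/write helpers are assumed,
 * not defined here): a channel 'chid' is enabled by OR'ing
 * ccsr_channel_enable_set_true_f() into the value at ccsr_channel_r(chid),
 * while ccsr_channel_busy_v(reg) extracts the busy flag from a previously
 * read ccsr_channel_r() value.
 */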
50#ifndef _hw_ccsr_gk20a_h_
51#define _hw_ccsr_gk20a_h_
52
53static inline u32 ccsr_channel_inst_r(u32 i)
54{
55 return 0x00800000 + i*8;
56}
57static inline u32 ccsr_channel_inst__size_1_v(void)
58{
59 return 0x00000080;
60}
61static inline u32 ccsr_channel_inst_ptr_f(u32 v)
62{
63 return (v & 0xfffffff) << 0;
64}
65static inline u32 ccsr_channel_inst_target_vid_mem_f(void)
66{
67 return 0x0;
68}
69static inline u32 ccsr_channel_inst_bind_false_f(void)
70{
71 return 0x0;
72}
73static inline u32 ccsr_channel_inst_bind_true_f(void)
74{
75 return 0x80000000;
76}
77static inline u32 ccsr_channel_r(u32 i)
78{
79 return 0x00800004 + i*8;
80}
81static inline u32 ccsr_channel__size_1_v(void)
82{
83 return 0x00000080;
84}
85static inline u32 ccsr_channel_enable_v(u32 r)
86{
87 return (r >> 0) & 0x1;
88}
89static inline u32 ccsr_channel_enable_set_f(u32 v)
90{
91 return (v & 0x1) << 10;
92}
93static inline u32 ccsr_channel_enable_set_true_f(void)
94{
95 return 0x400;
96}
97static inline u32 ccsr_channel_enable_clr_true_f(void)
98{
99 return 0x800;
100}
101static inline u32 ccsr_channel_runlist_f(u32 v)
102{
103 return (v & 0xf) << 16;
104}
105static inline u32 ccsr_channel_status_v(u32 r)
106{
107 return (r >> 24) & 0xf;
108}
109static inline u32 ccsr_channel_busy_v(u32 r)
110{
111 return (r >> 28) & 0x1;
112}
113#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h
new file mode 100644
index 00000000..66bf01b0
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h
@@ -0,0 +1,85 @@
1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
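/*
 * Illustrative note: the *_config_1_ba_enable_yes_f() values below are
 * written into the matching *_config_1_r() registers when BA (block
 * activity) monitoring is enabled for the GPC and FBP chiplets.
 */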
50#ifndef _hw_chiplet_pwr_gk20a_h_
51#define _hw_chiplet_pwr_gk20a_h_
52
53static inline u32 chiplet_pwr_gpcs_weight_6_r(void)
54{
55 return 0x0010e018;
56}
57static inline u32 chiplet_pwr_gpcs_weight_7_r(void)
58{
59 return 0x0010e01c;
60}
61static inline u32 chiplet_pwr_gpcs_config_1_r(void)
62{
63 return 0x0010e03c;
64}
65static inline u32 chiplet_pwr_gpcs_config_1_ba_enable_yes_f(void)
66{
67 return 0x1;
68}
69static inline u32 chiplet_pwr_fbps_weight_0_r(void)
70{
71 return 0x0010e100;
72}
73static inline u32 chiplet_pwr_fbps_weight_1_r(void)
74{
75 return 0x0010e104;
76}
77static inline u32 chiplet_pwr_fbps_config_1_r(void)
78{
79 return 0x0010e13c;
80}
81static inline u32 chiplet_pwr_fbps_config_1_ba_enable_yes_f(void)
82{
83 return 0x1;
84}
85#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h
new file mode 100644
index 00000000..e2a4f2f2
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h
@@ -0,0 +1,245 @@
1/*
2 * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
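/*
 * Illustrative example (the image pointer and memory-read helper are
 * assumed): a context image is validated by comparing the word at byte
 * offset ctxsw_prog_main_image_magic_value_o() against
 * ctxsw_prog_main_image_magic_value_v_value_v(); the per-GPC local header
 * is checked the same way with ctxsw_prog_local_magic_value_o() and
 * ctxsw_prog_local_magic_value_v_value_v().
 */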
50#ifndef _hw_ctxsw_prog_gk20a_h_
51#define _hw_ctxsw_prog_gk20a_h_
52
53static inline u32 ctxsw_prog_fecs_header_v(void)
54{
55 return 0x00000100;
56}
57static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
58{
59 return 0x00000008;
60}
61static inline u32 ctxsw_prog_main_image_patch_count_o(void)
62{
63 return 0x00000010;
64}
65static inline u32 ctxsw_prog_main_image_patch_adr_lo_o(void)
66{
67 return 0x00000014;
68}
69static inline u32 ctxsw_prog_main_image_patch_adr_hi_o(void)
70{
71 return 0x00000018;
72}
73static inline u32 ctxsw_prog_main_image_zcull_o(void)
74{
75 return 0x0000001c;
76}
77static inline u32 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v(void)
78{
79 return 0x00000001;
80}
81static inline u32 ctxsw_prog_main_image_zcull_mode_separate_buffer_v(void)
82{
83 return 0x00000002;
84}
85static inline u32 ctxsw_prog_main_image_zcull_ptr_o(void)
86{
87 return 0x00000020;
88}
89static inline u32 ctxsw_prog_main_image_pm_o(void)
90{
91 return 0x00000028;
92}
93static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
94{
95 return 0x7 << 0;
96}
97static inline u32 ctxsw_prog_main_image_pm_mode_v(u32 r)
98{
99 return (r >> 0) & 0x7;
100}
101static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
102{
103 return 0x0;
104}
105static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void)
106{
107 return 0x7 << 3;
108}
109static inline u32 ctxsw_prog_main_image_pm_smpc_mode_v(u32 r)
110{
111 return (r >> 3) & 0x7;
112}
113static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void)
114{
115 return 0x0;
116}
117static inline u32 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f(void)
118{
119 return 0x8;
120}
121static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
122{
123 return 0x0000002c;
124}
125static inline u32 ctxsw_prog_main_image_num_save_ops_o(void)
126{
127 return 0x000000f4;
128}
129static inline u32 ctxsw_prog_main_image_num_restore_ops_o(void)
130{
131 return 0x000000f8;
132}
133static inline u32 ctxsw_prog_main_image_magic_value_o(void)
134{
135 return 0x000000fc;
136}
137static inline u32 ctxsw_prog_main_image_magic_value_v_value_v(void)
138{
139 return 0x600dc0de;
140}
141static inline u32 ctxsw_prog_main_image_priv_access_map_config_o(void)
142{
143 return 0x000000a0;
144}
145static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(void)
146{
147 return 0x0;
148}
149static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_allow_none_f(void)
150{
151 return 0x1;
152}
153static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(void)
154{
155 return 0x2;
156}
157static inline u32 ctxsw_prog_main_image_priv_access_map_addr_lo_o(void)
158{
159 return 0x000000a4;
160}
161static inline u32 ctxsw_prog_main_image_priv_access_map_addr_hi_o(void)
162{
163 return 0x000000a8;
164}
165static inline u32 ctxsw_prog_main_image_misc_options_o(void)
166{
167 return 0x0000003c;
168}
169static inline u32 ctxsw_prog_main_image_misc_options_verif_features_m(void)
170{
171 return 0x1 << 3;
172}
173static inline u32 ctxsw_prog_main_image_misc_options_verif_features_disabled_f(void)
174{
175 return 0x0;
176}
177static inline u32 ctxsw_prog_main_image_misc_options_verif_features_enabled_f(void)
178{
179 return 0x8;
180}
181static inline u32 ctxsw_prog_local_priv_register_ctl_o(void)
182{
183 return 0x0000000c;
184}
185static inline u32 ctxsw_prog_local_priv_register_ctl_offset_v(u32 r)
186{
187 return (r >> 0) & 0xffff;
188}
189static inline u32 ctxsw_prog_local_image_ppc_info_o(void)
190{
191 return 0x000000f4;
192}
193static inline u32 ctxsw_prog_local_image_ppc_info_num_ppcs_v(u32 r)
194{
195 return (r >> 0) & 0xffff;
196}
197static inline u32 ctxsw_prog_local_image_ppc_info_ppc_mask_v(u32 r)
198{
199 return (r >> 16) & 0xffff;
200}
201static inline u32 ctxsw_prog_local_image_num_tpcs_o(void)
202{
203 return 0x000000f8;
204}
205static inline u32 ctxsw_prog_local_magic_value_o(void)
206{
207 return 0x000000fc;
208}
209static inline u32 ctxsw_prog_local_magic_value_v_value_v(void)
210{
211 return 0xad0becab;
212}
213static inline u32 ctxsw_prog_main_extended_buffer_ctl_o(void)
214{
215 return 0x000000ec;
216}
217static inline u32 ctxsw_prog_main_extended_buffer_ctl_offset_v(u32 r)
218{
219 return (r >> 0) & 0xffff;
220}
221static inline u32 ctxsw_prog_main_extended_buffer_ctl_size_v(u32 r)
222{
223 return (r >> 16) & 0xff;
224}
225static inline u32 ctxsw_prog_extended_buffer_segments_size_in_bytes_v(void)
226{
227 return 0x00000100;
228}
229static inline u32 ctxsw_prog_extended_marker_size_in_bytes_v(void)
230{
231 return 0x00000004;
232}
233static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(void)
234{
235 return 0x00000005;
236}
237static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(void)
238{
239 return 0x00000004;
240}
241static inline u32 ctxsw_prog_extended_num_smpc_quadrants_v(void)
242{
243 return 0x00000004;
244}
245#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h
new file mode 100644
index 00000000..b7edc29d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h
@@ -0,0 +1,213 @@
1/*
2 * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
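/*
 * Illustrative example ('pdb_addr' is an assumed, already-shifted PDB
 * address): a full TLB invalidate is requested by writing
 *
 *	fb_mmu_invalidate_pdb_addr_f(pdb_addr) |
 *	fb_mmu_invalidate_pdb_aperture_vid_mem_f()
 *
 * to fb_mmu_invalidate_pdb_r(), then
 *
 *	fb_mmu_invalidate_all_va_true_f() |
 *	fb_mmu_invalidate_trigger_true_f()
 *
 * to fb_mmu_invalidate_r(), and waiting for completion (e.g. by polling
 * fb_mmu_invalidate_trigger_v() on re-reads of that register).
 */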
50#ifndef _hw_fb_gk20a_h_
51#define _hw_fb_gk20a_h_
52
53static inline u32 fb_mmu_ctrl_r(void)
54{
55 return 0x00100c80;
56}
57static inline u32 fb_mmu_ctrl_vm_pg_size_f(u32 v)
58{
59 return (v & 0x1) << 0;
60}
61static inline u32 fb_mmu_ctrl_vm_pg_size_128kb_f(void)
62{
63 return 0x0;
64}
65static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r)
66{
67 return (r >> 15) & 0x1;
68}
69static inline u32 fb_mmu_ctrl_pri_fifo_empty_false_f(void)
70{
71 return 0x0;
72}
73static inline u32 fb_mmu_ctrl_pri_fifo_space_v(u32 r)
74{
75 return (r >> 16) & 0xff;
76}
77static inline u32 fb_mmu_invalidate_pdb_r(void)
78{
79 return 0x00100cb8;
80}
81static inline u32 fb_mmu_invalidate_pdb_aperture_vid_mem_f(void)
82{
83 return 0x0;
84}
85static inline u32 fb_mmu_invalidate_pdb_addr_f(u32 v)
86{
87 return (v & 0xfffffff) << 4;
88}
89static inline u32 fb_mmu_invalidate_r(void)
90{
91 return 0x00100cbc;
92}
93static inline u32 fb_mmu_invalidate_all_va_true_f(void)
94{
95 return 0x1;
96}
97static inline u32 fb_mmu_invalidate_all_pdb_true_f(void)
98{
99 return 0x2;
100}
101static inline u32 fb_mmu_invalidate_trigger_s(void)
102{
103 return 1;
104}
105static inline u32 fb_mmu_invalidate_trigger_f(u32 v)
106{
107 return (v & 0x1) << 31;
108}
109static inline u32 fb_mmu_invalidate_trigger_m(void)
110{
111 return 0x1 << 31;
112}
113static inline u32 fb_mmu_invalidate_trigger_v(u32 r)
114{
115 return (r >> 31) & 0x1;
116}
117static inline u32 fb_mmu_invalidate_trigger_true_f(void)
118{
119 return 0x80000000;
120}
121static inline u32 fb_mmu_debug_wr_r(void)
122{
123 return 0x00100cc8;
124}
125static inline u32 fb_mmu_debug_wr_aperture_s(void)
126{
127 return 2;
128}
129static inline u32 fb_mmu_debug_wr_aperture_f(u32 v)
130{
131 return (v & 0x3) << 0;
132}
133static inline u32 fb_mmu_debug_wr_aperture_m(void)
134{
135 return 0x3 << 0;
136}
137static inline u32 fb_mmu_debug_wr_aperture_v(u32 r)
138{
139 return (r >> 0) & 0x3;
140}
141static inline u32 fb_mmu_debug_wr_aperture_vid_mem_f(void)
142{
143 return 0x0;
144}
145static inline u32 fb_mmu_debug_wr_vol_false_f(void)
146{
147 return 0x0;
148}
149static inline u32 fb_mmu_debug_wr_vol_true_v(void)
150{
151 return 0x00000001;
152}
153static inline u32 fb_mmu_debug_wr_vol_true_f(void)
154{
155 return 0x4;
156}
157static inline u32 fb_mmu_debug_wr_addr_v(u32 r)
158{
159 return (r >> 4) & 0xfffffff;
160}
161static inline u32 fb_mmu_debug_wr_addr_alignment_v(void)
162{
163 return 0x0000000c;
164}
165static inline u32 fb_mmu_debug_rd_r(void)
166{
167 return 0x00100ccc;
168}
169static inline u32 fb_mmu_debug_rd_aperture_vid_mem_f(void)
170{
171 return 0x0;
172}
173static inline u32 fb_mmu_debug_rd_vol_false_f(void)
174{
175 return 0x0;
176}
177static inline u32 fb_mmu_debug_rd_addr_v(u32 r)
178{
179 return (r >> 4) & 0xfffffff;
180}
181static inline u32 fb_mmu_debug_rd_addr_alignment_v(void)
182{
183 return 0x0000000c;
184}
185static inline u32 fb_mmu_debug_ctrl_r(void)
186{
187 return 0x00100cc4;
188}
189static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r)
190{
191 return (r >> 16) & 0x1;
192}
193static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
194{
195 return 0x00000001;
196}
197static inline u32 fb_mmu_vpr_info_r(void)
198{
199 return 0x00100cd0;
200}
201static inline u32 fb_mmu_vpr_info_fetch_v(u32 r)
202{
203 return (r >> 2) & 0x1;
204}
205static inline u32 fb_mmu_vpr_info_fetch_false_v(void)
206{
207 return 0x00000000;
208}
209static inline u32 fb_mmu_vpr_info_fetch_true_v(void)
210{
211 return 0x00000001;
212}
213#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h
new file mode 100644
index 00000000..a39d3c51
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h
@@ -0,0 +1,565 @@
1/*
2 * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
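/*
 * Illustrative example ('reg' is an assumed value read back from
 * fifo_engine_status_r(i)): the scheduler state of engine 'i' is decoded
 * field by field, e.g.
 *
 *	busy  = fifo_engine_status_engine_v(reg) ==
 *			fifo_engine_status_engine_busy_v();
 *	ctxsw = fifo_engine_status_ctx_status_v(reg) ==
 *			fifo_engine_status_ctx_status_ctxsw_switch_v();
 *	chid  = fifo_engine_status_id_v(reg);
 */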
50#ifndef _hw_fifo_gk20a_h_
51#define _hw_fifo_gk20a_h_
52
53static inline u32 fifo_bar1_base_r(void)
54{
55 return 0x00002254;
56}
57static inline u32 fifo_bar1_base_ptr_f(u32 v)
58{
59 return (v & 0xfffffff) << 0;
60}
61static inline u32 fifo_bar1_base_ptr_align_shift_v(void)
62{
63 return 0x0000000c;
64}
65static inline u32 fifo_bar1_base_valid_false_f(void)
66{
67 return 0x0;
68}
69static inline u32 fifo_bar1_base_valid_true_f(void)
70{
71 return 0x10000000;
72}
73static inline u32 fifo_runlist_base_r(void)
74{
75 return 0x00002270;
76}
77static inline u32 fifo_runlist_base_ptr_f(u32 v)
78{
79 return (v & 0xfffffff) << 0;
80}
81static inline u32 fifo_runlist_base_target_vid_mem_f(void)
82{
83 return 0x0;
84}
85static inline u32 fifo_runlist_r(void)
86{
87 return 0x00002274;
88}
89static inline u32 fifo_runlist_engine_f(u32 v)
90{
91 return (v & 0xf) << 20;
92}
93static inline u32 fifo_eng_runlist_base_r(u32 i)
94{
95 return 0x00002280 + i*8;
96}
97static inline u32 fifo_eng_runlist_base__size_1_v(void)
98{
99 return 0x00000001;
100}
101static inline u32 fifo_eng_runlist_r(u32 i)
102{
103 return 0x00002284 + i*8;
104}
105static inline u32 fifo_eng_runlist__size_1_v(void)
106{
107 return 0x00000001;
108}
109static inline u32 fifo_eng_runlist_length_f(u32 v)
110{
111 return (v & 0xffff) << 0;
112}
113static inline u32 fifo_eng_runlist_pending_true_f(void)
114{
115 return 0x100000;
116}
117static inline u32 fifo_eng_timeslice_r(u32 i)
118{
119 return 0x00002310 + i*4;
120}
121static inline u32 fifo_eng_timeslice_timeout_128_f(void)
122{
123 return 0x80;
124}
125static inline u32 fifo_eng_timeslice_timescale_3_f(void)
126{
127 return 0x3000;
128}
129static inline u32 fifo_eng_timeslice_enable_true_f(void)
130{
131 return 0x10000000;
132}
133static inline u32 fifo_pb_timeslice_r(u32 i)
134{
135 return 0x00002350 + i*4;
136}
137static inline u32 fifo_pb_timeslice_timeout_16_f(void)
138{
139 return 0x10;
140}
141static inline u32 fifo_pb_timeslice_timescale_0_f(void)
142{
143 return 0x0;
144}
145static inline u32 fifo_pb_timeslice_enable_true_f(void)
146{
147 return 0x10000000;
148}
149static inline u32 fifo_pbdma_map_r(u32 i)
150{
151 return 0x00002390 + i*4;
152}
153static inline u32 fifo_intr_0_r(void)
154{
155 return 0x00002100;
156}
157static inline u32 fifo_intr_0_bind_error_pending_f(void)
158{
159 return 0x1;
160}
161static inline u32 fifo_intr_0_bind_error_reset_f(void)
162{
163 return 0x1;
164}
165static inline u32 fifo_intr_0_pio_error_pending_f(void)
166{
167 return 0x10;
168}
169static inline u32 fifo_intr_0_pio_error_reset_f(void)
170{
171 return 0x10;
172}
173static inline u32 fifo_intr_0_sched_error_pending_f(void)
174{
175 return 0x100;
176}
177static inline u32 fifo_intr_0_sched_error_reset_f(void)
178{
179 return 0x100;
180}
181static inline u32 fifo_intr_0_chsw_error_pending_f(void)
182{
183 return 0x10000;
184}
185static inline u32 fifo_intr_0_chsw_error_reset_f(void)
186{
187 return 0x10000;
188}
189static inline u32 fifo_intr_0_fb_flush_timeout_pending_f(void)
190{
191 return 0x800000;
192}
193static inline u32 fifo_intr_0_fb_flush_timeout_reset_f(void)
194{
195 return 0x800000;
196}
197static inline u32 fifo_intr_0_lb_error_pending_f(void)
198{
199 return 0x1000000;
200}
201static inline u32 fifo_intr_0_lb_error_reset_f(void)
202{
203 return 0x1000000;
204}
205static inline u32 fifo_intr_0_dropped_mmu_fault_pending_f(void)
206{
207 return 0x8000000;
208}
209static inline u32 fifo_intr_0_dropped_mmu_fault_reset_f(void)
210{
211 return 0x8000000;
212}
213static inline u32 fifo_intr_0_mmu_fault_pending_f(void)
214{
215 return 0x10000000;
216}
217static inline u32 fifo_intr_0_pbdma_intr_pending_f(void)
218{
219 return 0x20000000;
220}
221static inline u32 fifo_intr_0_runlist_event_pending_f(void)
222{
223 return 0x40000000;
224}
225static inline u32 fifo_intr_0_channel_intr_pending_f(void)
226{
227 return 0x80000000;
228}
229static inline u32 fifo_intr_en_0_r(void)
230{
231 return 0x00002140;
232}
233static inline u32 fifo_intr_en_1_r(void)
234{
235 return 0x00002528;
236}
237static inline u32 fifo_intr_bind_error_r(void)
238{
239 return 0x0000252c;
240}
241static inline u32 fifo_intr_sched_error_r(void)
242{
243 return 0x0000254c;
244}
245static inline u32 fifo_intr_sched_error_code_f(u32 v)
246{
247 return (v & 0xff) << 0;
248}
249static inline u32 fifo_intr_sched_error_code_ctxsw_timeout_v(void)
250{
251 return 0x0000000a;
252}
253static inline u32 fifo_intr_chsw_error_r(void)
254{
255 return 0x0000256c;
256}
257static inline u32 fifo_intr_mmu_fault_id_r(void)
258{
259 return 0x0000259c;
260}
261static inline u32 fifo_intr_mmu_fault_eng_id_graphics_v(void)
262{
263 return 0x00000000;
264}
265static inline u32 fifo_intr_mmu_fault_eng_id_graphics_f(void)
266{
267 return 0x0;
268}
269static inline u32 fifo_intr_mmu_fault_inst_r(u32 i)
270{
271 return 0x00002800 + i*16;
272}
273static inline u32 fifo_intr_mmu_fault_inst_ptr_v(u32 r)
274{
275 return (r >> 0) & 0xfffffff;
276}
277static inline u32 fifo_intr_mmu_fault_inst_ptr_align_shift_v(void)
278{
279 return 0x0000000c;
280}
281static inline u32 fifo_intr_mmu_fault_lo_r(u32 i)
282{
283 return 0x00002804 + i*16;
284}
285static inline u32 fifo_intr_mmu_fault_hi_r(u32 i)
286{
287 return 0x00002808 + i*16;
288}
289static inline u32 fifo_intr_mmu_fault_info_r(u32 i)
290{
291 return 0x0000280c + i*16;
292}
293static inline u32 fifo_intr_mmu_fault_info_type_v(u32 r)
294{
295 return (r >> 0) & 0xf;
296}
297static inline u32 fifo_intr_mmu_fault_info_engine_subid_v(u32 r)
298{
299 return (r >> 6) & 0x1;
300}
301static inline u32 fifo_intr_mmu_fault_info_engine_subid_gpc_v(void)
302{
303 return 0x00000000;
304}
305static inline u32 fifo_intr_mmu_fault_info_engine_subid_hub_v(void)
306{
307 return 0x00000001;
308}
309static inline u32 fifo_intr_mmu_fault_info_client_v(u32 r)
310{
311 return (r >> 8) & 0x1f;
312}
313static inline u32 fifo_intr_pbdma_id_r(void)
314{
315 return 0x000025a0;
316}
317static inline u32 fifo_intr_pbdma_id_status_f(u32 v, u32 i)
318{
319 return (v & 0x1) << (0 + i*1);
320}
321static inline u32 fifo_intr_pbdma_id_status__size_1_v(void)
322{
323 return 0x00000001;
324}
325static inline u32 fifo_intr_runlist_r(void)
326{
327 return 0x00002a00;
328}
329static inline u32 fifo_fb_timeout_r(void)
330{
331 return 0x00002a04;
332}
333static inline u32 fifo_fb_timeout_period_m(void)
334{
335 return 0x3fffffff << 0;
336}
337static inline u32 fifo_fb_timeout_period_max_f(void)
338{
339 return 0x3fffffff;
340}
341static inline u32 fifo_pb_timeout_r(void)
342{
343 return 0x00002a08;
344}
345static inline u32 fifo_pb_timeout_detection_enabled_f(void)
346{
347 return 0x80000000;
348}
349static inline u32 fifo_eng_timeout_r(void)
350{
351 return 0x00002a0c;
352}
353static inline u32 fifo_eng_timeout_period_m(void)
354{
355 return 0x7fffffff << 0;
356}
357static inline u32 fifo_eng_timeout_period_max_f(void)
358{
359 return 0x7fffffff;
360}
361static inline u32 fifo_eng_timeout_detection_m(void)
362{
363 return 0x1 << 31;
364}
365static inline u32 fifo_eng_timeout_detection_enabled_f(void)
366{
367 return 0x80000000;
368}
369static inline u32 fifo_eng_timeout_detection_disabled_f(void)
370{
371 return 0x0;
372}
373static inline u32 fifo_error_sched_disable_r(void)
374{
375 return 0x0000262c;
376}
377static inline u32 fifo_sched_disable_r(void)
378{
379 return 0x00002630;
380}
381static inline u32 fifo_sched_disable_runlist_f(u32 v, u32 i)
382{
383 return (v & 0x1) << (0 + i*1);
384}
385static inline u32 fifo_sched_disable_runlist_m(u32 i)
386{
387 return 0x1 << (0 + i*1);
388}
389static inline u32 fifo_sched_disable_true_v(void)
390{
391 return 0x00000001;
392}
393static inline u32 fifo_preempt_r(void)
394{
395 return 0x00002634;
396}
397static inline u32 fifo_preempt_pending_true_f(void)
398{
399 return 0x100000;
400}
401static inline u32 fifo_preempt_type_channel_f(void)
402{
403 return 0x0;
404}
405static inline u32 fifo_preempt_chid_f(u32 v)
406{
407 return (v & 0xfff) << 0;
408}
409static inline u32 fifo_trigger_mmu_fault_r(u32 i)
410{
411 return 0x00002a30 + i*4;
412}
413static inline u32 fifo_trigger_mmu_fault_id_f(u32 v)
414{
415 return (v & 0x1f) << 0;
416}
417static inline u32 fifo_trigger_mmu_fault_enable_f(u32 v)
418{
419 return (v & 0x1) << 8;
420}
421static inline u32 fifo_engine_status_r(u32 i)
422{
423 return 0x00002640 + i*8;
424}
425static inline u32 fifo_engine_status__size_1_v(void)
426{
427 return 0x00000002;
428}
429static inline u32 fifo_engine_status_id_v(u32 r)
430{
431 return (r >> 0) & 0xfff;
432}
433static inline u32 fifo_engine_status_id_type_v(u32 r)
434{
435 return (r >> 12) & 0x1;
436}
437static inline u32 fifo_engine_status_id_type_chid_v(void)
438{
439 return 0x00000000;
440}
441static inline u32 fifo_engine_status_ctx_status_v(u32 r)
442{
443 return (r >> 13) & 0x7;
444}
445static inline u32 fifo_engine_status_ctx_status_valid_v(void)
446{
447 return 0x00000001;
448}
449static inline u32 fifo_engine_status_ctx_status_ctxsw_load_v(void)
450{
451 return 0x00000005;
452}
453static inline u32 fifo_engine_status_ctx_status_ctxsw_save_v(void)
454{
455 return 0x00000006;
456}
457static inline u32 fifo_engine_status_ctx_status_ctxsw_switch_v(void)
458{
459 return 0x00000007;
460}
461static inline u32 fifo_engine_status_next_id_v(u32 r)
462{
463 return (r >> 16) & 0xfff;
464}
465static inline u32 fifo_engine_status_next_id_type_v(u32 r)
466{
467 return (r >> 28) & 0x1;
468}
469static inline u32 fifo_engine_status_next_id_type_chid_v(void)
470{
471 return 0x00000000;
472}
473static inline u32 fifo_engine_status_faulted_v(u32 r)
474{
475 return (r >> 30) & 0x1;
476}
477static inline u32 fifo_engine_status_faulted_true_v(void)
478{
479 return 0x00000001;
480}
481static inline u32 fifo_engine_status_engine_v(u32 r)
482{
483 return (r >> 31) & 0x1;
484}
485static inline u32 fifo_engine_status_engine_idle_v(void)
486{
487 return 0x00000000;
488}
489static inline u32 fifo_engine_status_engine_busy_v(void)
490{
491 return 0x00000001;
492}
493static inline u32 fifo_engine_status_ctxsw_v(u32 r)
494{
495 return (r >> 15) & 0x1;
496}
497static inline u32 fifo_engine_status_ctxsw_in_progress_v(void)
498{
499 return 0x00000001;
500}
501static inline u32 fifo_engine_status_ctxsw_in_progress_f(void)
502{
503 return 0x8000;
504}
505static inline u32 fifo_pbdma_status_r(u32 i)
506{
507 return 0x00003080 + i*4;
508}
509static inline u32 fifo_pbdma_status__size_1_v(void)
510{
511 return 0x00000001;
512}
513static inline u32 fifo_pbdma_status_id_v(u32 r)
514{
515 return (r >> 0) & 0xfff;
516}
517static inline u32 fifo_pbdma_status_id_type_v(u32 r)
518{
519 return (r >> 12) & 0x1;
520}
521static inline u32 fifo_pbdma_status_id_type_chid_v(void)
522{
523 return 0x00000000;
524}
525static inline u32 fifo_pbdma_status_chan_status_v(u32 r)
526{
527 return (r >> 13) & 0x7;
528}
529static inline u32 fifo_pbdma_status_chan_status_valid_v(void)
530{
531 return 0x00000001;
532}
533static inline u32 fifo_pbdma_status_chan_status_chsw_load_v(void)
534{
535 return 0x00000005;
536}
537static inline u32 fifo_pbdma_status_chan_status_chsw_save_v(void)
538{
539 return 0x00000006;
540}
541static inline u32 fifo_pbdma_status_chan_status_chsw_switch_v(void)
542{
543 return 0x00000007;
544}
545static inline u32 fifo_pbdma_status_next_id_v(u32 r)
546{
547 return (r >> 16) & 0xfff;
548}
549static inline u32 fifo_pbdma_status_next_id_type_v(u32 r)
550{
551 return (r >> 28) & 0x1;
552}
553static inline u32 fifo_pbdma_status_next_id_type_chid_v(void)
554{
555 return 0x00000000;
556}
557static inline u32 fifo_pbdma_status_chsw_v(u32 r)
558{
559 return (r >> 15) & 0x1;
560}
561static inline u32 fifo_pbdma_status_chsw_in_progress_v(void)
562{
563 return 0x00000001;
564}
565#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h
new file mode 100644
index 00000000..0aeb11f9
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h
@@ -0,0 +1,141 @@
1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_flush_gk20a_h_
51#define _hw_flush_gk20a_h_
52
53static inline u32 flush_l2_system_invalidate_r(void)
54{
55 return 0x00070004;
56}
57static inline u32 flush_l2_system_invalidate_pending_v(u32 r)
58{
59 return (r >> 0) & 0x1;
60}
61static inline u32 flush_l2_system_invalidate_pending_busy_v(void)
62{
63 return 0x00000001;
64}
65static inline u32 flush_l2_system_invalidate_pending_busy_f(void)
66{
67 return 0x1;
68}
69static inline u32 flush_l2_system_invalidate_outstanding_v(u32 r)
70{
71 return (r >> 1) & 0x1;
72}
73static inline u32 flush_l2_system_invalidate_outstanding_true_v(void)
74{
75 return 0x00000001;
76}
77static inline u32 flush_l2_flush_dirty_r(void)
78{
79 return 0x00070010;
80}
81static inline u32 flush_l2_flush_dirty_pending_v(u32 r)
82{
83 return (r >> 0) & 0x1;
84}
85static inline u32 flush_l2_flush_dirty_pending_empty_v(void)
86{
87 return 0x00000000;
88}
89static inline u32 flush_l2_flush_dirty_pending_empty_f(void)
90{
91 return 0x0;
92}
93static inline u32 flush_l2_flush_dirty_pending_busy_v(void)
94{
95 return 0x00000001;
96}
97static inline u32 flush_l2_flush_dirty_pending_busy_f(void)
98{
99 return 0x1;
100}
101static inline u32 flush_l2_flush_dirty_outstanding_v(u32 r)
102{
103 return (r >> 1) & 0x1;
104}
105static inline u32 flush_l2_flush_dirty_outstanding_false_v(void)
106{
107 return 0x00000000;
108}
109static inline u32 flush_l2_flush_dirty_outstanding_false_f(void)
110{
111 return 0x0;
112}
113static inline u32 flush_l2_flush_dirty_outstanding_true_v(void)
114{
115 return 0x00000001;
116}
117static inline u32 flush_fb_flush_r(void)
118{
119 return 0x00070000;
120}
121static inline u32 flush_fb_flush_pending_v(u32 r)
122{
123 return (r >> 0) & 0x1;
124}
125static inline u32 flush_fb_flush_pending_busy_v(void)
126{
127 return 0x00000001;
128}
129static inline u32 flush_fb_flush_pending_busy_f(void)
130{
131 return 0x1;
132}
133static inline u32 flush_fb_flush_outstanding_v(u32 r)
134{
135 return (r >> 1) & 0x1;
136}
137static inline u32 flush_fb_flush_outstanding_true_v(void)
138{
139 return 0x00000001;
140}
141#endif
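These flush registers are typically used as kick-and-poll pairs: a _f() constant (for example flush_fb_flush_pending_busy_f()) is written to start the operation, and the corresponding _v() pair is compared against until the hardware reports idle. A minimal sketch of the poll predicate for the FB flush, assuming the caller has already read back flush_fb_flush_r(); the helper name and includes are illustrative:

#include <linux/types.h>
#include "hw_flush_gk20a.h"

/* True while an FB flush is still in flight, judging from a raw
 * value read back from flush_fb_flush_r(). */
static bool fb_flush_in_flight(u32 val)
{
	return flush_fb_flush_pending_v(val) ==
			flush_fb_flush_pending_busy_v() ||
	       flush_fb_flush_outstanding_v(val) ==
			flush_fb_flush_outstanding_true_v();
}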
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h
new file mode 100644
index 00000000..e0118946
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h
@@ -0,0 +1,1141 @@
1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_gmmu_gk20a_h_
51#define _hw_gmmu_gk20a_h_
52
53static inline u32 gmmu_pde_aperture_big_w(void)
54{
55 return 0;
56}
57static inline u32 gmmu_pde_aperture_big_invalid_f(void)
58{
59 return 0x0;
60}
61static inline u32 gmmu_pde_aperture_big_video_memory_f(void)
62{
63 return 0x1;
64}
65static inline u32 gmmu_pde_size_w(void)
66{
67 return 0;
68}
69static inline u32 gmmu_pde_size_full_f(void)
70{
71 return 0x0;
72}
73static inline u32 gmmu_pde_address_big_sys_f(u32 v)
74{
75 return (v & 0xfffffff) << 4;
76}
77static inline u32 gmmu_pde_address_big_sys_w(void)
78{
79 return 0;
80}
81static inline u32 gmmu_pde_aperture_small_w(void)
82{
83 return 1;
84}
85static inline u32 gmmu_pde_aperture_small_invalid_f(void)
86{
87 return 0x0;
88}
89static inline u32 gmmu_pde_aperture_small_video_memory_f(void)
90{
91 return 0x1;
92}
93static inline u32 gmmu_pde_vol_small_w(void)
94{
95 return 1;
96}
97static inline u32 gmmu_pde_vol_small_true_f(void)
98{
99 return 0x4;
100}
101static inline u32 gmmu_pde_vol_small_false_f(void)
102{
103 return 0x0;
104}
105static inline u32 gmmu_pde_vol_big_w(void)
106{
107 return 1;
108}
109static inline u32 gmmu_pde_vol_big_true_f(void)
110{
111 return 0x8;
112}
113static inline u32 gmmu_pde_vol_big_false_f(void)
114{
115 return 0x0;
116}
117static inline u32 gmmu_pde_address_small_sys_f(u32 v)
118{
119 return (v & 0xfffffff) << 4;
120}
121static inline u32 gmmu_pde_address_small_sys_w(void)
122{
123 return 1;
124}
125static inline u32 gmmu_pde_address_shift_v(void)
126{
127 return 0x0000000c;
128}
129static inline u32 gmmu_pde__size_v(void)
130{
131 return 0x00000008;
132}
133static inline u32 gmmu_pte__size_v(void)
134{
135 return 0x00000008;
136}
137static inline u32 gmmu_pte_valid_w(void)
138{
139 return 0;
140}
141static inline u32 gmmu_pte_valid_true_f(void)
142{
143 return 0x1;
144}
145static inline u32 gmmu_pte_address_sys_f(u32 v)
146{
147 return (v & 0xfffffff) << 4;
148}
149static inline u32 gmmu_pte_address_sys_w(void)
150{
151 return 0;
152}
153static inline u32 gmmu_pte_vol_w(void)
154{
155 return 1;
156}
157static inline u32 gmmu_pte_vol_true_f(void)
158{
159 return 0x1;
160}
161static inline u32 gmmu_pte_vol_false_f(void)
162{
163 return 0x0;
164}
165static inline u32 gmmu_pte_aperture_w(void)
166{
167 return 1;
168}
169static inline u32 gmmu_pte_aperture_video_memory_f(void)
170{
171 return 0x0;
172}
173static inline u32 gmmu_pte_read_only_w(void)
174{
175 return 0;
176}
177static inline u32 gmmu_pte_read_only_true_f(void)
178{
179 return 0x4;
180}
181static inline u32 gmmu_pte_write_disable_w(void)
182{
183 return 1;
184}
185static inline u32 gmmu_pte_write_disable_true_f(void)
186{
187 return 0x80000000;
188}
189static inline u32 gmmu_pte_read_disable_w(void)
190{
191 return 1;
192}
193static inline u32 gmmu_pte_read_disable_true_f(void)
194{
195 return 0x40000000;
196}
197static inline u32 gmmu_pte_comptagline_f(u32 v)
198{
199 return (v & 0x1ffff) << 12;
200}
201static inline u32 gmmu_pte_comptagline_w(void)
202{
203 return 1;
204}
205static inline u32 gmmu_pte_address_shift_v(void)
206{
207 return 0x0000000c;
208}
209static inline u32 gmmu_pte_kind_f(u32 v)
210{
211 return (v & 0xff) << 4;
212}
213static inline u32 gmmu_pte_kind_w(void)
214{
215 return 1;
216}
217static inline u32 gmmu_pte_kind_invalid_v(void)
218{
219 return 0x000000ff;
220}
221static inline u32 gmmu_pte_kind_pitch_v(void)
222{
223 return 0x00000000;
224}
225static inline u32 gmmu_pte_kind_z16_v(void)
226{
227 return 0x00000001;
228}
229static inline u32 gmmu_pte_kind_z16_2c_v(void)
230{
231 return 0x00000002;
232}
233static inline u32 gmmu_pte_kind_z16_ms2_2c_v(void)
234{
235 return 0x00000003;
236}
237static inline u32 gmmu_pte_kind_z16_ms4_2c_v(void)
238{
239 return 0x00000004;
240}
241static inline u32 gmmu_pte_kind_z16_ms8_2c_v(void)
242{
243 return 0x00000005;
244}
245static inline u32 gmmu_pte_kind_z16_ms16_2c_v(void)
246{
247 return 0x00000006;
248}
249static inline u32 gmmu_pte_kind_z16_2z_v(void)
250{
251 return 0x00000007;
252}
253static inline u32 gmmu_pte_kind_z16_ms2_2z_v(void)
254{
255 return 0x00000008;
256}
257static inline u32 gmmu_pte_kind_z16_ms4_2z_v(void)
258{
259 return 0x00000009;
260}
261static inline u32 gmmu_pte_kind_z16_ms8_2z_v(void)
262{
263 return 0x0000000a;
264}
265static inline u32 gmmu_pte_kind_z16_ms16_2z_v(void)
266{
267 return 0x0000000b;
268}
269static inline u32 gmmu_pte_kind_z16_4cz_v(void)
270{
271 return 0x0000000c;
272}
273static inline u32 gmmu_pte_kind_z16_ms2_4cz_v(void)
274{
275 return 0x0000000d;
276}
277static inline u32 gmmu_pte_kind_z16_ms4_4cz_v(void)
278{
279 return 0x0000000e;
280}
281static inline u32 gmmu_pte_kind_z16_ms8_4cz_v(void)
282{
283 return 0x0000000f;
284}
285static inline u32 gmmu_pte_kind_z16_ms16_4cz_v(void)
286{
287 return 0x00000010;
288}
289static inline u32 gmmu_pte_kind_s8z24_v(void)
290{
291 return 0x00000011;
292}
293static inline u32 gmmu_pte_kind_s8z24_1z_v(void)
294{
295 return 0x00000012;
296}
297static inline u32 gmmu_pte_kind_s8z24_ms2_1z_v(void)
298{
299 return 0x00000013;
300}
301static inline u32 gmmu_pte_kind_s8z24_ms4_1z_v(void)
302{
303 return 0x00000014;
304}
305static inline u32 gmmu_pte_kind_s8z24_ms8_1z_v(void)
306{
307 return 0x00000015;
308}
309static inline u32 gmmu_pte_kind_s8z24_ms16_1z_v(void)
310{
311 return 0x00000016;
312}
313static inline u32 gmmu_pte_kind_s8z24_2cz_v(void)
314{
315 return 0x00000017;
316}
317static inline u32 gmmu_pte_kind_s8z24_ms2_2cz_v(void)
318{
319 return 0x00000018;
320}
321static inline u32 gmmu_pte_kind_s8z24_ms4_2cz_v(void)
322{
323 return 0x00000019;
324}
325static inline u32 gmmu_pte_kind_s8z24_ms8_2cz_v(void)
326{
327 return 0x0000001a;
328}
329static inline u32 gmmu_pte_kind_s8z24_ms16_2cz_v(void)
330{
331 return 0x0000001b;
332}
333static inline u32 gmmu_pte_kind_s8z24_2cs_v(void)
334{
335 return 0x0000001c;
336}
337static inline u32 gmmu_pte_kind_s8z24_ms2_2cs_v(void)
338{
339 return 0x0000001d;
340}
341static inline u32 gmmu_pte_kind_s8z24_ms4_2cs_v(void)
342{
343 return 0x0000001e;
344}
345static inline u32 gmmu_pte_kind_s8z24_ms8_2cs_v(void)
346{
347 return 0x0000001f;
348}
349static inline u32 gmmu_pte_kind_s8z24_ms16_2cs_v(void)
350{
351 return 0x00000020;
352}
353static inline u32 gmmu_pte_kind_s8z24_4cszv_v(void)
354{
355 return 0x00000021;
356}
357static inline u32 gmmu_pte_kind_s8z24_ms2_4cszv_v(void)
358{
359 return 0x00000022;
360}
361static inline u32 gmmu_pte_kind_s8z24_ms4_4cszv_v(void)
362{
363 return 0x00000023;
364}
365static inline u32 gmmu_pte_kind_s8z24_ms8_4cszv_v(void)
366{
367 return 0x00000024;
368}
369static inline u32 gmmu_pte_kind_s8z24_ms16_4cszv_v(void)
370{
371 return 0x00000025;
372}
373static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_v(void)
374{
375 return 0x00000026;
376}
377static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_v(void)
378{
379 return 0x00000027;
380}
381static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_v(void)
382{
383 return 0x00000028;
384}
385static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_v(void)
386{
387 return 0x00000029;
388}
389static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_1zv_v(void)
390{
391 return 0x0000002e;
392}
393static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_1zv_v(void)
394{
395 return 0x0000002f;
396}
397static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_1zv_v(void)
398{
399 return 0x00000030;
400}
401static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_1zv_v(void)
402{
403 return 0x00000031;
404}
405static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2cs_v(void)
406{
407 return 0x00000032;
408}
409static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2cs_v(void)
410{
411 return 0x00000033;
412}
413static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2cs_v(void)
414{
415 return 0x00000034;
416}
417static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2cs_v(void)
418{
419 return 0x00000035;
420}
421static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2czv_v(void)
422{
423 return 0x0000003a;
424}
425static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2czv_v(void)
426{
427 return 0x0000003b;
428}
429static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2czv_v(void)
430{
431 return 0x0000003c;
432}
433static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2czv_v(void)
434{
435 return 0x0000003d;
436}
437static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2zv_v(void)
438{
439 return 0x0000003e;
440}
441static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2zv_v(void)
442{
443 return 0x0000003f;
444}
445static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2zv_v(void)
446{
447 return 0x00000040;
448}
449static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2zv_v(void)
450{
451 return 0x00000041;
452}
453static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_4cszv_v(void)
454{
455 return 0x00000042;
456}
457static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_4cszv_v(void)
458{
459 return 0x00000043;
460}
461static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_4cszv_v(void)
462{
463 return 0x00000044;
464}
465static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v(void)
466{
467 return 0x00000045;
468}
469static inline u32 gmmu_pte_kind_z24s8_v(void)
470{
471 return 0x00000046;
472}
473static inline u32 gmmu_pte_kind_z24s8_1z_v(void)
474{
475 return 0x00000047;
476}
477static inline u32 gmmu_pte_kind_z24s8_ms2_1z_v(void)
478{
479 return 0x00000048;
480}
481static inline u32 gmmu_pte_kind_z24s8_ms4_1z_v(void)
482{
483 return 0x00000049;
484}
485static inline u32 gmmu_pte_kind_z24s8_ms8_1z_v(void)
486{
487 return 0x0000004a;
488}
489static inline u32 gmmu_pte_kind_z24s8_ms16_1z_v(void)
490{
491 return 0x0000004b;
492}
493static inline u32 gmmu_pte_kind_z24s8_2cs_v(void)
494{
495 return 0x0000004c;
496}
497static inline u32 gmmu_pte_kind_z24s8_ms2_2cs_v(void)
498{
499 return 0x0000004d;
500}
501static inline u32 gmmu_pte_kind_z24s8_ms4_2cs_v(void)
502{
503 return 0x0000004e;
504}
505static inline u32 gmmu_pte_kind_z24s8_ms8_2cs_v(void)
506{
507 return 0x0000004f;
508}
509static inline u32 gmmu_pte_kind_z24s8_ms16_2cs_v(void)
510{
511 return 0x00000050;
512}
513static inline u32 gmmu_pte_kind_z24s8_2cz_v(void)
514{
515 return 0x00000051;
516}
517static inline u32 gmmu_pte_kind_z24s8_ms2_2cz_v(void)
518{
519 return 0x00000052;
520}
521static inline u32 gmmu_pte_kind_z24s8_ms4_2cz_v(void)
522{
523 return 0x00000053;
524}
525static inline u32 gmmu_pte_kind_z24s8_ms8_2cz_v(void)
526{
527 return 0x00000054;
528}
529static inline u32 gmmu_pte_kind_z24s8_ms16_2cz_v(void)
530{
531 return 0x00000055;
532}
533static inline u32 gmmu_pte_kind_z24s8_4cszv_v(void)
534{
535 return 0x00000056;
536}
537static inline u32 gmmu_pte_kind_z24s8_ms2_4cszv_v(void)
538{
539 return 0x00000057;
540}
541static inline u32 gmmu_pte_kind_z24s8_ms4_4cszv_v(void)
542{
543 return 0x00000058;
544}
545static inline u32 gmmu_pte_kind_z24s8_ms8_4cszv_v(void)
546{
547 return 0x00000059;
548}
549static inline u32 gmmu_pte_kind_z24s8_ms16_4cszv_v(void)
550{
551 return 0x0000005a;
552}
553static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_v(void)
554{
555 return 0x0000005b;
556}
557static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_v(void)
558{
559 return 0x0000005c;
560}
561static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_v(void)
562{
563 return 0x0000005d;
564}
565static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_v(void)
566{
567 return 0x0000005e;
568}
569static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_1zv_v(void)
570{
571 return 0x00000063;
572}
573static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_1zv_v(void)
574{
575 return 0x00000064;
576}
577static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_1zv_v(void)
578{
579 return 0x00000065;
580}
581static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_1zv_v(void)
582{
583 return 0x00000066;
584}
585static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2cs_v(void)
586{
587 return 0x00000067;
588}
589static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2cs_v(void)
590{
591 return 0x00000068;
592}
593static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2cs_v(void)
594{
595 return 0x00000069;
596}
597static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2cs_v(void)
598{
599 return 0x0000006a;
600}
601static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2czv_v(void)
602{
603 return 0x0000006f;
604}
605static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2czv_v(void)
606{
607 return 0x00000070;
608}
609static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2czv_v(void)
610{
611 return 0x00000071;
612}
613static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2czv_v(void)
614{
615 return 0x00000072;
616}
617static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2zv_v(void)
618{
619 return 0x00000073;
620}
621static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2zv_v(void)
622{
623 return 0x00000074;
624}
625static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2zv_v(void)
626{
627 return 0x00000075;
628}
629static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2zv_v(void)
630{
631 return 0x00000076;
632}
633static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_4cszv_v(void)
634{
635 return 0x00000077;
636}
637static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_4cszv_v(void)
638{
639 return 0x00000078;
640}
641static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_4cszv_v(void)
642{
643 return 0x00000079;
644}
645static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v(void)
646{
647 return 0x0000007a;
648}
649static inline u32 gmmu_pte_kind_zf32_v(void)
650{
651 return 0x0000007b;
652}
653static inline u32 gmmu_pte_kind_zf32_1z_v(void)
654{
655 return 0x0000007c;
656}
657static inline u32 gmmu_pte_kind_zf32_ms2_1z_v(void)
658{
659 return 0x0000007d;
660}
661static inline u32 gmmu_pte_kind_zf32_ms4_1z_v(void)
662{
663 return 0x0000007e;
664}
665static inline u32 gmmu_pte_kind_zf32_ms8_1z_v(void)
666{
667 return 0x0000007f;
668}
669static inline u32 gmmu_pte_kind_zf32_ms16_1z_v(void)
670{
671 return 0x00000080;
672}
673static inline u32 gmmu_pte_kind_zf32_2cs_v(void)
674{
675 return 0x00000081;
676}
677static inline u32 gmmu_pte_kind_zf32_ms2_2cs_v(void)
678{
679 return 0x00000082;
680}
681static inline u32 gmmu_pte_kind_zf32_ms4_2cs_v(void)
682{
683 return 0x00000083;
684}
685static inline u32 gmmu_pte_kind_zf32_ms8_2cs_v(void)
686{
687 return 0x00000084;
688}
689static inline u32 gmmu_pte_kind_zf32_ms16_2cs_v(void)
690{
691 return 0x00000085;
692}
693static inline u32 gmmu_pte_kind_zf32_2cz_v(void)
694{
695 return 0x00000086;
696}
697static inline u32 gmmu_pte_kind_zf32_ms2_2cz_v(void)
698{
699 return 0x00000087;
700}
701static inline u32 gmmu_pte_kind_zf32_ms4_2cz_v(void)
702{
703 return 0x00000088;
704}
705static inline u32 gmmu_pte_kind_zf32_ms8_2cz_v(void)
706{
707 return 0x00000089;
708}
709static inline u32 gmmu_pte_kind_zf32_ms16_2cz_v(void)
710{
711 return 0x0000008a;
712}
713static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v(void)
714{
715 return 0x0000008b;
716}
717static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v(void)
718{
719 return 0x0000008c;
720}
721static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v(void)
722{
723 return 0x0000008d;
724}
725static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v(void)
726{
727 return 0x0000008e;
728}
729static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v(void)
730{
731 return 0x0000008f;
732}
733static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1cs_v(void)
734{
735 return 0x00000090;
736}
737static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1cs_v(void)
738{
739 return 0x00000091;
740}
741static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v(void)
742{
743 return 0x00000092;
744}
745static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v(void)
746{
747 return 0x00000097;
748}
749static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1zv_v(void)
750{
751 return 0x00000098;
752}
753static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1zv_v(void)
754{
755 return 0x00000099;
756}
757static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1zv_v(void)
758{
759 return 0x0000009a;
760}
761static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1czv_v(void)
762{
763 return 0x0000009b;
764}
765static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1czv_v(void)
766{
767 return 0x0000009c;
768}
769static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1czv_v(void)
770{
771 return 0x0000009d;
772}
773static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1czv_v(void)
774{
775 return 0x0000009e;
776}
777static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cs_v(void)
778{
779 return 0x0000009f;
780}
781static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cs_v(void)
782{
783 return 0x000000a0;
784}
785static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cs_v(void)
786{
787 return 0x000000a1;
788}
789static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cs_v(void)
790{
791 return 0x000000a2;
792}
793static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v(void)
794{
795 return 0x000000a3;
796}
797static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cszv_v(void)
798{
799 return 0x000000a4;
800}
801static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cszv_v(void)
802{
803 return 0x000000a5;
804}
805static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v(void)
806{
807 return 0x000000a6;
808}
809static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v(void)
810{
811 return 0x000000a7;
812}
813static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v(void)
814{
815 return 0x000000a8;
816}
817static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v(void)
818{
819 return 0x000000a9;
820}
821static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v(void)
822{
823 return 0x000000aa;
824}
825static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v(void)
826{
827 return 0x000000ab;
828}
829static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1cs_v(void)
830{
831 return 0x000000ac;
832}
833static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1cs_v(void)
834{
835 return 0x000000ad;
836}
837static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v(void)
838{
839 return 0x000000ae;
840}
841static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v(void)
842{
843 return 0x000000b3;
844}
845static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1zv_v(void)
846{
847 return 0x000000b4;
848}
849static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1zv_v(void)
850{
851 return 0x000000b5;
852}
853static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1zv_v(void)
854{
855 return 0x000000b6;
856}
857static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1czv_v(void)
858{
859 return 0x000000b7;
860}
861static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1czv_v(void)
862{
863 return 0x000000b8;
864}
865static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1czv_v(void)
866{
867 return 0x000000b9;
868}
869static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1czv_v(void)
870{
871 return 0x000000ba;
872}
873static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cs_v(void)
874{
875 return 0x000000bb;
876}
877static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cs_v(void)
878{
879 return 0x000000bc;
880}
881static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cs_v(void)
882{
883 return 0x000000bd;
884}
885static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cs_v(void)
886{
887 return 0x000000be;
888}
889static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v(void)
890{
891 return 0x000000bf;
892}
893static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cszv_v(void)
894{
895 return 0x000000c0;
896}
897static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cszv_v(void)
898{
899 return 0x000000c1;
900}
901static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v(void)
902{
903 return 0x000000c2;
904}
905static inline u32 gmmu_pte_kind_zf32_x24s8_v(void)
906{
907 return 0x000000c3;
908}
909static inline u32 gmmu_pte_kind_zf32_x24s8_1cs_v(void)
910{
911 return 0x000000c4;
912}
913static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_1cs_v(void)
914{
915 return 0x000000c5;
916}
917static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_1cs_v(void)
918{
919 return 0x000000c6;
920}
921static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_1cs_v(void)
922{
923 return 0x000000c7;
924}
925static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_1cs_v(void)
926{
927 return 0x000000c8;
928}
929static inline u32 gmmu_pte_kind_zf32_x24s8_2cszv_v(void)
930{
931 return 0x000000ce;
932}
933static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_2cszv_v(void)
934{
935 return 0x000000cf;
936}
937static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_2cszv_v(void)
938{
939 return 0x000000d0;
940}
941static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v(void)
942{
943 return 0x000000d1;
944}
945static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_2cszv_v(void)
946{
947 return 0x000000d2;
948}
949static inline u32 gmmu_pte_kind_zf32_x24s8_2cs_v(void)
950{
951 return 0x000000d3;
952}
953static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_2cs_v(void)
954{
955 return 0x000000d4;
956}
957static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_2cs_v(void)
958{
959 return 0x000000d5;
960}
961static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_2cs_v(void)
962{
963 return 0x000000d6;
964}
965static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_2cs_v(void)
966{
967 return 0x000000d7;
968}
969static inline u32 gmmu_pte_kind_generic_16bx2_v(void)
970{
971 return 0x000000fe;
972}
973static inline u32 gmmu_pte_kind_c32_2c_v(void)
974{
975 return 0x000000d8;
976}
977static inline u32 gmmu_pte_kind_c32_2cbr_v(void)
978{
979 return 0x000000d9;
980}
981static inline u32 gmmu_pte_kind_c32_2cba_v(void)
982{
983 return 0x000000da;
984}
985static inline u32 gmmu_pte_kind_c32_2cra_v(void)
986{
987 return 0x000000db;
988}
989static inline u32 gmmu_pte_kind_c32_2bra_v(void)
990{
991 return 0x000000dc;
992}
993static inline u32 gmmu_pte_kind_c32_ms2_2c_v(void)
994{
995 return 0x000000dd;
996}
997static inline u32 gmmu_pte_kind_c32_ms2_2cbr_v(void)
998{
999 return 0x000000de;
1000}
1001static inline u32 gmmu_pte_kind_c32_ms2_2cra_v(void)
1002{
1003 return 0x000000cc;
1004}
1005static inline u32 gmmu_pte_kind_c32_ms4_2c_v(void)
1006{
1007 return 0x000000df;
1008}
1009static inline u32 gmmu_pte_kind_c32_ms4_2cbr_v(void)
1010{
1011 return 0x000000e0;
1012}
1013static inline u32 gmmu_pte_kind_c32_ms4_2cba_v(void)
1014{
1015 return 0x000000e1;
1016}
1017static inline u32 gmmu_pte_kind_c32_ms4_2cra_v(void)
1018{
1019 return 0x000000e2;
1020}
1021static inline u32 gmmu_pte_kind_c32_ms4_2bra_v(void)
1022{
1023 return 0x000000e3;
1024}
1025static inline u32 gmmu_pte_kind_c32_ms8_ms16_2c_v(void)
1026{
1027 return 0x000000e4;
1028}
1029static inline u32 gmmu_pte_kind_c32_ms8_ms16_2cra_v(void)
1030{
1031 return 0x000000e5;
1032}
1033static inline u32 gmmu_pte_kind_c64_2c_v(void)
1034{
1035 return 0x000000e6;
1036}
1037static inline u32 gmmu_pte_kind_c64_2cbr_v(void)
1038{
1039 return 0x000000e7;
1040}
1041static inline u32 gmmu_pte_kind_c64_2cba_v(void)
1042{
1043 return 0x000000e8;
1044}
1045static inline u32 gmmu_pte_kind_c64_2cra_v(void)
1046{
1047 return 0x000000e9;
1048}
1049static inline u32 gmmu_pte_kind_c64_2bra_v(void)
1050{
1051 return 0x000000ea;
1052}
1053static inline u32 gmmu_pte_kind_c64_ms2_2c_v(void)
1054{
1055 return 0x000000eb;
1056}
1057static inline u32 gmmu_pte_kind_c64_ms2_2cbr_v(void)
1058{
1059 return 0x000000ec;
1060}
1061static inline u32 gmmu_pte_kind_c64_ms2_2cra_v(void)
1062{
1063 return 0x000000cd;
1064}
1065static inline u32 gmmu_pte_kind_c64_ms4_2c_v(void)
1066{
1067 return 0x000000ed;
1068}
1069static inline u32 gmmu_pte_kind_c64_ms4_2cbr_v(void)
1070{
1071 return 0x000000ee;
1072}
1073static inline u32 gmmu_pte_kind_c64_ms4_2cba_v(void)
1074{
1075 return 0x000000ef;
1076}
1077static inline u32 gmmu_pte_kind_c64_ms4_2cra_v(void)
1078{
1079 return 0x000000f0;
1080}
1081static inline u32 gmmu_pte_kind_c64_ms4_2bra_v(void)
1082{
1083 return 0x000000f1;
1084}
1085static inline u32 gmmu_pte_kind_c64_ms8_ms16_2c_v(void)
1086{
1087 return 0x000000f2;
1088}
1089static inline u32 gmmu_pte_kind_c64_ms8_ms16_2cra_v(void)
1090{
1091 return 0x000000f3;
1092}
1093static inline u32 gmmu_pte_kind_c128_2c_v(void)
1094{
1095 return 0x000000f4;
1096}
1097static inline u32 gmmu_pte_kind_c128_2cr_v(void)
1098{
1099 return 0x000000f5;
1100}
1101static inline u32 gmmu_pte_kind_c128_ms2_2c_v(void)
1102{
1103 return 0x000000f6;
1104}
1105static inline u32 gmmu_pte_kind_c128_ms2_2cr_v(void)
1106{
1107 return 0x000000f7;
1108}
1109static inline u32 gmmu_pte_kind_c128_ms4_2c_v(void)
1110{
1111 return 0x000000f8;
1112}
1113static inline u32 gmmu_pte_kind_c128_ms4_2cr_v(void)
1114{
1115 return 0x000000f9;
1116}
1117static inline u32 gmmu_pte_kind_c128_ms8_ms16_2c_v(void)
1118{
1119 return 0x000000fa;
1120}
1121static inline u32 gmmu_pte_kind_c128_ms8_ms16_2cr_v(void)
1122{
1123 return 0x000000fb;
1124}
1125static inline u32 gmmu_pte_kind_x8c24_v(void)
1126{
1127 return 0x000000fc;
1128}
1129static inline u32 gmmu_pte_kind_pitch_no_swizzle_v(void)
1130{
1131 return 0x000000fd;
1132}
1133static inline u32 gmmu_pte_kind_smsked_message_v(void)
1134{
1135 return 0x000000ca;
1136}
1137static inline u32 gmmu_pte_kind_smhost_message_v(void)
1138{
1139 return 0x000000cb;
1140}
1141#endif
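The _w() accessors give the 32-bit word index inside the 8-byte PDE/PTE (see gmmu_pte__size_v() above), and the _f() values are OR'd together within each word. A minimal sketch of packing one small-page PTE for a video-memory, pitch-kind, read-only, volatile page; the pte_fill() name, the 4 KB alignment assumption and the includes are illustrative and not taken from the patch:

#include <linux/types.h>
#include "hw_gmmu_gk20a.h"

/* Pack the two 32-bit words of one small-page PTE. phys_addr is
 * assumed 4 KB aligned; gmmu_pte_address_shift_v() == 12 strips the
 * page offset before gmmu_pte_address_sys_f() places the field. */
static void pte_fill(u32 pte_w[2], u64 phys_addr)
{
	pte_w[gmmu_pte_valid_w()] =
		gmmu_pte_valid_true_f() |
		gmmu_pte_address_sys_f((u32)(phys_addr >>
					     gmmu_pte_address_shift_v())) |
		gmmu_pte_read_only_true_f();

	pte_w[gmmu_pte_aperture_w()] =
		gmmu_pte_aperture_video_memory_f() |
		gmmu_pte_vol_true_f() |
		gmmu_pte_kind_f(gmmu_pte_kind_pitch_v());
}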
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
new file mode 100644
index 00000000..ece7602d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -0,0 +1,3173 @@
1/*
2 * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_gr_gk20a_h_
51#define _hw_gr_gk20a_h_
52
53static inline u32 gr_intr_r(void)
54{
55 return 0x00400100;
56}
57static inline u32 gr_intr_notify_pending_f(void)
58{
59 return 0x1;
60}
61static inline u32 gr_intr_notify_reset_f(void)
62{
63 return 0x1;
64}
65static inline u32 gr_intr_semaphore_pending_f(void)
66{
67 return 0x2;
68}
69static inline u32 gr_intr_semaphore_reset_f(void)
70{
71 return 0x2;
72}
73static inline u32 gr_intr_semaphore_timeout_not_pending_f(void)
74{
75 return 0x0;
76}
77static inline u32 gr_intr_semaphore_timeout_pending_f(void)
78{
79 return 0x4;
80}
81static inline u32 gr_intr_semaphore_timeout_reset_f(void)
82{
83 return 0x4;
84}
85static inline u32 gr_intr_illegal_method_pending_f(void)
86{
87 return 0x10;
88}
89static inline u32 gr_intr_illegal_method_reset_f(void)
90{
91 return 0x10;
92}
93static inline u32 gr_intr_illegal_notify_pending_f(void)
94{
95 return 0x40;
96}
97static inline u32 gr_intr_illegal_notify_reset_f(void)
98{
99 return 0x40;
100}
101static inline u32 gr_intr_illegal_class_pending_f(void)
102{
103 return 0x20;
104}
105static inline u32 gr_intr_illegal_class_reset_f(void)
106{
107 return 0x20;
108}
109static inline u32 gr_intr_class_error_pending_f(void)
110{
111 return 0x100000;
112}
113static inline u32 gr_intr_class_error_reset_f(void)
114{
115 return 0x100000;
116}
117static inline u32 gr_intr_exception_pending_f(void)
118{
119 return 0x200000;
120}
121static inline u32 gr_intr_exception_reset_f(void)
122{
123 return 0x200000;
124}
125static inline u32 gr_intr_firmware_method_pending_f(void)
126{
127 return 0x100;
128}
129static inline u32 gr_intr_firmware_method_reset_f(void)
130{
131 return 0x100;
132}
133static inline u32 gr_intr_nonstall_r(void)
134{
135 return 0x00400120;
136}
137static inline u32 gr_intr_nonstall_trap_pending_f(void)
138{
139 return 0x2;
140}
141static inline u32 gr_intr_en_r(void)
142{
143 return 0x0040013c;
144}
145static inline u32 gr_exception_r(void)
146{
147 return 0x00400108;
148}
149static inline u32 gr_exception_fe_m(void)
150{
151 return 0x1 << 0;
152}
153static inline u32 gr_exception_gpc_m(void)
154{
155 return 0x1 << 24;
156}
157static inline u32 gr_exception1_r(void)
158{
159 return 0x00400118;
160}
161static inline u32 gr_exception1_gpc_0_pending_f(void)
162{
163 return 0x1;
164}
165static inline u32 gr_exception2_r(void)
166{
167 return 0x0040011c;
168}
169static inline u32 gr_exception_en_r(void)
170{
171 return 0x00400138;
172}
173static inline u32 gr_exception_en_fe_m(void)
174{
175 return 0x1 << 0;
176}
177static inline u32 gr_exception1_en_r(void)
178{
179 return 0x00400130;
180}
181static inline u32 gr_exception2_en_r(void)
182{
183 return 0x00400134;
184}
185static inline u32 gr_gpfifo_ctl_r(void)
186{
187 return 0x00400500;
188}
189static inline u32 gr_gpfifo_ctl_access_f(u32 v)
190{
191 return (v & 0x1) << 0;
192}
193static inline u32 gr_gpfifo_ctl_access_disabled_f(void)
194{
195 return 0x0;
196}
197static inline u32 gr_gpfifo_ctl_access_enabled_f(void)
198{
199 return 0x1;
200}
201static inline u32 gr_gpfifo_ctl_semaphore_access_f(u32 v)
202{
203 return (v & 0x1) << 16;
204}
205static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_v(void)
206{
207 return 0x00000001;
208}
209static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_f(void)
210{
211 return 0x10000;
212}
213static inline u32 gr_trapped_addr_r(void)
214{
215 return 0x00400704;
216}
217static inline u32 gr_trapped_addr_mthd_v(u32 r)
218{
219 return (r >> 2) & 0xfff;
220}
221static inline u32 gr_trapped_addr_subch_v(u32 r)
222{
223 return (r >> 16) & 0x7;
224}
225static inline u32 gr_trapped_data_lo_r(void)
226{
227 return 0x00400708;
228}
229static inline u32 gr_trapped_data_hi_r(void)
230{
231 return 0x0040070c;
232}
233static inline u32 gr_status_r(void)
234{
235 return 0x00400700;
236}
237static inline u32 gr_status_fe_method_lower_v(u32 r)
238{
239 return (r >> 2) & 0x1;
240}
241static inline u32 gr_status_fe_method_lower_idle_v(void)
242{
243 return 0x00000000;
244}
245static inline u32 gr_status_mask_r(void)
246{
247 return 0x00400610;
248}
249static inline u32 gr_engine_status_r(void)
250{
251 return 0x0040060c;
252}
253static inline u32 gr_engine_status_value_busy_f(void)
254{
255 return 0x1;
256}
257static inline u32 gr_pipe_bundle_address_r(void)
258{
259 return 0x00400200;
260}
261static inline u32 gr_pipe_bundle_address_value_v(u32 r)
262{
263 return (r >> 0) & 0xffff;
264}
265static inline u32 gr_pipe_bundle_data_r(void)
266{
267 return 0x00400204;
268}
269static inline u32 gr_pipe_bundle_config_r(void)
270{
271 return 0x00400208;
272}
273static inline u32 gr_pipe_bundle_config_override_pipe_mode_disabled_f(void)
274{
275 return 0x0;
276}
277static inline u32 gr_pipe_bundle_config_override_pipe_mode_enabled_f(void)
278{
279 return 0x80000000;
280}
281static inline u32 gr_fe_hww_esr_r(void)
282{
283 return 0x00404000;
284}
285static inline u32 gr_fe_hww_esr_reset_active_f(void)
286{
287 return 0x40000000;
288}
289static inline u32 gr_fe_hww_esr_en_enable_f(void)
290{
291 return 0x80000000;
292}
293static inline u32 gr_fe_go_idle_timeout_r(void)
294{
295 return 0x00404154;
296}
297static inline u32 gr_fe_go_idle_timeout_count_f(u32 v)
298{
299 return (v & 0xffffffff) << 0;
300}
301static inline u32 gr_fe_go_idle_timeout_count_disabled_f(void)
302{
303 return 0x0;
304}
305static inline u32 gr_fe_object_table_r(u32 i)
306{
307 return 0x00404200 + i*4;
308}
309static inline u32 gr_fe_object_table_nvclass_v(u32 r)
310{
311 return (r >> 0) & 0xffff;
312}
313static inline u32 gr_pri_mme_shadow_raw_index_r(void)
314{
315 return 0x00404488;
316}
317static inline u32 gr_pri_mme_shadow_raw_index_write_trigger_f(void)
318{
319 return 0x80000000;
320}
321static inline u32 gr_pri_mme_shadow_raw_data_r(void)
322{
323 return 0x0040448c;
324}
325static inline u32 gr_mme_hww_esr_r(void)
326{
327 return 0x00404490;
328}
329static inline u32 gr_mme_hww_esr_reset_active_f(void)
330{
331 return 0x40000000;
332}
333static inline u32 gr_mme_hww_esr_en_enable_f(void)
334{
335 return 0x80000000;
336}
337static inline u32 gr_memfmt_hww_esr_r(void)
338{
339 return 0x00404600;
340}
341static inline u32 gr_memfmt_hww_esr_reset_active_f(void)
342{
343 return 0x40000000;
344}
345static inline u32 gr_memfmt_hww_esr_en_enable_f(void)
346{
347 return 0x80000000;
348}
349static inline u32 gr_fecs_cpuctl_r(void)
350{
351 return 0x00409100;
352}
353static inline u32 gr_fecs_cpuctl_startcpu_f(u32 v)
354{
355 return (v & 0x1) << 1;
356}
357static inline u32 gr_fecs_dmactl_r(void)
358{
359 return 0x0040910c;
360}
361static inline u32 gr_fecs_dmactl_require_ctx_f(u32 v)
362{
363 return (v & 0x1) << 0;
364}
365static inline u32 gr_fecs_dmactl_dmem_scrubbing_m(void)
366{
367 return 0x1 << 1;
368}
369static inline u32 gr_fecs_dmactl_imem_scrubbing_m(void)
370{
371 return 0x1 << 2;
372}
373static inline u32 gr_fecs_os_r(void)
374{
375 return 0x00409080;
376}
377static inline u32 gr_fecs_idlestate_r(void)
378{
379 return 0x0040904c;
380}
381static inline u32 gr_fecs_mailbox0_r(void)
382{
383 return 0x00409040;
384}
385static inline u32 gr_fecs_mailbox1_r(void)
386{
387 return 0x00409044;
388}
389static inline u32 gr_fecs_irqstat_r(void)
390{
391 return 0x00409008;
392}
393static inline u32 gr_fecs_irqmode_r(void)
394{
395 return 0x0040900c;
396}
397static inline u32 gr_fecs_irqmask_r(void)
398{
399 return 0x00409018;
400}
401static inline u32 gr_fecs_irqdest_r(void)
402{
403 return 0x0040901c;
404}
405static inline u32 gr_fecs_curctx_r(void)
406{
407 return 0x00409050;
408}
409static inline u32 gr_fecs_nxtctx_r(void)
410{
411 return 0x00409054;
412}
413static inline u32 gr_fecs_engctl_r(void)
414{
415 return 0x004090a4;
416}
417static inline u32 gr_fecs_debug1_r(void)
418{
419 return 0x00409090;
420}
421static inline u32 gr_fecs_debuginfo_r(void)
422{
423 return 0x00409094;
424}
425static inline u32 gr_fecs_icd_cmd_r(void)
426{
427 return 0x00409200;
428}
429static inline u32 gr_fecs_icd_cmd_opc_s(void)
430{
431 return 4;
432}
433static inline u32 gr_fecs_icd_cmd_opc_f(u32 v)
434{
435 return (v & 0xf) << 0;
436}
437static inline u32 gr_fecs_icd_cmd_opc_m(void)
438{
439 return 0xf << 0;
440}
441static inline u32 gr_fecs_icd_cmd_opc_v(u32 r)
442{
443 return (r >> 0) & 0xf;
444}
445static inline u32 gr_fecs_icd_cmd_opc_rreg_f(void)
446{
447 return 0x8;
448}
449static inline u32 gr_fecs_icd_cmd_opc_rstat_f(void)
450{
451 return 0xe;
452}
453static inline u32 gr_fecs_icd_cmd_idx_f(u32 v)
454{
455 return (v & 0x1f) << 8;
456}
457static inline u32 gr_fecs_icd_rdata_r(void)
458{
459 return 0x0040920c;
460}
461static inline u32 gr_fecs_imemc_r(u32 i)
462{
463 return 0x00409180 + i*16;
464}
465static inline u32 gr_fecs_imemc_offs_f(u32 v)
466{
467 return (v & 0x3f) << 2;
468}
469static inline u32 gr_fecs_imemc_blk_f(u32 v)
470{
471 return (v & 0xff) << 8;
472}
473static inline u32 gr_fecs_imemc_aincw_f(u32 v)
474{
475 return (v & 0x1) << 24;
476}
477static inline u32 gr_fecs_imemd_r(u32 i)
478{
479 return 0x00409184 + i*16;
480}
481static inline u32 gr_fecs_imemt_r(u32 i)
482{
483 return 0x00409188 + i*16;
484}
485static inline u32 gr_fecs_imemt_tag_f(u32 v)
486{
487 return (v & 0xffff) << 0;
488}
489static inline u32 gr_fecs_dmemc_r(u32 i)
490{
491 return 0x004091c0 + i*8;
492}
493static inline u32 gr_fecs_dmemc_offs_s(void)
494{
495 return 6;
496}
497static inline u32 gr_fecs_dmemc_offs_f(u32 v)
498{
499 return (v & 0x3f) << 2;
500}
501static inline u32 gr_fecs_dmemc_offs_m(void)
502{
503 return 0x3f << 2;
504}
505static inline u32 gr_fecs_dmemc_offs_v(u32 r)
506{
507 return (r >> 2) & 0x3f;
508}
509static inline u32 gr_fecs_dmemc_blk_f(u32 v)
510{
511 return (v & 0xff) << 8;
512}
513static inline u32 gr_fecs_dmemc_aincw_f(u32 v)
514{
515 return (v & 0x1) << 24;
516}
517static inline u32 gr_fecs_dmemd_r(u32 i)
518{
519 return 0x004091c4 + i*8;
520}
521static inline u32 gr_fecs_dmatrfbase_r(void)
522{
523 return 0x00409110;
524}
525static inline u32 gr_fecs_dmatrfmoffs_r(void)
526{
527 return 0x00409114;
528}
529static inline u32 gr_fecs_dmatrffboffs_r(void)
530{
531 return 0x0040911c;
532}
533static inline u32 gr_fecs_dmatrfcmd_r(void)
534{
535 return 0x00409118;
536}
537static inline u32 gr_fecs_dmatrfcmd_imem_f(u32 v)
538{
539 return (v & 0x1) << 4;
540}
541static inline u32 gr_fecs_dmatrfcmd_write_f(u32 v)
542{
543 return (v & 0x1) << 5;
544}
545static inline u32 gr_fecs_dmatrfcmd_size_f(u32 v)
546{
547 return (v & 0x7) << 8;
548}
549static inline u32 gr_fecs_dmatrfcmd_ctxdma_f(u32 v)
550{
551 return (v & 0x7) << 12;
552}
553static inline u32 gr_fecs_bootvec_r(void)
554{
555 return 0x00409104;
556}
557static inline u32 gr_fecs_bootvec_vec_f(u32 v)
558{
559 return (v & 0xffffffff) << 0;
560}
561static inline u32 gr_fecs_falcon_hwcfg_r(void)
562{
563 return 0x00409108;
564}
565static inline u32 gr_gpcs_gpccs_falcon_hwcfg_r(void)
566{
567 return 0x0041a108;
568}
569static inline u32 gr_fecs_falcon_rm_r(void)
570{
571 return 0x00409084;
572}
573static inline u32 gr_fecs_current_ctx_r(void)
574{
575 return 0x00409b00;
576}
577static inline u32 gr_fecs_current_ctx_ptr_f(u32 v)
578{
579 return (v & 0xfffffff) << 0;
580}
581static inline u32 gr_fecs_current_ctx_ptr_v(u32 r)
582{
583 return (r >> 0) & 0xfffffff;
584}
585static inline u32 gr_fecs_current_ctx_target_s(void)
586{
587 return 2;
588}
589static inline u32 gr_fecs_current_ctx_target_f(u32 v)
590{
591 return (v & 0x3) << 28;
592}
593static inline u32 gr_fecs_current_ctx_target_m(void)
594{
595 return 0x3 << 28;
596}
597static inline u32 gr_fecs_current_ctx_target_v(u32 r)
598{
599 return (r >> 28) & 0x3;
600}
601static inline u32 gr_fecs_current_ctx_target_vid_mem_f(void)
602{
603 return 0x0;
604}
605static inline u32 gr_fecs_current_ctx_valid_s(void)
606{
607 return 1;
608}
609static inline u32 gr_fecs_current_ctx_valid_f(u32 v)
610{
611 return (v & 0x1) << 31;
612}
613static inline u32 gr_fecs_current_ctx_valid_m(void)
614{
615 return 0x1 << 31;
616}
617static inline u32 gr_fecs_current_ctx_valid_v(u32 r)
618{
619 return (r >> 31) & 0x1;
620}
621static inline u32 gr_fecs_current_ctx_valid_false_f(void)
622{
623 return 0x0;
624}
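The gr_fecs_current_ctx accessors above combine into a single packed word: a 28-bit pointer, a 2-bit aperture target and a valid bit. A minimal sketch of building that word for a context image in video memory; the fecs_current_ctx_data() name, the 12-bit (4 KB) pointer shift and the includes are assumptions for illustration only:

#include <linux/types.h>
#include "hw_gr_gk20a.h"

/* Pack a 4 KB aligned context-image address into the layout used by
 * gr_fecs_current_ctx_r(): pointer in bits 27:0, target in bits
 * 29:28, valid in bit 31. */
static u32 fecs_current_ctx_data(u64 ctx_addr)
{
	return gr_fecs_current_ctx_ptr_f((u32)(ctx_addr >> 12)) |
	       gr_fecs_current_ctx_target_vid_mem_f() |
	       gr_fecs_current_ctx_valid_f(1);
}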
625static inline u32 gr_fecs_method_data_r(void)
626{
627 return 0x00409500;
628}
629static inline u32 gr_fecs_method_push_r(void)
630{
631 return 0x00409504;
632}
633static inline u32 gr_fecs_method_push_adr_f(u32 v)
634{
635 return (v & 0xfff) << 0;
636}
637static inline u32 gr_fecs_method_push_adr_bind_pointer_v(void)
638{
639 return 0x00000003;
640}
641static inline u32 gr_fecs_method_push_adr_bind_pointer_f(void)
642{
643 return 0x3;
644}
645static inline u32 gr_fecs_method_push_adr_discover_image_size_v(void)
646{
647 return 0x00000010;
648}
649static inline u32 gr_fecs_method_push_adr_wfi_golden_save_v(void)
650{
651 return 0x00000009;
652}
653static inline u32 gr_fecs_method_push_adr_restore_golden_v(void)
654{
655 return 0x00000015;
656}
657static inline u32 gr_fecs_method_push_adr_discover_zcull_image_size_v(void)
658{
659 return 0x00000016;
660}
661static inline u32 gr_fecs_method_push_adr_discover_pm_image_size_v(void)
662{
663 return 0x00000025;
664}
665static inline u32 gr_fecs_method_push_adr_discover_reglist_image_size_v(void)
666{
667 return 0x00000030;
668}
669static inline u32 gr_fecs_method_push_adr_set_reglist_bind_instance_v(void)
670{
671 return 0x00000031;
672}
673static inline u32 gr_fecs_method_push_adr_set_reglist_virtual_address_v(void)
674{
675 return 0x00000032;
676}
677static inline u32 gr_fecs_method_push_adr_stop_ctxsw_v(void)
678{
679 return 0x00000038;
680}
681static inline u32 gr_fecs_method_push_adr_start_ctxsw_v(void)
682{
683 return 0x00000039;
684}
685static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void)
686{
687 return 0x21;
688}
689static inline u32 gr_fecs_host_int_enable_r(void)
690{
691 return 0x00409c24;
692}
693static inline u32 gr_fecs_host_int_enable_fault_during_ctxsw_enable_f(void)
694{
695 return 0x10000;
696}
697static inline u32 gr_fecs_host_int_enable_umimp_firmware_method_enable_f(void)
698{
699 return 0x20000;
700}
701static inline u32 gr_fecs_host_int_enable_umimp_illegal_method_enable_f(void)
702{
703 return 0x40000;
704}
705static inline u32 gr_fecs_host_int_enable_watchdog_enable_f(void)
706{
707 return 0x80000;
708}
709static inline u32 gr_fecs_ctxsw_reset_ctl_r(void)
710{
711 return 0x00409614;
712}
713static inline u32 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f(void)
714{
715 return 0x0;
716}
717static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f(void)
718{
719 return 0x0;
720}
721static inline u32 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f(void)
722{
723 return 0x0;
724}
725static inline u32 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f(void)
726{
727 return 0x10;
728}
729static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f(void)
730{
731 return 0x20;
732}
733static inline u32 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f(void)
734{
735 return 0x40;
736}
737static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f(void)
738{
739 return 0x0;
740}
741static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f(void)
742{
743 return 0x100;
744}
745static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f(void)
746{
747 return 0x0;
748}
749static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f(void)
750{
751 return 0x200;
752}
753static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_s(void)
754{
755 return 1;
756}
757static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_f(u32 v)
758{
759 return (v & 0x1) << 10;
760}
761static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_m(void)
762{
763 return 0x1 << 10;
764}
765static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_v(u32 r)
766{
767 return (r >> 10) & 0x1;
768}
769static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f(void)
770{
771 return 0x0;
772}
773static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f(void)
774{
775 return 0x400;
776}
777static inline u32 gr_fecs_ctx_state_store_major_rev_id_r(void)
778{
779 return 0x0040960c;
780}
781static inline u32 gr_fecs_ctxsw_mailbox_r(u32 i)
782{
783 return 0x00409800 + i*4;
784}
785static inline u32 gr_fecs_ctxsw_mailbox__size_1_v(void)
786{
787 return 0x00000008;
788}
789static inline u32 gr_fecs_ctxsw_mailbox_value_f(u32 v)
790{
791 return (v & 0xffffffff) << 0;
792}
793static inline u32 gr_fecs_ctxsw_mailbox_value_pass_v(void)
794{
795 return 0x00000001;
796}
797static inline u32 gr_fecs_ctxsw_mailbox_value_fail_v(void)
798{
799 return 0x00000002;
800}
801static inline u32 gr_fecs_ctxsw_mailbox_set_r(u32 i)
802{
803 return 0x00409820 + i*4;
804}
805static inline u32 gr_fecs_ctxsw_mailbox_set_value_f(u32 v)
806{
807 return (v & 0xffffffff) << 0;
808}
809static inline u32 gr_fecs_ctxsw_mailbox_clear_r(u32 i)
810{
811 return 0x00409840 + i*4;
812}
813static inline u32 gr_fecs_ctxsw_mailbox_clear_value_f(u32 v)
814{
815 return (v & 0xffffffff) << 0;
816}
817static inline u32 gr_fecs_fs_r(void)
818{
819 return 0x00409604;
820}
821static inline u32 gr_fecs_fs_num_available_gpcs_s(void)
822{
823 return 5;
824}
825static inline u32 gr_fecs_fs_num_available_gpcs_f(u32 v)
826{
827 return (v & 0x1f) << 0;
828}
829static inline u32 gr_fecs_fs_num_available_gpcs_m(void)
830{
831 return 0x1f << 0;
832}
833static inline u32 gr_fecs_fs_num_available_gpcs_v(u32 r)
834{
835 return (r >> 0) & 0x1f;
836}
837static inline u32 gr_fecs_fs_num_available_fbps_s(void)
838{
839 return 5;
840}
841static inline u32 gr_fecs_fs_num_available_fbps_f(u32 v)
842{
843 return (v & 0x1f) << 16;
844}
845static inline u32 gr_fecs_fs_num_available_fbps_m(void)
846{
847 return 0x1f << 16;
848}
849static inline u32 gr_fecs_fs_num_available_fbps_v(u32 r)
850{
851 return (r >> 16) & 0x1f;
852}
853static inline u32 gr_fecs_cfg_r(void)
854{
855 return 0x00409620;
856}
857static inline u32 gr_fecs_cfg_imem_sz_v(u32 r)
858{
859 return (r >> 0) & 0xff;
860}
861static inline u32 gr_fecs_rc_lanes_r(void)
862{
863 return 0x00409880;
864}
865static inline u32 gr_fecs_rc_lanes_num_chains_s(void)
866{
867 return 6;
868}
869static inline u32 gr_fecs_rc_lanes_num_chains_f(u32 v)
870{
871 return (v & 0x3f) << 0;
872}
873static inline u32 gr_fecs_rc_lanes_num_chains_m(void)
874{
875 return 0x3f << 0;
876}
877static inline u32 gr_fecs_rc_lanes_num_chains_v(u32 r)
878{
879 return (r >> 0) & 0x3f;
880}
881static inline u32 gr_fecs_ctxsw_status_1_r(void)
882{
883 return 0x00409400;
884}
885static inline u32 gr_fecs_ctxsw_status_1_arb_busy_s(void)
886{
887 return 1;
888}
889static inline u32 gr_fecs_ctxsw_status_1_arb_busy_f(u32 v)
890{
891 return (v & 0x1) << 12;
892}
893static inline u32 gr_fecs_ctxsw_status_1_arb_busy_m(void)
894{
895 return 0x1 << 12;
896}
897static inline u32 gr_fecs_ctxsw_status_1_arb_busy_v(u32 r)
898{
899 return (r >> 12) & 0x1;
900}
901static inline u32 gr_fecs_arb_ctx_adr_r(void)
902{
903 return 0x00409a24;
904}
905static inline u32 gr_fecs_new_ctx_r(void)
906{
907 return 0x00409b04;
908}
909static inline u32 gr_fecs_new_ctx_ptr_s(void)
910{
911 return 28;
912}
913static inline u32 gr_fecs_new_ctx_ptr_f(u32 v)
914{
915 return (v & 0xfffffff) << 0;
916}
917static inline u32 gr_fecs_new_ctx_ptr_m(void)
918{
919 return 0xfffffff << 0;
920}
921static inline u32 gr_fecs_new_ctx_ptr_v(u32 r)
922{
923 return (r >> 0) & 0xfffffff;
924}
925static inline u32 gr_fecs_new_ctx_target_s(void)
926{
927 return 2;
928}
929static inline u32 gr_fecs_new_ctx_target_f(u32 v)
930{
931 return (v & 0x3) << 28;
932}
933static inline u32 gr_fecs_new_ctx_target_m(void)
934{
935 return 0x3 << 28;
936}
937static inline u32 gr_fecs_new_ctx_target_v(u32 r)
938{
939 return (r >> 28) & 0x3;
940}
941static inline u32 gr_fecs_new_ctx_valid_s(void)
942{
943 return 1;
944}
945static inline u32 gr_fecs_new_ctx_valid_f(u32 v)
946{
947 return (v & 0x1) << 31;
948}
949static inline u32 gr_fecs_new_ctx_valid_m(void)
950{
951 return 0x1 << 31;
952}
953static inline u32 gr_fecs_new_ctx_valid_v(u32 r)
954{
955 return (r >> 31) & 0x1;
956}
957static inline u32 gr_fecs_arb_ctx_ptr_r(void)
958{
959 return 0x00409a0c;
960}
961static inline u32 gr_fecs_arb_ctx_ptr_ptr_s(void)
962{
963 return 28;
964}
965static inline u32 gr_fecs_arb_ctx_ptr_ptr_f(u32 v)
966{
967 return (v & 0xfffffff) << 0;
968}
969static inline u32 gr_fecs_arb_ctx_ptr_ptr_m(void)
970{
971 return 0xfffffff << 0;
972}
973static inline u32 gr_fecs_arb_ctx_ptr_ptr_v(u32 r)
974{
975 return (r >> 0) & 0xfffffff;
976}
977static inline u32 gr_fecs_arb_ctx_ptr_target_s(void)
978{
979 return 2;
980}
981static inline u32 gr_fecs_arb_ctx_ptr_target_f(u32 v)
982{
983 return (v & 0x3) << 28;
984}
985static inline u32 gr_fecs_arb_ctx_ptr_target_m(void)
986{
987 return 0x3 << 28;
988}
989static inline u32 gr_fecs_arb_ctx_ptr_target_v(u32 r)
990{
991 return (r >> 28) & 0x3;
992}
993static inline u32 gr_fecs_arb_ctx_cmd_r(void)
994{
995 return 0x00409a10;
996}
997static inline u32 gr_fecs_arb_ctx_cmd_cmd_s(void)
998{
999 return 5;
1000}
1001static inline u32 gr_fecs_arb_ctx_cmd_cmd_f(u32 v)
1002{
1003 return (v & 0x1f) << 0;
1004}
1005static inline u32 gr_fecs_arb_ctx_cmd_cmd_m(void)
1006{
1007 return 0x1f << 0;
1008}
1009static inline u32 gr_fecs_arb_ctx_cmd_cmd_v(u32 r)
1010{
1011 return (r >> 0) & 0x1f;
1012}
1013static inline u32 gr_rstr2d_gpc_map0_r(void)
1014{
1015 return 0x0040780c;
1016}
1017static inline u32 gr_rstr2d_gpc_map1_r(void)
1018{
1019 return 0x00407810;
1020}
1021static inline u32 gr_rstr2d_gpc_map2_r(void)
1022{
1023 return 0x00407814;
1024}
1025static inline u32 gr_rstr2d_gpc_map3_r(void)
1026{
1027 return 0x00407818;
1028}
1029static inline u32 gr_rstr2d_gpc_map4_r(void)
1030{
1031 return 0x0040781c;
1032}
1033static inline u32 gr_rstr2d_gpc_map5_r(void)
1034{
1035 return 0x00407820;
1036}
1037static inline u32 gr_rstr2d_map_table_cfg_r(void)
1038{
1039 return 0x004078bc;
1040}
1041static inline u32 gr_rstr2d_map_table_cfg_row_offset_f(u32 v)
1042{
1043 return (v & 0xff) << 0;
1044}
1045static inline u32 gr_rstr2d_map_table_cfg_num_entries_f(u32 v)
1046{
1047 return (v & 0xff) << 8;
1048}
1049static inline u32 gr_pd_hww_esr_r(void)
1050{
1051 return 0x00406018;
1052}
1053static inline u32 gr_pd_hww_esr_reset_active_f(void)
1054{
1055 return 0x40000000;
1056}
1057static inline u32 gr_pd_hww_esr_en_enable_f(void)
1058{
1059 return 0x80000000;
1060}
1061static inline u32 gr_pd_num_tpc_per_gpc_r(u32 i)
1062{
1063 return 0x00406028 + i*4;
1064}
1065static inline u32 gr_pd_num_tpc_per_gpc__size_1_v(void)
1066{
1067 return 0x00000004;
1068}
1069static inline u32 gr_pd_num_tpc_per_gpc_count0_f(u32 v)
1070{
1071 return (v & 0xf) << 0;
1072}
1073static inline u32 gr_pd_num_tpc_per_gpc_count1_f(u32 v)
1074{
1075 return (v & 0xf) << 4;
1076}
1077static inline u32 gr_pd_num_tpc_per_gpc_count2_f(u32 v)
1078{
1079 return (v & 0xf) << 8;
1080}
1081static inline u32 gr_pd_num_tpc_per_gpc_count3_f(u32 v)
1082{
1083 return (v & 0xf) << 12;
1084}
1085static inline u32 gr_pd_num_tpc_per_gpc_count4_f(u32 v)
1086{
1087 return (v & 0xf) << 16;
1088}
1089static inline u32 gr_pd_num_tpc_per_gpc_count5_f(u32 v)
1090{
1091 return (v & 0xf) << 20;
1092}
1093static inline u32 gr_pd_num_tpc_per_gpc_count6_f(u32 v)
1094{
1095 return (v & 0xf) << 24;
1096}
1097static inline u32 gr_pd_num_tpc_per_gpc_count7_f(u32 v)
1098{
1099 return (v & 0xf) << 28;
1100}
1101static inline u32 gr_pd_ab_dist_cfg0_r(void)
1102{
1103 return 0x004064c0;
1104}
1105static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_en_f(void)
1106{
1107 return 0x80000000;
1108}
1109static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_dis_f(void)
1110{
1111 return 0x0;
1112}
1113static inline u32 gr_pd_ab_dist_cfg1_r(void)
1114{
1115 return 0x004064c4;
1116}
1117static inline u32 gr_pd_ab_dist_cfg1_max_batches_init_f(void)
1118{
1119 return 0xffff;
1120}
1121static inline u32 gr_pd_ab_dist_cfg1_max_output_f(u32 v)
1122{
1123 return (v & 0x7ff) << 16;
1124}
1125static inline u32 gr_pd_ab_dist_cfg1_max_output_granularity_v(void)
1126{
1127 return 0x00000080;
1128}
1129static inline u32 gr_pd_ab_dist_cfg2_r(void)
1130{
1131 return 0x004064c8;
1132}
1133static inline u32 gr_pd_ab_dist_cfg2_token_limit_f(u32 v)
1134{
1135 return (v & 0xfff) << 0;
1136}
1137static inline u32 gr_pd_ab_dist_cfg2_token_limit_init_v(void)
1138{
1139 return 0x00000100;
1140}
1141static inline u32 gr_pd_ab_dist_cfg2_state_limit_f(u32 v)
1142{
1143 return (v & 0xfff) << 16;
1144}
1145static inline u32 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(void)
1146{
1147 return 0x00000020;
1148}
1149static inline u32 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(void)
1150{
1151 return 0x00000062;
1152}
1153static inline u32 gr_pd_pagepool_r(void)
1154{
1155 return 0x004064cc;
1156}
1157static inline u32 gr_pd_pagepool_total_pages_f(u32 v)
1158{
1159 return (v & 0xff) << 0;
1160}
1161static inline u32 gr_pd_pagepool_valid_true_f(void)
1162{
1163 return 0x80000000;
1164}
1165static inline u32 gr_pd_dist_skip_table_r(u32 i)
1166{
1167 return 0x004064d0 + i*4;
1168}
1169static inline u32 gr_pd_dist_skip_table__size_1_v(void)
1170{
1171 return 0x00000008;
1172}
1173static inline u32 gr_pd_dist_skip_table_gpc_4n0_mask_f(u32 v)
1174{
1175 return (v & 0xff) << 0;
1176}
1177static inline u32 gr_pd_dist_skip_table_gpc_4n1_mask_f(u32 v)
1178{
1179 return (v & 0xff) << 8;
1180}
1181static inline u32 gr_pd_dist_skip_table_gpc_4n2_mask_f(u32 v)
1182{
1183 return (v & 0xff) << 16;
1184}
1185static inline u32 gr_pd_dist_skip_table_gpc_4n3_mask_f(u32 v)
1186{
1187 return (v & 0xff) << 24;
1188}
1189static inline u32 gr_pd_alpha_ratio_table_r(u32 i)
1190{
1191 return 0x00406800 + i*4;
1192}
1193static inline u32 gr_pd_alpha_ratio_table__size_1_v(void)
1194{
1195 return 0x00000100;
1196}
1197static inline u32 gr_pd_alpha_ratio_table_gpc_4n0_mask_f(u32 v)
1198{
1199 return (v & 0xff) << 0;
1200}
1201static inline u32 gr_pd_alpha_ratio_table_gpc_4n1_mask_f(u32 v)
1202{
1203 return (v & 0xff) << 8;
1204}
1205static inline u32 gr_pd_alpha_ratio_table_gpc_4n2_mask_f(u32 v)
1206{
1207 return (v & 0xff) << 16;
1208}
1209static inline u32 gr_pd_alpha_ratio_table_gpc_4n3_mask_f(u32 v)
1210{
1211 return (v & 0xff) << 24;
1212}
1213static inline u32 gr_pd_beta_ratio_table_r(u32 i)
1214{
1215 return 0x00406c00 + i*4;
1216}
1217static inline u32 gr_pd_beta_ratio_table__size_1_v(void)
1218{
1219 return 0x00000100;
1220}
1221static inline u32 gr_pd_beta_ratio_table_gpc_4n0_mask_f(u32 v)
1222{
1223 return (v & 0xff) << 0;
1224}
1225static inline u32 gr_pd_beta_ratio_table_gpc_4n1_mask_f(u32 v)
1226{
1227 return (v & 0xff) << 8;
1228}
1229static inline u32 gr_pd_beta_ratio_table_gpc_4n2_mask_f(u32 v)
1230{
1231 return (v & 0xff) << 16;
1232}
1233static inline u32 gr_pd_beta_ratio_table_gpc_4n3_mask_f(u32 v)
1234{
1235 return (v & 0xff) << 24;
1236}
1237static inline u32 gr_ds_debug_r(void)
1238{
1239 return 0x00405800;
1240}
1241static inline u32 gr_ds_debug_timeslice_mode_disable_f(void)
1242{
1243 return 0x0;
1244}
1245static inline u32 gr_ds_debug_timeslice_mode_enable_f(void)
1246{
1247 return 0x8000000;
1248}
1249static inline u32 gr_ds_zbc_color_r_r(void)
1250{
1251 return 0x00405804;
1252}
1253static inline u32 gr_ds_zbc_color_r_val_f(u32 v)
1254{
1255 return (v & 0xffffffff) << 0;
1256}
1257static inline u32 gr_ds_zbc_color_g_r(void)
1258{
1259 return 0x00405808;
1260}
1261static inline u32 gr_ds_zbc_color_g_val_f(u32 v)
1262{
1263 return (v & 0xffffffff) << 0;
1264}
1265static inline u32 gr_ds_zbc_color_b_r(void)
1266{
1267 return 0x0040580c;
1268}
1269static inline u32 gr_ds_zbc_color_b_val_f(u32 v)
1270{
1271 return (v & 0xffffffff) << 0;
1272}
1273static inline u32 gr_ds_zbc_color_a_r(void)
1274{
1275 return 0x00405810;
1276}
1277static inline u32 gr_ds_zbc_color_a_val_f(u32 v)
1278{
1279 return (v & 0xffffffff) << 0;
1280}
1281static inline u32 gr_ds_zbc_color_fmt_r(void)
1282{
1283 return 0x00405814;
1284}
1285static inline u32 gr_ds_zbc_color_fmt_val_f(u32 v)
1286{
1287 return (v & 0x7f) << 0;
1288}
1289static inline u32 gr_ds_zbc_color_fmt_val_invalid_f(void)
1290{
1291 return 0x0;
1292}
1293static inline u32 gr_ds_zbc_color_fmt_val_zero_v(void)
1294{
1295 return 0x00000001;
1296}
1297static inline u32 gr_ds_zbc_color_fmt_val_unorm_one_v(void)
1298{
1299 return 0x00000002;
1300}
1301static inline u32 gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v(void)
1302{
1303 return 0x00000004;
1304}
1305static inline u32 gr_ds_zbc_z_r(void)
1306{
1307 return 0x00405818;
1308}
1309static inline u32 gr_ds_zbc_z_val_s(void)
1310{
1311 return 32;
1312}
1313static inline u32 gr_ds_zbc_z_val_f(u32 v)
1314{
1315 return (v & 0xffffffff) << 0;
1316}
1317static inline u32 gr_ds_zbc_z_val_m(void)
1318{
1319 return 0xffffffff << 0;
1320}
1321static inline u32 gr_ds_zbc_z_val_v(u32 r)
1322{
1323 return (r >> 0) & 0xffffffff;
1324}
1325static inline u32 gr_ds_zbc_z_val__init_v(void)
1326{
1327 return 0x00000000;
1328}
1329static inline u32 gr_ds_zbc_z_val__init_f(void)
1330{
1331 return 0x0;
1332}
1333static inline u32 gr_ds_zbc_z_fmt_r(void)
1334{
1335 return 0x0040581c;
1336}
1337static inline u32 gr_ds_zbc_z_fmt_val_f(u32 v)
1338{
1339 return (v & 0x1) << 0;
1340}
1341static inline u32 gr_ds_zbc_z_fmt_val_invalid_f(void)
1342{
1343 return 0x0;
1344}
1345static inline u32 gr_ds_zbc_z_fmt_val_fp32_v(void)
1346{
1347 return 0x00000001;
1348}
1349static inline u32 gr_ds_zbc_tbl_index_r(void)
1350{
1351 return 0x00405820;
1352}
1353static inline u32 gr_ds_zbc_tbl_index_val_f(u32 v)
1354{
1355 return (v & 0xf) << 0;
1356}
1357static inline u32 gr_ds_zbc_tbl_ld_r(void)
1358{
1359 return 0x00405824;
1360}
1361static inline u32 gr_ds_zbc_tbl_ld_select_c_f(void)
1362{
1363 return 0x0;
1364}
1365static inline u32 gr_ds_zbc_tbl_ld_select_z_f(void)
1366{
1367 return 0x1;
1368}
1369static inline u32 gr_ds_zbc_tbl_ld_action_write_f(void)
1370{
1371 return 0x0;
1372}
1373static inline u32 gr_ds_zbc_tbl_ld_trigger_active_f(void)
1374{
1375 return 0x4;
1376}
1377static inline u32 gr_ds_tga_constraintlogic_r(void)
1378{
1379 return 0x00405830;
1380}
1381static inline u32 gr_ds_tga_constraintlogic_beta_cbsize_f(u32 v)
1382{
1383 return (v & 0xfff) << 16;
1384}
1385static inline u32 gr_ds_tga_constraintlogic_alpha_cbsize_f(u32 v)
1386{
1387 return (v & 0xfff) << 0;
1388}
1389static inline u32 gr_ds_hww_esr_r(void)
1390{
1391 return 0x00405840;
1392}
1393static inline u32 gr_ds_hww_esr_reset_s(void)
1394{
1395 return 1;
1396}
1397static inline u32 gr_ds_hww_esr_reset_f(u32 v)
1398{
1399 return (v & 0x1) << 30;
1400}
1401static inline u32 gr_ds_hww_esr_reset_m(void)
1402{
1403 return 0x1 << 30;
1404}
1405static inline u32 gr_ds_hww_esr_reset_v(u32 r)
1406{
1407 return (r >> 30) & 0x1;
1408}
1409static inline u32 gr_ds_hww_esr_reset_task_v(void)
1410{
1411 return 0x00000001;
1412}
1413static inline u32 gr_ds_hww_esr_reset_task_f(void)
1414{
1415 return 0x40000000;
1416}
1417static inline u32 gr_ds_hww_esr_en_enabled_f(void)
1418{
1419 return 0x80000000;
1420}
1421static inline u32 gr_ds_hww_report_mask_r(void)
1422{
1423 return 0x00405844;
1424}
1425static inline u32 gr_ds_hww_report_mask_sph0_err_report_f(void)
1426{
1427 return 0x1;
1428}
1429static inline u32 gr_ds_hww_report_mask_sph1_err_report_f(void)
1430{
1431 return 0x2;
1432}
1433static inline u32 gr_ds_hww_report_mask_sph2_err_report_f(void)
1434{
1435 return 0x4;
1436}
1437static inline u32 gr_ds_hww_report_mask_sph3_err_report_f(void)
1438{
1439 return 0x8;
1440}
1441static inline u32 gr_ds_hww_report_mask_sph4_err_report_f(void)
1442{
1443 return 0x10;
1444}
1445static inline u32 gr_ds_hww_report_mask_sph5_err_report_f(void)
1446{
1447 return 0x20;
1448}
1449static inline u32 gr_ds_hww_report_mask_sph6_err_report_f(void)
1450{
1451 return 0x40;
1452}
1453static inline u32 gr_ds_hww_report_mask_sph7_err_report_f(void)
1454{
1455 return 0x80;
1456}
1457static inline u32 gr_ds_hww_report_mask_sph8_err_report_f(void)
1458{
1459 return 0x100;
1460}
1461static inline u32 gr_ds_hww_report_mask_sph9_err_report_f(void)
1462{
1463 return 0x200;
1464}
1465static inline u32 gr_ds_hww_report_mask_sph10_err_report_f(void)
1466{
1467 return 0x400;
1468}
1469static inline u32 gr_ds_hww_report_mask_sph11_err_report_f(void)
1470{
1471 return 0x800;
1472}
1473static inline u32 gr_ds_hww_report_mask_sph12_err_report_f(void)
1474{
1475 return 0x1000;
1476}
1477static inline u32 gr_ds_hww_report_mask_sph13_err_report_f(void)
1478{
1479 return 0x2000;
1480}
1481static inline u32 gr_ds_hww_report_mask_sph14_err_report_f(void)
1482{
1483 return 0x4000;
1484}
1485static inline u32 gr_ds_hww_report_mask_sph15_err_report_f(void)
1486{
1487 return 0x8000;
1488}
1489static inline u32 gr_ds_hww_report_mask_sph16_err_report_f(void)
1490{
1491 return 0x10000;
1492}
1493static inline u32 gr_ds_hww_report_mask_sph17_err_report_f(void)
1494{
1495 return 0x20000;
1496}
1497static inline u32 gr_ds_hww_report_mask_sph18_err_report_f(void)
1498{
1499 return 0x40000;
1500}
1501static inline u32 gr_ds_hww_report_mask_sph19_err_report_f(void)
1502{
1503 return 0x80000;
1504}
1505static inline u32 gr_ds_hww_report_mask_sph20_err_report_f(void)
1506{
1507 return 0x100000;
1508}
1509static inline u32 gr_ds_hww_report_mask_sph21_err_report_f(void)
1510{
1511 return 0x200000;
1512}
1513static inline u32 gr_ds_hww_report_mask_sph22_err_report_f(void)
1514{
1515 return 0x400000;
1516}
1517static inline u32 gr_ds_hww_report_mask_sph23_err_report_f(void)
1518{
1519 return 0x800000;
1520}
1521static inline u32 gr_ds_num_tpc_per_gpc_r(u32 i)
1522{
1523 return 0x00405870 + i*4;
1524}
1525static inline u32 gr_scc_bundle_cb_base_r(void)
1526{
1527 return 0x00408004;
1528}
1529static inline u32 gr_scc_bundle_cb_base_addr_39_8_f(u32 v)
1530{
1531 return (v & 0xffffffff) << 0;
1532}
1533static inline u32 gr_scc_bundle_cb_base_addr_39_8_align_bits_v(void)
1534{
1535 return 0x00000008;
1536}
1537static inline u32 gr_scc_bundle_cb_size_r(void)
1538{
1539 return 0x00408008;
1540}
1541static inline u32 gr_scc_bundle_cb_size_div_256b_f(u32 v)
1542{
1543 return (v & 0x7ff) << 0;
1544}
1545static inline u32 gr_scc_bundle_cb_size_div_256b__prod_v(void)
1546{
1547 return 0x00000018;
1548}
1549static inline u32 gr_scc_bundle_cb_size_div_256b_byte_granularity_v(void)
1550{
1551 return 0x00000100;
1552}
1553static inline u32 gr_scc_bundle_cb_size_valid_false_v(void)
1554{
1555 return 0x00000000;
1556}
1557static inline u32 gr_scc_bundle_cb_size_valid_false_f(void)
1558{
1559 return 0x0;
1560}
1561static inline u32 gr_scc_bundle_cb_size_valid_true_f(void)
1562{
1563 return 0x80000000;
1564}
1565static inline u32 gr_scc_pagepool_base_r(void)
1566{
1567 return 0x0040800c;
1568}
1569static inline u32 gr_scc_pagepool_base_addr_39_8_f(u32 v)
1570{
1571 return (v & 0xffffffff) << 0;
1572}
1573static inline u32 gr_scc_pagepool_base_addr_39_8_align_bits_v(void)
1574{
1575 return 0x00000008;
1576}
1577static inline u32 gr_scc_pagepool_r(void)
1578{
1579 return 0x00408010;
1580}
1581static inline u32 gr_scc_pagepool_total_pages_f(u32 v)
1582{
1583 return (v & 0xff) << 0;
1584}
1585static inline u32 gr_scc_pagepool_total_pages_hwmax_v(void)
1586{
1587 return 0x00000000;
1588}
1589static inline u32 gr_scc_pagepool_total_pages_hwmax_value_v(void)
1590{
1591 return 0x00000080;
1592}
1593static inline u32 gr_scc_pagepool_total_pages_byte_granularity_v(void)
1594{
1595 return 0x00000100;
1596}
1597static inline u32 gr_scc_pagepool_max_valid_pages_s(void)
1598{
1599 return 8;
1600}
1601static inline u32 gr_scc_pagepool_max_valid_pages_f(u32 v)
1602{
1603 return (v & 0xff) << 8;
1604}
1605static inline u32 gr_scc_pagepool_max_valid_pages_m(void)
1606{
1607 return 0xff << 8;
1608}
1609static inline u32 gr_scc_pagepool_max_valid_pages_v(u32 r)
1610{
1611 return (r >> 8) & 0xff;
1612}
1613static inline u32 gr_scc_pagepool_valid_true_f(void)
1614{
1615 return 0x80000000;
1616}
1617static inline u32 gr_scc_init_r(void)
1618{
1619 return 0x0040802c;
1620}
1621static inline u32 gr_scc_init_ram_trigger_f(void)
1622{
1623 return 0x1;
1624}
1625static inline u32 gr_scc_hww_esr_r(void)
1626{
1627 return 0x00408030;
1628}
1629static inline u32 gr_scc_hww_esr_reset_active_f(void)
1630{
1631 return 0x40000000;
1632}
1633static inline u32 gr_scc_hww_esr_en_enable_f(void)
1634{
1635 return 0x80000000;
1636}
1637static inline u32 gr_sked_hww_esr_r(void)
1638{
1639 return 0x00407020;
1640}
1641static inline u32 gr_sked_hww_esr_reset_active_f(void)
1642{
1643 return 0x40000000;
1644}
1645static inline u32 gr_cwd_fs_r(void)
1646{
1647 return 0x00405b00;
1648}
1649static inline u32 gr_cwd_fs_num_gpcs_f(u32 v)
1650{
1651 return (v & 0xff) << 0;
1652}
1653static inline u32 gr_cwd_fs_num_tpcs_f(u32 v)
1654{
1655 return (v & 0xff) << 8;
1656}
1657static inline u32 gr_gpc0_fs_gpc_r(void)
1658{
1659 return 0x00502608;
1660}
1661static inline u32 gr_gpc0_fs_gpc_num_available_tpcs_v(u32 r)
1662{
1663 return (r >> 0) & 0x1f;
1664}
1665static inline u32 gr_gpc0_fs_gpc_num_available_zculls_v(u32 r)
1666{
1667 return (r >> 16) & 0x1f;
1668}
1669static inline u32 gr_gpc0_cfg_r(void)
1670{
1671 return 0x00502620;
1672}
1673static inline u32 gr_gpc0_cfg_imem_sz_v(u32 r)
1674{
1675 return (r >> 0) & 0xff;
1676}
1677static inline u32 gr_gpccs_rc_lanes_r(void)
1678{
1679 return 0x00502880;
1680}
1681static inline u32 gr_gpccs_rc_lanes_num_chains_s(void)
1682{
1683 return 6;
1684}
1685static inline u32 gr_gpccs_rc_lanes_num_chains_f(u32 v)
1686{
1687 return (v & 0x3f) << 0;
1688}
1689static inline u32 gr_gpccs_rc_lanes_num_chains_m(void)
1690{
1691 return 0x3f << 0;
1692}
1693static inline u32 gr_gpccs_rc_lanes_num_chains_v(u32 r)
1694{
1695 return (r >> 0) & 0x3f;
1696}
1697static inline u32 gr_gpccs_rc_lane_size_r(u32 i)
1698{
1699 return 0x00502910 + i*0;
1700}
1701static inline u32 gr_gpccs_rc_lane_size__size_1_v(void)
1702{
1703 return 0x00000010;
1704}
1705static inline u32 gr_gpccs_rc_lane_size_v_s(void)
1706{
1707 return 24;
1708}
1709static inline u32 gr_gpccs_rc_lane_size_v_f(u32 v)
1710{
1711 return (v & 0xffffff) << 0;
1712}
1713static inline u32 gr_gpccs_rc_lane_size_v_m(void)
1714{
1715 return 0xffffff << 0;
1716}
1717static inline u32 gr_gpccs_rc_lane_size_v_v(u32 r)
1718{
1719 return (r >> 0) & 0xffffff;
1720}
1721static inline u32 gr_gpccs_rc_lane_size_v_0_v(void)
1722{
1723 return 0x00000000;
1724}
1725static inline u32 gr_gpccs_rc_lane_size_v_0_f(void)
1726{
1727 return 0x0;
1728}
1729static inline u32 gr_gpc0_zcull_fs_r(void)
1730{
1731 return 0x00500910;
1732}
1733static inline u32 gr_gpc0_zcull_fs_num_sms_f(u32 v)
1734{
1735 return (v & 0x1ff) << 0;
1736}
1737static inline u32 gr_gpc0_zcull_fs_num_active_banks_f(u32 v)
1738{
1739 return (v & 0xf) << 16;
1740}
1741static inline u32 gr_gpc0_zcull_ram_addr_r(void)
1742{
1743 return 0x00500914;
1744}
1745static inline u32 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(u32 v)
1746{
1747 return (v & 0xf) << 0;
1748}
1749static inline u32 gr_gpc0_zcull_ram_addr_row_offset_f(u32 v)
1750{
1751 return (v & 0xf) << 8;
1752}
1753static inline u32 gr_gpc0_zcull_sm_num_rcp_r(void)
1754{
1755 return 0x00500918;
1756}
1757static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative_f(u32 v)
1758{
1759 return (v & 0xffffff) << 0;
1760}
1761static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative__max_v(void)
1762{
1763 return 0x00800000;
1764}
1765static inline u32 gr_gpc0_zcull_total_ram_size_r(void)
1766{
1767 return 0x00500920;
1768}
1769static inline u32 gr_gpc0_zcull_total_ram_size_num_aliquots_f(u32 v)
1770{
1771 return (v & 0xffff) << 0;
1772}
1773static inline u32 gr_gpc0_zcull_zcsize_r(u32 i)
1774{
1775 return 0x00500a04 + i*32;
1776}
1777static inline u32 gr_gpc0_zcull_zcsize_height_subregion__multiple_v(void)
1778{
1779 return 0x00000040;
1780}
1781static inline u32 gr_gpc0_zcull_zcsize_width_subregion__multiple_v(void)
1782{
1783 return 0x00000010;
1784}
1785static inline u32 gr_gpc0_gpm_pd_active_tpcs_r(void)
1786{
1787 return 0x00500c08;
1788}
1789static inline u32 gr_gpc0_gpm_pd_active_tpcs_num_f(u32 v)
1790{
1791 return (v & 0x7) << 0;
1792}
1793static inline u32 gr_gpc0_gpm_pd_sm_id_r(u32 i)
1794{
1795 return 0x00500c10 + i*4;
1796}
1797static inline u32 gr_gpc0_gpm_pd_sm_id_id_f(u32 v)
1798{
1799 return (v & 0xff) << 0;
1800}
1801static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(u32 i)
1802{
1803 return 0x00500c30 + i*4;
1804}
1805static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(u32 r)
1806{
1807 return (r >> 0) & 0xff;
1808}
1809static inline u32 gr_gpc0_gpm_sd_active_tpcs_r(void)
1810{
1811 return 0x00500c8c;
1812}
1813static inline u32 gr_gpc0_gpm_sd_active_tpcs_num_f(u32 v)
1814{
1815 return (v & 0x7) << 0;
1816}
1817static inline u32 gr_gpc0_tpc0_pe_cfg_smid_r(void)
1818{
1819 return 0x00504088;
1820}
1821static inline u32 gr_gpc0_tpc0_pe_cfg_smid_value_f(u32 v)
1822{
1823 return (v & 0xffff) << 0;
1824}
1825static inline u32 gr_gpc0_tpc0_l1c_cfg_smid_r(void)
1826{
1827 return 0x005044e8;
1828}
1829static inline u32 gr_gpc0_tpc0_l1c_cfg_smid_value_f(u32 v)
1830{
1831 return (v & 0xffff) << 0;
1832}
1833static inline u32 gr_gpc0_tpc0_sm_cfg_r(void)
1834{
1835 return 0x00504698;
1836}
1837static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
1838{
1839 return (v & 0xffff) << 0;
1840}
1841static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
1842{
1843 return 0x00503018;
1844}
1845static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(void)
1846{
1847 return 0x1 << 0;
1848}
1849static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f(void)
1850{
1851 return 0x1;
1852}
1853static inline u32 gr_gpc0_ppc0_cbm_cfg_r(void)
1854{
1855 return 0x005030c0;
1856}
1857static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_f(u32 v)
1858{
1859 return (v & 0xffff) << 0;
1860}
1861static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_m(void)
1862{
1863 return 0xffff << 0;
1864}
1865static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_v(u32 r)
1866{
1867 return (r >> 0) & 0xffff;
1868}
1869static inline u32 gr_gpc0_ppc0_cbm_cfg_size_f(u32 v)
1870{
1871 return (v & 0xfff) << 16;
1872}
1873static inline u32 gr_gpc0_ppc0_cbm_cfg_size_m(void)
1874{
1875 return 0xfff << 16;
1876}
1877static inline u32 gr_gpc0_ppc0_cbm_cfg_size_v(u32 r)
1878{
1879 return (r >> 16) & 0xfff;
1880}
1881static inline u32 gr_gpc0_ppc0_cbm_cfg_size_default_v(void)
1882{
1883 return 0x00000240;
1884}
1885static inline u32 gr_gpc0_ppc0_cbm_cfg_size_granularity_v(void)
1886{
1887 return 0x00000020;
1888}
1889static inline u32 gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(u32 v)
1890{
1891 return (v & 0x1) << 28;
1892}
1893static inline u32 gr_gpc0_ppc0_cbm_cfg2_r(void)
1894{
1895 return 0x005030e4;
1896}
1897static inline u32 gr_gpc0_ppc0_cbm_cfg2_start_offset_f(u32 v)
1898{
1899 return (v & 0xffff) << 0;
1900}
1901static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_f(u32 v)
1902{
1903 return (v & 0xfff) << 16;
1904}
1905static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_m(void)
1906{
1907 return 0xfff << 16;
1908}
1909static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_v(u32 r)
1910{
1911 return (r >> 16) & 0xfff;
1912}
1913static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_default_v(void)
1914{
1915 return 0x00000648;
1916}
1917static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_granularity_v(void)
1918{
1919 return 0x00000020;
1920}
1921static inline u32 gr_gpccs_falcon_addr_r(void)
1922{
1923 return 0x0041a0ac;
1924}
1925static inline u32 gr_gpccs_falcon_addr_lsb_s(void)
1926{
1927 return 6;
1928}
1929static inline u32 gr_gpccs_falcon_addr_lsb_f(u32 v)
1930{
1931 return (v & 0x3f) << 0;
1932}
1933static inline u32 gr_gpccs_falcon_addr_lsb_m(void)
1934{
1935 return 0x3f << 0;
1936}
1937static inline u32 gr_gpccs_falcon_addr_lsb_v(u32 r)
1938{
1939 return (r >> 0) & 0x3f;
1940}
1941static inline u32 gr_gpccs_falcon_addr_lsb_init_v(void)
1942{
1943 return 0x00000000;
1944}
1945static inline u32 gr_gpccs_falcon_addr_lsb_init_f(void)
1946{
1947 return 0x0;
1948}
1949static inline u32 gr_gpccs_falcon_addr_msb_s(void)
1950{
1951 return 6;
1952}
1953static inline u32 gr_gpccs_falcon_addr_msb_f(u32 v)
1954{
1955 return (v & 0x3f) << 6;
1956}
1957static inline u32 gr_gpccs_falcon_addr_msb_m(void)
1958{
1959 return 0x3f << 6;
1960}
1961static inline u32 gr_gpccs_falcon_addr_msb_v(u32 r)
1962{
1963 return (r >> 6) & 0x3f;
1964}
1965static inline u32 gr_gpccs_falcon_addr_msb_init_v(void)
1966{
1967 return 0x00000000;
1968}
1969static inline u32 gr_gpccs_falcon_addr_msb_init_f(void)
1970{
1971 return 0x0;
1972}
1973static inline u32 gr_gpccs_falcon_addr_ext_s(void)
1974{
1975 return 12;
1976}
1977static inline u32 gr_gpccs_falcon_addr_ext_f(u32 v)
1978{
1979 return (v & 0xfff) << 0;
1980}
1981static inline u32 gr_gpccs_falcon_addr_ext_m(void)
1982{
1983 return 0xfff << 0;
1984}
1985static inline u32 gr_gpccs_falcon_addr_ext_v(u32 r)
1986{
1987 return (r >> 0) & 0xfff;
1988}
1989static inline u32 gr_gpccs_cpuctl_r(void)
1990{
1991 return 0x0041a100;
1992}
1993static inline u32 gr_gpccs_cpuctl_startcpu_f(u32 v)
1994{
1995 return (v & 0x1) << 1;
1996}
1997static inline u32 gr_gpccs_dmactl_r(void)
1998{
1999 return 0x0041a10c;
2000}
2001static inline u32 gr_gpccs_dmactl_require_ctx_f(u32 v)
2002{
2003 return (v & 0x1) << 0;
2004}
2005static inline u32 gr_gpccs_dmactl_dmem_scrubbing_m(void)
2006{
2007 return 0x1 << 1;
2008}
2009static inline u32 gr_gpccs_dmactl_imem_scrubbing_m(void)
2010{
2011 return 0x1 << 2;
2012}
2013static inline u32 gr_gpccs_imemc_r(u32 i)
2014{
2015 return 0x0041a180 + i*16;
2016}
2017static inline u32 gr_gpccs_imemc_offs_f(u32 v)
2018{
2019 return (v & 0x3f) << 2;
2020}
2021static inline u32 gr_gpccs_imemc_blk_f(u32 v)
2022{
2023 return (v & 0xff) << 8;
2024}
2025static inline u32 gr_gpccs_imemc_aincw_f(u32 v)
2026{
2027 return (v & 0x1) << 24;
2028}
2029static inline u32 gr_gpccs_imemd_r(u32 i)
2030{
2031 return 0x0041a184 + i*16;
2032}
2033static inline u32 gr_gpccs_imemt_r(u32 i)
2034{
2035 return 0x0041a188 + i*16;
2036}
2037static inline u32 gr_gpccs_imemt__size_1_v(void)
2038{
2039 return 0x00000004;
2040}
2041static inline u32 gr_gpccs_imemt_tag_f(u32 v)
2042{
2043 return (v & 0xffff) << 0;
2044}
2045static inline u32 gr_gpccs_dmemc_r(u32 i)
2046{
2047 return 0x0041a1c0 + i*8;
2048}
2049static inline u32 gr_gpccs_dmemc_offs_f(u32 v)
2050{
2051 return (v & 0x3f) << 2;
2052}
2053static inline u32 gr_gpccs_dmemc_blk_f(u32 v)
2054{
2055 return (v & 0xff) << 8;
2056}
2057static inline u32 gr_gpccs_dmemc_aincw_f(u32 v)
2058{
2059 return (v & 0x1) << 24;
2060}
2061static inline u32 gr_gpccs_dmemd_r(u32 i)
2062{
2063 return 0x0041a1c4 + i*8;
2064}
2065static inline u32 gr_gpccs_ctxsw_mailbox_r(u32 i)
2066{
2067 return 0x0041a800 + i*4;
2068}
2069static inline u32 gr_gpccs_ctxsw_mailbox_value_f(u32 v)
2070{
2071 return (v & 0xffffffff) << 0;
2072}
2073static inline u32 gr_gpcs_setup_bundle_cb_base_r(void)
2074{
2075 return 0x00418808;
2076}
2077static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_s(void)
2078{
2079 return 32;
2080}
2081static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_f(u32 v)
2082{
2083 return (v & 0xffffffff) << 0;
2084}
2085static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_m(void)
2086{
2087 return 0xffffffff << 0;
2088}
2089static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_v(u32 r)
2090{
2091 return (r >> 0) & 0xffffffff;
2092}
2093static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_init_v(void)
2094{
2095 return 0x00000000;
2096}
2097static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_init_f(void)
2098{
2099 return 0x0;
2100}
2101static inline u32 gr_gpcs_setup_bundle_cb_size_r(void)
2102{
2103 return 0x0041880c;
2104}
2105static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_s(void)
2106{
2107 return 11;
2108}
2109static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_f(u32 v)
2110{
2111 return (v & 0x7ff) << 0;
2112}
2113static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_m(void)
2114{
2115 return 0x7ff << 0;
2116}
2117static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_v(u32 r)
2118{
2119 return (r >> 0) & 0x7ff;
2120}
2121static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_init_v(void)
2122{
2123 return 0x00000000;
2124}
2125static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_init_f(void)
2126{
2127 return 0x0;
2128}
2129static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b__prod_v(void)
2130{
2131 return 0x00000018;
2132}
2133static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b__prod_f(void)
2134{
2135 return 0x18;
2136}
2137static inline u32 gr_gpcs_setup_bundle_cb_size_valid_s(void)
2138{
2139 return 1;
2140}
2141static inline u32 gr_gpcs_setup_bundle_cb_size_valid_f(u32 v)
2142{
2143 return (v & 0x1) << 31;
2144}
2145static inline u32 gr_gpcs_setup_bundle_cb_size_valid_m(void)
2146{
2147 return 0x1 << 31;
2148}
2149static inline u32 gr_gpcs_setup_bundle_cb_size_valid_v(u32 r)
2150{
2151 return (r >> 31) & 0x1;
2152}
2153static inline u32 gr_gpcs_setup_bundle_cb_size_valid_false_v(void)
2154{
2155 return 0x00000000;
2156}
2157static inline u32 gr_gpcs_setup_bundle_cb_size_valid_false_f(void)
2158{
2159 return 0x0;
2160}
2161static inline u32 gr_gpcs_setup_bundle_cb_size_valid_true_v(void)
2162{
2163 return 0x00000001;
2164}
2165static inline u32 gr_gpcs_setup_bundle_cb_size_valid_true_f(void)
2166{
2167 return 0x80000000;
2168}
2169static inline u32 gr_gpcs_setup_attrib_cb_base_r(void)
2170{
2171 return 0x00418810;
2172}
2173static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_f(u32 v)
2174{
2175 return (v & 0xfffffff) << 0;
2176}
2177static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(void)
2178{
2179 return 0x0000000c;
2180}
2181static inline u32 gr_gpcs_setup_attrib_cb_base_valid_true_f(void)
2182{
2183 return 0x80000000;
2184}
2185static inline u32 gr_crstr_gpc_map0_r(void)
2186{
2187 return 0x00418b08;
2188}
2189static inline u32 gr_crstr_gpc_map0_tile0_f(u32 v)
2190{
2191 return (v & 0x7) << 0;
2192}
2193static inline u32 gr_crstr_gpc_map0_tile1_f(u32 v)
2194{
2195 return (v & 0x7) << 5;
2196}
2197static inline u32 gr_crstr_gpc_map0_tile2_f(u32 v)
2198{
2199 return (v & 0x7) << 10;
2200}
2201static inline u32 gr_crstr_gpc_map0_tile3_f(u32 v)
2202{
2203 return (v & 0x7) << 15;
2204}
2205static inline u32 gr_crstr_gpc_map0_tile4_f(u32 v)
2206{
2207 return (v & 0x7) << 20;
2208}
2209static inline u32 gr_crstr_gpc_map0_tile5_f(u32 v)
2210{
2211 return (v & 0x7) << 25;
2212}
2213static inline u32 gr_crstr_gpc_map1_r(void)
2214{
2215 return 0x00418b0c;
2216}
2217static inline u32 gr_crstr_gpc_map1_tile6_f(u32 v)
2218{
2219 return (v & 0x7) << 0;
2220}
2221static inline u32 gr_crstr_gpc_map1_tile7_f(u32 v)
2222{
2223 return (v & 0x7) << 5;
2224}
2225static inline u32 gr_crstr_gpc_map1_tile8_f(u32 v)
2226{
2227 return (v & 0x7) << 10;
2228}
2229static inline u32 gr_crstr_gpc_map1_tile9_f(u32 v)
2230{
2231 return (v & 0x7) << 15;
2232}
2233static inline u32 gr_crstr_gpc_map1_tile10_f(u32 v)
2234{
2235 return (v & 0x7) << 20;
2236}
2237static inline u32 gr_crstr_gpc_map1_tile11_f(u32 v)
2238{
2239 return (v & 0x7) << 25;
2240}
2241static inline u32 gr_crstr_gpc_map2_r(void)
2242{
2243 return 0x00418b10;
2244}
2245static inline u32 gr_crstr_gpc_map2_tile12_f(u32 v)
2246{
2247 return (v & 0x7) << 0;
2248}
2249static inline u32 gr_crstr_gpc_map2_tile13_f(u32 v)
2250{
2251 return (v & 0x7) << 5;
2252}
2253static inline u32 gr_crstr_gpc_map2_tile14_f(u32 v)
2254{
2255 return (v & 0x7) << 10;
2256}
2257static inline u32 gr_crstr_gpc_map2_tile15_f(u32 v)
2258{
2259 return (v & 0x7) << 15;
2260}
2261static inline u32 gr_crstr_gpc_map2_tile16_f(u32 v)
2262{
2263 return (v & 0x7) << 20;
2264}
2265static inline u32 gr_crstr_gpc_map2_tile17_f(u32 v)
2266{
2267 return (v & 0x7) << 25;
2268}
2269static inline u32 gr_crstr_gpc_map3_r(void)
2270{
2271 return 0x00418b14;
2272}
2273static inline u32 gr_crstr_gpc_map3_tile18_f(u32 v)
2274{
2275 return (v & 0x7) << 0;
2276}
2277static inline u32 gr_crstr_gpc_map3_tile19_f(u32 v)
2278{
2279 return (v & 0x7) << 5;
2280}
2281static inline u32 gr_crstr_gpc_map3_tile20_f(u32 v)
2282{
2283 return (v & 0x7) << 10;
2284}
2285static inline u32 gr_crstr_gpc_map3_tile21_f(u32 v)
2286{
2287 return (v & 0x7) << 15;
2288}
2289static inline u32 gr_crstr_gpc_map3_tile22_f(u32 v)
2290{
2291 return (v & 0x7) << 20;
2292}
2293static inline u32 gr_crstr_gpc_map3_tile23_f(u32 v)
2294{
2295 return (v & 0x7) << 25;
2296}
2297static inline u32 gr_crstr_gpc_map4_r(void)
2298{
2299 return 0x00418b18;
2300}
2301static inline u32 gr_crstr_gpc_map4_tile24_f(u32 v)
2302{
2303 return (v & 0x7) << 0;
2304}
2305static inline u32 gr_crstr_gpc_map4_tile25_f(u32 v)
2306{
2307 return (v & 0x7) << 5;
2308}
2309static inline u32 gr_crstr_gpc_map4_tile26_f(u32 v)
2310{
2311 return (v & 0x7) << 10;
2312}
2313static inline u32 gr_crstr_gpc_map4_tile27_f(u32 v)
2314{
2315 return (v & 0x7) << 15;
2316}
2317static inline u32 gr_crstr_gpc_map4_tile28_f(u32 v)
2318{
2319 return (v & 0x7) << 20;
2320}
2321static inline u32 gr_crstr_gpc_map4_tile29_f(u32 v)
2322{
2323 return (v & 0x7) << 25;
2324}
2325static inline u32 gr_crstr_gpc_map5_r(void)
2326{
2327 return 0x00418b1c;
2328}
2329static inline u32 gr_crstr_gpc_map5_tile30_f(u32 v)
2330{
2331 return (v & 0x7) << 0;
2332}
2333static inline u32 gr_crstr_gpc_map5_tile31_f(u32 v)
2334{
2335 return (v & 0x7) << 5;
2336}
2337static inline u32 gr_crstr_gpc_map5_tile32_f(u32 v)
2338{
2339 return (v & 0x7) << 10;
2340}
2341static inline u32 gr_crstr_gpc_map5_tile33_f(u32 v)
2342{
2343 return (v & 0x7) << 15;
2344}
2345static inline u32 gr_crstr_gpc_map5_tile34_f(u32 v)
2346{
2347 return (v & 0x7) << 20;
2348}
2349static inline u32 gr_crstr_gpc_map5_tile35_f(u32 v)
2350{
2351 return (v & 0x7) << 25;
2352}
2353static inline u32 gr_crstr_map_table_cfg_r(void)
2354{
2355 return 0x00418bb8;
2356}
2357static inline u32 gr_crstr_map_table_cfg_row_offset_f(u32 v)
2358{
2359 return (v & 0xff) << 0;
2360}
2361static inline u32 gr_crstr_map_table_cfg_num_entries_f(u32 v)
2362{
2363 return (v & 0xff) << 8;
2364}
2365static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_r(void)
2366{
2367 return 0x00418980;
2368}
2369static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(u32 v)
2370{
2371 return (v & 0x7) << 0;
2372}
2373static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(u32 v)
2374{
2375 return (v & 0x7) << 4;
2376}
2377static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(u32 v)
2378{
2379 return (v & 0x7) << 8;
2380}
2381static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(u32 v)
2382{
2383 return (v & 0x7) << 12;
2384}
2385static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(u32 v)
2386{
2387 return (v & 0x7) << 16;
2388}
2389static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(u32 v)
2390{
2391 return (v & 0x7) << 20;
2392}
2393static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(u32 v)
2394{
2395 return (v & 0x7) << 24;
2396}
2397static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(u32 v)
2398{
2399 return (v & 0x7) << 28;
2400}
2401static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_r(void)
2402{
2403 return 0x00418984;
2404}
2405static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(u32 v)
2406{
2407 return (v & 0x7) << 0;
2408}
2409static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(u32 v)
2410{
2411 return (v & 0x7) << 4;
2412}
2413static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(u32 v)
2414{
2415 return (v & 0x7) << 8;
2416}
2417static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(u32 v)
2418{
2419 return (v & 0x7) << 12;
2420}
2421static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(u32 v)
2422{
2423 return (v & 0x7) << 16;
2424}
2425static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(u32 v)
2426{
2427 return (v & 0x7) << 20;
2428}
2429static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(u32 v)
2430{
2431 return (v & 0x7) << 24;
2432}
2433static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(u32 v)
2434{
2435 return (v & 0x7) << 28;
2436}
2437static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_r(void)
2438{
2439 return 0x00418988;
2440}
2441static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(u32 v)
2442{
2443 return (v & 0x7) << 0;
2444}
2445static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(u32 v)
2446{
2447 return (v & 0x7) << 4;
2448}
2449static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(u32 v)
2450{
2451 return (v & 0x7) << 8;
2452}
2453static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(u32 v)
2454{
2455 return (v & 0x7) << 12;
2456}
2457static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(u32 v)
2458{
2459 return (v & 0x7) << 16;
2460}
2461static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(u32 v)
2462{
2463 return (v & 0x7) << 20;
2464}
2465static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(u32 v)
2466{
2467 return (v & 0x7) << 24;
2468}
2469static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_s(void)
2470{
2471 return 3;
2472}
2473static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(u32 v)
2474{
2475 return (v & 0x7) << 28;
2476}
2477static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_m(void)
2478{
2479 return 0x7 << 28;
2480}
2481static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_v(u32 r)
2482{
2483 return (r >> 28) & 0x7;
2484}
2485static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_r(void)
2486{
2487 return 0x0041898c;
2488}
2489static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(u32 v)
2490{
2491 return (v & 0x7) << 0;
2492}
2493static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(u32 v)
2494{
2495 return (v & 0x7) << 4;
2496}
2497static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(u32 v)
2498{
2499 return (v & 0x7) << 8;
2500}
2501static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(u32 v)
2502{
2503 return (v & 0x7) << 12;
2504}
2505static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(u32 v)
2506{
2507 return (v & 0x7) << 16;
2508}
2509static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(u32 v)
2510{
2511 return (v & 0x7) << 20;
2512}
2513static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(u32 v)
2514{
2515 return (v & 0x7) << 24;
2516}
2517static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(u32 v)
2518{
2519 return (v & 0x7) << 28;
2520}
2521static inline u32 gr_gpcs_gpm_pd_cfg_r(void)
2522{
2523 return 0x00418c6c;
2524}
2525static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f(void)
2526{
2527 return 0x0;
2528}
2529static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f(void)
2530{
2531 return 0x1;
2532}
2533static inline u32 gr_gpcs_gcc_pagepool_base_r(void)
2534{
2535 return 0x00419004;
2536}
2537static inline u32 gr_gpcs_gcc_pagepool_base_addr_39_8_f(u32 v)
2538{
2539 return (v & 0xffffffff) << 0;
2540}
2541static inline u32 gr_gpcs_gcc_pagepool_r(void)
2542{
2543 return 0x00419008;
2544}
2545static inline u32 gr_gpcs_gcc_pagepool_total_pages_f(u32 v)
2546{
2547 return (v & 0xff) << 0;
2548}
2549static inline u32 gr_gpcs_tpcs_pe_vaf_r(void)
2550{
2551 return 0x0041980c;
2552}
2553static inline u32 gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f(void)
2554{
2555 return 0x10;
2556}
2557static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(void)
2558{
2559 return 0x00419848;
2560}
2561static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(u32 v)
2562{
2563 return (v & 0xfffffff) << 0;
2564}
2565static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_f(u32 v)
2566{
2567 return (v & 0x1) << 28;
2568}
2569static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(void)
2570{
2571 return 0x10000000;
2572}
2573static inline u32 gr_gpcs_tpcs_l1c_pm_r(void)
2574{
2575 return 0x00419ca8;
2576}
2577static inline u32 gr_gpcs_tpcs_l1c_pm_enable_m(void)
2578{
2579 return 0x1 << 31;
2580}
2581static inline u32 gr_gpcs_tpcs_l1c_pm_enable_enable_f(void)
2582{
2583 return 0x80000000;
2584}
2585static inline u32 gr_gpcs_tpcs_l1c_cfg_r(void)
2586{
2587 return 0x00419cb8;
2588}
2589static inline u32 gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_m(void)
2590{
2591 return 0x1 << 31;
2592}
2593static inline u32 gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_enable_f(void)
2594{
2595 return 0x80000000;
2596}
2597static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_r(void)
2598{
2599 return 0x00419c00;
2600}
2601static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f(void)
2602{
2603 return 0x0;
2604}
2605static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f(void)
2606{
2607 return 0x8;
2608}
2609static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_r(void)
2610{
2611 return 0x00419e00;
2612}
2613static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_m(void)
2614{
2615 return 0x1 << 7;
2616}
2617static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_enable_f(void)
2618{
2619 return 0x80;
2620}
2621static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_m(void)
2622{
2623 return 0x1 << 15;
2624}
2625static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_enable_f(void)
2626{
2627 return 0x8000;
2628}
2629static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(void)
2630{
2631 return 0x00419e44;
2632}
2633static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f(void)
2634{
2635 return 0x2;
2636}
2637static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f(void)
2638{
2639 return 0x4;
2640}
2641static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f(void)
2642{
2643 return 0x8;
2644}
2645static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f(void)
2646{
2647 return 0x10;
2648}
2649static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f(void)
2650{
2651 return 0x20;
2652}
2653static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f(void)
2654{
2655 return 0x40;
2656}
2657static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f(void)
2658{
2659 return 0x80;
2660}
2661static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f(void)
2662{
2663 return 0x100;
2664}
2665static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f(void)
2666{
2667 return 0x200;
2668}
2669static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f(void)
2670{
2671 return 0x400;
2672}
2673static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f(void)
2674{
2675 return 0x800;
2676}
2677static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f(void)
2678{
2679 return 0x1000;
2680}
2681static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f(void)
2682{
2683 return 0x2000;
2684}
2685static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f(void)
2686{
2687 return 0x4000;
2688}
2689static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f(void)
2690{
2691 return 0x8000;
2692}
2693static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f(void)
2694{
2695 return 0x10000;
2696}
2697static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f(void)
2698{
2699 return 0x20000;
2700}
2701static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f(void)
2702{
2703 return 0x40000;
2704}
2705static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f(void)
2706{
2707 return 0x80000;
2708}
2709static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f(void)
2710{
2711 return 0x100000;
2712}
2713static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(void)
2714{
2715 return 0x00419e4c;
2716}
2717static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f(void)
2718{
2719 return 0x1;
2720}
2721static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f(void)
2722{
2723 return 0x2;
2724}
2725static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f(void)
2726{
2727 return 0x4;
2728}
2729static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f(void)
2730{
2731 return 0x8;
2732}
2733static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f(void)
2734{
2735 return 0x10;
2736}
2737static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f(void)
2738{
2739 return 0x20;
2740}
2741static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f(void)
2742{
2743 return 0x40;
2744}
2745static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
2746{
2747 return 0x0050450c;
2748}
2749static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
2750{
2751 return 0x2;
2752}
2753static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_disabled_f(void)
2754{
2755 return 0x0;
2756}
2757static inline u32 gr_gpc0_gpccs_gpc_exception_en_r(void)
2758{
2759 return 0x00502c94;
2760}
2761static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f(void)
2762{
2763 return 0x10000;
2764}
2765static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_disabled_f(void)
2766{
2767 return 0x0;
2768}
2769static inline u32 gr_gpcs_gpccs_gpc_exception_r(void)
2770{
2771 return 0x0041ac90;
2772}
2773static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_v(u32 r)
2774{
2775 return (r >> 16) & 0xff;
2776}
2777static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v(void)
2778{
2779 return 0x00000001;
2780}
2781static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_r(void)
2782{
2783 return 0x00419d08;
2784}
2785static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(u32 r)
2786{
2787 return (r >> 1) & 0x1;
2788}
2789static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v(void)
2790{
2791 return 0x00000001;
2792}
2793static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_r(void)
2794{
2795 return 0x00504610;
2796}
2797static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(u32 r)
2798{
2799 return (r >> 0) & 0x1;
2800}
2801static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v(void)
2802{
2803 return 0x00000001;
2804}
2805static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void)
2806{
2807 return 0x80000000;
2808}
2809static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
2810{
2811 return 0x0050460c;
2812}
2813static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
2814{
2815 return (r >> 4) & 0x1;
2816}
2817static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v(void)
2818{
2819 return 0x00000001;
2820}
2821static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_r(void)
2822{
2823 return 0x00504650;
2824}
2825static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f(void)
2826{
2827 return 0x10;
2828}
2829static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f(void)
2830{
2831 return 0x20;
2832}
2833static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(void)
2834{
2835 return 0x40;
2836}
2837static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void)
2838{
2839 return 0x00504648;
2840}
2841static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_v(u32 r)
2842{
2843 return (r >> 0) & 0xffff;
2844}
2845static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v(void)
2846{
2847 return 0x00000000;
2848}
2849static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void)
2850{
2851 return 0x0;
2852}
2853static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
2854{
2855 return 0x00504770;
2856}
2857static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_r(void)
2858{
2859 return 0x00419f70;
2860}
2861static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_m(void)
2862{
2863 return 0x1 << 1;
2864}
2865static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_enable_f(void)
2866{
2867 return 0x2;
2868}
2869static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(void)
2870{
2871 return 0x1 << 4;
2872}
2873static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(u32 v)
2874{
2875 return (v & 0x1) << 4;
2876}
2877static inline u32 gr_gpc0_tpc0_sm_debug_sfe_control_r(void)
2878{
2879 return 0x0050477c;
2880}
2881static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_r(void)
2882{
2883 return 0x00419f7c;
2884}
2885static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(void)
2886{
2887 return 0x1 << 0;
2888}
2889static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(u32 v)
2890{
2891 return (v & 0x1) << 0;
2892}
2893static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_m(void)
2894{
2895 return 0x1 << 16;
2896}
2897static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_enable_f(void)
2898{
2899 return 0x10000;
2900}
2901static inline u32 gr_gpcs_tpcs_sm_power_throttle_r(void)
2902{
2903 return 0x00419ed0;
2904}
2905static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_r(void)
2906{
2907 return 0x0041be08;
2908}
2909static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f(void)
2910{
2911 return 0x4;
2912}
2913static inline u32 gr_ppcs_wwdx_map_gpc_map0_r(void)
2914{
2915 return 0x0041bf00;
2916}
2917static inline u32 gr_ppcs_wwdx_map_gpc_map1_r(void)
2918{
2919 return 0x0041bf04;
2920}
2921static inline u32 gr_ppcs_wwdx_map_gpc_map2_r(void)
2922{
2923 return 0x0041bf08;
2924}
2925static inline u32 gr_ppcs_wwdx_map_gpc_map3_r(void)
2926{
2927 return 0x0041bf0c;
2928}
2929static inline u32 gr_ppcs_wwdx_map_gpc_map4_r(void)
2930{
2931 return 0x0041bf10;
2932}
2933static inline u32 gr_ppcs_wwdx_map_gpc_map5_r(void)
2934{
2935 return 0x0041bf14;
2936}
2937static inline u32 gr_ppcs_wwdx_map_table_cfg_r(void)
2938{
2939 return 0x0041bfd0;
2940}
2941static inline u32 gr_ppcs_wwdx_map_table_cfg_row_offset_f(u32 v)
2942{
2943 return (v & 0xff) << 0;
2944}
2945static inline u32 gr_ppcs_wwdx_map_table_cfg_num_entries_f(u32 v)
2946{
2947 return (v & 0xff) << 8;
2948}
2949static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(u32 v)
2950{
2951 return (v & 0x1f) << 16;
2952}
2953static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(u32 v)
2954{
2955 return (v & 0x7) << 21;
2956}
2957static inline u32 gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(u32 v)
2958{
2959 return (v & 0x1f) << 24;
2960}
2961static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_r(void)
2962{
2963 return 0x0041bfd4;
2964}
2965static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(u32 v)
2966{
2967 return (v & 0xffffff) << 0;
2968}
2969static inline u32 gr_ppcs_wwdx_map_table_cfg2_r(void)
2970{
2971 return 0x0041bfe4;
2972}
2973static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(u32 v)
2974{
2975 return (v & 0x1f) << 0;
2976}
2977static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(u32 v)
2978{
2979 return (v & 0x1f) << 5;
2980}
2981static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(u32 v)
2982{
2983 return (v & 0x1f) << 10;
2984}
2985static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(u32 v)
2986{
2987 return (v & 0x1f) << 15;
2988}
2989static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(u32 v)
2990{
2991 return (v & 0x1f) << 20;
2992}
2993static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(u32 v)
2994{
2995 return (v & 0x1f) << 25;
2996}
2997static inline u32 gr_gpcs_ppcs_cbm_cfg_r(void)
2998{
2999 return 0x0041bec0;
3000}
3001static inline u32 gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v(void)
3002{
3003 return 0x00000001;
3004}
3005static inline u32 gr_bes_zrop_settings_r(void)
3006{
3007 return 0x00408850;
3008}
3009static inline u32 gr_bes_zrop_settings_num_active_fbps_f(u32 v)
3010{
3011 return (v & 0xf) << 0;
3012}
3013static inline u32 gr_bes_crop_settings_r(void)
3014{
3015 return 0x00408958;
3016}
3017static inline u32 gr_bes_crop_settings_num_active_fbps_f(u32 v)
3018{
3019 return (v & 0xf) << 0;
3020}
3021static inline u32 gr_zcull_bytes_per_aliquot_per_gpu_v(void)
3022{
3023 return 0x00000020;
3024}
3025static inline u32 gr_zcull_save_restore_header_bytes_per_gpc_v(void)
3026{
3027 return 0x00000020;
3028}
3029static inline u32 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(void)
3030{
3031 return 0x000000c0;
3032}
3033static inline u32 gr_zcull_subregion_qty_v(void)
3034{
3035 return 0x00000010;
3036}
3037static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r(void)
3038{
3039 return 0x00504604;
3040}
3041static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r(void)
3042{
3043 return 0x00504608;
3044}
3045static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r(void)
3046{
3047 return 0x0050465c;
3048}
3049static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r(void)
3050{
3051 return 0x00504660;
3052}
3053static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r(void)
3054{
3055 return 0x00504664;
3056}
3057static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r(void)
3058{
3059 return 0x00504668;
3060}
3061static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r(void)
3062{
3063 return 0x0050466c;
3064}
3065static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(void)
3066{
3067 return 0x00504658;
3068}
3069static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(void)
3070{
3071 return 0x00504670;
3072}
3073static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r(void)
3074{
3075 return 0x00504694;
3076}
3077static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r(void)
3078{
3079 return 0x00504730;
3080}
3081static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r(void)
3082{
3083 return 0x00504734;
3084}
3085static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r(void)
3086{
3087 return 0x00504738;
3088}
3089static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r(void)
3090{
3091 return 0x0050473c;
3092}
3093static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r(void)
3094{
3095 return 0x00504740;
3096}
3097static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r(void)
3098{
3099 return 0x00504744;
3100}
3101static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r(void)
3102{
3103 return 0x00504748;
3104}
3105static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r(void)
3106{
3107 return 0x0050474c;
3108}
3109static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(void)
3110{
3111 return 0x00504674;
3112}
3113static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_r(void)
3114{
3115 return 0x00504678;
3116}
3117static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_r(void)
3118{
3119 return 0x0050467c;
3120}
3121static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_r(void)
3122{
3123 return 0x00504680;
3124}
3125static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_r(void)
3126{
3127 return 0x00504684;
3128}
3129static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(void)
3130{
3131 return 0x00504688;
3132}
3133static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(void)
3134{
3135 return 0x0050468c;
3136}
3137static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(void)
3138{
3139 return 0x00504690;
3140}
3141static inline u32 gr_fe_pwr_mode_r(void)
3142{
3143 return 0x00404170;
3144}
3145static inline u32 gr_fe_pwr_mode_mode_auto_f(void)
3146{
3147 return 0x0;
3148}
3149static inline u32 gr_fe_pwr_mode_mode_force_on_f(void)
3150{
3151 return 0x2;
3152}
3153static inline u32 gr_fe_pwr_mode_req_v(u32 r)
3154{
3155 return (r >> 4) & 0x1;
3156}
3157static inline u32 gr_fe_pwr_mode_req_send_f(void)
3158{
3159 return 0x10;
3160}
3161static inline u32 gr_fe_pwr_mode_req_done_v(void)
3162{
3163 return 0x00000000;
3164}
3165static inline u32 gr_gpc0_tpc0_l1c_dbg_r(void)
3166{
3167 return 0x005044b0;
3168}
3169static inline u32 gr_gpc0_tpc0_l1c_dbg_cya15_en_f(void)
3170{
3171 return 0x8000000;
3172}
3173#endif
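
The gr_* helpers above are meant to be composed: the _f() forms are OR'd together to build a full register value, and the _v() forms pull a field back out of a raw read. A minimal usage sketch follows; it is not part of this patch, and the gk20a_writel()/gk20a_readl() MMIO helpers and the struct gk20a pointer g are assumed from the rest of the driver, not from this header:

	/* program the SCC page pool: size field OR'd with the valid bit */
	gk20a_writel(g, gr_scc_pagepool_r(),
		     gr_scc_pagepool_total_pages_f(num_pages) |
		     gr_scc_pagepool_valid_true_f());

	/* decode how many TPCs GPC0 exposes from a raw register read */
	u32 tpcs = gr_gpc0_fs_gpc_num_available_tpcs_v(
			gk20a_readl(g, gr_gpc0_fs_gpc_r()));
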
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h
new file mode 100644
index 00000000..65221b59
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h
@@ -0,0 +1,221 @@
1/*
2 * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_ltc_gk20a_h_
51#define _hw_ltc_gk20a_h_
52
53static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
54{
55 return 0x001410c8;
56}
57static inline u32 ltc_ltc0_lts0_tstg_cfg1_r(void)
58{
59 return 0x00141104;
60}
61static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_ways_v(u32 r)
62{
63 return (r >> 0) & 0xffff;
64}
65static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_v(u32 r)
66{
67 return (r >> 16) & 0x3;
68}
69static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v(void)
70{
71 return 0x00000000;
72}
73static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v(void)
74{
75 return 0x00000001;
76}
77static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v(void)
78{
79 return 0x00000002;
80}
81static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
82{
83 return 0x0017e8c8;
84}
85static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
86{
87 return (r >> 2) & 0x1;
88}
89static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_v(void)
90{
91 return 0x00000001;
92}
93static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(void)
94{
95 return 0x4;
96}
97static inline u32 ltc_ltc0_lts0_cbc_ctrl1_r(void)
98{
99 return 0x0017e8c8;
100}
101static inline u32 ltc_ltcs_ltss_cbc_ctrl2_r(void)
102{
103 return 0x0017e8cc;
104}
105static inline u32 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(u32 v)
106{
107 return (v & 0x1ffff) << 0;
108}
109static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void)
110{
111 return 0x0017e8d0;
112}
113static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(u32 v)
114{
115 return (v & 0x1ffff) << 0;
116}
117static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(void)
118{
119 return 0x0001ffff;
120}
121static inline u32 ltc_ltcs_ltss_cbc_base_r(void)
122{
123 return 0x0017e8d4;
124}
125static inline u32 ltc_ltcs_ltss_cbc_base_alignment_shift_v(void)
126{
127 return 0x0000000b;
128}
129static inline u32 ltc_ltcs_ltss_cbc_base_address_v(u32 r)
130{
131 return (r >> 0) & 0x3ffffff;
132}
133static inline u32 ltc_ltcs_ltss_cbc_param_r(void)
134{
135 return 0x0017e8dc;
136}
137static inline u32 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(u32 r)
138{
139 return (r >> 0) & 0xffff;
140}
141static inline u32 ltc_ltcs_ltss_cbc_param_cache_line_size_v(u32 r)
142{
143 return (r >> 24) & 0xf;
144}
145static inline u32 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(u32 r)
146{
147 return (r >> 28) & 0xf;
148}
149static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_r(void)
150{
151 return 0x0017e91c;
152}
153static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(u32 v)
154{
155 return (v & 0x1f) << 16;
156}
157static inline u32 ltc_ltcs_ltss_dstg_zbc_index_r(void)
158{
159 return 0x0017ea44;
160}
161static inline u32 ltc_ltcs_ltss_dstg_zbc_index_address_f(u32 v)
162{
163 return (v & 0xf) << 0;
164}
165static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(u32 i)
166{
167 return 0x0017ea48 + i*4;
168}
169static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(void)
170{
171 return 0x00000004;
172}
173static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(void)
174{
175 return 0x0017ea58;
176}
177static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_s(void)
178{
179 return 32;
180}
181static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_f(u32 v)
182{
183 return (v & 0xffffffff) << 0;
184}
185static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_m(void)
186{
187 return 0xffffffff << 0;
188}
189static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_v(u32 r)
190{
191 return (r >> 0) & 0xffffffff;
192}
193static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_r(void)
194{
195 return 0x0017e924;
196}
197static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(void)
198{
199 return 0x10000000;
200}
201static inline u32 ltc_ltss_g_elpg_r(void)
202{
203 return 0x0017e828;
204}
205static inline u32 ltc_ltss_g_elpg_flush_v(u32 r)
206{
207 return (r >> 0) & 0x1;
208}
209static inline u32 ltc_ltss_g_elpg_flush_pending_v(void)
210{
211 return 0x00000001;
212}
213static inline u32 ltc_ltss_g_elpg_flush_pending_f(void)
214{
215 return 0x1;
216}
217static inline u32 ltc_ltc0_ltss_intr_r(void)
218{
219 return 0x00140820;
220}
221#endif
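The naming scheme documented at the top of each of these generated headers is easiest to see in use. Below is a minimal standalone sketch (ordinary user-space C, not driver code) of how the _f(), _v() and _<z>_v() helpers from hw_ltc_gk20a.h compose into the usual set-then-poll pattern; the local ctrl1 variable merely stands in for an MMIO read/write, and the condensed helper copies exist only so the sketch compiles on its own. The same composition applies to every other hw_*_gk20a.h header added by this patch.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* Condensed copies of three helpers from hw_ltc_gk20a.h above, so the
 * sketch builds on its own. */
static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(void) { return 0x4; }
static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)       { return (r >> 2) & 0x1; }
static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_v(void) { return 0x00000001; }

int main(void)
{
        u32 ctrl1 = 0;  /* stand-in for the CBC_CTRL1 register contents */

        /* Start a compbit clear: OR in the shifted ACTIVE constant (_f form). */
        ctrl1 |= ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();

        /* Poll-style check: extract the field (_v form) and compare it with
         * the unshifted ACTIVE constant (_<z>_v form). */
        if (ltc_ltcs_ltss_cbc_ctrl1_clear_v(ctrl1) ==
            ltc_ltcs_ltss_cbc_ctrl1_clear_active_v())
                printf("clear still in progress\n");

        return 0;
}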
diff --git a/drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h
new file mode 100644
index 00000000..1692bb54
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h
@@ -0,0 +1,253 @@
1/*
2 * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_mc_gk20a_h_
51#define _hw_mc_gk20a_h_
52
53static inline u32 mc_boot_0_r(void)
54{
55 return 0x00000000;
56}
57static inline u32 mc_boot_0_architecture_v(u32 r)
58{
59 return (r >> 24) & 0x1f;
60}
61static inline u32 mc_boot_0_implementation_v(u32 r)
62{
63 return (r >> 20) & 0xf;
64}
65static inline u32 mc_boot_0_major_revision_v(u32 r)
66{
67 return (r >> 4) & 0xf;
68}
69static inline u32 mc_boot_0_minor_revision_v(u32 r)
70{
71 return (r >> 0) & 0xf;
72}
73static inline u32 mc_intr_0_r(void)
74{
75 return 0x00000100;
76}
77static inline u32 mc_intr_0_pfifo_pending_f(void)
78{
79 return 0x100;
80}
81static inline u32 mc_intr_0_pgraph_pending_f(void)
82{
83 return 0x1000;
84}
85static inline u32 mc_intr_0_pmu_pending_f(void)
86{
87 return 0x1000000;
88}
89static inline u32 mc_intr_0_ltc_pending_f(void)
90{
91 return 0x2000000;
92}
93static inline u32 mc_intr_0_priv_ring_pending_f(void)
94{
95 return 0x40000000;
96}
97static inline u32 mc_intr_0_pbus_pending_f(void)
98{
99 return 0x10000000;
100}
101static inline u32 mc_intr_1_r(void)
102{
103 return 0x00000104;
104}
105static inline u32 mc_intr_mask_0_r(void)
106{
107 return 0x00000640;
108}
109static inline u32 mc_intr_mask_0_pmu_enabled_f(void)
110{
111 return 0x1000000;
112}
113static inline u32 mc_intr_mask_1_r(void)
114{
115 return 0x00000644;
116}
117static inline u32 mc_intr_mask_1_pmu_enabled_f(void)
118{
119 return 0x1000000;
120}
121static inline u32 mc_intr_en_0_r(void)
122{
123 return 0x00000140;
124}
125static inline u32 mc_intr_en_0_inta_disabled_f(void)
126{
127 return 0x0;
128}
129static inline u32 mc_intr_en_0_inta_hardware_f(void)
130{
131 return 0x1;
132}
133static inline u32 mc_intr_en_1_r(void)
134{
135 return 0x00000144;
136}
137static inline u32 mc_intr_en_1_inta_disabled_f(void)
138{
139 return 0x0;
140}
141static inline u32 mc_intr_en_1_inta_hardware_f(void)
142{
143 return 0x1;
144}
145static inline u32 mc_enable_r(void)
146{
147 return 0x00000200;
148}
149static inline u32 mc_enable_xbar_enabled_f(void)
150{
151 return 0x4;
152}
153static inline u32 mc_enable_l2_enabled_f(void)
154{
155 return 0x8;
156}
157static inline u32 mc_enable_pmedia_s(void)
158{
159 return 1;
160}
161static inline u32 mc_enable_pmedia_f(u32 v)
162{
163 return (v & 0x1) << 4;
164}
165static inline u32 mc_enable_pmedia_m(void)
166{
167 return 0x1 << 4;
168}
169static inline u32 mc_enable_pmedia_v(u32 r)
170{
171 return (r >> 4) & 0x1;
172}
173static inline u32 mc_enable_priv_ring_enabled_f(void)
174{
175 return 0x20;
176}
177static inline u32 mc_enable_ce0_m(void)
178{
179 return 0x1 << 6;
180}
181static inline u32 mc_enable_pfifo_enabled_f(void)
182{
183 return 0x100;
184}
185static inline u32 mc_enable_pgraph_enabled_f(void)
186{
187 return 0x1000;
188}
189static inline u32 mc_enable_pwr_v(u32 r)
190{
191 return (r >> 13) & 0x1;
192}
193static inline u32 mc_enable_pwr_disabled_v(void)
194{
195 return 0x00000000;
196}
197static inline u32 mc_enable_pwr_enabled_f(void)
198{
199 return 0x2000;
200}
201static inline u32 mc_enable_pfb_enabled_f(void)
202{
203 return 0x100000;
204}
205static inline u32 mc_enable_ce2_m(void)
206{
207 return 0x1 << 21;
208}
209static inline u32 mc_enable_ce2_enabled_f(void)
210{
211 return 0x200000;
212}
213static inline u32 mc_enable_blg_enabled_f(void)
214{
215 return 0x8000000;
216}
217static inline u32 mc_enable_perfmon_enabled_f(void)
218{
219 return 0x10000000;
220}
221static inline u32 mc_enable_hub_enabled_f(void)
222{
223 return 0x20000000;
224}
225static inline u32 mc_enable_pb_r(void)
226{
227 return 0x00000204;
228}
229static inline u32 mc_enable_pb_0_s(void)
230{
231 return 1;
232}
233static inline u32 mc_enable_pb_0_f(u32 v)
234{
235 return (v & 0x1) << 0;
236}
237static inline u32 mc_enable_pb_0_m(void)
238{
239 return 0x1 << 0;
240}
241static inline u32 mc_enable_pb_0_v(u32 r)
242{
243 return (r >> 0) & 0x1;
244}
245static inline u32 mc_enable_pb_0_enabled_v(void)
246{
247 return 0x00000001;
248}
249static inline u32 mc_enable_pb_sel_f(u32 v, u32 i)
250{
251 return (v & 0x1) << (0 + i*1);
252}
253#endif
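As a second usage sketch, the mc_boot_0_*_v() extractors above split the boot-0 chip identification register into architecture, implementation and revision fields. The snippet is standalone; the hard-coded boot0 word is an invented example value, not a real readout.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* Field extractors copied from hw_mc_gk20a.h above, condensed for brevity. */
static inline u32 mc_boot_0_architecture_v(u32 r)   { return (r >> 24) & 0x1f; }
static inline u32 mc_boot_0_implementation_v(u32 r) { return (r >> 20) & 0xf; }
static inline u32 mc_boot_0_major_revision_v(u32 r) { return (r >> 4) & 0xf; }
static inline u32 mc_boot_0_minor_revision_v(u32 r) { return (r >> 0) & 0xf; }

int main(void)
{
        u32 boot0 = 0x0ea000a1;  /* illustrative value only, not a real chip ID */

        printf("arch 0x%x impl 0x%x rev %u.%u\n",
               mc_boot_0_architecture_v(boot0),
               mc_boot_0_implementation_v(boot0),
               mc_boot_0_major_revision_v(boot0),
               mc_boot_0_minor_revision_v(boot0));
        return 0;
}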
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h
new file mode 100644
index 00000000..df1a6d48
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h
@@ -0,0 +1,469 @@
1/*
2 * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_pbdma_gk20a_h_
51#define _hw_pbdma_gk20a_h_
52
53static inline u32 pbdma_gp_entry1_r(void)
54{
55 return 0x10000004;
56}
57static inline u32 pbdma_gp_entry1_get_hi_v(u32 r)
58{
59 return (r >> 0) & 0xff;
60}
61static inline u32 pbdma_gp_entry1_length_f(u32 v)
62{
63 return (v & 0x1fffff) << 10;
64}
65static inline u32 pbdma_gp_entry1_length_v(u32 r)
66{
67 return (r >> 10) & 0x1fffff;
68}
69static inline u32 pbdma_gp_base_r(u32 i)
70{
71 return 0x00040048 + i*8192;
72}
73static inline u32 pbdma_gp_base__size_1_v(void)
74{
75 return 0x00000001;
76}
77static inline u32 pbdma_gp_base_offset_f(u32 v)
78{
79 return (v & 0x1fffffff) << 3;
80}
81static inline u32 pbdma_gp_base_rsvd_s(void)
82{
83 return 3;
84}
85static inline u32 pbdma_gp_base_hi_r(u32 i)
86{
87 return 0x0004004c + i*8192;
88}
89static inline u32 pbdma_gp_base_hi_offset_f(u32 v)
90{
91 return (v & 0xff) << 0;
92}
93static inline u32 pbdma_gp_base_hi_limit2_f(u32 v)
94{
95 return (v & 0x1f) << 16;
96}
97static inline u32 pbdma_gp_fetch_r(u32 i)
98{
99 return 0x00040050 + i*8192;
100}
101static inline u32 pbdma_gp_get_r(u32 i)
102{
103 return 0x00040014 + i*8192;
104}
105static inline u32 pbdma_gp_put_r(u32 i)
106{
107 return 0x00040000 + i*8192;
108}
109static inline u32 pbdma_pb_fetch_r(u32 i)
110{
111 return 0x00040054 + i*8192;
112}
113static inline u32 pbdma_pb_fetch_hi_r(u32 i)
114{
115 return 0x00040058 + i*8192;
116}
117static inline u32 pbdma_get_r(u32 i)
118{
119 return 0x00040018 + i*8192;
120}
121static inline u32 pbdma_get_hi_r(u32 i)
122{
123 return 0x0004001c + i*8192;
124}
125static inline u32 pbdma_put_r(u32 i)
126{
127 return 0x0004005c + i*8192;
128}
129static inline u32 pbdma_put_hi_r(u32 i)
130{
131 return 0x00040060 + i*8192;
132}
133static inline u32 pbdma_formats_r(u32 i)
134{
135 return 0x0004009c + i*8192;
136}
137static inline u32 pbdma_formats_gp_fermi0_f(void)
138{
139 return 0x0;
140}
141static inline u32 pbdma_formats_pb_fermi1_f(void)
142{
143 return 0x100;
144}
145static inline u32 pbdma_formats_mp_fermi0_f(void)
146{
147 return 0x0;
148}
149static inline u32 pbdma_syncpointa_r(u32 i)
150{
151 return 0x000400a4 + i*8192;
152}
153static inline u32 pbdma_syncpointa_payload_v(u32 r)
154{
155 return (r >> 0) & 0xffffffff;
156}
157static inline u32 pbdma_syncpointb_r(u32 i)
158{
159 return 0x000400a8 + i*8192;
160}
161static inline u32 pbdma_syncpointb_op_v(u32 r)
162{
163 return (r >> 0) & 0x3;
164}
165static inline u32 pbdma_syncpointb_op_wait_v(void)
166{
167 return 0x00000000;
168}
169static inline u32 pbdma_syncpointb_wait_switch_v(u32 r)
170{
171 return (r >> 4) & 0x1;
172}
173static inline u32 pbdma_syncpointb_wait_switch_en_v(void)
174{
175 return 0x00000001;
176}
177static inline u32 pbdma_syncpointb_syncpt_index_v(u32 r)
178{
179 return (r >> 8) & 0xff;
180}
181static inline u32 pbdma_pb_header_r(u32 i)
182{
183 return 0x00040084 + i*8192;
184}
185static inline u32 pbdma_pb_header_priv_user_f(void)
186{
187 return 0x0;
188}
189static inline u32 pbdma_pb_header_method_zero_f(void)
190{
191 return 0x0;
192}
193static inline u32 pbdma_pb_header_subchannel_zero_f(void)
194{
195 return 0x0;
196}
197static inline u32 pbdma_pb_header_level_main_f(void)
198{
199 return 0x0;
200}
201static inline u32 pbdma_pb_header_first_true_f(void)
202{
203 return 0x400000;
204}
205static inline u32 pbdma_pb_header_type_inc_f(void)
206{
207 return 0x20000000;
208}
209static inline u32 pbdma_subdevice_r(u32 i)
210{
211 return 0x00040094 + i*8192;
212}
213static inline u32 pbdma_subdevice_id_f(u32 v)
214{
215 return (v & 0xfff) << 0;
216}
217static inline u32 pbdma_subdevice_status_active_f(void)
218{
219 return 0x10000000;
220}
221static inline u32 pbdma_subdevice_channel_dma_enable_f(void)
222{
223 return 0x20000000;
224}
225static inline u32 pbdma_method0_r(u32 i)
226{
227 return 0x000400c0 + i*8192;
228}
229static inline u32 pbdma_data0_r(u32 i)
230{
231 return 0x000400c4 + i*8192;
232}
233static inline u32 pbdma_target_r(u32 i)
234{
235 return 0x000400ac + i*8192;
236}
237static inline u32 pbdma_target_engine_sw_f(void)
238{
239 return 0x1f;
240}
241static inline u32 pbdma_acquire_r(u32 i)
242{
243 return 0x00040030 + i*8192;
244}
245static inline u32 pbdma_acquire_retry_man_2_f(void)
246{
247 return 0x2;
248}
249static inline u32 pbdma_acquire_retry_exp_2_f(void)
250{
251 return 0x100;
252}
253static inline u32 pbdma_acquire_timeout_exp_max_f(void)
254{
255 return 0x7800;
256}
257static inline u32 pbdma_acquire_timeout_man_max_f(void)
258{
259 return 0x7fff8000;
260}
261static inline u32 pbdma_acquire_timeout_en_disable_f(void)
262{
263 return 0x0;
264}
265static inline u32 pbdma_status_r(u32 i)
266{
267 return 0x00040100 + i*8192;
268}
269static inline u32 pbdma_channel_r(u32 i)
270{
271 return 0x00040120 + i*8192;
272}
273static inline u32 pbdma_signature_r(u32 i)
274{
275 return 0x00040010 + i*8192;
276}
277static inline u32 pbdma_signature_hw_valid_f(void)
278{
279 return 0xface;
280}
281static inline u32 pbdma_signature_sw_zero_f(void)
282{
283 return 0x0;
284}
285static inline u32 pbdma_userd_r(u32 i)
286{
287 return 0x00040008 + i*8192;
288}
289static inline u32 pbdma_userd_target_vid_mem_f(void)
290{
291 return 0x0;
292}
293static inline u32 pbdma_userd_addr_f(u32 v)
294{
295 return (v & 0x7fffff) << 9;
296}
297static inline u32 pbdma_userd_hi_r(u32 i)
298{
299 return 0x0004000c + i*8192;
300}
301static inline u32 pbdma_userd_hi_addr_f(u32 v)
302{
303 return (v & 0xff) << 0;
304}
305static inline u32 pbdma_hce_ctrl_r(u32 i)
306{
307 return 0x000400e4 + i*8192;
308}
309static inline u32 pbdma_hce_ctrl_hce_priv_mode_yes_f(void)
310{
311 return 0x20;
312}
313static inline u32 pbdma_intr_0_r(u32 i)
314{
315 return 0x00040108 + i*8192;
316}
317static inline u32 pbdma_intr_0_memreq_v(u32 r)
318{
319 return (r >> 0) & 0x1;
320}
321static inline u32 pbdma_intr_0_memreq_pending_f(void)
322{
323 return 0x1;
324}
325static inline u32 pbdma_intr_0_memack_timeout_pending_f(void)
326{
327 return 0x2;
328}
329static inline u32 pbdma_intr_0_memack_extra_pending_f(void)
330{
331 return 0x4;
332}
333static inline u32 pbdma_intr_0_memdat_timeout_pending_f(void)
334{
335 return 0x8;
336}
337static inline u32 pbdma_intr_0_memdat_extra_pending_f(void)
338{
339 return 0x10;
340}
341static inline u32 pbdma_intr_0_memflush_pending_f(void)
342{
343 return 0x20;
344}
345static inline u32 pbdma_intr_0_memop_pending_f(void)
346{
347 return 0x40;
348}
349static inline u32 pbdma_intr_0_lbconnect_pending_f(void)
350{
351 return 0x80;
352}
353static inline u32 pbdma_intr_0_lbreq_pending_f(void)
354{
355 return 0x100;
356}
357static inline u32 pbdma_intr_0_lback_timeout_pending_f(void)
358{
359 return 0x200;
360}
361static inline u32 pbdma_intr_0_lback_extra_pending_f(void)
362{
363 return 0x400;
364}
365static inline u32 pbdma_intr_0_lbdat_timeout_pending_f(void)
366{
367 return 0x800;
368}
369static inline u32 pbdma_intr_0_lbdat_extra_pending_f(void)
370{
371 return 0x1000;
372}
373static inline u32 pbdma_intr_0_gpfifo_pending_f(void)
374{
375 return 0x2000;
376}
377static inline u32 pbdma_intr_0_gpptr_pending_f(void)
378{
379 return 0x4000;
380}
381static inline u32 pbdma_intr_0_gpentry_pending_f(void)
382{
383 return 0x8000;
384}
385static inline u32 pbdma_intr_0_gpcrc_pending_f(void)
386{
387 return 0x10000;
388}
389static inline u32 pbdma_intr_0_pbptr_pending_f(void)
390{
391 return 0x20000;
392}
393static inline u32 pbdma_intr_0_pbentry_pending_f(void)
394{
395 return 0x40000;
396}
397static inline u32 pbdma_intr_0_pbcrc_pending_f(void)
398{
399 return 0x80000;
400}
401static inline u32 pbdma_intr_0_xbarconnect_pending_f(void)
402{
403 return 0x100000;
404}
405static inline u32 pbdma_intr_0_method_pending_f(void)
406{
407 return 0x200000;
408}
409static inline u32 pbdma_intr_0_methodcrc_pending_f(void)
410{
411 return 0x400000;
412}
413static inline u32 pbdma_intr_0_device_pending_f(void)
414{
415 return 0x800000;
416}
417static inline u32 pbdma_intr_0_semaphore_pending_f(void)
418{
419 return 0x2000000;
420}
421static inline u32 pbdma_intr_0_acquire_pending_f(void)
422{
423 return 0x4000000;
424}
425static inline u32 pbdma_intr_0_pri_pending_f(void)
426{
427 return 0x8000000;
428}
429static inline u32 pbdma_intr_0_no_ctxsw_seg_pending_f(void)
430{
431 return 0x20000000;
432}
433static inline u32 pbdma_intr_0_pbseg_pending_f(void)
434{
435 return 0x40000000;
436}
437static inline u32 pbdma_intr_0_signature_pending_f(void)
438{
439 return 0x80000000;
440}
441static inline u32 pbdma_intr_1_r(u32 i)
442{
443 return 0x00040148 + i*8192;
444}
445static inline u32 pbdma_intr_en_0_r(u32 i)
446{
447 return 0x0004010c + i*8192;
448}
449static inline u32 pbdma_intr_en_0_lbreq_enabled_f(void)
450{
451 return 0x100;
452}
453static inline u32 pbdma_intr_en_1_r(u32 i)
454{
455 return 0x0004014c + i*8192;
456}
457static inline u32 pbdma_intr_stall_r(u32 i)
458{
459 return 0x0004013c + i*8192;
460}
461static inline u32 pbdma_intr_stall_lbreq_enabled_f(void)
462{
463 return 0x100;
464}
465static inline u32 pbdma_udma_nop_r(void)
466{
467 return 0x00000008;
468}
469#endif
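Registers in this file are per-PBDMA: each _r(i) helper takes an instance index and applies an 8192-byte (0x2000) stride. A small standalone sketch of how those helpers resolve to byte offsets; gk20a itself exposes a single PBDMA (see proj_host_num_pbdma_v() later in this patch), so index 1 appears only to illustrate the stride.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* Per-instance register helpers copied from hw_pbdma_gk20a.h above. */
static inline u32 pbdma_gp_get_r(u32 i) { return 0x00040014 + i*8192; }
static inline u32 pbdma_gp_put_r(u32 i) { return 0x00040000 + i*8192; }

int main(void)
{
        /* Print the GPFIFO get/put offsets for two hypothetical instances. */
        for (u32 i = 0; i < 2; i++)
                printf("pbdma%u: GP_GET @ 0x%08x, GP_PUT @ 0x%08x\n",
                       i, pbdma_gp_get_r(i), pbdma_gp_put_r(i));
        return 0;
}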
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h
new file mode 100644
index 00000000..d4007613
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h
@@ -0,0 +1,137 @@
1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_pri_ringmaster_gk20a_h_
51#define _hw_pri_ringmaster_gk20a_h_
52
53static inline u32 pri_ringmaster_command_r(void)
54{
55 return 0x0012004c;
56}
57static inline u32 pri_ringmaster_command_cmd_m(void)
58{
59 return 0x3f << 0;
60}
61static inline u32 pri_ringmaster_command_cmd_v(u32 r)
62{
63 return (r >> 0) & 0x3f;
64}
65static inline u32 pri_ringmaster_command_cmd_no_cmd_v(void)
66{
67 return 0x00000000;
68}
69static inline u32 pri_ringmaster_command_cmd_start_ring_f(void)
70{
71 return 0x1;
72}
73static inline u32 pri_ringmaster_command_cmd_ack_interrupt_f(void)
74{
75 return 0x2;
76}
77static inline u32 pri_ringmaster_command_cmd_enumerate_stations_f(void)
78{
79 return 0x3;
80}
81static inline u32 pri_ringmaster_command_cmd_enumerate_stations_bc_grp_all_f(void)
82{
83 return 0x0;
84}
85static inline u32 pri_ringmaster_command_data_r(void)
86{
87 return 0x00120048;
88}
89static inline u32 pri_ringmaster_start_results_r(void)
90{
91 return 0x00120050;
92}
93static inline u32 pri_ringmaster_start_results_connectivity_v(u32 r)
94{
95 return (r >> 0) & 0x1;
96}
97static inline u32 pri_ringmaster_start_results_connectivity_pass_v(void)
98{
99 return 0x00000001;
100}
101static inline u32 pri_ringmaster_intr_status0_r(void)
102{
103 return 0x00120058;
104}
105static inline u32 pri_ringmaster_intr_status1_r(void)
106{
107 return 0x0012005c;
108}
109static inline u32 pri_ringmaster_global_ctl_r(void)
110{
111 return 0x00120060;
112}
113static inline u32 pri_ringmaster_global_ctl_ring_reset_asserted_f(void)
114{
115 return 0x1;
116}
117static inline u32 pri_ringmaster_global_ctl_ring_reset_deasserted_f(void)
118{
119 return 0x0;
120}
121static inline u32 pri_ringmaster_enum_fbp_r(void)
122{
123 return 0x00120074;
124}
125static inline u32 pri_ringmaster_enum_fbp_count_v(u32 r)
126{
127 return (r >> 0) & 0x1f;
128}
129static inline u32 pri_ringmaster_enum_gpc_r(void)
130{
131 return 0x00120078;
132}
133static inline u32 pri_ringmaster_enum_gpc_count_v(u32 r)
134{
135 return (r >> 0) & 0x1f;
136}
137#endif
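A brief sketch of how the ringmaster command/result helpers pair up: an _f() constant forms the command word to write, and the matching _v() extractor plus _pass_v() constant check the result. The read and write below are faked with local variables; this illustrates the accessor pattern only and is not the driver's actual priv-ring bring-up code.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* Copied from hw_pri_ringmaster_gk20a.h above, condensed. */
static inline u32 pri_ringmaster_command_cmd_start_ring_f(void)          { return 0x1; }
static inline u32 pri_ringmaster_start_results_connectivity_v(u32 r)     { return (r >> 0) & 0x1; }
static inline u32 pri_ringmaster_start_results_connectivity_pass_v(void) { return 0x00000001; }

int main(void)
{
        u32 command, start_results;

        /* Form the start_ring command word ... */
        command = pri_ringmaster_command_cmd_start_ring_f();

        /* ... then check the result register; here the readback is faked. */
        start_results = 0x1;
        if (pri_ringmaster_start_results_connectivity_v(start_results) ==
            pri_ringmaster_start_results_connectivity_pass_v())
                printf("pri ring started (command word 0x%x)\n", command);
        return 0;
}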
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h
new file mode 100644
index 00000000..db16a8de
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h
@@ -0,0 +1,226 @@
1/*
2 * drivers/video/tegra/host/gk20a/hw_pri_ringstation_fbp_gk20a.h
3 *
4 * Copyright (c) 2012-2013, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 /*
21 * Function naming determines intended use:
22 *
23 * <x>_r(void) : Returns the offset for register <x>.
24 *
25 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
26 *
27 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
28 *
29 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
30 * and masked to place it at field <y> of register <x>. This value
31 * can be |'d with others to produce a full register value for
32 * register <x>.
33 *
34 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
35 * value can be ~'d and then &'d to clear the value of field <y> for
36 * register <x>.
37 *
38 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
39 * to place it at field <y> of register <x>. This value can be |'d
40 * with others to produce a full register value for <x>.
41 *
42 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
43 * <x> value 'r' after being shifted to place its LSB at bit 0.
44 * This value is suitable for direct comparison with other unshifted
45 * values appropriate for use in field <y> of register <x>.
46 *
47 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
48 * field <y> of register <x>. This value is suitable for direct
49 * comparison with unshifted values appropriate for use in field <y>
50 * of register <x>.
51 */
52
53#ifndef __hw_pri_ringstation_fbp_gk20a_h__
54#define __hw_pri_ringstation_fbp_gk20a_h__
 55/* This file is autogenerated. Do not edit. */
56
57static inline u32 pri_ringstation_fbp_master_config_r(u32 i)
58{
59 return 0x00124300+((i)*4);
60}
61static inline u32 pri_ringstation_fbp_master_config__size_1_v(void)
62{
63 return 64;
64}
65static inline u32 pri_ringstation_fbp_master_config_timeout_s(void)
66{
67 return 18;
68}
69static inline u32 pri_ringstation_fbp_master_config_timeout_f(u32 v)
70{
71 return (v & 0x3ffff) << 0;
72}
73static inline u32 pri_ringstation_fbp_master_config_timeout_m(void)
74{
75 return 0x3ffff << 0;
76}
77static inline u32 pri_ringstation_fbp_master_config_timeout_v(u32 r)
78{
79 return (r >> 0) & 0x3ffff;
80}
81static inline u32 pri_ringstation_fbp_master_config_timeout_i_v(void)
82{
83 return 0x00000064;
84}
85static inline u32 pri_ringstation_fbp_master_config_timeout_i_f(void)
86{
87 return 0x64;
88}
89static inline u32 pri_ringstation_fbp_master_config_fs_action_s(void)
90{
91 return 1;
92}
93static inline u32 pri_ringstation_fbp_master_config_fs_action_f(u32 v)
94{
95 return (v & 0x1) << 30;
96}
97static inline u32 pri_ringstation_fbp_master_config_fs_action_m(void)
98{
99 return 0x1 << 30;
100}
101static inline u32 pri_ringstation_fbp_master_config_fs_action_v(u32 r)
102{
103 return (r >> 30) & 0x1;
104}
105static inline u32 pri_ringstation_fbp_master_config_fs_action_error_v(void)
106{
107 return 0x00000000;
108}
109static inline u32 pri_ringstation_fbp_master_config_fs_action_error_f(void)
110{
111 return 0x0;
112}
113static inline u32 pri_ringstation_fbp_master_config_fs_action_soldier_on_v(void)
114{
115 return 0x00000001;
116}
117static inline u32 pri_ringstation_fbp_master_config_fs_action_soldier_on_f(void)
118{
119 return 0x40000000;
120}
121static inline u32 pri_ringstation_fbp_master_config_reset_action_s(void)
122{
123 return 1;
124}
125static inline u32 pri_ringstation_fbp_master_config_reset_action_f(u32 v)
126{
127 return (v & 0x1) << 31;
128}
129static inline u32 pri_ringstation_fbp_master_config_reset_action_m(void)
130{
131 return 0x1 << 31;
132}
133static inline u32 pri_ringstation_fbp_master_config_reset_action_v(u32 r)
134{
135 return (r >> 31) & 0x1;
136}
137static inline u32 pri_ringstation_fbp_master_config_reset_action_error_v(void)
138{
139 return 0x00000000;
140}
141static inline u32 pri_ringstation_fbp_master_config_reset_action_error_f(void)
142{
143 return 0x0;
144}
145static inline u32 pri_ringstation_fbp_master_config_reset_action_soldier_on_v(void)
146{
147 return 0x00000001;
148}
149static inline u32 pri_ringstation_fbp_master_config_reset_action_soldier_on_f(void)
150{
151 return 0x80000000;
152}
153static inline u32 pri_ringstation_fbp_master_config_setup_clocks_s(void)
154{
155 return 3;
156}
157static inline u32 pri_ringstation_fbp_master_config_setup_clocks_f(u32 v)
158{
159 return (v & 0x7) << 20;
160}
161static inline u32 pri_ringstation_fbp_master_config_setup_clocks_m(void)
162{
163 return 0x7 << 20;
164}
165static inline u32 pri_ringstation_fbp_master_config_setup_clocks_v(u32 r)
166{
167 return (r >> 20) & 0x7;
168}
169static inline u32 pri_ringstation_fbp_master_config_setup_clocks_i_v(void)
170{
171 return 0x00000000;
172}
173static inline u32 pri_ringstation_fbp_master_config_setup_clocks_i_f(void)
174{
175 return 0x0;
176}
177static inline u32 pri_ringstation_fbp_master_config_wait_clocks_s(void)
178{
179 return 3;
180}
181static inline u32 pri_ringstation_fbp_master_config_wait_clocks_f(u32 v)
182{
183 return (v & 0x7) << 24;
184}
185static inline u32 pri_ringstation_fbp_master_config_wait_clocks_m(void)
186{
187 return 0x7 << 24;
188}
189static inline u32 pri_ringstation_fbp_master_config_wait_clocks_v(u32 r)
190{
191 return (r >> 24) & 0x7;
192}
193static inline u32 pri_ringstation_fbp_master_config_wait_clocks_i_v(void)
194{
195 return 0x00000000;
196}
197static inline u32 pri_ringstation_fbp_master_config_wait_clocks_i_f(void)
198{
199 return 0x0;
200}
201static inline u32 pri_ringstation_fbp_master_config_hold_clocks_s(void)
202{
203 return 3;
204}
205static inline u32 pri_ringstation_fbp_master_config_hold_clocks_f(u32 v)
206{
207 return (v & 0x7) << 27;
208}
209static inline u32 pri_ringstation_fbp_master_config_hold_clocks_m(void)
210{
211 return 0x7 << 27;
212}
213static inline u32 pri_ringstation_fbp_master_config_hold_clocks_v(u32 r)
214{
215 return (r >> 27) & 0x7;
216}
217static inline u32 pri_ringstation_fbp_master_config_hold_clocks_i_v(void)
218{
219 return 0x00000000;
220}
221static inline u32 pri_ringstation_fbp_master_config_hold_clocks_i_f(void)
222{
223 return 0x0;
224}
225
226#endif /* __hw_pri_ringstation_fbp_gk20a_h__ */
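The _m()/_f() pair is what the header comments describe for read-modify-write: clear a field with the complement of its mask, then OR in a new shifted value. A standalone sketch using the FBP ringstation timeout field; the 0xdeadbeef starting value and the 0x1000 timeout are arbitrary illustrative numbers.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* Copied from hw_pri_ringstation_fbp_gk20a.h above, condensed. */
static inline u32 pri_ringstation_fbp_master_config_timeout_m(void)   { return 0x3ffff << 0; }
static inline u32 pri_ringstation_fbp_master_config_timeout_f(u32 v)  { return (v & 0x3ffff) << 0; }
static inline u32 pri_ringstation_fbp_master_config_timeout_i_f(void) { return 0x64; }

int main(void)
{
        u32 config = 0xdeadbeef;  /* stand-in for a register read */

        /* Clear the timeout field with ~_m(), then OR in a new value via _f(). */
        config = (config & ~pri_ringstation_fbp_master_config_timeout_m()) |
                 pri_ringstation_fbp_master_config_timeout_f(0x1000);

        printf("new config 0x%08x (reset default timeout is 0x%x)\n",
               config, pri_ringstation_fbp_master_config_timeout_i_f());
        return 0;
}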
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h
new file mode 100644
index 00000000..e8aad933
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h
@@ -0,0 +1,226 @@
1/*
2 * drivers/video/tegra/host/gk20a/hw_pri_ringstation_gpc_gk20a.h
3 *
4 * Copyright (c) 2012-2013, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 /*
21 * Function naming determines intended use:
22 *
23 * <x>_r(void) : Returns the offset for register <x>.
24 *
25 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
26 *
27 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
28 *
29 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
30 * and masked to place it at field <y> of register <x>. This value
31 * can be |'d with others to produce a full register value for
32 * register <x>.
33 *
34 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
35 * value can be ~'d and then &'d to clear the value of field <y> for
36 * register <x>.
37 *
38 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
39 * to place it at field <y> of register <x>. This value can be |'d
40 * with others to produce a full register value for <x>.
41 *
42 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
43 * <x> value 'r' after being shifted to place its LSB at bit 0.
44 * This value is suitable for direct comparison with other unshifted
45 * values appropriate for use in field <y> of register <x>.
46 *
47 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
48 * field <y> of register <x>. This value is suitable for direct
49 * comparison with unshifted values appropriate for use in field <y>
50 * of register <x>.
51 */
52
53#ifndef __hw_pri_ringstation_gpc_gk20a_h__
54#define __hw_pri_ringstation_gpc_gk20a_h__
 55/* This file is autogenerated. Do not edit. */
56
57static inline u32 pri_ringstation_gpc_master_config_r(u32 i)
58{
59 return 0x00128300+((i)*4);
60}
61static inline u32 pri_ringstation_gpc_master_config__size_1_v(void)
62{
63 return 64;
64}
65static inline u32 pri_ringstation_gpc_master_config_timeout_s(void)
66{
67 return 18;
68}
69static inline u32 pri_ringstation_gpc_master_config_timeout_f(u32 v)
70{
71 return (v & 0x3ffff) << 0;
72}
73static inline u32 pri_ringstation_gpc_master_config_timeout_m(void)
74{
75 return 0x3ffff << 0;
76}
77static inline u32 pri_ringstation_gpc_master_config_timeout_v(u32 r)
78{
79 return (r >> 0) & 0x3ffff;
80}
81static inline u32 pri_ringstation_gpc_master_config_timeout_i_v(void)
82{
83 return 0x00000064;
84}
85static inline u32 pri_ringstation_gpc_master_config_timeout_i_f(void)
86{
87 return 0x64;
88}
89static inline u32 pri_ringstation_gpc_master_config_fs_action_s(void)
90{
91 return 1;
92}
93static inline u32 pri_ringstation_gpc_master_config_fs_action_f(u32 v)
94{
95 return (v & 0x1) << 30;
96}
97static inline u32 pri_ringstation_gpc_master_config_fs_action_m(void)
98{
99 return 0x1 << 30;
100}
101static inline u32 pri_ringstation_gpc_master_config_fs_action_v(u32 r)
102{
103 return (r >> 30) & 0x1;
104}
105static inline u32 pri_ringstation_gpc_master_config_fs_action_error_v(void)
106{
107 return 0x00000000;
108}
109static inline u32 pri_ringstation_gpc_master_config_fs_action_error_f(void)
110{
111 return 0x0;
112}
113static inline u32 pri_ringstation_gpc_master_config_fs_action_soldier_on_v(void)
114{
115 return 0x00000001;
116}
117static inline u32 pri_ringstation_gpc_master_config_fs_action_soldier_on_f(void)
118{
119 return 0x40000000;
120}
121static inline u32 pri_ringstation_gpc_master_config_reset_action_s(void)
122{
123 return 1;
124}
125static inline u32 pri_ringstation_gpc_master_config_reset_action_f(u32 v)
126{
127 return (v & 0x1) << 31;
128}
129static inline u32 pri_ringstation_gpc_master_config_reset_action_m(void)
130{
131 return 0x1 << 31;
132}
133static inline u32 pri_ringstation_gpc_master_config_reset_action_v(u32 r)
134{
135 return (r >> 31) & 0x1;
136}
137static inline u32 pri_ringstation_gpc_master_config_reset_action_error_v(void)
138{
139 return 0x00000000;
140}
141static inline u32 pri_ringstation_gpc_master_config_reset_action_error_f(void)
142{
143 return 0x0;
144}
145static inline u32 pri_ringstation_gpc_master_config_reset_action_soldier_on_v(void)
146{
147 return 0x00000001;
148}
149static inline u32 pri_ringstation_gpc_master_config_reset_action_soldier_on_f(void)
150{
151 return 0x80000000;
152}
153static inline u32 pri_ringstation_gpc_master_config_setup_clocks_s(void)
154{
155 return 3;
156}
157static inline u32 pri_ringstation_gpc_master_config_setup_clocks_f(u32 v)
158{
159 return (v & 0x7) << 20;
160}
161static inline u32 pri_ringstation_gpc_master_config_setup_clocks_m(void)
162{
163 return 0x7 << 20;
164}
165static inline u32 pri_ringstation_gpc_master_config_setup_clocks_v(u32 r)
166{
167 return (r >> 20) & 0x7;
168}
169static inline u32 pri_ringstation_gpc_master_config_setup_clocks_i_v(void)
170{
171 return 0x00000000;
172}
173static inline u32 pri_ringstation_gpc_master_config_setup_clocks_i_f(void)
174{
175 return 0x0;
176}
177static inline u32 pri_ringstation_gpc_master_config_wait_clocks_s(void)
178{
179 return 3;
180}
181static inline u32 pri_ringstation_gpc_master_config_wait_clocks_f(u32 v)
182{
183 return (v & 0x7) << 24;
184}
185static inline u32 pri_ringstation_gpc_master_config_wait_clocks_m(void)
186{
187 return 0x7 << 24;
188}
189static inline u32 pri_ringstation_gpc_master_config_wait_clocks_v(u32 r)
190{
191 return (r >> 24) & 0x7;
192}
193static inline u32 pri_ringstation_gpc_master_config_wait_clocks_i_v(void)
194{
195 return 0x00000000;
196}
197static inline u32 pri_ringstation_gpc_master_config_wait_clocks_i_f(void)
198{
199 return 0x0;
200}
201static inline u32 pri_ringstation_gpc_master_config_hold_clocks_s(void)
202{
203 return 3;
204}
205static inline u32 pri_ringstation_gpc_master_config_hold_clocks_f(u32 v)
206{
207 return (v & 0x7) << 27;
208}
209static inline u32 pri_ringstation_gpc_master_config_hold_clocks_m(void)
210{
211 return 0x7 << 27;
212}
213static inline u32 pri_ringstation_gpc_master_config_hold_clocks_v(u32 r)
214{
215 return (r >> 27) & 0x7;
216}
217static inline u32 pri_ringstation_gpc_master_config_hold_clocks_i_v(void)
218{
219 return 0x00000000;
220}
221static inline u32 pri_ringstation_gpc_master_config_hold_clocks_i_f(void)
222{
223 return 0x0;
224}
225
226#endif /* __hw_pri_ringstation_gpc_gk20a_h__ */
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h
new file mode 100644
index 00000000..c281dd54
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h
@@ -0,0 +1,69 @@
1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_pri_ringstation_sys_gk20a_h_
51#define _hw_pri_ringstation_sys_gk20a_h_
52
53static inline u32 pri_ringstation_sys_master_config_r(u32 i)
54{
55 return 0x00122300 + i*4;
56}
57static inline u32 pri_ringstation_sys_decode_config_r(void)
58{
59 return 0x00122204;
60}
61static inline u32 pri_ringstation_sys_decode_config_ring_m(void)
62{
63 return 0x7 << 0;
64}
65static inline u32 pri_ringstation_sys_decode_config_ring_drop_on_ring_not_started_f(void)
66{
67 return 0x1;
68}
69#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h
new file mode 100644
index 00000000..93c55c30
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h
@@ -0,0 +1,141 @@
1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_proj_gk20a_h_
51#define _hw_proj_gk20a_h_
52
53static inline u32 proj_gpc_base_v(void)
54{
55 return 0x00500000;
56}
57static inline u32 proj_gpc_shared_base_v(void)
58{
59 return 0x00418000;
60}
61static inline u32 proj_gpc_stride_v(void)
62{
63 return 0x00008000;
64}
65static inline u32 proj_ltc_stride_v(void)
66{
67 return 0x00002000;
68}
69static inline u32 proj_lts_stride_v(void)
70{
71 return 0x00000400;
72}
73static inline u32 proj_ppc_in_gpc_base_v(void)
74{
75 return 0x00003000;
76}
77static inline u32 proj_ppc_in_gpc_stride_v(void)
78{
79 return 0x00000200;
80}
81static inline u32 proj_rop_base_v(void)
82{
83 return 0x00410000;
84}
85static inline u32 proj_rop_shared_base_v(void)
86{
87 return 0x00408800;
88}
89static inline u32 proj_rop_stride_v(void)
90{
91 return 0x00000400;
92}
93static inline u32 proj_tpc_in_gpc_base_v(void)
94{
95 return 0x00004000;
96}
97static inline u32 proj_tpc_in_gpc_stride_v(void)
98{
99 return 0x00000800;
100}
101static inline u32 proj_tpc_in_gpc_shared_base_v(void)
102{
103 return 0x00001800;
104}
105static inline u32 proj_host_num_pbdma_v(void)
106{
107 return 0x00000001;
108}
109static inline u32 proj_scal_litter_num_tpc_per_gpc_v(void)
110{
111 return 0x00000001;
112}
113static inline u32 proj_scal_litter_num_fbps_v(void)
114{
115 return 0x00000001;
116}
117static inline u32 proj_scal_litter_num_gpcs_v(void)
118{
119 return 0x00000001;
120}
121static inline u32 proj_scal_litter_num_pes_per_gpc_v(void)
122{
123 return 0x00000001;
124}
125static inline u32 proj_scal_litter_num_tpcs_per_pes_v(void)
126{
127 return 0x00000001;
128}
129static inline u32 proj_scal_litter_num_zcull_banks_v(void)
130{
131 return 0x00000004;
132}
133static inline u32 proj_scal_max_gpcs_v(void)
134{
135 return 0x00000020;
136}
137static inline u32 proj_scal_max_tpc_per_gpc_v(void)
138{
139 return 0x00000008;
140}
141#endif
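hw_proj_gk20a.h collects chip layout constants rather than register fields: unit base addresses, per-unit strides and litter counts. One common use is computing the unicast address of a register defined relative to GPC0/TPC0 by adding per-GPC and per-TPC strides. The sketch below shows that arithmetic with a hypothetical tpc_reg_addr() helper; gk20a has a single GPC with a single TPC, so only index (0, 0) is real.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* Copied from hw_proj_gk20a.h above, condensed. */
static inline u32 proj_gpc_base_v(void)          { return 0x00500000; }
static inline u32 proj_gpc_stride_v(void)        { return 0x00008000; }
static inline u32 proj_tpc_in_gpc_base_v(void)   { return 0x00004000; }
static inline u32 proj_tpc_in_gpc_stride_v(void) { return 0x00000800; }

/* Hypothetical helper: per-unit address of a register defined for GPC0/TPC0. */
static u32 tpc_reg_addr(u32 reg, u32 gpc, u32 tpc)
{
        return reg + gpc * proj_gpc_stride_v() +
               tpc * proj_tpc_in_gpc_stride_v();
}

int main(void)
{
        /* Placeholder register at gpc_base + tpc_in_gpc_base (0x00504000). */
        u32 reg = proj_gpc_base_v() + proj_tpc_in_gpc_base_v();

        printf("GPC0/TPC0 copy of the register: 0x%08x\n",
               tpc_reg_addr(reg, 0, 0));
        return 0;
}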
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h
new file mode 100644
index 00000000..d7d26b80
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h
@@ -0,0 +1,737 @@
1/*
2 * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_pwr_gk20a_h_
51#define _hw_pwr_gk20a_h_
52
53static inline u32 pwr_falcon_irqsset_r(void)
54{
55 return 0x0010a000;
56}
57static inline u32 pwr_falcon_irqsset_swgen0_set_f(void)
58{
59 return 0x40;
60}
61static inline u32 pwr_falcon_irqsclr_r(void)
62{
63 return 0x0010a004;
64}
65static inline u32 pwr_falcon_irqstat_r(void)
66{
67 return 0x0010a008;
68}
69static inline u32 pwr_falcon_irqstat_halt_true_f(void)
70{
71 return 0x10;
72}
73static inline u32 pwr_falcon_irqstat_exterr_true_f(void)
74{
75 return 0x20;
76}
77static inline u32 pwr_falcon_irqstat_swgen0_true_f(void)
78{
79 return 0x40;
80}
81static inline u32 pwr_falcon_irqmode_r(void)
82{
83 return 0x0010a00c;
84}
85static inline u32 pwr_falcon_irqmset_r(void)
86{
87 return 0x0010a010;
88}
89static inline u32 pwr_falcon_irqmset_gptmr_f(u32 v)
90{
91 return (v & 0x1) << 0;
92}
93static inline u32 pwr_falcon_irqmset_wdtmr_f(u32 v)
94{
95 return (v & 0x1) << 1;
96}
97static inline u32 pwr_falcon_irqmset_mthd_f(u32 v)
98{
99 return (v & 0x1) << 2;
100}
101static inline u32 pwr_falcon_irqmset_ctxsw_f(u32 v)
102{
103 return (v & 0x1) << 3;
104}
105static inline u32 pwr_falcon_irqmset_halt_f(u32 v)
106{
107 return (v & 0x1) << 4;
108}
109static inline u32 pwr_falcon_irqmset_exterr_f(u32 v)
110{
111 return (v & 0x1) << 5;
112}
113static inline u32 pwr_falcon_irqmset_swgen0_f(u32 v)
114{
115 return (v & 0x1) << 6;
116}
117static inline u32 pwr_falcon_irqmset_swgen1_f(u32 v)
118{
119 return (v & 0x1) << 7;
120}
121static inline u32 pwr_falcon_irqmclr_r(void)
122{
123 return 0x0010a014;
124}
125static inline u32 pwr_falcon_irqmclr_gptmr_f(u32 v)
126{
127 return (v & 0x1) << 0;
128}
129static inline u32 pwr_falcon_irqmclr_wdtmr_f(u32 v)
130{
131 return (v & 0x1) << 1;
132}
133static inline u32 pwr_falcon_irqmclr_mthd_f(u32 v)
134{
135 return (v & 0x1) << 2;
136}
137static inline u32 pwr_falcon_irqmclr_ctxsw_f(u32 v)
138{
139 return (v & 0x1) << 3;
140}
141static inline u32 pwr_falcon_irqmclr_halt_f(u32 v)
142{
143 return (v & 0x1) << 4;
144}
145static inline u32 pwr_falcon_irqmclr_exterr_f(u32 v)
146{
147 return (v & 0x1) << 5;
148}
149static inline u32 pwr_falcon_irqmclr_swgen0_f(u32 v)
150{
151 return (v & 0x1) << 6;
152}
153static inline u32 pwr_falcon_irqmclr_swgen1_f(u32 v)
154{
155 return (v & 0x1) << 7;
156}
157static inline u32 pwr_falcon_irqmclr_ext_f(u32 v)
158{
159 return (v & 0xff) << 8;
160}
161static inline u32 pwr_falcon_irqmask_r(void)
162{
163 return 0x0010a018;
164}
165static inline u32 pwr_falcon_irqdest_r(void)
166{
167 return 0x0010a01c;
168}
169static inline u32 pwr_falcon_irqdest_host_gptmr_f(u32 v)
170{
171 return (v & 0x1) << 0;
172}
173static inline u32 pwr_falcon_irqdest_host_wdtmr_f(u32 v)
174{
175 return (v & 0x1) << 1;
176}
177static inline u32 pwr_falcon_irqdest_host_mthd_f(u32 v)
178{
179 return (v & 0x1) << 2;
180}
181static inline u32 pwr_falcon_irqdest_host_ctxsw_f(u32 v)
182{
183 return (v & 0x1) << 3;
184}
185static inline u32 pwr_falcon_irqdest_host_halt_f(u32 v)
186{
187 return (v & 0x1) << 4;
188}
189static inline u32 pwr_falcon_irqdest_host_exterr_f(u32 v)
190{
191 return (v & 0x1) << 5;
192}
193static inline u32 pwr_falcon_irqdest_host_swgen0_f(u32 v)
194{
195 return (v & 0x1) << 6;
196}
197static inline u32 pwr_falcon_irqdest_host_swgen1_f(u32 v)
198{
199 return (v & 0x1) << 7;
200}
201static inline u32 pwr_falcon_irqdest_host_ext_f(u32 v)
202{
203 return (v & 0xff) << 8;
204}
205static inline u32 pwr_falcon_irqdest_target_gptmr_f(u32 v)
206{
207 return (v & 0x1) << 16;
208}
209static inline u32 pwr_falcon_irqdest_target_wdtmr_f(u32 v)
210{
211 return (v & 0x1) << 17;
212}
213static inline u32 pwr_falcon_irqdest_target_mthd_f(u32 v)
214{
215 return (v & 0x1) << 18;
216}
217static inline u32 pwr_falcon_irqdest_target_ctxsw_f(u32 v)
218{
219 return (v & 0x1) << 19;
220}
221static inline u32 pwr_falcon_irqdest_target_halt_f(u32 v)
222{
223 return (v & 0x1) << 20;
224}
225static inline u32 pwr_falcon_irqdest_target_exterr_f(u32 v)
226{
227 return (v & 0x1) << 21;
228}
229static inline u32 pwr_falcon_irqdest_target_swgen0_f(u32 v)
230{
231 return (v & 0x1) << 22;
232}
233static inline u32 pwr_falcon_irqdest_target_swgen1_f(u32 v)
234{
235 return (v & 0x1) << 23;
236}
237static inline u32 pwr_falcon_irqdest_target_ext_f(u32 v)
238{
239 return (v & 0xff) << 24;
240}
241static inline u32 pwr_falcon_curctx_r(void)
242{
243 return 0x0010a050;
244}
245static inline u32 pwr_falcon_nxtctx_r(void)
246{
247 return 0x0010a054;
248}
249static inline u32 pwr_falcon_mailbox0_r(void)
250{
251 return 0x0010a040;
252}
253static inline u32 pwr_falcon_mailbox1_r(void)
254{
255 return 0x0010a044;
256}
257static inline u32 pwr_falcon_itfen_r(void)
258{
259 return 0x0010a048;
260}
261static inline u32 pwr_falcon_itfen_ctxen_enable_f(void)
262{
263 return 0x1;
264}
265static inline u32 pwr_falcon_idlestate_r(void)
266{
267 return 0x0010a04c;
268}
269static inline u32 pwr_falcon_idlestate_falcon_busy_v(u32 r)
270{
271 return (r >> 0) & 0x1;
272}
273static inline u32 pwr_falcon_idlestate_ext_busy_v(u32 r)
274{
275 return (r >> 1) & 0x7fff;
276}
277static inline u32 pwr_falcon_os_r(void)
278{
279 return 0x0010a080;
280}
281static inline u32 pwr_falcon_engctl_r(void)
282{
283 return 0x0010a0a4;
284}
285static inline u32 pwr_falcon_cpuctl_r(void)
286{
287 return 0x0010a100;
288}
289static inline u32 pwr_falcon_cpuctl_startcpu_f(u32 v)
290{
291 return (v & 0x1) << 1;
292}
293static inline u32 pwr_falcon_bootvec_r(void)
294{
295 return 0x0010a104;
296}
297static inline u32 pwr_falcon_bootvec_vec_f(u32 v)
298{
299 return (v & 0xffffffff) << 0;
300}
301static inline u32 pwr_falcon_dmactl_r(void)
302{
303 return 0x0010a10c;
304}
305static inline u32 pwr_falcon_dmactl_dmem_scrubbing_m(void)
306{
307 return 0x1 << 1;
308}
309static inline u32 pwr_falcon_dmactl_imem_scrubbing_m(void)
310{
311 return 0x1 << 2;
312}
313static inline u32 pwr_falcon_hwcfg_r(void)
314{
315 return 0x0010a108;
316}
317static inline u32 pwr_falcon_hwcfg_imem_size_v(u32 r)
318{
319 return (r >> 0) & 0x1ff;
320}
321static inline u32 pwr_falcon_hwcfg_dmem_size_v(u32 r)
322{
323 return (r >> 9) & 0x1ff;
324}
325static inline u32 pwr_falcon_dmatrfbase_r(void)
326{
327 return 0x0010a110;
328}
329static inline u32 pwr_falcon_dmatrfmoffs_r(void)
330{
331 return 0x0010a114;
332}
333static inline u32 pwr_falcon_dmatrfcmd_r(void)
334{
335 return 0x0010a118;
336}
337static inline u32 pwr_falcon_dmatrfcmd_imem_f(u32 v)
338{
339 return (v & 0x1) << 4;
340}
341static inline u32 pwr_falcon_dmatrfcmd_write_f(u32 v)
342{
343 return (v & 0x1) << 5;
344}
345static inline u32 pwr_falcon_dmatrfcmd_size_f(u32 v)
346{
347 return (v & 0x7) << 8;
348}
349static inline u32 pwr_falcon_dmatrfcmd_ctxdma_f(u32 v)
350{
351 return (v & 0x7) << 12;
352}
353static inline u32 pwr_falcon_dmatrffboffs_r(void)
354{
355 return 0x0010a11c;
356}
357static inline u32 pwr_falcon_exterraddr_r(void)
358{
359 return 0x0010a168;
360}
361static inline u32 pwr_falcon_exterrstat_r(void)
362{
363 return 0x0010a16c;
364}
365static inline u32 pwr_falcon_exterrstat_valid_m(void)
366{
367 return 0x1 << 31;
368}
369static inline u32 pwr_falcon_exterrstat_valid_v(u32 r)
370{
371 return (r >> 31) & 0x1;
372}
373static inline u32 pwr_falcon_exterrstat_valid_true_v(void)
374{
375 return 0x00000001;
376}
377static inline u32 pwr_pmu_falcon_icd_cmd_r(void)
378{
379 return 0x0010a200;
380}
381static inline u32 pwr_pmu_falcon_icd_cmd_opc_s(void)
382{
383 return 4;
384}
385static inline u32 pwr_pmu_falcon_icd_cmd_opc_f(u32 v)
386{
387 return (v & 0xf) << 0;
388}
389static inline u32 pwr_pmu_falcon_icd_cmd_opc_m(void)
390{
391 return 0xf << 0;
392}
393static inline u32 pwr_pmu_falcon_icd_cmd_opc_v(u32 r)
394{
395 return (r >> 0) & 0xf;
396}
397static inline u32 pwr_pmu_falcon_icd_cmd_opc_rreg_f(void)
398{
399 return 0x8;
400}
401static inline u32 pwr_pmu_falcon_icd_cmd_opc_rstat_f(void)
402{
403 return 0xe;
404}
405static inline u32 pwr_pmu_falcon_icd_cmd_idx_f(u32 v)
406{
407 return (v & 0x1f) << 8;
408}
409static inline u32 pwr_pmu_falcon_icd_rdata_r(void)
410{
411 return 0x0010a20c;
412}
413static inline u32 pwr_falcon_dmemc_r(u32 i)
414{
415 return 0x0010a1c0 + i*8;
416}
417static inline u32 pwr_falcon_dmemc_offs_f(u32 v)
418{
419 return (v & 0x3f) << 2;
420}
421static inline u32 pwr_falcon_dmemc_offs_m(void)
422{
423 return 0x3f << 2;
424}
425static inline u32 pwr_falcon_dmemc_blk_f(u32 v)
426{
427 return (v & 0xff) << 8;
428}
429static inline u32 pwr_falcon_dmemc_blk_m(void)
430{
431 return 0xff << 8;
432}
433static inline u32 pwr_falcon_dmemc_aincw_f(u32 v)
434{
435 return (v & 0x1) << 24;
436}
437static inline u32 pwr_falcon_dmemc_aincr_f(u32 v)
438{
439 return (v & 0x1) << 25;
440}
441static inline u32 pwr_falcon_dmemd_r(u32 i)
442{
443 return 0x0010a1c4 + i*8;
444}
445static inline u32 pwr_pmu_new_instblk_r(void)
446{
447 return 0x0010a480;
448}
449static inline u32 pwr_pmu_new_instblk_ptr_f(u32 v)
450{
451 return (v & 0xfffffff) << 0;
452}
453static inline u32 pwr_pmu_new_instblk_target_fb_f(void)
454{
455 return 0x0;
456}
457static inline u32 pwr_pmu_new_instblk_target_sys_coh_f(void)
458{
459 return 0x20000000;
460}
461static inline u32 pwr_pmu_new_instblk_valid_f(u32 v)
462{
463 return (v & 0x1) << 30;
464}
465static inline u32 pwr_pmu_mutex_id_r(void)
466{
467 return 0x0010a488;
468}
469static inline u32 pwr_pmu_mutex_id_value_v(u32 r)
470{
471 return (r >> 0) & 0xff;
472}
473static inline u32 pwr_pmu_mutex_id_value_init_v(void)
474{
475 return 0x00000000;
476}
477static inline u32 pwr_pmu_mutex_id_value_not_avail_v(void)
478{
479 return 0x000000ff;
480}
481static inline u32 pwr_pmu_mutex_id_release_r(void)
482{
483 return 0x0010a48c;
484}
485static inline u32 pwr_pmu_mutex_id_release_value_f(u32 v)
486{
487 return (v & 0xff) << 0;
488}
489static inline u32 pwr_pmu_mutex_id_release_value_m(void)
490{
491 return 0xff << 0;
492}
493static inline u32 pwr_pmu_mutex_id_release_value_init_v(void)
494{
495 return 0x00000000;
496}
497static inline u32 pwr_pmu_mutex_id_release_value_init_f(void)
498{
499 return 0x0;
500}
501static inline u32 pwr_pmu_mutex_r(u32 i)
502{
503 return 0x0010a580 + i*4;
504}
505static inline u32 pwr_pmu_mutex__size_1_v(void)
506{
507 return 0x00000010;
508}
509static inline u32 pwr_pmu_mutex_value_f(u32 v)
510{
511 return (v & 0xff) << 0;
512}
513static inline u32 pwr_pmu_mutex_value_v(u32 r)
514{
515 return (r >> 0) & 0xff;
516}
517static inline u32 pwr_pmu_mutex_value_initial_lock_f(void)
518{
519 return 0x0;
520}
521static inline u32 pwr_pmu_queue_head_r(u32 i)
522{
523 return 0x0010a4a0 + i*4;
524}
525static inline u32 pwr_pmu_queue_head__size_1_v(void)
526{
527 return 0x00000004;
528}
529static inline u32 pwr_pmu_queue_head_address_f(u32 v)
530{
531 return (v & 0xffffffff) << 0;
532}
533static inline u32 pwr_pmu_queue_head_address_v(u32 r)
534{
535 return (r >> 0) & 0xffffffff;
536}
537static inline u32 pwr_pmu_queue_tail_r(u32 i)
538{
539 return 0x0010a4b0 + i*4;
540}
541static inline u32 pwr_pmu_queue_tail__size_1_v(void)
542{
543 return 0x00000004;
544}
545static inline u32 pwr_pmu_queue_tail_address_f(u32 v)
546{
547 return (v & 0xffffffff) << 0;
548}
549static inline u32 pwr_pmu_queue_tail_address_v(u32 r)
550{
551 return (r >> 0) & 0xffffffff;
552}
553static inline u32 pwr_pmu_msgq_head_r(void)
554{
555 return 0x0010a4c8;
556}
557static inline u32 pwr_pmu_msgq_head_val_f(u32 v)
558{
559 return (v & 0xffffffff) << 0;
560}
561static inline u32 pwr_pmu_msgq_head_val_v(u32 r)
562{
563 return (r >> 0) & 0xffffffff;
564}
565static inline u32 pwr_pmu_msgq_tail_r(void)
566{
567 return 0x0010a4cc;
568}
569static inline u32 pwr_pmu_msgq_tail_val_f(u32 v)
570{
571 return (v & 0xffffffff) << 0;
572}
573static inline u32 pwr_pmu_msgq_tail_val_v(u32 r)
574{
575 return (r >> 0) & 0xffffffff;
576}
577static inline u32 pwr_pmu_idle_mask_r(u32 i)
578{
579 return 0x0010a504 + i*16;
580}
581static inline u32 pwr_pmu_idle_mask_gr_enabled_f(void)
582{
583 return 0x1;
584}
585static inline u32 pwr_pmu_idle_mask_ce_2_enabled_f(void)
586{
587 return 0x200000;
588}
589static inline u32 pwr_pmu_idle_count_r(u32 i)
590{
591 return 0x0010a508 + i*16;
592}
593static inline u32 pwr_pmu_idle_count_value_f(u32 v)
594{
595 return (v & 0x7fffffff) << 0;
596}
597static inline u32 pwr_pmu_idle_count_value_v(u32 r)
598{
599 return (r >> 0) & 0x7fffffff;
600}
601static inline u32 pwr_pmu_idle_count_reset_f(u32 v)
602{
603 return (v & 0x1) << 31;
604}
605static inline u32 pwr_pmu_idle_ctrl_r(u32 i)
606{
607 return 0x0010a50c + i*16;
608}
609static inline u32 pwr_pmu_idle_ctrl_value_m(void)
610{
611 return 0x3 << 0;
612}
613static inline u32 pwr_pmu_idle_ctrl_value_busy_f(void)
614{
615 return 0x2;
616}
617static inline u32 pwr_pmu_idle_ctrl_value_always_f(void)
618{
619 return 0x3;
620}
621static inline u32 pwr_pmu_idle_ctrl_filter_m(void)
622{
623 return 0x1 << 2;
624}
625static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
626{
627 return 0x0;
628}
629static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
630{
631 return 0x0010a9f0 + i*8;
632}
633static inline u32 pwr_pmu_idle_mask_1_supp_r(u32 i)
634{
635 return 0x0010a9f4 + i*8;
636}
637static inline u32 pwr_pmu_idle_ctrl_supp_r(u32 i)
638{
639 return 0x0010aa30 + i*8;
640}
641static inline u32 pwr_pmu_debug_r(u32 i)
642{
643 return 0x0010a5c0 + i*4;
644}
645static inline u32 pwr_pmu_debug__size_1_v(void)
646{
647 return 0x00000004;
648}
649static inline u32 pwr_pmu_mailbox_r(u32 i)
650{
651 return 0x0010a450 + i*4;
652}
653static inline u32 pwr_pmu_mailbox__size_1_v(void)
654{
655 return 0x0000000c;
656}
657static inline u32 pwr_pmu_bar0_addr_r(void)
658{
659 return 0x0010a7a0;
660}
661static inline u32 pwr_pmu_bar0_data_r(void)
662{
663 return 0x0010a7a4;
664}
665static inline u32 pwr_pmu_bar0_ctl_r(void)
666{
667 return 0x0010a7ac;
668}
669static inline u32 pwr_pmu_bar0_timeout_r(void)
670{
671 return 0x0010a7a8;
672}
673static inline u32 pwr_pmu_bar0_fecs_error_r(void)
674{
675 return 0x0010a988;
676}
677static inline u32 pwr_pmu_bar0_error_status_r(void)
678{
679 return 0x0010a7b0;
680}
681static inline u32 pwr_pmu_pg_idlefilth_r(u32 i)
682{
683 return 0x0010a6c0 + i*4;
684}
685static inline u32 pwr_pmu_pg_ppuidlefilth_r(u32 i)
686{
687 return 0x0010a6e8 + i*4;
688}
689static inline u32 pwr_pmu_pg_idle_cnt_r(u32 i)
690{
691 return 0x0010a710 + i*4;
692}
693static inline u32 pwr_pmu_pg_intren_r(u32 i)
694{
695 return 0x0010a760 + i*4;
696}
697static inline u32 pwr_fbif_transcfg_r(u32 i)
698{
699 return 0x0010a600 + i*4;
700}
701static inline u32 pwr_fbif_transcfg_target_local_fb_f(void)
702{
703 return 0x0;
704}
705static inline u32 pwr_fbif_transcfg_target_coherent_sysmem_f(void)
706{
707 return 0x1;
708}
709static inline u32 pwr_fbif_transcfg_target_noncoherent_sysmem_f(void)
710{
711 return 0x2;
712}
713static inline u32 pwr_fbif_transcfg_mem_type_s(void)
714{
715 return 1;
716}
717static inline u32 pwr_fbif_transcfg_mem_type_f(u32 v)
718{
719 return (v & 0x1) << 2;
720}
721static inline u32 pwr_fbif_transcfg_mem_type_m(void)
722{
723 return 0x1 << 2;
724}
725static inline u32 pwr_fbif_transcfg_mem_type_v(u32 r)
726{
727 return (r >> 2) & 0x1;
728}
729static inline u32 pwr_fbif_transcfg_mem_type_virtual_f(void)
730{
731 return 0x0;
732}
733static inline u32 pwr_fbif_transcfg_mem_type_physical_f(void)
734{
735 return 0x4;
736}
737#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h
new file mode 100644
index 00000000..7eff3881
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h
@@ -0,0 +1,389 @@
1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_ram_gk20a_h_
51#define _hw_ram_gk20a_h_
52
53static inline u32 ram_in_ramfc_s(void)
54{
55 return 4096;
56}
57static inline u32 ram_in_ramfc_w(void)
58{
59 return 0;
60}
61static inline u32 ram_in_page_dir_base_target_f(u32 v)
62{
63 return (v & 0x3) << 0;
64}
65static inline u32 ram_in_page_dir_base_target_w(void)
66{
67 return 128;
68}
69static inline u32 ram_in_page_dir_base_target_vid_mem_f(void)
70{
71 return 0x0;
72}
73static inline u32 ram_in_page_dir_base_vol_w(void)
74{
75 return 128;
76}
77static inline u32 ram_in_page_dir_base_vol_true_f(void)
78{
79 return 0x4;
80}
81static inline u32 ram_in_page_dir_base_lo_f(u32 v)
82{
83 return (v & 0xfffff) << 12;
84}
85static inline u32 ram_in_page_dir_base_lo_w(void)
86{
87 return 128;
88}
89static inline u32 ram_in_page_dir_base_hi_f(u32 v)
90{
91 return (v & 0xff) << 0;
92}
93static inline u32 ram_in_page_dir_base_hi_w(void)
94{
95 return 129;
96}
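/*
 * Illustrative sketch only, not part of the autogenerated register list:
 * one way a caller could combine the _w() word offsets and _f() field
 * builders defined above (per the naming convention documented at the top
 * of this file) when filling the page directory base words of an instance
 * block mapped as a u32 array. The function name and the inst_blk/pdb_addr
 * parameters are hypothetical stand-ins; the driver's real code may use
 * different accessors.
 */
static inline void ram_in_example_set_page_dir_base(u32 *inst_blk,
						    u64 pdb_addr)
{
	/* Word 128: aperture target, volatile bit, address bits 31:12. */
	inst_blk[ram_in_page_dir_base_lo_w()] =
		ram_in_page_dir_base_target_vid_mem_f() |
		ram_in_page_dir_base_vol_true_f() |
		ram_in_page_dir_base_lo_f((u32)(pdb_addr >> 12));
	/* Word 129: address bits 39:32. */
	inst_blk[ram_in_page_dir_base_hi_w()] =
		ram_in_page_dir_base_hi_f((u32)(pdb_addr >> 32));
}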
97static inline u32 ram_in_adr_limit_lo_f(u32 v)
98{
99 return (v & 0xfffff) << 12;
100}
101static inline u32 ram_in_adr_limit_lo_w(void)
102{
103 return 130;
104}
105static inline u32 ram_in_adr_limit_hi_f(u32 v)
106{
107 return (v & 0xff) << 0;
108}
109static inline u32 ram_in_adr_limit_hi_w(void)
110{
111 return 131;
112}
113static inline u32 ram_in_engine_cs_w(void)
114{
115 return 132;
116}
117static inline u32 ram_in_engine_cs_wfi_v(void)
118{
119 return 0x00000000;
120}
121static inline u32 ram_in_engine_cs_wfi_f(void)
122{
123 return 0x0;
124}
125static inline u32 ram_in_engine_cs_fg_v(void)
126{
127 return 0x00000001;
128}
129static inline u32 ram_in_engine_cs_fg_f(void)
130{
131 return 0x8;
132}
133static inline u32 ram_in_gr_cs_w(void)
134{
135 return 132;
136}
137static inline u32 ram_in_gr_cs_wfi_f(void)
138{
139 return 0x0;
140}
141static inline u32 ram_in_gr_wfi_target_w(void)
142{
143 return 132;
144}
145static inline u32 ram_in_gr_wfi_mode_w(void)
146{
147 return 132;
148}
149static inline u32 ram_in_gr_wfi_mode_physical_v(void)
150{
151 return 0x00000000;
152}
153static inline u32 ram_in_gr_wfi_mode_physical_f(void)
154{
155 return 0x0;
156}
157static inline u32 ram_in_gr_wfi_mode_virtual_v(void)
158{
159 return 0x00000001;
160}
161static inline u32 ram_in_gr_wfi_mode_virtual_f(void)
162{
163 return 0x4;
164}
165static inline u32 ram_in_gr_wfi_ptr_lo_f(u32 v)
166{
167 return (v & 0xfffff) << 12;
168}
169static inline u32 ram_in_gr_wfi_ptr_lo_w(void)
170{
171 return 132;
172}
173static inline u32 ram_in_gr_wfi_ptr_hi_f(u32 v)
174{
175 return (v & 0xff) << 0;
176}
177static inline u32 ram_in_gr_wfi_ptr_hi_w(void)
178{
179 return 133;
180}
181static inline u32 ram_in_base_shift_v(void)
182{
183 return 0x0000000c;
184}
185static inline u32 ram_in_alloc_size_v(void)
186{
187 return 0x00001000;
188}
189static inline u32 ram_fc_size_val_v(void)
190{
191 return 0x00000200;
192}
193static inline u32 ram_fc_gp_put_w(void)
194{
195 return 0;
196}
197static inline u32 ram_fc_userd_w(void)
198{
199 return 2;
200}
201static inline u32 ram_fc_userd_hi_w(void)
202{
203 return 3;
204}
205static inline u32 ram_fc_signature_w(void)
206{
207 return 4;
208}
209static inline u32 ram_fc_gp_get_w(void)
210{
211 return 5;
212}
213static inline u32 ram_fc_pb_get_w(void)
214{
215 return 6;
216}
217static inline u32 ram_fc_pb_get_hi_w(void)
218{
219 return 7;
220}
221static inline u32 ram_fc_pb_top_level_get_w(void)
222{
223 return 8;
224}
225static inline u32 ram_fc_pb_top_level_get_hi_w(void)
226{
227 return 9;
228}
229static inline u32 ram_fc_acquire_w(void)
230{
231 return 12;
232}
233static inline u32 ram_fc_semaphorea_w(void)
234{
235 return 14;
236}
237static inline u32 ram_fc_semaphoreb_w(void)
238{
239 return 15;
240}
241static inline u32 ram_fc_semaphorec_w(void)
242{
243 return 16;
244}
245static inline u32 ram_fc_semaphored_w(void)
246{
247 return 17;
248}
249static inline u32 ram_fc_gp_base_w(void)
250{
251 return 18;
252}
253static inline u32 ram_fc_gp_base_hi_w(void)
254{
255 return 19;
256}
257static inline u32 ram_fc_gp_fetch_w(void)
258{
259 return 20;
260}
261static inline u32 ram_fc_pb_fetch_w(void)
262{
263 return 21;
264}
265static inline u32 ram_fc_pb_fetch_hi_w(void)
266{
267 return 22;
268}
269static inline u32 ram_fc_pb_put_w(void)
270{
271 return 23;
272}
273static inline u32 ram_fc_pb_put_hi_w(void)
274{
275 return 24;
276}
277static inline u32 ram_fc_pb_header_w(void)
278{
279 return 33;
280}
281static inline u32 ram_fc_pb_count_w(void)
282{
283 return 34;
284}
285static inline u32 ram_fc_subdevice_w(void)
286{
287 return 37;
288}
289static inline u32 ram_fc_formats_w(void)
290{
291 return 39;
292}
293static inline u32 ram_fc_syncpointa_w(void)
294{
295 return 41;
296}
297static inline u32 ram_fc_syncpointb_w(void)
298{
299 return 42;
300}
301static inline u32 ram_fc_target_w(void)
302{
303 return 43;
304}
305static inline u32 ram_fc_hce_ctrl_w(void)
306{
307 return 57;
308}
309static inline u32 ram_fc_chid_w(void)
310{
311 return 58;
312}
313static inline u32 ram_fc_chid_id_f(u32 v)
314{
315 return (v & 0xfff) << 0;
316}
317static inline u32 ram_fc_chid_id_w(void)
318{
319 return 0;
320}
321static inline u32 ram_fc_eng_timeslice_w(void)
322{
323 return 62;
324}
325static inline u32 ram_fc_pb_timeslice_w(void)
326{
327 return 63;
328}
329static inline u32 ram_userd_base_shift_v(void)
330{
331 return 0x00000009;
332}
333static inline u32 ram_userd_chan_size_v(void)
334{
335 return 0x00000200;
336}
337static inline u32 ram_userd_put_w(void)
338{
339 return 16;
340}
341static inline u32 ram_userd_get_w(void)
342{
343 return 17;
344}
345static inline u32 ram_userd_ref_w(void)
346{
347 return 18;
348}
349static inline u32 ram_userd_put_hi_w(void)
350{
351 return 19;
352}
353static inline u32 ram_userd_ref_threshold_w(void)
354{
355 return 20;
356}
357static inline u32 ram_userd_top_level_get_w(void)
358{
359 return 22;
360}
361static inline u32 ram_userd_top_level_get_hi_w(void)
362{
363 return 23;
364}
365static inline u32 ram_userd_get_hi_w(void)
366{
367 return 24;
368}
369static inline u32 ram_userd_gp_get_w(void)
370{
371 return 34;
372}
373static inline u32 ram_userd_gp_put_w(void)
374{
375 return 35;
376}
377static inline u32 ram_userd_gp_top_level_get_w(void)
378{
379 return 22;
380}
381static inline u32 ram_userd_gp_top_level_get_hi_w(void)
382{
383 return 23;
384}
385static inline u32 ram_rl_entry_size_v(void)
386{
387 return 0x00000008;
388}
389#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h
new file mode 100644
index 00000000..b1e6658d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h
@@ -0,0 +1,2150 @@
1/*
2 * drivers/video/tegra/host/gk20a/hw_sim_gk20a.h
3 *
4 * Copyright (c) 2012, NVIDIA Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 /*
21 * Function naming determines intended use:
22 *
23 * <x>_r(void) : Returns the offset for register <x>.
24 *
25 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
26 *
27 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
28 *
29 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
30 * and masked to place it at field <y> of register <x>. This value
31 * can be |'d with others to produce a full register value for
32 * register <x>.
33 *
34 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
35 * value can be ~'d and then &'d to clear the value of field <y> for
36 * register <x>.
37 *
38 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
39 * to place it at field <y> of register <x>. This value can be |'d
40 * with others to produce a full register value for <x>.
41 *
42 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
43 * <x> value 'r' after being shifted to place its LSB at bit 0.
44 * This value is suitable for direct comparison with other unshifted
45 * values appropriate for use in field <y> of register <x>.
46 *
47 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
48 * field <y> of register <x>. This value is suitable for direct
49 * comparison with unshifted values appropriate for use in field <y>
50 * of register <x>.
51 */
52
53#ifndef __hw_sim_gk20a_h__
54#define __hw_sim_gk20a_h__
55/*This file is autogenerated. Do not edit. */
56
57static inline u32 sim_send_ring_r(void)
58{
59 return 0x00000000;
60}
61static inline u32 sim_send_ring_target_s(void)
62{
63 return 2;
64}
65static inline u32 sim_send_ring_target_f(u32 v)
66{
67 return (v & 0x3) << 0;
68}
69static inline u32 sim_send_ring_target_m(void)
70{
71 return 0x3 << 0;
72}
73static inline u32 sim_send_ring_target_v(u32 r)
74{
75 return (r >> 0) & 0x3;
76}
77static inline u32 sim_send_ring_target_phys_init_v(void)
78{
79 return 0x00000001;
80}
81static inline u32 sim_send_ring_target_phys_init_f(void)
82{
83 return 0x1;
84}
85static inline u32 sim_send_ring_target_phys__init_v(void)
86{
87 return 0x00000001;
88}
89static inline u32 sim_send_ring_target_phys__init_f(void)
90{
91 return 0x1;
92}
93static inline u32 sim_send_ring_target_phys__prod_v(void)
94{
95 return 0x00000001;
96}
97static inline u32 sim_send_ring_target_phys__prod_f(void)
98{
99 return 0x1;
100}
101static inline u32 sim_send_ring_target_phys_nvm_v(void)
102{
103 return 0x00000001;
104}
105static inline u32 sim_send_ring_target_phys_nvm_f(void)
106{
107 return 0x1;
108}
109static inline u32 sim_send_ring_target_phys_pci_v(void)
110{
111 return 0x00000002;
112}
113static inline u32 sim_send_ring_target_phys_pci_f(void)
114{
115 return 0x2;
116}
117static inline u32 sim_send_ring_target_phys_pci_coherent_v(void)
118{
119 return 0x00000003;
120}
121static inline u32 sim_send_ring_target_phys_pci_coherent_f(void)
122{
123 return 0x3;
124}
125static inline u32 sim_send_ring_status_s(void)
126{
127 return 1;
128}
129static inline u32 sim_send_ring_status_f(u32 v)
130{
131 return (v & 0x1) << 3;
132}
133static inline u32 sim_send_ring_status_m(void)
134{
135 return 0x1 << 3;
136}
137static inline u32 sim_send_ring_status_v(u32 r)
138{
139 return (r >> 3) & 0x1;
140}
141static inline u32 sim_send_ring_status_init_v(void)
142{
143 return 0x00000000;
144}
145static inline u32 sim_send_ring_status_init_f(void)
146{
147 return 0x0;
148}
149static inline u32 sim_send_ring_status__init_v(void)
150{
151 return 0x00000000;
152}
153static inline u32 sim_send_ring_status__init_f(void)
154{
155 return 0x0;
156}
157static inline u32 sim_send_ring_status__prod_v(void)
158{
159 return 0x00000000;
160}
161static inline u32 sim_send_ring_status__prod_f(void)
162{
163 return 0x0;
164}
165static inline u32 sim_send_ring_status_invalid_v(void)
166{
167 return 0x00000000;
168}
169static inline u32 sim_send_ring_status_invalid_f(void)
170{
171 return 0x0;
172}
173static inline u32 sim_send_ring_status_valid_v(void)
174{
175 return 0x00000001;
176}
177static inline u32 sim_send_ring_status_valid_f(void)
178{
179 return 0x8;
180}
181static inline u32 sim_send_ring_size_s(void)
182{
183 return 2;
184}
185static inline u32 sim_send_ring_size_f(u32 v)
186{
187 return (v & 0x3) << 4;
188}
189static inline u32 sim_send_ring_size_m(void)
190{
191 return 0x3 << 4;
192}
193static inline u32 sim_send_ring_size_v(u32 r)
194{
195 return (r >> 4) & 0x3;
196}
197static inline u32 sim_send_ring_size_init_v(void)
198{
199 return 0x00000000;
200}
201static inline u32 sim_send_ring_size_init_f(void)
202{
203 return 0x0;
204}
205static inline u32 sim_send_ring_size__init_v(void)
206{
207 return 0x00000000;
208}
209static inline u32 sim_send_ring_size__init_f(void)
210{
211 return 0x0;
212}
213static inline u32 sim_send_ring_size__prod_v(void)
214{
215 return 0x00000000;
216}
217static inline u32 sim_send_ring_size__prod_f(void)
218{
219 return 0x0;
220}
221static inline u32 sim_send_ring_size_4kb_v(void)
222{
223 return 0x00000000;
224}
225static inline u32 sim_send_ring_size_4kb_f(void)
226{
227 return 0x0;
228}
229static inline u32 sim_send_ring_size_8kb_v(void)
230{
231 return 0x00000001;
232}
233static inline u32 sim_send_ring_size_8kb_f(void)
234{
235 return 0x10;
236}
237static inline u32 sim_send_ring_size_12kb_v(void)
238{
239 return 0x00000002;
240}
241static inline u32 sim_send_ring_size_12kb_f(void)
242{
243 return 0x20;
244}
245static inline u32 sim_send_ring_size_16kb_v(void)
246{
247 return 0x00000003;
248}
249static inline u32 sim_send_ring_size_16kb_f(void)
250{
251 return 0x30;
252}
253static inline u32 sim_send_ring_gp_in_ring_s(void)
254{
255 return 1;
256}
257static inline u32 sim_send_ring_gp_in_ring_f(u32 v)
258{
259 return (v & 0x1) << 11;
260}
261static inline u32 sim_send_ring_gp_in_ring_m(void)
262{
263 return 0x1 << 11;
264}
265static inline u32 sim_send_ring_gp_in_ring_v(u32 r)
266{
267 return (r >> 11) & 0x1;
268}
269static inline u32 sim_send_ring_gp_in_ring__init_v(void)
270{
271 return 0x00000000;
272}
273static inline u32 sim_send_ring_gp_in_ring__init_f(void)
274{
275 return 0x0;
276}
277static inline u32 sim_send_ring_gp_in_ring__prod_v(void)
278{
279 return 0x00000000;
280}
281static inline u32 sim_send_ring_gp_in_ring__prod_f(void)
282{
283 return 0x0;
284}
285static inline u32 sim_send_ring_gp_in_ring_no_v(void)
286{
287 return 0x00000000;
288}
289static inline u32 sim_send_ring_gp_in_ring_no_f(void)
290{
291 return 0x0;
292}
293static inline u32 sim_send_ring_gp_in_ring_yes_v(void)
294{
295 return 0x00000001;
296}
297static inline u32 sim_send_ring_gp_in_ring_yes_f(void)
298{
299 return 0x800;
300}
301static inline u32 sim_send_ring_addr_lo_s(void)
302{
303 return 20;
304}
305static inline u32 sim_send_ring_addr_lo_f(u32 v)
306{
307 return (v & 0xfffff) << 12;
308}
309static inline u32 sim_send_ring_addr_lo_m(void)
310{
311 return 0xfffff << 12;
312}
313static inline u32 sim_send_ring_addr_lo_v(u32 r)
314{
315 return (r >> 12) & 0xfffff;
316}
317static inline u32 sim_send_ring_addr_lo__init_v(void)
318{
319 return 0x00000000;
320}
321static inline u32 sim_send_ring_addr_lo__init_f(void)
322{
323 return 0x0;
324}
325static inline u32 sim_send_ring_addr_lo__prod_v(void)
326{
327 return 0x00000000;
328}
329static inline u32 sim_send_ring_addr_lo__prod_f(void)
330{
331 return 0x0;
332}
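/*
 * Illustrative sketch only, not part of the autogenerated register list:
 * how the _f() constants above can be OR'd into a full SIM send-ring
 * register value, and how a _v() extractor pairs with the matching
 * unshifted _..._v() constant when decoding it, following the naming
 * convention documented at the top of this file. The function names and
 * the ring_base_pa parameter are hypothetical; the choice of a physical
 * PCI target is just for illustration.
 */
static inline u32 sim_send_ring_example_value(u64 ring_base_pa)
{
	/* Target, valid status, 4 KB ring size, no GPFIFO in ring,
	 * plus bits 31:12 of the ring base address. */
	return sim_send_ring_target_phys_pci_f() |
	       sim_send_ring_status_valid_f() |
	       sim_send_ring_size_4kb_f() |
	       sim_send_ring_gp_in_ring_no_f() |
	       sim_send_ring_addr_lo_f((u32)(ring_base_pa >> 12));
}

static inline u32 sim_send_ring_example_is_4kb(u32 reg)
{
	/* _v() shifts the field down to bit 0 so it can be compared
	 * directly against the unshifted _4kb_v() constant. */
	return sim_send_ring_size_v(reg) == sim_send_ring_size_4kb_v();
}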
333static inline u32 sim_send_ring_hi_r(void)
334{
335 return 0x00000004;
336}
337static inline u32 sim_send_ring_hi_addr_s(void)
338{
339 return 20;
340}
341static inline u32 sim_send_ring_hi_addr_f(u32 v)
342{
343 return (v & 0xfffff) << 0;
344}
345static inline u32 sim_send_ring_hi_addr_m(void)
346{
347 return 0xfffff << 0;
348}
349static inline u32 sim_send_ring_hi_addr_v(u32 r)
350{
351 return (r >> 0) & 0xfffff;
352}
353static inline u32 sim_send_ring_hi_addr__init_v(void)
354{
355 return 0x00000000;
356}
357static inline u32 sim_send_ring_hi_addr__init_f(void)
358{
359 return 0x0;
360}
361static inline u32 sim_send_ring_hi_addr__prod_v(void)
362{
363 return 0x00000000;
364}
365static inline u32 sim_send_ring_hi_addr__prod_f(void)
366{
367 return 0x0;
368}
369static inline u32 sim_send_put_r(void)
370{
371 return 0x00000008;
372}
373static inline u32 sim_send_put_pointer_s(void)
374{
375 return 29;
376}
377static inline u32 sim_send_put_pointer_f(u32 v)
378{
379 return (v & 0x1fffffff) << 3;
380}
381static inline u32 sim_send_put_pointer_m(void)
382{
383 return 0x1fffffff << 3;
384}
385static inline u32 sim_send_put_pointer_v(u32 r)
386{
387 return (r >> 3) & 0x1fffffff;
388}
389static inline u32 sim_send_get_r(void)
390{
391 return 0x0000000c;
392}
393static inline u32 sim_send_get_pointer_s(void)
394{
395 return 29;
396}
397static inline u32 sim_send_get_pointer_f(u32 v)
398{
399 return (v & 0x1fffffff) << 3;
400}
401static inline u32 sim_send_get_pointer_m(void)
402{
403 return 0x1fffffff << 3;
404}
405static inline u32 sim_send_get_pointer_v(u32 r)
406{
407 return (r >> 3) & 0x1fffffff;
408}
409static inline u32 sim_recv_ring_r(void)
410{
411 return 0x00000010;
412}
413static inline u32 sim_recv_ring_target_s(void)
414{
415 return 2;
416}
417static inline u32 sim_recv_ring_target_f(u32 v)
418{
419 return (v & 0x3) << 0;
420}
421static inline u32 sim_recv_ring_target_m(void)
422{
423 return 0x3 << 0;
424}
425static inline u32 sim_recv_ring_target_v(u32 r)
426{
427 return (r >> 0) & 0x3;
428}
429static inline u32 sim_recv_ring_target_phys_init_v(void)
430{
431 return 0x00000001;
432}
433static inline u32 sim_recv_ring_target_phys_init_f(void)
434{
435 return 0x1;
436}
437static inline u32 sim_recv_ring_target_phys__init_v(void)
438{
439 return 0x00000001;
440}
441static inline u32 sim_recv_ring_target_phys__init_f(void)
442{
443 return 0x1;
444}
445static inline u32 sim_recv_ring_target_phys__prod_v(void)
446{
447 return 0x00000001;
448}
449static inline u32 sim_recv_ring_target_phys__prod_f(void)
450{
451 return 0x1;
452}
453static inline u32 sim_recv_ring_target_phys_nvm_v(void)
454{
455 return 0x00000001;
456}
457static inline u32 sim_recv_ring_target_phys_nvm_f(void)
458{
459 return 0x1;
460}
461static inline u32 sim_recv_ring_target_phys_pci_v(void)
462{
463 return 0x00000002;
464}
465static inline u32 sim_recv_ring_target_phys_pci_f(void)
466{
467 return 0x2;
468}
469static inline u32 sim_recv_ring_target_phys_pci_coherent_v(void)
470{
471 return 0x00000003;
472}
473static inline u32 sim_recv_ring_target_phys_pci_coherent_f(void)
474{
475 return 0x3;
476}
477static inline u32 sim_recv_ring_status_s(void)
478{
479 return 1;
480}
481static inline u32 sim_recv_ring_status_f(u32 v)
482{
483 return (v & 0x1) << 3;
484}
485static inline u32 sim_recv_ring_status_m(void)
486{
487 return 0x1 << 3;
488}
489static inline u32 sim_recv_ring_status_v(u32 r)
490{
491 return (r >> 3) & 0x1;
492}
493static inline u32 sim_recv_ring_status_init_v(void)
494{
495 return 0x00000000;
496}
497static inline u32 sim_recv_ring_status_init_f(void)
498{
499 return 0x0;
500}
501static inline u32 sim_recv_ring_status__init_v(void)
502{
503 return 0x00000000;
504}
505static inline u32 sim_recv_ring_status__init_f(void)
506{
507 return 0x0;
508}
509static inline u32 sim_recv_ring_status__prod_v(void)
510{
511 return 0x00000000;
512}
513static inline u32 sim_recv_ring_status__prod_f(void)
514{
515 return 0x0;
516}
517static inline u32 sim_recv_ring_status_invalid_v(void)
518{
519 return 0x00000000;
520}
521static inline u32 sim_recv_ring_status_invalid_f(void)
522{
523 return 0x0;
524}
525static inline u32 sim_recv_ring_status_valid_v(void)
526{
527 return 0x00000001;
528}
529static inline u32 sim_recv_ring_status_valid_f(void)
530{
531 return 0x8;
532}
533static inline u32 sim_recv_ring_size_s(void)
534{
535 return 2;
536}
537static inline u32 sim_recv_ring_size_f(u32 v)
538{
539 return (v & 0x3) << 4;
540}
541static inline u32 sim_recv_ring_size_m(void)
542{
543 return 0x3 << 4;
544}
545static inline u32 sim_recv_ring_size_v(u32 r)
546{
547 return (r >> 4) & 0x3;
548}
549static inline u32 sim_recv_ring_size_init_v(void)
550{
551 return 0x00000000;
552}
553static inline u32 sim_recv_ring_size_init_f(void)
554{
555 return 0x0;
556}
557static inline u32 sim_recv_ring_size__init_v(void)
558{
559 return 0x00000000;
560}
561static inline u32 sim_recv_ring_size__init_f(void)
562{
563 return 0x0;
564}
565static inline u32 sim_recv_ring_size__prod_v(void)
566{
567 return 0x00000000;
568}
569static inline u32 sim_recv_ring_size__prod_f(void)
570{
571 return 0x0;
572}
573static inline u32 sim_recv_ring_size_4kb_v(void)
574{
575 return 0x00000000;
576}
577static inline u32 sim_recv_ring_size_4kb_f(void)
578{
579 return 0x0;
580}
581static inline u32 sim_recv_ring_size_8kb_v(void)
582{
583 return 0x00000001;
584}
585static inline u32 sim_recv_ring_size_8kb_f(void)
586{
587 return 0x10;
588}
589static inline u32 sim_recv_ring_size_12kb_v(void)
590{
591 return 0x00000002;
592}
593static inline u32 sim_recv_ring_size_12kb_f(void)
594{
595 return 0x20;
596}
597static inline u32 sim_recv_ring_size_16kb_v(void)
598{
599 return 0x00000003;
600}
601static inline u32 sim_recv_ring_size_16kb_f(void)
602{
603 return 0x30;
604}
605static inline u32 sim_recv_ring_gp_in_ring_s(void)
606{
607 return 1;
608}
609static inline u32 sim_recv_ring_gp_in_ring_f(u32 v)
610{
611 return (v & 0x1) << 11;
612}
613static inline u32 sim_recv_ring_gp_in_ring_m(void)
614{
615 return 0x1 << 11;
616}
617static inline u32 sim_recv_ring_gp_in_ring_v(u32 r)
618{
619 return (r >> 11) & 0x1;
620}
621static inline u32 sim_recv_ring_gp_in_ring__init_v(void)
622{
623 return 0x00000000;
624}
625static inline u32 sim_recv_ring_gp_in_ring__init_f(void)
626{
627 return 0x0;
628}
629static inline u32 sim_recv_ring_gp_in_ring__prod_v(void)
630{
631 return 0x00000000;
632}
633static inline u32 sim_recv_ring_gp_in_ring__prod_f(void)
634{
635 return 0x0;
636}
637static inline u32 sim_recv_ring_gp_in_ring_no_v(void)
638{
639 return 0x00000000;
640}
641static inline u32 sim_recv_ring_gp_in_ring_no_f(void)
642{
643 return 0x0;
644}
645static inline u32 sim_recv_ring_gp_in_ring_yes_v(void)
646{
647 return 0x00000001;
648}
649static inline u32 sim_recv_ring_gp_in_ring_yes_f(void)
650{
651 return 0x800;
652}
653static inline u32 sim_recv_ring_addr_lo_s(void)
654{
655 return 20;
656}
657static inline u32 sim_recv_ring_addr_lo_f(u32 v)
658{
659 return (v & 0xfffff) << 12;
660}
661static inline u32 sim_recv_ring_addr_lo_m(void)
662{
663 return 0xfffff << 12;
664}
665static inline u32 sim_recv_ring_addr_lo_v(u32 r)
666{
667 return (r >> 12) & 0xfffff;
668}
669static inline u32 sim_recv_ring_addr_lo__init_v(void)
670{
671 return 0x00000000;
672}
673static inline u32 sim_recv_ring_addr_lo__init_f(void)
674{
675 return 0x0;
676}
677static inline u32 sim_recv_ring_addr_lo__prod_v(void)
678{
679 return 0x00000000;
680}
681static inline u32 sim_recv_ring_addr_lo__prod_f(void)
682{
683 return 0x0;
684}
685static inline u32 sim_recv_ring_hi_r(void)
686{
687 return 0x00000014;
688}
689static inline u32 sim_recv_ring_hi_addr_s(void)
690{
691 return 20;
692}
693static inline u32 sim_recv_ring_hi_addr_f(u32 v)
694{
695 return (v & 0xfffff) << 0;
696}
697static inline u32 sim_recv_ring_hi_addr_m(void)
698{
699 return 0xfffff << 0;
700}
701static inline u32 sim_recv_ring_hi_addr_v(u32 r)
702{
703 return (r >> 0) & 0xfffff;
704}
705static inline u32 sim_recv_ring_hi_addr__init_v(void)
706{
707 return 0x00000000;
708}
709static inline u32 sim_recv_ring_hi_addr__init_f(void)
710{
711 return 0x0;
712}
713static inline u32 sim_recv_ring_hi_addr__prod_v(void)
714{
715 return 0x00000000;
716}
717static inline u32 sim_recv_ring_hi_addr__prod_f(void)
718{
719 return 0x0;
720}
721static inline u32 sim_recv_put_r(void)
722{
723 return 0x00000018;
724}
725static inline u32 sim_recv_put_pointer_s(void)
726{
727 return 11;
728}
729static inline u32 sim_recv_put_pointer_f(u32 v)
730{
731 return (v & 0x7ff) << 3;
732}
733static inline u32 sim_recv_put_pointer_m(void)
734{
735 return 0x7ff << 3;
736}
737static inline u32 sim_recv_put_pointer_v(u32 r)
738{
739 return (r >> 3) & 0x7ff;
740}
741static inline u32 sim_recv_get_r(void)
742{
743 return 0x0000001c;
744}
745static inline u32 sim_recv_get_pointer_s(void)
746{
747 return 11;
748}
749static inline u32 sim_recv_get_pointer_f(u32 v)
750{
751 return (v & 0x7ff) << 3;
752}
753static inline u32 sim_recv_get_pointer_m(void)
754{
755 return 0x7ff << 3;
756}
757static inline u32 sim_recv_get_pointer_v(u32 r)
758{
759 return (r >> 3) & 0x7ff;
760}
761static inline u32 sim_config_r(void)
762{
763 return 0x00000020;
764}
765static inline u32 sim_config_mode_s(void)
766{
767 return 1;
768}
769static inline u32 sim_config_mode_f(u32 v)
770{
771 return (v & 0x1) << 0;
772}
773static inline u32 sim_config_mode_m(void)
774{
775 return 0x1 << 0;
776}
777static inline u32 sim_config_mode_v(u32 r)
778{
779 return (r >> 0) & 0x1;
780}
781static inline u32 sim_config_mode_disabled_v(void)
782{
783 return 0x00000000;
784}
785static inline u32 sim_config_mode_disabled_f(void)
786{
787 return 0x0;
788}
789static inline u32 sim_config_mode_enabled_v(void)
790{
791 return 0x00000001;
792}
793static inline u32 sim_config_mode_enabled_f(void)
794{
795 return 0x1;
796}
797static inline u32 sim_config_channels_s(void)
798{
799 return 7;
800}
801static inline u32 sim_config_channels_f(u32 v)
802{
803 return (v & 0x7f) << 1;
804}
805static inline u32 sim_config_channels_m(void)
806{
807 return 0x7f << 1;
808}
809static inline u32 sim_config_channels_v(u32 r)
810{
811 return (r >> 1) & 0x7f;
812}
813static inline u32 sim_config_channels_none_v(void)
814{
815 return 0x00000000;
816}
817static inline u32 sim_config_channels_none_f(void)
818{
819 return 0x0;
820}
821static inline u32 sim_config_cached_only_s(void)
822{
823 return 1;
824}
825static inline u32 sim_config_cached_only_f(u32 v)
826{
827 return (v & 0x1) << 8;
828}
829static inline u32 sim_config_cached_only_m(void)
830{
831 return 0x1 << 8;
832}
833static inline u32 sim_config_cached_only_v(u32 r)
834{
835 return (r >> 8) & 0x1;
836}
837static inline u32 sim_config_cached_only_disabled_v(void)
838{
839 return 0x00000000;
840}
841static inline u32 sim_config_cached_only_disabled_f(void)
842{
843 return 0x0;
844}
845static inline u32 sim_config_cached_only_enabled_v(void)
846{
847 return 0x00000001;
848}
849static inline u32 sim_config_cached_only_enabled_f(void)
850{
851 return 0x100;
852}
853static inline u32 sim_config_validity_s(void)
854{
855 return 2;
856}
857static inline u32 sim_config_validity_f(u32 v)
858{
859 return (v & 0x3) << 9;
860}
861static inline u32 sim_config_validity_m(void)
862{
863 return 0x3 << 9;
864}
865static inline u32 sim_config_validity_v(u32 r)
866{
867 return (r >> 9) & 0x3;
868}
869static inline u32 sim_config_validity__init_v(void)
870{
871 return 0x00000001;
872}
873static inline u32 sim_config_validity__init_f(void)
874{
875 return 0x200;
876}
877static inline u32 sim_config_validity_valid_v(void)
878{
879 return 0x00000001;
880}
881static inline u32 sim_config_validity_valid_f(void)
882{
883 return 0x200;
884}
885static inline u32 sim_config_simulation_s(void)
886{
887 return 2;
888}
889static inline u32 sim_config_simulation_f(u32 v)
890{
891 return (v & 0x3) << 12;
892}
893static inline u32 sim_config_simulation_m(void)
894{
895 return 0x3 << 12;
896}
897static inline u32 sim_config_simulation_v(u32 r)
898{
899 return (r >> 12) & 0x3;
900}
901static inline u32 sim_config_simulation_disabled_v(void)
902{
903 return 0x00000000;
904}
905static inline u32 sim_config_simulation_disabled_f(void)
906{
907 return 0x0;
908}
909static inline u32 sim_config_simulation_fmodel_v(void)
910{
911 return 0x00000001;
912}
913static inline u32 sim_config_simulation_fmodel_f(void)
914{
915 return 0x1000;
916}
917static inline u32 sim_config_simulation_rtlsim_v(void)
918{
919 return 0x00000002;
920}
921static inline u32 sim_config_simulation_rtlsim_f(void)
922{
923 return 0x2000;
924}
925static inline u32 sim_config_secondary_display_s(void)
926{
927 return 1;
928}
929static inline u32 sim_config_secondary_display_f(u32 v)
930{
931 return (v & 0x1) << 14;
932}
933static inline u32 sim_config_secondary_display_m(void)
934{
935 return 0x1 << 14;
936}
937static inline u32 sim_config_secondary_display_v(u32 r)
938{
939 return (r >> 14) & 0x1;
940}
941static inline u32 sim_config_secondary_display_disabled_v(void)
942{
943 return 0x00000000;
944}
945static inline u32 sim_config_secondary_display_disabled_f(void)
946{
947 return 0x0;
948}
949static inline u32 sim_config_secondary_display_enabled_v(void)
950{
951 return 0x00000001;
952}
953static inline u32 sim_config_secondary_display_enabled_f(void)
954{
955 return 0x4000;
956}
957static inline u32 sim_config_num_heads_s(void)
958{
959 return 8;
960}
961static inline u32 sim_config_num_heads_f(u32 v)
962{
963 return (v & 0xff) << 17;
964}
965static inline u32 sim_config_num_heads_m(void)
966{
967 return 0xff << 17;
968}
969static inline u32 sim_config_num_heads_v(u32 r)
970{
971 return (r >> 17) & 0xff;
972}
973static inline u32 sim_event_ring_r(void)
974{
975 return 0x00000030;
976}
977static inline u32 sim_event_ring_target_s(void)
978{
979 return 2;
980}
981static inline u32 sim_event_ring_target_f(u32 v)
982{
983 return (v & 0x3) << 0;
984}
985static inline u32 sim_event_ring_target_m(void)
986{
987 return 0x3 << 0;
988}
989static inline u32 sim_event_ring_target_v(u32 r)
990{
991 return (r >> 0) & 0x3;
992}
993static inline u32 sim_event_ring_target_phys_init_v(void)
994{
995 return 0x00000001;
996}
997static inline u32 sim_event_ring_target_phys_init_f(void)
998{
999 return 0x1;
1000}
1001static inline u32 sim_event_ring_target_phys__init_v(void)
1002{
1003 return 0x00000001;
1004}
1005static inline u32 sim_event_ring_target_phys__init_f(void)
1006{
1007 return 0x1;
1008}
1009static inline u32 sim_event_ring_target_phys__prod_v(void)
1010{
1011 return 0x00000001;
1012}
1013static inline u32 sim_event_ring_target_phys__prod_f(void)
1014{
1015 return 0x1;
1016}
1017static inline u32 sim_event_ring_target_phys_nvm_v(void)
1018{
1019 return 0x00000001;
1020}
1021static inline u32 sim_event_ring_target_phys_nvm_f(void)
1022{
1023 return 0x1;
1024}
1025static inline u32 sim_event_ring_target_phys_pci_v(void)
1026{
1027 return 0x00000002;
1028}
1029static inline u32 sim_event_ring_target_phys_pci_f(void)
1030{
1031 return 0x2;
1032}
1033static inline u32 sim_event_ring_target_phys_pci_coherent_v(void)
1034{
1035 return 0x00000003;
1036}
1037static inline u32 sim_event_ring_target_phys_pci_coherent_f(void)
1038{
1039 return 0x3;
1040}
1041static inline u32 sim_event_ring_status_s(void)
1042{
1043 return 1;
1044}
1045static inline u32 sim_event_ring_status_f(u32 v)
1046{
1047 return (v & 0x1) << 3;
1048}
1049static inline u32 sim_event_ring_status_m(void)
1050{
1051 return 0x1 << 3;
1052}
1053static inline u32 sim_event_ring_status_v(u32 r)
1054{
1055 return (r >> 3) & 0x1;
1056}
1057static inline u32 sim_event_ring_status_init_v(void)
1058{
1059 return 0x00000000;
1060}
1061static inline u32 sim_event_ring_status_init_f(void)
1062{
1063 return 0x0;
1064}
1065static inline u32 sim_event_ring_status__init_v(void)
1066{
1067 return 0x00000000;
1068}
1069static inline u32 sim_event_ring_status__init_f(void)
1070{
1071 return 0x0;
1072}
1073static inline u32 sim_event_ring_status__prod_v(void)
1074{
1075 return 0x00000000;
1076}
1077static inline u32 sim_event_ring_status__prod_f(void)
1078{
1079 return 0x0;
1080}
1081static inline u32 sim_event_ring_status_invalid_v(void)
1082{
1083 return 0x00000000;
1084}
1085static inline u32 sim_event_ring_status_invalid_f(void)
1086{
1087 return 0x0;
1088}
1089static inline u32 sim_event_ring_status_valid_v(void)
1090{
1091 return 0x00000001;
1092}
1093static inline u32 sim_event_ring_status_valid_f(void)
1094{
1095 return 0x8;
1096}
1097static inline u32 sim_event_ring_size_s(void)
1098{
1099 return 2;
1100}
1101static inline u32 sim_event_ring_size_f(u32 v)
1102{
1103 return (v & 0x3) << 4;
1104}
1105static inline u32 sim_event_ring_size_m(void)
1106{
1107 return 0x3 << 4;
1108}
1109static inline u32 sim_event_ring_size_v(u32 r)
1110{
1111 return (r >> 4) & 0x3;
1112}
1113static inline u32 sim_event_ring_size_init_v(void)
1114{
1115 return 0x00000000;
1116}
1117static inline u32 sim_event_ring_size_init_f(void)
1118{
1119 return 0x0;
1120}
1121static inline u32 sim_event_ring_size__init_v(void)
1122{
1123 return 0x00000000;
1124}
1125static inline u32 sim_event_ring_size__init_f(void)
1126{
1127 return 0x0;
1128}
1129static inline u32 sim_event_ring_size__prod_v(void)
1130{
1131 return 0x00000000;
1132}
1133static inline u32 sim_event_ring_size__prod_f(void)
1134{
1135 return 0x0;
1136}
1137static inline u32 sim_event_ring_size_4kb_v(void)
1138{
1139 return 0x00000000;
1140}
1141static inline u32 sim_event_ring_size_4kb_f(void)
1142{
1143 return 0x0;
1144}
1145static inline u32 sim_event_ring_size_8kb_v(void)
1146{
1147 return 0x00000001;
1148}
1149static inline u32 sim_event_ring_size_8kb_f(void)
1150{
1151 return 0x10;
1152}
1153static inline u32 sim_event_ring_size_12kb_v(void)
1154{
1155 return 0x00000002;
1156}
1157static inline u32 sim_event_ring_size_12kb_f(void)
1158{
1159 return 0x20;
1160}
1161static inline u32 sim_event_ring_size_16kb_v(void)
1162{
1163 return 0x00000003;
1164}
1165static inline u32 sim_event_ring_size_16kb_f(void)
1166{
1167 return 0x30;
1168}
1169static inline u32 sim_event_ring_gp_in_ring_s(void)
1170{
1171 return 1;
1172}
1173static inline u32 sim_event_ring_gp_in_ring_f(u32 v)
1174{
1175 return (v & 0x1) << 11;
1176}
1177static inline u32 sim_event_ring_gp_in_ring_m(void)
1178{
1179 return 0x1 << 11;
1180}
1181static inline u32 sim_event_ring_gp_in_ring_v(u32 r)
1182{
1183 return (r >> 11) & 0x1;
1184}
1185static inline u32 sim_event_ring_gp_in_ring__init_v(void)
1186{
1187 return 0x00000000;
1188}
1189static inline u32 sim_event_ring_gp_in_ring__init_f(void)
1190{
1191 return 0x0;
1192}
1193static inline u32 sim_event_ring_gp_in_ring__prod_v(void)
1194{
1195 return 0x00000000;
1196}
1197static inline u32 sim_event_ring_gp_in_ring__prod_f(void)
1198{
1199 return 0x0;
1200}
1201static inline u32 sim_event_ring_gp_in_ring_no_v(void)
1202{
1203 return 0x00000000;
1204}
1205static inline u32 sim_event_ring_gp_in_ring_no_f(void)
1206{
1207 return 0x0;
1208}
1209static inline u32 sim_event_ring_gp_in_ring_yes_v(void)
1210{
1211 return 0x00000001;
1212}
1213static inline u32 sim_event_ring_gp_in_ring_yes_f(void)
1214{
1215 return 0x800;
1216}
1217static inline u32 sim_event_ring_addr_lo_s(void)
1218{
1219 return 20;
1220}
1221static inline u32 sim_event_ring_addr_lo_f(u32 v)
1222{
1223 return (v & 0xfffff) << 12;
1224}
1225static inline u32 sim_event_ring_addr_lo_m(void)
1226{
1227 return 0xfffff << 12;
1228}
1229static inline u32 sim_event_ring_addr_lo_v(u32 r)
1230{
1231 return (r >> 12) & 0xfffff;
1232}
1233static inline u32 sim_event_ring_addr_lo__init_v(void)
1234{
1235 return 0x00000000;
1236}
1237static inline u32 sim_event_ring_addr_lo__init_f(void)
1238{
1239 return 0x0;
1240}
1241static inline u32 sim_event_ring_addr_lo__prod_v(void)
1242{
1243 return 0x00000000;
1244}
1245static inline u32 sim_event_ring_addr_lo__prod_f(void)
1246{
1247 return 0x0;
1248}
1249static inline u32 sim_event_ring_hi_v(void)
1250{
1251 return 0x00000034;
1252}
1253static inline u32 sim_event_ring_hi_addr_s(void)
1254{
1255 return 20;
1256}
1257static inline u32 sim_event_ring_hi_addr_f(u32 v)
1258{
1259 return (v & 0xfffff) << 0;
1260}
1261static inline u32 sim_event_ring_hi_addr_m(void)
1262{
1263 return 0xfffff << 0;
1264}
1265static inline u32 sim_event_ring_hi_addr_v(u32 r)
1266{
1267 return (r >> 0) & 0xfffff;
1268}
1269static inline u32 sim_event_ring_hi_addr__init_v(void)
1270{
1271 return 0x00000000;
1272}
1273static inline u32 sim_event_ring_hi_addr__init_f(void)
1274{
1275 return 0x0;
1276}
1277static inline u32 sim_event_ring_hi_addr__prod_v(void)
1278{
1279 return 0x00000000;
1280}
1281static inline u32 sim_event_ring_hi_addr__prod_f(void)
1282{
1283 return 0x0;
1284}
1285static inline u32 sim_event_put_r(void)
1286{
1287 return 0x00000038;
1288}
1289static inline u32 sim_event_put_pointer_s(void)
1290{
1291 return 30;
1292}
1293static inline u32 sim_event_put_pointer_f(u32 v)
1294{
1295 return (v & 0x3fffffff) << 2;
1296}
1297static inline u32 sim_event_put_pointer_m(void)
1298{
1299 return 0x3fffffff << 2;
1300}
1301static inline u32 sim_event_put_pointer_v(u32 r)
1302{
1303 return (r >> 2) & 0x3fffffff;
1304}
1305static inline u32 sim_event_get_r(void)
1306{
1307 return 0x0000003c;
1308}
1309static inline u32 sim_event_get_pointer_s(void)
1310{
1311 return 30;
1312}
1313static inline u32 sim_event_get_pointer_f(u32 v)
1314{
1315 return (v & 0x3fffffff) << 2;
1316}
1317static inline u32 sim_event_get_pointer_m(void)
1318{
1319 return 0x3fffffff << 2;
1320}
1321static inline u32 sim_event_get_pointer_v(u32 r)
1322{
1323 return (r >> 2) & 0x3fffffff;
1324}
1325static inline u32 sim_status_r(void)
1326{
1327 return 0x00000028;
1328}
1329static inline u32 sim_status_send_put_s(void)
1330{
1331 return 1;
1332}
1333static inline u32 sim_status_send_put_f(u32 v)
1334{
1335 return (v & 0x1) << 0;
1336}
1337static inline u32 sim_status_send_put_m(void)
1338{
1339 return 0x1 << 0;
1340}
1341static inline u32 sim_status_send_put_v(u32 r)
1342{
1343 return (r >> 0) & 0x1;
1344}
1345static inline u32 sim_status_send_put__init_v(void)
1346{
1347 return 0x00000000;
1348}
1349static inline u32 sim_status_send_put__init_f(void)
1350{
1351 return 0x0;
1352}
1353static inline u32 sim_status_send_put_idle_v(void)
1354{
1355 return 0x00000000;
1356}
1357static inline u32 sim_status_send_put_idle_f(void)
1358{
1359 return 0x0;
1360}
1361static inline u32 sim_status_send_put_pending_v(void)
1362{
1363 return 0x00000001;
1364}
1365static inline u32 sim_status_send_put_pending_f(void)
1366{
1367 return 0x1;
1368}
1369static inline u32 sim_status_send_get_s(void)
1370{
1371 return 1;
1372}
1373static inline u32 sim_status_send_get_f(u32 v)
1374{
1375 return (v & 0x1) << 1;
1376}
1377static inline u32 sim_status_send_get_m(void)
1378{
1379 return 0x1 << 1;
1380}
1381static inline u32 sim_status_send_get_v(u32 r)
1382{
1383 return (r >> 1) & 0x1;
1384}
1385static inline u32 sim_status_send_get__init_v(void)
1386{
1387 return 0x00000000;
1388}
1389static inline u32 sim_status_send_get__init_f(void)
1390{
1391 return 0x0;
1392}
1393static inline u32 sim_status_send_get_idle_v(void)
1394{
1395 return 0x00000000;
1396}
1397static inline u32 sim_status_send_get_idle_f(void)
1398{
1399 return 0x0;
1400}
1401static inline u32 sim_status_send_get_pending_v(void)
1402{
1403 return 0x00000001;
1404}
1405static inline u32 sim_status_send_get_pending_f(void)
1406{
1407 return 0x2;
1408}
1409static inline u32 sim_status_send_get_clear_v(void)
1410{
1411 return 0x00000001;
1412}
1413static inline u32 sim_status_send_get_clear_f(void)
1414{
1415 return 0x2;
1416}
1417static inline u32 sim_status_recv_put_s(void)
1418{
1419 return 1;
1420}
1421static inline u32 sim_status_recv_put_f(u32 v)
1422{
1423 return (v & 0x1) << 2;
1424}
1425static inline u32 sim_status_recv_put_m(void)
1426{
1427 return 0x1 << 2;
1428}
1429static inline u32 sim_status_recv_put_v(u32 r)
1430{
1431 return (r >> 2) & 0x1;
1432}
1433static inline u32 sim_status_recv_put__init_v(void)
1434{
1435 return 0x00000000;
1436}
1437static inline u32 sim_status_recv_put__init_f(void)
1438{
1439 return 0x0;
1440}
1441static inline u32 sim_status_recv_put_idle_v(void)
1442{
1443 return 0x00000000;
1444}
1445static inline u32 sim_status_recv_put_idle_f(void)
1446{
1447 return 0x0;
1448}
1449static inline u32 sim_status_recv_put_pending_v(void)
1450{
1451 return 0x00000001;
1452}
1453static inline u32 sim_status_recv_put_pending_f(void)
1454{
1455 return 0x4;
1456}
1457static inline u32 sim_status_recv_put_clear_v(void)
1458{
1459 return 0x00000001;
1460}
1461static inline u32 sim_status_recv_put_clear_f(void)
1462{
1463 return 0x4;
1464}
1465static inline u32 sim_status_recv_get_s(void)
1466{
1467 return 1;
1468}
1469static inline u32 sim_status_recv_get_f(u32 v)
1470{
1471 return (v & 0x1) << 3;
1472}
1473static inline u32 sim_status_recv_get_m(void)
1474{
1475 return 0x1 << 3;
1476}
1477static inline u32 sim_status_recv_get_v(u32 r)
1478{
1479 return (r >> 3) & 0x1;
1480}
1481static inline u32 sim_status_recv_get__init_v(void)
1482{
1483 return 0x00000000;
1484}
1485static inline u32 sim_status_recv_get__init_f(void)
1486{
1487 return 0x0;
1488}
1489static inline u32 sim_status_recv_get_idle_v(void)
1490{
1491 return 0x00000000;
1492}
1493static inline u32 sim_status_recv_get_idle_f(void)
1494{
1495 return 0x0;
1496}
1497static inline u32 sim_status_recv_get_pending_v(void)
1498{
1499 return 0x00000001;
1500}
1501static inline u32 sim_status_recv_get_pending_f(void)
1502{
1503 return 0x8;
1504}
1505static inline u32 sim_status_event_put_s(void)
1506{
1507 return 1;
1508}
1509static inline u32 sim_status_event_put_f(u32 v)
1510{
1511 return (v & 0x1) << 4;
1512}
1513static inline u32 sim_status_event_put_m(void)
1514{
1515 return 0x1 << 4;
1516}
1517static inline u32 sim_status_event_put_v(u32 r)
1518{
1519 return (r >> 4) & 0x1;
1520}
1521static inline u32 sim_status_event_put__init_v(void)
1522{
1523 return 0x00000000;
1524}
1525static inline u32 sim_status_event_put__init_f(void)
1526{
1527 return 0x0;
1528}
1529static inline u32 sim_status_event_put_idle_v(void)
1530{
1531 return 0x00000000;
1532}
1533static inline u32 sim_status_event_put_idle_f(void)
1534{
1535 return 0x0;
1536}
1537static inline u32 sim_status_event_put_pending_v(void)
1538{
1539 return 0x00000001;
1540}
1541static inline u32 sim_status_event_put_pending_f(void)
1542{
1543 return 0x10;
1544}
1545static inline u32 sim_status_event_put_clear_v(void)
1546{
1547 return 0x00000001;
1548}
1549static inline u32 sim_status_event_put_clear_f(void)
1550{
1551 return 0x10;
1552}
1553static inline u32 sim_status_event_get_s(void)
1554{
1555 return 1;
1556}
1557static inline u32 sim_status_event_get_f(u32 v)
1558{
1559 return (v & 0x1) << 5;
1560}
1561static inline u32 sim_status_event_get_m(void)
1562{
1563 return 0x1 << 5;
1564}
1565static inline u32 sim_status_event_get_v(u32 r)
1566{
1567 return (r >> 5) & 0x1;
1568}
1569static inline u32 sim_status_event_get__init_v(void)
1570{
1571 return 0x00000000;
1572}
1573static inline u32 sim_status_event_get__init_f(void)
1574{
1575 return 0x0;
1576}
1577static inline u32 sim_status_event_get_idle_v(void)
1578{
1579 return 0x00000000;
1580}
1581static inline u32 sim_status_event_get_idle_f(void)
1582{
1583 return 0x0;
1584}
1585static inline u32 sim_status_event_get_pending_v(void)
1586{
1587 return 0x00000001;
1588}
1589static inline u32 sim_status_event_get_pending_f(void)
1590{
1591 return 0x20;
1592}
1593static inline u32 sim_control_r(void)
1594{
1595 return 0x0000002c;
1596}
1597static inline u32 sim_control_send_put_s(void)
1598{
1599 return 1;
1600}
1601static inline u32 sim_control_send_put_f(u32 v)
1602{
1603 return (v & 0x1) << 0;
1604}
1605static inline u32 sim_control_send_put_m(void)
1606{
1607 return 0x1 << 0;
1608}
1609static inline u32 sim_control_send_put_v(u32 r)
1610{
1611 return (r >> 0) & 0x1;
1612}
1613static inline u32 sim_control_send_put__init_v(void)
1614{
1615 return 0x00000000;
1616}
1617static inline u32 sim_control_send_put__init_f(void)
1618{
1619 return 0x0;
1620}
1621static inline u32 sim_control_send_put_disabled_v(void)
1622{
1623 return 0x00000000;
1624}
1625static inline u32 sim_control_send_put_disabled_f(void)
1626{
1627 return 0x0;
1628}
1629static inline u32 sim_control_send_put_enabled_v(void)
1630{
1631 return 0x00000001;
1632}
1633static inline u32 sim_control_send_put_enabled_f(void)
1634{
1635 return 0x1;
1636}
1637static inline u32 sim_control_send_get_s(void)
1638{
1639 return 1;
1640}
1641static inline u32 sim_control_send_get_f(u32 v)
1642{
1643 return (v & 0x1) << 1;
1644}
1645static inline u32 sim_control_send_get_m(void)
1646{
1647 return 0x1 << 1;
1648}
1649static inline u32 sim_control_send_get_v(u32 r)
1650{
1651 return (r >> 1) & 0x1;
1652}
1653static inline u32 sim_control_send_get__init_v(void)
1654{
1655 return 0x00000000;
1656}
1657static inline u32 sim_control_send_get__init_f(void)
1658{
1659 return 0x0;
1660}
1661static inline u32 sim_control_send_get_disabled_v(void)
1662{
1663 return 0x00000000;
1664}
1665static inline u32 sim_control_send_get_disabled_f(void)
1666{
1667 return 0x0;
1668}
1669static inline u32 sim_control_send_get_enabled_v(void)
1670{
1671 return 0x00000001;
1672}
1673static inline u32 sim_control_send_get_enabled_f(void)
1674{
1675 return 0x2;
1676}
1677static inline u32 sim_control_recv_put_s(void)
1678{
1679 return 1;
1680}
1681static inline u32 sim_control_recv_put_f(u32 v)
1682{
1683 return (v & 0x1) << 2;
1684}
1685static inline u32 sim_control_recv_put_m(void)
1686{
1687 return 0x1 << 2;
1688}
1689static inline u32 sim_control_recv_put_v(u32 r)
1690{
1691 return (r >> 2) & 0x1;
1692}
1693static inline u32 sim_control_recv_put__init_v(void)
1694{
1695 return 0x00000000;
1696}
1697static inline u32 sim_control_recv_put__init_f(void)
1698{
1699 return 0x0;
1700}
1701static inline u32 sim_control_recv_put_disabled_v(void)
1702{
1703 return 0x00000000;
1704}
1705static inline u32 sim_control_recv_put_disabled_f(void)
1706{
1707 return 0x0;
1708}
1709static inline u32 sim_control_recv_put_enabled_v(void)
1710{
1711 return 0x00000001;
1712}
1713static inline u32 sim_control_recv_put_enabled_f(void)
1714{
1715 return 0x4;
1716}
1717static inline u32 sim_control_recv_get_s(void)
1718{
1719 return 1;
1720}
1721static inline u32 sim_control_recv_get_f(u32 v)
1722{
1723 return (v & 0x1) << 3;
1724}
1725static inline u32 sim_control_recv_get_m(void)
1726{
1727 return 0x1 << 3;
1728}
1729static inline u32 sim_control_recv_get_v(u32 r)
1730{
1731 return (r >> 3) & 0x1;
1732}
1733static inline u32 sim_control_recv_get__init_v(void)
1734{
1735 return 0x00000000;
1736}
1737static inline u32 sim_control_recv_get__init_f(void)
1738{
1739 return 0x0;
1740}
1741static inline u32 sim_control_recv_get_disabled_v(void)
1742{
1743 return 0x00000000;
1744}
1745static inline u32 sim_control_recv_get_disabled_f(void)
1746{
1747 return 0x0;
1748}
1749static inline u32 sim_control_recv_get_enabled_v(void)
1750{
1751 return 0x00000001;
1752}
1753static inline u32 sim_control_recv_get_enabled_f(void)
1754{
1755 return 0x8;
1756}
1757static inline u32 sim_control_event_put_s(void)
1758{
1759 return 1;
1760}
1761static inline u32 sim_control_event_put_f(u32 v)
1762{
1763 return (v & 0x1) << 4;
1764}
1765static inline u32 sim_control_event_put_m(void)
1766{
1767 return 0x1 << 4;
1768}
1769static inline u32 sim_control_event_put_v(u32 r)
1770{
1771 return (r >> 4) & 0x1;
1772}
1773static inline u32 sim_control_event_put__init_v(void)
1774{
1775 return 0x00000000;
1776}
1777static inline u32 sim_control_event_put__init_f(void)
1778{
1779 return 0x0;
1780}
1781static inline u32 sim_control_event_put_disabled_v(void)
1782{
1783 return 0x00000000;
1784}
1785static inline u32 sim_control_event_put_disabled_f(void)
1786{
1787 return 0x0;
1788}
1789static inline u32 sim_control_event_put_enabled_v(void)
1790{
1791 return 0x00000001;
1792}
1793static inline u32 sim_control_event_put_enabled_f(void)
1794{
1795 return 0x10;
1796}
1797static inline u32 sim_control_event_get_s(void)
1798{
1799 return 1;
1800}
1801static inline u32 sim_control_event_get_f(u32 v)
1802{
1803 return (v & 0x1) << 5;
1804}
1805static inline u32 sim_control_event_get_m(void)
1806{
1807 return 0x1 << 5;
1808}
1809static inline u32 sim_control_event_get_v(u32 r)
1810{
1811 return (r >> 5) & 0x1;
1812}
1813static inline u32 sim_control_event_get__init_v(void)
1814{
1815 return 0x00000000;
1816}
1817static inline u32 sim_control_event_get__init_f(void)
1818{
1819 return 0x0;
1820}
1821static inline u32 sim_control_event_get_disabled_v(void)
1822{
1823 return 0x00000000;
1824}
1825static inline u32 sim_control_event_get_disabled_f(void)
1826{
1827 return 0x0;
1828}
1829static inline u32 sim_control_event_get_enabled_v(void)
1830{
1831 return 0x00000001;
1832}
1833static inline u32 sim_control_event_get_enabled_f(void)
1834{
1835 return 0x20;
1836}
1837static inline u32 sim_dma_r(void)
1838{
1839 return 0x00000000;
1840}
1841static inline u32 sim_dma_target_s(void)
1842{
1843 return 2;
1844}
1845static inline u32 sim_dma_target_f(u32 v)
1846{
1847 return (v & 0x3) << 0;
1848}
1849static inline u32 sim_dma_target_m(void)
1850{
1851 return 0x3 << 0;
1852}
1853static inline u32 sim_dma_target_v(u32 r)
1854{
1855 return (r >> 0) & 0x3;
1856}
1857static inline u32 sim_dma_target_phys_init_v(void)
1858{
1859 return 0x00000001;
1860}
1861static inline u32 sim_dma_target_phys_init_f(void)
1862{
1863 return 0x1;
1864}
1865static inline u32 sim_dma_target_phys__init_v(void)
1866{
1867 return 0x00000001;
1868}
1869static inline u32 sim_dma_target_phys__init_f(void)
1870{
1871 return 0x1;
1872}
1873static inline u32 sim_dma_target_phys__prod_v(void)
1874{
1875 return 0x00000001;
1876}
1877static inline u32 sim_dma_target_phys__prod_f(void)
1878{
1879 return 0x1;
1880}
1881static inline u32 sim_dma_target_phys_nvm_v(void)
1882{
1883 return 0x00000001;
1884}
1885static inline u32 sim_dma_target_phys_nvm_f(void)
1886{
1887 return 0x1;
1888}
1889static inline u32 sim_dma_target_phys_pci_v(void)
1890{
1891 return 0x00000002;
1892}
1893static inline u32 sim_dma_target_phys_pci_f(void)
1894{
1895 return 0x2;
1896}
1897static inline u32 sim_dma_target_phys_pci_coherent_v(void)
1898{
1899 return 0x00000003;
1900}
1901static inline u32 sim_dma_target_phys_pci_coherent_f(void)
1902{
1903 return 0x3;
1904}
1905static inline u32 sim_dma_status_s(void)
1906{
1907 return 1;
1908}
1909static inline u32 sim_dma_status_f(u32 v)
1910{
1911 return (v & 0x1) << 3;
1912}
1913static inline u32 sim_dma_status_m(void)
1914{
1915 return 0x1 << 3;
1916}
1917static inline u32 sim_dma_status_v(u32 r)
1918{
1919 return (r >> 3) & 0x1;
1920}
1921static inline u32 sim_dma_status_init_v(void)
1922{
1923 return 0x00000000;
1924}
1925static inline u32 sim_dma_status_init_f(void)
1926{
1927 return 0x0;
1928}
1929static inline u32 sim_dma_status__init_v(void)
1930{
1931 return 0x00000000;
1932}
1933static inline u32 sim_dma_status__init_f(void)
1934{
1935 return 0x0;
1936}
1937static inline u32 sim_dma_status__prod_v(void)
1938{
1939 return 0x00000000;
1940}
1941static inline u32 sim_dma_status__prod_f(void)
1942{
1943 return 0x0;
1944}
1945static inline u32 sim_dma_status_invalid_v(void)
1946{
1947 return 0x00000000;
1948}
1949static inline u32 sim_dma_status_invalid_f(void)
1950{
1951 return 0x0;
1952}
1953static inline u32 sim_dma_status_valid_v(void)
1954{
1955 return 0x00000001;
1956}
1957static inline u32 sim_dma_status_valid_f(void)
1958{
1959 return 0x8;
1960}
1961static inline u32 sim_dma_size_s(void)
1962{
1963 return 2;
1964}
1965static inline u32 sim_dma_size_f(u32 v)
1966{
1967 return (v & 0x3) << 4;
1968}
1969static inline u32 sim_dma_size_m(void)
1970{
1971 return 0x3 << 4;
1972}
1973static inline u32 sim_dma_size_v(u32 r)
1974{
1975 return (r >> 4) & 0x3;
1976}
1977static inline u32 sim_dma_size_init_v(void)
1978{
1979 return 0x00000000;
1980}
1981static inline u32 sim_dma_size_init_f(void)
1982{
1983 return 0x0;
1984}
1985static inline u32 sim_dma_size__init_v(void)
1986{
1987 return 0x00000000;
1988}
1989static inline u32 sim_dma_size__init_f(void)
1990{
1991 return 0x0;
1992}
1993static inline u32 sim_dma_size__prod_v(void)
1994{
1995 return 0x00000000;
1996}
1997static inline u32 sim_dma_size__prod_f(void)
1998{
1999 return 0x0;
2000}
2001static inline u32 sim_dma_size_4kb_v(void)
2002{
2003 return 0x00000000;
2004}
2005static inline u32 sim_dma_size_4kb_f(void)
2006{
2007 return 0x0;
2008}
2009static inline u32 sim_dma_size_8kb_v(void)
2010{
2011 return 0x00000001;
2012}
2013static inline u32 sim_dma_size_8kb_f(void)
2014{
2015 return 0x10;
2016}
2017static inline u32 sim_dma_size_12kb_v(void)
2018{
2019 return 0x00000002;
2020}
2021static inline u32 sim_dma_size_12kb_f(void)
2022{
2023 return 0x20;
2024}
2025static inline u32 sim_dma_size_16kb_v(void)
2026{
2027 return 0x00000003;
2028}
2029static inline u32 sim_dma_size_16kb_f(void)
2030{
2031 return 0x30;
2032}
2033static inline u32 sim_dma_addr_lo_s(void)
2034{
2035 return 20;
2036}
2037static inline u32 sim_dma_addr_lo_f(u32 v)
2038{
2039 return (v & 0xfffff) << 12;
2040}
2041static inline u32 sim_dma_addr_lo_m(void)
2042{
2043 return 0xfffff << 12;
2044}
2045static inline u32 sim_dma_addr_lo_v(u32 r)
2046{
2047 return (r >> 12) & 0xfffff;
2048}
2049static inline u32 sim_dma_addr_lo__init_v(void)
2050{
2051 return 0x00000000;
2052}
2053static inline u32 sim_dma_addr_lo__init_f(void)
2054{
2055 return 0x0;
2056}
2057static inline u32 sim_dma_addr_lo__prod_v(void)
2058{
2059 return 0x00000000;
2060}
2061static inline u32 sim_dma_addr_lo__prod_f(void)
2062{
2063 return 0x0;
2064}
2065static inline u32 sim_dma_hi_r(void)
2066{
2067 return 0x00000004;
2068}
2069static inline u32 sim_dma_hi_addr_s(void)
2070{
2071 return 20;
2072}
2073static inline u32 sim_dma_hi_addr_f(u32 v)
2074{
2075 return (v & 0xfffff) << 0;
2076}
2077static inline u32 sim_dma_hi_addr_m(void)
2078{
2079 return 0xfffff << 0;
2080}
2081static inline u32 sim_dma_hi_addr_v(u32 r)
2082{
2083 return (r >> 0) & 0xfffff;
2084}
2085static inline u32 sim_dma_hi_addr__init_v(void)
2086{
2087 return 0x00000000;
2088}
2089static inline u32 sim_dma_hi_addr__init_f(void)
2090{
2091 return 0x0;
2092}
2093static inline u32 sim_dma_hi_addr__prod_v(void)
2094{
2095 return 0x00000000;
2096}
2097static inline u32 sim_dma_hi_addr__prod_f(void)
2098{
2099 return 0x0;
2100}
2101static inline u32 sim_msg_signature_r(void)
2102{
2103 return 0x00000000;
2104}
2105static inline u32 sim_msg_signature_valid_v(void)
2106{
2107 return 0x43505256;
2108}
2109static inline u32 sim_msg_length_r(void)
2110{
2111 return 0x00000004;
2112}
2113static inline u32 sim_msg_function_r(void)
2114{
2115 return 0x00000008;
2116}
2117static inline u32 sim_msg_function_sim_escape_read_v(void)
2118{
2119 return 0x00000023;
2120}
2121static inline u32 sim_msg_function_sim_escape_write_v(void)
2122{
2123 return 0x00000024;
2124}
2125static inline u32 sim_msg_result_r(void)
2126{
2127 return 0x0000000c;
2128}
2129static inline u32 sim_msg_result_success_v(void)
2130{
2131 return 0x00000000;
2132}
2133static inline u32 sim_msg_result_rpc_pending_v(void)
2134{
2135 return 0xFFFFFFFF;
2136}
2137static inline u32 sim_msg_sequence_r(void)
2138{
2139 return 0x00000010;
2140}
2141static inline u32 sim_msg_spare_r(void)
2142{
2143 return 0x00000014;
2144}
2145static inline u32 sim_msg_spare__init_v(void)
2146{
2147 return 0x00000000;
2148}
2149
2150#endif /* __hw_sim_gk20a_h__ */
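
The sim_msg_* values above appear to be byte offsets into the in-memory simulator message header rather than MMIO register offsets (they are consecutive 4-byte slots starting at 0). A minimal sketch, not part of the patch, of filling such a header; the helper name and the caller-provided buffer are illustrative assumptions:

static inline void sketch_sim_msg_header_init(u8 *msg, u32 function, u32 length)
{
	/* each *_r() value is used here as a byte offset into the message area */
	*(u32 *)(msg + sim_msg_signature_r()) = sim_msg_signature_valid_v();
	*(u32 *)(msg + sim_msg_length_r()) = length;
	*(u32 *)(msg + sim_msg_function_r()) = function;
	*(u32 *)(msg + sim_msg_result_r()) = sim_msg_result_rpc_pending_v();
}
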
diff --git a/drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h
new file mode 100644
index 00000000..5d6397b4
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h
@@ -0,0 +1,225 @@
1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_therm_gk20a_h_
51#define _hw_therm_gk20a_h_
52
53static inline u32 therm_use_a_r(void)
54{
55 return 0x00020798;
56}
57static inline u32 therm_evt_ext_therm_0_r(void)
58{
59 return 0x00020700;
60}
61static inline u32 therm_evt_ext_therm_1_r(void)
62{
63 return 0x00020704;
64}
65static inline u32 therm_evt_ext_therm_2_r(void)
66{
67 return 0x00020708;
68}
69static inline u32 therm_evt_ba_w0_t1h_r(void)
70{
71 return 0x00020750;
72}
73static inline u32 therm_weight_1_r(void)
74{
75 return 0x00020024;
76}
77static inline u32 therm_peakpower_config1_r(u32 i)
78{
79 return 0x00020154 + i*4;
80}
81static inline u32 therm_peakpower_config1_window_period_2m_v(void)
82{
83 return 0x0000000f;
84}
85static inline u32 therm_peakpower_config1_window_period_2m_f(void)
86{
87 return 0xf;
88}
89static inline u32 therm_peakpower_config1_ba_sum_shift_s(void)
90{
91 return 6;
92}
93static inline u32 therm_peakpower_config1_ba_sum_shift_f(u32 v)
94{
95 return (v & 0x3f) << 8;
96}
97static inline u32 therm_peakpower_config1_ba_sum_shift_m(void)
98{
99 return 0x3f << 8;
100}
101static inline u32 therm_peakpower_config1_ba_sum_shift_v(u32 r)
102{
103 return (r >> 8) & 0x3f;
104}
105static inline u32 therm_peakpower_config1_ba_sum_shift_20_f(void)
106{
107 return 0x1400;
108}
109static inline u32 therm_peakpower_config1_window_en_enabled_f(void)
110{
111 return 0x80000000;
112}
113static inline u32 therm_peakpower_config2_r(u32 i)
114{
115 return 0x00020170 + i*4;
116}
117static inline u32 therm_peakpower_config4_r(u32 i)
118{
119 return 0x000201c0 + i*4;
120}
121static inline u32 therm_peakpower_config6_r(u32 i)
122{
123 return 0x00020270 + i*4;
124}
125static inline u32 therm_peakpower_config8_r(u32 i)
126{
127 return 0x000202e8 + i*4;
128}
129static inline u32 therm_peakpower_config9_r(u32 i)
130{
131 return 0x000202f4 + i*4;
132}
133static inline u32 therm_config1_r(void)
134{
135 return 0x00020050;
136}
137static inline u32 therm_gate_ctrl_r(u32 i)
138{
139 return 0x00020200 + i*4;
140}
141static inline u32 therm_gate_ctrl_eng_clk_m(void)
142{
143 return 0x3 << 0;
144}
145static inline u32 therm_gate_ctrl_eng_clk_run_f(void)
146{
147 return 0x0;
148}
149static inline u32 therm_gate_ctrl_eng_clk_auto_f(void)
150{
151 return 0x1;
152}
153static inline u32 therm_gate_ctrl_eng_clk_stop_f(void)
154{
155 return 0x2;
156}
157static inline u32 therm_gate_ctrl_blk_clk_m(void)
158{
159 return 0x3 << 2;
160}
161static inline u32 therm_gate_ctrl_blk_clk_run_f(void)
162{
163 return 0x0;
164}
165static inline u32 therm_gate_ctrl_blk_clk_auto_f(void)
166{
167 return 0x4;
168}
169static inline u32 therm_gate_ctrl_eng_pwr_m(void)
170{
171 return 0x3 << 4;
172}
173static inline u32 therm_gate_ctrl_eng_pwr_auto_f(void)
174{
175 return 0x10;
176}
177static inline u32 therm_gate_ctrl_eng_pwr_off_v(void)
178{
179 return 0x00000002;
180}
181static inline u32 therm_gate_ctrl_eng_pwr_off_f(void)
182{
183 return 0x20;
184}
185static inline u32 therm_gate_ctrl_eng_idle_filt_exp_f(u32 v)
186{
187 return (v & 0x1f) << 8;
188}
189static inline u32 therm_gate_ctrl_eng_idle_filt_exp_m(void)
190{
191 return 0x1f << 8;
192}
193static inline u32 therm_gate_ctrl_eng_idle_filt_mant_f(u32 v)
194{
195 return (v & 0x7) << 13;
196}
197static inline u32 therm_gate_ctrl_eng_idle_filt_mant_m(void)
198{
199 return 0x7 << 13;
200}
201static inline u32 therm_gate_ctrl_eng_delay_after_f(u32 v)
202{
203 return (v & 0xf) << 20;
204}
205static inline u32 therm_gate_ctrl_eng_delay_after_m(void)
206{
207 return 0xf << 20;
208}
209static inline u32 therm_fecs_idle_filter_r(void)
210{
211 return 0x00020288;
212}
213static inline u32 therm_fecs_idle_filter_value_m(void)
214{
215 return 0xffffffff << 0;
216}
217static inline u32 therm_hubmmu_idle_filter_r(void)
218{
219 return 0x0002028c;
220}
221static inline u32 therm_hubmmu_idle_filter_value_m(void)
222{
223 return 0xffffffff << 0;
224}
225#endif
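
As a concrete illustration of the accessor convention documented at the top of this header, the sketch below (not part of the patch) performs a read-modify-write of one gate-control register: _m() masks a field out, _f() installs either a pre-shifted constant or a caller-supplied value. It assumes the gk20a_readl()/gk20a_writel() helpers used elsewhere in this series; the function name and the filt_exp parameter are illustrative.

static void sketch_therm_gate_auto(struct gk20a *g, u32 i, u32 filt_exp)
{
	u32 v = gk20a_readl(g, therm_gate_ctrl_r(i));

	v &= ~therm_gate_ctrl_eng_clk_m();	/* clear the 2-bit clk field */
	v |= therm_gate_ctrl_eng_clk_auto_f();	/* pre-shifted constant value */

	v &= ~therm_gate_ctrl_eng_idle_filt_exp_m();
	v |= therm_gate_ctrl_eng_idle_filt_exp_f(filt_exp);

	gk20a_writel(g, therm_gate_ctrl_r(i), v);
}
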
diff --git a/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h
new file mode 100644
index 00000000..22bc50ac
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h
@@ -0,0 +1,101 @@
1/*
2 * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_timer_gk20a_h_
51#define _hw_timer_gk20a_h_
52
53static inline u32 timer_pri_timeout_r(void)
54{
55 return 0x00009080;
56}
57static inline u32 timer_pri_timeout_period_f(u32 v)
58{
59 return (v & 0xffffff) << 0;
60}
61static inline u32 timer_pri_timeout_period_m(void)
62{
63 return 0xffffff << 0;
64}
65static inline u32 timer_pri_timeout_period_v(u32 r)
66{
67 return (r >> 0) & 0xffffff;
68}
69static inline u32 timer_pri_timeout_en_f(u32 v)
70{
71 return (v & 0x1) << 31;
72}
73static inline u32 timer_pri_timeout_en_m(void)
74{
75 return 0x1 << 31;
76}
77static inline u32 timer_pri_timeout_en_v(u32 r)
78{
79 return (r >> 31) & 0x1;
80}
81static inline u32 timer_pri_timeout_en_en_enabled_f(void)
82{
83 return 0x80000000;
84}
85static inline u32 timer_pri_timeout_en_en_disabled_f(void)
86{
87 return 0x0;
88}
89static inline u32 timer_pri_timeout_save_0_r(void)
90{
91 return 0x00009084;
92}
93static inline u32 timer_pri_timeout_save_1_r(void)
94{
95 return 0x00009088;
96}
97static inline u32 timer_pri_timeout_fecs_errcode_r(void)
98{
99 return 0x0000908c;
100}
101#endif
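
A minimal sketch (not part of the patch) of composing a full register value from these accessors: the period goes through the value-shifting _f() helper and is OR'd with the pre-shifted enable constant. gk20a_writel() is the register write helper used elsewhere in this series; the helper name and the units of "period" (hardware-defined) are assumptions.

static void sketch_timer_pri_timeout_enable(struct gk20a *g, u32 period)
{
	gk20a_writel(g, timer_pri_timeout_r(),
		     timer_pri_timeout_period_f(period) |
		     timer_pri_timeout_en_en_enabled_f());
}
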
diff --git a/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h
new file mode 100644
index 00000000..c2922814
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h
@@ -0,0 +1,137 @@
1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_top_gk20a_h_
51#define _hw_top_gk20a_h_
52
53static inline u32 top_num_gpcs_r(void)
54{
55 return 0x00022430;
56}
57static inline u32 top_num_gpcs_value_v(u32 r)
58{
59 return (r >> 0) & 0x1f;
60}
61static inline u32 top_tpc_per_gpc_r(void)
62{
63 return 0x00022434;
64}
65static inline u32 top_tpc_per_gpc_value_v(u32 r)
66{
67 return (r >> 0) & 0x1f;
68}
69static inline u32 top_num_fbps_r(void)
70{
71 return 0x00022438;
72}
73static inline u32 top_num_fbps_value_v(u32 r)
74{
75 return (r >> 0) & 0x1f;
76}
77static inline u32 top_fs_status_r(void)
78{
79 return 0x00022500;
80}
81static inline u32 top_device_info_r(u32 i)
82{
83 return 0x00022700 + i*4;
84}
85static inline u32 top_device_info__size_1_v(void)
86{
87 return 0x00000040;
88}
89static inline u32 top_device_info_chain_v(u32 r)
90{
91 return (r >> 31) & 0x1;
92}
93static inline u32 top_device_info_chain_enable_v(void)
94{
95 return 0x00000001;
96}
97static inline u32 top_device_info_engine_enum_v(u32 r)
98{
99 return (r >> 26) & 0xf;
100}
101static inline u32 top_device_info_runlist_enum_v(u32 r)
102{
103 return (r >> 21) & 0xf;
104}
105static inline u32 top_device_info_type_enum_v(u32 r)
106{
107 return (r >> 2) & 0x1fffffff;
108}
109static inline u32 top_device_info_type_enum_graphics_v(void)
110{
111 return 0x00000000;
112}
113static inline u32 top_device_info_type_enum_graphics_f(void)
114{
115 return 0x0;
116}
117static inline u32 top_device_info_type_enum_copy0_v(void)
118{
119 return 0x00000001;
120}
121static inline u32 top_device_info_type_enum_copy0_f(void)
122{
123 return 0x4;
124}
125static inline u32 top_device_info_entry_v(u32 r)
126{
127 return (r >> 0) & 0x3;
128}
129static inline u32 top_device_info_entry_not_valid_v(void)
130{
131 return 0x00000000;
132}
133static inline u32 top_device_info_entry_enum_v(void)
134{
135 return 0x00000002;
136}
137#endif
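
A sketch (not part of the patch) of walking the device_info table with these accessors. It only inspects entries whose low bits mark them as enum entries and, for brevity, ignores the chain bit that flags multi-word entries; the helper name is illustrative and gk20a_readl() comes from elsewhere in this series.

static u32 sketch_count_copy0_entries(struct gk20a *g)
{
	u32 i, count = 0;

	for (i = 0; i < top_device_info__size_1_v(); i++) {
		u32 entry = gk20a_readl(g, top_device_info_r(i));

		if (top_device_info_entry_v(entry) !=
		    top_device_info_entry_enum_v())
			continue;
		if (top_device_info_type_enum_v(entry) ==
		    top_device_info_type_enum_copy0_v())
			count++;
	}
	return count;
}
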
diff --git a/drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h
new file mode 100644
index 00000000..826e9bd1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h
@@ -0,0 +1,301 @@
1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_trim_gk20a_h_
51#define _hw_trim_gk20a_h_
52
53static inline u32 trim_sys_gpcpll_cfg_r(void)
54{
55 return 0x00137000;
56}
57static inline u32 trim_sys_gpcpll_cfg_enable_m(void)
58{
59 return 0x1 << 0;
60}
61static inline u32 trim_sys_gpcpll_cfg_enable_v(u32 r)
62{
63 return (r >> 0) & 0x1;
64}
65static inline u32 trim_sys_gpcpll_cfg_enable_no_f(void)
66{
67 return 0x0;
68}
69static inline u32 trim_sys_gpcpll_cfg_enable_yes_f(void)
70{
71 return 0x1;
72}
73static inline u32 trim_sys_gpcpll_cfg_iddq_m(void)
74{
75 return 0x1 << 1;
76}
77static inline u32 trim_sys_gpcpll_cfg_iddq_v(u32 r)
78{
79 return (r >> 1) & 0x1;
80}
81static inline u32 trim_sys_gpcpll_cfg_iddq_power_on_v(void)
82{
83 return 0x00000000;
84}
85static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_m(void)
86{
87 return 0x1 << 4;
88}
89static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_on_f(void)
90{
91 return 0x0;
92}
93static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_off_f(void)
94{
95 return 0x10;
96}
97static inline u32 trim_sys_gpcpll_cfg_pll_lock_v(u32 r)
98{
99 return (r >> 17) & 0x1;
100}
101static inline u32 trim_sys_gpcpll_cfg_pll_lock_true_f(void)
102{
103 return 0x20000;
104}
105static inline u32 trim_sys_gpcpll_coeff_r(void)
106{
107 return 0x00137004;
108}
109static inline u32 trim_sys_gpcpll_coeff_mdiv_f(u32 v)
110{
111 return (v & 0xff) << 0;
112}
113static inline u32 trim_sys_gpcpll_coeff_mdiv_v(u32 r)
114{
115 return (r >> 0) & 0xff;
116}
117static inline u32 trim_sys_gpcpll_coeff_ndiv_f(u32 v)
118{
119 return (v & 0xff) << 8;
120}
121static inline u32 trim_sys_gpcpll_coeff_ndiv_m(void)
122{
123 return 0xff << 8;
124}
125static inline u32 trim_sys_gpcpll_coeff_ndiv_v(u32 r)
126{
127 return (r >> 8) & 0xff;
128}
129static inline u32 trim_sys_gpcpll_coeff_pldiv_f(u32 v)
130{
131 return (v & 0x3f) << 16;
132}
133static inline u32 trim_sys_gpcpll_coeff_pldiv_v(u32 r)
134{
135 return (r >> 16) & 0x3f;
136}
137static inline u32 trim_sys_sel_vco_r(void)
138{
139 return 0x00137100;
140}
141static inline u32 trim_sys_sel_vco_gpc2clk_out_m(void)
142{
143 return 0x1 << 0;
144}
145static inline u32 trim_sys_sel_vco_gpc2clk_out_init_v(void)
146{
147 return 0x00000000;
148}
149static inline u32 trim_sys_sel_vco_gpc2clk_out_init_f(void)
150{
151 return 0x0;
152}
153static inline u32 trim_sys_sel_vco_gpc2clk_out_bypass_f(void)
154{
155 return 0x0;
156}
157static inline u32 trim_sys_sel_vco_gpc2clk_out_vco_f(void)
158{
159 return 0x1;
160}
161static inline u32 trim_sys_gpc2clk_out_r(void)
162{
163 return 0x00137250;
164}
165static inline u32 trim_sys_gpc2clk_out_bypdiv_s(void)
166{
167 return 6;
168}
169static inline u32 trim_sys_gpc2clk_out_bypdiv_f(u32 v)
170{
171 return (v & 0x3f) << 0;
172}
173static inline u32 trim_sys_gpc2clk_out_bypdiv_m(void)
174{
175 return 0x3f << 0;
176}
177static inline u32 trim_sys_gpc2clk_out_bypdiv_v(u32 r)
178{
179 return (r >> 0) & 0x3f;
180}
181static inline u32 trim_sys_gpc2clk_out_bypdiv_by31_f(void)
182{
183 return 0x3c;
184}
185static inline u32 trim_sys_gpc2clk_out_vcodiv_s(void)
186{
187 return 6;
188}
189static inline u32 trim_sys_gpc2clk_out_vcodiv_f(u32 v)
190{
191 return (v & 0x3f) << 8;
192}
193static inline u32 trim_sys_gpc2clk_out_vcodiv_m(void)
194{
195 return 0x3f << 8;
196}
197static inline u32 trim_sys_gpc2clk_out_vcodiv_v(u32 r)
198{
199 return (r >> 8) & 0x3f;
200}
201static inline u32 trim_sys_gpc2clk_out_vcodiv_by1_f(void)
202{
203 return 0x0;
204}
205static inline u32 trim_sys_gpc2clk_out_sdiv14_m(void)
206{
207 return 0x1 << 31;
208}
209static inline u32 trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f(void)
210{
211 return 0x80000000;
212}
213static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_r(u32 i)
214{
215 return 0x00134124 + i*512;
216}
217static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(u32 v)
218{
219 return (v & 0x3fff) << 0;
220}
221static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f(void)
222{
223 return 0x10000;
224}
225static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f(void)
226{
227 return 0x100000;
228}
229static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f(void)
230{
231 return 0x1000000;
232}
233static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_r(u32 i)
234{
235 return 0x00134128 + i*512;
236}
237static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(u32 r)
238{
239 return (r >> 0) & 0xfffff;
240}
241static inline u32 trim_sys_gpcpll_cfg2_r(void)
242{
243 return 0x0013700c;
244}
245static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_f(u32 v)
246{
247 return (v & 0xff) << 24;
248}
249static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_m(void)
250{
251 return 0xff << 24;
252}
253static inline u32 trim_sys_gpcpll_cfg3_r(void)
254{
255 return 0x00137018;
256}
257static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_f(u32 v)
258{
259 return (v & 0xff) << 16;
260}
261static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_m(void)
262{
263 return 0xff << 16;
264}
265static inline u32 trim_sys_gpcpll_ndiv_slowdown_r(void)
266{
267 return 0x0013701c;
268}
269static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(void)
270{
271 return 0x1 << 22;
272}
273static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f(void)
274{
275 return 0x400000;
276}
277static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f(void)
278{
279 return 0x0;
280}
281static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(void)
282{
283 return 0x1 << 31;
284}
285static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f(void)
286{
287 return 0x80000000;
288}
289static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f(void)
290{
291 return 0x0;
292}
293static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r(void)
294{
295 return 0x001328a0;
296}
297static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(u32 r)
298{
299 return (r >> 24) & 0x1;
300}
301#endif
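
A sketch (not part of the patch) of decoding the GPCPLL coefficient register with the _v() accessors. The output relation ref * N / (M * PL) is the conventional PLL formula and is an assumption here, not something this header defines; the helper name is illustrative.

static u32 sketch_gpcpll_out_khz(struct gk20a *g, u32 ref_khz)
{
	u32 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	u32 m = trim_sys_gpcpll_coeff_mdiv_v(coeff);
	u32 n = trim_sys_gpcpll_coeff_ndiv_v(coeff);
	u32 pl = trim_sys_gpcpll_coeff_pldiv_v(coeff);

	if (m == 0 || pl == 0)
		return 0;	/* PLL not programmed */

	return ref_khz * n / (m * pl);
}
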
diff --git a/drivers/gpu/nvgpu/gk20a/kind_gk20a.c b/drivers/gpu/nvgpu/gk20a/kind_gk20a.c
new file mode 100644
index 00000000..b0a74056
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/kind_gk20a.c
@@ -0,0 +1,424 @@
1/*
2 * drivers/video/tegra/host/gk20a/kind_gk20a.c
3 *
4 * GK20A memory kind management
5 *
6 * Copyright (c) 2011, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#include <linux/bitops.h>
22
23#include "hw_gmmu_gk20a.h"
24#include "kind_gk20a.h"
25
26/* TBD: generate these from kind_macros.h */
27
28/* TBD: not sure on the work creation for gk20a, doubtful */
29static inline bool gk20a_kind_work_creation_sked(u8 k)
30{
31 return false;
32}
33static inline bool gk20a_kind_work_creation_host(u8 k)
34{
35 return false;
36}
37
38static inline bool gk20a_kind_work_creation(u8 k)
39{
40 return gk20a_kind_work_creation_sked(k) ||
41 gk20a_kind_work_creation_host(k);
42}
43
44/* note: taken from the !2cs_compression case */
45static inline bool gk20a_kind_supported(u8 k)
46{
47 return gk20a_kind_work_creation(k) ||
48 (k == gmmu_pte_kind_invalid_v()) ||
49 (k == gmmu_pte_kind_pitch_v()) ||
50 (k >= gmmu_pte_kind_z16_v() &&
51 k <= gmmu_pte_kind_z16_ms8_2c_v()) ||
52 (k >= gmmu_pte_kind_z16_2z_v() &&
53 k <= gmmu_pte_kind_z16_ms8_2z_v()) ||
54 (k == gmmu_pte_kind_s8z24_v()) ||
55 (k >= gmmu_pte_kind_s8z24_2cz_v() &&
56 k <= gmmu_pte_kind_s8z24_ms8_2cz_v()) ||
57 (k >= gmmu_pte_kind_v8z24_ms4_vc12_v() &&
58 k <= gmmu_pte_kind_v8z24_ms8_vc24_v()) ||
59 (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() &&
60 k <= gmmu_pte_kind_v8z24_ms8_vc24_2zv_v()) ||
61 (k == gmmu_pte_kind_z24s8_v()) ||
62 (k >= gmmu_pte_kind_z24s8_2cz_v() &&
63 k <= gmmu_pte_kind_z24s8_ms8_2cz_v()) ||
64 (k == gmmu_pte_kind_zf32_v()) ||
65 (k >= gmmu_pte_kind_zf32_2cz_v() &&
66 k <= gmmu_pte_kind_zf32_ms8_2cz_v()) ||
67 (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v() &&
68 k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v()) ||
69 (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v() &&
70 k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v()) ||
71 (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v() &&
72 k <= gmmu_pte_kind_zf32_x24s8_v()) ||
73 (k >= gmmu_pte_kind_zf32_x24s8_2cszv_v() &&
74 k <= gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v()) ||
75 (k == gmmu_pte_kind_generic_16bx2_v()) ||
76 (k == gmmu_pte_kind_c32_2c_v()) ||
77 (k == gmmu_pte_kind_c32_2cra_v()) ||
78 (k == gmmu_pte_kind_c32_ms2_2c_v()) ||
79 (k == gmmu_pte_kind_c32_ms2_2cra_v()) ||
80 (k >= gmmu_pte_kind_c32_ms4_2c_v() &&
81 k <= gmmu_pte_kind_c32_ms4_2cbr_v()) ||
82 (k >= gmmu_pte_kind_c32_ms4_2cra_v() &&
83 k <= gmmu_pte_kind_c64_2c_v()) ||
84 (k == gmmu_pte_kind_c64_2cra_v()) ||
85 (k == gmmu_pte_kind_c64_ms2_2c_v()) ||
86 (k == gmmu_pte_kind_c64_ms2_2cra_v()) ||
87 (k >= gmmu_pte_kind_c64_ms4_2c_v() &&
88 k <= gmmu_pte_kind_c64_ms4_2cbr_v()) ||
89 (k >= gmmu_pte_kind_c64_ms4_2cra_v() &&
90 k <= gmmu_pte_kind_c128_ms8_ms16_2cr_v()) ||
91 (k == gmmu_pte_kind_pitch_no_swizzle_v());
 92}
93
94static inline bool gk20a_kind_z(u8 k)
95{
96 return (k >= gmmu_pte_kind_z16_v() &&
97 k <= gmmu_pte_kind_v8z24_ms8_vc24_v()) ||
98 (k >= gmmu_pte_kind_v8z24_ms4_vc12_1zv_v() &&
99 k <= gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()) ||
100 (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() &&
101 k <= gmmu_pte_kind_z24v8_ms8_vc24_v()) ||
102 (k >= gmmu_pte_kind_z24v8_ms4_vc12_1zv_v() &&
103 k <= gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()) ||
104 (k >= gmmu_pte_kind_z24v8_ms4_vc12_2czv_v() &&
105 k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v()) ||
106 (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v() &&
107 k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v()) ||
108 (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v() &&
109 k <= gmmu_pte_kind_zf32_x24s8_ms16_1cs_v())
110 /* ||
111 (k >= gmmu_pte_kind_zv32_x24s8_2cszv_v() &&
112 k <= gmmu_pte_kind_xf32_x24s8_ms16_2cs_v())*/;
113}
114
115static inline bool gk20a_kind_c(u8 k)
116{
117 return gk20a_kind_work_creation(k) ||
118 (k == gmmu_pte_kind_pitch_v()) ||
119 (k == gmmu_pte_kind_generic_16bx2_v()) ||
120 (k >= gmmu_pte_kind_c32_2c_v() &&
121 k <= gmmu_pte_kind_c32_ms2_2cbr_v()) ||
122 (k == gmmu_pte_kind_c32_ms2_2cra_v()) ||
123 (k >= gmmu_pte_kind_c32_ms4_2c_v() &&
124 k <= gmmu_pte_kind_c64_ms2_2cbr_v()) ||
125 (k == gmmu_pte_kind_c64_ms2_2cra_v()) ||
126 (k >= gmmu_pte_kind_c64_ms4_2c_v() &&
127 k <= gmmu_pte_kind_pitch_no_swizzle_v());
128}
129
130static inline bool gk20a_kind_compressible(u8 k)
131{
132 return (k >= gmmu_pte_kind_z16_2c_v() &&
133 k <= gmmu_pte_kind_z16_ms16_4cz_v()) ||
134 (k >= gmmu_pte_kind_s8z24_1z_v() &&
135 k <= gmmu_pte_kind_s8z24_ms16_4cszv_v()) ||
136 (k >= gmmu_pte_kind_v8z24_ms4_vc12_1zv_v() &&
137 k <= gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()) ||
138 (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() &&
139 k <= gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v()) ||
140 (k >= gmmu_pte_kind_z24s8_1z_v() &&
141 k <= gmmu_pte_kind_z24s8_ms16_4cszv_v()) ||
142 (k >= gmmu_pte_kind_z24v8_ms4_vc12_1zv_v() &&
143 k <= gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()) ||
144 (k >= gmmu_pte_kind_z24v8_ms4_vc12_2czv_v() &&
145 k <= gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v()) ||
146 (k >= gmmu_pte_kind_zf32_1z_v() &&
147 k <= gmmu_pte_kind_zf32_ms16_2cz_v()) ||
148 (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v() &&
149 k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v()) ||
150 (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v() &&
151 k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v()) ||
152 (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v() &&
153 k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v()) ||
154 (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v() &&
155 k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v()) ||
156 (k >= gmmu_pte_kind_zf32_x24s8_1cs_v() &&
157 k <= gmmu_pte_kind_zf32_x24s8_ms16_1cs_v()) ||
158 (k >= gmmu_pte_kind_zf32_x24s8_2cszv_v() &&
159 k <= gmmu_pte_kind_c32_ms2_2cbr_v()) ||
160 (k == gmmu_pte_kind_c32_ms2_2cra_v()) ||
161 (k >= gmmu_pte_kind_c32_ms4_2c_v() &&
162 k <= gmmu_pte_kind_c64_ms2_2cbr_v()) ||
163 (k == gmmu_pte_kind_c64_ms2_2cra_v()) ||
164 (k >= gmmu_pte_kind_c64_ms4_2c_v() &&
165 k <= gmmu_pte_kind_c128_ms8_ms16_2cr_v());
166}
167
168static inline bool gk20a_kind_zbc(u8 k)
169{
170 return (k >= gmmu_pte_kind_z16_2c_v() &&
171 k <= gmmu_pte_kind_z16_ms16_2c_v()) ||
172 (k >= gmmu_pte_kind_z16_4cz_v() &&
173 k <= gmmu_pte_kind_z16_ms16_4cz_v()) ||
174 (k >= gmmu_pte_kind_s8z24_2cz_v() &&
175 k <= gmmu_pte_kind_s8z24_ms16_4cszv_v()) ||
176 (k >= gmmu_pte_kind_v8z24_ms4_vc12_2cs_v() &&
177 k <= gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()) ||
178 (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() &&
179 k <= gmmu_pte_kind_v8z24_ms8_vc24_2czv_v()) ||
180 (k >= gmmu_pte_kind_v8z24_ms4_vc12_4cszv_v() &&
181 k <= gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v()) ||
182 (k >= gmmu_pte_kind_z24s8_2cs_v() &&
183 k <= gmmu_pte_kind_z24s8_ms16_4cszv_v()) ||
184 (k >= gmmu_pte_kind_z24v8_ms4_vc12_2cs_v() &&
185 k <= gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()) ||
186 (k >= gmmu_pte_kind_z24v8_ms4_vc12_2czv_v() &&
187 k <= gmmu_pte_kind_z24v8_ms8_vc24_2czv_v()) ||
188 (k >= gmmu_pte_kind_z24v8_ms4_vc12_4cszv_v() &&
189 k <= gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v()) ||
190 (k >= gmmu_pte_kind_zf32_2cs_v() &&
191 k <= gmmu_pte_kind_zf32_ms16_2cz_v()) ||
192 (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v() &&
193 k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v()) ||
194 (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1czv_v() &&
195 k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v()) ||
196 (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v() &&
197 k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v()) ||
198 (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1czv_v() &&
199 k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v()) ||
200 (k >= gmmu_pte_kind_zf32_x24s8_1cs_v() &&
201 k <= gmmu_pte_kind_zf32_x24s8_ms16_1cs_v()) ||
202 (k >= gmmu_pte_kind_zf32_x24s8_2cszv_v() &&
203 k <= gmmu_pte_kind_c32_2cra_v()) ||
204 (k >= gmmu_pte_kind_c32_ms2_2c_v() &&
205 k <= gmmu_pte_kind_c32_ms2_2cbr_v()) ||
206 (k == gmmu_pte_kind_c32_ms2_2cra_v()) ||
207 (k >= gmmu_pte_kind_c32_ms4_2c_v() &&
208 k <= gmmu_pte_kind_c32_ms4_2cra_v()) ||
209 (k >= gmmu_pte_kind_c32_ms8_ms16_2c_v() &&
210 k <= gmmu_pte_kind_c64_2cra_v()) ||
211 (k >= gmmu_pte_kind_c64_ms2_2c_v() &&
212 k <= gmmu_pte_kind_c64_ms2_2cbr_v()) ||
213 (k == gmmu_pte_kind_c64_ms2_2cra_v()) ||
214 (k >= gmmu_pte_kind_c64_ms4_2c_v() &&
215 k <= gmmu_pte_kind_c64_ms4_2cra_v()) ||
216 (k >= gmmu_pte_kind_c64_ms8_ms16_2c_v() &&
217 k <= gmmu_pte_kind_c128_ms8_ms16_2cr_v());
218}
219
220u8 gk20a_uc_kind_map[256];
221void gk20a_init_uncompressed_kind_map(void)
222{
223 int i;
224 for (i = 0; i < 256; i++)
225 gk20a_uc_kind_map[i] = gmmu_pte_kind_invalid_v();
226
227 gk20a_uc_kind_map[gmmu_pte_kind_z16_v()] =
228 gk20a_uc_kind_map[gmmu_pte_kind_z16_2c_v()] =
229 gk20a_uc_kind_map[gmmu_pte_kind_z16_ms2_2c_v()] =
230 gk20a_uc_kind_map[gmmu_pte_kind_z16_ms4_2c_v()] =
231 gk20a_uc_kind_map[gmmu_pte_kind_z16_ms8_2c_v()] =
232 gk20a_uc_kind_map[gmmu_pte_kind_z16_2z_v()] =
233 gk20a_uc_kind_map[gmmu_pte_kind_z16_ms2_2z_v()] =
234 gk20a_uc_kind_map[gmmu_pte_kind_z16_ms4_2z_v()] =
235 gk20a_uc_kind_map[gmmu_pte_kind_z16_ms8_2z_v()] =
236 gmmu_pte_kind_z16_v();
237
238 gk20a_uc_kind_map[gmmu_pte_kind_s8z24_v()] =
239 gk20a_uc_kind_map[gmmu_pte_kind_s8z24_2cz_v()] =
240 gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms2_2cz_v()] =
241 gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms4_2cz_v()] =
242 gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms8_2cz_v()] =
243 gk20a_uc_kind_map[gmmu_pte_kind_s8z24_2cs_v()] =
244 gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms2_2cs_v()] =
245 gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms4_2cs_v()] =
246 gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms8_2cs_v()] =
247 gmmu_pte_kind_s8z24_v();
248
249 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_v()] =
250 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_2cs_v()] =
251 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_2czv_v()] =
252 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_2zv_v()] =
253 gmmu_pte_kind_v8z24_ms4_vc4_v();
254
255 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_v()] =
256 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_2cs_v()] =
257 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_2czv_v()] =
258 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_2zv_v()] =
259 gmmu_pte_kind_v8z24_ms8_vc8_v();
260
261 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_v()] =
262 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_2cs_v()] =
263 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_2czv_v()] =
264 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_2zv_v()] =
265 gmmu_pte_kind_v8z24_ms4_vc12_v();
266
267 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_v()] =
268 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()] =
269 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_2czv_v()] =
270 gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_2zv_v()] =
271 gmmu_pte_kind_v8z24_ms8_vc24_v();
272
273 gk20a_uc_kind_map[gmmu_pte_kind_z24s8_v()] =
274 gk20a_uc_kind_map[gmmu_pte_kind_z24s8_2cs_v()] =
275 gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms2_2cs_v()] =
276 gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms4_2cs_v()] =
277 gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms8_2cs_v()] =
278 gk20a_uc_kind_map[gmmu_pte_kind_z24s8_2cz_v()] =
279 gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms2_2cz_v()] =
280 gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms4_2cz_v()] =
281 gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms8_2cz_v()] =
282 gmmu_pte_kind_z24s8_v();
283
284 gk20a_uc_kind_map[gmmu_pte_kind_zf32_v()] =
285 gk20a_uc_kind_map[gmmu_pte_kind_zf32_2cs_v()] =
286 gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms2_2cs_v()] =
287 gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms4_2cs_v()] =
288 gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms8_2cs_v()] =
289 gk20a_uc_kind_map[gmmu_pte_kind_zf32_2cz_v()] =
290 gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms2_2cz_v()] =
291 gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms4_2cz_v()] =
292 gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms8_2cz_v()] =
293 gmmu_pte_kind_zf32_v();
294
295 gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v()] =
296 gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cs_v()] =
297 gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v()] =
298 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v();
299
300 gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v()] =
301 gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cs_v()] =
302 gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cszv_v()] =
303 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v();
304
305 gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v()] =
306 gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cs_v()] =
307 gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cszv_v()] =
308 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v();
309
310 gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v()] =
311 gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cs_v()] =
312 gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v()] =
313 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v();
314
315 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v()] =
316 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cs_v()] =
317 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v()] =
318 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v();
319
320 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v()] =
321 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cs_v()] =
322 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cszv_v()] =
323 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v();
324
325 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v()] =
326 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cs_v()] =
327 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cszv_v()] =
328 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v();
329
330 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v()] =
331 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cs_v()] =
332 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v()] =
333 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v();
334
335 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_v()] =
336 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_2cszv_v()] =
337 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms2_2cszv_v()] =
338 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms4_2cszv_v()] =
339 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v()] =
340 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_2cs_v()] =
341 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms2_2cs_v()] =
342 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms4_2cs_v()] =
343 gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms8_2cs_v()] =
344 gmmu_pte_kind_zf32_x24s8_v();
345
346 gk20a_uc_kind_map[gmmu_pte_kind_c32_2c_v()] =
347 gk20a_uc_kind_map[gmmu_pte_kind_c32_2cba_v()] =
348 gk20a_uc_kind_map[gmmu_pte_kind_c32_2cra_v()] =
349 gk20a_uc_kind_map[gmmu_pte_kind_c32_2bra_v()] =
350 gk20a_uc_kind_map[gmmu_pte_kind_c32_ms2_2c_v()] =
351 gk20a_uc_kind_map[gmmu_pte_kind_c32_ms2_2cra_v()] =
352 gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2c_v()] =
353 gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2cbr_v()] =
354 gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2cba_v()] =
355 gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2cra_v()] =
356 gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2bra_v()] =
357 gk20a_uc_kind_map[gmmu_pte_kind_c32_ms8_ms16_2c_v()] =
358 gk20a_uc_kind_map[gmmu_pte_kind_c32_ms8_ms16_2cra_v()] =
359 gk20a_uc_kind_map[gmmu_pte_kind_c64_2c_v()] =
360 gk20a_uc_kind_map[gmmu_pte_kind_c64_2cbr_v()] =
361 gk20a_uc_kind_map[gmmu_pte_kind_c64_2cba_v()] =
362 gk20a_uc_kind_map[gmmu_pte_kind_c64_2cra_v()] =
363 gk20a_uc_kind_map[gmmu_pte_kind_c64_2bra_v()] =
364 gk20a_uc_kind_map[gmmu_pte_kind_c64_ms2_2c_v()] =
365 gk20a_uc_kind_map[gmmu_pte_kind_c64_ms2_2cra_v()] =
366 gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2c_v()] =
367 gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2cbr_v()] =
368 gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2cba_v()] =
369 gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2cra_v()] =
370 gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2bra_v()] =
371 gk20a_uc_kind_map[gmmu_pte_kind_c64_ms8_ms16_2c_v()] =
372 gk20a_uc_kind_map[gmmu_pte_kind_c64_ms8_ms16_2cra_v()] =
373 gk20a_uc_kind_map[gmmu_pte_kind_c128_2c_v()] =
374 gk20a_uc_kind_map[gmmu_pte_kind_c128_2cr_v()] =
375 gk20a_uc_kind_map[gmmu_pte_kind_c128_ms2_2c_v()] =
376 gk20a_uc_kind_map[gmmu_pte_kind_c128_ms2_2cr_v()] =
377 gk20a_uc_kind_map[gmmu_pte_kind_c128_ms4_2c_v()] =
378 gk20a_uc_kind_map[gmmu_pte_kind_c128_ms4_2cr_v()] =
379 gk20a_uc_kind_map[gmmu_pte_kind_c128_ms8_ms16_2c_v()] =
380 gk20a_uc_kind_map[gmmu_pte_kind_c128_ms8_ms16_2cr_v()] =
381 gmmu_pte_kind_generic_16bx2_v();
382
383 gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc4_2czv_v()] =
384 gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc4_2cs_v()] =
385 gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc4_2zv_v()] =
386 gmmu_pte_kind_z24v8_ms4_vc4_v();
387
388 gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc12_2czv_v()] =
389 gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc12_2cs_v()] =
390 gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc12_2zv_v()] =
391 gmmu_pte_kind_z24v8_ms4_vc12_v();
392
393 gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc8_2cs_v()] =
394 gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc8_2czv_v()] =
395 gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc8_2zv_v()] =
396 gmmu_pte_kind_z24v8_ms8_vc8_v();
397
398 gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()] =
399 gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc24_2czv_v()] =
400 gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc24_2zv_v()] =
401 gmmu_pte_kind_z24v8_ms8_vc24_v();
402
403 gk20a_uc_kind_map[gmmu_pte_kind_x8c24_v()] =
404 gmmu_pte_kind_x8c24_v();
405}
406
407u16 gk20a_kind_attr[256];
408void gk20a_init_kind_attr(void)
409{
410 u16 k;
411 for (k = 0; k < 256; k++) {
412 gk20a_kind_attr[k] = 0;
413 if (gk20a_kind_supported((u8)k))
414 gk20a_kind_attr[k] |= GK20A_KIND_ATTR_SUPPORTED;
415 if (gk20a_kind_compressible((u8)k))
416 gk20a_kind_attr[k] |= GK20A_KIND_ATTR_COMPRESSIBLE;
417 if (gk20a_kind_z((u8)k))
418 gk20a_kind_attr[k] |= GK20A_KIND_ATTR_Z;
419 if (gk20a_kind_c((u8)k))
420 gk20a_kind_attr[k] |= GK20A_KIND_ATTR_C;
421 if (gk20a_kind_zbc((u8)k))
422 gk20a_kind_attr[k] |= GK20A_KIND_ATTR_ZBC;
423 }
424}
diff --git a/drivers/gpu/nvgpu/gk20a/kind_gk20a.h b/drivers/gpu/nvgpu/gk20a/kind_gk20a.h
new file mode 100644
index 00000000..93f011d4
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/kind_gk20a.h
@@ -0,0 +1,67 @@
1/*
2 * drivers/video/tegra/host/gk20a/kind_gk20a.h
3 *
4 * GK20A memory kind management
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#ifndef __KIND_GK20A_H__
22#define __KIND_GK20A_H__
23
24
25void gk20a_init_uncompressed_kind_map(void);
26void gk20a_init_kind_attr(void);
27
28extern u16 gk20a_kind_attr[];
29#define NV_KIND_DEFAULT -1
30
31#define GK20A_KIND_ATTR_SUPPORTED BIT(0)
32#define GK20A_KIND_ATTR_COMPRESSIBLE BIT(1)
33#define GK20A_KIND_ATTR_Z BIT(2)
34#define GK20A_KIND_ATTR_C BIT(3)
35#define GK20A_KIND_ATTR_ZBC BIT(4)
36
37static inline bool gk20a_kind_is_supported(u8 k)
38{
39 return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_SUPPORTED);
40}
41static inline bool gk20a_kind_is_compressible(u8 k)
42{
43 return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_COMPRESSIBLE);
44}
45
46static inline bool gk20a_kind_is_z(u8 k)
47{
48 return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_Z);
49}
50
51static inline bool gk20a_kind_is_c(u8 k)
52{
53 return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_C);
54}
55static inline bool gk20a_kind_is_zbc(u8 k)
56{
57 return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_ZBC);
58}
59
60/* maps kind to its uncompressed version */
61extern u8 gk20a_uc_kind_map[];
62static inline u8 gk20a_get_uncompressed_kind(u8 k)
63{
64 return gk20a_uc_kind_map[k];
65}
66
67#endif /* __KIND_GK20A_H__ */
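
A short usage sketch (not part of the patch): the attribute table is a plain 256-entry lookup, so queries are cheap once gk20a_init_uncompressed_kind_map() and gk20a_init_kind_attr() have run (presumably once during driver init). The helper name is illustrative; a compressible kind can then be mapped back to its uncompressed form with gk20a_get_uncompressed_kind().

static bool sketch_kind_needs_comptags(u8 kind)
{
	return gk20a_kind_is_supported(kind) &&
	       gk20a_kind_is_compressible(kind);
}
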
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c
new file mode 100644
index 00000000..cbb27cc7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c
@@ -0,0 +1,243 @@
1/*
2 * drivers/video/tegra/host/gk20a/ltc_common.c
3 *
4 * GK20A Graphics
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#include <linux/dma-mapping.h>
22#include <linux/delay.h>
23
24#include "gk20a.h"
25#include "gr_gk20a.h"
26
27static int gk20a_determine_L2_size_bytes(struct gk20a *g)
28{
29 const u32 gpuid = GK20A_GPUID(g->gpu_characteristics.arch,
30 g->gpu_characteristics.impl);
31 u32 lts_per_ltc;
32 u32 ways;
33 u32 sets;
34 u32 bytes_per_line;
35 u32 active_ltcs;
36 u32 cache_size;
37
38 u32 tmp;
39 u32 active_sets_value;
40
41 tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r());
42 ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp));
43
44 active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp);
45 if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) {
46 sets = 64;
47 } else if (active_sets_value ==
48 ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) {
49 sets = 32;
50 } else if (active_sets_value ==
51 ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) {
52 sets = 16;
53 } else {
54 dev_err(dev_from_gk20a(g),
55 "Unknown constant %u for active sets",
56 (unsigned)active_sets_value);
57 sets = 0;
58 }
59
60 active_ltcs = g->gr.num_fbps;
61
62 /* chip-specific values */
63 switch (gpuid) {
64 case GK20A_GPUID_GK20A:
65 lts_per_ltc = 1;
66 bytes_per_line = 128;
67 break;
68
69 default:
70 dev_err(dev_from_gk20a(g), "Unknown GPU id 0x%02x\n",
71 (unsigned)gpuid);
72 lts_per_ltc = 0;
73 bytes_per_line = 0;
74 }
75
76 cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line;
77
78 return cache_size;
79}
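/*
 * Worked example with illustrative numbers (not taken from the patch):
 * 1 active LTC * 1 LTS per LTC * 16 ways * 64 sets * 128-byte lines
 * gives 1 * 1 * 16 * 64 * 128 = 131072 bytes, i.e. a 128 KiB L2.
 */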
80
81/*
 82 * Set the maximum number of ways that can have the "EVICT_LAST" class.
83 */
84static void gk20a_ltc_set_max_ways_evict_last(struct gk20a *g, u32 max_ways)
85{
86 u32 mgmt_reg;
87
88 mgmt_reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_r()) &
89 ~ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(~0);
90 mgmt_reg |= ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(max_ways);
91
92 gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_r(), mgmt_reg);
93}
94
95/*
96 * Sets the ZBC color for the passed index.
97 */
98static void gk20a_ltc_set_zbc_color_entry(struct gk20a *g,
99 struct zbc_entry *color_val,
100 u32 index)
101{
102 u32 i;
103 u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;
104
105 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
106 ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));
107
108 for (i = 0;
109 i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++)
110 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i),
111 color_val->color_l2[i]);
112}
113
114/*
115 * Sets the ZBC depth for the passed index.
116 */
117static void gk20a_ltc_set_zbc_depth_entry(struct gk20a *g,
118 struct zbc_entry *depth_val,
119 u32 index)
120{
121 u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;
122
123 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
124 ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));
125
126 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(),
127 depth_val->depth);
128}
129
130/*
131 * Clear the L2 ZBC color table for the passed index.
132 */
133static void gk20a_ltc_clear_zbc_color_entry(struct gk20a *g, u32 index)
134{
135 u32 i;
136 u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;
137
138 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
139 ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));
140
141 for (i = 0;
142 i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++)
143 gk20a_writel(g,
144 ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), 0);
145}
146
147/*
148 * Clear the L2 ZBC depth entry for the passed index.
149 */
150static void gk20a_ltc_clear_zbc_depth_entry(struct gk20a *g, u32 index)
151{
152 u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;
153
154 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
155 ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));
156
157 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0);
158}
159
160static int gk20a_ltc_init_zbc(struct gk20a *g, struct gr_gk20a *gr)
161{
162 u32 i, j;
163
164 /* reset zbc clear */
165 for (i = 0; i < GK20A_SIZEOF_ZBC_TABLE -
166 GK20A_STARTOF_ZBC_TABLE; i++) {
167 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
168 (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
169 ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
170 ltc_ltcs_ltss_dstg_zbc_index_address_f(
171 i + GK20A_STARTOF_ZBC_TABLE));
172 for (j = 0; j < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); j++)
173 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(j), 0);
174 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0);
175 }
176
177 gr_gk20a_clear_zbc_table(g, gr);
178 gr_gk20a_load_zbc_default_table(g, gr);
179
180 return 0;
181}
182
183static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
184{
185 u32 compbit_base_post_divide;
186 u64 compbit_base_post_multiply64;
187 u64 compbit_store_base_iova =
188 NV_MC_SMMU_VADDR_TRANSLATE(gr->compbit_store.base_iova);
189 u64 compbit_base_post_divide64 = (compbit_store_base_iova >>
190 ltc_ltcs_ltss_cbc_base_alignment_shift_v());
191
192 do_div(compbit_base_post_divide64, gr->num_fbps);
193 compbit_base_post_divide = u64_lo32(compbit_base_post_divide64);
194
195 compbit_base_post_multiply64 = ((u64)compbit_base_post_divide *
196 gr->num_fbps) << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
197
198 if (compbit_base_post_multiply64 < compbit_store_base_iova)
199 compbit_base_post_divide++;
200
201 gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(),
202 compbit_base_post_divide);
203
204 gk20a_dbg(gpu_dbg_info | gpu_dbg_map | gpu_dbg_pte,
205 "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n",
206 (u32)(compbit_store_base_iova >> 32),
207 (u32)(compbit_store_base_iova & 0xffffffff),
208 compbit_base_post_divide);
209}
210
211/* Flushes the compression bit cache as well as "data".
212 * Note: the name here is a bit of a misnomer. ELPG uses this
213 * internally... but ELPG doesn't have to be on to do it manually.
214 */
215static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
216{
217 u32 data;
218 s32 retry = 100;
219
220 gk20a_dbg_fn("");
221
222 /* Make sure all previous writes are committed to the L2. There's no
223 guarantee that writes are to DRAM. This will be a sysmembar internal
224 to the L2. */
225 gk20a_writel(g, ltc_ltss_g_elpg_r(),
226 ltc_ltss_g_elpg_flush_pending_f());
227 do {
228 data = gk20a_readl(g, ltc_ltss_g_elpg_r());
229
230 if (ltc_ltss_g_elpg_flush_v(data) ==
231 ltc_ltss_g_elpg_flush_pending_v()) {
232 gk20a_dbg_info("g_elpg_flush 0x%x", data);
233 retry--;
234 usleep_range(20, 40);
235 } else
236 break;
237 } while (retry >= 0 || !tegra_platform_is_silicon());
238
239 if (retry < 0)
240 gk20a_warn(dev_from_gk20a(g),
241 "g_elpg_flush too many retries");
242
243}
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
new file mode 100644
index 00000000..08aedecd
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -0,0 +1,203 @@
1/*
2 * drivers/video/tegra/host/gk20a/ltc_gk20a.c
3 *
4 * GK20A Graphics
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#include <linux/kernel.h>
22
23#include "hw_ltc_gk20a.h"
24#include "hw_proj_gk20a.h"
25
26#include "ltc_common.c"
27
28static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
29{
30 struct device *d = dev_from_gk20a(g);
31 DEFINE_DMA_ATTRS(attrs);
32 dma_addr_t iova;
33
34 /* max memory size (MB) to cover */
35 u32 max_size = gr->max_comptag_mem;
36 /* one tag line covers 128KB */
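	/* i.e. 8 comptag lines per MB of memory (1 MB / 128 KB == 8), hence << 3 */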
37 u32 max_comptag_lines = max_size << 3;
38
39 u32 hw_max_comptag_lines =
40 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();
41
42 u32 cbc_param =
43 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
44 u32 comptags_per_cacheline =
45 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
46 u32 slices_per_fbp =
47 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(cbc_param);
48 u32 cacheline_size =
49 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
50
51 u32 compbit_backing_size;
52
53 gk20a_dbg_fn("");
54
55 if (max_comptag_lines == 0) {
56 gr->compbit_store.size = 0;
57 return 0;
58 }
59
60 if (max_comptag_lines > hw_max_comptag_lines)
61 max_comptag_lines = hw_max_comptag_lines;
62
63	/* no hybrid fb */
64 compbit_backing_size =
65 DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
66 cacheline_size * slices_per_fbp * gr->num_fbps;
67
68 /* aligned to 2KB * num_fbps */
69 compbit_backing_size +=
70 gr->num_fbps << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
71
72 /* must be a multiple of 64KB */
73 compbit_backing_size = roundup(compbit_backing_size, 64*1024);
74
75 max_comptag_lines =
76		(compbit_backing_size * comptags_per_cacheline) /
77		(cacheline_size * slices_per_fbp * gr->num_fbps);
78
79 if (max_comptag_lines > hw_max_comptag_lines)
80 max_comptag_lines = hw_max_comptag_lines;
81
82 gk20a_dbg_info("compbit backing store size : %d",
83 compbit_backing_size);
84 gk20a_dbg_info("max comptag lines : %d",
85 max_comptag_lines);
86
87 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
88 gr->compbit_store.size = compbit_backing_size;
89 gr->compbit_store.pages = dma_alloc_attrs(d, gr->compbit_store.size,
90 &iova, GFP_KERNEL, &attrs);
91 if (!gr->compbit_store.pages) {
92		gk20a_err(dev_from_gk20a(g), "failed to allocate "
93 "backing store for compbit : size %d",
94 compbit_backing_size);
95 return -ENOMEM;
96 }
97 gr->compbit_store.base_iova = iova;
98
99 gk20a_allocator_init(&gr->comp_tags, "comptag",
100 1, /* start */
101			max_comptag_lines - 1, /* length */
102 1); /* align */
103
104 return 0;
105}
106
107static int gk20a_ltc_clear_comptags(struct gk20a *g, u32 min, u32 max)
108{
109 struct gr_gk20a *gr = &g->gr;
110 u32 fbp, slice, ctrl1, val;
111 unsigned long end_jiffies = jiffies +
112 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
113 u32 delay = GR_IDLE_CHECK_DEFAULT;
114 u32 slices_per_fbp =
115 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
116 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
117
118 gk20a_dbg_fn("");
119
120 if (gr->compbit_store.size == 0)
121 return 0;
122
123 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
124 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
125 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
126 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
127 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
128 gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) |
129 ltc_ltcs_ltss_cbc_ctrl1_clear_active_f());
130
131 for (fbp = 0; fbp < gr->num_fbps; fbp++) {
132 for (slice = 0; slice < slices_per_fbp; slice++) {
133
134 delay = GR_IDLE_CHECK_DEFAULT;
135
136 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
137 fbp * proj_ltc_stride_v() +
138 slice * proj_lts_stride_v();
139
140 do {
141 val = gk20a_readl(g, ctrl1);
142 if (ltc_ltcs_ltss_cbc_ctrl1_clear_v(val) !=
143 ltc_ltcs_ltss_cbc_ctrl1_clear_active_v())
144 break;
145
146 usleep_range(delay, delay * 2);
147 delay = min_t(u32, delay << 1,
148 GR_IDLE_CHECK_MAX);
149
150 } while (time_before(jiffies, end_jiffies) ||
151 !tegra_platform_is_silicon());
152
153 if (!time_before(jiffies, end_jiffies)) {
154 gk20a_err(dev_from_gk20a(g),
155 "comp tag clear timeout\n");
156 return -EBUSY;
157 }
158 }
159 }
160
161 return 0;
162}
163
164
165#ifdef CONFIG_DEBUG_FS
166static void gk20a_ltc_sync_debugfs(struct gk20a *g)
167{
168 u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();
169
170 spin_lock(&g->debugfs_lock);
171 if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) {
172 u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());
173 if (g->mm.ltc_enabled_debug)
174 /* bypass disabled (normal caching ops)*/
175 reg &= ~reg_f;
176 else
177 /* bypass enabled (no caching) */
178 reg |= reg_f;
179
180 gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg);
181 g->mm.ltc_enabled = g->mm.ltc_enabled_debug;
182 }
183 spin_unlock(&g->debugfs_lock);
184}
185#endif
186
187void gk20a_init_ltc(struct gpu_ops *gops)
188{
189 gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes;
190 gops->ltc.set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last;
191 gops->ltc.init_comptags = gk20a_ltc_init_comptags;
192 gops->ltc.clear_comptags = gk20a_ltc_clear_comptags;
193 gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry;
194 gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry;
195 gops->ltc.clear_zbc_color_entry = gk20a_ltc_clear_zbc_color_entry;
196 gops->ltc.clear_zbc_depth_entry = gk20a_ltc_clear_zbc_depth_entry;
197 gops->ltc.init_zbc = gk20a_ltc_init_zbc;
198 gops->ltc.init_cbc = gk20a_ltc_init_cbc;
199#ifdef CONFIG_DEBUG_FS
200 gops->ltc.sync_debugfs = gk20a_ltc_sync_debugfs;
201#endif
202 gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked;
203}
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h
new file mode 100644
index 00000000..208811b2
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h
@@ -0,0 +1,21 @@
1/*
2 * GK20A L2
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef _NVHOST_GK20A_LTC
17#define _NVHOST_GK20A_LTC
18struct gk20a;
19
20void gk20a_init_ltc(struct gpu_ops *gops);
21#endif
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
new file mode 100644
index 00000000..b22df5e8
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -0,0 +1,2984 @@
1/*
2 * drivers/video/tegra/host/gk20a/mm_gk20a.c
3 *
4 * GK20A memory management
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#include <linux/delay.h>
23#include <linux/highmem.h>
24#include <linux/log2.h>
25#include <linux/nvhost.h>
26#include <linux/pm_runtime.h>
27#include <linux/scatterlist.h>
28#include <linux/nvmap.h>
29#include <linux/tegra-soc.h>
30#include <linux/vmalloc.h>
31#include <linux/dma-buf.h>
32#include <asm/cacheflush.h>
33
34#include "gk20a.h"
35#include "mm_gk20a.h"
36#include "hw_gmmu_gk20a.h"
37#include "hw_fb_gk20a.h"
38#include "hw_bus_gk20a.h"
39#include "hw_ram_gk20a.h"
40#include "hw_mc_gk20a.h"
41#include "hw_flush_gk20a.h"
42#include "hw_ltc_gk20a.h"
43
44#include "kind_gk20a.h"
45
46#ifdef CONFIG_ARM64
47#define outer_flush_range(a, b)
48#define __cpuc_flush_dcache_area __flush_dcache_area
49#endif
50
51/*
52 * GPU mapping life cycle
53 * ======================
54 *
55 * Kernel mappings
56 * ---------------
57 *
58 * Kernel mappings are created through vm.map(..., false):
59 *
60 * - Mappings to the same allocations are reused and refcounted.
61 * - This path does not support deferred unmapping (i.e. kernel must wait for
62 * all hw operations on the buffer to complete before unmapping).
63 * - References to dmabuf are owned and managed by the (kernel) clients of
64 * the gk20a_vm layer.
65 *
66 *
67 * User space mappings
68 * -------------------
69 *
70 * User space mappings are created through as.map_buffer -> vm.map(..., true):
71 *
72 * - Mappings to the same allocations are reused and refcounted.
73 * - This path supports deferred unmapping (i.e. we delay the actual unmapping
74 * until all hw operations have completed).
75 * - References to dmabuf are owned and managed by the vm_gk20a
76 * layer itself. vm.map acquires these refs, and sets
77 * mapped_buffer->own_mem_ref to record that we must release the refs when we
78 * actually unmap.
79 *
80 */
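/*
 * A rough sketch of a kernel-side caller of this layer (names such as
 * "my_dmabuf" are placeholders and error handling is omitted):
 *
 *	struct sg_table *sgt;
 *	u64 gpu_va = gk20a_vm_map(vm, my_dmabuf, 0, 0, 0, &sgt,
 *				  false, gk20a_mem_flag_none);
 *
 * Passing user_mapped == false takes the kernel path described above:
 * the mapping is refcounted per dmabuf/kind, and the caller must wait
 * for all hw work on the buffer to complete before unmapping.
 */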
81
82static inline int vm_aspace_id(struct vm_gk20a *vm)
83{
84 /* -1 is bar1 or pmu, etc. */
85 return vm->as_share ? vm->as_share->id : -1;
86}
87static inline u32 hi32(u64 f)
88{
89 return (u32)(f >> 32);
90}
91static inline u32 lo32(u64 f)
92{
93 return (u32)(f & 0xffffffff);
94}
95
96#define FLUSH_CPU_DCACHE(va, pa, size) \
97 do { \
98 __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \
99 outer_flush_range(pa, pa + (size_t)(size)); \
100 } while (0)
101
102static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
103static struct mapped_buffer_node *find_mapped_buffer_locked(
104 struct rb_root *root, u64 addr);
105static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
106 struct rb_root *root, struct dma_buf *dmabuf,
107 u32 kind);
108static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
109 enum gmmu_pgsz_gk20a pgsz_idx,
110 struct sg_table *sgt,
111 u64 first_vaddr, u64 last_vaddr,
112 u8 kind_v, u32 ctag_offset, bool cacheable,
113 int rw_flag);
114static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
115static void gk20a_vm_remove_support(struct vm_gk20a *vm);
116
117
118/* note: keep the page sizes sorted lowest to highest here */
119static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
120static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
121static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
122 0x1ffffLL };
123static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
124
125struct gk20a_comptags {
126 u32 offset;
127 u32 lines;
128};
129
130struct gk20a_dmabuf_priv {
131 struct mutex lock;
132
133 struct gk20a_allocator *comptag_allocator;
134 struct gk20a_comptags comptags;
135
136 struct dma_buf_attachment *attach;
137 struct sg_table *sgt;
138
139 int pin_count;
140};
141
142static void gk20a_mm_delete_priv(void *_priv)
143{
144 struct gk20a_dmabuf_priv *priv = _priv;
145 if (!priv)
146 return;
147
148 if (priv->comptags.lines) {
149 BUG_ON(!priv->comptag_allocator);
150 priv->comptag_allocator->free(priv->comptag_allocator,
151 priv->comptags.offset,
152 priv->comptags.lines);
153 }
154
155 kfree(priv);
156}
157
158struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf)
159{
160 struct gk20a_dmabuf_priv *priv;
161
162 priv = dma_buf_get_drvdata(dmabuf, dev);
163 if (WARN_ON(!priv))
164 return ERR_PTR(-EINVAL);
165
166 mutex_lock(&priv->lock);
167
168 if (priv->pin_count == 0) {
169 priv->attach = dma_buf_attach(dmabuf, dev);
170 if (IS_ERR(priv->attach)) {
171 mutex_unlock(&priv->lock);
172 return (struct sg_table *)priv->attach;
173 }
174
175 priv->sgt = dma_buf_map_attachment(priv->attach,
176 DMA_BIDIRECTIONAL);
177 if (IS_ERR(priv->sgt)) {
178 dma_buf_detach(dmabuf, priv->attach);
179 mutex_unlock(&priv->lock);
180 return priv->sgt;
181 }
182 }
183
184 priv->pin_count++;
185 mutex_unlock(&priv->lock);
186 return priv->sgt;
187}
188
189void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
190 struct sg_table *sgt)
191{
192 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
193 dma_addr_t dma_addr;
194
195 if (IS_ERR(priv) || !priv)
196 return;
197
198 mutex_lock(&priv->lock);
199 WARN_ON(priv->sgt != sgt);
200 priv->pin_count--;
201 WARN_ON(priv->pin_count < 0);
202 dma_addr = sg_dma_address(priv->sgt->sgl);
203 if (priv->pin_count == 0) {
204 dma_buf_unmap_attachment(priv->attach, priv->sgt,
205 DMA_BIDIRECTIONAL);
206 dma_buf_detach(dmabuf, priv->attach);
207 }
208 mutex_unlock(&priv->lock);
209}
210
211
212static void gk20a_get_comptags(struct device *dev,
213 struct dma_buf *dmabuf,
214 struct gk20a_comptags *comptags)
215{
216 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
217
218 if (!comptags)
219 return;
220
221 if (!priv) {
222 comptags->lines = 0;
223 comptags->offset = 0;
224 return;
225 }
226
227 *comptags = priv->comptags;
228}
229
230static int gk20a_alloc_comptags(struct device *dev,
231 struct dma_buf *dmabuf,
232 struct gk20a_allocator *allocator,
233 int lines)
234{
235 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
236 u32 offset = 0;
237 int err;
238
239 if (!priv)
240 return -ENOSYS;
241
242 if (!lines)
243 return -EINVAL;
244
245 /* store the allocator so we can use it when we free the ctags */
246 priv->comptag_allocator = allocator;
247 err = allocator->alloc(allocator, &offset, lines);
248 if (!err) {
249 priv->comptags.lines = lines;
250 priv->comptags.offset = offset;
251 }
252 return err;
253}
254
255
256
257
258static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
259{
260 gk20a_dbg_fn("");
261 if (g->ops.fb.reset)
262 g->ops.fb.reset(g);
263
264 if (g->ops.fb.init_fs_state)
265 g->ops.fb.init_fs_state(g);
266
267 return 0;
268}
269
270void gk20a_remove_mm_support(struct mm_gk20a *mm)
271{
272 struct gk20a *g = mm->g;
273 struct device *d = dev_from_gk20a(g);
274 struct vm_gk20a *vm = &mm->bar1.vm;
275 struct inst_desc *inst_block = &mm->bar1.inst_block;
276
277 gk20a_dbg_fn("");
278
279 if (inst_block->cpuva)
280 dma_free_coherent(d, inst_block->size,
281 inst_block->cpuva, inst_block->iova);
282 inst_block->cpuva = NULL;
283 inst_block->iova = 0;
284
285 gk20a_vm_remove_support(vm);
286}
287
288int gk20a_init_mm_setup_sw(struct gk20a *g)
289{
290 struct mm_gk20a *mm = &g->mm;
291 int i;
292
293 gk20a_dbg_fn("");
294
295 if (mm->sw_ready) {
296 gk20a_dbg_fn("skip init");
297 return 0;
298 }
299
300 mm->g = g;
301 mutex_init(&mm->tlb_lock);
302 mutex_init(&mm->l2_op_lock);
303 mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
304 mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
305 mm->pde_stride = mm->big_page_size << 10;
306 mm->pde_stride_shift = ilog2(mm->pde_stride);
307 BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
308
309 for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) {
310
311 u32 num_ptes, pte_space, num_pages;
312
313 /* assuming "full" page tables */
314 num_ptes = mm->pde_stride / gmmu_page_sizes[i];
315
316 pte_space = num_ptes * gmmu_pte__size_v();
317 /* allocate whole pages */
318 pte_space = roundup(pte_space, PAGE_SIZE);
319
320 num_pages = pte_space / PAGE_SIZE;
321 /* make sure "order" is viable */
322 BUG_ON(!is_power_of_2(num_pages));
323
324 mm->page_table_sizing[i].num_ptes = num_ptes;
325 mm->page_table_sizing[i].order = ilog2(num_pages);
326 }
327
328 /*TBD: make channel vm size configurable */
329 mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
330
331 gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
332
333 gk20a_dbg_info("small page-size (%dKB) pte array: %dKB",
334 gmmu_page_sizes[gmmu_page_size_small] >> 10,
335 (mm->page_table_sizing[gmmu_page_size_small].num_ptes *
336 gmmu_pte__size_v()) >> 10);
337
338 gk20a_dbg_info("big page-size (%dKB) pte array: %dKB",
339 gmmu_page_sizes[gmmu_page_size_big] >> 10,
340 (mm->page_table_sizing[gmmu_page_size_big].num_ptes *
341 gmmu_pte__size_v()) >> 10);
342
343
344 gk20a_init_bar1_vm(mm);
345
346 mm->remove_support = gk20a_remove_mm_support;
347 mm->sw_ready = true;
348
349 gk20a_dbg_fn("done");
350 return 0;
351}
352
353/* make sure gk20a_init_mm_support is called before */
354static int gk20a_init_mm_setup_hw(struct gk20a *g)
355{
356 struct mm_gk20a *mm = &g->mm;
357 struct inst_desc *inst_block = &mm->bar1.inst_block;
358 phys_addr_t inst_pa = inst_block->cpu_pa;
359
360 gk20a_dbg_fn("");
361
362 /* set large page size in fb
363	 * note this is very early on, can we defer it? */
364 {
365 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
366
367 if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K)
368 fb_mmu_ctrl = (fb_mmu_ctrl &
369 ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
370 fb_mmu_ctrl_vm_pg_size_128kb_f();
371 else
372 BUG_ON(1); /* no support/testing for larger ones yet */
373
374 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
375 }
376
377 inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
378 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa);
379
380 /* this is very early in init... can we defer this? */
381 {
382 gk20a_writel(g, bus_bar1_block_r(),
383 bus_bar1_block_target_vid_mem_f() |
384 bus_bar1_block_mode_virtual_f() |
385 bus_bar1_block_ptr_f(inst_pa));
386 }
387
388 gk20a_dbg_fn("done");
389 return 0;
390}
391
392int gk20a_init_mm_support(struct gk20a *g)
393{
394 u32 err;
395
396 err = gk20a_init_mm_reset_enable_hw(g);
397 if (err)
398 return err;
399
400 err = gk20a_init_mm_setup_sw(g);
401 if (err)
402 return err;
403
404 err = gk20a_init_mm_setup_hw(g);
405 if (err)
406 return err;
407
408 return err;
409}
410
411#ifdef CONFIG_GK20A_PHYS_PAGE_TABLES
412static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
413 void **handle,
414 struct sg_table **sgt,
415 size_t *size)
416{
417 u32 num_pages = 1 << order;
418 u32 len = num_pages * PAGE_SIZE;
419 int err;
420 struct page *pages;
421
422 gk20a_dbg_fn("");
423
424 pages = alloc_pages(GFP_KERNEL, order);
425 if (!pages) {
426 gk20a_dbg(gpu_dbg_pte, "alloc_pages failed\n");
427 goto err_out;
428 }
429	*sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
430	if (!(*sgt)) {
431 gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table");
432 goto err_alloced;
433 }
434 err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
435 if (err) {
436 gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed\n");
437 goto err_sg_table;
438 }
439 sg_set_page((*sgt)->sgl, pages, len, 0);
440 *handle = page_address(pages);
441 memset(*handle, 0, len);
442 *size = len;
443 FLUSH_CPU_DCACHE(*handle, sg_phys((*sgt)->sgl), len);
444
445 return 0;
446
447err_sg_table:
448 kfree(*sgt);
449err_alloced:
450 __free_pages(pages, order);
451err_out:
452 return -ENOMEM;
453}
454
455static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
456 struct sg_table *sgt, u32 order,
457 size_t size)
458{
459 gk20a_dbg_fn("");
460 BUG_ON(sgt == NULL);
461 free_pages((unsigned long)handle, order);
462 sg_free_table(sgt);
463 kfree(sgt);
464}
465
466static int map_gmmu_pages(void *handle, struct sg_table *sgt,
467 void **va, size_t size)
468{
469 FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
470 *va = handle;
471 return 0;
472}
473
474static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
475{
476 FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
477}
478#else
479static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
480 void **handle,
481 struct sg_table **sgt,
482 size_t *size)
483{
484 struct device *d = dev_from_vm(vm);
485 u32 num_pages = 1 << order;
486 u32 len = num_pages * PAGE_SIZE;
487 dma_addr_t iova;
488 DEFINE_DMA_ATTRS(attrs);
489 struct page **pages;
490 int err = 0;
491
492 gk20a_dbg_fn("");
493
494 *size = len;
495 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
496 pages = dma_alloc_attrs(d, len, &iova, GFP_KERNEL, &attrs);
497 if (!pages) {
498 gk20a_err(d, "memory allocation failed\n");
499 goto err_out;
500 }
501
502 err = gk20a_get_sgtable_from_pages(d, sgt, pages,
503 iova, len);
504 if (err) {
505 gk20a_err(d, "sgt allocation failed\n");
506 goto err_free;
507 }
508
509 *handle = (void *)pages;
510
511 return 0;
512
513err_free:
514 dma_free_attrs(d, len, pages, iova, &attrs);
515 pages = NULL;
516 iova = 0;
517err_out:
518 return -ENOMEM;
519}
520
521static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
522 struct sg_table *sgt, u32 order,
523 size_t size)
524{
525 struct device *d = dev_from_vm(vm);
526 u64 iova;
527 DEFINE_DMA_ATTRS(attrs);
528 struct page **pages = (struct page **)handle;
529
530 gk20a_dbg_fn("");
531 BUG_ON(sgt == NULL);
532
533 iova = sg_dma_address(sgt->sgl);
534
535 gk20a_free_sgtable(&sgt);
536
537 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
538 dma_free_attrs(d, size, pages, iova, &attrs);
539 pages = NULL;
540 iova = 0;
541}
542
543static int map_gmmu_pages(void *handle, struct sg_table *sgt,
544 void **kva, size_t size)
545{
546 int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
547 struct page **pages = (struct page **)handle;
548 gk20a_dbg_fn("");
549
550 *kva = vmap(pages, count, 0, pgprot_dmacoherent(PAGE_KERNEL));
551 if (!(*kva))
552 return -ENOMEM;
553
554 return 0;
555}
556
557static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
558{
559 gk20a_dbg_fn("");
560 vunmap(va);
561}
562#endif
563
564/* allocate a phys contig region big enough for a full
565 * sized gmmu page table for the given gmmu_page_size.
566 * the whole range is zeroed so it's "invalid"/will fault
567 */
568
569static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
570 enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
571 struct page_table_gk20a *pte)
572{
573 int err;
574 u32 pte_order;
575 void *handle = NULL;
576 struct sg_table *sgt;
577 size_t size;
578
579 gk20a_dbg_fn("");
580
581 /* allocate enough pages for the table */
582 pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order;
583
584 err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size);
585 if (err)
586 return err;
587
588 gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d",
589 pte, gk20a_mm_iova_addr(sgt->sgl), pte_order);
590
591 pte->ref = handle;
592 pte->sgt = sgt;
593 pte->size = size;
594
595 return 0;
596}
597
598/* given address range (inclusive) determine the pdes crossed */
599static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm,
600 u64 addr_lo, u64 addr_hi,
601 u32 *pde_lo, u32 *pde_hi)
602{
603 *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift);
604 *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift);
605 gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
606 addr_lo, addr_hi, vm->mm->pde_stride_shift);
607 gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d",
608 *pde_lo, *pde_hi);
609}
610
611static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
612{
613 return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v());
614}
615
616static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm,
617 u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
618{
619 u32 ret;
620 /* mask off pde part */
621 addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1));
622 /* shift over to get pte index. note assumption that pte index
623 * doesn't leak over into the high 32b */
624 ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]);
625
626 gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
627 return ret;
628}
629
630static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page,
631 u32 *pte_offset)
632{
633	/* ptes are 8B regardless of pagesize */
634	/* pte space pages are 4KB, so 512 (4096 / 8) ptes per page */
635 *pte_page = i >> 9;
636
637 /* this offset is a pte offset, not a byte offset */
638 *pte_offset = i & ((1<<9)-1);
639
640 gk20a_dbg(gpu_dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x",
641 i, *pte_page, *pte_offset);
642}
643
644
645/*
646 * given a pde index/page table number make sure it has
647 * backing store and if not go ahead allocate it and
648 * record it in the appropriate pde
649 */
650static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
651 u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
652{
653 int err;
654 struct page_table_gk20a *pte =
655 vm->pdes.ptes[gmmu_pgsz_idx] + i;
656
657 gk20a_dbg_fn("");
658
659 /* if it's already in place it's valid */
660 if (pte->ref)
661 return 0;
662
663 gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d",
664 gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
665
666 err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte);
667 if (err)
668 return err;
669
670 /* rewrite pde */
671 update_gmmu_pde_locked(vm, i);
672
673 return 0;
674}
675
676static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
677 u64 addr)
678{
679 struct vm_reserved_va_node *va_node;
680 list_for_each_entry(va_node, &vm->reserved_va_list, reserved_va_list)
681 if (addr >= va_node->vaddr_start &&
682 addr < (u64)va_node->vaddr_start + (u64)va_node->size)
683 return va_node;
684
685 return NULL;
686}
687
688int gk20a_vm_get_buffers(struct vm_gk20a *vm,
689 struct mapped_buffer_node ***mapped_buffers,
690 int *num_buffers)
691{
692 struct mapped_buffer_node *mapped_buffer;
693 struct mapped_buffer_node **buffer_list;
694 struct rb_node *node;
695 int i = 0;
696
697 mutex_lock(&vm->update_gmmu_lock);
698
699 buffer_list = kzalloc(sizeof(*buffer_list) *
700 vm->num_user_mapped_buffers, GFP_KERNEL);
701 if (!buffer_list) {
702 mutex_unlock(&vm->update_gmmu_lock);
703 return -ENOMEM;
704 }
705
706 node = rb_first(&vm->mapped_buffers);
707 while (node) {
708 mapped_buffer =
709 container_of(node, struct mapped_buffer_node, node);
710 if (mapped_buffer->user_mapped) {
711 buffer_list[i] = mapped_buffer;
712 kref_get(&mapped_buffer->ref);
713 i++;
714 }
715 node = rb_next(&mapped_buffer->node);
716 }
717
718 BUG_ON(i != vm->num_user_mapped_buffers);
719
720 *num_buffers = vm->num_user_mapped_buffers;
721 *mapped_buffers = buffer_list;
722
723 mutex_unlock(&vm->update_gmmu_lock);
724
725 return 0;
726}
727
728static void gk20a_vm_unmap_locked_kref(struct kref *ref)
729{
730 struct mapped_buffer_node *mapped_buffer =
731 container_of(ref, struct mapped_buffer_node, ref);
732 gk20a_vm_unmap_locked(mapped_buffer);
733}
734
735void gk20a_vm_put_buffers(struct vm_gk20a *vm,
736 struct mapped_buffer_node **mapped_buffers,
737 int num_buffers)
738{
739 int i;
740
741 mutex_lock(&vm->update_gmmu_lock);
742
743 for (i = 0; i < num_buffers; ++i)
744 kref_put(&mapped_buffers[i]->ref,
745 gk20a_vm_unmap_locked_kref);
746
747 mutex_unlock(&vm->update_gmmu_lock);
748
749 kfree(mapped_buffers);
750}
751
752static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
753{
754 struct device *d = dev_from_vm(vm);
755 int retries;
756 struct mapped_buffer_node *mapped_buffer;
757
758 mutex_lock(&vm->update_gmmu_lock);
759
760 mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
761 if (!mapped_buffer) {
762 mutex_unlock(&vm->update_gmmu_lock);
763 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
764 return;
765 }
766
767 if (mapped_buffer->flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
768 mutex_unlock(&vm->update_gmmu_lock);
769
770 retries = 1000;
771 while (retries) {
772 if (atomic_read(&mapped_buffer->ref.refcount) == 1)
773 break;
774 retries--;
775 udelay(50);
776 }
777 if (!retries)
778 gk20a_err(d, "sync-unmap failed on 0x%llx",
779 offset);
780 mutex_lock(&vm->update_gmmu_lock);
781 }
782
783 mapped_buffer->user_mapped--;
784 if (mapped_buffer->user_mapped == 0)
785 vm->num_user_mapped_buffers--;
786 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
787
788 mutex_unlock(&vm->update_gmmu_lock);
789}
790
791static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
792 u64 size,
793 enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
794
795{
796 struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
797 int err;
798 u64 offset;
799 u32 start_page_nr = 0, num_pages;
800 u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx];
801
802 if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) {
803 dev_warn(dev_from_vm(vm),
804 "invalid page size requested in gk20a vm alloc");
805 return -EINVAL;
806 }
807
808 if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
809 dev_warn(dev_from_vm(vm),
810 "unsupportd page size requested");
811 return -EINVAL;
812
813 }
814
815 /* be certain we round up to gmmu_page_size if needed */
816 /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
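	/* (size + pgsz - 1) & ~(pgsz - 1) rounds size up to the next multiple
	 * of gmmu_page_size; this works because the gmmu page sizes are powers
	 * of two */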
817 size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
818
819 gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
820 gmmu_page_sizes[gmmu_pgsz_idx]>>10);
821
822 /* The vma allocator represents page accounting. */
823 num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx];
824
825 err = vma->alloc(vma, &start_page_nr, num_pages);
826
827 if (err) {
828 gk20a_err(dev_from_vm(vm),
829 "%s oom: sz=0x%llx", vma->name, size);
830 return 0;
831 }
832
833 offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx];
834 gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
835
836 return offset;
837}
838
839static int gk20a_vm_free_va(struct vm_gk20a *vm,
840 u64 offset, u64 size,
841 enum gmmu_pgsz_gk20a pgsz_idx)
842{
843 struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
844 u32 page_size = gmmu_page_sizes[pgsz_idx];
845 u32 page_shift = gmmu_page_shifts[pgsz_idx];
846 u32 start_page_nr, num_pages;
847 int err;
848
849 gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
850 vma->name, offset, size);
851
852 start_page_nr = (u32)(offset >> page_shift);
853 num_pages = (u32)((size + page_size - 1) >> page_shift);
854
855 err = vma->free(vma, start_page_nr, num_pages);
856 if (err) {
857 gk20a_err(dev_from_vm(vm),
858 "not found: offset=0x%llx, sz=0x%llx",
859 offset, size);
860 }
861
862 return err;
863}
864
865static int insert_mapped_buffer(struct rb_root *root,
866 struct mapped_buffer_node *mapped_buffer)
867{
868 struct rb_node **new_node = &(root->rb_node), *parent = NULL;
869
870 /* Figure out where to put new node */
871 while (*new_node) {
872 struct mapped_buffer_node *cmp_with =
873 container_of(*new_node, struct mapped_buffer_node,
874 node);
875
876 parent = *new_node;
877
878 if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */
879 new_node = &((*new_node)->rb_left);
880 else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */
881 new_node = &((*new_node)->rb_right);
882 else
883 return -EINVAL; /* no fair dup'ing */
884 }
885
886 /* Add new node and rebalance tree. */
887 rb_link_node(&mapped_buffer->node, parent, new_node);
888 rb_insert_color(&mapped_buffer->node, root);
889
890 return 0;
891}
892
893static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
894 struct rb_root *root, struct dma_buf *dmabuf,
895 u32 kind)
896{
897 struct rb_node *node = rb_first(root);
898 while (node) {
899 struct mapped_buffer_node *mapped_buffer =
900 container_of(node, struct mapped_buffer_node, node);
901 if (mapped_buffer->dmabuf == dmabuf &&
902 kind == mapped_buffer->kind)
903 return mapped_buffer;
904 node = rb_next(&mapped_buffer->node);
905 }
906 return 0;
907}
908
909static struct mapped_buffer_node *find_mapped_buffer_locked(
910 struct rb_root *root, u64 addr)
911{
912
913 struct rb_node *node = root->rb_node;
914 while (node) {
915 struct mapped_buffer_node *mapped_buffer =
916 container_of(node, struct mapped_buffer_node, node);
917 if (mapped_buffer->addr > addr) /* u64 cmp */
918 node = node->rb_left;
919 else if (mapped_buffer->addr != addr) /* u64 cmp */
920 node = node->rb_right;
921 else
922 return mapped_buffer;
923 }
924 return 0;
925}
926
927static struct mapped_buffer_node *find_mapped_buffer_range_locked(
928 struct rb_root *root, u64 addr)
929{
930 struct rb_node *node = root->rb_node;
931 while (node) {
932 struct mapped_buffer_node *m =
933 container_of(node, struct mapped_buffer_node, node);
934 if (m->addr <= addr && m->addr + m->size > addr)
935 return m;
936 else if (m->addr > addr) /* u64 cmp */
937 node = node->rb_left;
938 else
939 node = node->rb_right;
940 }
941 return 0;
942}
943
944#define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0]))
945
946struct buffer_attrs {
947 struct sg_table *sgt;
948 u64 size;
949 u64 align;
950 u32 ctag_offset;
951 u32 ctag_lines;
952 int pgsz_idx;
953 u8 kind_v;
954 u8 uc_kind_v;
955};
956
957static void gmmu_select_page_size(struct buffer_attrs *bfr)
958{
959 int i;
960 /* choose the biggest first (top->bottom) */
961 for (i = (gmmu_nr_page_sizes-1); i >= 0; i--)
962 if (!(gmmu_page_offset_masks[i] & bfr->align)) {
963 /* would like to add this too but nvmap returns the
964 * original requested size not the allocated size.
965 * (!(gmmu_page_offset_masks[i] & bfr->size)) */
966 bfr->pgsz_idx = i;
967 break;
968 }
969}
970
971static int setup_buffer_kind_and_compression(struct device *d,
972 u32 flags,
973 struct buffer_attrs *bfr,
974 enum gmmu_pgsz_gk20a pgsz_idx)
975{
976 bool kind_compressible;
977
978 if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
979 bfr->kind_v = gmmu_pte_kind_pitch_v();
980
981 if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
982 gk20a_err(d, "kind 0x%x not supported", bfr->kind_v);
983 return -EINVAL;
984 }
985
986 bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
987 /* find a suitable uncompressed kind if it becomes necessary later */
988 kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
989 if (kind_compressible) {
990 bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
991 if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) {
992 /* shouldn't happen, but it is worth cross-checking */
993 gk20a_err(d, "comptag kind 0x%x can't be"
994 " downgraded to uncompressed kind",
995 bfr->kind_v);
996 return -EINVAL;
997 }
998 }
999 /* comptags only supported for suitable kinds, 128KB pagesize */
1000 if (unlikely(kind_compressible &&
1001 (gmmu_page_sizes[pgsz_idx] != 128*1024))) {
1002 /*
1003 gk20a_warn(d, "comptags specified"
1004 " but pagesize being used doesn't support it");*/
1005 /* it is safe to fall back to uncompressed as
1006 functionality is not harmed */
1007 bfr->kind_v = bfr->uc_kind_v;
1008 kind_compressible = false;
1009 }
1010 if (kind_compressible)
1011 bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >>
1012 COMP_TAG_LINE_SIZE_SHIFT;
1013 else
1014 bfr->ctag_lines = 0;
1015
1016 return 0;
1017}
1018
1019static int validate_fixed_buffer(struct vm_gk20a *vm,
1020 struct buffer_attrs *bfr,
1021 u64 map_offset)
1022{
1023 struct device *dev = dev_from_vm(vm);
1024 struct vm_reserved_va_node *va_node;
1025 struct mapped_buffer_node *buffer;
1026
1027 if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) {
1028 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
1029 map_offset);
1030 return -EINVAL;
1031 }
1032
1033 /* find the space reservation */
1034 va_node = addr_to_reservation(vm, map_offset);
1035 if (!va_node) {
1036 gk20a_warn(dev, "fixed offset mapping without space allocation");
1037 return -EINVAL;
1038 }
1039
1040	/* check that this mapping does not collide with existing
1041 * mappings by checking the overlapping area between the current
1042 * buffer and all other mapped buffers */
1043
1044 list_for_each_entry(buffer,
1045 &va_node->va_buffers_list, va_buffers_list) {
1046 s64 begin = max(buffer->addr, map_offset);
1047 s64 end = min(buffer->addr +
1048 buffer->size, map_offset + bfr->size);
1049 if (end - begin > 0) {
1050 gk20a_warn(dev, "overlapping buffer map requested");
1051 return -EINVAL;
1052 }
1053 }
1054
1055 return 0;
1056}
1057
1058static u64 __locked_gmmu_map(struct vm_gk20a *vm,
1059 u64 map_offset,
1060 struct sg_table *sgt,
1061 u64 size,
1062 int pgsz_idx,
1063 u8 kind_v,
1064 u32 ctag_offset,
1065 u32 flags,
1066 int rw_flag)
1067{
1068 int err = 0, i = 0;
1069 u32 pde_lo, pde_hi;
1070 struct device *d = dev_from_vm(vm);
1071
1072 /* Allocate (or validate when map_offset != 0) the virtual address. */
1073 if (!map_offset) {
1074 map_offset = gk20a_vm_alloc_va(vm, size,
1075 pgsz_idx);
1076 if (!map_offset) {
1077 gk20a_err(d, "failed to allocate va space");
1078 err = -ENOMEM;
1079 goto fail;
1080 }
1081 }
1082
1083 pde_range_from_vaddr_range(vm,
1084 map_offset,
1085 map_offset + size - 1,
1086 &pde_lo, &pde_hi);
1087
1088 /* mark the addr range valid (but with 0 phys addr, which will fault) */
1089 for (i = pde_lo; i <= pde_hi; i++) {
1090 err = validate_gmmu_page_table_gk20a_locked(vm, i,
1091 pgsz_idx);
1092 if (err) {
1093 gk20a_err(d, "failed to validate page table %d: %d",
1094 i, err);
1095 goto fail;
1096 }
1097 }
1098
1099 err = update_gmmu_ptes_locked(vm, pgsz_idx,
1100 sgt,
1101 map_offset, map_offset + size - 1,
1102 kind_v,
1103 ctag_offset,
1104 flags &
1105 NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1106 rw_flag);
1107 if (err) {
1108 gk20a_err(d, "failed to update ptes on map");
1109 goto fail;
1110 }
1111
1112 return map_offset;
1113 fail:
1114 gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
1115 return 0;
1116}
1117
1118static void __locked_gmmu_unmap(struct vm_gk20a *vm,
1119 u64 vaddr,
1120 u64 size,
1121 int pgsz_idx,
1122 bool va_allocated,
1123 int rw_flag)
1124{
1125 int err = 0;
1126 struct gk20a *g = gk20a_from_vm(vm);
1127
1128 if (va_allocated) {
1129 err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
1130 if (err) {
1131 dev_err(dev_from_vm(vm),
1132 "failed to free va");
1133 return;
1134 }
1135 }
1136
1137 /* unmap here needs to know the page size we assigned at mapping */
1138 err = update_gmmu_ptes_locked(vm,
1139 pgsz_idx,
1140 0, /* n/a for unmap */
1141 vaddr,
1142 vaddr + size - 1,
1143 0, 0, false /* n/a for unmap */,
1144 rw_flag);
1145 if (err)
1146 dev_err(dev_from_vm(vm),
1147 "failed to update gmmu ptes on unmap");
1148
1149 /* detect which if any pdes/ptes can now be released */
1150
1151 /* flush l2 so any dirty lines are written out *now*.
1152 * also as we could potentially be switching this buffer
1153 * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
1154 * some point in the future we need to invalidate l2. e.g. switching
1155 * from a render buffer unmap (here) to later using the same memory
1156 * for gmmu ptes. note the positioning of this relative to any smmu
1157 * unmapping (below). */
1158
1159 gk20a_mm_l2_flush(g, true);
1160}
1161
1162static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
1163 struct dma_buf *dmabuf,
1164 u64 offset_align,
1165 u32 flags,
1166 int kind,
1167 struct sg_table **sgt,
1168 bool user_mapped,
1169 int rw_flag)
1170{
1171 struct mapped_buffer_node *mapped_buffer = 0;
1172
1173 mapped_buffer =
1174 find_mapped_buffer_reverse_locked(&vm->mapped_buffers,
1175 dmabuf, kind);
1176 if (!mapped_buffer)
1177 return 0;
1178
1179 if (mapped_buffer->flags != flags)
1180 return 0;
1181
1182 if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET &&
1183 mapped_buffer->addr != offset_align)
1184 return 0;
1185
1186 BUG_ON(mapped_buffer->vm != vm);
1187
1188 /* mark the buffer as used */
1189 if (user_mapped) {
1190 if (mapped_buffer->user_mapped == 0)
1191 vm->num_user_mapped_buffers++;
1192 mapped_buffer->user_mapped++;
1193
1194		/* If the mapping comes from user space, we own
1195		 * a reference on the dmabuf. Since we reuse an
1196		 * existing mapping here, we must drop that extra
1197		 * ref once so it does not leak.
1198		 */
1199 if (mapped_buffer->own_mem_ref)
1200 dma_buf_put(mapped_buffer->dmabuf);
1201 else
1202 mapped_buffer->own_mem_ref = true;
1203 }
1204 kref_get(&mapped_buffer->ref);
1205
1206 gk20a_dbg(gpu_dbg_map,
1207 "reusing as=%d pgsz=%d flags=0x%x ctags=%d "
1208 "start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x "
1209 "own_mem_ref=%d user_mapped=%d",
1210 vm_aspace_id(vm), mapped_buffer->pgsz_idx,
1211 mapped_buffer->flags,
1212 mapped_buffer->ctag_lines,
1213 mapped_buffer->ctag_offset,
1214 hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1215 hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1216 lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1217 hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1218 lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1219 mapped_buffer->own_mem_ref, user_mapped);
1220
1221 if (sgt)
1222 *sgt = mapped_buffer->sgt;
1223 return mapped_buffer->addr;
1224}
1225
1226u64 gk20a_vm_map(struct vm_gk20a *vm,
1227 struct dma_buf *dmabuf,
1228 u64 offset_align,
1229 u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/,
1230 int kind,
1231 struct sg_table **sgt,
1232 bool user_mapped,
1233 int rw_flag)
1234{
1235 struct gk20a *g = gk20a_from_vm(vm);
1236 struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
1237 struct device *d = dev_from_vm(vm);
1238 struct mapped_buffer_node *mapped_buffer = 0;
1239 bool inserted = false, va_allocated = false;
1240 u32 gmmu_page_size = 0;
1241 u64 map_offset = 0;
1242 int err = 0;
1243 struct buffer_attrs bfr = {0};
1244 struct gk20a_comptags comptags;
1245
1246 mutex_lock(&vm->update_gmmu_lock);
1247
1248 /* check if this buffer is already mapped */
1249 map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,
1250 flags, kind, sgt,
1251 user_mapped, rw_flag);
1252 if (map_offset) {
1253 mutex_unlock(&vm->update_gmmu_lock);
1254 return map_offset;
1255 }
1256
1257 /* pin buffer to get phys/iovmm addr */
1258 bfr.sgt = gk20a_mm_pin(d, dmabuf);
1259 if (IS_ERR(bfr.sgt)) {
1260 /* Falling back to physical is actually possible
1261 * here in many cases if we use 4K phys pages in the
1262 * gmmu. However we have some regions which require
1263 * contig regions to work properly (either phys-contig
1264 * or contig through smmu io_vaspace). Until we can
1265 * track the difference between those two cases we have
1266 * to fail the mapping when we run out of SMMU space.
1267 */
1268 gk20a_warn(d, "oom allocating tracking buffer");
1269 goto clean_up;
1270 }
1271
1272 if (sgt)
1273 *sgt = bfr.sgt;
1274
1275 bfr.kind_v = kind;
1276 bfr.size = dmabuf->size;
1277 bfr.align = 1 << __ffs((u64)sg_dma_address(bfr.sgt->sgl));
1278 bfr.pgsz_idx = -1;
1279
1280	/* If FIXED_OFFSET is set, the offset determines the page size.
1281	 * Otherwise, select the page size according to memory alignment */
1282 if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1283 bfr.pgsz_idx = NV_GMMU_VA_IS_UPPER(offset_align) ?
1284 gmmu_page_size_big : gmmu_page_size_small;
1285 } else {
1286 gmmu_select_page_size(&bfr);
1287 }
1288
1289 /* validate/adjust bfr attributes */
1290 if (unlikely(bfr.pgsz_idx == -1)) {
1291 gk20a_err(d, "unsupported page size detected");
1292 goto clean_up;
1293 }
1294
1295 if (unlikely(bfr.pgsz_idx < gmmu_page_size_small ||
1296 bfr.pgsz_idx > gmmu_page_size_big)) {
1297 BUG_ON(1);
1298 err = -EINVAL;
1299 goto clean_up;
1300 }
1301 gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
1302
1303 /* Check if we should use a fixed offset for mapping this buffer */
1304 if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1305 err = validate_fixed_buffer(vm, &bfr, offset_align);
1306 if (err)
1307 goto clean_up;
1308
1309 map_offset = offset_align;
1310 va_allocated = false;
1311 } else
1312 va_allocated = true;
1313
1314 if (sgt)
1315 *sgt = bfr.sgt;
1316
1317 err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx);
1318 if (unlikely(err)) {
1319 gk20a_err(d, "failure setting up kind and compression");
1320 goto clean_up;
1321 }
1322
1323 /* bar1 and pmu vm don't need ctag */
1324 if (!vm->enable_ctag)
1325 bfr.ctag_lines = 0;
1326
1327 gk20a_get_comptags(d, dmabuf, &comptags);
1328
1329 if (bfr.ctag_lines && !comptags.lines) {
1330 /* allocate compression resources if needed */
1331 err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator,
1332 bfr.ctag_lines);
1333 if (err) {
1334 /* ok to fall back here if we ran out */
1335 /* TBD: we can partially alloc ctags as well... */
1336 bfr.ctag_lines = bfr.ctag_offset = 0;
1337 bfr.kind_v = bfr.uc_kind_v;
1338 } else {
1339 gk20a_get_comptags(d, dmabuf, &comptags);
1340
1341 /* init/clear the ctag buffer */
1342 g->ops.ltc.clear_comptags(g,
1343 comptags.offset,
1344 comptags.offset + comptags.lines - 1);
1345 }
1346 }
1347
1348 /* store the comptag info */
1349 bfr.ctag_offset = comptags.offset;
1350
1351 /* update gmmu ptes */
1352 map_offset = __locked_gmmu_map(vm, map_offset,
1353 bfr.sgt,
1354 bfr.size,
1355 bfr.pgsz_idx,
1356 bfr.kind_v,
1357 bfr.ctag_offset,
1358 flags, rw_flag);
1359 if (!map_offset)
1360 goto clean_up;
1361
1362 gk20a_dbg(gpu_dbg_map,
1363 "as=%d pgsz=%d "
1364 "kind=0x%x kind_uc=0x%x flags=0x%x "
1365 "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
1366 vm_aspace_id(vm), gmmu_page_size,
1367 bfr.kind_v, bfr.uc_kind_v, flags,
1368 bfr.ctag_lines, bfr.ctag_offset,
1369 hi32(map_offset), lo32(map_offset),
1370 hi32((u64)sg_dma_address(bfr.sgt->sgl)),
1371 lo32((u64)sg_dma_address(bfr.sgt->sgl)),
1372 hi32((u64)sg_phys(bfr.sgt->sgl)),
1373 lo32((u64)sg_phys(bfr.sgt->sgl)));
1374
1375#if defined(NVHOST_DEBUG)
1376 {
1377 int i;
1378 struct scatterlist *sg = NULL;
1379 gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)");
1380		for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i) {
1381 u64 da = sg_dma_address(sg);
1382 u64 pa = sg_phys(sg);
1383 u64 len = sg->length;
1384 gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x",
1385 i, hi32(pa), lo32(pa), hi32(da), lo32(da),
1386 hi32(len), lo32(len));
1387 }
1388 }
1389#endif
1390
1391 /* keep track of the buffer for unmapping */
1392 /* TBD: check for multiple mapping of same buffer */
1393 mapped_buffer = kzalloc(sizeof(*mapped_buffer), GFP_KERNEL);
1394 if (!mapped_buffer) {
1395 gk20a_warn(d, "oom allocating tracking buffer");
1396 goto clean_up;
1397 }
1398 mapped_buffer->dmabuf = dmabuf;
1399 mapped_buffer->sgt = bfr.sgt;
1400 mapped_buffer->addr = map_offset;
1401 mapped_buffer->size = bfr.size;
1402 mapped_buffer->pgsz_idx = bfr.pgsz_idx;
1403 mapped_buffer->ctag_offset = bfr.ctag_offset;
1404 mapped_buffer->ctag_lines = bfr.ctag_lines;
1405 mapped_buffer->vm = vm;
1406 mapped_buffer->flags = flags;
1407 mapped_buffer->kind = kind;
1408 mapped_buffer->va_allocated = va_allocated;
1409 mapped_buffer->user_mapped = user_mapped ? 1 : 0;
1410 mapped_buffer->own_mem_ref = user_mapped;
1411 INIT_LIST_HEAD(&mapped_buffer->unmap_list);
1412 INIT_LIST_HEAD(&mapped_buffer->va_buffers_list);
1413 kref_init(&mapped_buffer->ref);
1414
1415 err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
1416 if (err) {
1417 gk20a_err(d, "failed to insert into mapped buffer tree");
1418 goto clean_up;
1419 }
1420 inserted = true;
1421 if (user_mapped)
1422 vm->num_user_mapped_buffers++;
1423
1424 gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
1425
1426 if (!va_allocated) {
1427 struct vm_reserved_va_node *va_node;
1428
1429 /* find the space reservation */
1430 va_node = addr_to_reservation(vm, map_offset);
1431 list_add_tail(&mapped_buffer->va_buffers_list,
1432 &va_node->va_buffers_list);
1433 mapped_buffer->va_node = va_node;
1434 }
1435
1436 mutex_unlock(&vm->update_gmmu_lock);
1437
1438 /* Invalidate kernel mappings immediately */
1439 if (vm_aspace_id(vm) == -1)
1440 gk20a_mm_tlb_invalidate(vm);
1441
1442 return map_offset;
1443
1444clean_up:
1445 if (inserted) {
1446 rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1447 if (user_mapped)
1448 vm->num_user_mapped_buffers--;
1449 }
1450 kfree(mapped_buffer);
1451 if (va_allocated)
1452 gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
1453 if (!IS_ERR(bfr.sgt))
1454 gk20a_mm_unpin(d, dmabuf, bfr.sgt);
1455
1456 mutex_unlock(&vm->update_gmmu_lock);
1457 gk20a_dbg_info("err=%d\n", err);
1458 return 0;
1459}
1460
1461u64 gk20a_gmmu_map(struct vm_gk20a *vm,
1462 struct sg_table **sgt,
1463 u64 size,
1464 u32 flags,
1465 int rw_flag)
1466{
1467 u64 vaddr;
1468
1469 mutex_lock(&vm->update_gmmu_lock);
1470 vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
1471 *sgt, /* sg table */
1472 size,
1473 0, /* page size index = 0 i.e. SZ_4K */
1474 0, /* kind */
1475 0, /* ctag_offset */
1476 flags, rw_flag);
1477 mutex_unlock(&vm->update_gmmu_lock);
1478 if (!vaddr) {
1479 gk20a_err(dev_from_vm(vm), "failed to allocate va space");
1480 return 0;
1481 }
1482
1483 /* Invalidate kernel mappings immediately */
1484 gk20a_mm_tlb_invalidate(vm);
1485
1486 return vaddr;
1487}
1488
1489void gk20a_gmmu_unmap(struct vm_gk20a *vm,
1490 u64 vaddr,
1491 u64 size,
1492 int rw_flag)
1493{
1494 mutex_lock(&vm->update_gmmu_lock);
1495 __locked_gmmu_unmap(vm,
1496 vaddr,
1497 size,
1498 0, /* page size 4K */
1499 true, /*va_allocated */
1500 rw_flag);
1501 mutex_unlock(&vm->update_gmmu_lock);
1502}
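/*
 * A rough usage sketch for the pair above (caller names are placeholders;
 * error handling omitted): given an sg_table "sgt" describing "size" bytes,
 *
 *	u64 va = gk20a_gmmu_map(vm, &sgt, size, 0, gk20a_mem_flag_none);
 *	if (!va)
 *		return -ENOMEM;
 *	...
 *	gk20a_gmmu_unmap(vm, va, size, gk20a_mem_flag_none);
 *
 * Note that gk20a_gmmu_map always maps with the small (4K) page size and
 * returns 0 rather than a negative error code on failure.
 */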
1503
1504phys_addr_t gk20a_get_phys_from_iova(struct device *d,
1505 u64 dma_addr)
1506{
1507 phys_addr_t phys;
1508 u64 iova;
1509
1510 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
1511 if (!mapping)
1512 return dma_addr;
1513
1514 iova = dma_addr & PAGE_MASK;
1515 phys = iommu_iova_to_phys(mapping->domain, iova);
1516 return phys;
1517}
1518
1519/* get sg_table from already allocated buffer */
1520int gk20a_get_sgtable(struct device *d, struct sg_table **sgt,
1521 void *cpuva, u64 iova,
1522 size_t size)
1523{
1524 int err = 0;
1525 *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1526 if (!(*sgt)) {
1527 dev_err(d, "failed to allocate memory\n");
1528 err = -ENOMEM;
1529 goto fail;
1530 }
1531 err = dma_get_sgtable(d, *sgt,
1532 cpuva, iova,
1533 size);
1534 if (err) {
1535 dev_err(d, "failed to create sg table\n");
1536 goto fail;
1537 }
1538 sg_dma_address((*sgt)->sgl) = iova;
1539
1540 return 0;
1541 fail:
1542 if (*sgt) {
1543 kfree(*sgt);
1544 *sgt = NULL;
1545 }
1546 return err;
1547}
1548
1549int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt,
1550 struct page **pages, u64 iova,
1551 size_t size)
1552{
1553 int err = 0;
1554 *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1555 if (!(*sgt)) {
1556 dev_err(d, "failed to allocate memory\n");
1557 err = -ENOMEM;
1558 goto fail;
1559 }
1560 err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
1561 if (err) {
1562 dev_err(d, "failed to allocate sg_table\n");
1563 goto fail;
1564 }
1565 sg_set_page((*sgt)->sgl, *pages, size, 0);
1566 sg_dma_address((*sgt)->sgl) = iova;
1567
1568 return 0;
1569 fail:
1570 if (*sgt) {
1571 kfree(*sgt);
1572 *sgt = NULL;
1573 }
1574 return err;
1575}
1576
1577void gk20a_free_sgtable(struct sg_table **sgt)
1578{
1579 sg_free_table(*sgt);
1580 kfree(*sgt);
1581 *sgt = NULL;
1582}
1583
1584u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
1585{
1586 u64 result = sg_phys(sgl);
1587#ifdef CONFIG_TEGRA_IOMMU_SMMU
1588 if (sg_dma_address(sgl) == DMA_ERROR_CODE)
1589 result = 0;
1590 else if (sg_dma_address(sgl)) {
1591 result = sg_dma_address(sgl) |
1592 1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT;
1593 }
1594#endif
1595 return result;
1596}
1597
1598static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
1599 enum gmmu_pgsz_gk20a pgsz_idx,
1600 struct sg_table *sgt,
1601 u64 first_vaddr, u64 last_vaddr,
1602 u8 kind_v, u32 ctag_offset,
1603 bool cacheable,
1604 int rw_flag)
1605{
1606 int err;
1607 u32 pde_lo, pde_hi, pde_i;
1608 struct scatterlist *cur_chunk;
1609 unsigned int cur_offset;
1610 u32 pte_w[2] = {0, 0}; /* invalid pte */
1611 u32 ctag = ctag_offset;
1612 u32 ctag_incr;
1613 u32 page_size = gmmu_page_sizes[pgsz_idx];
1614 u64 addr = 0;
1615
1616 pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
1617 &pde_lo, &pde_hi);
1618
1619 gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
1620 pgsz_idx, pde_lo, pde_hi);
1621
1622 /* If ctag_offset !=0 add 1 else add 0. The idea is to avoid a branch
1623 * below (per-pte). Note: this doesn't work unless page size (when
1624 * comptags are active) is 128KB. We have checks elsewhere for that. */
1625 ctag_incr = !!ctag_offset;
1626
1627 if (sgt)
1628 cur_chunk = sgt->sgl;
1629 else
1630 cur_chunk = NULL;
1631
1632 cur_offset = 0;
1633
1634 for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
1635 u32 pte_lo, pte_hi;
1636 u32 pte_cur;
1637 void *pte_kv_cur;
1638
1639 struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
1640
1641 if (pde_i == pde_lo)
1642 pte_lo = pte_index_from_vaddr(vm, first_vaddr,
1643 pgsz_idx);
1644 else
1645 pte_lo = 0;
1646
1647 if ((pde_i != pde_hi) && (pde_hi != pde_lo))
1648 pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1;
1649 else
1650 pte_hi = pte_index_from_vaddr(vm, last_vaddr,
1651 pgsz_idx);
1652
1653 /* get cpu access to the ptes */
1654 err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur,
1655 pte->size);
1656 if (err) {
1657 gk20a_err(dev_from_vm(vm),
1658 "couldn't map ptes for update as=%d pte_ref_cnt=%d",
1659 vm_aspace_id(vm), pte->ref_cnt);
1660 goto clean_up;
1661 }
1662
1663 gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
1664 for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
1665
1666 if (likely(sgt)) {
1667 u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
1668 if (new_addr) {
1669 addr = new_addr;
1670 addr += cur_offset;
1671 }
1672
1673 pte_w[0] = gmmu_pte_valid_true_f() |
1674 gmmu_pte_address_sys_f(addr
1675 >> gmmu_pte_address_shift_v());
1676 pte_w[1] = gmmu_pte_aperture_video_memory_f() |
1677 gmmu_pte_kind_f(kind_v) |
1678 gmmu_pte_comptagline_f(ctag);
1679
1680 if (rw_flag == gk20a_mem_flag_read_only) {
1681 pte_w[0] |= gmmu_pte_read_only_true_f();
1682 pte_w[1] |=
1683 gmmu_pte_write_disable_true_f();
1684 } else if (rw_flag ==
1685 gk20a_mem_flag_write_only) {
1686 pte_w[1] |=
1687 gmmu_pte_read_disable_true_f();
1688 }
1689
1690 if (!cacheable)
1691 pte_w[1] |= gmmu_pte_vol_true_f();
1692
1693 pte->ref_cnt++;
1694
1695 gk20a_dbg(gpu_dbg_pte,
1696 "pte_cur=%d addr=0x%x,%08x kind=%d"
1697 " ctag=%d vol=%d refs=%d"
1698 " [0x%08x,0x%08x]",
1699 pte_cur, hi32(addr), lo32(addr),
1700 kind_v, ctag, !cacheable,
1701 pte->ref_cnt, pte_w[1], pte_w[0]);
1702
1703 ctag += ctag_incr;
1704 cur_offset += page_size;
1705 addr += page_size;
1706 while (cur_chunk &&
1707 cur_offset >= cur_chunk->length) {
1708 cur_offset -= cur_chunk->length;
1709 cur_chunk = sg_next(cur_chunk);
1710 }
1711
1712 } else {
1713 pte->ref_cnt--;
1714 gk20a_dbg(gpu_dbg_pte,
1715 "pte_cur=%d ref=%d [0x0,0x0]",
1716 pte_cur, pte->ref_cnt);
1717 }
1718
1719 gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
1720 gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
1721 }
1722
1723 unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
1724
1725 if (pte->ref_cnt == 0) {
1726 /* It can make sense to keep around one page table for
1727 * each flavor (empty)... in case a new map is coming
1728 * right back to alloc (and fill it in) again.
1729 * But: deferring unmapping should help with pathologic
1730 * unmap/map/unmap/map cases where we'd trigger pte
1731 * free/alloc/free/alloc.
1732 */
1733 free_gmmu_pages(vm, pte->ref, pte->sgt,
1734 vm->mm->page_table_sizing[pgsz_idx].order,
1735 pte->size);
1736 pte->ref = NULL;
1737
1738 /* rewrite pde */
1739 update_gmmu_pde_locked(vm, pde_i);
1740 }
1741
1742 }
1743
1744 smp_mb();
1745 vm->tlb_dirty = true;
1746 gk20a_dbg_fn("set tlb dirty");
1747
1748 return 0;
1749
1750clean_up:
1751 /*TBD: potentially rewrite above to pre-map everything it needs to
1752 * as that's the only way it can fail */
1753 return err;
1754
1755}
1756
1757
1758/* for gk20a the "video memory" apertures here are misnomers. */
1759static inline u32 big_valid_pde0_bits(u64 pte_addr)
1760{
1761 u32 pde0_bits =
1762 gmmu_pde_aperture_big_video_memory_f() |
1763 gmmu_pde_address_big_sys_f(
1764 (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1765 return pde0_bits;
1766}
1767static inline u32 small_valid_pde1_bits(u64 pte_addr)
1768{
1769 u32 pde1_bits =
1770 gmmu_pde_aperture_small_video_memory_f() |
1771 gmmu_pde_vol_small_true_f() | /* tbd: why? */
1772 gmmu_pde_address_small_sys_f(
1773 (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1774 return pde1_bits;
1775}
1776
1777/* Given the current state of the ptes associated with a pde,
1778 determine its value and write it out. There's no checking
1779 here to determine whether or not a change was actually
1780 made, so superfluous updates will cause unnecessary
1781 pde invalidations.
1782*/
1783static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
1784{
1785 bool small_valid, big_valid;
1786 u64 pte_addr[2] = {0, 0};
1787 struct page_table_gk20a *small_pte =
1788 vm->pdes.ptes[gmmu_page_size_small] + i;
1789 struct page_table_gk20a *big_pte =
1790 vm->pdes.ptes[gmmu_page_size_big] + i;
1791 u32 pde_v[2] = {0, 0};
1792 u32 *pde;
1793
1794 small_valid = small_pte && small_pte->ref;
1795 big_valid = big_pte && big_pte->ref;
1796
1797 if (small_valid)
1798 pte_addr[gmmu_page_size_small] =
1799 gk20a_mm_iova_addr(small_pte->sgt->sgl);
1800 if (big_valid)
1801 pte_addr[gmmu_page_size_big] =
1802 gk20a_mm_iova_addr(big_pte->sgt->sgl);
1803
1804 pde_v[0] = gmmu_pde_size_full_f();
1805 pde_v[0] |= big_valid ?
1806 big_valid_pde0_bits(pte_addr[gmmu_page_size_big])
1807 :
1808 (gmmu_pde_aperture_big_invalid_f());
1809
1810 pde_v[1] |= (small_valid ?
1811 small_valid_pde1_bits(pte_addr[gmmu_page_size_small])
1812 :
1813 (gmmu_pde_aperture_small_invalid_f() |
1814 gmmu_pde_vol_small_false_f())
1815 )
1816 |
1817 (big_valid ? (gmmu_pde_vol_big_true_f()) :
1818 gmmu_pde_vol_big_false_f());
1819
1820 pde = pde_from_index(vm, i);
1821
1822 gk20a_mem_wr32(pde, 0, pde_v[0]);
1823 gk20a_mem_wr32(pde, 1, pde_v[1]);
1824
1825 smp_mb();
1826
1827 FLUSH_CPU_DCACHE(pde,
1828 sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()),
1829 sizeof(u32)*2);
1830
1831 gk20a_mm_l2_invalidate(vm->mm->g);
1832
1833 gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
1834
1835 vm->tlb_dirty = true;
1836}
1837
1838
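/* Back the range [vaddr, vaddr + num_pages * pgsz) with the VM's shared
 * zero page. Used for sparse allocations (and when unmapping a buffer that
 * lives inside a sparse reservation) so the range keeps a harmless mapping
 * instead of being left unmapped. */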
1839static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
1840 u32 num_pages, u32 pgsz_idx)
1841{
1842 struct mm_gk20a *mm = vm->mm;
1843 struct gk20a *g = mm->g;
1844 u32 pgsz = gmmu_page_sizes[pgsz_idx];
1845 u32 i;
1846 dma_addr_t iova;
1847
1848 /* allocate the zero page if the va does not already have one */
1849 if (!vm->zero_page_cpuva) {
1850 int err = 0;
1851 vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
1852 mm->big_page_size,
1853 &iova,
1854 GFP_KERNEL);
1855 if (!vm->zero_page_cpuva) {
1856 dev_err(&g->dev->dev, "failed to allocate zero page\n");
1857 return -ENOMEM;
1858 }
1859
1860 vm->zero_page_iova = iova;
1861 err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
1862 vm->zero_page_cpuva, vm->zero_page_iova,
1863 mm->big_page_size);
1864 if (err) {
1865 dma_free_coherent(&g->dev->dev, mm->big_page_size,
1866 vm->zero_page_cpuva,
1867 vm->zero_page_iova);
1868 vm->zero_page_iova = 0;
1869 vm->zero_page_cpuva = NULL;
1870
1871 dev_err(&g->dev->dev, "failed to create sg table for zero page\n");
1872 return -ENOMEM;
1873 }
1874 }
1875
1876 for (i = 0; i < num_pages; i++) {
1877 u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
1878 vm->zero_page_sgt, pgsz, pgsz_idx, 0, 0,
1879 NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
1880 gk20a_mem_flag_none);
1881
1882 if (!page_vaddr) {
1883 gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
1884 goto err_unmap;
1885 }
1886 vaddr += pgsz;
1887 }
1888
1889 gk20a_mm_l2_flush(mm->g, true);
1890
1891 return 0;
1892
1893err_unmap:
1894
1895 WARN_ON(1);
1896 /* something went wrong. unmap pages */
1897 while (i--) {
1898 vaddr -= pgsz;
1899 __locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
1900 gk20a_mem_flag_none);
1901 }
1902
1903 return -EINVAL;
1904}
1905
1906/* NOTE! vm->update_gmmu_lock (which protects mapped_buffers) must be held */
1907static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
1908{
1909 struct vm_gk20a *vm = mapped_buffer->vm;
1910
1911 if (mapped_buffer->va_node &&
1912 mapped_buffer->va_node->sparse) {
1913 u64 vaddr = mapped_buffer->addr;
1914 u32 pgsz_idx = mapped_buffer->pgsz_idx;
1915 u32 num_pages = mapped_buffer->size >>
1916 gmmu_page_shifts[pgsz_idx];
1917
1918 /* there is little we can do if this fails... */
1919 gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
1920
1921 } else
1922 __locked_gmmu_unmap(vm,
1923 mapped_buffer->addr,
1924 mapped_buffer->size,
1925 mapped_buffer->pgsz_idx,
1926 mapped_buffer->va_allocated,
1927 gk20a_mem_flag_none);
1928
1929 gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
1930 vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx],
1931 hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1932 mapped_buffer->own_mem_ref);
1933
1934 gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
1935 mapped_buffer->sgt);
1936
1937 /* remove from the mapped buffer tree and the va buffers list, then free */
1938 rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1939 if (!list_empty(&mapped_buffer->va_buffers_list))
1940 list_del(&mapped_buffer->va_buffers_list);
1941
1942 /* keep track of mapped buffers */
1943 if (mapped_buffer->user_mapped)
1944 vm->num_user_mapped_buffers--;
1945
1946 if (mapped_buffer->own_mem_ref)
1947 dma_buf_put(mapped_buffer->dmabuf);
1948
1949 kfree(mapped_buffer);
1950
1951 return;
1952}
1953
1954void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
1955{
1956 struct device *d = dev_from_vm(vm);
1957 struct mapped_buffer_node *mapped_buffer;
1958
1959 mutex_lock(&vm->update_gmmu_lock);
1960 mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
1961 if (!mapped_buffer) {
1962 mutex_unlock(&vm->update_gmmu_lock);
1963 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
1964 return;
1965 }
1966 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
1967 mutex_unlock(&vm->update_gmmu_lock);
1968}
1969
1970static void gk20a_vm_remove_support(struct vm_gk20a *vm)
1971{
1972 struct gk20a *g = vm->mm->g;
1973 struct mapped_buffer_node *mapped_buffer;
1974 struct vm_reserved_va_node *va_node, *va_node_tmp;
1975 struct rb_node *node;
1976
1977 gk20a_dbg_fn("");
1978 mutex_lock(&vm->update_gmmu_lock);
1979
1980 /* TBD: add a flag here for the unmap code to recognize teardown
1981 * and short-circuit any otherwise expensive operations. */
1982
1983 node = rb_first(&vm->mapped_buffers);
1984 while (node) {
1985 mapped_buffer =
1986 container_of(node, struct mapped_buffer_node, node);
1987 gk20a_vm_unmap_locked(mapped_buffer);
1988 node = rb_first(&vm->mapped_buffers);
1989 }
1990
1991 /* destroy remaining reserved memory areas */
1992 list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
1993 reserved_va_list) {
1994 list_del(&va_node->reserved_va_list);
1995 kfree(va_node);
1996 }
1997
1998 /* TBD: unmapping all buffers above may not actually free
1999 * all vm ptes. jettison them here for certain... */
2000
2001 unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
2002 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, vm->pdes.size);
2003
2004 kfree(vm->pdes.ptes[gmmu_page_size_small]);
2005 kfree(vm->pdes.ptes[gmmu_page_size_big]);
2006 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
2007 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
2008
2009 mutex_unlock(&vm->update_gmmu_lock);
2010
2011 /* release zero page if used */
2012 if (vm->zero_page_cpuva)
2013 dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
2014 vm->zero_page_cpuva, vm->zero_page_iova);
2015
2016 /* vm is not used anymore. release it. */
2017 kfree(vm);
2018}
2019
2020static void gk20a_vm_remove_support_kref(struct kref *ref)
2021{
2022 struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
2023 gk20a_vm_remove_support(vm);
2024}
2025
2026void gk20a_vm_get(struct vm_gk20a *vm)
2027{
2028 kref_get(&vm->ref);
2029}
2030
2031void gk20a_vm_put(struct vm_gk20a *vm)
2032{
2033 kref_put(&vm->ref, gk20a_vm_remove_support_kref);
2034}
2035
2036/* address space interfaces for the gk20a module */
2037int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
2038{
2039 struct gk20a_as *as = as_share->as;
2040 struct gk20a *g = gk20a_from_as(as);
2041 struct mm_gk20a *mm = &g->mm;
2042 struct vm_gk20a *vm;
2043 u64 vma_size;
2044 u32 num_pages, low_hole_pages;
2045 char name[32];
2046 int err;
2047
2048 gk20a_dbg_fn("");
2049
2050 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
2051 if (!vm)
2052 return -ENOMEM;
2053
2054 as_share->vm = vm;
2055
2056 vm->mm = mm;
2057 vm->as_share = as_share;
2058
2059 vm->big_pages = true;
2060
2061 vm->va_start = mm->pde_stride; /* create a one pde hole */
2062 vm->va_limit = mm->channel.size; /* note this means channel.size is
2063 really just the max */
2064 {
2065 u32 pde_lo, pde_hi;
2066 pde_range_from_vaddr_range(vm,
2067 0, vm->va_limit-1,
2068 &pde_lo, &pde_hi);
2069 vm->pdes.num_pdes = pde_hi + 1;
2070 }
2071
2072 vm->pdes.ptes[gmmu_page_size_small] =
2073 kzalloc(sizeof(struct page_table_gk20a) *
2074 vm->pdes.num_pdes, GFP_KERNEL);
2075
2076 vm->pdes.ptes[gmmu_page_size_big] =
2077 kzalloc(sizeof(struct page_table_gk20a) *
2078 vm->pdes.num_pdes, GFP_KERNEL);
2079
2080 if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2081 vm->pdes.ptes[gmmu_page_size_big]))
2082 return -ENOMEM;
2083
2084 gk20a_dbg_info("init space for va_limit=0x%llx num_pdes=%d",
2085 vm->va_limit, vm->pdes.num_pdes);
2086
2087 /* allocate the page table directory */
2088 err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2089 &vm->pdes.sgt, &vm->pdes.size);
2090 if (err)
2091 return -ENOMEM;
2092
2093 err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2094 vm->pdes.size);
2095 if (err) {
2096 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2097 vm->pdes.size);
2098 return -ENOMEM;
2099 }
2100 gk20a_dbg(gpu_dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx",
2101 vm->pdes.kv,
2102 gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2103 /* we could release vm->pdes.kv but it's only one page... */
2104
2105
2106 /* low-half: alloc small pages */
2107 /* high-half: alloc big pages */
2108 vma_size = mm->channel.size >> 1;
2109
2110 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
2111 gmmu_page_sizes[gmmu_page_size_small]>>10);
2112 num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
2113
2114 /* num_pages above is without regard to the low-side hole. */
2115 low_hole_pages = (vm->va_start >>
2116 gmmu_page_shifts[gmmu_page_size_small]);
2117
2118 gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
2119 low_hole_pages, /* start */
2120 num_pages - low_hole_pages, /* length */
2121 1); /* align */
2122
2123 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
2124 gmmu_page_sizes[gmmu_page_size_big]>>10);
2125
2126 num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
2127 gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
2128 num_pages, /* start */
2129 num_pages, /* length */
2130 1); /* align */
2131
2132 vm->mapped_buffers = RB_ROOT;
2133
2134 mutex_init(&vm->update_gmmu_lock);
2135 kref_init(&vm->ref);
2136 INIT_LIST_HEAD(&vm->reserved_va_list);
2137
2138 vm->enable_ctag = true;
2139
2140 return 0;
2141}
2142
2143
2144int gk20a_vm_release_share(struct gk20a_as_share *as_share)
2145{
2146 struct vm_gk20a *vm = as_share->vm;
2147
2148 gk20a_dbg_fn("");
2149
2150 vm->as_share = NULL;
2151
2152 /* put as reference to vm */
2153 gk20a_vm_put(vm);
2154
2155 as_share->vm = NULL;
2156
2157 return 0;
2158}
2159
2160
2161int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2162 struct nvhost_as_alloc_space_args *args)
2163{
2164 int err = -ENOMEM;
2165 int pgsz_idx;
2166 u32 start_page_nr;
2167 struct gk20a_allocator *vma;
2168 struct vm_gk20a *vm = as_share->vm;
2169 struct vm_reserved_va_node *va_node;
2170 u64 vaddr_start = 0;
2171
2172 gk20a_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx",
2173 args->flags, args->page_size, args->pages,
2174 args->o_a.offset);
2175
2176 /* determine pagesz idx */
2177 for (pgsz_idx = gmmu_page_size_small;
2178 pgsz_idx < gmmu_nr_page_sizes;
2179 pgsz_idx++) {
2180 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2181 break;
2182 }
2183
2184 if (pgsz_idx >= gmmu_nr_page_sizes) {
2185 err = -EINVAL;
2186 goto clean_up;
2187 }
2188
2189 va_node = kzalloc(sizeof(*va_node), GFP_KERNEL);
2190 if (!va_node) {
2191 err = -ENOMEM;
2192 goto clean_up;
2193 }
2194
2195 if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE &&
2196 pgsz_idx != gmmu_page_size_big) {
2197 err = -ENOSYS;
2198 kfree(va_node);
2199 goto clean_up;
2200 }
2201
2202 start_page_nr = 0;
2203 if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
2204 start_page_nr = (u32)(args->o_a.offset >>
2205 gmmu_page_shifts[pgsz_idx]);
2206
2207 vma = &vm->vma[pgsz_idx];
2208 err = vma->alloc(vma, &start_page_nr, args->pages);
2209 if (err) {
2210 kfree(va_node);
2211 goto clean_up;
2212 }
2213
2214 vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx];
2215
2216 va_node->vaddr_start = vaddr_start;
2217 va_node->size = (u64)args->page_size * (u64)args->pages;
2218 va_node->pgsz_idx = pgsz_idx;
2219 INIT_LIST_HEAD(&va_node->va_buffers_list);
2220 INIT_LIST_HEAD(&va_node->reserved_va_list);
2221
2222 mutex_lock(&vm->update_gmmu_lock);
2223
2224 /* mark that we need to use sparse mappings here */
2225 if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
2226 err = gk20a_vm_put_empty(vm, vaddr_start, args->pages,
2227 pgsz_idx);
2228 if (err) {
2229 mutex_unlock(&vm->update_gmmu_lock);
2230 vma->free(vma, start_page_nr, args->pages);
2231 kfree(va_node);
2232 goto clean_up;
2233 }
2234
2235 va_node->sparse = true;
2236 }
2237
2238 list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
2239
2240 mutex_unlock(&vm->update_gmmu_lock);
2241
2242 args->o_a.offset = vaddr_start;
2243
2244clean_up:
2245 return err;
2246}
2247
2248int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2249 struct nvhost_as_free_space_args *args)
2250{
2251 int err = -ENOMEM;
2252 int pgsz_idx;
2253 u32 start_page_nr;
2254 struct gk20a_allocator *vma;
2255 struct vm_gk20a *vm = as_share->vm;
2256 struct vm_reserved_va_node *va_node;
2257
2258 gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
2259 args->pages, args->offset);
2260
2261 /* determine pagesz idx */
2262 for (pgsz_idx = gmmu_page_size_small;
2263 pgsz_idx < gmmu_nr_page_sizes;
2264 pgsz_idx++) {
2265 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2266 break;
2267 }
2268
2269 if (pgsz_idx >= gmmu_nr_page_sizes) {
2270 err = -EINVAL;
2271 goto clean_up;
2272 }
2273
2274 start_page_nr = (u32)(args->offset >>
2275 gmmu_page_shifts[pgsz_idx]);
2276
2277 vma = &vm->vma[pgsz_idx];
2278 err = vma->free(vma, start_page_nr, args->pages);
2279
2280 if (err)
2281 goto clean_up;
2282
2283 mutex_lock(&vm->update_gmmu_lock);
2284 va_node = addr_to_reservation(vm, args->offset);
2285 if (va_node) {
2286 struct mapped_buffer_node *buffer, *buffer_tmp;
2287
2288 /* there is no need to unmap the buffers in the va range. Just
2289 * convert them into normal buffers */
2290
2291 list_for_each_entry_safe(buffer, buffer_tmp,
2292 &va_node->va_buffers_list, va_buffers_list)
2293 list_del_init(&buffer->va_buffers_list);
2294
2295 list_del(&va_node->reserved_va_list);
2296
2297 /* if this was a sparse mapping, free the va */
2298 if (va_node->sparse)
2299 __locked_gmmu_unmap(vm,
2300 va_node->vaddr_start,
2301 va_node->size,
2302 va_node->pgsz_idx,
2303 false,
2304 gk20a_mem_flag_none);
2305 kfree(va_node);
2306 }
2307 mutex_unlock(&vm->update_gmmu_lock);
2308
2309clean_up:
2310 return err;
2311}
2312
2313int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
2314 struct channel_gk20a *ch)
2315{
2316 int err = 0;
2317 struct vm_gk20a *vm = as_share->vm;
2318
2319 gk20a_dbg_fn("");
2320
2321 ch->vm = vm;
2322 err = channel_gk20a_commit_va(ch);
2323 if (err)
2324 ch->vm = NULL;
2325
2326 return err;
2327}
2328
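/* Attach gk20a driver data to a dmabuf if it is not already present. The
 * first lookup runs unlocked for the common already-attached case; the
 * allocation path re-checks under priv_lock so concurrent callers cannot
 * attach twice (double-checked locking). */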
2329int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
2330{
2331 struct gk20a_dmabuf_priv *priv;
2332 static DEFINE_MUTEX(priv_lock);
2333
2334 priv = dma_buf_get_drvdata(dmabuf, dev);
2335 if (likely(priv))
2336 return 0;
2337
2338 mutex_lock(&priv_lock);
2339 priv = dma_buf_get_drvdata(dmabuf, dev);
2340 if (priv)
2341 goto priv_exist_or_err;
2342 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
2343 if (!priv) {
2344 priv = ERR_PTR(-ENOMEM);
2345 goto priv_exist_or_err;
2346 }
2347 mutex_init(&priv->lock);
2348 dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);
2349priv_exist_or_err:
2350 mutex_unlock(&priv_lock);
2351 if (IS_ERR(priv))
2352 return -ENOMEM;
2353
2354 return 0;
2355}
2356
2357
2358static int gk20a_dmabuf_get_kind(struct dma_buf *dmabuf)
2359{
2360 int kind = 0;
2361#ifdef CONFIG_TEGRA_NVMAP
2362 int err;
2363 u64 nvmap_param;
2364
2365 err = nvmap_get_dmabuf_param(dmabuf, NVMAP_HANDLE_PARAM_KIND,
2366 &nvmap_param);
2367 kind = err ? kind : nvmap_param;
2368#endif
2369 return kind;
2370}
2371
2372int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
2373 int dmabuf_fd,
2374 u64 *offset_align,
2375 u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
2376 int kind)
2377{
2378 int err = 0;
2379 struct vm_gk20a *vm = as_share->vm;
2380 struct dma_buf *dmabuf;
2381 u64 ret_va;
2382
2383 gk20a_dbg_fn("");
2384
2385 /* get ref to the mem handle (released on unmap_locked) */
2386 dmabuf = dma_buf_get(dmabuf_fd);
2387 if (IS_ERR(dmabuf))
2388 return PTR_ERR(dmabuf);
2389
2390 err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
2391 if (err) {
2392 dma_buf_put(dmabuf);
2393 return err;
2394 }
2395
2396 if (kind == -1)
2397 kind = gk20a_dmabuf_get_kind(dmabuf);
2398
2399 ret_va = gk20a_vm_map(vm, dmabuf, *offset_align,
2400 flags, kind, NULL, true,
2401 gk20a_mem_flag_none);
2402 *offset_align = ret_va;
2403 if (!ret_va) {
2404 dma_buf_put(dmabuf);
2405 err = -EINVAL;
2406 }
2407
2408 return err;
2409}
2410
2411int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset)
2412{
2413 struct vm_gk20a *vm = as_share->vm;
2414
2415 gk20a_dbg_fn("");
2416
2417 gk20a_vm_unmap_user(vm, offset);
2418 return 0;
2419}
2420
2421int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2422{
2423 int err;
2424 phys_addr_t inst_pa;
2425 void *inst_ptr;
2426 struct vm_gk20a *vm = &mm->bar1.vm;
2427 struct gk20a *g = gk20a_from_mm(mm);
2428 struct device *d = dev_from_gk20a(g);
2429 struct inst_desc *inst_block = &mm->bar1.inst_block;
2430 u64 pde_addr;
2431 u32 pde_addr_lo;
2432 u32 pde_addr_hi;
2433 dma_addr_t iova;
2434
2435 vm->mm = mm;
2436
2437 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
2438
2439 gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
2440
2441 vm->va_start = mm->pde_stride * 1;
2442 vm->va_limit = mm->bar1.aperture_size;
2443
2444 {
2445 u32 pde_lo, pde_hi;
2446 pde_range_from_vaddr_range(vm,
2447 0, vm->va_limit-1,
2448 &pde_lo, &pde_hi);
2449 vm->pdes.num_pdes = pde_hi + 1;
2450 }
2451
2452 /* bar1 is likely only ever to use/need small page sizes. */
2453 /* But just in case, arrange for both for now. */
2454 vm->pdes.ptes[gmmu_page_size_small] =
2455 kzalloc(sizeof(struct page_table_gk20a) *
2456 vm->pdes.num_pdes, GFP_KERNEL);
2457
2458 vm->pdes.ptes[gmmu_page_size_big] =
2459 kzalloc(sizeof(struct page_table_gk20a) *
2460 vm->pdes.num_pdes, GFP_KERNEL);
2461
2462 if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2463 vm->pdes.ptes[gmmu_page_size_big]))
2464 return -ENOMEM;
2465
2466 gk20a_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d",
2467 vm->va_limit, vm->pdes.num_pdes);
2468
2469
2470 /* allocate the page table directory */
2471 err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2472 &vm->pdes.sgt, &vm->pdes.size);
2473 if (err)
2474 goto clean_up;
2475
2476 err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2477 vm->pdes.size);
2478 if (err) {
2479 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2480 vm->pdes.size);
2481 goto clean_up;
2482 }
2483 gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
2484 vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2485 /* we could release vm->pdes.kv but it's only one page... */
2486
2487 pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2488 pde_addr_lo = u64_lo32(pde_addr >> 12);
2489 pde_addr_hi = u64_hi32(pde_addr);
2490
2491 gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2492 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl),
2493 pde_addr_lo, pde_addr_hi);
2494
2495 /* allocate instance mem for bar1 */
2496 inst_block->size = ram_in_alloc_size_v();
2497 inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2498 &iova, GFP_KERNEL);
2499 if (!inst_block->cpuva) {
2500 gk20a_err(d, "%s: memory allocation failed\n", __func__);
2501 err = -ENOMEM;
2502 goto clean_up;
2503 }
2504
2505 inst_block->iova = iova;
2506 inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
2507 if (!inst_block->cpu_pa) {
2508 gk20a_err(d, "%s: failed to get phys address\n", __func__);
2509 err = -ENOMEM;
2510 goto clean_up;
2511 }
2512
2513 inst_pa = inst_block->cpu_pa;
2514 inst_ptr = inst_block->cpuva;
2515
2516 gk20a_dbg_info("bar1 inst block physical phys = 0x%llx, kv = 0x%p",
2517 (u64)inst_pa, inst_ptr);
2518
2519 memset(inst_ptr, 0, ram_fc_size_val_v());
2520
2521 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2522 ram_in_page_dir_base_target_vid_mem_f() |
2523 ram_in_page_dir_base_vol_true_f() |
2524 ram_in_page_dir_base_lo_f(pde_addr_lo));
2525
2526 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
2527 ram_in_page_dir_base_hi_f(pde_addr_hi));
2528
2529 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
2530 u64_lo32(vm->va_limit) | 0xFFF);
2531
2532 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2533 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2534
2535 gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa);
2536 gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1",
2537 1,/*start*/
2538 (vm->va_limit >> 12) - 1 /* length*/,
2539 1); /* align */
2540 /* initialize just in case we try to use it anyway */
2541 gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused",
2542 0x0badc0de, /* start */
2543 1, /* length */
2544 1); /* align */
2545
2546 vm->mapped_buffers = RB_ROOT;
2547
2548 mutex_init(&vm->update_gmmu_lock);
2549 kref_init(&vm->ref);
2550 INIT_LIST_HEAD(&vm->reserved_va_list);
2551
2552 return 0;
2553
2554clean_up:
2555 /* free, etc */
2556 if (inst_block->cpuva)
2557 dma_free_coherent(d, inst_block->size,
2558 inst_block->cpuva, inst_block->iova);
2559 inst_block->cpuva = NULL;
2560 inst_block->iova = 0;
2561 return err;
2562}
2563
2564/* pmu vm, share channel_vm interfaces */
2565int gk20a_init_pmu_vm(struct mm_gk20a *mm)
2566{
2567 int err;
2568 phys_addr_t inst_pa;
2569 void *inst_ptr;
2570 struct vm_gk20a *vm = &mm->pmu.vm;
2571 struct gk20a *g = gk20a_from_mm(mm);
2572 struct device *d = dev_from_gk20a(g);
2573 struct inst_desc *inst_block = &mm->pmu.inst_block;
2574 u64 pde_addr;
2575 u32 pde_addr_lo;
2576 u32 pde_addr_hi;
2577 dma_addr_t iova;
2578
2579 vm->mm = mm;
2580
2581 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
2582
2583 gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
2584
2585 vm->va_start = GK20A_PMU_VA_START;
2586 vm->va_limit = vm->va_start + mm->pmu.aperture_size;
2587
2588 {
2589 u32 pde_lo, pde_hi;
2590 pde_range_from_vaddr_range(vm,
2591 0, vm->va_limit-1,
2592 &pde_lo, &pde_hi);
2593 vm->pdes.num_pdes = pde_hi + 1;
2594 }
2595
2596 /* The pmu is likely only ever to use/need small page sizes. */
2597 /* But just in case, arrange for both for now. */
2598 vm->pdes.ptes[gmmu_page_size_small] =
2599 kzalloc(sizeof(struct page_table_gk20a) *
2600 vm->pdes.num_pdes, GFP_KERNEL);
2601
2602 vm->pdes.ptes[gmmu_page_size_big] =
2603 kzalloc(sizeof(struct page_table_gk20a) *
2604 vm->pdes.num_pdes, GFP_KERNEL);
2605
2606 if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2607 vm->pdes.ptes[gmmu_page_size_big]))
2608 return -ENOMEM;
2609
2610 gk20a_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d",
2611 vm->va_limit, vm->pdes.num_pdes);
2612
2613 /* allocate the page table directory */
2614 err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2615 &vm->pdes.sgt, &vm->pdes.size);
2616 if (err)
2617 goto clean_up;
2618
2619 err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2620 vm->pdes.size);
2621 if (err) {
2622 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2623 vm->pdes.size);
2624 goto clean_up;
2625 }
2626 gk20a_dbg_info("pmu pdes phys @ 0x%llx",
2627 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2628 /* we could release vm->pdes.kv but it's only one page... */
2629
2630 pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2631 pde_addr_lo = u64_lo32(pde_addr >> 12);
2632 pde_addr_hi = u64_hi32(pde_addr);
2633
2634 gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2635 (u64)pde_addr, pde_addr_lo, pde_addr_hi);
2636
2637 /* allocate instance mem for pmu */
2638 inst_block->size = GK20A_PMU_INST_SIZE;
2639 inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2640 &iova, GFP_KERNEL);
2641 if (!inst_block->cpuva) {
2642 gk20a_err(d, "%s: memory allocation failed\n", __func__);
2643 err = -ENOMEM;
2644 goto clean_up;
2645 }
2646
2647 inst_block->iova = iova;
2648 inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
2649 if (!inst_block->cpu_pa) {
2650 gk20a_err(d, "%s: failed to get phys address\n", __func__);
2651 err = -ENOMEM;
2652 goto clean_up;
2653 }
2654
2655 inst_pa = inst_block->cpu_pa;
2656 inst_ptr = inst_block->cpuva;
2657
2658 gk20a_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa);
2659
2660 memset(inst_ptr, 0, GK20A_PMU_INST_SIZE);
2661
2662 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2663 ram_in_page_dir_base_target_vid_mem_f() |
2664 ram_in_page_dir_base_vol_true_f() |
2665 ram_in_page_dir_base_lo_f(pde_addr_lo));
2666
2667 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
2668 ram_in_page_dir_base_hi_f(pde_addr_hi));
2669
2670 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
2671 u64_lo32(vm->va_limit) | 0xFFF);
2672
2673 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2674 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2675
2676 gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu",
2677 (vm->va_start >> 12), /* start */
2678 (vm->va_limit - vm->va_start) >> 12, /*length*/
2679 1); /* align */
2680 /* initialize just in case we try to use it anyway */
2681 gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused",
2682 0x0badc0de, /* start */
2683 1, /* length */
2684 1); /* align */
2685
2686
2687 vm->mapped_buffers = RB_ROOT;
2688
2689 mutex_init(&vm->update_gmmu_lock);
2690 kref_init(&vm->ref);
2691 INIT_LIST_HEAD(&vm->reserved_va_list);
2692
2693 return 0;
2694
2695clean_up:
2696 /* free, etc */
2697 if (inst_block->cpuva)
2698 dma_free_coherent(d, inst_block->size,
2699 inst_block->cpuva, inst_block->iova);
2700 inst_block->cpuva = NULL;
2701 inst_block->iova = 0;
2702 return err;
2703}
2704
2705void gk20a_mm_fb_flush(struct gk20a *g)
2706{
2707 struct mm_gk20a *mm = &g->mm;
2708 u32 data;
2709 s32 retry = 100;
2710
2711 gk20a_dbg_fn("");
2712
2713 mutex_lock(&mm->l2_op_lock);
2714
2715 g->ops.ltc.elpg_flush(g);
2716
2717 /* Make sure all previous writes are committed to the L2. There's no
2718 guarantee that writes are to DRAM. This will be a sysmembar internal
2719 to the L2. */
2720 gk20a_writel(g, flush_fb_flush_r(),
2721 flush_fb_flush_pending_busy_f());
2722
2723 do {
2724 data = gk20a_readl(g, flush_fb_flush_r());
2725
2726 if (flush_fb_flush_outstanding_v(data) ==
2727 flush_fb_flush_outstanding_true_v() ||
2728 flush_fb_flush_pending_v(data) ==
2729 flush_fb_flush_pending_busy_v()) {
2730 gk20a_dbg_info("fb_flush 0x%x", data);
2731 retry--;
2732 usleep_range(20, 40);
2733 } else
2734 break;
2735 } while (retry >= 0 || !tegra_platform_is_silicon());
2736
2737 if (retry < 0)
2738 gk20a_warn(dev_from_gk20a(g),
2739 "fb_flush too many retries");
2740
2741 mutex_unlock(&mm->l2_op_lock);
2742}
2743
2744static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
2745{
2746 u32 data;
2747 s32 retry = 200;
2748
2749 /* Invalidate any clean lines from the L2 so subsequent reads go to
2750 DRAM. Dirty lines are not affected by this operation. */
2751 gk20a_writel(g, flush_l2_system_invalidate_r(),
2752 flush_l2_system_invalidate_pending_busy_f());
2753
2754 do {
2755 data = gk20a_readl(g, flush_l2_system_invalidate_r());
2756
2757 if (flush_l2_system_invalidate_outstanding_v(data) ==
2758 flush_l2_system_invalidate_outstanding_true_v() ||
2759 flush_l2_system_invalidate_pending_v(data) ==
2760 flush_l2_system_invalidate_pending_busy_v()) {
2761 gk20a_dbg_info("l2_system_invalidate 0x%x",
2762 data);
2763 retry--;
2764 usleep_range(20, 40);
2765 } else
2766 break;
2767 } while (retry >= 0 || !tegra_platform_is_silicon());
2768
2769 if (retry < 0)
2770 gk20a_warn(dev_from_gk20a(g),
2771 "l2_system_invalidate too many retries");
2772}
2773
2774void gk20a_mm_l2_invalidate(struct gk20a *g)
2775{
2776 struct mm_gk20a *mm = &g->mm;
2777 mutex_lock(&mm->l2_op_lock);
2778 gk20a_mm_l2_invalidate_locked(g);
2779 mutex_unlock(&mm->l2_op_lock);
2780}
2781
2782void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
2783{
2784 struct mm_gk20a *mm = &g->mm;
2785 u32 data;
2786 s32 retry = 200;
2787
2788 gk20a_dbg_fn("");
2789
2790 mutex_lock(&mm->l2_op_lock);
2791
2792 /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
2793 as clean, so subsequent reads might hit in the L2. */
2794 gk20a_writel(g, flush_l2_flush_dirty_r(),
2795 flush_l2_flush_dirty_pending_busy_f());
2796
2797 do {
2798 data = gk20a_readl(g, flush_l2_flush_dirty_r());
2799
2800 if (flush_l2_flush_dirty_outstanding_v(data) ==
2801 flush_l2_flush_dirty_outstanding_true_v() ||
2802 flush_l2_flush_dirty_pending_v(data) ==
2803 flush_l2_flush_dirty_pending_busy_v()) {
2804 gk20a_dbg_info("l2_flush_dirty 0x%x", data);
2805 retry--;
2806 usleep_range(20, 40);
2807 } else
2808 break;
2809 } while (retry >= 0 || !tegra_platform_is_silicon());
2810
2811 if (retry < 0)
2812 gk20a_warn(dev_from_gk20a(g),
2813 "l2_flush_dirty too many retries");
2814
2815 if (invalidate)
2816 gk20a_mm_l2_invalidate_locked(g);
2817
2818 mutex_unlock(&mm->l2_op_lock);
2819}
2820
2821
2822int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
2823 struct dma_buf **dmabuf,
2824 u64 *offset)
2825{
2826 struct mapped_buffer_node *mapped_buffer;
2827
2828 gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);
2829
2830 mutex_lock(&vm->update_gmmu_lock);
2831
2832 mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers,
2833 gpu_va);
2834 if (!mapped_buffer) {
2835 mutex_unlock(&vm->update_gmmu_lock);
2836 return -EINVAL;
2837 }
2838
2839 *dmabuf = mapped_buffer->dmabuf;
2840 *offset = gpu_va - mapped_buffer->addr;
2841
2842 mutex_unlock(&vm->update_gmmu_lock);
2843
2844 return 0;
2845}
2846
2847void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
2848{
2849 struct mm_gk20a *mm = vm->mm;
2850 struct gk20a *g = gk20a_from_vm(vm);
2851 u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12);
2852 u32 data;
2853 s32 retry = 200;
2854
2855 gk20a_dbg_fn("");
2856
2857 /* pagetables are considered sw state and are preserved across
2858 prepare_poweroff. When gk20a deinit releases those pagetables,
2859 common code in the vm unmap path calls tlb invalidate, which
2860 touches hw. Use the power_on flag to skip tlb invalidation when
2861 gpu power is turned off */
2862
2863 if (!g->power_on)
2864 return;
2865
2866 /* No need to invalidate if tlb is clean */
2867 mutex_lock(&vm->update_gmmu_lock);
2868 if (!vm->tlb_dirty) {
2869 mutex_unlock(&vm->update_gmmu_lock);
2870 return;
2871 }
2872 vm->tlb_dirty = false;
2873 mutex_unlock(&vm->update_gmmu_lock);
2874
2875 mutex_lock(&mm->tlb_lock);
2876 do {
2877 data = gk20a_readl(g, fb_mmu_ctrl_r());
2878 if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
2879 break;
2880 usleep_range(20, 40);
2881 retry--;
2882 } while (retry >= 0 || !tegra_platform_is_silicon());
2883
2884 if (retry < 0)
2885 gk20a_warn(dev_from_gk20a(g),
2886 "wait mmu fifo space too many retries");
2887
2888 gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
2889 fb_mmu_invalidate_pdb_addr_f(addr_lo) |
2890 fb_mmu_invalidate_pdb_aperture_vid_mem_f());
2891
2892 /* this is a sledgehammer, it would seem */
2893 gk20a_writel(g, fb_mmu_invalidate_r(),
2894 fb_mmu_invalidate_all_pdb_true_f() |
2895 fb_mmu_invalidate_all_va_true_f() |
2896 fb_mmu_invalidate_trigger_true_f());
2897
2898 do {
2899 data = gk20a_readl(g, fb_mmu_ctrl_r());
2900 if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
2901 fb_mmu_ctrl_pri_fifo_empty_false_f())
2902 break;
2903 retry--;
2904 usleep_range(20, 40);
2905 } while (retry >= 0 || !tegra_platform_is_silicon());
2906
2907 if (retry < 0)
2908 gk20a_warn(dev_from_gk20a(g),
2909 "mmu invalidate too many retries");
2910
2911 mutex_unlock(&mm->tlb_lock);
2912}
2913
2914int gk20a_mm_suspend(struct gk20a *g)
2915{
2916 gk20a_dbg_fn("");
2917
2918 gk20a_mm_fb_flush(g);
2919 gk20a_mm_l2_flush(g, true);
2920
2921 gk20a_dbg_fn("done");
2922 return 0;
2923}
2924
2925void gk20a_mm_ltc_isr(struct gk20a *g)
2926{
2927 u32 intr;
2928
2929 intr = gk20a_readl(g, ltc_ltc0_ltss_intr_r());
2930 gk20a_err(dev_from_gk20a(g), "ltc: %08x\n", intr);
2931 gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
2932}
2933
2934bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
2935{
2936 u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
2937 return fb_mmu_debug_ctrl_debug_v(debug_ctrl) ==
2938 fb_mmu_debug_ctrl_debug_enabled_v();
2939}
2940
2941static int gk20a_mm_mmu_vpr_info_fetch_wait(struct gk20a *g,
2942 const unsigned int msec)
2943{
2944 unsigned long timeout;
2945
2946 timeout = jiffies + msecs_to_jiffies(msec);
2947 while (1) {
2948 u32 val;
2949
2950 val = gk20a_readl(g, fb_mmu_vpr_info_r());
2951 if (fb_mmu_vpr_info_fetch_v(val) ==
2952 fb_mmu_vpr_info_fetch_false_v())
2953 break;
2954
2955 if (tegra_platform_is_silicon() &&
2956 WARN_ON(time_after(jiffies, timeout)))
2957 return -ETIME;
2958 }
2959
2960 return 0;
2961}
2962
2963int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g)
2964{
2965 int ret = 0;
2966
2967 gk20a_busy_noresume(g->dev);
2968 if (!pm_runtime_active(&g->dev->dev))
2969 goto fail;
2970
2971 if (gk20a_mm_mmu_vpr_info_fetch_wait(g, 5)) {
2972 ret = -ETIME;
2973 goto fail;
2974 }
2975
2976 gk20a_writel(g, fb_mmu_vpr_info_r(),
2977 fb_mmu_vpr_info_fetch_true_v());
2978
2979 ret = gk20a_mm_mmu_vpr_info_fetch_wait(g, 5);
2980
2981 fail:
2982 gk20a_idle(g->dev);
2983 return ret;
2984}
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
new file mode 100644
index 00000000..23d15c23
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -0,0 +1,464 @@
1/*
2 * drivers/video/tegra/host/gk20a/mm_gk20a.h
3 *
4 * GK20A memory management
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#ifndef __MM_GK20A_H__
22#define __MM_GK20A_H__
23
24#include <linux/scatterlist.h>
25#include <linux/dma-attrs.h>
26#include <linux/iommu.h>
27#include <asm/dma-iommu.h>
28#include "gk20a_allocator.h"
29
30/* This "address bit" in the gmmu ptes (and other gk20a accesses)
31 * signals the address as presented should be translated by the SMMU.
32 * Without this bit present gk20a accesses are *not* translated.
33 */
34/* Hack, get this from manuals somehow... */
35#define NV_MC_SMMU_VADDR_TRANSLATION_BIT 34
36#define NV_MC_SMMU_VADDR_TRANSLATE(x) (x | \
37 (1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT))
38
39/* For now keep the size relatively small compared to the full
40 * 40b va: 32GB, consisting of two 16GB spaces. */
41#define NV_GMMU_VA_RANGE 35ULL
42#define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1)))
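/* NV_GMMU_VA_RANGE of 35 bits is where the 32GB above comes from; the two
 * 16GB halves split at bit 34, which is what NV_GMMU_VA_IS_UPPER tests.
 * Per gk20a_vm_alloc_share() the low half is used for small-page and the
 * high half for big-page allocations. */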
43
44struct mem_desc {
45 struct dma_buf *ref;
46 struct sg_table *sgt;
47 u32 size;
48};
49
50struct mem_desc_sub {
51 u32 offset;
52 u32 size;
53};
54
55struct gpfifo_desc {
56 size_t size;
57 u32 entry_num;
58
59 u32 get;
60 u32 put;
61
62 bool wrap;
63
64 u64 iova;
65 struct gpfifo *cpu_va;
66 u64 gpu_va;
67};
68
69struct mmu_desc {
70 void *cpuva;
71 u64 iova;
72 size_t size;
73};
74
75struct inst_desc {
76 u64 iova;
77 void *cpuva;
78 phys_addr_t cpu_pa;
79 size_t size;
80};
81
82struct surface_mem_desc {
83 u64 iova;
84 void *cpuva;
85 struct sg_table *sgt;
86 size_t size;
87};
88
89struct userd_desc {
90 struct sg_table *sgt;
91 u64 iova;
92 void *cpuva;
93 size_t size;
94 u64 gpu_va;
95};
96
97struct runlist_mem_desc {
98 u64 iova;
99 void *cpuva;
100 size_t size;
101};
102
103struct patch_desc {
104 struct page **pages;
105 u64 iova;
106 size_t size;
107 void *cpu_va;
108 u64 gpu_va;
109 u32 data_count;
110};
111
112struct pmu_mem_desc {
113 void *cpuva;
114 u64 iova;
115 u64 pmu_va;
116 size_t size;
117};
118
119struct priv_cmd_queue_mem_desc {
120 u64 base_iova;
121 u32 *base_cpuva;
122 size_t size;
123};
124
125struct zcull_ctx_desc {
126 struct mem_desc mem;
127 u64 gpu_va;
128 u32 ctx_attr;
129 u32 ctx_sw_mode;
130};
131
132struct pm_ctx_desc {
133 struct mem_desc mem;
134 u64 gpu_va;
135 u32 ctx_attr;
136 u32 ctx_sw_mode;
137};
138
139struct gr_ctx_buffer_desc;
140struct platform_device;
141struct gr_ctx_buffer_desc {
142 void (*destroy)(struct platform_device *, struct gr_ctx_buffer_desc *);
143 struct sg_table *sgt;
144 struct page **pages;
145 size_t size;
146 u64 iova;
147 struct dma_attrs attrs;
148 void *priv;
149};
150
151struct gr_ctx_desc {
152 struct page **pages;
153 u64 iova;
154 size_t size;
155 u64 gpu_va;
156};
157
158struct compbit_store_desc {
159 struct page **pages;
160 size_t size;
161 u64 base_iova;
162};
163
164struct page_table_gk20a {
165 /* backing store for the page table: */
166 /* either a *page or a *mem_handle */
167 void *ref;
168 /* track mapping cnt on this page table */
169 u32 ref_cnt;
170 struct sg_table *sgt;
171 size_t size;
172};
173
174#ifndef _NVHOST_MEM_MGR_H
175enum gk20a_mem_rw_flag {
176 gk20a_mem_flag_none = 0,
177 gk20a_mem_flag_read_only = 1,
178 gk20a_mem_flag_write_only = 2,
179};
180#endif
181
182enum gmmu_pgsz_gk20a {
183 gmmu_page_size_small = 0,
184 gmmu_page_size_big = 1,
185 gmmu_nr_page_sizes = 2
186};
187
188
189struct page_directory_gk20a {
190 /* backing for */
191 u32 num_pdes;
192 void *kv;
193 /* Either a *page or a *mem_handle */
194 void *ref;
195 struct sg_table *sgt;
196 size_t size;
197 struct page_table_gk20a *ptes[gmmu_nr_page_sizes];
198};
199
200struct priv_cmd_queue {
201 struct priv_cmd_queue_mem_desc mem;
202 u64 base_gpuva; /* gpu_va base */
203 u16 size; /* num of entries in words */
204 u16 put; /* put for priv cmd queue */
205 u16 get; /* get for priv cmd queue */
206 struct list_head free; /* list of pre-allocated free entries */
207 struct list_head head; /* list of used entries */
208};
209
210struct priv_cmd_entry {
211 u32 *ptr;
212 u64 gva;
213 u16 get; /* start of entry in queue */
214 u16 size; /* in words */
215 u32 gp_get; /* gp_get when submitting last priv cmd */
216 u32 gp_put; /* gp_put when submitting last priv cmd */
217 u32 gp_wrap; /* wrap when submitting last priv cmd */
218 bool pre_alloc; /* prealloc entry, free to free list */
219 struct list_head list; /* node for lists */
220};
221
222struct mapped_buffer_node {
223 struct vm_gk20a *vm;
224 struct rb_node node;
225 struct list_head unmap_list;
226 struct list_head va_buffers_list;
227 struct vm_reserved_va_node *va_node;
228 u64 addr;
229 u64 size;
230 struct dma_buf *dmabuf;
231 struct sg_table *sgt;
232 struct kref ref;
233 u32 user_mapped;
234 bool own_mem_ref;
235 u32 pgsz_idx;
236 u32 ctag_offset;
237 u32 ctag_lines;
238 u32 flags;
239 u32 kind;
240 bool va_allocated;
241};
242
243struct vm_reserved_va_node {
244 struct list_head reserved_va_list;
245 struct list_head va_buffers_list;
246 u32 pgsz_idx;
247 u64 vaddr_start;
248 u64 size;
249 bool sparse;
250};
251
252struct vm_gk20a {
253 struct mm_gk20a *mm;
254 struct gk20a_as_share *as_share; /* as_share this represents */
255
256 u64 va_start;
257 u64 va_limit;
258
259 int num_user_mapped_buffers;
260
261 bool big_pages; /* enable large page support */
262 bool enable_ctag;
263 bool tlb_dirty;
264 bool mapped;
265
266 struct kref ref;
267
268 struct mutex update_gmmu_lock;
269
270 struct page_directory_gk20a pdes;
271
272 struct gk20a_allocator vma[gmmu_nr_page_sizes];
273 struct rb_root mapped_buffers;
274
275 struct list_head reserved_va_list;
276
277 dma_addr_t zero_page_iova;
278 void *zero_page_cpuva;
279 struct sg_table *zero_page_sgt;
280};
281
282struct gk20a;
283struct channel_gk20a;
284
285int gk20a_init_mm_support(struct gk20a *g);
286int gk20a_init_mm_setup_sw(struct gk20a *g);
287int gk20a_init_bar1_vm(struct mm_gk20a *mm);
288int gk20a_init_pmu_vm(struct mm_gk20a *mm);
289
290void gk20a_mm_fb_flush(struct gk20a *g);
291void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
292void gk20a_mm_l2_invalidate(struct gk20a *g);
293
294struct mm_gk20a {
295 struct gk20a *g;
296
297 u32 compression_page_size;
298 u32 big_page_size;
299 u32 pde_stride;
300 u32 pde_stride_shift;
301
302 struct {
303 u32 order;
304 u32 num_ptes;
305 } page_table_sizing[gmmu_nr_page_sizes];
306
307
308 struct {
309 u64 size;
310 } channel;
311
312 struct {
313 u32 aperture_size;
314 struct vm_gk20a vm;
315 struct inst_desc inst_block;
316 } bar1;
317
318 struct {
319 u32 aperture_size;
320 struct vm_gk20a vm;
321 struct inst_desc inst_block;
322 } pmu;
323
324 struct mutex tlb_lock;
325 struct mutex l2_op_lock;
326
327 void (*remove_support)(struct mm_gk20a *mm);
328 bool sw_ready;
329#ifdef CONFIG_DEBUG_FS
330 u32 ltc_enabled;
331 u32 ltc_enabled_debug;
332#endif
333};
334
335int gk20a_mm_init(struct mm_gk20a *mm);
336
337#define gk20a_from_mm(mm) ((mm)->g)
338#define gk20a_from_vm(vm) ((vm)->mm->g)
339
340#define dev_from_vm(vm) dev_from_gk20a(vm->mm->g)
341
342#define DEFAULT_ALLOC_ALIGNMENT (4*1024)
343
344static inline int bar1_aperture_size_mb_gk20a(void)
345{
346 return 128; /*TBD read this from fuses?*/
347}
348/* max address bits */
349static inline int max_physaddr_bits_gk20a(void)
350{
351 return 40;/*"old" sys physaddr, meaningful? */
352}
353static inline int max_vid_physaddr_bits_gk20a(void)
354{
355 /* "vid phys" is asid/smmu phys?,
356 * i.e. is this the real sys physaddr? */
357 return 37;
358}
359static inline int max_vaddr_bits_gk20a(void)
360{
361 return 40; /* chopped for area? */
362}
363
364#if 0 /*related to addr bits above, concern below TBD on which is accurate */
365#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
366 bus_bar1_block_ptr_s())
367#else
368#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v()
369#endif
370
371void gk20a_mm_dump_vm(struct vm_gk20a *vm,
372 u64 va_begin, u64 va_end, char *label);
373
374int gk20a_mm_suspend(struct gk20a *g);
375
376phys_addr_t gk20a_get_phys_from_iova(struct device *d,
377 u64 dma_addr);
378
379int gk20a_get_sgtable(struct device *d, struct sg_table **sgt,
380 void *cpuva, u64 iova,
381 size_t size);
382
383int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt,
384 struct page **pages, u64 iova,
385 size_t size);
386
387void gk20a_free_sgtable(struct sg_table **sgt);
388
389u64 gk20a_mm_iova_addr(struct scatterlist *sgl);
390
391void gk20a_mm_ltc_isr(struct gk20a *g);
392
393bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);
394
395int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g);
396
397u64 gk20a_gmmu_map(struct vm_gk20a *vm,
398 struct sg_table **sgt,
399 u64 size,
400 u32 flags,
401 int rw_flag);
402
403void gk20a_gmmu_unmap(struct vm_gk20a *vm,
404 u64 vaddr,
405 u64 size,
406 int rw_flag);
407
408struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
409void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
410 struct sg_table *sgt);
411
412u64 gk20a_vm_map(struct vm_gk20a *vm,
413 struct dma_buf *dmabuf,
414 u64 offset_align,
415 u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/,
416 int kind,
417 struct sg_table **sgt,
418 bool user_mapped,
419 int rw_flag);
420
421/* unmap handle from kernel */
422void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
423
424/* get reference to all currently mapped buffers */
425int gk20a_vm_get_buffers(struct vm_gk20a *vm,
426 struct mapped_buffer_node ***mapped_buffers,
427 int *num_buffers);
428
429/* put references on the given buffers */
430void gk20a_vm_put_buffers(struct vm_gk20a *vm,
431 struct mapped_buffer_node **mapped_buffers,
432 int num_buffers);
433
434/* invalidate tlbs for the vm area */
435void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm);
436
437/* find buffer corresponding to va */
438int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
439 struct dma_buf **dmabuf,
440 u64 *offset);
441
442void gk20a_vm_get(struct vm_gk20a *vm);
443void gk20a_vm_put(struct vm_gk20a *vm);
444
445/* vm-as interface */
446struct nvhost_as_alloc_space_args;
447struct nvhost_as_free_space_args;
448int gk20a_vm_alloc_share(struct gk20a_as_share *as_share);
449int gk20a_vm_release_share(struct gk20a_as_share *as_share);
450int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
451 struct nvhost_as_alloc_space_args *args);
452int gk20a_vm_free_space(struct gk20a_as_share *as_share,
453 struct nvhost_as_free_space_args *args);
454int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
455 struct channel_gk20a *ch);
456int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
457 int dmabuf_fd,
458 u64 *offset_align,
459 u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
460 int kind);
461int gk20a_vm_unmap_buffer(struct gk20a_as_share *, u64 offset);
462
463int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
464#endif /* __MM_GK20A_H__ */
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
new file mode 100644
index 00000000..09f348cb
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -0,0 +1,160 @@
1/*
2 * drivers/video/tegra/host/gk20a/soc/platform_gk20a.h
3 *
4 * GK20A Platform (SoC) Interface
5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#ifndef _GK20A_PLATFORM_H_
19#define _GK20A_PLATFORM_H_
20
21#include <linux/platform_device.h>
22#include <linux/pm_domain.h>
23
24struct gk20a;
25struct channel_gk20a;
26struct gr_ctx_buffer_desc;
27struct gk20a_scale_profile;
28
29struct gk20a_platform {
30#ifdef CONFIG_TEGRA_GK20A
31 u32 syncpt_base;
32#endif
33 /* Populated by the gk20a driver before probing the platform. */
34 struct gk20a *g;
35
36 /* Should be populated at probe. */
37 bool can_railgate;
38
39 /* Should be populated at probe. */
40 bool has_syncpoints;
41
42 /* Should be populated by probe. */
43 struct dentry *debugfs;
44
45 /* Clock configuration is stored here. Platform probe is responsible
46 * for filling this data. */
47 struct clk *clk[3];
48 int num_clks;
49
50 /* Delay before rail gated */
51 int railgate_delay;
52
53 /* Delay before clock gated */
54 int clockgate_delay;
55
56 /* Initialize the platform interface of the gk20a driver.
57 *
58 * The platform implementation of this function must
59 * - set the power and clocks of the gk20a device to a known
60 * state, and
61 * - populate the gk20a_platform structure (a pointer to the
62 * structure can be obtained by calling gk20a_get_platform).
63 *
64 * After this function is finished, the driver will initialise
65 * pm runtime and genpd based on the platform configuration.
66 */
67 int (*probe)(struct platform_device *dev);
68
69 /* Second stage initialisation - called once all power management
70 * initialisations are done.
71 */
72 int (*late_probe)(struct platform_device *dev);
73
74 /* Called before submitting work to the gpu. The platform may use this
75 * hook to ensure that any other hw modules that the gpu depends on are
76 * powered. The platform implementation must count refs to this call. */
77 int (*channel_busy)(struct platform_device *dev);
78
79 /* Called after the work on the gpu is completed. The platform may use
80 * this hook to release power refs to any other hw modules that the gpu
81 * depends on. The platform implementation must count refs to this
82 * call. */
83 void (*channel_idle)(struct platform_device *dev);
84
85 /* This function is called to allocate secure memory (memory that the
86 * CPU cannot see). The function should fill the context buffer
87 * descriptor (especially fields destroy, sgt, size).
88 */
89 int (*secure_alloc)(struct platform_device *dev,
90 struct gr_ctx_buffer_desc *desc,
91 size_t size);
92
93 /* Device is going to be suspended */
94 int (*suspend)(struct device *);
95
96 /* Called to turn off the device */
97 int (*railgate)(struct platform_device *dev);
98
99 /* Called to turn on the device */
100 int (*unrailgate)(struct platform_device *dev);
101
102 /* Postscale callback is called after frequency change */
103 void (*postscale)(struct platform_device *pdev,
104 unsigned long freq);
105
106 /* Pre callback is called before frequency change */
107 void (*prescale)(struct platform_device *pdev);
108
109 /* Devfreq governor name. If scaling is enabled, we request
110 * this governor to be used in scaling */
111 const char *devfreq_governor;
112
113 /* Quality of service id. If this is set, the scaling routines
114 * will register a callback to id. Each time we receive a new value,
115 * the postscale callback gets called. */
116 int qos_id;
117
118 /* Called as part of debug dump. If the gpu gets hung, this function
119 * is responsible for delivering all necessary debug data of other
120 * hw units which may interact with the gpu without direct supervision
121 * of the CPU.
122 */
123 void (*dump_platform_dependencies)(struct platform_device *dev);
124};
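/* For a minimal implementation of this interface see gk20a_generic_platform
 * in platform_gk20a_generic.c (added in this patch); it fills in only the
 * .probe hook. The channel_busy/channel_idle wrappers below check for NULL,
 * so those hooks are optional. */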
125
126static inline struct gk20a_platform *gk20a_get_platform(
127 struct platform_device *dev)
128{
129 return (struct gk20a_platform *)platform_get_drvdata(dev);
130}
131
132extern struct gk20a_platform gk20a_generic_platform;
133#ifdef CONFIG_TEGRA_GK20A
134extern struct gk20a_platform gk20a_tegra_platform;
135#endif
136
137static inline int gk20a_platform_channel_busy(struct platform_device *dev)
138{
139 struct gk20a_platform *p = gk20a_get_platform(dev);
140 int ret = 0;
141 if (p->channel_busy)
142 ret = p->channel_busy(dev);
143
144 return ret;
145}
146
147static inline void gk20a_platform_channel_idle(struct platform_device *dev)
148{
149 struct gk20a_platform *p = gk20a_get_platform(dev);
150 if (p->channel_idle)
151 p->channel_idle(dev);
152}
153
154static inline bool gk20a_platform_has_syncpoints(struct platform_device *dev)
155{
156 struct gk20a_platform *p = gk20a_get_platform(dev);
157 return p->has_syncpoints;
158}
159
160#endif
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c
new file mode 100644
index 00000000..7b750df6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c
@@ -0,0 +1,35 @@
1/*
2 * drivers/video/tegra/host/gk20a/platform_gk20a_generic.c
3 *
4 * GK20A Generic Platform Interface
5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#include "platform_gk20a.h"
22
23static int gk20a_generic_probe(struct platform_device *dev)
24{
25 struct gk20a_platform *platform = gk20a_get_platform(dev);
26
27 /* TODO: Initialize clocks and power */
28 (void)platform;
29
30 return 0;
31}
32
33struct gk20a_platform gk20a_generic_platform = {
34 .probe = gk20a_generic_probe,
35};
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
new file mode 100644
index 00000000..35658f31
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -0,0 +1,561 @@
1/*
2 * drivers/video/tegra/host/gk20a/platform_gk20a_tegra.c
3 *
4 * GK20A Tegra Platform Interface
5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#include <linux/debugfs.h>
19#include <linux/tegra-powergate.h>
20#include <linux/platform_data/tegra_edp.h>
21#include <linux/nvhost_ioctl.h>
22#include <linux/dma-buf.h>
23#include <linux/nvmap.h>
24#include <mach/irqs.h>
25#include <mach/pm_domains.h>
26
27#include "../../../arch/arm/mach-tegra/iomap.h"
28
29#include "gk20a.h"
30#include "hal_gk20a.h"
31#include "platform_gk20a.h"
32#include "gk20a_scale.h"
33
34#define TEGRA_GK20A_INTR INT_GPU
35#define TEGRA_GK20A_INTR_NONSTALL INT_GPU_NONSTALL
36
37#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */
38#define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */
39
40extern struct device tegra_vpr_dev;
41struct gk20a_platform t132_gk20a_tegra_platform;
42
43struct gk20a_emc_params {
44 long emc_slope;
45 long emc_offset;
46 long emc_dip_slope;
47 long emc_dip_offset;
48 long emc_xmid;
49 bool linear;
50};
51
52/*
53 * 20.12 fixed point arithmetic
54 */
55
56static const int FXFRAC = 12;
57static const int FX_HALF = (1 << 12) / 2;
58
59#define INT_TO_FX(x) ((x) << FXFRAC)
60#define FX_TO_INT(x) ((x) >> FXFRAC)
61
62#define MHZ_TO_HZ(x) ((x) * 1000000)
63#define HZ_TO_MHZ(x) ((x) / 1000000)
64
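/* Worked example of the 20.12 format: INT_TO_FX(3) == 3 << 12 == 12288 and
 * FXMUL(INT_TO_FX(3), INT_TO_FX(2)) == (12288 * 8192) >> 12 == 24576 ==
 * INT_TO_FX(6). Adding FX_HALF before FX_TO_INT() rounds to nearest. */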
65int FXMUL(int x, int y)
66{
67 return ((long long) x * (long long) y) >> FXFRAC;
68}
69
70int FXDIV(int x, int y)
71{
72 /* long long div operation not supported, must shift manually. This
73 * would have been
74 *
75 * return (((long long) x) << FXFRAC) / (long long) y;
76 */
77 int pos, t;
78 if (x == 0)
79 return 0;
80
82 /* find largest allowable left shift of the numerator, limit to FXFRAC */
82 t = x < 0 ? -x : x;
83 pos = 31 - fls(t); /* fls can't be 32 if x != 0 */
84 if (pos > FXFRAC)
85 pos = FXFRAC;
86
87 y >>= FXFRAC - pos;
88 if (y == 0)
89 return 0x7FFFFFFF; /* overflow, return MAX_FIXED */
90
91 return (x << pos) / y;
92}
93
94static int gk20a_tegra_channel_busy(struct platform_device *dev)
95{
96 int ret = 0;
97
98 /* Explicitly turn on the host1x clocks
99 * - This is needed as host1x driver sets ignore_children = true
100 * to cater the use case of display clock ON but host1x clock OFF
101 * in OS-Idle-Display-ON case
102 * - This was easily done in ACM as it only checked the ref count
103 * of host1x (or any device for that matter) to be zero before
104 * turning off its clock
105 * - However, runtime PM checks to see if *ANY* child of device is
106 * in ACTIVE state and if yes, it doesn't suspend the parent. As a
107 * result of this, display && host1x clocks remains ON during
108 * OS-Idle-Display-ON case
109 * - The code below fixes this use-case
110 */
111 if (to_platform_device(dev->dev.parent))
112 ret = nvhost_module_busy_ext(
113 to_platform_device(dev->dev.parent));
114
115 return ret;
116}
117
118static void gk20a_tegra_channel_idle(struct platform_device *dev)
119{
120 /* Explicitly turn off the host1x clocks */
121 if (to_platform_device(dev->dev.parent))
122 nvhost_module_idle_ext(to_platform_device(dev->dev.parent));
123}
124
125static void gk20a_tegra_secure_destroy(struct platform_device *pdev,
126 struct gr_ctx_buffer_desc *desc)
127{
128 gk20a_free_sgtable(&desc->sgt);
129 dma_free_attrs(&tegra_vpr_dev, desc->size,
130 (void *)(uintptr_t)&desc->iova,
131 desc->iova, &desc->attrs);
132}
133
134static int gk20a_tegra_secure_alloc(struct platform_device *pdev,
135 struct gr_ctx_buffer_desc *desc,
136 size_t size)
137{
138 struct device *dev = &pdev->dev;
139 DEFINE_DMA_ATTRS(attrs);
140 dma_addr_t iova;
141 struct sg_table *sgt;
142 struct page *page;
143 int err = 0;
144
145 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
146
147 (void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
148 GFP_KERNEL, &attrs);
149 if (dma_mapping_error(&tegra_vpr_dev, iova))
150 return -ENOMEM;
151
152 desc->iova = iova;
153 desc->size = size;
154 desc->attrs = attrs;
155 desc->destroy = gk20a_tegra_secure_destroy;
156
157 sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
158 if (!sgt) {
159 gk20a_err(dev, "failed to allocate memory\n");
160 goto fail;
161 }
162 err = sg_alloc_table(sgt, 1, GFP_KERNEL);
163 if (err) {
164 gk20a_err(dev, "failed to allocate sg_table\n");
165 goto fail_sgt;
166 }
167 page = phys_to_page(iova);
168 sg_set_page(sgt->sgl, page, size, 0);
169 sg_dma_address(sgt->sgl) = iova;
170
171 desc->sgt = sgt;
172
173 return err;
174
175fail_sgt:
176 kfree(sgt);
177fail:
178 dma_free_attrs(&tegra_vpr_dev, desc->size,
179 (void *)(uintptr_t)&desc->iova,
180 desc->iova, &desc->attrs);
181 return err;
182}
183
184/*
185 * gk20a_tegra_get_emc_rate()
186 *
187 * This function returns the minimum emc clock based on gpu frequency
188 */
189
190long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq)
191{
192 long hz;
193
194 freq = INT_TO_FX(HZ_TO_MHZ(freq));
195 hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset;
196
197 hz -= FXMUL(emc_params->emc_dip_slope,
198 FXMUL(freq - emc_params->emc_xmid,
199 freq - emc_params->emc_xmid)) +
200 emc_params->emc_dip_offset;
201
202 hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */
203 hz = (hz < 0) ? 0 : hz;
204
205 return hz;
206}
207
208/*
209 * gk20a_tegra_postscale(pdev, freq)
210 *
211 * This function sets emc frequency based on current gpu frequency
212 */
213
214static void gk20a_tegra_postscale(struct platform_device *pdev,
215 unsigned long freq)
216{
217 struct gk20a_platform *platform = platform_get_drvdata(pdev);
218 struct gk20a_scale_profile *profile = platform->g->scale_profile;
219 struct gk20a_emc_params *emc_params = profile->private_data;
220 struct gk20a *g = get_gk20a(pdev);
221
222 long after = gk20a_clk_get_rate(g);
223 long emc_target = gk20a_tegra_get_emc_rate(emc_params, after);
224
225 clk_set_rate(platform->clk[2], emc_target);
226}
227
228/*
229 * gk20a_tegra_prescale(pdev)
230 *
231 * This function informs EDP about changed constraints.
232 */
233
234static void gk20a_tegra_prescale(struct platform_device *pdev)
235{
236 struct gk20a *g = get_gk20a(pdev);
237 u32 avg = 0;
238
239 gk20a_pmu_load_norm(g, &avg);
240 tegra_edp_notify_gpu_load(avg);
241}
242
243/*
244 * gk20a_tegra_calibrate_emc()
245 *
246 * Compute emc scaling parameters
247 *
248 * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
249 *
250 * Remc - 3d.emc rate
251 * R3d - 3d.cbus rate
252 * Rm - 3d.cbus 'middle' rate = (max + min)/2
253 * S - emc_slope
254 * O - emc_offset
255 * Sd - emc_dip_slope
256 * Od - emc_dip_offset
257 *
258 * this superposes a quadratic dip centered around the middle 3d
259 * frequency over a linear correlation of 3d.emc to 3d clock
260 * rates.
261 *
262 * S, O are chosen so that the maximum 3d rate produces the
263 * maximum 3d.emc rate exactly, and the minimum 3d rate produces
264 * at least the minimum 3d.emc rate.
265 *
266 * Sd and Od are chosen to produce the largest dip that will
267 * keep 3d.emc frequencies monotonically increasing with 3d
268 * frequencies. To achieve this, the first derivative of Remc
269 * with respect to R3d should be zero for the minimal 3d rate:
270 *
271 * R'emc = S - 2 * Sd * (R3d - Rm)
272 * R'emc(R3d-min) = 0
273 * S = 2 * Sd * (R3d-min - Rm)
274 * = 2 * Sd * (R3d-min - R3d-max) / 2
275 *
276 * +------------------------------+
277 * | Sd = S / (R3d-min - R3d-max) |
278 * +------------------------------+
279 *
280 * dip = Sd * (R3d - Rm)^2 + Od
281 *
282 * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
283 *
284 * Sd * (R3d-min - Rm)^2 + Od = 0
285 * Od = -Sd * ((R3d-min - R3d-max) / 2)^2
286 * = -Sd * ((R3d-min - R3d-max)^2) / 4
287 *
288 * +------------------------------+
289 * | Od = (emc-max - emc-min) / 4 |
290 * +------------------------------+
291 *
292 */
293
294void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params,
295 struct clk *clk_3d, struct clk *clk_3d_emc)
296{
297 long correction;
298 unsigned long max_emc;
299 unsigned long min_emc;
300 unsigned long min_rate_3d;
301 unsigned long max_rate_3d;
302
303 max_emc = clk_round_rate(clk_3d_emc, UINT_MAX);
304 max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc));
305
306 min_emc = clk_round_rate(clk_3d_emc, 0);
307 min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc));
308
309 max_rate_3d = clk_round_rate(clk_3d, UINT_MAX);
310 max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d));
311
312 min_rate_3d = clk_round_rate(clk_3d, 0);
313 min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d));
314
315 emc_params->emc_slope =
316 FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d));
317 emc_params->emc_offset = max_emc -
318 FXMUL(emc_params->emc_slope, max_rate_3d);
319 /* Guarantee max 3d rate maps to max emc rate */
320 emc_params->emc_offset += max_emc -
321 (FXMUL(emc_params->emc_slope, max_rate_3d) +
322 emc_params->emc_offset);
323
324 emc_params->emc_dip_offset = (max_emc - min_emc) / 4;
325 emc_params->emc_dip_slope =
326 -FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d);
327 emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2;
328 correction =
329 emc_params->emc_dip_offset +
330 FXMUL(emc_params->emc_dip_slope,
331 FXMUL(max_rate_3d - emc_params->emc_xmid,
332 max_rate_3d - emc_params->emc_xmid));
333 emc_params->emc_dip_offset -= correction;
334}
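
As a sanity check of the derivation in the comment above, the following standalone user-space sketch (not part of this patch) plugs clock limits into the same equations and confirms that, after the dip-offset correction, the maximum 3d rate maps back to the maximum emc rate. All MHz limits below are illustrative assumptions, not real Tegra values.

#include <stdio.h>

#define FXFRAC		12
#define INT_TO_FX(x)	((x) << FXFRAC)
#define FX_TO_INT(x)	((x) >> FXFRAC)

static long fxmul(long x, long y) { return ((long long)x * y) >> FXFRAC; }
static long fxdiv(long x, long y) { return (((long long)x) << FXFRAC) / y; }

int main(void)
{
	/* made-up MHz limits, already converted to 20.12 fixed point */
	long min_3d  = INT_TO_FX(72),  max_3d  = INT_TO_FX(852);
	long min_emc = INT_TO_FX(204), max_emc = INT_TO_FX(924);
	long slope, offset, dip_slope, dip_offset, xmid, dip, emc_at_max;

	slope      = fxdiv(max_emc - min_emc, max_3d - min_3d);
	offset     = max_emc - fxmul(slope, max_3d);
	dip_slope  = -fxdiv(slope, max_3d - min_3d);
	dip_offset = (max_emc - min_emc) / 4;
	xmid       = (max_3d + min_3d) / 2;

	/* shift the dip so that dip(max_3d) == 0, as the driver does */
	dip_offset -= dip_offset +
		fxmul(dip_slope, fxmul(max_3d - xmid, max_3d - xmid));

	dip = fxmul(dip_slope, fxmul(max_3d - xmid, max_3d - xmid)) + dip_offset;
	emc_at_max = fxmul(slope, max_3d) + offset - dip;

	printf("emc at max 3d rate: %ld MHz\n", FX_TO_INT(emc_at_max));
	return 0;
}
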
335
336/*
337 * gk20a_tegra_railgate()
338 *
339 * Gate (disable) gk20a power rail
340 */
341
342static int gk20a_tegra_railgate(struct platform_device *pdev)
343{
344 if (tegra_powergate_is_powered(TEGRA_POWERGATE_GPU))
345 tegra_powergate_partition(TEGRA_POWERGATE_GPU);
346 return 0;
347}
348
349/*
350 * gk20a_tegra_unrailgate()
351 *
352 * Ungate (enable) gk20a power rail
353 */
354
355static int gk20a_tegra_unrailgate(struct platform_device *pdev)
356{
357 tegra_unpowergate_partition(TEGRA_POWERGATE_GPU);
358 return 0;
359}
360
361struct {
362 char *name;
363 unsigned long default_rate;
364} tegra_gk20a_clocks[] = {
365 {"PLLG_ref", UINT_MAX},
366 {"pwr", 204000000},
367 {"emc", UINT_MAX} };
368
369/*
370 * gk20a_tegra_get_clocks()
371 *
372 * This function finds the clocks on the Tegra platform and populates
373 * the clock information into the gk20a platform data.
374 */
375
376static int gk20a_tegra_get_clocks(struct platform_device *pdev)
377{
378 struct gk20a_platform *platform = platform_get_drvdata(pdev);
379 char devname[16];
380 int i;
381 int ret = 0;
382
383 snprintf(devname, sizeof(devname),
384		(pdev->id <= 0) ? "tegra_%s" : "tegra_%s.%d",
385 pdev->name, pdev->id);
386
387 platform->num_clks = 0;
388 for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) {
389 long rate = tegra_gk20a_clocks[i].default_rate;
390 struct clk *c;
391
392 c = clk_get_sys(devname, tegra_gk20a_clocks[i].name);
393 if (IS_ERR(c)) {
394 ret = PTR_ERR(c);
395 goto err_get_clock;
396 }
397 rate = clk_round_rate(c, rate);
398 clk_set_rate(c, rate);
399 platform->clk[i] = c;
400 }
401 platform->num_clks = i;
402
403 return 0;
404
405err_get_clock:
406
407 while (i--)
408 clk_put(platform->clk[i]);
409 return ret;
410}
411
412static void gk20a_tegra_scale_init(struct platform_device *pdev)
413{
414 struct gk20a_platform *platform = gk20a_get_platform(pdev);
415 struct gk20a_scale_profile *profile = platform->g->scale_profile;
416 struct gk20a_emc_params *emc_params;
417
418 if (!profile)
419 return;
420
421 emc_params = kzalloc(sizeof(*emc_params), GFP_KERNEL);
422 if (!emc_params)
423 return;
424
425 gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g),
426 platform->clk[2]);
427
428 profile->private_data = emc_params;
429}
430
431static void gk20a_tegra_debug_dump(struct platform_device *pdev)
432{
433 struct gk20a_platform *platform = gk20a_get_platform(pdev);
434 struct gk20a *g = platform->g;
435 nvhost_debug_dump_device(g->dev);
436}
437
438static int gk20a_tegra_probe(struct platform_device *dev)
439{
440 struct gk20a_platform *platform = gk20a_get_platform(dev);
441
442 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA13) {
443 t132_gk20a_tegra_platform.g = platform->g;
444 *platform = t132_gk20a_tegra_platform;
445 }
446
447 gk20a_tegra_get_clocks(dev);
448
449 return 0;
450}
451
452static int gk20a_tegra_late_probe(struct platform_device *dev)
453{
454 struct gk20a_platform *platform = gk20a_get_platform(dev);
455
456 /* Make gk20a power domain a subdomain of mc */
457 tegra_pd_add_sd(&platform->g->pd);
458
459 /* Initialise tegra specific scaling quirks */
460 gk20a_tegra_scale_init(dev);
461
462 return 0;
463}
464
465static int gk20a_tegra_suspend(struct device *dev)
466{
467 tegra_edp_notify_gpu_load(0);
468 return 0;
469}
470
471static struct resource gk20a_tegra_resources[] = {
472 {
473 .start = TEGRA_GK20A_BAR0_BASE,
474 .end = TEGRA_GK20A_BAR0_BASE + TEGRA_GK20A_BAR0_SIZE - 1,
475 .flags = IORESOURCE_MEM,
476 },
477 {
478 .start = TEGRA_GK20A_BAR1_BASE,
479 .end = TEGRA_GK20A_BAR1_BASE + TEGRA_GK20A_BAR1_SIZE - 1,
480 .flags = IORESOURCE_MEM,
481 },
482 { /* Used on ASIM only */
483 .start = TEGRA_GK20A_SIM_BASE,
484 .end = TEGRA_GK20A_SIM_BASE + TEGRA_GK20A_SIM_SIZE - 1,
485 .flags = IORESOURCE_MEM,
486 },
487 {
488 .start = TEGRA_GK20A_INTR,
489 .end = TEGRA_GK20A_INTR,
490 .flags = IORESOURCE_IRQ,
491 },
492 {
493 .start = TEGRA_GK20A_INTR_NONSTALL,
494 .end = TEGRA_GK20A_INTR_NONSTALL,
495 .flags = IORESOURCE_IRQ,
496 },
497};
498
499struct gk20a_platform t132_gk20a_tegra_platform = {
500 .has_syncpoints = true,
501
502 /* power management configuration */
503 .railgate_delay = 500,
504 .clockgate_delay = 50,
505
506 .probe = gk20a_tegra_probe,
507 .late_probe = gk20a_tegra_late_probe,
508
509 /* power management callbacks */
510 .suspend = gk20a_tegra_suspend,
511 .railgate = gk20a_tegra_railgate,
512 .unrailgate = gk20a_tegra_unrailgate,
513
514 /* frequency scaling configuration */
515 .prescale = gk20a_tegra_prescale,
516 .postscale = gk20a_tegra_postscale,
517 .devfreq_governor = "nvhost_podgov",
518 .qos_id = PM_QOS_GPU_FREQ_MIN,
519
520 .channel_busy = gk20a_tegra_channel_busy,
521 .channel_idle = gk20a_tegra_channel_idle,
522 .secure_alloc = gk20a_tegra_secure_alloc,
523 .dump_platform_dependencies = gk20a_tegra_debug_dump,
524};
525
526struct gk20a_platform gk20a_tegra_platform = {
527 .has_syncpoints = true,
528
529 /* power management configuration */
530 .railgate_delay = 500,
531 .clockgate_delay = 50,
532 .can_railgate = true,
533
534 .probe = gk20a_tegra_probe,
535 .late_probe = gk20a_tegra_late_probe,
536
537 /* power management callbacks */
538 .suspend = gk20a_tegra_suspend,
539 .railgate = gk20a_tegra_railgate,
540 .unrailgate = gk20a_tegra_unrailgate,
541
542 /* frequency scaling configuration */
543 .prescale = gk20a_tegra_prescale,
544 .postscale = gk20a_tegra_postscale,
545 .devfreq_governor = "nvhost_podgov",
546 .qos_id = PM_QOS_GPU_FREQ_MIN,
547
548 .channel_busy = gk20a_tegra_channel_busy,
549 .channel_idle = gk20a_tegra_channel_idle,
550 .secure_alloc = gk20a_tegra_secure_alloc,
551 .dump_platform_dependencies = gk20a_tegra_debug_dump,
552};
553
554struct platform_device tegra_gk20a_device = {
555 .name = "gk20a",
556 .resource = gk20a_tegra_resources,
557 .num_resources = ARRAY_SIZE(gk20a_tegra_resources),
558 .dev = {
559 .platform_data = &gk20a_tegra_platform,
560 },
561};
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
new file mode 100644
index 00000000..a00499a9
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -0,0 +1,3796 @@
1/*
2 * drivers/video/tegra/host/gk20a/pmu_gk20a.c
3 *
4 * GK20A PMU (aka. gPMU outside gk20a context)
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#include <linux/delay.h> /* for mdelay */
23#include <linux/firmware.h>
24#include <linux/clk.h>
25#include <linux/module.h>
26#include <linux/debugfs.h>
27#include <linux/dma-mapping.h>
28
29#include "gk20a.h"
30#include "hw_mc_gk20a.h"
31#include "hw_pwr_gk20a.h"
32#include "hw_top_gk20a.h"
33
34#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin"
35
36#define gk20a_dbg_pmu(fmt, arg...) \
37 gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
38
39static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
40static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
41 u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
42static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work);
43static void pmu_save_zbc(struct gk20a *g, u32 entries);
44static void ap_callback_init_and_enable_ctrl(
45 struct gk20a *g, struct pmu_msg *msg,
46 void *param, u32 seq_desc, u32 status);
47static int gk20a_pmu_ap_send_command(struct gk20a *g,
48 union pmu_ap_cmd *p_ap_cmd, bool b_block);
49
50static u32 pmu_cmdline_size_v0(struct pmu_gk20a *pmu)
51{
52 return sizeof(struct pmu_cmdline_args_v0);
53}
54
55static u32 pmu_cmdline_size_v1(struct pmu_gk20a *pmu)
56{
57 return sizeof(struct pmu_cmdline_args_v1);
58}
59
60static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
61{
62 pmu->args_v1.cpu_freq_hz = freq;
63}
64
65static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
66{
67 pmu->args_v0.cpu_freq_hz = freq;
68}
69
70static void *get_pmu_cmdline_args_ptr_v1(struct pmu_gk20a *pmu)
71{
72 return (void *)(&pmu->args_v1);
73}
74
75static void *get_pmu_cmdline_args_ptr_v0(struct pmu_gk20a *pmu)
76{
77 return (void *)(&pmu->args_v0);
78}
79
80static u32 get_pmu_allocation_size_v1(struct pmu_gk20a *pmu)
81{
82 return sizeof(struct pmu_allocation_v1);
83}
84
85static u32 get_pmu_allocation_size_v0(struct pmu_gk20a *pmu)
86{
87 return sizeof(struct pmu_allocation_v0);
88}
89
90static void set_pmu_allocation_ptr_v1(struct pmu_gk20a *pmu,
91 void **pmu_alloc_ptr, void *assign_ptr)
92{
93 struct pmu_allocation_v1 **pmu_a_ptr =
94 (struct pmu_allocation_v1 **)pmu_alloc_ptr;
95 *pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
96}
97
98static void set_pmu_allocation_ptr_v0(struct pmu_gk20a *pmu,
99 void **pmu_alloc_ptr, void *assign_ptr)
100{
101 struct pmu_allocation_v0 **pmu_a_ptr =
102 (struct pmu_allocation_v0 **)pmu_alloc_ptr;
103 *pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr;
104}
105
106static void pmu_allocation_set_dmem_size_v1(struct pmu_gk20a *pmu,
107 void *pmu_alloc_ptr, u16 size)
108{
109 struct pmu_allocation_v1 *pmu_a_ptr =
110 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
111 pmu_a_ptr->alloc.dmem.size = size;
112}
113
114static void pmu_allocation_set_dmem_size_v0(struct pmu_gk20a *pmu,
115 void *pmu_alloc_ptr, u16 size)
116{
117 struct pmu_allocation_v0 *pmu_a_ptr =
118 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
119 pmu_a_ptr->alloc.dmem.size = size;
120}
121
122static u16 pmu_allocation_get_dmem_size_v1(struct pmu_gk20a *pmu,
123 void *pmu_alloc_ptr)
124{
125 struct pmu_allocation_v1 *pmu_a_ptr =
126 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
127 return pmu_a_ptr->alloc.dmem.size;
128}
129
130static u16 pmu_allocation_get_dmem_size_v0(struct pmu_gk20a *pmu,
131 void *pmu_alloc_ptr)
132{
133 struct pmu_allocation_v0 *pmu_a_ptr =
134 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
135 return pmu_a_ptr->alloc.dmem.size;
136}
137
138static u32 pmu_allocation_get_dmem_offset_v1(struct pmu_gk20a *pmu,
139 void *pmu_alloc_ptr)
140{
141 struct pmu_allocation_v1 *pmu_a_ptr =
142 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
143 return pmu_a_ptr->alloc.dmem.offset;
144}
145
146static u32 pmu_allocation_get_dmem_offset_v0(struct pmu_gk20a *pmu,
147 void *pmu_alloc_ptr)
148{
149 struct pmu_allocation_v0 *pmu_a_ptr =
150 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
151 return pmu_a_ptr->alloc.dmem.offset;
152}
153
154static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct pmu_gk20a *pmu,
155 void *pmu_alloc_ptr)
156{
157 struct pmu_allocation_v1 *pmu_a_ptr =
158 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
159 return &pmu_a_ptr->alloc.dmem.offset;
160}
161
162static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct pmu_gk20a *pmu,
163 void *pmu_alloc_ptr)
164{
165 struct pmu_allocation_v0 *pmu_a_ptr =
166 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
167 return &pmu_a_ptr->alloc.dmem.offset;
168}
169
170static void pmu_allocation_set_dmem_offset_v1(struct pmu_gk20a *pmu,
171 void *pmu_alloc_ptr, u32 offset)
172{
173 struct pmu_allocation_v1 *pmu_a_ptr =
174 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
175 pmu_a_ptr->alloc.dmem.offset = offset;
176}
177
178static void pmu_allocation_set_dmem_offset_v0(struct pmu_gk20a *pmu,
179 void *pmu_alloc_ptr, u32 offset)
180{
181 struct pmu_allocation_v0 *pmu_a_ptr =
182 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
183 pmu_a_ptr->alloc.dmem.offset = offset;
184}
185
186static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
187{
188 return (void *)(&(init->pmu_init_v1));
189}
190
191static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
192{
193 struct pmu_init_msg_pmu_v1 *init =
194 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
195 return init->sw_managed_area_offset;
196}
197
198static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
199{
200 struct pmu_init_msg_pmu_v1 *init =
201 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
202 return init->sw_managed_area_size;
203}
204
205static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init)
206{
207 return (void *)(&(init->pmu_init_v0));
208}
209
210static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg)
211{
212 struct pmu_init_msg_pmu_v0 *init =
213 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
214 return init->sw_managed_area_offset;
215}
216
217static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg)
218{
219 struct pmu_init_msg_pmu_v0 *init =
220 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
221 return init->sw_managed_area_size;
222}
223
224static u32 get_pmu_perfmon_cmd_start_size_v1(void)
225{
226 return sizeof(struct pmu_perfmon_cmd_start_v1);
227}
228
229static u32 get_pmu_perfmon_cmd_start_size_v0(void)
230{
231 return sizeof(struct pmu_perfmon_cmd_start_v0);
232}
233
234static int get_perfmon_cmd_start_offsetofvar_v1(
235 enum pmu_perfmon_cmd_start_fields field)
236{
237 switch (field) {
238 case COUNTER_ALLOC:
239 return offsetof(struct pmu_perfmon_cmd_start_v1,
240 counter_alloc);
241 default:
242 return -EINVAL;
243 break;
244 }
245 return 0;
246}
247
248static int get_perfmon_cmd_start_offsetofvar_v0(
249 enum pmu_perfmon_cmd_start_fields field)
250{
251 switch (field) {
252 case COUNTER_ALLOC:
253 return offsetof(struct pmu_perfmon_cmd_start_v0,
254 counter_alloc);
255 default:
256 return -EINVAL;
257 break;
258 }
259 return 0;
260}
261
262static u32 get_pmu_perfmon_cmd_init_size_v1(void)
263{
264 return sizeof(struct pmu_perfmon_cmd_init_v1);
265}
266
267static u32 get_pmu_perfmon_cmd_init_size_v0(void)
268{
269 return sizeof(struct pmu_perfmon_cmd_init_v0);
270}
271
272static int get_perfmon_cmd_init_offsetofvar_v1(
273 enum pmu_perfmon_cmd_start_fields field)
274{
275 switch (field) {
276 case COUNTER_ALLOC:
277 return offsetof(struct pmu_perfmon_cmd_init_v1,
278 counter_alloc);
279 default:
280 return -EINVAL;
281 break;
282 }
283 return 0;
284}
285
286static int get_perfmon_cmd_init_offsetofvar_v0(
287 enum pmu_perfmon_cmd_start_fields field)
288{
289 switch (field) {
290 case COUNTER_ALLOC:
291 return offsetof(struct pmu_perfmon_cmd_init_v0,
292 counter_alloc);
293 default:
294 return -EINVAL;
295 break;
296 }
297 return 0;
298}
299
300static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
301{
302 struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
303 start->cmd_type = value;
304}
305
306static void perfmon_start_set_cmd_type_v0(struct pmu_perfmon_cmd *pc, u8 value)
307{
308 struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
309 start->cmd_type = value;
310}
311
312static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
313{
314 struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
315 start->group_id = value;
316}
317
318static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
319{
320 struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
321 start->group_id = value;
322}
323
324static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
325{
326 struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
327 start->state_id = value;
328}
329
330static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
331{
332 struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
333 start->state_id = value;
334}
335
336static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
337{
338 struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
339 start->flags = value;
340}
341
342static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value)
343{
344 struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
345 start->flags = value;
346}
347
348static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
349{
350 struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
351 return start->flags;
352}
353
354static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc)
355{
356 struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
357 return start->flags;
358}
359
360static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
361 u16 value)
362{
363 struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
364 init->sample_buffer = value;
365}
366
367static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc,
368 u16 value)
369{
370 struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
371 init->sample_buffer = value;
372}
373
374static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
375 u8 value)
376{
377 struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
378 init->to_decrease_count = value;
379}
380
381static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc,
382 u8 value)
383{
384 struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
385 init->to_decrease_count = value;
386}
387
388static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
389 u8 value)
390{
391 struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
392 init->base_counter_id = value;
393}
394
395static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc,
396 u8 value)
397{
398 struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
399 init->base_counter_id = value;
400}
401
402static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
403 u32 value)
404{
405 struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
406 init->sample_period_us = value;
407}
408
409static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc,
410 u32 value)
411{
412 struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
413 init->sample_period_us = value;
414}
415
416static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
417 u8 value)
418{
419 struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
420 init->num_counters = value;
421}
422
423static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc,
424 u8 value)
425{
426 struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
427 init->num_counters = value;
428}
429
430static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
431 u8 value)
432{
433 struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
434 init->samples_in_moving_avg = value;
435}
436
437static void perfmon_cmd_init_set_mov_avg_v0(struct pmu_perfmon_cmd *pc,
438 u8 value)
439{
440 struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
441 init->samples_in_moving_avg = value;
442}
443
444static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue,
445 u32 id, void *pmu_init_msg)
446{
447 struct pmu_init_msg_pmu_v0 *init =
448 (struct pmu_init_msg_pmu_v0 *)pmu_init_msg;
449 queue->index = init->queue_info[id].index;
450 queue->offset = init->queue_info[id].offset;
451 queue->size = init->queue_info[id].size;
452}
453
454static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue,
455 u32 id, void *pmu_init_msg)
456{
457 struct pmu_init_msg_pmu_v1 *init =
458 (struct pmu_init_msg_pmu_v1 *)pmu_init_msg;
459 queue->index = init->queue_info[id].index;
460 queue->offset = init->queue_info[id].offset;
461 queue->size = init->queue_info[id].size;
462}
463
464static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq)
465{
466 return (void *)(&seq->in_v1);
467}
468
469static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq)
470{
471 return (void *)(&seq->in_v0);
472}
473
474static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq)
475{
476 return (void *)(&seq->out_v1);
477}
478
479static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
480{
481 return (void *)(&seq->out_v0);
482}
483
484static int gk20a_init_pmu(struct pmu_gk20a *pmu)
485{
486 struct gk20a *g = pmu->g;
487 switch (pmu->desc->app_version) {
488 case APP_VERSION_1:
489 g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
490 g->ops.pmu_ver.get_pmu_cmdline_args_size =
491 pmu_cmdline_size_v1;
492 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
493 set_pmu_cmdline_args_cpufreq_v1;
494 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
495 get_pmu_cmdline_args_ptr_v1;
496 g->ops.pmu_ver.get_pmu_allocation_struct_size =
497 get_pmu_allocation_size_v1;
498 g->ops.pmu_ver.set_pmu_allocation_ptr =
499 set_pmu_allocation_ptr_v1;
500 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
501 pmu_allocation_set_dmem_size_v1;
502 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
503 pmu_allocation_get_dmem_size_v1;
504 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
505 pmu_allocation_get_dmem_offset_v1;
506 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
507 pmu_allocation_get_dmem_offset_addr_v1;
508 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
509 pmu_allocation_set_dmem_offset_v1;
510 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
511 get_pmu_init_msg_pmu_queue_params_v1;
512 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
513 get_pmu_msg_pmu_init_msg_ptr_v1;
514 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
515 get_pmu_init_msg_pmu_sw_mg_off_v1;
516 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
517 get_pmu_init_msg_pmu_sw_mg_size_v1;
518 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
519 get_pmu_perfmon_cmd_start_size_v1;
520 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
521 get_perfmon_cmd_start_offsetofvar_v1;
522 g->ops.pmu_ver.perfmon_start_set_cmd_type =
523 perfmon_start_set_cmd_type_v1;
524 g->ops.pmu_ver.perfmon_start_set_group_id =
525 perfmon_start_set_group_id_v1;
526 g->ops.pmu_ver.perfmon_start_set_state_id =
527 perfmon_start_set_state_id_v1;
528 g->ops.pmu_ver.perfmon_start_set_flags =
529 perfmon_start_set_flags_v1;
530 g->ops.pmu_ver.perfmon_start_get_flags =
531 perfmon_start_get_flags_v1;
532 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
533 get_pmu_perfmon_cmd_init_size_v1;
534 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
535 get_perfmon_cmd_init_offsetofvar_v1;
536 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
537 perfmon_cmd_init_set_sample_buffer_v1;
538 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
539 perfmon_cmd_init_set_dec_cnt_v1;
540 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
541 perfmon_cmd_init_set_base_cnt_id_v1;
542 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
543 perfmon_cmd_init_set_samp_period_us_v1;
544 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
545 perfmon_cmd_init_set_num_cnt_v1;
546 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
547 perfmon_cmd_init_set_mov_avg_v1;
548 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
549 get_pmu_sequence_in_alloc_ptr_v1;
550 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
551 get_pmu_sequence_out_alloc_ptr_v1;
552 break;
553 case APP_VERSION_0:
554 g->ops.pmu_ver.cmd_id_zbc_table_update = 14;
555 g->ops.pmu_ver.get_pmu_cmdline_args_size =
556 pmu_cmdline_size_v0;
557 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
558 set_pmu_cmdline_args_cpufreq_v0;
559 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
560 get_pmu_cmdline_args_ptr_v0;
561 g->ops.pmu_ver.get_pmu_allocation_struct_size =
562 get_pmu_allocation_size_v0;
563 g->ops.pmu_ver.set_pmu_allocation_ptr =
564 set_pmu_allocation_ptr_v0;
565 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
566 pmu_allocation_set_dmem_size_v0;
567 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
568 pmu_allocation_get_dmem_size_v0;
569 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
570 pmu_allocation_get_dmem_offset_v0;
571 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
572 pmu_allocation_get_dmem_offset_addr_v0;
573 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
574 pmu_allocation_set_dmem_offset_v0;
575 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
576 get_pmu_init_msg_pmu_queue_params_v0;
577 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
578 get_pmu_msg_pmu_init_msg_ptr_v0;
579 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
580 get_pmu_init_msg_pmu_sw_mg_off_v0;
581 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
582 get_pmu_init_msg_pmu_sw_mg_size_v0;
583 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
584 get_pmu_perfmon_cmd_start_size_v0;
585 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
586 get_perfmon_cmd_start_offsetofvar_v0;
587 g->ops.pmu_ver.perfmon_start_set_cmd_type =
588 perfmon_start_set_cmd_type_v0;
589 g->ops.pmu_ver.perfmon_start_set_group_id =
590 perfmon_start_set_group_id_v0;
591 g->ops.pmu_ver.perfmon_start_set_state_id =
592 perfmon_start_set_state_id_v0;
593 g->ops.pmu_ver.perfmon_start_set_flags =
594 perfmon_start_set_flags_v0;
595 g->ops.pmu_ver.perfmon_start_get_flags =
596 perfmon_start_get_flags_v0;
597 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
598 get_pmu_perfmon_cmd_init_size_v0;
599 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
600 get_perfmon_cmd_init_offsetofvar_v0;
601 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
602 perfmon_cmd_init_set_sample_buffer_v0;
603 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
604 perfmon_cmd_init_set_dec_cnt_v0;
605 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
606 perfmon_cmd_init_set_base_cnt_id_v0;
607 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
608 perfmon_cmd_init_set_samp_period_us_v0;
609 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
610 perfmon_cmd_init_set_num_cnt_v0;
611 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
612 perfmon_cmd_init_set_mov_avg_v0;
613 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
614 get_pmu_sequence_in_alloc_ptr_v0;
615 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
616 get_pmu_sequence_out_alloc_ptr_v0;
617 break;
618 default:
619 gk20a_err(dev_from_gk20a(pmu->g),
620 "PMU code version not supported\n");
621 return -EINVAL;
622 break;
623 }
624 return 0;
625}
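
gk20a_init_pmu() above resolves the PMU ucode version once and fills g->ops.pmu_ver with version-specific accessors, so the rest of the driver never branches on the version again. A minimal standalone sketch of the same dispatch pattern (not part of this patch; the structure layouts below are invented for illustration):

#include <stddef.h>
#include <stdio.h>

/* invented stand-ins for the v0/v1 command line argument layouts */
struct cmdline_args_v0 { unsigned int cpu_freq_hz; };
struct cmdline_args_v1 { unsigned int cpu_freq_hz; unsigned int secure_mode; };

struct pmu_ver_ops {
	size_t (*cmdline_args_size)(void);
};

static size_t size_v0(void) { return sizeof(struct cmdline_args_v0); }
static size_t size_v1(void) { return sizeof(struct cmdline_args_v1); }

static int init_ops(struct pmu_ver_ops *ops, int app_version)
{
	switch (app_version) {
	case 0:
		ops->cmdline_args_size = size_v0;
		break;
	case 1:
		ops->cmdline_args_size = size_v1;
		break;
	default:
		return -1;	/* unsupported ucode version */
	}
	return 0;
}

int main(void)
{
	struct pmu_ver_ops ops;

	if (init_ops(&ops, 1))
		return 1;
	printf("cmdline args size: %zu\n", ops.cmdline_args_size());
	return 0;
}
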
626
627static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
628 u32 src, u8 *dst, u32 size, u8 port)
629{
630 struct gk20a *g = pmu->g;
631 u32 i, words, bytes;
632 u32 data, addr_mask;
633 u32 *dst_u32 = (u32*)dst;
634
635 if (size == 0) {
636 gk20a_err(dev_from_gk20a(g),
637 "size is zero");
638 return;
639 }
640
641 if (src & 0x3) {
642 gk20a_err(dev_from_gk20a(g),
643 "src (0x%08x) not 4-byte aligned", src);
644 return;
645 }
646
647 mutex_lock(&pmu->pmu_copy_lock);
648
649 words = size >> 2;
650 bytes = size & 0x3;
651
652 addr_mask = pwr_falcon_dmemc_offs_m() |
653 pwr_falcon_dmemc_blk_m();
654
655 src &= addr_mask;
656
657 gk20a_writel(g, pwr_falcon_dmemc_r(port),
658 src | pwr_falcon_dmemc_aincr_f(1));
659
660 for (i = 0; i < words; i++)
661 dst_u32[i] = gk20a_readl(g, pwr_falcon_dmemd_r(port));
662
663 if (bytes > 0) {
664 data = gk20a_readl(g, pwr_falcon_dmemd_r(port));
665 for (i = 0; i < bytes; i++) {
666 dst[(words << 2) + i] = ((u8 *)&data)[i];
667 gk20a_dbg_pmu("read: dst_u8[%d]=0x%08x",
668 i, dst[(words << 2) + i]);
669 }
670 }
671 mutex_unlock(&pmu->pmu_copy_lock);
672 return;
673}
674
675static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
676 u32 dst, u8 *src, u32 size, u8 port)
677{
678 struct gk20a *g = pmu->g;
679 u32 i, words, bytes;
680 u32 data, addr_mask;
681 u32 *src_u32 = (u32*)src;
682
683 if (size == 0) {
684 gk20a_err(dev_from_gk20a(g),
685 "size is zero");
686 return;
687 }
688
689 if (dst & 0x3) {
690 gk20a_err(dev_from_gk20a(g),
691 "dst (0x%08x) not 4-byte aligned", dst);
692 return;
693 }
694
695 mutex_lock(&pmu->pmu_copy_lock);
696
697 words = size >> 2;
698 bytes = size & 0x3;
699
700 addr_mask = pwr_falcon_dmemc_offs_m() |
701 pwr_falcon_dmemc_blk_m();
702
703 dst &= addr_mask;
704
705 gk20a_writel(g, pwr_falcon_dmemc_r(port),
706 dst | pwr_falcon_dmemc_aincw_f(1));
707
708 for (i = 0; i < words; i++)
709 gk20a_writel(g, pwr_falcon_dmemd_r(port), src_u32[i]);
710
711 if (bytes > 0) {
712 data = 0;
713 for (i = 0; i < bytes; i++)
714 ((u8 *)&data)[i] = src[(words << 2) + i];
715 gk20a_writel(g, pwr_falcon_dmemd_r(port), data);
716 }
717
718 data = gk20a_readl(g, pwr_falcon_dmemc_r(port)) & addr_mask;
719 size = ALIGN(size, 4);
720 if (data != dst + size) {
721 gk20a_err(dev_from_gk20a(g),
722 "copy failed. bytes written %d, expected %d",
723 data - dst, size);
724 }
725 mutex_unlock(&pmu->pmu_copy_lock);
726 return;
727}
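
Both DMEM copy helpers above split the transfer into whole 32-bit words plus a 0-3 byte tail (words = size >> 2, bytes = size & 0x3), since the falcon DMEM port moves one 32-bit word per access. A trivial standalone sketch of that split (not part of this patch; the byte count is a made-up example):

#include <stdio.h>

int main(void)
{
	unsigned int size  = 23;		/* made-up transfer length */
	unsigned int words = size >> 2;		/* whole 32-bit words */
	unsigned int bytes = size & 0x3;	/* trailing bytes */

	printf("%u bytes -> %u words + %u tail bytes\n", size, words, bytes);
	return 0;
}
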
728
729static int pmu_idle(struct pmu_gk20a *pmu)
730{
731 struct gk20a *g = pmu->g;
732 unsigned long end_jiffies = jiffies +
733 msecs_to_jiffies(2000);
734 u32 idle_stat;
735
736 /* wait for pmu idle */
737 do {
738 idle_stat = gk20a_readl(g, pwr_falcon_idlestate_r());
739
740 if (pwr_falcon_idlestate_falcon_busy_v(idle_stat) == 0 &&
741 pwr_falcon_idlestate_ext_busy_v(idle_stat) == 0) {
742 break;
743 }
744
745 if (time_after_eq(jiffies, end_jiffies)) {
746 gk20a_err(dev_from_gk20a(g),
747 "timeout waiting pmu idle : 0x%08x",
748 idle_stat);
749 return -EBUSY;
750 }
751 usleep_range(100, 200);
752 } while (1);
753
754 gk20a_dbg_fn("done");
755 return 0;
756}
757
758static void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable)
759{
760 struct gk20a *g = pmu->g;
761
762 gk20a_dbg_fn("");
763
764 gk20a_writel(g, mc_intr_mask_0_r(),
765 gk20a_readl(g, mc_intr_mask_0_r()) &
766 ~mc_intr_mask_0_pmu_enabled_f());
767 gk20a_writel(g, mc_intr_mask_1_r(),
768 gk20a_readl(g, mc_intr_mask_1_r()) &
769 ~mc_intr_mask_1_pmu_enabled_f());
770
771 gk20a_writel(g, pwr_falcon_irqmclr_r(),
772 pwr_falcon_irqmclr_gptmr_f(1) |
773 pwr_falcon_irqmclr_wdtmr_f(1) |
774 pwr_falcon_irqmclr_mthd_f(1) |
775 pwr_falcon_irqmclr_ctxsw_f(1) |
776 pwr_falcon_irqmclr_halt_f(1) |
777 pwr_falcon_irqmclr_exterr_f(1) |
778 pwr_falcon_irqmclr_swgen0_f(1) |
779 pwr_falcon_irqmclr_swgen1_f(1) |
780 pwr_falcon_irqmclr_ext_f(0xff));
781
782 if (enable) {
783 /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
784 gk20a_writel(g, pwr_falcon_irqdest_r(),
785 pwr_falcon_irqdest_host_gptmr_f(0) |
786 pwr_falcon_irqdest_host_wdtmr_f(1) |
787 pwr_falcon_irqdest_host_mthd_f(0) |
788 pwr_falcon_irqdest_host_ctxsw_f(0) |
789 pwr_falcon_irqdest_host_halt_f(1) |
790 pwr_falcon_irqdest_host_exterr_f(0) |
791 pwr_falcon_irqdest_host_swgen0_f(1) |
792 pwr_falcon_irqdest_host_swgen1_f(0) |
793 pwr_falcon_irqdest_host_ext_f(0xff) |
794 pwr_falcon_irqdest_target_gptmr_f(1) |
795 pwr_falcon_irqdest_target_wdtmr_f(0) |
796 pwr_falcon_irqdest_target_mthd_f(0) |
797 pwr_falcon_irqdest_target_ctxsw_f(0) |
798 pwr_falcon_irqdest_target_halt_f(0) |
799 pwr_falcon_irqdest_target_exterr_f(0) |
800 pwr_falcon_irqdest_target_swgen0_f(0) |
801 pwr_falcon_irqdest_target_swgen1_f(0) |
802 pwr_falcon_irqdest_target_ext_f(0xff));
803
804 /* 0=disable, 1=enable */
805 gk20a_writel(g, pwr_falcon_irqmset_r(),
806 pwr_falcon_irqmset_gptmr_f(1) |
807 pwr_falcon_irqmset_wdtmr_f(1) |
808 pwr_falcon_irqmset_mthd_f(0) |
809 pwr_falcon_irqmset_ctxsw_f(0) |
810 pwr_falcon_irqmset_halt_f(1) |
811 pwr_falcon_irqmset_exterr_f(1) |
812 pwr_falcon_irqmset_swgen0_f(1) |
813 pwr_falcon_irqmset_swgen1_f(1));
814
815 gk20a_writel(g, mc_intr_mask_0_r(),
816 gk20a_readl(g, mc_intr_mask_0_r()) |
817 mc_intr_mask_0_pmu_enabled_f());
818 }
819
820 gk20a_dbg_fn("done");
821}
822
823static int pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
824{
825 struct gk20a *g = pmu->g;
826
827 gk20a_dbg_fn("");
828
829 if (enable) {
830 int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
831 gk20a_enable(g, mc_enable_pwr_enabled_f());
832
833 do {
834 u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) &
835 (pwr_falcon_dmactl_dmem_scrubbing_m() |
836 pwr_falcon_dmactl_imem_scrubbing_m());
837
838 if (!w) {
839 gk20a_dbg_fn("done");
840 return 0;
841 }
842 udelay(GR_IDLE_CHECK_DEFAULT);
843 } while (--retries || !tegra_platform_is_silicon());
844
845 gk20a_disable(g, mc_enable_pwr_enabled_f());
846 gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");
847
848 return -ETIMEDOUT;
849 } else {
850 gk20a_disable(g, mc_enable_pwr_enabled_f());
851 return 0;
852 }
853}
854
855static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
856{
857 struct gk20a *g = pmu->g;
858 u32 pmc_enable;
859 int err;
860
861 gk20a_dbg_fn("");
862
863 if (!enable) {
864 pmc_enable = gk20a_readl(g, mc_enable_r());
865 if (mc_enable_pwr_v(pmc_enable) !=
866 mc_enable_pwr_disabled_v()) {
867
868 pmu_enable_irq(pmu, false);
869 pmu_enable_hw(pmu, false);
870 }
871 } else {
872 err = pmu_enable_hw(pmu, true);
873 if (err)
874 return err;
875
876 /* TBD: post reset */
877
878 err = pmu_idle(pmu);
879 if (err)
880 return err;
881
882 pmu_enable_irq(pmu, true);
883 }
884
885 gk20a_dbg_fn("done");
886 return 0;
887}
888
889static int pmu_reset(struct pmu_gk20a *pmu)
890{
891 int err;
892
893 err = pmu_idle(pmu);
894 if (err)
895 return err;
896
897 /* TBD: release pmu hw mutex */
898
899 err = pmu_enable(pmu, false);
900 if (err)
901 return err;
902
903 /* TBD: cancel all sequences */
904 /* TBD: init all sequences and state tables */
905 /* TBD: restore pre-init message handler */
906
907 err = pmu_enable(pmu, true);
908 if (err)
909 return err;
910
911 return 0;
912}
913
914static int pmu_bootstrap(struct pmu_gk20a *pmu)
915{
916 struct gk20a *g = pmu->g;
917 struct gk20a_platform *platform = platform_get_drvdata(g->dev);
918 struct mm_gk20a *mm = &g->mm;
919 struct pmu_ucode_desc *desc = pmu->desc;
920 u64 addr_code, addr_data, addr_load;
921 u32 i, blocks, addr_args;
922
923 gk20a_dbg_fn("");
924
925 gk20a_writel(g, pwr_falcon_itfen_r(),
926 gk20a_readl(g, pwr_falcon_itfen_r()) |
927 pwr_falcon_itfen_ctxen_enable_f());
928 gk20a_writel(g, pwr_pmu_new_instblk_r(),
929 pwr_pmu_new_instblk_ptr_f(
930 mm->pmu.inst_block.cpu_pa >> 12) |
931 pwr_pmu_new_instblk_valid_f(1) |
932 pwr_pmu_new_instblk_target_sys_coh_f());
933
934 /* TBD: load all other surfaces */
935
936 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
937 clk_get_rate(platform->clk[1]));
938
939 addr_args = (pwr_falcon_hwcfg_dmem_size_v(
940 gk20a_readl(g, pwr_falcon_hwcfg_r()))
941 << GK20A_PMU_DMEM_BLKSIZE2) -
942 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
943
944 pmu_copy_to_dmem(pmu, addr_args,
945 (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
946 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
947
948 gk20a_writel(g, pwr_falcon_dmemc_r(0),
949 pwr_falcon_dmemc_offs_f(0) |
950 pwr_falcon_dmemc_blk_f(0) |
951 pwr_falcon_dmemc_aincw_f(1));
952
953 addr_code = u64_lo32((pmu->ucode.pmu_va +
954 desc->app_start_offset +
955 desc->app_resident_code_offset) >> 8) ;
956 addr_data = u64_lo32((pmu->ucode.pmu_va +
957 desc->app_start_offset +
958 desc->app_resident_data_offset) >> 8);
959 addr_load = u64_lo32((pmu->ucode.pmu_va +
960 desc->bootloader_start_offset) >> 8);
961
962 gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
963 gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
964 gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
965 gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
966 gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
967 gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
968 gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
969 gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
970 gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
971 gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);
972
973 gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
974 addr_load - (desc->bootloader_imem_offset >> 8));
975
976 blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
977
978 for (i = 0; i < blocks; i++) {
979 gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
980 desc->bootloader_imem_offset + (i << 8));
981 gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
982 desc->bootloader_imem_offset + (i << 8));
983 gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
984 pwr_falcon_dmatrfcmd_imem_f(1) |
985 pwr_falcon_dmatrfcmd_write_f(0) |
986 pwr_falcon_dmatrfcmd_size_f(6) |
987 pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
988 }
989
990 gk20a_writel(g, pwr_falcon_bootvec_r(),
991 pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point));
992
993 gk20a_writel(g, pwr_falcon_cpuctl_r(),
994 pwr_falcon_cpuctl_startcpu_f(1));
995
996 gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);
997
998 return 0;
999}
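
pmu_bootstrap() above rounds the bootloader size up to whole 256-byte IMEM blocks before issuing one DMA transfer per block. A standalone sketch of that rounding (not part of this patch; the byte count is a made-up example):

#include <stdio.h>

int main(void)
{
	unsigned int size   = 0x1234;	/* made-up bootloader size */
	unsigned int blocks = ((size + 0xFF) & ~0xFFu) >> 8;

	printf("%u bytes -> %u blocks of 256 bytes\n", size, blocks);
	return 0;
}
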
1000
1001static void pmu_seq_init(struct pmu_gk20a *pmu)
1002{
1003 u32 i;
1004
1005 memset(pmu->seq, 0,
1006 sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
1007 memset(pmu->pmu_seq_tbl, 0,
1008 sizeof(pmu->pmu_seq_tbl));
1009
1010 for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
1011 pmu->seq[i].id = i;
1012}
1013
1014static int pmu_seq_acquire(struct pmu_gk20a *pmu,
1015 struct pmu_sequence **pseq)
1016{
1017 struct gk20a *g = pmu->g;
1018 struct pmu_sequence *seq;
1019 u32 index;
1020
1021 mutex_lock(&pmu->pmu_seq_lock);
1022 index = find_first_zero_bit(pmu->pmu_seq_tbl,
1023 sizeof(pmu->pmu_seq_tbl));
1024 if (index >= sizeof(pmu->pmu_seq_tbl)) {
1025 gk20a_err(dev_from_gk20a(g),
1026 "no free sequence available");
1027 mutex_unlock(&pmu->pmu_seq_lock);
1028 return -EAGAIN;
1029 }
1030 set_bit(index, pmu->pmu_seq_tbl);
1031 mutex_unlock(&pmu->pmu_seq_lock);
1032
1033 seq = &pmu->seq[index];
1034 seq->state = PMU_SEQ_STATE_PENDING;
1035
1036 *pseq = seq;
1037 return 0;
1038}
1039
1040static void pmu_seq_release(struct pmu_gk20a *pmu,
1041 struct pmu_sequence *seq)
1042{
1043 struct gk20a *g = pmu->g;
1044 seq->state = PMU_SEQ_STATE_FREE;
1045 seq->desc = PMU_INVALID_SEQ_DESC;
1046 seq->callback = NULL;
1047 seq->cb_params = NULL;
1048 seq->msg = NULL;
1049 seq->out_payload = NULL;
1050 g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1051 g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0);
1052 g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1053 g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0);
1054
1055 clear_bit(seq->id, pmu->pmu_seq_tbl);
1056}
1057
1058static int pmu_queue_init(struct pmu_gk20a *pmu,
1059 u32 id, union pmu_init_msg_pmu *init)
1060{
1061 struct gk20a *g = pmu->g;
1062 struct pmu_queue *queue = &pmu->queue[id];
1063 queue->id = id;
1064 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init);
1065
1066 queue->mutex_id = id;
1067 mutex_init(&queue->mutex);
1068
1069 gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x",
1070 id, queue->index, queue->offset, queue->size);
1071
1072 return 0;
1073}
1074
1075static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1076 u32 *head, bool set)
1077{
1078 struct gk20a *g = pmu->g;
1079
1080 BUG_ON(!head);
1081
1082 if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1083
1084 if (queue->index >= pwr_pmu_queue_head__size_1_v())
1085 return -EINVAL;
1086
1087 if (!set)
1088 *head = pwr_pmu_queue_head_address_v(
1089 gk20a_readl(g,
1090 pwr_pmu_queue_head_r(queue->index)));
1091 else
1092 gk20a_writel(g,
1093 pwr_pmu_queue_head_r(queue->index),
1094 pwr_pmu_queue_head_address_f(*head));
1095 } else {
1096 if (!set)
1097 *head = pwr_pmu_msgq_head_val_v(
1098 gk20a_readl(g, pwr_pmu_msgq_head_r()));
1099 else
1100 gk20a_writel(g,
1101 pwr_pmu_msgq_head_r(),
1102 pwr_pmu_msgq_head_val_f(*head));
1103 }
1104
1105 return 0;
1106}
1107
1108static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1109 u32 *tail, bool set)
1110{
1111 struct gk20a *g = pmu->g;
1112
1113 BUG_ON(!tail);
1114
1115 if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1116
1117 if (queue->index >= pwr_pmu_queue_tail__size_1_v())
1118 return -EINVAL;
1119
1120 if (!set)
1121 *tail = pwr_pmu_queue_tail_address_v(
1122 gk20a_readl(g,
1123 pwr_pmu_queue_tail_r(queue->index)));
1124 else
1125 gk20a_writel(g,
1126 pwr_pmu_queue_tail_r(queue->index),
1127 pwr_pmu_queue_tail_address_f(*tail));
1128 } else {
1129 if (!set)
1130 *tail = pwr_pmu_msgq_tail_val_v(
1131 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
1132 else
1133 gk20a_writel(g,
1134 pwr_pmu_msgq_tail_r(),
1135 pwr_pmu_msgq_tail_val_f(*tail));
1136 }
1137
1138 return 0;
1139}
1140
1141static inline void pmu_queue_read(struct pmu_gk20a *pmu,
1142 u32 offset, u8 *dst, u32 size)
1143{
1144 pmu_copy_from_dmem(pmu, offset, dst, size, 0);
1145}
1146
1147static inline void pmu_queue_write(struct pmu_gk20a *pmu,
1148 u32 offset, u8 *src, u32 size)
1149{
1150 pmu_copy_to_dmem(pmu, offset, src, size, 0);
1151}
1152
1153int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token)
1154{
1155 struct gk20a *g = pmu->g;
1156 struct pmu_mutex *mutex;
1157 u32 data, owner, max_retry;
1158
1159 if (!pmu->initialized)
1160 return 0;
1161
1162 BUG_ON(!token);
1163 BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1164 BUG_ON(id > pmu->mutex_cnt);
1165
1166 mutex = &pmu->mutex[id];
1167
1168 owner = pwr_pmu_mutex_value_v(
1169 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1170
1171 if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
1172 BUG_ON(mutex->ref_cnt == 0);
1173 gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token);
1174 mutex->ref_cnt++;
1175 return 0;
1176 }
1177
1178 max_retry = 40;
1179 do {
1180 data = pwr_pmu_mutex_id_value_v(
1181 gk20a_readl(g, pwr_pmu_mutex_id_r()));
1182 if (data == pwr_pmu_mutex_id_value_init_v() ||
1183 data == pwr_pmu_mutex_id_value_not_avail_v()) {
1184 gk20a_warn(dev_from_gk20a(g),
1185 "fail to generate mutex token: val 0x%08x",
1186 owner);
1187 usleep_range(20, 40);
1188 continue;
1189 }
1190
1191 owner = data;
1192 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1193 pwr_pmu_mutex_value_f(owner));
1194
1195 data = pwr_pmu_mutex_value_v(
1196 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1197
1198 if (owner == data) {
1199 mutex->ref_cnt = 1;
1200 gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x",
1201 mutex->index, *token);
1202 *token = owner;
1203 return 0;
1204 } else {
1205 gk20a_dbg_info("fail to acquire mutex idx=0x%08x",
1206 mutex->index);
1207
1208 data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1209 data = set_field(data,
1210 pwr_pmu_mutex_id_release_value_m(),
1211 pwr_pmu_mutex_id_release_value_f(owner));
1212 gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1213
1214 usleep_range(20, 40);
1215 continue;
1216 }
1217 } while (max_retry-- > 0);
1218
1219 return -EBUSY;
1220}
1221
1222int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token)
1223{
1224 struct gk20a *g = pmu->g;
1225 struct pmu_mutex *mutex;
1226 u32 owner, data;
1227
1228 if (!pmu->initialized)
1229 return 0;
1230
1231 BUG_ON(!token);
1232 BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1233 BUG_ON(id > pmu->mutex_cnt);
1234
1235 mutex = &pmu->mutex[id];
1236
1237 owner = pwr_pmu_mutex_value_v(
1238 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1239
1240 if (*token != owner) {
1241 gk20a_err(dev_from_gk20a(g),
1242			"requester 0x%08x does NOT match owner 0x%08x",
1243 *token, owner);
1244 return -EINVAL;
1245 }
1246
1247 if (--mutex->ref_cnt == 0) {
1248 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1249 pwr_pmu_mutex_value_initial_lock_f());
1250
1251 data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1252 data = set_field(data, pwr_pmu_mutex_id_release_value_m(),
1253 pwr_pmu_mutex_id_release_value_f(owner));
1254 gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1255
1256 gk20a_dbg_pmu("mutex released: id=%d, token=0x%x",
1257 mutex->index, *token);
1258 }
1259
1260 return 0;
1261}
1262
1263static int pmu_queue_lock(struct pmu_gk20a *pmu,
1264 struct pmu_queue *queue)
1265{
1266 int err;
1267
1268 if (PMU_IS_MESSAGE_QUEUE(queue->id))
1269 return 0;
1270
1271 if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1272 mutex_lock(&queue->mutex);
1273 queue->locked = true;
1274 return 0;
1275 }
1276
1277 err = pmu_mutex_acquire(pmu, queue->mutex_id,
1278 &queue->mutex_lock);
1279 if (err == 0)
1280 queue->locked = true;
1281
1282 return err;
1283}
1284
1285static int pmu_queue_unlock(struct pmu_gk20a *pmu,
1286 struct pmu_queue *queue)
1287{
1288 int err;
1289
1290 if (PMU_IS_MESSAGE_QUEUE(queue->id))
1291 return 0;
1292
1293 if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1294 mutex_unlock(&queue->mutex);
1295 queue->locked = false;
1296 return 0;
1297 }
1298
1299 if (queue->locked) {
1300 err = pmu_mutex_release(pmu, queue->mutex_id,
1301 &queue->mutex_lock);
1302 if (err == 0)
1303 queue->locked = false;
1304 }
1305
1306 return 0;
1307}
1308
1309/* called by pmu_read_message, no lock */
1310static bool pmu_queue_is_empty(struct pmu_gk20a *pmu,
1311 struct pmu_queue *queue)
1312{
1313 u32 head, tail;
1314
1315 pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1316 if (queue->opened && queue->oflag == OFLAG_READ)
1317 tail = queue->position;
1318 else
1319 pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1320
1321 return head == tail;
1322}
1323
1324static bool pmu_queue_has_room(struct pmu_gk20a *pmu,
1325 struct pmu_queue *queue, u32 size, bool *need_rewind)
1326{
1327 u32 head, tail, free;
1328 bool rewind = false;
1329
1330 BUG_ON(!queue->locked);
1331
1332 size = ALIGN(size, QUEUE_ALIGNMENT);
1333
1334 pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1335 pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1336
1337 if (head >= tail) {
1338 free = queue->offset + queue->size - head;
1339 free -= PMU_CMD_HDR_SIZE;
1340
1341 if (size > free) {
1342 rewind = true;
1343 head = queue->offset;
1344 }
1345 }
1346
1347 if (head < tail)
1348 free = tail - head - 1;
1349
1350 if (need_rewind)
1351 *need_rewind = rewind;
1352
1353 return size <= free;
1354}
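
pmu_queue_has_room() above computes free space in the circular command queue: when the write head is at or past the tail, the space runs to the end of the queue minus one command header (reserved for the rewind marker), and if that is not enough the head is notionally rewound to the queue start; when the head is below the tail, the space is the gap up to one byte before the tail. A standalone sketch of the same rule (not part of this patch; the hex offsets are made-up values):

#include <stdbool.h>
#include <stdio.h>

#define HDR_SIZE 4	/* stand-in for PMU_CMD_HDR_SIZE */

static unsigned int ring_free(unsigned int offset, unsigned int size,
			      unsigned int head, unsigned int tail,
			      unsigned int request, bool *need_rewind)
{
	unsigned int space = 0;

	*need_rewind = false;
	if (head >= tail) {
		space = offset + size - head - HDR_SIZE;
		if (request > space) {
			*need_rewind = true;
			head = offset;	/* pretend the queue was rewound */
		}
	}
	if (head < tail)
		space = tail - head - 1;

	return space;
}

int main(void)
{
	bool rewind;
	unsigned int space = ring_free(0x100, 0x80, 0x170, 0x130, 0x20, &rewind);

	printf("free=0x%x rewind=%d\n", space, rewind);
	return 0;
}
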
1355
1356static int pmu_queue_push(struct pmu_gk20a *pmu,
1357 struct pmu_queue *queue, void *data, u32 size)
1358{
1359 gk20a_dbg_fn("");
1360
1361	if (!(queue->opened && queue->oflag == OFLAG_WRITE)) {
1362 gk20a_err(dev_from_gk20a(pmu->g),
1363 "queue not opened for write");
1364 return -EINVAL;
1365 }
1366
1367 pmu_queue_write(pmu, queue->position, data, size);
1368 queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1369 return 0;
1370}
1371
1372static int pmu_queue_pop(struct pmu_gk20a *pmu,
1373 struct pmu_queue *queue, void *data, u32 size,
1374 u32 *bytes_read)
1375{
1376 u32 head, tail, used;
1377
1378 *bytes_read = 0;
1379
1380	if (!(queue->opened && queue->oflag == OFLAG_READ)) {
1381 gk20a_err(dev_from_gk20a(pmu->g),
1382 "queue not opened for read");
1383 return -EINVAL;
1384 }
1385
1386 pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1387 tail = queue->position;
1388
1389 if (head == tail)
1390 return 0;
1391
1392 if (head > tail)
1393 used = head - tail;
1394 else
1395 used = queue->offset + queue->size - tail;
1396
1397 if (size > used) {
1398 gk20a_warn(dev_from_gk20a(pmu->g),
1399			"queue size smaller than requested read");
1400 size = used;
1401 }
1402
1403 pmu_queue_read(pmu, tail, data, size);
1404 queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1405 *bytes_read = size;
1406 return 0;
1407}
1408
1409static void pmu_queue_rewind(struct pmu_gk20a *pmu,
1410 struct pmu_queue *queue)
1411{
1412 struct pmu_cmd cmd;
1413
1414 gk20a_dbg_fn("");
1415
1416 if (!queue->opened) {
1417 gk20a_err(dev_from_gk20a(pmu->g),
1418 "queue not opened");
1419 return;
1420 }
1421
1422 if (queue->oflag == OFLAG_WRITE) {
1423 cmd.hdr.unit_id = PMU_UNIT_REWIND;
1424 cmd.hdr.size = PMU_CMD_HDR_SIZE;
1425 pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
1426		gk20a_dbg_pmu("queue %d rewound", queue->id);
1427 }
1428
1429 queue->position = queue->offset;
1430 return;
1431}
1432
1433/* open for read and lock the queue */
1434static int pmu_queue_open_read(struct pmu_gk20a *pmu,
1435 struct pmu_queue *queue)
1436{
1437 int err;
1438
1439 err = pmu_queue_lock(pmu, queue);
1440 if (err)
1441 return err;
1442
1443 if (queue->opened)
1444 BUG();
1445
1446 pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
1447 queue->oflag = OFLAG_READ;
1448 queue->opened = true;
1449
1450 return 0;
1451}
1452
1453/* open for write and lock the queue
1454 make sure there's enough free space for the write */
1455static int pmu_queue_open_write(struct pmu_gk20a *pmu,
1456 struct pmu_queue *queue, u32 size)
1457{
1458 bool rewind = false;
1459 int err;
1460
1461 err = pmu_queue_lock(pmu, queue);
1462 if (err)
1463 return err;
1464
1465 if (queue->opened)
1466 BUG();
1467
1468 if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
1469 gk20a_err(dev_from_gk20a(pmu->g), "queue full");
1470 return -EAGAIN;
1471 }
1472
1473 pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
1474 queue->oflag = OFLAG_WRITE;
1475 queue->opened = true;
1476
1477 if (rewind)
1478 pmu_queue_rewind(pmu, queue);
1479
1480 return 0;
1481}
1482
1483/* close and unlock the queue */
1484static int pmu_queue_close(struct pmu_gk20a *pmu,
1485 struct pmu_queue *queue, bool commit)
1486{
1487 if (!queue->opened)
1488 return 0;
1489
1490 if (commit) {
1491 if (queue->oflag == OFLAG_READ) {
1492 pmu_queue_tail(pmu, queue,
1493 &queue->position, QUEUE_SET);
1494 }
1495 else {
1496 pmu_queue_head(pmu, queue,
1497 &queue->position, QUEUE_SET);
1498 }
1499 }
1500
1501 queue->opened = false;
1502
1503 pmu_queue_unlock(pmu, queue);
1504
1505 return 0;
1506}
1507
1508static void gk20a_save_pmu_sw_state(struct pmu_gk20a *pmu,
1509 struct gk20a_pmu_save_state *save)
1510{
1511 save->seq = pmu->seq;
1512 save->next_seq_desc = pmu->next_seq_desc;
1513 save->mutex = pmu->mutex;
1514 save->mutex_cnt = pmu->mutex_cnt;
1515 save->desc = pmu->desc;
1516 save->ucode = pmu->ucode;
1517 save->elpg_enable = pmu->elpg_enable;
1518 save->pg_wq = pmu->pg_wq;
1519 save->seq_buf = pmu->seq_buf;
1520 save->pg_buf = pmu->pg_buf;
1521 save->sw_ready = pmu->sw_ready;
1522 save->pg_init = pmu->pg_init;
1523}
1524
1525static void gk20a_restore_pmu_sw_state(struct pmu_gk20a *pmu,
1526 struct gk20a_pmu_save_state *save)
1527{
1528 pmu->seq = save->seq;
1529 pmu->next_seq_desc = save->next_seq_desc;
1530 pmu->mutex = save->mutex;
1531 pmu->mutex_cnt = save->mutex_cnt;
1532 pmu->desc = save->desc;
1533 pmu->ucode = save->ucode;
1534 pmu->elpg_enable = save->elpg_enable;
1535 pmu->pg_wq = save->pg_wq;
1536 pmu->seq_buf = save->seq_buf;
1537 pmu->pg_buf = save->pg_buf;
1538 pmu->sw_ready = save->sw_ready;
1539 pmu->pg_init = save->pg_init;
1540}
1541
1542void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
1543{
1544 struct gk20a_pmu_save_state save;
1545
1546 gk20a_dbg_fn("");
1547
1548 gk20a_allocator_destroy(&pmu->dmem);
1549
1550 /* Save the stuff you don't want to lose */
1551 gk20a_save_pmu_sw_state(pmu, &save);
1552
1553	/* this function is also called by pmu_destroy outside gk20a deinit, which
1554	   releases the gk20a struct, so fill it up with zeros here. */
1555 memset(pmu, 0, sizeof(struct pmu_gk20a));
1556
1557 /* Restore stuff you want to keep */
1558 gk20a_restore_pmu_sw_state(pmu, &save);
1559}
1560
1561int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
1562{
1563 struct pmu_gk20a *pmu = &g->pmu;
1564
1565 gk20a_dbg_fn("");
1566
1567 pmu_enable_hw(pmu, true);
1568
1569 return 0;
1570}
1571
1572static void pmu_elpg_enable_allow(struct work_struct *work);
1573
1574int gk20a_init_pmu_setup_sw(struct gk20a *g)
1575{
1576 struct pmu_gk20a *pmu = &g->pmu;
1577 struct mm_gk20a *mm = &g->mm;
1578 struct vm_gk20a *vm = &mm->pmu.vm;
1579 struct device *d = dev_from_gk20a(g);
1580 int i, err = 0;
1581 u8 *ptr;
1582 void *ucode_ptr;
1583 struct sg_table *sgt_pmu_ucode;
1584 struct sg_table *sgt_seq_buf;
1585 DEFINE_DMA_ATTRS(attrs);
1586 dma_addr_t iova;
1587
1588 gk20a_dbg_fn("");
1589
1590 if (pmu->sw_ready) {
1591 for (i = 0; i < pmu->mutex_cnt; i++) {
1592 pmu->mutex[i].id = i;
1593 pmu->mutex[i].index = i;
1594 }
1595 pmu_seq_init(pmu);
1596
1597 gk20a_dbg_fn("skip init");
1598 goto skip_init;
1599 }
1600
1601 /* no infoRom script from vbios? */
1602
1603 /* TBD: sysmon subtask */
1604
1605 pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
1606 pmu->mutex = kzalloc(pmu->mutex_cnt *
1607 sizeof(struct pmu_mutex), GFP_KERNEL);
1608 if (!pmu->mutex) {
1609 err = -ENOMEM;
1610 goto err;
1611 }
1612
1613 for (i = 0; i < pmu->mutex_cnt; i++) {
1614 pmu->mutex[i].id = i;
1615 pmu->mutex[i].index = i;
1616 }
1617
1618 pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
1619 sizeof(struct pmu_sequence), GFP_KERNEL);
1620 if (!pmu->seq) {
1621 err = -ENOMEM;
1622 goto err_free_mutex;
1623 }
1624
1625 pmu_seq_init(pmu);
1626
1627 if (!g->pmu_fw) {
1628 g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE);
1629 if (!g->pmu_fw) {
1630 gk20a_err(d, "failed to load pmu ucode!!");
1631 err = -ENOENT;
1632 goto err_free_seq;
1633 }
1634 }
1635
1636 gk20a_dbg_fn("firmware loaded");
1637
1638 pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data;
1639 pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
1640 pmu->desc->descriptor_size);
1641
1642
1643 INIT_DELAYED_WORK(&pmu->elpg_enable, pmu_elpg_enable_allow);
1644 INIT_WORK(&pmu->pg_init, gk20a_init_pmu_setup_hw2_workqueue);
1645
1646 gk20a_init_pmu_vm(mm);
1647
1648 dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
1649 pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1650 &iova,
1651 GFP_KERNEL,
1652 &attrs);
1653 if (!pmu->ucode.cpuva) {
1654 gk20a_err(d, "failed to allocate memory\n");
1655 err = -ENOMEM;
1656 goto err_release_fw;
1657 }
1658
1659 pmu->ucode.iova = iova;
1660 pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1661 &iova,
1662 GFP_KERNEL);
1663 if (!pmu->seq_buf.cpuva) {
1664 gk20a_err(d, "failed to allocate memory\n");
1665 err = -ENOMEM;
1666 goto err_free_pmu_ucode;
1667 }
1668
1669 pmu->seq_buf.iova = iova;
1670 init_waitqueue_head(&pmu->pg_wq);
1671
1672 err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
1673 pmu->ucode.cpuva,
1674 pmu->ucode.iova,
1675 GK20A_PMU_UCODE_SIZE_MAX);
1676 if (err) {
1677 gk20a_err(d, "failed to allocate sg table\n");
1678 goto err_free_seq_buf;
1679 }
1680
1681 pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
1682 GK20A_PMU_UCODE_SIZE_MAX,
1683 0, /* flags */
1684 gk20a_mem_flag_read_only);
1685 if (!pmu->ucode.pmu_va) {
1686 gk20a_err(d, "failed to map pmu ucode memory!!");
1687 goto err_free_ucode_sgt;
1688 }
1689
1690 err = gk20a_get_sgtable(d, &sgt_seq_buf,
1691 pmu->seq_buf.cpuva,
1692 pmu->seq_buf.iova,
1693 GK20A_PMU_SEQ_BUF_SIZE);
1694 if (err) {
1695 gk20a_err(d, "failed to allocate sg table\n");
1696 goto err_unmap_ucode;
1697 }
1698
1699 pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
1700 GK20A_PMU_SEQ_BUF_SIZE,
1701 0, /* flags */
1702 gk20a_mem_flag_none);
1703 if (!pmu->seq_buf.pmu_va) {
1704 gk20a_err(d, "failed to map pmu ucode memory!!");
1705 goto err_free_seq_buf_sgt;
1706 }
1707
1708 ptr = (u8 *)pmu->seq_buf.cpuva;
1709 if (!ptr) {
1710 gk20a_err(d, "failed to map cpu ptr for zbc buffer");
1711 goto err_unmap_seq_buf;
1712 }
1713
1714 /* TBD: remove this if ZBC save/restore is handled by PMU
1715 	 * send an empty ZBC sequence for now */
1716 ptr[0] = 0x16; /* opcode EXIT */
1717 ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
1718 ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;
1719
1720 pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
1721
1722 ucode_ptr = pmu->ucode.cpuva;
1723
1724 for (i = 0; i < (pmu->desc->app_start_offset +
1725 pmu->desc->app_size) >> 2; i++)
1726 gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);
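	/*
	 * Illustrative arithmetic (example values only): the loop above copies
	 * the ucode as 32-bit words, so an app_start_offset of 0x100 plus an
	 * app_size of 0x200 gives (0x100 + 0x200) >> 2 = 0xc0 word writes,
	 * i.e. 0x300 bytes.
	 */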
1727
1728 gk20a_free_sgtable(&sgt_pmu_ucode);
1729 gk20a_free_sgtable(&sgt_seq_buf);
1730
1731skip_init:
1732 mutex_init(&pmu->elpg_mutex);
1733 mutex_init(&pmu->isr_mutex);
1734 mutex_init(&pmu->pmu_copy_lock);
1735 mutex_init(&pmu->pmu_seq_lock);
1736
1737 pmu->perfmon_counter.index = 3; /* GR & CE2 */
1738 pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE;
1739
1740 pmu->remove_support = gk20a_remove_pmu_support;
1741 err = gk20a_init_pmu(pmu);
1742 if (err) {
1743 gk20a_err(d, "failed to set function pointers\n");
1744 return err;
1745 }
1746
1747 gk20a_dbg_fn("done");
1748 return 0;
1749
1750 err_unmap_seq_buf:
1751 gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
1752 GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
1753 err_free_seq_buf_sgt:
1754 gk20a_free_sgtable(&sgt_seq_buf);
1755 err_unmap_ucode:
1756 gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va,
1757 GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none);
1758 err_free_ucode_sgt:
1759 gk20a_free_sgtable(&sgt_pmu_ucode);
1760 err_free_seq_buf:
1761 dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1762 pmu->seq_buf.cpuva, pmu->seq_buf.iova);
1763 pmu->seq_buf.cpuva = NULL;
1764 pmu->seq_buf.iova = 0;
1765 err_free_pmu_ucode:
1766 dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1767 pmu->ucode.cpuva, pmu->ucode.iova, &attrs);
1768 pmu->ucode.cpuva = NULL;
1769 pmu->ucode.iova = 0;
1770 err_release_fw:
1771 release_firmware(g->pmu_fw);
1772 err_free_seq:
1773 kfree(pmu->seq);
1774 err_free_mutex:
1775 kfree(pmu->mutex);
1776 err:
1777 gk20a_dbg_fn("fail");
1778 return err;
1779}
1780
1781static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
1782 void *param, u32 handle, u32 status);
1783
1784static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg,
1785 void *param, u32 handle, u32 status)
1786{
1787 struct pmu_gk20a *pmu = param;
1788 struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat;
1789
1790 gk20a_dbg_fn("");
1791
1792 if (status != 0) {
1793 gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted");
1794 /* TBD: disable ELPG */
1795 return;
1796 }
1797
1798 if (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_FAILED) {
1799 gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer");
1800 }
1801
1802 pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED);
1803 wake_up(&pmu->pg_wq);
1804}
1805
1806int gk20a_init_pmu_setup_hw1(struct gk20a *g)
1807{
1808 struct pmu_gk20a *pmu = &g->pmu;
1809 int err;
1810
1811 gk20a_dbg_fn("");
1812
1813 pmu_reset(pmu);
1814
1815 /* setup apertures - virtual */
1816 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1817 pwr_fbif_transcfg_mem_type_virtual_f());
1818 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1819 pwr_fbif_transcfg_mem_type_virtual_f());
1820 /* setup apertures - physical */
1821 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1822 pwr_fbif_transcfg_mem_type_physical_f() |
1823 pwr_fbif_transcfg_target_local_fb_f());
1824 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1825 pwr_fbif_transcfg_mem_type_physical_f() |
1826 pwr_fbif_transcfg_target_coherent_sysmem_f());
1827 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1828 pwr_fbif_transcfg_mem_type_physical_f() |
1829 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1830
1831 /* TBD: load pmu ucode */
1832 err = pmu_bootstrap(pmu);
1833 if (err)
1834 return err;
1835
1836 return 0;
1837
1838}
1839
1840static int gk20a_aelpg_init(struct gk20a *g);
1841static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
1842
1843
1844static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work)
1845{
1846 struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
1847 struct gk20a *g = pmu->g;
1848 gk20a_init_pmu_setup_hw2(g);
1849}
1850
1851int gk20a_init_pmu_setup_hw2(struct gk20a *g)
1852{
1853 struct pmu_gk20a *pmu = &g->pmu;
1854 struct mm_gk20a *mm = &g->mm;
1855 struct vm_gk20a *vm = &mm->pmu.vm;
1856 struct device *d = dev_from_gk20a(g);
1857 struct pmu_cmd cmd;
1858 u32 desc;
1859 long remain;
1860 int err;
1861 bool status;
1862 u32 size;
1863 struct sg_table *sgt_pg_buf;
1864 dma_addr_t iova;
1865
1866 gk20a_dbg_fn("");
1867
1868 if (!support_gk20a_pmu())
1869 return 0;
1870
1871 size = 0;
1872 err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
1873 if (err) {
1874 gk20a_err(dev_from_gk20a(g),
1875 "fail to query fecs pg buffer size");
1876 return err;
1877 }
1878
1879 if (!pmu->sw_ready) {
1880 pmu->pg_buf.cpuva = dma_alloc_coherent(d, size,
1881 &iova,
1882 GFP_KERNEL);
1883 if (!pmu->pg_buf.cpuva) {
1884 gk20a_err(d, "failed to allocate memory\n");
1885 err = -ENOMEM;
1886 goto err;
1887 }
1888
1889 pmu->pg_buf.iova = iova;
1890 pmu->pg_buf.size = size;
1891
1892 err = gk20a_get_sgtable(d, &sgt_pg_buf,
1893 pmu->pg_buf.cpuva,
1894 pmu->pg_buf.iova,
1895 size);
1896 if (err) {
1897 gk20a_err(d, "failed to create sg table\n");
1898 goto err_free_pg_buf;
1899 }
1900
1901 pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm,
1902 &sgt_pg_buf,
1903 size,
1904 0, /* flags */
1905 gk20a_mem_flag_none);
1906 if (!pmu->pg_buf.pmu_va) {
1907 gk20a_err(d, "failed to map fecs pg buffer");
1908 err = -ENOMEM;
1909 goto err_free_sgtable;
1910 }
1911
1912 gk20a_free_sgtable(&sgt_pg_buf);
1913 }
1914
1915 /*
1916 * This is the actual point at which sw setup is complete, so set the
1917 * sw_ready flag here.
1918 */
1919 pmu->sw_ready = true;
1920
1921 /* TBD: acquire pmu hw mutex */
1922
1923 /* TBD: post reset again? */
1924
1925 /* PMU_INIT message handler will send PG_INIT */
1926 remain = wait_event_timeout(
1927 pmu->pg_wq,
1928 (status = (pmu->elpg_ready &&
1929 pmu->stat_dmem_offset != 0 &&
1930 pmu->elpg_stat == PMU_ELPG_STAT_OFF)),
1931 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
1932 if (status == 0) {
1933 gk20a_err(dev_from_gk20a(g),
1934 "PG_INIT_ACK failed, remaining timeout : 0x%lx", remain);
1935 pmu_dump_falcon_stats(pmu);
1936 return -EBUSY;
1937 }
1938
1939 err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa);
1940 if (err) {
1941 gk20a_err(dev_from_gk20a(g),
1942 "fail to bind pmu inst to gr");
1943 return err;
1944 }
1945
1946 err = gr_gk20a_fecs_set_reglist_virual_addr(g, pmu->pg_buf.pmu_va);
1947 if (err) {
1948 gk20a_err(dev_from_gk20a(g),
1949 "fail to set pg buffer pmu va");
1950 return err;
1951 }
1952
1953 memset(&cmd, 0, sizeof(struct pmu_cmd));
1954 cmd.hdr.unit_id = PMU_UNIT_PG;
1955 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1956 cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1957 cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1958 cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS;
1959 cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size;
1960 cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8);
1961 cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF);
1962 cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
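	/*
	 * Illustrative only: the descriptor splits the GPU virtual address
	 * into a base in 256-byte units plus the low byte. With a made-up
	 * pmu_va of 0x12345678, dma_base would be 0x00123456 and dma_offset
	 * would be 0x78.
	 */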
1963
1964 pmu->buf_loaded = false;
1965 gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1966 pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1967
1968 remain = wait_event_timeout(
1969 pmu->pg_wq,
1970 pmu->buf_loaded,
1971 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
1972 if (!pmu->buf_loaded) {
1973 gk20a_err(dev_from_gk20a(g),
1974 "PGENG FECS buffer load failed, remaining timeout : 0x%lx",
1975 remain);
1976 return -EBUSY;
1977 }
1978
1979 memset(&cmd, 0, sizeof(struct pmu_cmd));
1980 cmd.hdr.unit_id = PMU_UNIT_PG;
1981 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1982 cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1983 cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1984 cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC;
1985 cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size;
1986 cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8);
1987 cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF);
1988 cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1989
1990 pmu->buf_loaded = false;
1991 gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1992 pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1993
1994 remain = wait_event_timeout(
1995 pmu->pg_wq,
1996 pmu->buf_loaded,
1997 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
1998 if (!pmu->buf_loaded) {
1999 gk20a_err(dev_from_gk20a(g),
2000 "PGENG ZBC buffer load failed, remaining timeout 0x%lx",
2001 remain);
2002 return -EBUSY;
2003 }
2004
2005 /*
2006 * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to
2007 * 7. This prevents PMU stalling on Host register accesses. Once the
2008 * cause for this hang is discovered and fixed, this WAR should be
2009 * removed.
2010 */
2011 gk20a_writel(g, 0x10a164, 0x109ff);
2012
2013 pmu->initialized = true;
2014 pmu->zbc_ready = true;
2015
2016 /* Save zbc table after PMU is initialized. */
2017 pmu_save_zbc(g, 0xf);
2018
2019 /*
2020 * We can't guarantee that gr code to enable ELPG will be
2021 * invoked, so we explicitly call disable-enable here
2022 * to enable elpg.
2023 */
2024 gk20a_pmu_disable_elpg(g);
2025
2026 if (g->elpg_enabled)
2027 gk20a_pmu_enable_elpg(g);
2028
2029 udelay(50);
2030
2031 /* Enable AELPG */
2032 if (g->aelpg_enabled) {
2033 gk20a_aelpg_init(g);
2034 gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
2035 }
2036
2037 return 0;
2038
2039 err_free_sgtable:
2040 gk20a_free_sgtable(&sgt_pg_buf);
2041 err_free_pg_buf:
2042 dma_free_coherent(d, size,
2043 pmu->pg_buf.cpuva, pmu->pg_buf.iova);
2044 pmu->pg_buf.cpuva = NULL;
2045 pmu->pg_buf.iova = 0;
2046 err:
2047 return err;
2048}
2049
2050int gk20a_init_pmu_support(struct gk20a *g)
2051{
2052 struct pmu_gk20a *pmu = &g->pmu;
2053 	int err;
2054
2055 gk20a_dbg_fn("");
2056
2057 if (pmu->initialized)
2058 return 0;
2059
2060 pmu->g = g;
2061
2062 err = gk20a_init_pmu_reset_enable_hw(g);
2063 if (err)
2064 return err;
2065
2066 if (support_gk20a_pmu()) {
2067 err = gk20a_init_pmu_setup_sw(g);
2068 if (err)
2069 return err;
2070
2071 err = gk20a_init_pmu_setup_hw1(g);
2072 if (err)
2073 return err;
2074 }
2075
2076 return err;
2077}
2078
2079static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
2080 void *param, u32 handle, u32 status)
2081{
2082 struct pmu_gk20a *pmu = param;
2083 struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg;
2084
2085 gk20a_dbg_fn("");
2086
2087 if (status != 0) {
2088 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2089 /* TBD: disable ELPG */
2090 return;
2091 }
2092
2093 switch (elpg_msg->msg) {
2094 case PMU_PG_ELPG_MSG_INIT_ACK:
2095 gk20a_dbg_pmu("INIT_PG is acknowledged from PMU");
2096 pmu->elpg_ready = true;
2097 wake_up(&pmu->pg_wq);
2098 break;
2099 case PMU_PG_ELPG_MSG_ALLOW_ACK:
2100 gk20a_dbg_pmu("ALLOW is acknowledged from PMU");
2101 pmu->elpg_stat = PMU_ELPG_STAT_ON;
2102 wake_up(&pmu->pg_wq);
2103 break;
2104 case PMU_PG_ELPG_MSG_DISALLOW_ACK:
2105 gk20a_dbg_pmu("DISALLOW is acknowledged from PMU");
2106 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
2107 wake_up(&pmu->pg_wq);
2108 break;
2109 default:
2110 gk20a_err(dev_from_gk20a(g),
2111 "unsupported ELPG message : 0x%04x", elpg_msg->msg);
2112 }
2113
2114 return;
2115}
2116
2117static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg,
2118 void *param, u32 handle, u32 status)
2119{
2120 struct pmu_gk20a *pmu = param;
2121
2122 gk20a_dbg_fn("");
2123
2124 if (status != 0) {
2125 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2126 /* TBD: disable ELPG */
2127 return;
2128 }
2129
2130 switch (msg->msg.pg.stat.sub_msg_id) {
2131 case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET:
2132 gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU");
2133 pmu->stat_dmem_offset = msg->msg.pg.stat.data;
2134 wake_up(&pmu->pg_wq);
2135 break;
2136 default:
2137 break;
2138 }
2139}
2140
2141static int pmu_init_powergating(struct pmu_gk20a *pmu)
2142{
2143 struct gk20a *g = pmu->g;
2144 struct pmu_cmd cmd;
2145 u32 seq;
2146
2147 gk20a_dbg_fn("");
2148
2149 if (tegra_cpu_is_asim()) {
2150 /* TBD: calculate threshold for silicon */
2151 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2152 PMU_PG_IDLE_THRESHOLD_SIM);
2153 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2154 PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM);
2155 } else {
2156 /* TBD: calculate threshold for silicon */
2157 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2158 PMU_PG_IDLE_THRESHOLD);
2159 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2160 PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
2161 }
2162
2163 /* init ELPG */
2164 memset(&cmd, 0, sizeof(struct pmu_cmd));
2165 cmd.hdr.unit_id = PMU_UNIT_PG;
2166 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2167 cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2168 cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2169 cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT;
2170
2171 gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2172 pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2173
2174 /* alloc dmem for powergating state log */
2175 pmu->stat_dmem_offset = 0;
2176 memset(&cmd, 0, sizeof(struct pmu_cmd));
2177 cmd.hdr.unit_id = PMU_UNIT_PG;
2178 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat);
2179 cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT;
2180 cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A;
2181 cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM;
2182 cmd.cmd.pg.stat.data = 0;
2183
2184 gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2185 pmu_handle_pg_stat_msg, pmu, &seq, ~0);
2186
2187 /* disallow ELPG initially
2188 PMU ucode requires a disallow cmd before allow cmd */
2189 pmu->elpg_stat = PMU_ELPG_STAT_ON; /* set for wait_event PMU_ELPG_STAT_OFF */
2190 memset(&cmd, 0, sizeof(struct pmu_cmd));
2191 cmd.hdr.unit_id = PMU_UNIT_PG;
2192 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2193 cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2194 cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2195 cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
2196
2197 gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2198 pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2199
2200 /* start with elpg disabled until first enable call */
2201 pmu->elpg_refcnt = 1;
2202
2203 return 0;
2204}
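/*
 * The three commands posted above (ELPG_INIT, PG_STAT ALLOC_DMEM and
 * ELPG_DISALLOW) are exactly what gk20a_init_pmu_setup_hw2() waits on:
 * their acks set pmu->elpg_ready, pmu->stat_dmem_offset and
 * pmu->elpg_stat == PMU_ELPG_STAT_OFF respectively.
 */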
2205
2206static int pmu_init_perfmon(struct pmu_gk20a *pmu)
2207{
2208 struct gk20a *g = pmu->g;
2209 struct pmu_v *pv = &g->ops.pmu_ver;
2210 struct pmu_cmd cmd;
2211 struct pmu_payload payload;
2212 u32 seq;
2213 u32 data;
2214 int err;
2215
2216 gk20a_dbg_fn("");
2217
2218 pmu->perfmon_ready = 0;
2219
2220 /* use counter #3 for GR && CE2 busy cycles */
2221 gk20a_writel(g, pwr_pmu_idle_mask_r(3),
2222 pwr_pmu_idle_mask_gr_enabled_f() |
2223 pwr_pmu_idle_mask_ce_2_enabled_f());
2224
2225 /* disable idle filtering for counters 3 and 6 */
2226 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
2227 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2228 pwr_pmu_idle_ctrl_filter_m(),
2229 pwr_pmu_idle_ctrl_value_busy_f() |
2230 pwr_pmu_idle_ctrl_filter_disabled_f());
2231 gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
2232
2233 /* use counter #6 for total cycles */
2234 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
2235 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2236 pwr_pmu_idle_ctrl_filter_m(),
2237 pwr_pmu_idle_ctrl_value_always_f() |
2238 pwr_pmu_idle_ctrl_filter_disabled_f());
2239 gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
2240
2241 /*
2242 * We don't want to disturb counters #3 and #6, which are used by
2243 * perfmon, so we add wiring also to counters #1 and #2 for
2244 * exposing raw counter readings.
2245 */
2246 gk20a_writel(g, pwr_pmu_idle_mask_r(1),
2247 pwr_pmu_idle_mask_gr_enabled_f() |
2248 pwr_pmu_idle_mask_ce_2_enabled_f());
2249
2250 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
2251 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2252 pwr_pmu_idle_ctrl_filter_m(),
2253 pwr_pmu_idle_ctrl_value_busy_f() |
2254 pwr_pmu_idle_ctrl_filter_disabled_f());
2255 gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
2256
2257 data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
2258 data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2259 pwr_pmu_idle_ctrl_filter_m(),
2260 pwr_pmu_idle_ctrl_value_always_f() |
2261 pwr_pmu_idle_ctrl_filter_disabled_f());
2262 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
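	/*
	 * Counters #1 (busy) and #2 (always ticking) configured above are the
	 * ones read back raw by gk20a_pmu_get_load_counters() and cleared by
	 * gk20a_pmu_reset_load_counters() further below.
	 */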
2263
2264 pmu->sample_buffer = 0;
2265 err = pmu->dmem.alloc(&pmu->dmem, &pmu->sample_buffer, 2 * sizeof(u16));
2266 if (err) {
2267 gk20a_err(dev_from_gk20a(g),
2268 "failed to allocate perfmon sample buffer");
2269 return -ENOMEM;
2270 }
2271
2272 /* init PERFMON */
2273 memset(&cmd, 0, sizeof(struct pmu_cmd));
2274 cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2275 cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
2276 cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
2277 /* buffer to save counter values for pmu perfmon */
2278 pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
2279 (u16)pmu->sample_buffer);
2280 /* number of sample periods below lower threshold
2281 before pmu triggers perfmon decrease event
2282 TBD: = 15 */
2283 pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
2284 /* index of base counter, aka. always ticking counter */
2285 pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
2286 /* microseconds interval between pmu polls perf counters */
2287 pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
2288 /* number of perfmon counters
2289 counter #3 (GR and CE2) for gk20a */
2290 pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
2291 /* moving average window for sample periods
2292 TBD: = 3000000 / sample_period_us = 17 */
2293 pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
2294
2295 memset(&payload, 0, sizeof(struct pmu_payload));
2296 payload.in.buf = &pmu->perfmon_counter;
2297 payload.in.size = sizeof(struct pmu_perfmon_counter);
2298 payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
2299
2300 gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2301 NULL, NULL, &seq, ~0);
2302
2303 return 0;
2304}
2305
2306static int pmu_process_init_msg(struct pmu_gk20a *pmu,
2307 struct pmu_msg *msg)
2308{
2309 struct gk20a *g = pmu->g;
2310 struct pmu_v *pv = &g->ops.pmu_ver;
2311 union pmu_init_msg_pmu *init;
2312 struct pmu_sha1_gid_data gid_data;
2313 u32 i, tail = 0;
2314
2315 tail = pwr_pmu_msgq_tail_val_v(
2316 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
2317
2318 pmu_copy_from_dmem(pmu, tail,
2319 (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
2320
2321 if (msg->hdr.unit_id != PMU_UNIT_INIT) {
2322 gk20a_err(dev_from_gk20a(g),
2323 "expecting init msg");
2324 return -EINVAL;
2325 }
2326
2327 pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
2328 (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
2329
2330 if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
2331 gk20a_err(dev_from_gk20a(g),
2332 "expecting init msg");
2333 return -EINVAL;
2334 }
2335
2336 tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
2337 gk20a_writel(g, pwr_pmu_msgq_tail_r(),
2338 pwr_pmu_msgq_tail_val_f(tail));
2339
2340 init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init));
2341 if (!pmu->gid_info.valid) {
2342
2343 pmu_copy_from_dmem(pmu,
2344 pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2345 (u8 *)&gid_data,
2346 sizeof(struct pmu_sha1_gid_data), 0);
2347
2348 pmu->gid_info.valid =
2349 (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
2350
2351 if (pmu->gid_info.valid) {
2352
2353 BUG_ON(sizeof(pmu->gid_info.gid) !=
2354 sizeof(gid_data.gid));
2355
2356 memcpy(pmu->gid_info.gid, gid_data.gid,
2357 sizeof(pmu->gid_info.gid));
2358 }
2359 }
2360
2361 for (i = 0; i < PMU_QUEUE_COUNT; i++)
2362 pmu_queue_init(pmu, i, init);
2363
2364 gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
2365 pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2366 pv->get_pmu_init_msg_pmu_sw_mg_size(init),
2367 PMU_DMEM_ALLOC_ALIGNMENT);
2368
2369 pmu->pmu_ready = true;
2370
2371 return 0;
2372}
2373
2374static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue,
2375 struct pmu_msg *msg, int *status)
2376{
2377 struct gk20a *g = pmu->g;
2378 u32 read_size, bytes_read;
2379 int err;
2380
2381 *status = 0;
2382
2383 if (pmu_queue_is_empty(pmu, queue))
2384 return false;
2385
2386 err = pmu_queue_open_read(pmu, queue);
2387 if (err) {
2388 gk20a_err(dev_from_gk20a(g),
2389 "fail to open queue %d for read", queue->id);
2390 *status = err;
2391 return false;
2392 }
2393
2394 err = pmu_queue_pop(pmu, queue, &msg->hdr,
2395 PMU_MSG_HDR_SIZE, &bytes_read);
2396 if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2397 gk20a_err(dev_from_gk20a(g),
2398 "fail to read msg from queue %d", queue->id);
2399 *status = err | -EINVAL;
2400 goto clean_up;
2401 }
2402
2403 if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
2404 pmu_queue_rewind(pmu, queue);
2405 /* read again after rewind */
2406 err = pmu_queue_pop(pmu, queue, &msg->hdr,
2407 PMU_MSG_HDR_SIZE, &bytes_read);
2408 if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2409 gk20a_err(dev_from_gk20a(g),
2410 "fail to read msg from queue %d", queue->id);
2411 *status = err | -EINVAL;
2412 goto clean_up;
2413 }
2414 }
2415
2416 if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
2417 gk20a_err(dev_from_gk20a(g),
2418 "read invalid unit_id %d from queue %d",
2419 msg->hdr.unit_id, queue->id);
2420 *status = -EINVAL;
2421 goto clean_up;
2422 }
2423
2424 if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
2425 read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
2426 err = pmu_queue_pop(pmu, queue, &msg->msg,
2427 read_size, &bytes_read);
2428 if (err || bytes_read != read_size) {
2429 gk20a_err(dev_from_gk20a(g),
2430 "fail to read msg from queue %d", queue->id);
2431 *status = err;
2432 goto clean_up;
2433 }
2434 }
2435
2436 err = pmu_queue_close(pmu, queue, true);
2437 if (err) {
2438 gk20a_err(dev_from_gk20a(g),
2439 "fail to close queue %d", queue->id);
2440 *status = err;
2441 return false;
2442 }
2443
2444 return true;
2445
2446clean_up:
2447 err = pmu_queue_close(pmu, queue, false);
2448 if (err)
2449 gk20a_err(dev_from_gk20a(g),
2450 "fail to close queue %d", queue->id);
2451 return false;
2452}
2453
2454static int pmu_response_handle(struct pmu_gk20a *pmu,
2455 struct pmu_msg *msg)
2456{
2457 struct gk20a *g = pmu->g;
2458 struct pmu_sequence *seq;
2459 struct pmu_v *pv = &g->ops.pmu_ver;
2460 int ret = 0;
2461
2462 gk20a_dbg_fn("");
2463
2464 seq = &pmu->seq[msg->hdr.seq_id];
2465 if (seq->state != PMU_SEQ_STATE_USED &&
2466 seq->state != PMU_SEQ_STATE_CANCELLED) {
2467 gk20a_err(dev_from_gk20a(g),
2468 "msg for an unknown sequence %d", seq->id);
2469 return -EINVAL;
2470 }
2471
2472 if (msg->hdr.unit_id == PMU_UNIT_RC &&
2473 msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
2474 gk20a_err(dev_from_gk20a(g),
2475 "unhandled cmd: seq %d", seq->id);
2476 }
2477 else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
2478 if (seq->msg) {
2479 if (seq->msg->hdr.size >= msg->hdr.size) {
2480 memcpy(seq->msg, msg, msg->hdr.size);
2481 if (pv->pmu_allocation_get_dmem_size(pmu,
2482 pv->get_pmu_seq_out_a_ptr(seq)) != 0) {
2483 pmu_copy_from_dmem(pmu,
2484 pv->pmu_allocation_get_dmem_offset(pmu,
2485 pv->get_pmu_seq_out_a_ptr(seq)),
2486 seq->out_payload,
2487 pv->pmu_allocation_get_dmem_size(pmu,
2488 pv->get_pmu_seq_out_a_ptr(seq)), 0);
2489 }
2490 } else {
2491 gk20a_err(dev_from_gk20a(g),
2492 "sequence %d msg buffer too small",
2493 seq->id);
2494 }
2495 }
2496 } else
2497 seq->callback = NULL;
2498 if (pv->pmu_allocation_get_dmem_size(pmu,
2499 pv->get_pmu_seq_in_a_ptr(seq)) != 0)
2500 pmu->dmem.free(&pmu->dmem,
2501 pv->pmu_allocation_get_dmem_offset(pmu,
2502 pv->get_pmu_seq_in_a_ptr(seq)),
2503 pv->pmu_allocation_get_dmem_size(pmu,
2504 pv->get_pmu_seq_in_a_ptr(seq)));
2505 if (pv->pmu_allocation_get_dmem_size(pmu,
2506 pv->get_pmu_seq_out_a_ptr(seq)) != 0)
2507 pmu->dmem.free(&pmu->dmem,
2508 pv->pmu_allocation_get_dmem_offset(pmu,
2509 pv->get_pmu_seq_out_a_ptr(seq)),
2510 pv->pmu_allocation_get_dmem_size(pmu,
2511 pv->get_pmu_seq_out_a_ptr(seq)));
2512
2513 if (seq->callback)
2514 seq->callback(g, msg, seq->cb_params, seq->desc, ret);
2515
2516 pmu_seq_release(pmu, seq);
2517
2518 /* TBD: notify client waiting for available dmem */
2519
2520 gk20a_dbg_fn("done");
2521
2522 return 0;
2523}
2524
2525static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2526 u32 *var, u32 val);
2527
2528static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
2529 void *param, u32 handle, u32 status)
2530{
2531 struct pmu_gk20a *pmu = param;
2532 pmu->zbc_save_done = 1;
2533}
2534
2535static void pmu_save_zbc(struct gk20a *g, u32 entries)
2536{
2537 struct pmu_gk20a *pmu = &g->pmu;
2538 struct pmu_cmd cmd;
2539 u32 seq;
2540
2541 if (!pmu->pmu_ready || !entries || !pmu->zbc_ready)
2542 return;
2543
2544 memset(&cmd, 0, sizeof(struct pmu_cmd));
2545 cmd.hdr.unit_id = PMU_UNIT_PG;
2546 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
2547 cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
2548 cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);
2549
2550 pmu->zbc_save_done = 0;
2551
2552 gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2553 pmu_handle_zbc_msg, pmu, &seq, ~0);
2554 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
2555 &pmu->zbc_save_done, 1);
2556 if (!pmu->zbc_save_done)
2557 gk20a_err(dev_from_gk20a(g), "ZBC save timeout");
2558}
2559
2560void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
2561{
2562 if (g->pmu.zbc_ready)
2563 pmu_save_zbc(g, entries);
2564}
2565
2566static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
2567{
2568 struct gk20a *g = pmu->g;
2569 struct pmu_v *pv = &g->ops.pmu_ver;
2570 struct pmu_cmd cmd;
2571 struct pmu_payload payload;
2572 u32 current_rate = 0;
2573 u32 seq;
2574
2575 /* PERFMON Start */
2576 memset(&cmd, 0, sizeof(struct pmu_cmd));
2577 cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2578 cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
2579 pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
2580 PMU_PERFMON_CMD_ID_START);
2581 pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
2582 PMU_DOMAIN_GROUP_PSTATE);
2583 pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
2584 pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
2585
2586 current_rate = rate_gpu_to_gpc2clk(gk20a_clk_get_rate(g));
2587 if (current_rate >= gpc_pll_params.max_freq)
2588 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2589 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2590 else if (current_rate <= gpc_pll_params.min_freq)
2591 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2592 PMU_PERFMON_FLAG_ENABLE_INCREASE);
2593 else
2594 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2595 PMU_PERFMON_FLAG_ENABLE_INCREASE |
2596 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2597
2598 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2599 pv->perfmon_start_get_flags(&cmd.cmd.perfmon) |
2600 PMU_PERFMON_FLAG_CLEAR_PREV);
2601
2602 memset(&payload, 0, sizeof(struct pmu_payload));
2603
2604 /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
2605 pmu->perfmon_counter.upper_threshold = 3000; /* 30% */
2606 /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
2607 pmu->perfmon_counter.lower_threshold = 1000; /* 10% */
2608 pmu->perfmon_counter.valid = true;
2609
2610 payload.in.buf = &pmu->perfmon_counter;
2611 payload.in.size = sizeof(pmu->perfmon_counter);
2612 payload.in.offset =
2613 pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
2614
2615 gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2616 NULL, NULL, &seq, ~0);
2617
2618 return 0;
2619}
2620
2621static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
2622{
2623 struct gk20a *g = pmu->g;
2624 struct pmu_cmd cmd;
2625 u32 seq;
2626
2627 /* PERFMON Stop */
2628 memset(&cmd, 0, sizeof(struct pmu_cmd));
2629 cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2630 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
2631 cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
2632
2633 gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2634 NULL, NULL, &seq, ~0);
2635 return 0;
2636}
2637
2638static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
2639 struct pmu_perfmon_msg *msg)
2640{
2641 struct gk20a *g = pmu->g;
2642 u32 rate;
2643
2644 gk20a_dbg_fn("");
2645
2646 switch (msg->msg_type) {
2647 case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
2648 gk20a_dbg_pmu("perfmon increase event: "
2649 			"state_id %d, group_id %d, pct %d",
2650 msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2651 /* increase gk20a clock freq by 20% */
2652 rate = gk20a_clk_get_rate(g);
2653 gk20a_clk_set_rate(g, rate * 6 / 5);
2654 break;
2655 case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
2656 gk20a_dbg_pmu("perfmon decrease event: "
2657 			"state_id %d, group_id %d, pct %d",
2658 msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2659 		/* decrease gk20a clock freq to 70% of current rate */
2660 rate = gk20a_clk_get_rate(g);
2661 gk20a_clk_set_rate(g, (rate / 10) * 7);
2662 break;
2663 case PMU_PERFMON_MSG_ID_INIT_EVENT:
2664 pmu->perfmon_ready = 1;
2665 gk20a_dbg_pmu("perfmon init event");
2666 break;
2667 default:
2668 break;
2669 }
2670
2671 /* restart sampling */
2672 if (IS_ENABLED(CONFIG_GK20A_PERFMON))
2673 return pmu_perfmon_start_sampling(pmu);
2674 return 0;
2675}
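/*
 * Illustrative arithmetic for the handler above (example rate only): at
 * 500 MHz, an INCREASE event requests 500 * 6 / 5 = 600 MHz, while a
 * DECREASE event requests (500 / 10) * 7 = 350 MHz, i.e. 70% of the
 * current rate.
 */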
2676
2677
2678static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg)
2679{
2680 	int err = 0;
2681
2682 gk20a_dbg_fn("");
2683
2684 switch (msg->hdr.unit_id) {
2685 case PMU_UNIT_PERFMON:
2686 err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon);
2687 break;
2688 default:
2689 break;
2690 }
2691
2692 return err;
2693}
2694
2695static int pmu_process_message(struct pmu_gk20a *pmu)
2696{
2697 struct pmu_msg msg;
2698 int status;
2699
2700 if (unlikely(!pmu->pmu_ready)) {
2701 pmu_process_init_msg(pmu, &msg);
2702 pmu_init_powergating(pmu);
2703 pmu_init_perfmon(pmu);
2704 return 0;
2705 }
2706
2707 while (pmu_read_message(pmu,
2708 &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
2709
2710 gk20a_dbg_pmu("read msg hdr: "
2711 "unit_id = 0x%08x, size = 0x%08x, "
2712 "ctrl_flags = 0x%08x, seq_id = 0x%08x",
2713 msg.hdr.unit_id, msg.hdr.size,
2714 msg.hdr.ctrl_flags, msg.hdr.seq_id);
2715
2716 msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
2717
2718 if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) {
2719 pmu_handle_event(pmu, &msg);
2720 } else {
2721 pmu_response_handle(pmu, &msg);
2722 }
2723 }
2724
2725 return 0;
2726}
2727
2728static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2729 u32 *var, u32 val)
2730{
2731 struct gk20a *g = pmu->g;
2732 unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
2733 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
2734
2735 do {
2736 if (*var == val)
2737 return 0;
2738
2739 if (gk20a_readl(g, pwr_falcon_irqstat_r()))
2740 gk20a_pmu_isr(g);
2741
2742 usleep_range(delay, delay * 2);
2743 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
2744 } while (time_before(jiffies, end_jiffies) ||
2745 !tegra_platform_is_silicon());
2746
2747 return -ETIMEDOUT;
2748}
2749
2750static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
2751{
2752 struct gk20a *g = pmu->g;
2753 struct pmu_pg_stats stats;
2754
2755 pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
2756 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
2757
2758 gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx",
2759 stats.pg_entry_start_timestamp);
2760 gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx",
2761 stats.pg_exit_start_timestamp);
2762 gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx",
2763 stats.pg_ingating_start_timestamp);
2764 gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx",
2765 stats.pg_ungating_start_timestamp);
2766 gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x",
2767 stats.pg_avg_entry_time_us);
2768 gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x",
2769 stats.pg_avg_exit_time_us);
2770 gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x",
2771 stats.pg_ingating_cnt);
2772 gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x",
2773 stats.pg_ingating_time_us);
2774 gk20a_dbg_pmu("pg_ungating_count : 0x%08x",
2775 stats.pg_ungating_count);
2776 	gk20a_dbg_pmu("pg_ungating_time_us : 0x%08x",
2777 stats.pg_ungating_time_us);
2778 gk20a_dbg_pmu("pg_gating_cnt : 0x%08x",
2779 stats.pg_gating_cnt);
2780 gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x",
2781 stats.pg_gating_deny_cnt);
2782
2783 /*
2784 Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset
2785 	 in .nm file, e.g. 0x1000066c. Use 0x66c.
2786 u32 i, val[20];
2787 pmu_copy_from_dmem(pmu, 0x66c,
2788 (u8 *)val, sizeof(val), 0);
2789 gk20a_dbg_pmu("elpg log begin");
2790 for (i = 0; i < 20; i++)
2791 gk20a_dbg_pmu("0x%08x", val[i]);
2792 gk20a_dbg_pmu("elpg log end");
2793 */
2794
2795 gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x",
2796 gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
2797 gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
2798 gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
2799 gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
2800 gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
2801 gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
2802 gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
2803 gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x",
2804 gk20a_readl(g, pwr_pmu_pg_intren_r(0)));
2805
2806 gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x",
2807 gk20a_readl(g, pwr_pmu_idle_count_r(3)));
2808 gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x",
2809 gk20a_readl(g, pwr_pmu_idle_count_r(4)));
2810 gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x",
2811 gk20a_readl(g, pwr_pmu_idle_count_r(7)));
2812
2813 /*
2814 TBD: script can't generate those registers correctly
2815 gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x",
2816 gk20a_readl(g, pwr_pmu_idle_status_r()));
2817 gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x",
2818 gk20a_readl(g, pwr_pmu_pg_ctrl_r()));
2819 */
2820}
2821
2822static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
2823{
2824 struct gk20a *g = pmu->g;
2825 int i;
2826
2827 gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d",
2828 gk20a_readl(g, pwr_falcon_os_r()));
2829 gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x",
2830 gk20a_readl(g, pwr_falcon_cpuctl_r()));
2831 gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x",
2832 gk20a_readl(g, pwr_falcon_idlestate_r()));
2833 gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x",
2834 gk20a_readl(g, pwr_falcon_mailbox0_r()));
2835 gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x",
2836 gk20a_readl(g, pwr_falcon_mailbox1_r()));
2837 gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x",
2838 gk20a_readl(g, pwr_falcon_irqstat_r()));
2839 gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x",
2840 gk20a_readl(g, pwr_falcon_irqmode_r()));
2841 gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x",
2842 gk20a_readl(g, pwr_falcon_irqmask_r()));
2843 gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x",
2844 gk20a_readl(g, pwr_falcon_irqdest_r()));
2845
2846 for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++)
2847 gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x",
2848 i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));
2849
2850 for (i = 0; i < pwr_pmu_debug__size_1_v(); i++)
2851 gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x",
2852 i, gk20a_readl(g, pwr_pmu_debug_r(i)));
2853
2854 for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
2855 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2856 pwr_pmu_falcon_icd_cmd_opc_rstat_f() |
2857 pwr_pmu_falcon_icd_cmd_idx_f(i));
2858 gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x",
2859 i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2860 }
2861
2862 i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
2863 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i);
2864 if (i != 0) {
2865 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x",
2866 gk20a_readl(g, pwr_pmu_bar0_addr_r()));
2867 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x",
2868 gk20a_readl(g, pwr_pmu_bar0_data_r()));
2869 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x",
2870 gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
2871 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x",
2872 gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
2873 }
2874
2875 i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
2876 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i);
2877
2878 i = gk20a_readl(g, pwr_falcon_exterrstat_r());
2879 gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i);
2880 if (pwr_falcon_exterrstat_valid_v(i) ==
2881 pwr_falcon_exterrstat_valid_true_v()) {
2882 gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x",
2883 gk20a_readl(g, pwr_falcon_exterraddr_r()));
2884 gk20a_err(dev_from_gk20a(g), "top_fs_status_r : 0x%x",
2885 gk20a_readl(g, top_fs_status_r()));
2886 gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
2887 gk20a_readl(g, mc_enable_r()));
2888 }
2889
2890 gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x",
2891 gk20a_readl(g, pwr_falcon_engctl_r()));
2892 gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x",
2893 gk20a_readl(g, pwr_falcon_curctx_r()));
2894 gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x",
2895 gk20a_readl(g, pwr_falcon_nxtctx_r()));
2896
2897 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2898 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2899 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
2900 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x",
2901 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2902
2903 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2904 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2905 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
2906 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x",
2907 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2908
2909 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2910 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2911 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
2912 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x",
2913 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2914
2915 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2916 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2917 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
2918 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x",
2919 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2920
2921 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2922 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2923 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
2924 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x",
2925 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2926
2927 for (i = 0; i < 4; i++) {
2928 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2929 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2930 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC));
2931 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x",
2932 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2933
2934 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2935 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2936 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP));
2937 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x",
2938 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2939 }
2940
2941 /* PMU may crash due to FECS crash. Dump FECS status */
2942 gk20a_fecs_dump_falcon_stats(g);
2943}
2944
2945void gk20a_pmu_isr(struct gk20a *g)
2946{
2947 struct pmu_gk20a *pmu = &g->pmu;
2948 struct pmu_queue *queue;
2949 u32 intr, mask;
2950 bool recheck = false;
2951
2952 gk20a_dbg_fn("");
2953
2954 mutex_lock(&pmu->isr_mutex);
2955
2956 mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
2957 gk20a_readl(g, pwr_falcon_irqdest_r());
2958
2959 intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;
2960
2961 gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr);
2962
2963 if (!intr) {
2964 mutex_unlock(&pmu->isr_mutex);
2965 return;
2966 }
2967
2968 if (intr & pwr_falcon_irqstat_halt_true_f()) {
2969 gk20a_err(dev_from_gk20a(g),
2970 "pmu halt intr not implemented");
2971 pmu_dump_falcon_stats(pmu);
2972 }
2973 if (intr & pwr_falcon_irqstat_exterr_true_f()) {
2974 gk20a_err(dev_from_gk20a(g),
2975 "pmu exterr intr not implemented. Clearing interrupt.");
2976 pmu_dump_falcon_stats(pmu);
2977
2978 gk20a_writel(g, pwr_falcon_exterrstat_r(),
2979 gk20a_readl(g, pwr_falcon_exterrstat_r()) &
2980 ~pwr_falcon_exterrstat_valid_m());
2981 }
2982 if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
2983 pmu_process_message(pmu);
2984 recheck = true;
2985 }
2986
2987 gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
2988
2989 if (recheck) {
2990 queue = &pmu->queue[PMU_MESSAGE_QUEUE];
2991 if (!pmu_queue_is_empty(pmu, queue))
2992 gk20a_writel(g, pwr_falcon_irqsset_r(),
2993 pwr_falcon_irqsset_swgen0_set_f());
2994 }
2995
2996 mutex_unlock(&pmu->isr_mutex);
2997}
2998
2999static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
3000 struct pmu_msg *msg, struct pmu_payload *payload,
3001 u32 queue_id)
3002{
3003 struct gk20a *g = pmu->g;
3004 struct pmu_queue *queue;
3005 u32 in_size, out_size;
3006
3007 if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
3008 goto invalid_cmd;
3009
3010 queue = &pmu->queue[queue_id];
3011 if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
3012 goto invalid_cmd;
3013
3014 if (cmd->hdr.size > (queue->size >> 1))
3015 goto invalid_cmd;
3016
3017 if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
3018 goto invalid_cmd;
3019
3020 if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
3021 goto invalid_cmd;
3022
3023 if (payload == NULL)
3024 return true;
3025
3026 if (payload->in.buf == NULL && payload->out.buf == NULL)
3027 goto invalid_cmd;
3028
3029 if ((payload->in.buf != NULL && payload->in.size == 0) ||
3030 (payload->out.buf != NULL && payload->out.size == 0))
3031 goto invalid_cmd;
3032
3033 in_size = PMU_CMD_HDR_SIZE;
3034 if (payload->in.buf) {
3035 in_size += payload->in.offset;
3036 in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
3037 }
3038
3039 out_size = PMU_CMD_HDR_SIZE;
3040 if (payload->out.buf) {
3041 out_size += payload->out.offset;
3042 out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
3043 }
3044
3045 if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
3046 goto invalid_cmd;
3047
3048
3049 if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
3050 (payload->out.offset != 0 && payload->out.buf == NULL))
3051 goto invalid_cmd;
3052
3053 return true;
3054
3055invalid_cmd:
3056 gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n"
3057 "queue_id=%d,\n"
3058 "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
3059 "payload in=%p, in_size=%d, in_offset=%d,\n"
3060 "payload out=%p, out_size=%d, out_offset=%d",
3061 queue_id, cmd->hdr.size, cmd->hdr.unit_id,
3062 		msg, msg ? msg->hdr.size : ~0,
3063 &payload->in, payload->in.size, payload->in.offset,
3064 &payload->out, payload->out.size, payload->out.offset);
3065
3066 return false;
3067}
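/*
 * Illustrative sizing check (constants made up for the example): with a
 * 4-byte PMU_CMD_HDR_SIZE, payload->in.offset == 8 and a 4-byte
 * pmu_allocation struct, in_size works out to 4 + 8 + 4 = 16, so the
 * command must declare cmd->hdr.size >= 16 to pass validation.
 */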
3068
3069static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
3070 u32 queue_id, unsigned long timeout)
3071{
3072 struct gk20a *g = pmu->g;
3073 struct pmu_queue *queue;
3074 unsigned long end_jiffies = jiffies +
3075 msecs_to_jiffies(timeout);
3076 int err;
3077
3078 gk20a_dbg_fn("");
3079
3080 queue = &pmu->queue[queue_id];
3081
3082 do {
3083 err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
3084 if (err == -EAGAIN && time_before(jiffies, end_jiffies))
3085 usleep_range(1000, 2000);
3086 else
3087 break;
3088 } while (1);
3089
3090 if (err)
3091 goto clean_up;
3092
3093 pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
3094
3095 err = pmu_queue_close(pmu, queue, true);
3096
3097clean_up:
3098 if (err)
3099 gk20a_err(dev_from_gk20a(g),
3100 "fail to write cmd to queue %d", queue_id);
3101 else
3102 gk20a_dbg_fn("done");
3103
3104 return err;
3105}
3106
3107int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
3108 struct pmu_msg *msg, struct pmu_payload *payload,
3109 u32 queue_id, pmu_callback callback, void* cb_param,
3110 u32 *seq_desc, unsigned long timeout)
3111{
3112 struct pmu_gk20a *pmu = &g->pmu;
3113 struct pmu_v *pv = &g->ops.pmu_ver;
3114 struct pmu_sequence *seq;
3115 void *in = NULL, *out = NULL;
3116 int err;
3117
3118 gk20a_dbg_fn("");
3119
3120 BUG_ON(!cmd);
3121 BUG_ON(!seq_desc);
3122 BUG_ON(!pmu->pmu_ready);
3123
3124 if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
3125 return -EINVAL;
3126
3127 err = pmu_seq_acquire(pmu, &seq);
3128 if (err)
3129 return err;
3130
3131 cmd->hdr.seq_id = seq->id;
3132
3133 cmd->hdr.ctrl_flags = 0;
3134 cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
3135 cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
3136
3137 seq->callback = callback;
3138 seq->cb_params = cb_param;
3139 seq->msg = msg;
3140 seq->out_payload = NULL;
3141 seq->desc = pmu->next_seq_desc++;
3142
3143 if (payload)
3144 seq->out_payload = payload->out.buf;
3145
3146 *seq_desc = seq->desc;
3147
3148 if (payload && payload->in.offset != 0) {
3149 pv->set_pmu_allocation_ptr(pmu, &in,
3150 ((u8 *)&cmd->cmd + payload->in.offset));
3151
3152 if (payload->in.buf != payload->out.buf)
3153 pv->pmu_allocation_set_dmem_size(pmu, in,
3154 (u16)payload->in.size);
3155 else
3156 pv->pmu_allocation_set_dmem_size(pmu, in,
3157 (u16)max(payload->in.size, payload->out.size));
3158
3159 err = pmu->dmem.alloc(&pmu->dmem,
3160 pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
3161 pv->pmu_allocation_get_dmem_size(pmu, in));
3162 if (err)
3163 goto clean_up;
3164
3165 pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu,
3166 in)),
3167 payload->in.buf, payload->in.size, 0);
3168 pv->pmu_allocation_set_dmem_size(pmu,
3169 pv->get_pmu_seq_in_a_ptr(seq),
3170 pv->pmu_allocation_get_dmem_size(pmu, in));
3171 pv->pmu_allocation_set_dmem_offset(pmu,
3172 pv->get_pmu_seq_in_a_ptr(seq),
3173 pv->pmu_allocation_get_dmem_offset(pmu, in));
3174 }
3175
3176 if (payload && payload->out.offset != 0) {
3177 pv->set_pmu_allocation_ptr(pmu, &out,
3178 ((u8 *)&cmd->cmd + payload->out.offset));
3179 pv->pmu_allocation_set_dmem_size(pmu, out,
3180 (u16)payload->out.size);
3181
3182 if (payload->out.buf != payload->in.buf) {
3183 err = pmu->dmem.alloc(&pmu->dmem,
3184 pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
3185 pv->pmu_allocation_get_dmem_size(pmu, out));
3186 if (err)
3187 goto clean_up;
3188 } else {
3189 BUG_ON(in == NULL);
3190 pv->pmu_allocation_set_dmem_offset(pmu, out,
3191 pv->pmu_allocation_get_dmem_offset(pmu, in));
3192 }
3193
3194 pv->pmu_allocation_set_dmem_size(pmu,
3195 pv->get_pmu_seq_out_a_ptr(seq),
3196 pv->pmu_allocation_get_dmem_size(pmu, out));
3197 pv->pmu_allocation_set_dmem_offset(pmu,
3198 pv->get_pmu_seq_out_a_ptr(seq),
3199 pv->pmu_allocation_get_dmem_offset(pmu, out));
3200 }
3201
3202 seq->state = PMU_SEQ_STATE_USED;
3203 err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
3204 if (err)
3205 seq->state = PMU_SEQ_STATE_PENDING;
3206
3207 gk20a_dbg_fn("done");
3208
3209 return 0;
3210
3211clean_up:
3212 gk20a_dbg_fn("fail");
3213 if (in)
3214 pmu->dmem.free(&pmu->dmem,
3215 pv->pmu_allocation_get_dmem_offset(pmu, in),
3216 pv->pmu_allocation_get_dmem_size(pmu, in));
3217 if (out)
3218 pmu->dmem.free(&pmu->dmem,
3219 pv->pmu_allocation_get_dmem_offset(pmu, out),
3220 pv->pmu_allocation_get_dmem_size(pmu, out));
3221
3222 pmu_seq_release(pmu, seq);
3223 return err;
3224}
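/*
 * Typical caller pattern (mirrors pmu_save_zbc() above): zero a struct
 * pmu_cmd, fill in the header and unit-specific body, then post it with
 * an optional completion callback.
 *
 *	struct pmu_cmd cmd;
 *	u32 seq;
 *
 *	memset(&cmd, 0, sizeof(struct pmu_cmd));
 *	cmd.hdr.unit_id = PMU_UNIT_PG;
 *	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
 *	cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
 *	cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);
 *
 *	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
 *			pmu_handle_zbc_msg, pmu, &seq, ~0);
 */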
3225
3226static int gk20a_pmu_enable_elpg_locked(struct gk20a *g)
3227{
3228 struct pmu_gk20a *pmu = &g->pmu;
3229 struct pmu_cmd cmd;
3230 u32 seq, status;
3231
3232 gk20a_dbg_fn("");
3233
3234 memset(&cmd, 0, sizeof(struct pmu_cmd));
3235 cmd.hdr.unit_id = PMU_UNIT_PG;
3236 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3237 cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3238 cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3239 cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
3240
3241 	/* no need to wait for the ack on ELPG enable, but set pending to sync
3242 	   with a follow-up ELPG disable */
3243 pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING;
3244
3245 status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3246 pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3247
3248 BUG_ON(status != 0);
3249
3250 gk20a_dbg_fn("done");
3251 return 0;
3252}
3253
3254int gk20a_pmu_enable_elpg(struct gk20a *g)
3255{
3256 struct pmu_gk20a *pmu = &g->pmu;
3257 struct gr_gk20a *gr = &g->gr;
3258
3259 int ret = 0;
3260
3261 gk20a_dbg_fn("");
3262
3263 if (!pmu->elpg_ready || !pmu->initialized)
3264 goto exit;
3265
3266 mutex_lock(&pmu->elpg_mutex);
3267
3268 pmu->elpg_refcnt++;
3269 if (pmu->elpg_refcnt <= 0)
3270 goto exit_unlock;
3271
3272 	/* something is not right if we end up in the following code path */
3273 if (unlikely(pmu->elpg_refcnt > 1)) {
3274 gk20a_warn(dev_from_gk20a(g),
3275 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3276 __func__, pmu->elpg_refcnt);
3277 WARN_ON(1);
3278 }
3279
3280 	/* do NOT enable elpg until the golden ctx is created, since that is
3281 	   the context that ELPG saves and restores. */
3282 if (unlikely(!gr->ctx_vars.golden_image_initialized))
3283 goto exit_unlock;
3284
3285 /* return if ELPG is already on or on_pending or off_on_pending */
3286 if (pmu->elpg_stat != PMU_ELPG_STAT_OFF)
3287 goto exit_unlock;
3288
3289 /* if ELPG is not allowed right now, mark that it should be enabled
3290 * immediately after it is allowed */
3291 if (!pmu->elpg_enable_allow) {
3292 pmu->elpg_stat = PMU_ELPG_STAT_OFF_ON_PENDING;
3293 goto exit_unlock;
3294 }
3295
3296 ret = gk20a_pmu_enable_elpg_locked(g);
3297
3298exit_unlock:
3299 mutex_unlock(&pmu->elpg_mutex);
3300exit:
3301 gk20a_dbg_fn("done");
3302 return ret;
3303}
3304
3305static void pmu_elpg_enable_allow(struct work_struct *work)
3306{
3307 struct pmu_gk20a *pmu = container_of(to_delayed_work(work),
3308 struct pmu_gk20a, elpg_enable);
3309
3310 gk20a_dbg_fn("");
3311
3312 mutex_lock(&pmu->elpg_mutex);
3313
3314 	/* It is ok to enable powergating now */
3315 pmu->elpg_enable_allow = true;
3316
3317 /* do we have pending requests? */
3318 if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
3319 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
3320 gk20a_pmu_enable_elpg_locked(pmu->g);
3321 }
3322
3323 mutex_unlock(&pmu->elpg_mutex);
3324
3325 gk20a_dbg_fn("done");
3326}
3327
3328static int gk20a_pmu_disable_elpg_defer_enable(struct gk20a *g, bool enable)
3329{
3330 struct pmu_gk20a *pmu = &g->pmu;
3331 struct pmu_cmd cmd;
3332 u32 seq;
3333 int ret = 0;
3334
3335 gk20a_dbg_fn("");
3336
3337 if (!pmu->elpg_ready || !pmu->initialized)
3338 return 0;
3339
3340 /* remove the work from queue */
3341 cancel_delayed_work_sync(&pmu->elpg_enable);
3342
3343 mutex_lock(&pmu->elpg_mutex);
3344
3345 pmu->elpg_refcnt--;
3346 if (pmu->elpg_refcnt > 0) {
3347 gk20a_warn(dev_from_gk20a(g),
3348 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3349 __func__, pmu->elpg_refcnt);
3350 WARN_ON(1);
3351 ret = 0;
3352 goto exit_unlock;
3353 }
3354
3355 /* cancel off_on_pending and return */
3356 if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
3357 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
3358 ret = 0;
3359 goto exit_reschedule;
3360 }
3361 /* wait if on_pending */
3362 else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) {
3363
3364 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3365 &pmu->elpg_stat, PMU_ELPG_STAT_ON);
3366
3367 if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3368 gk20a_err(dev_from_gk20a(g),
3369 "ELPG_ALLOW_ACK failed, elpg_stat=%d",
3370 pmu->elpg_stat);
3371 pmu_dump_elpg_stats(pmu);
3372 pmu_dump_falcon_stats(pmu);
3373 ret = -EBUSY;
3374 goto exit_unlock;
3375 }
3376 }
3377 /* return if ELPG is already off */
3378 else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3379 ret = 0;
3380 goto exit_reschedule;
3381 }
3382
3383 memset(&cmd, 0, sizeof(struct pmu_cmd));
3384 cmd.hdr.unit_id = PMU_UNIT_PG;
3385 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3386 cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3387 cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3388 cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
3389
3390 pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING;
3391
3392 gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3393 pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3394
3395 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3396 &pmu->elpg_stat, PMU_ELPG_STAT_OFF);
3397 if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) {
3398 gk20a_err(dev_from_gk20a(g),
3399 "ELPG_DISALLOW_ACK failed");
3400 pmu_dump_elpg_stats(pmu);
3401 pmu_dump_falcon_stats(pmu);
3402 ret = -EBUSY;
3403 goto exit_unlock;
3404 }
3405
3406exit_reschedule:
3407 if (enable) {
3408 pmu->elpg_enable_allow = false;
3409 schedule_delayed_work(&pmu->elpg_enable,
3410 msecs_to_jiffies(PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC));
3411 } else
3412 pmu->elpg_enable_allow = true;
3413
3414
3415exit_unlock:
3416 mutex_unlock(&pmu->elpg_mutex);
3417 gk20a_dbg_fn("done");
3418 return ret;
3419}
3420
3421int gk20a_pmu_disable_elpg(struct gk20a *g)
3422{
3423 return gk20a_pmu_disable_elpg_defer_enable(g, true);
3424}
3425
3426int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
3427{
3428 struct pmu_gk20a *pmu = &g->pmu;
3429 int err;
3430
3431 gk20a_dbg_fn("");
3432
3433 if (enable)
3434 err = pmu_perfmon_start_sampling(pmu);
3435 else
3436 err = pmu_perfmon_stop_sampling(pmu);
3437
3438 return err;
3439}
3440
3441int gk20a_pmu_destroy(struct gk20a *g)
3442{
3443 struct pmu_gk20a *pmu = &g->pmu;
3444 u32 elpg_ingating_time, elpg_ungating_time, gating_cnt;
3445
3446 gk20a_dbg_fn("");
3447
3448 if (!support_gk20a_pmu())
3449 return 0;
3450
3451 /* make sure the pending operations are finished before we continue */
3452 cancel_delayed_work_sync(&pmu->elpg_enable);
3453 cancel_work_sync(&pmu->pg_init);
3454
3455 gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time,
3456 &elpg_ungating_time, &gating_cnt);
3457
3458 gk20a_pmu_disable_elpg_defer_enable(g, false);
3459 pmu->initialized = false;
3460
3461 /* update the s/w ELPG residency counters */
3462 g->pg_ingating_time_us += (u64)elpg_ingating_time;
3463 g->pg_ungating_time_us += (u64)elpg_ungating_time;
3464 g->pg_gating_cnt += gating_cnt;
3465
3466 pmu_enable(pmu, false);
3467
3468 if (pmu->remove_support) {
3469 pmu->remove_support(pmu);
3470 pmu->remove_support = NULL;
3471 }
3472
3473 gk20a_dbg_fn("done");
3474 return 0;
3475}
3476
3477int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
3478{
3479 struct pmu_gk20a *pmu = &g->pmu;
3480 u16 _load = 0;
3481
3482 if (!pmu->perfmon_ready) {
3483 *load = 0;
3484 return 0;
3485 }
3486
3487 pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
3488 *load = _load / 10;
3489
3490 return 0;
3491}
3492
3493void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
3494 u32 *total_cycles)
3495{
3496 if (!g->power_on) {
3497 *busy_cycles = 0;
3498 *total_cycles = 0;
3499 return;
3500 }
3501
3502 gk20a_busy(g->dev);
3503 *busy_cycles = pwr_pmu_idle_count_value_v(
3504 gk20a_readl(g, pwr_pmu_idle_count_r(1)));
3505 rmb();
3506 *total_cycles = pwr_pmu_idle_count_value_v(
3507 gk20a_readl(g, pwr_pmu_idle_count_r(2)));
3508 gk20a_idle(g->dev);
3509}
3510
3511void gk20a_pmu_reset_load_counters(struct gk20a *g)
3512{
3513 u32 reg_val = pwr_pmu_idle_count_reset_f(1);
3514
3515 if (!g->power_on)
3516 return;
3517
3518 gk20a_busy(g->dev);
3519 gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
3520 wmb();
3521 gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
3522 gk20a_idle(g->dev);
3523}
3524
3525static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
3526 u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
3527{
3528 struct pmu_gk20a *pmu = &g->pmu;
3529 struct pmu_pg_stats stats;
3530
3531 if (!pmu->initialized) {
3532 *ingating_time = 0;
3533 *ungating_time = 0;
3534 *gating_cnt = 0;
3535 return 0;
3536 }
3537
3538 pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
3539 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
3540
3541 *ingating_time = stats.pg_ingating_time_us;
3542 *ungating_time = stats.pg_ungating_time_us;
3543 *gating_cnt = stats.pg_gating_cnt;
3544
3545 return 0;
3546}
3547
3548/* Send an Adaptive Power (AP) related command to PMU */
3549static int gk20a_pmu_ap_send_command(struct gk20a *g,
3550 union pmu_ap_cmd *p_ap_cmd, bool b_block)
3551{
3552 struct pmu_gk20a *pmu = &g->pmu;
3553 /* FIXME: where is the PG structure defined?? */
3554 u32 status = 0;
3555 struct pmu_cmd cmd;
3556 u32 seq;
3557 pmu_callback p_callback = NULL;
3558
3559 memset(&cmd, 0, sizeof(struct pmu_cmd));
3560
3561 /* Copy common members */
3562 cmd.hdr.unit_id = PMU_UNIT_PG;
3563 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd);
3564
3565 cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP;
3566 cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id;
3567
3568 /* Copy other members of command */
3569 switch (p_ap_cmd->cmn.cmd_id) {
3570 case PMU_AP_CMD_ID_INIT:
3571 cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us =
3572 p_ap_cmd->init.pg_sampling_period_us;
3573 p_callback = ap_callback_init_and_enable_ctrl;
3574 break;
3575
3576 case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL:
3577 cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id =
3578 p_ap_cmd->init_and_enable_ctrl.ctrl_id;
3579 memcpy(
3580 (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params),
3581 (void *)&(p_ap_cmd->init_and_enable_ctrl.params),
3582 sizeof(struct pmu_ap_ctrl_init_params));
3583
3584 p_callback = ap_callback_init_and_enable_ctrl;
3585 break;
3586
3587 case PMU_AP_CMD_ID_ENABLE_CTRL:
3588 cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id =
3589 p_ap_cmd->enable_ctrl.ctrl_id;
3590 break;
3591
3592 case PMU_AP_CMD_ID_DISABLE_CTRL:
3593 cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id =
3594 p_ap_cmd->disable_ctrl.ctrl_id;
3595 break;
3596
3597 case PMU_AP_CMD_ID_KICK_CTRL:
3598 cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id =
3599 p_ap_cmd->kick_ctrl.ctrl_id;
3600 cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count =
3601 p_ap_cmd->kick_ctrl.skip_count;
3602 break;
3603
3604 default:
3605 gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n",
3606 __func__, p_ap_cmd->cmn.cmd_id);
3607 return 0x2f;
3608 }
3609
3610 status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3611 p_callback, pmu, &seq, ~0);
3612
3613	if (status) {
3614 gk20a_dbg_pmu(
3615 "%s: Unable to submit Adaptive Power Command %d\n",
3616 __func__, p_ap_cmd->cmn.cmd_id);
3617 goto err_return;
3618 }
3619
3620 /* TODO: Implement blocking calls (b_block) */
3621
3622err_return:
3623 return status;
3624}
3625
3626static void ap_callback_init_and_enable_ctrl(
3627 struct gk20a *g, struct pmu_msg *msg,
3628 void *param, u32 seq_desc, u32 status)
3629{
3630	/* Define p_ap (i.e. pointer to pmu_ap structure) */
3631 WARN_ON(!msg);
3632
3633 if (!status) {
3634 switch (msg->msg.pg.ap_msg.cmn.msg_id) {
3635 case PMU_AP_MSG_ID_INIT_ACK:
3636 break;
3637
3638 default:
3639 gk20a_dbg_pmu(
3640 "%s: Invalid Adaptive Power Message: %x\n",
3641 __func__, msg->msg.pg.ap_msg.cmn.msg_id);
3642 break;
3643 }
3644 }
3645}
3646
3647static int gk20a_aelpg_init(struct gk20a *g)
3648{
3649 int status = 0;
3650
3651 /* Remove reliance on app_ctrl field. */
3652 union pmu_ap_cmd ap_cmd;
3653
3654 /* TODO: Check for elpg being ready? */
3655 ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT;
3656 ap_cmd.init.pg_sampling_period_us =
3657 APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
3658
3659 status = gk20a_pmu_ap_send_command(g, &ap_cmd, false);
3660 return status;
3661}
3662
3663static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
3664{
3665 int status = 0;
3666 union pmu_ap_cmd ap_cmd;
3667
3668 /* TODO: Probably check if ELPG is ready? */
3669
3670 ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL;
3671 ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id;
3672 ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us =
3673 APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
3674 ap_cmd.init_and_enable_ctrl.params.min_target_saving_us =
3675 APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
3676 ap_cmd.init_and_enable_ctrl.params.power_break_even_us =
3677 APCTRL_POWER_BREAKEVEN_DEFAULT_US;
3678 ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max =
3679 APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
3680
3681 switch (ctrl_id) {
3682 case PMU_AP_CTRL_ID_GRAPHICS:
3683 break;
3684 default:
3685 break;
3686 }
3687
3688 status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
3689 return status;
3690}
3691
3692#ifdef CONFIG_DEBUG_FS
3693static int elpg_residency_show(struct seq_file *s, void *data)
3694{
3695 struct gk20a *g = s->private;
3696 u32 ingating_time = 0;
3697 u32 ungating_time = 0;
3698 u32 gating_cnt;
3699 u64 total_ingating, total_ungating, residency, divisor, dividend;
3700
3701 /* Don't unnecessarily power on the device */
3702 if (g->power_on) {
3703 gk20a_busy(g->dev);
3704 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3705 &ungating_time, &gating_cnt);
3706 gk20a_idle(g->dev);
3707 }
3708 total_ingating = g->pg_ingating_time_us + (u64)ingating_time;
3709 total_ungating = g->pg_ungating_time_us + (u64)ungating_time;
3710 divisor = total_ingating + total_ungating;
3711
3712 /* We compute the residency on a scale of 1000 */
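	/* i.e. per-mille: a residency of 875 means ~87.5% of time spent in ELPG */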
3713 dividend = total_ingating * 1000;
3714
3715 if (divisor)
3716 residency = div64_u64(dividend, divisor);
3717 else
3718 residency = 0;
3719
3720 seq_printf(s, "Time in ELPG: %llu us\n"
3721 "Time out of ELPG: %llu us\n"
3722 "ELPG residency ratio: %llu\n",
3723 total_ingating, total_ungating, residency);
3724 return 0;
3725
3726}
3727
3728static int elpg_residency_open(struct inode *inode, struct file *file)
3729{
3730 return single_open(file, elpg_residency_show, inode->i_private);
3731}
3732
3733static const struct file_operations elpg_residency_fops = {
3734 .open = elpg_residency_open,
3735 .read = seq_read,
3736 .llseek = seq_lseek,
3737 .release = single_release,
3738};
3739
3740static int elpg_transitions_show(struct seq_file *s, void *data)
3741{
3742 struct gk20a *g = s->private;
3743 u32 ingating_time, ungating_time, total_gating_cnt;
3744 u32 gating_cnt = 0;
3745
3746 if (g->power_on) {
3747 gk20a_busy(g->dev);
3748 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3749 &ungating_time, &gating_cnt);
3750 gk20a_idle(g->dev);
3751 }
3752 total_gating_cnt = g->pg_gating_cnt + gating_cnt;
3753
3754 seq_printf(s, "%u\n", total_gating_cnt);
3755 return 0;
3756
3757}
3758
3759static int elpg_transitions_open(struct inode *inode, struct file *file)
3760{
3761 return single_open(file, elpg_transitions_show, inode->i_private);
3762}
3763
3764static const struct file_operations elpg_transitions_fops = {
3765 .open = elpg_transitions_open,
3766 .read = seq_read,
3767 .llseek = seq_lseek,
3768 .release = single_release,
3769};
3770
3771int gk20a_pmu_debugfs_init(struct platform_device *dev)
3772{
3773 struct dentry *d;
3774 struct gk20a_platform *platform = platform_get_drvdata(dev);
3775 struct gk20a *g = get_gk20a(dev);
3776
3777 d = debugfs_create_file(
3778 "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
3779 &elpg_residency_fops);
3780 if (!d)
3781 goto err_out;
3782
3783 d = debugfs_create_file(
3784 "elpg_transitions", S_IRUGO, platform->debugfs, g,
3785 &elpg_transitions_fops);
3786 if (!d)
3787 goto err_out;
3788
3789 return 0;
3790
3791err_out:
3792 pr_err("%s: Failed to make debugfs node\n", __func__);
3793 debugfs_remove_recursive(platform->debugfs);
3794 return -ENOMEM;
3795}
3796#endif
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
new file mode 100644
index 00000000..c1b8ff1f
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -0,0 +1,1097 @@
1/*
2 * drivers/video/tegra/host/gk20a/pmu_gk20a.h
3 *
4 * GK20A PMU (aka. gPMU outside gk20a context)
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#ifndef __PMU_GK20A_H__
22#define __PMU_GK20A_H__
23
24/* defined by pmu hw spec */
25#define GK20A_PMU_VA_START ((128 * 1024) << 10)
26#define GK20A_PMU_VA_SIZE (512 * 1024 * 1024)
27#define GK20A_PMU_INST_SIZE (4 * 1024)
28#define GK20A_PMU_UCODE_SIZE_MAX (256 * 1024)
29#define GK20A_PMU_SEQ_BUF_SIZE 4096
30
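/*
 * Builds a mask of ZBC table entries 1..i (bit 0 is always cleared by the
 * 0xfffe term), e.g. ZBC_MASK(3) == 0xe.
 */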
31#define ZBC_MASK(i) (~(~(0) << ((i)+1)) & 0xfffe)
32
33/* PMU Command/Message Interfaces for Adaptive Power */
34/* Macro to get Histogram index */
35#define PMU_AP_HISTOGRAM(idx) (idx)
36#define PMU_AP_HISTOGRAM_CONT (4)
37
38/* Total number of histogram bins */
39#define PMU_AP_CFG_HISTOGRAM_BIN_N (16)
40
41/* Mapping between Idle counters and histograms */
42#define PMU_AP_IDLE_MASK_HIST_IDX_0 (2)
43#define PMU_AP_IDLE_MASK_HIST_IDX_1 (3)
44#define PMU_AP_IDLE_MASK_HIST_IDX_2 (5)
45#define PMU_AP_IDLE_MASK_HIST_IDX_3 (6)
46
47
48/* Mapping between AP_CTRLs and Histograms */
49#define PMU_AP_HISTOGRAM_IDX_GRAPHICS (PMU_AP_HISTOGRAM(1))
50
51/* Mapping between AP_CTRLs and Idle counters */
52#define PMU_AP_IDLE_MASK_GRAPHICS (PMU_AP_IDLE_MASK_HIST_IDX_1)
53
54#define APP_VERSION_1 17997577
55#define APP_VERSION_0 16856675
56
57
58enum pmu_perfmon_cmd_start_fields {
59 COUNTER_ALLOC
60};
61
62/* Adaptive Power Controls (AP_CTRL) */
63enum {
64 PMU_AP_CTRL_ID_GRAPHICS = 0x0,
65 /* PMU_AP_CTRL_ID_MS ,*/
66 PMU_AP_CTRL_ID_MAX ,
67};
68
69/* AP_CTRL Statistics */
70struct pmu_ap_ctrl_stat {
71 /*
72 * Represents whether AP is active or not
73	 * TODO: This is NvBool in RM; is that 1 byte or 4 bytes?
74 */
75 u8 b_active;
76
77 /* Idle filter represented by histogram bin index */
78 u8 idle_filter_x;
79 u8 rsvd[2];
80
81 /* Total predicted power saving cycles. */
82 s32 power_saving_h_cycles;
83
84	/* Counts how many times AP gave a negative power benefit. */
85 u32 bad_decision_count;
86
87 /*
88 * Number of times ap structure needs to skip AP iterations
89 * KICK_CTRL from kernel updates this parameter.
90 */
91 u32 skip_count;
92 u8 bin[PMU_AP_CFG_HISTOGRAM_BIN_N];
93};
94
95/* Parameters initialized by INITn APCTRL command */
96struct pmu_ap_ctrl_init_params {
97 /* Minimum idle filter value in Us */
98 u32 min_idle_filter_us;
99
100 /*
101 * Minimum Targeted Saving in Us. AP will update idle thresholds only
102 * if power saving achieved by updating idle thresholds is greater than
103 * Minimum targeted saving.
104 */
105 u32 min_target_saving_us;
106
107 /* Minimum targeted residency of power feature in Us */
108 u32 power_break_even_us;
109
110 /*
111 * Maximum number of allowed power feature cycles per sample.
112 *
113	 * We allow at most this many power-feature cycles in one iteration of
114	 * AP (a.k.a. "pgPerSampleMax" in the original algorithm).
115 */
116 u32 cycles_per_sample_max;
117};
118
119/* AP Commands/Message structures */
120
121/*
122 * Structure for Generic AP Commands
123 */
124struct pmu_ap_cmd_common {
125 u8 cmd_type;
126 u16 cmd_id;
127};
128
129/*
130 * Structure for INIT AP command
131 */
132struct pmu_ap_cmd_init {
133 u8 cmd_type;
134 u16 cmd_id;
135 u8 rsvd;
136 u32 pg_sampling_period_us;
137};
138
139/*
140 * Structure for Enable/Disable ApCtrl Commands
141 */
142struct pmu_ap_cmd_enable_ctrl {
143 u8 cmd_type;
144 u16 cmd_id;
145
146 u8 ctrl_id;
147};
148
149struct pmu_ap_cmd_disable_ctrl {
150 u8 cmd_type;
151 u16 cmd_id;
152
153 u8 ctrl_id;
154};
155
156/*
157 * Structure for INIT command
158 */
159struct pmu_ap_cmd_init_ctrl {
160 u8 cmd_type;
161 u16 cmd_id;
162 u8 ctrl_id;
163 struct pmu_ap_ctrl_init_params params;
164};
165
166struct pmu_ap_cmd_init_and_enable_ctrl {
167 u8 cmd_type;
168 u16 cmd_id;
169 u8 ctrl_id;
170 struct pmu_ap_ctrl_init_params params;
171};
172
173/*
174 * Structure for KICK_CTRL command
175 */
176struct pmu_ap_cmd_kick_ctrl {
177 u8 cmd_type;
178 u16 cmd_id;
179 u8 ctrl_id;
180
181 u32 skip_count;
182};
183
184/*
185 * Structure for PARAM command
186 */
187struct pmu_ap_cmd_param {
188 u8 cmd_type;
189 u16 cmd_id;
190 u8 ctrl_id;
191
192 u32 data;
193};
194
195/*
196 * Defines for AP commands
197 */
198enum {
199 PMU_AP_CMD_ID_INIT = 0x0 ,
200 PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL,
201 PMU_AP_CMD_ID_ENABLE_CTRL ,
202 PMU_AP_CMD_ID_DISABLE_CTRL ,
203 PMU_AP_CMD_ID_KICK_CTRL ,
204};
205
206/*
207 * AP Command
208 */
209union pmu_ap_cmd {
210 u8 cmd_type;
211 struct pmu_ap_cmd_common cmn;
212 struct pmu_ap_cmd_init init;
213 struct pmu_ap_cmd_init_and_enable_ctrl init_and_enable_ctrl;
214 struct pmu_ap_cmd_enable_ctrl enable_ctrl;
215 struct pmu_ap_cmd_disable_ctrl disable_ctrl;
216 struct pmu_ap_cmd_kick_ctrl kick_ctrl;
217};
218
219/*
220 * Structure for generic AP Message
221 */
222struct pmu_ap_msg_common {
223 u8 msg_type;
224 u16 msg_id;
225};
226
227/*
228 * Structure for INIT_ACK Message
229 */
230struct pmu_ap_msg_init_ack {
231 u8 msg_type;
232 u16 msg_id;
233 u8 ctrl_id;
234 u32 stats_dmem_offset;
235};
236
237/*
238 * Defines for AP messages
239 */
240enum {
241 PMU_AP_MSG_ID_INIT_ACK = 0x0,
242};
243
244/*
245 * AP Message
246 */
247union pmu_ap_msg {
248 u8 msg_type;
249 struct pmu_ap_msg_common cmn;
250 struct pmu_ap_msg_init_ack init_ack;
251};
252
253/* Default Sampling Period of AELPG */
254#define APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US (1000000)
255
256/* Default values of APCTRL parameters */
257#define APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US (100)
258#define APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US (10000)
259#define APCTRL_POWER_BREAKEVEN_DEFAULT_US (2000)
260#define APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT (100)
261
262/*
263 * Disable reason for Adaptive Power Controller
264 */
265enum {
266 APCTRL_DISABLE_REASON_RM_UNLOAD,
267 APCTRL_DISABLE_REASON_RMCTRL,
268};
269
270/*
271 * Adaptive Power Controller
272 */
273struct ap_ctrl {
274 u32 stats_dmem_offset;
275 u32 disable_reason_mask;
276 struct pmu_ap_ctrl_stat stat_cache;
277 u8 b_ready;
278};
279
280/*
281 * Adaptive Power structure
282 *
283 * ap structure provides generic infrastructure to make any power feature
284 * adaptive.
285 */
286struct pmu_ap {
287 u32 supported_mask;
288 struct ap_ctrl ap_ctrl[PMU_AP_CTRL_ID_MAX];
289};
290
291
292enum {
293 GK20A_PMU_DMAIDX_UCODE = 0,
294 GK20A_PMU_DMAIDX_VIRT = 1,
295 GK20A_PMU_DMAIDX_PHYS_VID = 2,
296 GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3,
297 GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4,
298 GK20A_PMU_DMAIDX_RSVD = 5,
299 GK20A_PMU_DMAIDX_PELPG = 6,
300 GK20A_PMU_DMAIDX_END = 7
301};
302
303struct pmu_mem_v0 {
304 u32 dma_base;
305 u8 dma_offset;
306 u8 dma_idx;
307};
308
309struct pmu_mem_v1 {
310 u32 dma_base;
311 u8 dma_offset;
312 u8 dma_idx;
313 u16 fb_size;
314};
315
316struct pmu_dmem {
317 u16 size;
318 u32 offset;
319};
320
321/* Make sure size of this structure is a multiple of 4 bytes */
322struct pmu_cmdline_args_v0 {
323 u32 cpu_freq_hz; /* Frequency of the clock driving PMU */
324 u32 falc_trace_size; /* falctrace buffer size (bytes) */
325 u32 falc_trace_dma_base; /* 256-byte block address */
326 u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */
327 struct pmu_mem_v0 gc6_ctx; /* dmem offset of gc6 context */
328};
329
330struct pmu_cmdline_args_v1 {
331 u32 cpu_freq_hz; /* Frequency of the clock driving PMU */
332 u32 falc_trace_size; /* falctrace buffer size (bytes) */
333 u32 falc_trace_dma_base; /* 256-byte block address */
334 u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */
335 u8 secure_mode;
336 struct pmu_mem_v1 gc6_ctx; /* dmem offset of gc6 context */
337};
338
339#define GK20A_PMU_DMEM_BLKSIZE2 8
340
341#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32
342#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64
343
344struct pmu_ucode_desc {
345 u32 descriptor_size;
346 u32 image_size;
347 u32 tools_version;
348 u32 app_version;
349 char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH];
350 u32 bootloader_start_offset;
351 u32 bootloader_size;
352 u32 bootloader_imem_offset;
353 u32 bootloader_entry_point;
354 u32 app_start_offset;
355 u32 app_size;
356 u32 app_imem_offset;
357 u32 app_imem_entry;
358 u32 app_dmem_offset;
359 u32 app_resident_code_offset; /* Offset from appStartOffset */
360 u32 app_resident_code_size; /* Exact size of the resident code ( potentially contains CRC inside at the end ) */
361 u32 app_resident_data_offset; /* Offset from appStartOffset */
362	u32 app_resident_data_size;   /* Exact size of the resident data ( potentially contains CRC inside at the end ) */
363 u32 nb_overlays;
364 struct {u32 start; u32 size;} load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY];
365 u32 compressed;
366};
367
368#define PMU_UNIT_REWIND (0x00)
369#define PMU_UNIT_I2C (0x01)
370#define PMU_UNIT_SEQ (0x02)
371#define PMU_UNIT_PG (0x03)
372#define PMU_UNIT_AVAILABLE1 (0x04)
373#define PMU_UNIT_AVAILABLE2 (0x05)
374#define PMU_UNIT_MEM (0x06)
375#define PMU_UNIT_INIT (0x07)
376#define PMU_UNIT_FBBA (0x08)
377#define PMU_UNIT_DIDLE (0x09)
378#define PMU_UNIT_AVAILABLE3 (0x0A)
379#define PMU_UNIT_AVAILABLE4 (0x0B)
380#define PMU_UNIT_HDCP_MAIN (0x0C)
381#define PMU_UNIT_HDCP_V (0x0D)
382#define PMU_UNIT_HDCP_SRM (0x0E)
383#define PMU_UNIT_NVDPS (0x0F)
384#define PMU_UNIT_DEINIT (0x10)
385#define PMU_UNIT_AVAILABLE5 (0x11)
386#define PMU_UNIT_PERFMON (0x12)
387#define PMU_UNIT_FAN (0x13)
388#define PMU_UNIT_PBI (0x14)
389#define PMU_UNIT_ISOBLIT (0x15)
390#define PMU_UNIT_DETACH (0x16)
391#define PMU_UNIT_DISP (0x17)
392#define PMU_UNIT_HDCP (0x18)
393#define PMU_UNIT_REGCACHE (0x19)
394#define PMU_UNIT_SYSMON (0x1A)
395#define PMU_UNIT_THERM (0x1B)
396#define PMU_UNIT_PMGR (0x1C)
397#define PMU_UNIT_PERF (0x1D)
398#define PMU_UNIT_PCM (0x1E)
399#define PMU_UNIT_RC (0x1F)
400#define PMU_UNIT_NULL (0x20)
401#define PMU_UNIT_LOGGER (0x21)
402#define PMU_UNIT_SMBPBI (0x22)
403#define PMU_UNIT_END (0x23)
404
405#define PMU_UNIT_TEST_START (0xFE)
406#define PMU_UNIT_END_SIM (0xFF)
407#define PMU_UNIT_TEST_END (0xFF)
408
409#define PMU_UNIT_ID_IS_VALID(id) \
410 (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START))
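/* i.e. valid unit ids are 0x00..0x22 plus the 0xFE..0xFF test range */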
411
412#define PMU_DMEM_ALLOC_ALIGNMENT (32)
413#define PMU_DMEM_ALIGNMENT (4)
414
415#define PMU_CMD_FLAGS_PMU_MASK (0xF0)
416
417#define PMU_CMD_FLAGS_STATUS BIT(0)
418#define PMU_CMD_FLAGS_INTR BIT(1)
419#define PMU_CMD_FLAGS_EVENT BIT(2)
420#define PMU_CMD_FLAGS_WATERMARK BIT(3)
421
422struct pmu_hdr {
423 u8 unit_id;
424 u8 size;
425 u8 ctrl_flags;
426 u8 seq_id;
427};
428#define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr)
429#define PMU_CMD_HDR_SIZE sizeof(struct pmu_hdr)
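/*
 * hdr.size covers the header plus the unit-specific payload; e.g. an ELPG
 * command is sized as PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd)
 * (see gk20a_pmu_disable_elpg_defer_enable() in pmu_gk20a.c).
 */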
430
431#define PMU_QUEUE_COUNT 5
432
433struct pmu_allocation_v0 {
434 u8 pad[3];
435 u8 fb_mem_use;
436 struct {
437 struct pmu_dmem dmem;
438 struct pmu_mem_v0 fb;
439 } alloc;
440};
441
442struct pmu_allocation_v1 {
443 struct {
444 struct pmu_dmem dmem;
445 struct pmu_mem_v1 fb;
446 } alloc;
447};
448
449enum {
450 PMU_INIT_MSG_TYPE_PMU_INIT = 0,
451};
452
453struct pmu_init_msg_pmu_v0 {
454 u8 msg_type;
455 u8 pad;
456
457 struct {
458 u16 size;
459 u16 offset;
460 u8 index;
461 u8 pad;
462 } queue_info[PMU_QUEUE_COUNT];
463
464 u16 sw_managed_area_offset;
465 u16 sw_managed_area_size;
466};
467
468struct pmu_init_msg_pmu_v1 {
469 u8 msg_type;
470 u8 pad;
471 u16 os_debug_entry_point;
472
473 struct {
474 u16 size;
475 u16 offset;
476 u8 index;
477 u8 pad;
478 } queue_info[PMU_QUEUE_COUNT];
479
480 u16 sw_managed_area_offset;
481 u16 sw_managed_area_size;
482};
483
484union pmu_init_msg_pmu {
485 struct pmu_init_msg_pmu_v0 v0;
486 struct pmu_init_msg_pmu_v1 v1;
487};
488
489struct pmu_init_msg {
490 union {
491 u8 msg_type;
492 struct pmu_init_msg_pmu_v1 pmu_init_v1;
493 struct pmu_init_msg_pmu_v0 pmu_init_v0;
494 };
495};
496
497enum {
498 PMU_PG_ELPG_MSG_INIT_ACK,
499 PMU_PG_ELPG_MSG_DISALLOW_ACK,
500 PMU_PG_ELPG_MSG_ALLOW_ACK,
501 PMU_PG_ELPG_MSG_FREEZE_ACK,
502 PMU_PG_ELPG_MSG_FREEZE_ABORT,
503 PMU_PG_ELPG_MSG_UNFREEZE_ACK,
504};
505
506struct pmu_pg_msg_elpg_msg {
507 u8 msg_type;
508 u8 engine_id;
509 u16 msg;
510};
511
512enum {
513 PMU_PG_STAT_MSG_RESP_DMEM_OFFSET = 0,
514};
515
516struct pmu_pg_msg_stat {
517 u8 msg_type;
518 u8 engine_id;
519 u16 sub_msg_id;
520 u32 data;
521};
522
523enum {
524 PMU_PG_MSG_ENG_BUF_LOADED,
525 PMU_PG_MSG_ENG_BUF_UNLOADED,
526 PMU_PG_MSG_ENG_BUF_FAILED,
527};
528
529struct pmu_pg_msg_eng_buf_stat {
530 u8 msg_type;
531 u8 engine_id;
532 u8 buf_idx;
533 u8 status;
534};
535
536struct pmu_pg_msg {
537 union {
538 u8 msg_type;
539 struct pmu_pg_msg_elpg_msg elpg_msg;
540 struct pmu_pg_msg_stat stat;
541 struct pmu_pg_msg_eng_buf_stat eng_buf_stat;
542 /* TBD: other pg messages */
543 union pmu_ap_msg ap_msg;
544 };
545};
546
547enum {
548 PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0,
549};
550
551struct pmu_rc_msg_unhandled_cmd {
552 u8 msg_type;
553 u8 unit_id;
554};
555
556struct pmu_rc_msg {
557 u8 msg_type;
558 struct pmu_rc_msg_unhandled_cmd unhandled_cmd;
559};
560
561enum {
562 PMU_PG_CMD_ID_ELPG_CMD = 0,
563 PMU_PG_CMD_ID_ENG_BUF_LOAD,
564 PMU_PG_CMD_ID_ENG_BUF_UNLOAD,
565 PMU_PG_CMD_ID_PG_STAT,
566 PMU_PG_CMD_ID_PG_LOG_INIT,
567 PMU_PG_CMD_ID_PG_LOG_FLUSH,
568 PMU_PG_CMD_ID_PG_PARAM,
569 PMU_PG_CMD_ID_ELPG_INIT,
570 PMU_PG_CMD_ID_ELPG_POLL_CTXSAVE,
571 PMU_PG_CMD_ID_ELPG_ABORT_POLL,
572 PMU_PG_CMD_ID_ELPG_PWR_UP,
573 PMU_PG_CMD_ID_ELPG_DISALLOW,
574 PMU_PG_CMD_ID_ELPG_ALLOW,
575 PMU_PG_CMD_ID_AP,
576 RM_PMU_PG_CMD_ID_PSI,
577 RM_PMU_PG_CMD_ID_CG,
578 PMU_PG_CMD_ID_ZBC_TABLE_UPDATE,
579 PMU_PG_CMD_ID_PWR_RAIL_GATE_DISABLE = 0x20,
580 PMU_PG_CMD_ID_PWR_RAIL_GATE_ENABLE,
581 PMU_PG_CMD_ID_PWR_RAIL_SMU_MSG_DISABLE
582};
583
584enum {
585 PMU_PG_ELPG_CMD_INIT,
586 PMU_PG_ELPG_CMD_DISALLOW,
587 PMU_PG_ELPG_CMD_ALLOW,
588 PMU_PG_ELPG_CMD_FREEZE,
589 PMU_PG_ELPG_CMD_UNFREEZE,
590};
591
592struct pmu_pg_cmd_elpg_cmd {
593 u8 cmd_type;
594 u8 engine_id;
595 u16 cmd;
596};
597
598struct pmu_pg_cmd_eng_buf_load {
599 u8 cmd_type;
600 u8 engine_id;
601 u8 buf_idx;
602 u8 pad;
603 u16 buf_size;
604 u32 dma_base;
605 u8 dma_offset;
606 u8 dma_idx;
607};
608
609enum {
610 PMU_PG_STAT_CMD_ALLOC_DMEM = 0,
611};
612
613struct pmu_pg_cmd_stat {
614 u8 cmd_type;
615 u8 engine_id;
616 u16 sub_cmd_id;
617 u32 data;
618};
619
620struct pmu_pg_cmd {
621 union {
622 u8 cmd_type;
623 struct pmu_pg_cmd_elpg_cmd elpg_cmd;
624 struct pmu_pg_cmd_eng_buf_load eng_buf_load;
625 struct pmu_pg_cmd_stat stat;
626 /* TBD: other pg commands */
627 union pmu_ap_cmd ap_cmd;
628 };
629};
630
631/* PERFMON */
632#define PMU_DOMAIN_GROUP_PSTATE 0
633#define PMU_DOMAIN_GROUP_GPC2CLK 1
634#define PMU_DOMAIN_GROUP_NUM 2
635
636/* TBD: smart strategy */
637#define PMU_PERFMON_PCT_TO_INC 58
638#define PMU_PERFMON_PCT_TO_DEC 23
639
640struct pmu_perfmon_counter {
641 u8 index;
642 u8 flags;
643 u8 group_id;
644 u8 valid;
645 u16 upper_threshold; /* units of 0.01% */
646 u16 lower_threshold; /* units of 0.01% */
647};
648
649#define PMU_PERFMON_FLAG_ENABLE_INCREASE (0x00000001)
650#define PMU_PERFMON_FLAG_ENABLE_DECREASE (0x00000002)
651#define PMU_PERFMON_FLAG_CLEAR_PREV (0x00000004)
652
653/* PERFMON CMD */
654enum {
655 PMU_PERFMON_CMD_ID_START = 0,
656 PMU_PERFMON_CMD_ID_STOP = 1,
657 PMU_PERFMON_CMD_ID_INIT = 2
658};
659
660struct pmu_perfmon_cmd_start_v1 {
661 u8 cmd_type;
662 u8 group_id;
663 u8 state_id;
664 u8 flags;
665 struct pmu_allocation_v1 counter_alloc;
666};
667
668struct pmu_perfmon_cmd_start_v0 {
669 u8 cmd_type;
670 u8 group_id;
671 u8 state_id;
672 u8 flags;
673 struct pmu_allocation_v0 counter_alloc;
674};
675
676struct pmu_perfmon_cmd_stop {
677 u8 cmd_type;
678};
679
680struct pmu_perfmon_cmd_init_v1 {
681 u8 cmd_type;
682 u8 to_decrease_count;
683 u8 base_counter_id;
684 u32 sample_period_us;
685 struct pmu_allocation_v1 counter_alloc;
686 u8 num_counters;
687 u8 samples_in_moving_avg;
688 u16 sample_buffer;
689};
690
691struct pmu_perfmon_cmd_init_v0 {
692 u8 cmd_type;
693 u8 to_decrease_count;
694 u8 base_counter_id;
695 u32 sample_period_us;
696 struct pmu_allocation_v0 counter_alloc;
697 u8 num_counters;
698 u8 samples_in_moving_avg;
699 u16 sample_buffer;
700};
701
702struct pmu_perfmon_cmd {
703 union {
704 u8 cmd_type;
705 struct pmu_perfmon_cmd_start_v0 start_v0;
706 struct pmu_perfmon_cmd_start_v1 start_v1;
707 struct pmu_perfmon_cmd_stop stop;
708 struct pmu_perfmon_cmd_init_v0 init_v0;
709 struct pmu_perfmon_cmd_init_v1 init_v1;
710 };
711};
712
713struct pmu_zbc_cmd {
714 u8 cmd_type;
715 u8 pad;
716 u16 entry_mask;
717};
718
719/* PERFMON MSG */
720enum {
721 PMU_PERFMON_MSG_ID_INCREASE_EVENT = 0,
722 PMU_PERFMON_MSG_ID_DECREASE_EVENT = 1,
723 PMU_PERFMON_MSG_ID_INIT_EVENT = 2,
724 PMU_PERFMON_MSG_ID_ACK = 3
725};
726
727struct pmu_perfmon_msg_generic {
728 u8 msg_type;
729 u8 state_id;
730 u8 group_id;
731 u8 data;
732};
733
734struct pmu_perfmon_msg {
735 union {
736 u8 msg_type;
737 struct pmu_perfmon_msg_generic gen;
738 };
739};
740
741
742struct pmu_cmd {
743 struct pmu_hdr hdr;
744 union {
745 struct pmu_perfmon_cmd perfmon;
746 struct pmu_pg_cmd pg;
747 struct pmu_zbc_cmd zbc;
748 } cmd;
749};
750
751struct pmu_msg {
752 struct pmu_hdr hdr;
753 union {
754 struct pmu_init_msg init;
755 struct pmu_perfmon_msg perfmon;
756 struct pmu_pg_msg pg;
757 struct pmu_rc_msg rc;
758 } msg;
759};
760
761#define PMU_SHA1_GID_SIGNATURE 0xA7C66AD2
762#define PMU_SHA1_GID_SIGNATURE_SIZE 4
763
764#define PMU_SHA1_GID_SIZE 16
765
766struct pmu_sha1_gid {
767 bool valid;
768 u8 gid[PMU_SHA1_GID_SIZE];
769};
770
771struct pmu_sha1_gid_data {
772 u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE];
773 u8 gid[PMU_SHA1_GID_SIZE];
774};
775
776#define PMU_COMMAND_QUEUE_HPQ 0 /* write by sw, read by pmu, protected by sw mutex lock */
777#define PMU_COMMAND_QUEUE_LPQ 1 /* write by sw, read by pmu, protected by sw mutex lock */
778#define PMU_COMMAND_QUEUE_BIOS 2 /* read/write by sw/hw, protected by hw pmu mutex, id = 2 */
779#define PMU_COMMAND_QUEUE_SMI 3 /* read/write by sw/hw, protected by hw pmu mutex, id = 3 */
780#define PMU_MESSAGE_QUEUE 4 /* write by pmu, read by sw, accessed by interrupt handler, no lock */
781#define PMU_QUEUE_COUNT 5
782
783enum {
784 PMU_MUTEX_ID_RSVD1 = 0 ,
785 PMU_MUTEX_ID_GPUSER ,
786 PMU_MUTEX_ID_QUEUE_BIOS ,
787 PMU_MUTEX_ID_QUEUE_SMI ,
788 PMU_MUTEX_ID_GPMUTEX ,
789 PMU_MUTEX_ID_I2C ,
790 PMU_MUTEX_ID_RMLOCK ,
791 PMU_MUTEX_ID_MSGBOX ,
792 PMU_MUTEX_ID_FIFO ,
793 PMU_MUTEX_ID_PG ,
794 PMU_MUTEX_ID_GR ,
795 PMU_MUTEX_ID_CLK ,
796 PMU_MUTEX_ID_RSVD6 ,
797 PMU_MUTEX_ID_RSVD7 ,
798 PMU_MUTEX_ID_RSVD8 ,
799 PMU_MUTEX_ID_RSVD9 ,
800 PMU_MUTEX_ID_INVALID
801};
802
803#define PMU_IS_COMMAND_QUEUE(id) \
804 ((id) < PMU_MESSAGE_QUEUE)
805
806#define PMU_IS_SW_COMMAND_QUEUE(id) \
807 (((id) == PMU_COMMAND_QUEUE_HPQ) || \
808 ((id) == PMU_COMMAND_QUEUE_LPQ))
809
810#define PMU_IS_MESSAGE_QUEUE(id) \
811 ((id) == PMU_MESSAGE_QUEUE)
812
813enum
814{
815 OFLAG_READ = 0,
816 OFLAG_WRITE
817};
818
819#define QUEUE_SET (true)
820#define QUEUE_GET (false)
821
822#define QUEUE_ALIGNMENT (4)
823
824#define PMU_PGENG_GR_BUFFER_IDX_INIT (0)
825#define PMU_PGENG_GR_BUFFER_IDX_ZBC (1)
826#define PMU_PGENG_GR_BUFFER_IDX_FECS (2)
827
828enum
829{
830 PMU_DMAIDX_UCODE = 0,
831 PMU_DMAIDX_VIRT = 1,
832 PMU_DMAIDX_PHYS_VID = 2,
833 PMU_DMAIDX_PHYS_SYS_COH = 3,
834 PMU_DMAIDX_PHYS_SYS_NCOH = 4,
835 PMU_DMAIDX_RSVD = 5,
836 PMU_DMAIDX_PELPG = 6,
837 PMU_DMAIDX_END = 7
838};
839
840struct pmu_gk20a;
841struct pmu_queue;
842
843struct pmu_queue {
844
845 /* used by hw, for BIOS/SMI queue */
846 u32 mutex_id;
847 u32 mutex_lock;
848 /* used by sw, for LPQ/HPQ queue */
849 struct mutex mutex;
850
851 /* current write position */
852 u32 position;
853 /* physical dmem offset where this queue begins */
854 u32 offset;
855 /* logical queue identifier */
856 u32 id;
857 /* physical queue index */
858 u32 index;
859 /* in bytes */
860 u32 size;
861
862 /* open-flag */
863 u32 oflag;
864 bool opened; /* opened implies locked */
865 bool locked; /* check free space after setting locked but before setting opened */
866};
867
868
869#define PMU_MUTEX_ID_IS_VALID(id) \
870 ((id) < PMU_MUTEX_ID_INVALID)
871
872#define PMU_INVALID_MUTEX_OWNER_ID (0)
873
874struct pmu_mutex {
875 u32 id;
876 u32 index;
877 u32 ref_cnt;
878};
879
880#define PMU_MAX_NUM_SEQUENCES (256)
881#define PMU_SEQ_BIT_SHIFT (5)
882#define PMU_SEQ_TBL_SIZE \
883 (PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT)
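/*
 * 256 >> 5 == 8 table entries, i.e. one allocation bit per sequence when an
 * entry holds 32 bits (PMU_SEQ_BIT_SHIFT == 5).
 */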
884
885#define PMU_INVALID_SEQ_DESC (~0)
886
887enum
888{
889 PMU_SEQ_STATE_FREE = 0,
890 PMU_SEQ_STATE_PENDING,
891 PMU_SEQ_STATE_USED,
892 PMU_SEQ_STATE_CANCELLED
893};
894
895struct pmu_payload {
896 struct {
897 void *buf;
898 u32 offset;
899 u32 size;
900 } in, out;
901};
902
903typedef void (*pmu_callback)(struct gk20a *, struct pmu_msg *, void *, u32,
904 u32);
905
906struct pmu_sequence {
907 u8 id;
908 u32 state;
909 u32 desc;
910 struct pmu_msg *msg;
911 union {
912 struct pmu_allocation_v0 in_v0;
913 struct pmu_allocation_v1 in_v1;
914 };
915 union {
916 struct pmu_allocation_v0 out_v0;
917 struct pmu_allocation_v1 out_v1;
918 };
919 u8 *out_payload;
920 pmu_callback callback;
921 void* cb_params;
922};
923
924struct pmu_pg_stats {
925 u64 pg_entry_start_timestamp;
926 u64 pg_ingating_start_timestamp;
927 u64 pg_exit_start_timestamp;
928 u64 pg_ungating_start_timestamp;
929 u32 pg_avg_entry_time_us;
930 u32 pg_ingating_cnt;
931 u32 pg_ingating_time_us;
932 u32 pg_avg_exit_time_us;
933 u32 pg_ungating_count;
934 u32 pg_ungating_time_us;
935 u32 pg_gating_cnt;
936 u32 pg_gating_deny_cnt;
937};
938
939#define PMU_PG_IDLE_THRESHOLD_SIM 1000
940#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM 4000000
941/* TBD: QT or else ? */
942#define PMU_PG_IDLE_THRESHOLD 15000
943#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD 1000000
944
945/* state transition :
946 OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF
947 ON => OFF is always synchronized */
948#define PMU_ELPG_STAT_OFF 0 /* elpg is off */
949#define PMU_ELPG_STAT_ON 1 /* elpg is on */
950#define PMU_ELPG_STAT_ON_PENDING 2 /* elpg is off, ALLOW cmd has been sent, wait for ack */
951#define PMU_ELPG_STAT_OFF_PENDING 3 /* elpg is on, DISALLOW cmd has been sent, wait for ack */
952#define PMU_ELPG_STAT_OFF_ON_PENDING 4 /* elpg is off, caller has requested on, but ALLOW
953 cmd hasn't been sent due to ENABLE_ALLOW delay */
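/*
 * These states back the flow in pmu_gk20a.c: gk20a_pmu_enable_elpg() sends
 * ALLOW (OFF -> ON_PENDING -> ON on ack), gk20a_pmu_disable_elpg() sends
 * DISALLOW (ON -> OFF_PENDING -> OFF on ack), and OFF_ON_PENDING marks an
 * enable request made while the deferred-enable delay is still running.
 */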
954
955/* Falcon Register index */
956#define PMU_FALCON_REG_R0 (0)
957#define PMU_FALCON_REG_R1 (1)
958#define PMU_FALCON_REG_R2 (2)
959#define PMU_FALCON_REG_R3 (3)
960#define PMU_FALCON_REG_R4 (4)
961#define PMU_FALCON_REG_R5 (5)
962#define PMU_FALCON_REG_R6 (6)
963#define PMU_FALCON_REG_R7 (7)
964#define PMU_FALCON_REG_R8 (8)
965#define PMU_FALCON_REG_R9 (9)
966#define PMU_FALCON_REG_R10 (10)
967#define PMU_FALCON_REG_R11 (11)
968#define PMU_FALCON_REG_R12 (12)
969#define PMU_FALCON_REG_R13 (13)
970#define PMU_FALCON_REG_R14 (14)
971#define PMU_FALCON_REG_R15 (15)
972#define PMU_FALCON_REG_IV0 (16)
973#define PMU_FALCON_REG_IV1 (17)
974#define PMU_FALCON_REG_UNDEFINED (18)
975#define PMU_FALCON_REG_EV (19)
976#define PMU_FALCON_REG_SP (20)
977#define PMU_FALCON_REG_PC (21)
978#define PMU_FALCON_REG_IMB (22)
979#define PMU_FALCON_REG_DMB (23)
980#define PMU_FALCON_REG_CSW (24)
981#define PMU_FALCON_REG_CCR (25)
982#define PMU_FALCON_REG_SEC (26)
983#define PMU_FALCON_REG_CTX (27)
984#define PMU_FALCON_REG_EXCI (28)
985#define PMU_FALCON_REG_RSVD0 (29)
986#define PMU_FALCON_REG_RSVD1 (30)
987#define PMU_FALCON_REG_RSVD2 (31)
988#define PMU_FALCON_REG_SIZE (32)
989
990struct pmu_gk20a {
991
992 struct gk20a *g;
993
994 struct pmu_ucode_desc *desc;
995 struct pmu_mem_desc ucode;
996
997 struct pmu_mem_desc pg_buf;
998 /* TBD: remove this if ZBC seq is fixed */
999 struct pmu_mem_desc seq_buf;
1000 bool buf_loaded;
1001
1002 struct pmu_sha1_gid gid_info;
1003
1004 struct pmu_queue queue[PMU_QUEUE_COUNT];
1005
1006 struct pmu_sequence *seq;
1007 unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE];
1008 u32 next_seq_desc;
1009
1010 struct pmu_mutex *mutex;
1011 u32 mutex_cnt;
1012
1013 struct mutex pmu_copy_lock;
1014 struct mutex pmu_seq_lock;
1015
1016 struct gk20a_allocator dmem;
1017
1018 u32 *ucode_image;
1019 bool pmu_ready;
1020
1021 u32 zbc_save_done;
1022
1023 u32 stat_dmem_offset;
1024
1025 bool elpg_ready;
1026 u32 elpg_stat;
1027 wait_queue_head_t pg_wq;
1028
1029#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */
1030 struct delayed_work elpg_enable; /* deferred elpg enable */
1031 struct work_struct pg_init;
1032 bool elpg_enable_allow; /* true after init, false after disable, true after delay */
1033 struct mutex elpg_mutex; /* protect elpg enable/disable */
1034 int elpg_refcnt; /* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */
1035
1036 struct pmu_perfmon_counter perfmon_counter;
1037 u32 perfmon_state_id[PMU_DOMAIN_GROUP_NUM];
1038
1039 bool initialized;
1040
1041 void (*remove_support)(struct pmu_gk20a *pmu);
1042 bool sw_ready;
1043 bool perfmon_ready;
1044
1045 u32 sample_buffer;
1046
1047 struct mutex isr_mutex;
1048 bool zbc_ready;
1049 union {
1050 struct pmu_cmdline_args_v0 args_v0;
1051 struct pmu_cmdline_args_v1 args_v1;
1052 };
1053};
1054
1055struct gk20a_pmu_save_state {
1056 struct pmu_sequence *seq;
1057 u32 next_seq_desc;
1058 struct pmu_mutex *mutex;
1059 u32 mutex_cnt;
1060 struct pmu_ucode_desc *desc;
1061 struct pmu_mem_desc ucode;
1062 struct pmu_mem_desc seq_buf;
1063 struct pmu_mem_desc pg_buf;
1064 struct delayed_work elpg_enable;
1065 wait_queue_head_t pg_wq;
1066 bool sw_ready;
1067 struct work_struct pg_init;
1068};
1069
1070int gk20a_init_pmu_support(struct gk20a *g);
1071int gk20a_init_pmu_setup_hw2(struct gk20a *g);
1072
1073void gk20a_pmu_isr(struct gk20a *g);
1074
1075/* send a cmd to pmu */
1076int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, struct pmu_msg *msg,
1077 struct pmu_payload *payload, u32 queue_id,
1078 pmu_callback callback, void* cb_param,
1079 u32 *seq_desc, unsigned long timeout);
1080
1081int gk20a_pmu_enable_elpg(struct gk20a *g);
1082int gk20a_pmu_disable_elpg(struct gk20a *g);
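/*
 * Calls must be balanced (elpg_refcnt: disable -1 / enable +1). A
 * hypothetical caller that needs the graphics engine kept powered up would
 * bracket its work like this:
 *
 *	gk20a_pmu_disable_elpg(g);
 *	... touch state that must not be power-gated ...
 *	gk20a_pmu_enable_elpg(g);
 */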
1083
1084void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
1085
1086int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable);
1087
1088int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token);
1089int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token);
1090int gk20a_pmu_destroy(struct gk20a *g);
1091int gk20a_pmu_load_norm(struct gk20a *g, u32 *load);
1092int gk20a_pmu_debugfs_init(struct platform_device *dev);
1093void gk20a_pmu_reset_load_counters(struct gk20a *g);
1094void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
1095 u32 *total_cycles);
1096
1097#endif /*__PMU_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c
new file mode 100644
index 00000000..aea1a80b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c
@@ -0,0 +1,91 @@
1/*
2 * GK20A priv ring
3 *
4 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/delay.h> /* for mdelay */
20
21#include "gk20a.h"
22#include "hw_mc_gk20a.h"
23#include "hw_pri_ringmaster_gk20a.h"
24#include "hw_pri_ringstation_sys_gk20a.h"
25#include "hw_trim_gk20a.h"
26
27void gk20a_reset_priv_ring(struct gk20a *g)
28{
29 u32 data;
30
31 if (tegra_platform_is_linsim())
32 return;
33
34 data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
35 data = set_field(data,
36 trim_sys_gpc2clk_out_bypdiv_m(),
37 trim_sys_gpc2clk_out_bypdiv_f(0));
38 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
39
40 gk20a_reset(g, mc_enable_priv_ring_enabled_f());
41
42	gk20a_writel(g, pri_ringmaster_command_r(),
43 0x4);
44
45 gk20a_writel(g, pri_ringstation_sys_decode_config_r(),
46 0x2);
47
48 gk20a_readl(g, pri_ringstation_sys_decode_config_r());
49}
50
51void gk20a_priv_ring_isr(struct gk20a *g)
52{
53 u32 status0, status1;
54 u32 cmd;
55 s32 retry = 100;
56
57 if (tegra_platform_is_linsim())
58 return;
59
60 status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
61 status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());
62
63 gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
64		" status1: 0x%08x", status0, status1);
65
66 if (status0 & (0x1 | 0x2 | 0x4)) {
67 gk20a_reset_priv_ring(g);
68 }
69
70 cmd = gk20a_readl(g, pri_ringmaster_command_r());
71 cmd = set_field(cmd, pri_ringmaster_command_cmd_m(),
72 pri_ringmaster_command_cmd_ack_interrupt_f());
73 gk20a_writel(g, pri_ringmaster_command_r(), cmd);
74
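	/* poll for the ack to be consumed: up to ~100 iterations of 20-40 us */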
75 do {
76 cmd = pri_ringmaster_command_cmd_v(
77 gk20a_readl(g, pri_ringmaster_command_r()));
78 usleep_range(20, 40);
79 } while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry);
80
81 if (retry <= 0)
82 gk20a_warn(dev_from_gk20a(g),
83 "priv ringmaster cmd ack too many retries");
84
85 status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
86 status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());
87
88 gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
89 " status1: 0x%08x", status0, status1);
90}
91
diff --git a/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h
new file mode 100644
index 00000000..cb9d49c7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h
@@ -0,0 +1,27 @@
1/*
2 * drivers/video/tegra/host/gk20a/priv_ring_gk20a.h
3 *
4 * GK20A PRIV ringmaster
5 *
6 * Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#ifndef __PRIV_RING_GK20A_H__
22#define __PRIV_RING_GK20A_H__
23
24void gk20a_reset_priv_ring(struct gk20a *g);
25void gk20a_priv_ring_isr(struct gk20a *g);
26
27#endif /*__PRIV_RING_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
new file mode 100644
index 00000000..4a115fb1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
@@ -0,0 +1,704 @@
1/*
2 *
3 * Tegra GK20A GPU Debugger Driver Register Ops
4 *
5 * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved.
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include <linux/slab.h>
21#include <linux/err.h>
22#include <linux/bsearch.h>
23#include <linux/nvhost_dbg_gpu_ioctl.h>
24
25#include "gk20a.h"
26#include "gr_gk20a.h"
27#include "dbg_gpu_gk20a.h"
28#include "regops_gk20a.h"
29
30
31
32struct regop_offset_range {
33 u32 base:24;
34 u32 count:8;
35};
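/*
 * A range covers 'count' consecutive 32-bit registers starting at byte
 * offset 'base', i.e. offsets [base, base + count * 4).
 */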
36
37static int regop_bsearch_range_cmp(const void *pkey, const void *pelem)
38{
39 u32 key = *(u32 *)pkey;
40 struct regop_offset_range *prange = (struct regop_offset_range *)pelem;
41 if (key < prange->base)
42 return -1;
43 else if (prange->base <= key && key < (prange->base +
44 (prange->count * 4)))
45 return 0;
46 return 1;
47}
48
49static inline bool linear_search(u32 offset, const u32 *list, int size)
50{
51 int i;
52 for (i = 0; i < size; i++)
53 if (list[i] == offset)
54 return true;
55 return false;
56}
57
58static const struct regop_offset_range gk20a_global_whitelist_ranges[] = {
59 { 0x000004f0, 1 },
60 { 0x00001a00, 3 },
61 { 0x0000259c, 1 },
62 { 0x0000280c, 1 },
63 { 0x00009400, 1 },
64 { 0x00009410, 1 },
65 { 0x00020200, 1 },
66 { 0x00022430, 7 },
67 { 0x00022548, 1 },
68 { 0x00100c18, 3 },
69 { 0x00100c84, 1 },
70 { 0x00100cc4, 1 },
71 { 0x00106640, 1 },
72 { 0x0010a0a8, 1 },
73 { 0x0010a4f0, 1 },
74 { 0x0010e064, 1 },
75 { 0x0010e164, 1 },
76 { 0x0010e490, 1 },
77 { 0x00110100, 1 },
78 { 0x00140028, 1 },
79 { 0x001408dc, 1 },
80 { 0x00140a5c, 1 },
81 { 0x001410dc, 1 },
82 { 0x0014125c, 1 },
83 { 0x0017e028, 1 },
84 { 0x0017e8dc, 1 },
85 { 0x0017ea5c, 1 },
86 { 0x0017f0dc, 1 },
87 { 0x0017f25c, 1 },
88 { 0x00180000, 68 },
89 { 0x00180200, 68 },
90 { 0x001a0000, 68 },
91 { 0x001b0000, 68 },
92 { 0x001b0200, 68 },
93 { 0x001b0400, 68 },
94 { 0x001b0600, 68 },
95 { 0x001b4000, 3 },
96 { 0x001b4010, 3 },
97 { 0x001b4020, 3 },
98 { 0x001b4040, 3 },
99 { 0x001b4050, 3 },
100 { 0x001b4060, 16 },
101 { 0x001b40a4, 1 },
102 { 0x001b4100, 6 },
103 { 0x001b4124, 2 },
104 { 0x001b8000, 7 },
105 { 0x001bc000, 7 },
106 { 0x001be000, 7 },
107 { 0x00400500, 1 },
108 { 0x00400700, 1 },
109 { 0x0040415c, 1 },
110 { 0x00405850, 1 },
111 { 0x00405908, 1 },
112 { 0x00405b40, 1 },
113 { 0x00405b50, 1 },
114 { 0x00406024, 1 },
115 { 0x00407010, 1 },
116 { 0x00407808, 1 },
117 { 0x0040803c, 1 },
118 { 0x0040880c, 1 },
119 { 0x00408910, 1 },
120 { 0x00408984, 1 },
121 { 0x004090a8, 1 },
122 { 0x004098a0, 1 },
123 { 0x0041000c, 1 },
124 { 0x00410110, 1 },
125 { 0x00410184, 1 },
126 { 0x00418384, 1 },
127 { 0x004184a0, 1 },
128 { 0x00418604, 1 },
129 { 0x00418680, 1 },
130 { 0x00418714, 1 },
131 { 0x0041881c, 1 },
132 { 0x004188c8, 2 },
133 { 0x00418b04, 1 },
134 { 0x00418c04, 1 },
135 { 0x00418c64, 2 },
136 { 0x00418c88, 1 },
137 { 0x00418cb4, 2 },
138 { 0x00418d00, 1 },
139 { 0x00418d28, 2 },
140 { 0x00418e08, 1 },
141 { 0x00418e1c, 2 },
142 { 0x00418f08, 1 },
143 { 0x00418f20, 2 },
144 { 0x00419000, 1 },
145 { 0x0041900c, 1 },
146 { 0x00419018, 1 },
147 { 0x00419854, 1 },
148 { 0x00419ab0, 1 },
149 { 0x00419ab8, 3 },
150 { 0x00419ac8, 1 },
151 { 0x00419c0c, 1 },
152 { 0x00419c8c, 3 },
153 { 0x00419ca8, 1 },
154 { 0x00419d08, 2 },
155 { 0x00419e00, 1 },
156 { 0x00419e0c, 1 },
157 { 0x00419e14, 2 },
158 { 0x00419e24, 2 },
159 { 0x00419e34, 2 },
160 { 0x00419e44, 4 },
161 { 0x00419ea4, 1 },
162 { 0x00419eb0, 1 },
163 { 0x0041a0a0, 1 },
164 { 0x0041a0a8, 1 },
165 { 0x0041a17c, 1 },
166 { 0x0041a890, 2 },
167 { 0x0041a8a0, 3 },
168 { 0x0041a8b0, 2 },
169 { 0x0041b014, 1 },
170 { 0x0041b0a0, 1 },
171 { 0x0041b0cc, 1 },
172 { 0x0041b0e8, 2 },
173 { 0x0041b1dc, 1 },
174 { 0x0041b1f8, 2 },
175 { 0x0041be14, 1 },
176 { 0x0041bea0, 1 },
177 { 0x0041becc, 1 },
178 { 0x0041bee8, 2 },
179 { 0x0041bfdc, 1 },
180 { 0x0041bff8, 2 },
181 { 0x0041c054, 1 },
182 { 0x0041c2b0, 1 },
183 { 0x0041c2b8, 3 },
184 { 0x0041c2c8, 1 },
185 { 0x0041c40c, 1 },
186 { 0x0041c48c, 3 },
187 { 0x0041c4a8, 1 },
188 { 0x0041c508, 2 },
189 { 0x0041c600, 1 },
190 { 0x0041c60c, 1 },
191 { 0x0041c614, 2 },
192 { 0x0041c624, 2 },
193 { 0x0041c634, 2 },
194 { 0x0041c644, 4 },
195 { 0x0041c6a4, 1 },
196 { 0x0041c6b0, 1 },
197 { 0x00500384, 1 },
198 { 0x005004a0, 1 },
199 { 0x00500604, 1 },
200 { 0x00500680, 1 },
201 { 0x00500714, 1 },
202 { 0x0050081c, 1 },
203 { 0x005008c8, 2 },
204 { 0x00500b04, 1 },
205 { 0x00500c04, 1 },
206 { 0x00500c64, 2 },
207 { 0x00500c88, 1 },
208 { 0x00500cb4, 2 },
209 { 0x00500d00, 1 },
210 { 0x00500d28, 2 },
211 { 0x00500e08, 1 },
212 { 0x00500e1c, 2 },
213 { 0x00500f08, 1 },
214 { 0x00500f20, 2 },
215 { 0x00501000, 1 },
216 { 0x0050100c, 1 },
217 { 0x00501018, 1 },
218 { 0x00501854, 1 },
219 { 0x00501ab0, 1 },
220 { 0x00501ab8, 3 },
221 { 0x00501ac8, 1 },
222 { 0x00501c0c, 1 },
223 { 0x00501c8c, 3 },
224 { 0x00501ca8, 1 },
225 { 0x00501d08, 2 },
226 { 0x00501e00, 1 },
227 { 0x00501e0c, 1 },
228 { 0x00501e14, 2 },
229 { 0x00501e24, 2 },
230 { 0x00501e34, 2 },
231 { 0x00501e44, 4 },
232 { 0x00501ea4, 1 },
233 { 0x00501eb0, 1 },
234 { 0x005020a0, 1 },
235 { 0x005020a8, 1 },
236 { 0x0050217c, 1 },
237 { 0x00502890, 2 },
238 { 0x005028a0, 3 },
239 { 0x005028b0, 2 },
240 { 0x00503014, 1 },
241 { 0x005030a0, 1 },
242 { 0x005030cc, 1 },
243 { 0x005030e8, 2 },
244 { 0x005031dc, 1 },
245 { 0x005031f8, 2 },
246 { 0x00503e14, 1 },
247 { 0x00503ea0, 1 },
248 { 0x00503ecc, 1 },
249 { 0x00503ee8, 2 },
250 { 0x00503fdc, 1 },
251 { 0x00503ff8, 2 },
252 { 0x00504054, 1 },
253 { 0x005042b0, 1 },
254 { 0x005042b8, 3 },
255 { 0x005042c8, 1 },
256 { 0x0050440c, 1 },
257 { 0x0050448c, 3 },
258 { 0x005044a8, 1 },
259 { 0x00504508, 2 },
260 { 0x00504600, 1 },
261 { 0x0050460c, 1 },
262 { 0x00504614, 2 },
263 { 0x00504624, 2 },
264 { 0x00504634, 2 },
265 { 0x00504644, 4 },
266 { 0x005046a4, 1 },
267 { 0x005046b0, 1 },
268};
269static const u32 gk20a_global_whitelist_ranges_count =
270 ARRAY_SIZE(gk20a_global_whitelist_ranges);
271
272/* context */
273
274static const struct regop_offset_range gk20a_context_whitelist_ranges[] = {
275 { 0x0000280c, 1 },
276 { 0x00100cc4, 1 },
277 { 0x00400500, 1 },
278 { 0x00405b40, 1 },
279 { 0x00419000, 1 },
280 { 0x00419c8c, 3 },
281 { 0x00419d08, 2 },
282 { 0x00419e04, 3 },
283 { 0x00419e14, 2 },
284 { 0x00419e24, 2 },
285 { 0x00419e34, 2 },
286 { 0x00419e44, 4 },
287 { 0x00419e58, 6 },
288 { 0x00419e84, 5 },
289 { 0x00419ea4, 1 },
290 { 0x00419eac, 2 },
291 { 0x00419f30, 8 },
292 { 0x0041c48c, 3 },
293 { 0x0041c508, 2 },
294 { 0x0041c604, 3 },
295 { 0x0041c614, 2 },
296 { 0x0041c624, 2 },
297 { 0x0041c634, 2 },
298 { 0x0041c644, 4 },
299 { 0x0041c658, 6 },
300 { 0x0041c684, 5 },
301 { 0x0041c6a4, 1 },
302 { 0x0041c6ac, 2 },
303 { 0x0041c730, 8 },
304 { 0x00501000, 1 },
305 { 0x00501c8c, 3 },
306 { 0x00501d08, 2 },
307 { 0x00501e04, 3 },
308 { 0x00501e14, 2 },
309 { 0x00501e24, 2 },
310 { 0x00501e34, 2 },
311 { 0x00501e44, 4 },
312 { 0x00501e58, 6 },
313 { 0x00501e84, 5 },
314 { 0x00501ea4, 1 },
315 { 0x00501eac, 2 },
316 { 0x00501f30, 8 },
317 { 0x0050448c, 3 },
318 { 0x00504508, 2 },
319 { 0x00504604, 3 },
320 { 0x00504614, 2 },
321 { 0x00504624, 2 },
322 { 0x00504634, 2 },
323 { 0x00504644, 4 },
324 { 0x00504658, 6 },
325 { 0x00504684, 5 },
326 { 0x005046a4, 1 },
327 { 0x005046ac, 2 },
328 { 0x00504730, 8 },
329};
330static const u32 gk20a_context_whitelist_ranges_count =
331 ARRAY_SIZE(gk20a_context_whitelist_ranges);
332
333/* runcontrol */
334static const u32 gk20a_runcontrol_whitelist[] = {
335 0x00419e10,
336 0x0041c610,
337 0x00501e10,
338 0x00504610,
339};
340static const u32 gk20a_runcontrol_whitelist_count =
341 ARRAY_SIZE(gk20a_runcontrol_whitelist);
342
343static const struct regop_offset_range gk20a_runcontrol_whitelist_ranges[] = {
344 { 0x00419e10, 1 },
345 { 0x0041c610, 1 },
346 { 0x00501e10, 1 },
347 { 0x00504610, 1 },
348};
349static const u32 gk20a_runcontrol_whitelist_ranges_count =
350 ARRAY_SIZE(gk20a_runcontrol_whitelist_ranges);
351
352
353/* quad ctl */
354static const u32 gk20a_qctl_whitelist[] = {
355 0x00504670,
356 0x00504674,
357 0x00504678,
358 0x0050467c,
359 0x00504680,
360 0x00504730,
361 0x00504734,
362 0x00504738,
363 0x0050473c,
364};
365static const u32 gk20a_qctl_whitelist_count =
366 ARRAY_SIZE(gk20a_qctl_whitelist);
367
368static const struct regop_offset_range gk20a_qctl_whitelist_ranges[] = {
369 { 0x00504670, 1 },
370 { 0x00504730, 4 },
371};
372static const u32 gk20a_qctl_whitelist_ranges_count =
373 ARRAY_SIZE(gk20a_qctl_whitelist_ranges);
374
375
376
377
378static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
379 u32 *ctx_rd_count, u32 *ctx_wr_count,
380 struct nvhost_dbg_gpu_reg_op *ops,
381 u32 op_count);
382
383
384int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s,
385 struct nvhost_dbg_gpu_reg_op *ops,
386 u64 num_ops)
387{
388 int err = 0, i;
389 struct channel_gk20a *ch = NULL;
390 struct gk20a *g = dbg_s->g;
391 /*struct gr_gk20a *gr = &g->gr;*/
392 u32 data32_lo = 0, data32_hi = 0;
393 u32 ctx_rd_count = 0, ctx_wr_count = 0;
394 bool skip_read_lo, skip_read_hi;
395 bool ok;
396
397 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
398
399 ch = dbg_s->ch;
400
401 ok = validate_reg_ops(dbg_s,
402 &ctx_rd_count, &ctx_wr_count,
403 ops, num_ops);
404 if (!ok) {
405 dev_err(dbg_s->dev, "invalid op(s)");
406 err = -EINVAL;
407 /* each op has its own err/status */
408 goto clean_up;
409 }
410
411 for (i = 0; i < num_ops; i++) {
412 /* if it isn't global then it is done in the ctx ops... */
413 if (ops[i].type != REGOP(TYPE_GLOBAL))
414 continue;
415
416 switch (ops[i].op) {
417
418 case REGOP(READ_32):
419 ops[i].value_hi = 0;
420 ops[i].value_lo = gk20a_readl(g, ops[i].offset);
421 gk20a_dbg(gpu_dbg_gpu_dbg, "read_32 0x%08x from 0x%08x",
422 ops[i].value_lo, ops[i].offset);
423
424 break;
425
426 case REGOP(READ_64):
427 ops[i].value_lo = gk20a_readl(g, ops[i].offset);
428 ops[i].value_hi =
429 gk20a_readl(g, ops[i].offset + 4);
430
431 gk20a_dbg(gpu_dbg_gpu_dbg, "read_64 0x%08x:%08x from 0x%08x",
432 ops[i].value_hi, ops[i].value_lo,
433 ops[i].offset);
434 break;
435
436 case REGOP(WRITE_32):
437 case REGOP(WRITE_64):
438 /* some of this appears wonky/unnecessary but
439 we've kept it for compat with existing
440 debugger code. just in case... */
441 skip_read_lo = skip_read_hi = false;
442 if (ops[i].and_n_mask_lo == ~(u32)0) {
443 data32_lo = ops[i].value_lo;
444 skip_read_lo = true;
445 }
446
447 if ((ops[i].op == REGOP(WRITE_64)) &&
448 (ops[i].and_n_mask_hi == ~(u32)0)) {
449 data32_hi = ops[i].value_hi;
450 skip_read_hi = true;
451 }
452
453 /* read first 32bits */
454 if (unlikely(skip_read_lo == false)) {
455 data32_lo = gk20a_readl(g, ops[i].offset);
456 data32_lo &= ~ops[i].and_n_mask_lo;
457 data32_lo |= ops[i].value_lo;
458 }
459
460 /* if desired, read second 32bits */
461 if ((ops[i].op == REGOP(WRITE_64)) &&
462 !skip_read_hi) {
463 data32_hi = gk20a_readl(g, ops[i].offset + 4);
464 data32_hi &= ~ops[i].and_n_mask_hi;
465 data32_hi |= ops[i].value_hi;
466 }
467
468 /* now update first 32bits */
469 gk20a_writel(g, ops[i].offset, data32_lo);
470 gk20a_dbg(gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ",
471 data32_lo, ops[i].offset);
472 /* if desired, update second 32bits */
473 if (ops[i].op == REGOP(WRITE_64)) {
474 gk20a_writel(g, ops[i].offset + 4, data32_hi);
475 gk20a_dbg(gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ",
476 data32_hi, ops[i].offset + 4);
477
478 }
479
480
481 break;
482
483 /* shouldn't happen as we've already screened */
484 default:
485 BUG();
486 err = -EINVAL;
487 goto clean_up;
488 break;
489 }
490 }
491
492 if (ctx_wr_count | ctx_rd_count) {
493 err = gr_gk20a_exec_ctx_ops(ch, ops, num_ops,
494 ctx_wr_count, ctx_rd_count);
495 if (err) {
496 dev_warn(dbg_s->dev,
497 "failed to perform ctx ops\n");
498 goto clean_up;
499 }
500 }
501
502 clean_up:
503 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
504 return err;
505
506}
507
508
509static int validate_reg_op_info(struct dbg_session_gk20a *dbg_s,
510 struct nvhost_dbg_gpu_reg_op *op)
511{
512 int err = 0;
513
514 op->status = REGOP(STATUS_SUCCESS);
515
516 switch (op->op) {
517 case REGOP(READ_32):
518 case REGOP(READ_64):
519 case REGOP(WRITE_32):
520 case REGOP(WRITE_64):
521 break;
522 default:
523 op->status |= REGOP(STATUS_UNSUPPORTED_OP);
524 /*gk20a_err(dbg_s->dev, "Invalid regops op %d!", op->op);*/
525 err = -EINVAL;
526 break;
527 }
528
529 switch (op->type) {
530 case REGOP(TYPE_GLOBAL):
531 case REGOP(TYPE_GR_CTX):
532 case REGOP(TYPE_GR_CTX_TPC):
533 case REGOP(TYPE_GR_CTX_SM):
534 case REGOP(TYPE_GR_CTX_CROP):
535 case REGOP(TYPE_GR_CTX_ZROP):
536 case REGOP(TYPE_GR_CTX_QUAD):
537 break;
538 /*
539 case NVHOST_DBG_GPU_REG_OP_TYPE_FB:
540 */
541 default:
542 op->status |= REGOP(STATUS_INVALID_TYPE);
543 /*gk20a_err(dbg_s->dev, "Invalid regops type %d!", op->type);*/
544 err = -EINVAL;
545 break;
546 }
547
548 return err;
549}
550
551static bool check_whitelists(struct dbg_session_gk20a *dbg_s,
552 struct nvhost_dbg_gpu_reg_op *op, u32 offset)
553{
554 bool valid = false;
555
556 if (op->type == REGOP(TYPE_GLOBAL)) {
557 /* search global list */
558 valid = !!bsearch(&offset,
559 gk20a_global_whitelist_ranges,
560 gk20a_global_whitelist_ranges_count,
561 sizeof(*gk20a_global_whitelist_ranges),
562 regop_bsearch_range_cmp);
563
564 /* if debug session and channel is bound search context list */
565 if ((!valid) && (!dbg_s->is_profiler && dbg_s->ch)) {
566 /* binary search context list */
567 valid = !!bsearch(&offset,
568 gk20a_context_whitelist_ranges,
569 gk20a_context_whitelist_ranges_count,
570 sizeof(*gk20a_context_whitelist_ranges),
571 regop_bsearch_range_cmp);
572 }
573
574 /* if debug session and channel is bound search runcontrol list */
575 if ((!valid) && (!dbg_s->is_profiler && dbg_s->ch)) {
576 valid = linear_search(offset,
577 gk20a_runcontrol_whitelist,
578 gk20a_runcontrol_whitelist_count);
579 }
580 } else if (op->type == REGOP(TYPE_GR_CTX)) {
581 /* it's a context-relative op */
582 if (!dbg_s->ch) {
583 gk20a_err(dbg_s->dev, "can't perform ctx regop unless bound");
584 op->status = REGOP(STATUS_UNSUPPORTED_OP);
585			return false;
586 }
587
588 /* binary search context list */
589 valid = !!bsearch(&offset,
590 gk20a_context_whitelist_ranges,
591 gk20a_context_whitelist_ranges_count,
592 sizeof(*gk20a_context_whitelist_ranges),
593 regop_bsearch_range_cmp);
594
595 /* if debug session and channel is bound search runcontrol list */
596 if ((!valid) && (!dbg_s->is_profiler && dbg_s->ch)) {
597 valid = linear_search(offset,
598 gk20a_runcontrol_whitelist,
599 gk20a_runcontrol_whitelist_count);
600 }
601
602 } else if (op->type == REGOP(TYPE_GR_CTX_QUAD)) {
603 valid = linear_search(offset,
604 gk20a_qctl_whitelist,
605 gk20a_qctl_whitelist_count);
606 }
607
608 return valid;
609}
610
611/* note: the op here has already been through validate_reg_op_info */
612static int validate_reg_op_offset(struct dbg_session_gk20a *dbg_s,
613 struct nvhost_dbg_gpu_reg_op *op)
614{
615 int err;
616 u32 buf_offset_lo, buf_offset_addr, num_offsets, offset;
617 bool valid = false;
618
619 op->status = 0;
620 offset = op->offset;
621
622 /* support only 24-bit 4-byte aligned offsets */
623 if (offset & 0xFF000003) {
624 gk20a_err(dbg_s->dev, "invalid regop offset: 0x%x\n", offset);
625 op->status |= REGOP(STATUS_INVALID_OFFSET);
626 return -EINVAL;
627 }
628
629 valid = check_whitelists(dbg_s, op, offset);
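	/* a 64-bit op must have both its low and high words whitelisted */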
630 if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid)
631 valid = check_whitelists(dbg_s, op, offset + 4);
632
633 if (valid && (op->type != REGOP(TYPE_GLOBAL))) {
634 err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g,
635 op->offset,
636 1,
637 &buf_offset_lo,
638 &buf_offset_addr,
639 &num_offsets,
640 op->type == REGOP(TYPE_GR_CTX_QUAD),
641 op->quad);
642 if (err) {
643 op->status |= REGOP(STATUS_INVALID_OFFSET);
644 return -EINVAL;
645 }
646 if (!buf_offset_lo) {
647 op->status |= REGOP(STATUS_INVALID_OFFSET);
648 return -EINVAL;
649 }
650 }
651
652 if (!valid) {
653 gk20a_err(dbg_s->dev, "invalid regop offset: 0x%x\n", offset);
654 op->status |= REGOP(STATUS_INVALID_OFFSET);
655 return -EINVAL;
656 }
657
658 return 0;
659}
660
661static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
662 u32 *ctx_rd_count, u32 *ctx_wr_count,
663 struct nvhost_dbg_gpu_reg_op *ops,
664 u32 op_count)
665{
666 u32 i;
667 int err;
668 bool ok = true;
669
670 /* keep going until the end so every op can get
671 * a separate error code if needed */
672 for (i = 0; i < op_count; i++) {
673
674 err = validate_reg_op_info(dbg_s, &ops[i]);
675 ok &= !err;
676
677 if (reg_op_is_gr_ctx(ops[i].type)) {
678 if (reg_op_is_read(ops[i].op))
679 (*ctx_rd_count)++;
680 else
681 (*ctx_wr_count)++;
682 }
683
684 err = validate_reg_op_offset(dbg_s, &ops[i]);
685 ok &= !err;
686 }
687
688 gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d\n",
689 *ctx_wr_count, *ctx_rd_count);
690
691 return ok;
692}
693
694/* exported for tools like cyclestats, etc */
695bool is_bar0_global_offset_whitelisted_gk20a(u32 offset)
696{
697
698 bool valid = !!bsearch(&offset,
699 gk20a_global_whitelist_ranges,
700 gk20a_global_whitelist_ranges_count,
701 sizeof(*gk20a_global_whitelist_ranges),
702 regop_bsearch_range_cmp);
703 return valid;
704}
diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.h b/drivers/gpu/nvgpu/gk20a/regops_gk20a.h
new file mode 100644
index 00000000..23b4865b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.h
@@ -0,0 +1,47 @@
1/*
2 *
3 * Tegra GK20A GPU Debugger Driver Register Ops
4 *
5 * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved.
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19#ifndef __REGOPS_GK20A_H_
20#define __REGOPS_GK20A_H_
21
22int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s,
23 struct nvhost_dbg_gpu_reg_op *ops,
24 u64 num_ops);
25
26/* turn seriously unwieldy names -> something shorter */
27#define REGOP(x) NVHOST_DBG_GPU_REG_OP_##x
28
29
30static inline bool reg_op_is_gr_ctx(u8 type)
31{
32 return type == REGOP(TYPE_GR_CTX) ||
33 type == REGOP(TYPE_GR_CTX_TPC) ||
34 type == REGOP(TYPE_GR_CTX_SM) ||
35 type == REGOP(TYPE_GR_CTX_CROP) ||
36 type == REGOP(TYPE_GR_CTX_ZROP) ||
37 type == REGOP(TYPE_GR_CTX_QUAD);
38}
39static inline bool reg_op_is_read(u8 op)
40{
41 return op == REGOP(READ_32) ||
42	       op == REGOP(READ_64);
43}
44
45bool is_bar0_global_offset_whitelisted_gk20a(u32 offset);
46
47#endif /* __REGOPS_GK20A_H_ */
diff --git a/drivers/gpu/nvgpu/gk20a/sim_gk20a.h b/drivers/gpu/nvgpu/gk20a/sim_gk20a.h
new file mode 100644
index 00000000..5fc8006e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/sim_gk20a.h
@@ -0,0 +1,62 @@
1/*
2 * drivers/video/tegra/host/gk20a/sim_gk20a.h
3 *
4 * GK20A sim support
5 *
6 * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#ifndef __SIM_GK20A_H__
22#define __SIM_GK20A_H__
23
24
25struct gk20a;
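/*
 * Per-GPU simulator state: the mapped escape registers plus the send,
 * receive and message pages used to exchange requests with the simulator.
 */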
26struct sim_gk20a {
27 struct gk20a *g;
28 struct resource *reg_mem;
29 void __iomem *regs;
30 struct {
31 struct page *page;
32 void *kvaddr;
33 phys_addr_t phys;
34 } send_bfr, recv_bfr, msg_bfr;
35 u32 send_ring_put;
36 u32 recv_ring_get;
37 u32 recv_ring_put;
38 u32 sequence_base;
39 void (*remove_support)(struct sim_gk20a *);
40};
41
42
43int gk20a_sim_esc_read(struct gk20a *g, char *path, u32 index,
44 u32 count, u32 *data);
45
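/* stub used when not running on the simulator: report failure and
 * return all-ones data */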
46static inline int gk20a_sim_esc_read_no_sim(struct gk20a *g, char *p,
47 u32 i, u32 c, u32 *d)
48{
49 *d = ~(u32)0;
50 return -1;
51}
52
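/* read one 32-bit value via the simulator escape interface on ASIM,
 * otherwise fall back to the failing stub above */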
53static inline int gk20a_sim_esc_readl(struct gk20a *g, char *p, u32 i, u32 *d)
54{
55 if (tegra_cpu_is_asim())
56 return gk20a_sim_esc_read(g, p, i, sizeof(u32), d);
57
58 return gk20a_sim_esc_read_no_sim(g, p, i, sizeof(u32), d);
59}
60
61
62#endif /*__SIM_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/therm_gk20a.c b/drivers/gpu/nvgpu/gk20a/therm_gk20a.c
new file mode 100644
index 00000000..da911979
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/therm_gk20a.c
@@ -0,0 +1,142 @@
1/*
2 * drivers/video/tegra/host/gk20a/therm_gk20a.c
3 *
4 * GK20A Therm
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#include "gk20a.h"
23#include "hw_chiplet_pwr_gk20a.h"
24#include "hw_gr_gk20a.h"
25#include "hw_therm_gk20a.h"
26
27static int gk20a_init_therm_reset_enable_hw(struct gk20a *g)
28{
29 return 0;
30}
31
32static int gk20a_init_therm_setup_sw(struct gk20a *g)
33{
34 return 0;
35}
36
37static int gk20a_init_therm_setup_hw(struct gk20a *g)
38{
39 /* program NV_THERM registers */
40 gk20a_writel(g, therm_use_a_r(), NV_THERM_USE_A_INIT);
41 gk20a_writel(g, therm_evt_ext_therm_0_r(),
42 NV_THERM_EVT_EXT_THERM_0_INIT);
43 gk20a_writel(g, therm_evt_ext_therm_1_r(),
44 NV_THERM_EVT_EXT_THERM_1_INIT);
45 gk20a_writel(g, therm_evt_ext_therm_2_r(),
46 NV_THERM_EVT_EXT_THERM_2_INIT);
47
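/* block-activity / peak-power throttling programming below is currently
 * left disabled */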
48/*
49 u32 data;
50
51 data = gk20a_readl(g, gr_gpcs_tpcs_l1c_cfg_r());
52 data = set_field(data, gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_m(),
53 gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_enable_f());
54 gk20a_writel(g, gr_gpcs_tpcs_l1c_cfg_r(), data);
55
56 data = gk20a_readl(g, gr_gpcs_tpcs_l1c_pm_r());
57 data = set_field(data, gr_gpcs_tpcs_l1c_pm_enable_m(),
58 gr_gpcs_tpcs_l1c_pm_enable_enable_f());
59 gk20a_writel(g, gr_gpcs_tpcs_l1c_pm_r(), data);
60
61 data = gk20a_readl(g, gr_gpcs_tpcs_sm_pm_ctrl_r());
62 data = set_field(data, gr_gpcs_tpcs_sm_pm_ctrl_core_enable_m(),
63 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_enable_f());
64 data = set_field(data, gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_m(),
65 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_enable_f());
66 gk20a_writel(g, gr_gpcs_tpcs_sm_pm_ctrl_r(), data);
67
68 data = gk20a_readl(g, gr_gpcs_tpcs_sm_halfctl_ctrl_r());
69 data = set_field(data, gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_m(),
70 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_enable_f());
71 gk20a_writel(g, gr_gpcs_tpcs_sm_halfctl_ctrl_r(), data);
72
73 data = gk20a_readl(g, gr_gpcs_tpcs_sm_debug_sfe_control_r());
74 data = set_field(data, gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_m(),
75 gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_enable_f());
76 gk20a_writel(g, gr_gpcs_tpcs_sm_debug_sfe_control_r(), data);
77
78 gk20a_writel(g, therm_peakpower_config6_r(0),
79 therm_peakpower_config6_trigger_cfg_1h_intr_f() |
80 therm_peakpower_config6_trigger_cfg_1l_intr_f());
81
82 gk20a_writel(g, chiplet_pwr_gpcs_config_1_r(),
83 chiplet_pwr_gpcs_config_1_ba_enable_yes_f());
84 gk20a_writel(g, chiplet_pwr_fbps_config_1_r(),
85 chiplet_pwr_fbps_config_1_ba_enable_yes_f());
86
87 data = gk20a_readl(g, therm_config1_r());
88 data = set_field(data, therm_config1_ba_enable_m(),
89 therm_config1_ba_enable_yes_f());
90 gk20a_writel(g, therm_config1_r(), data);
91
92 gk20a_writel(g, gr_gpcs_tpcs_sm_power_throttle_r(), 0x441a);
93
94 gk20a_writel(g, therm_weight_1_r(), 0xd3);
95 gk20a_writel(g, chiplet_pwr_gpcs_weight_6_r(), 0x7d);
96 gk20a_writel(g, chiplet_pwr_gpcs_weight_7_r(), 0xff);
97 gk20a_writel(g, chiplet_pwr_fbps_weight_0_r(), 0x13000000);
98 gk20a_writel(g, chiplet_pwr_fbps_weight_1_r(), 0x19);
99
100 gk20a_writel(g, therm_peakpower_config8_r(0), 0x8);
101 gk20a_writel(g, therm_peakpower_config9_r(0), 0x0);
102
103 gk20a_writel(g, therm_evt_ba_w0_t1h_r(), 0x100);
104
105 gk20a_writel(g, therm_use_a_r(), therm_use_a_ba_w0_t1h_yes_f());
106
107 gk20a_writel(g, therm_peakpower_config1_r(0),
108 therm_peakpower_config1_window_period_2m_f() |
109 therm_peakpower_config1_ba_sum_shift_20_f() |
110 therm_peakpower_config1_window_en_enabled_f());
111
112 gk20a_writel(g, therm_peakpower_config2_r(0),
113 therm_peakpower_config2_ba_threshold_1h_val_f(1) |
114 therm_peakpower_config2_ba_threshold_1h_en_enabled_f());
115
116 gk20a_writel(g, therm_peakpower_config4_r(0),
117 therm_peakpower_config4_ba_threshold_1l_val_f(1) |
118 therm_peakpower_config4_ba_threshold_1l_en_enabled_f());
119*/
120 return 0;
121}
122
123int gk20a_init_therm_support(struct gk20a *g)
124{
125	int err;
126
127 gk20a_dbg_fn("");
128
129 err = gk20a_init_therm_reset_enable_hw(g);
130 if (err)
131 return err;
132
133 err = gk20a_init_therm_setup_sw(g);
134 if (err)
135 return err;
136
137 err = gk20a_init_therm_setup_hw(g);
138 if (err)
139 return err;
140
141 return err;
142}
diff --git a/drivers/gpu/nvgpu/gk20a/therm_gk20a.h b/drivers/gpu/nvgpu/gk20a/therm_gk20a.h
new file mode 100644
index 00000000..3f67ee12
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/therm_gk20a.h
@@ -0,0 +1,33 @@
1/*
2 * drivers/video/tegra/host/gk20a/therm_gk20a.h
3 *
4 * GK20A Therm
5 *
6 * Copyright (c) 2011 - 2012, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21#ifndef _NVHOST_THERM_GK20A_H_
22#define _NVHOST_THERM_GK20A_H_
23
24/* priority for EXT_THERM_0 event set to highest */
25#define NV_THERM_EVT_EXT_THERM_0_INIT 0x3000100
26#define NV_THERM_EVT_EXT_THERM_1_INIT 0x2000200
27#define NV_THERM_EVT_EXT_THERM_2_INIT 0x1000300
28/* configures the thermal events that may cause clock slowdown */
29#define NV_THERM_USE_A_INIT 0x7
30
31int gk20a_init_therm_support(struct gk20a *g);
32
33#endif /* _NVHOST_THERM_GK20A_H_ */