diff options
82 files changed, 43318 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig new file mode 100644 index 00000000..160ec8be --- /dev/null +++ b/drivers/gpu/nvgpu/Kconfig | |||
@@ -0,0 +1,60 @@ | |||
1 | config GK20A | ||
2 | bool "Nvidia GK20A GPU support" | ||
3 | help | ||
4 | Choose this option if you have an SoC with integrated | ||
5 | Nvidia GPU IP. | ||
6 | |||
7 | config GK20A_DEFAULT_TIMEOUT | ||
8 | depends on GK20A | ||
9 | int "Default timeout for submits" | ||
10 | default 10000 | ||
11 | help | ||
12 | Default timeout for jobs in milliseconds. Set to zero for no timeout. | ||
13 | |||
14 | config GK20A_PMU | ||
15 | bool "Support GK20A PMU" | ||
16 | depends on GK20A | ||
17 | default n | ||
18 | help | ||
19 | Say Y here to enable GK20A PMU features. | ||
20 | |||
21 | choice | ||
22 | depends on GK20A | ||
23 | prompt "Enable GK20A frequency scaling" | ||
24 | default GK20A_PERFMON | ||
25 | optional | ||
26 | help | ||
27 | Select this entry to enable gk20a scaling | ||
28 | |||
29 | config GK20A_PERFMON | ||
30 | bool "Use Perfmon" | ||
31 | help | ||
	  Select this to enable built-in perfmon scaling.
	  The built-in option uses a simplistic scaling
	  mechanism: it increases the frequency when the GPU
	  is busy and decreases it when the GPU is idle.
36 | |||
37 | config GK20A_DEVFREQ | ||
38 | bool "Use Devfreq" | ||
39 | help | ||
40 | Select this to use devfreq based scaling. | ||
41 | Devfreq is a common framework that allows using | ||
42 | variety of different governors and changing | ||
43 | between governors on the fly. By default, no | ||
44 | governor is selected. | ||
45 | |||
46 | endchoice | ||
47 | |||
48 | config GK20A_CYCLE_STATS | ||
49 | bool "Support GK20A GPU CYCLE STATS" | ||
50 | depends on GK20A | ||
51 | default y | ||
52 | help | ||
53 | Say Y here to enable the cycle stats debugging features. | ||
54 | |||
55 | config GK20A_PHYS_PAGE_TABLES | ||
56 | bool "Use physical addressing for gk20a page tables" | ||
57 | default y if TEGRA_SIMULATION_PLATFORM | ||
58 | help | ||
59 | Use physical addressing for gk20a page tables. If this is off, we | ||
60 | use SMMU translation. | ||
diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile new file mode 100644 index 00000000..f9b06b72 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/Makefile | |||
@@ -0,0 +1,36 @@ | |||
1 | |||
2 | GCOV_PROFILE := y | ||
3 | ccflags-y += -Idrivers/devfreq | ||
4 | ccflags-y += -Wno-multichar | ||
5 | ccflags-y += -Werror | ||
6 | |||
7 | obj-$(CONFIG_GK20A) += \ | ||
8 | gk20a.o \ | ||
9 | as_gk20a.o \ | ||
10 | ctrl_gk20a.o \ | ||
11 | fifo_gk20a.o \ | ||
12 | channel_gk20a.o \ | ||
13 | channel_sync_gk20a.o \ | ||
14 | debug_gk20a.o \ | ||
15 | dbg_gpu_gk20a.o \ | ||
16 | regops_gk20a.o \ | ||
17 | gr_gk20a.o \ | ||
18 | kind_gk20a.o \ | ||
19 | mm_gk20a.o \ | ||
20 | pmu_gk20a.o \ | ||
21 | priv_ring_gk20a.o \ | ||
22 | clk_gk20a.o \ | ||
23 | therm_gk20a.o \ | ||
24 | gr_ctx_gk20a_sim.o \ | ||
25 | gr_ctx_gk20a.o \ | ||
26 | gk20a_gating_reglist.o \ | ||
27 | gk20a_scale.o \ | ||
28 | gk20a_sysfs.o \ | ||
29 | ltc_gk20a.o \ | ||
30 | fb_gk20a.o \ | ||
31 | hal.o \ | ||
32 | hal_gk20a.o \ | ||
33 | gk20a_allocator.o | ||
34 | |||
35 | obj-$(CONFIG_GK20A) += platform_gk20a_generic.o | ||
36 | obj-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o | ||
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c new file mode 100644 index 00000000..65c26938 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c | |||
@@ -0,0 +1,293 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/as_gk20a.c | ||
3 | * | ||
4 | * GK20A Address Spaces | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #include <linux/slab.h> | ||
19 | #include <linux/fs.h> | ||
20 | #include <linux/cdev.h> | ||
21 | #include <linux/uaccess.h> | ||
22 | |||
23 | #include <trace/events/gk20a.h> | ||
24 | |||
25 | #include "gk20a.h" | ||
26 | |||
27 | /* dumb allocator... */ | ||
28 | static int generate_as_share_id(struct gk20a_as *as) | ||
29 | { | ||
30 | gk20a_dbg_fn(""); | ||
31 | return ++as->last_share_id; | ||
32 | } | ||
/*
 * Counterpart of generate_as_share_id().  Ids are never recycled, so
 * this is intentionally a no-op kept for symmetry with the allocator.
 */
static void release_as_share_id(struct gk20a_as *as, int id)
{
	gk20a_dbg_fn("");
}
39 | |||
40 | static int gk20a_as_alloc_share(struct gk20a_as *as, | ||
41 | struct gk20a_as_share **out) | ||
42 | { | ||
43 | struct gk20a_as_share *as_share; | ||
44 | int err = 0; | ||
45 | |||
46 | gk20a_dbg_fn(""); | ||
47 | |||
48 | *out = 0; | ||
49 | as_share = kzalloc(sizeof(*as_share), GFP_KERNEL); | ||
50 | if (!as_share) | ||
51 | return -ENOMEM; | ||
52 | |||
53 | as_share->as = as; | ||
54 | as_share->id = generate_as_share_id(as_share->as); | ||
55 | as_share->ref_cnt.counter = 1; | ||
56 | |||
57 | /* this will set as_share->vm. */ | ||
58 | err = gk20a_vm_alloc_share(as_share); | ||
59 | if (err) | ||
60 | goto failed; | ||
61 | |||
62 | *out = as_share; | ||
63 | return 0; | ||
64 | |||
65 | failed: | ||
66 | kfree(as_share); | ||
67 | return err; | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * channels and the device nodes call this to release. | ||
72 | * once the ref_cnt hits zero the share is deleted. | ||
73 | */ | ||
74 | int gk20a_as_release_share(struct gk20a_as_share *as_share) | ||
75 | { | ||
76 | int err; | ||
77 | |||
78 | gk20a_dbg_fn(""); | ||
79 | |||
80 | if (atomic_dec_return(&as_share->ref_cnt) > 0) | ||
81 | return 0; | ||
82 | |||
83 | err = gk20a_vm_release_share(as_share); | ||
84 | release_as_share_id(as_share->as, as_share->id); | ||
85 | kfree(as_share); | ||
86 | return err; | ||
87 | } | ||
88 | |||
89 | static int gk20a_as_ioctl_bind_channel( | ||
90 | struct gk20a_as_share *as_share, | ||
91 | struct nvhost_as_bind_channel_args *args) | ||
92 | { | ||
93 | int err = 0; | ||
94 | struct channel_gk20a *ch; | ||
95 | |||
96 | gk20a_dbg_fn(""); | ||
97 | |||
98 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
99 | if (!ch || gk20a_channel_as_bound(ch)) | ||
100 | return -EINVAL; | ||
101 | |||
102 | atomic_inc(&as_share->ref_cnt); | ||
103 | |||
104 | /* this will set channel_gk20a->vm */ | ||
105 | err = gk20a_vm_bind_channel(as_share, ch); | ||
106 | if (err) { | ||
107 | atomic_dec(&as_share->ref_cnt); | ||
108 | return err; | ||
109 | } | ||
110 | |||
111 | return err; | ||
112 | } | ||
113 | |||
/* Thin ioctl wrapper: reserve a run of GPU VA space in this share. */
static int gk20a_as_ioctl_alloc_space(
		struct gk20a_as_share *as_share,
		struct nvhost_as_alloc_space_args *args)
{
	gk20a_dbg_fn("");

	return gk20a_vm_alloc_space(as_share, args);
}
121 | |||
/* Thin ioctl wrapper: release a previously reserved GPU VA range. */
static int gk20a_as_ioctl_free_space(
		struct gk20a_as_share *as_share,
		struct nvhost_as_free_space_args *args)
{
	gk20a_dbg_fn("");

	return gk20a_vm_free_space(as_share, args);
}
129 | |||
130 | static int gk20a_as_ioctl_map_buffer_ex( | ||
131 | struct gk20a_as_share *as_share, | ||
132 | struct nvhost_as_map_buffer_ex_args *args) | ||
133 | { | ||
134 | int i; | ||
135 | |||
136 | gk20a_dbg_fn(""); | ||
137 | |||
138 | /* ensure that padding is not set. this is required for ensuring that | ||
139 | * we can safely use these fields later */ | ||
140 | for (i = 0; i < ARRAY_SIZE(args->padding); i++) | ||
141 | if (args->padding[i]) | ||
142 | return -EINVAL; | ||
143 | |||
144 | return gk20a_vm_map_buffer(as_share, args->dmabuf_fd, | ||
145 | &args->offset, args->flags, | ||
146 | args->kind); | ||
147 | } | ||
148 | |||
149 | static int gk20a_as_ioctl_map_buffer( | ||
150 | struct gk20a_as_share *as_share, | ||
151 | struct nvhost_as_map_buffer_args *args) | ||
152 | { | ||
153 | gk20a_dbg_fn(""); | ||
154 | return gk20a_vm_map_buffer(as_share, args->nvmap_handle, | ||
155 | &args->o_a.align, | ||
156 | args->flags, NV_KIND_DEFAULT); | ||
157 | /* args->o_a.offset will be set if !err */ | ||
158 | } | ||
159 | |||
160 | static int gk20a_as_ioctl_unmap_buffer( | ||
161 | struct gk20a_as_share *as_share, | ||
162 | struct nvhost_as_unmap_buffer_args *args) | ||
163 | { | ||
164 | gk20a_dbg_fn(""); | ||
165 | return gk20a_vm_unmap_buffer(as_share, args->offset); | ||
166 | } | ||
167 | |||
168 | int gk20a_as_dev_open(struct inode *inode, struct file *filp) | ||
169 | { | ||
170 | struct gk20a_as_share *as_share; | ||
171 | struct gk20a *g; | ||
172 | int err; | ||
173 | |||
174 | gk20a_dbg_fn(""); | ||
175 | |||
176 | g = container_of(inode->i_cdev, struct gk20a, as.cdev); | ||
177 | |||
178 | err = gk20a_get_client(g); | ||
179 | if (err) { | ||
180 | gk20a_dbg_fn("fail to get channel!"); | ||
181 | return err; | ||
182 | } | ||
183 | |||
184 | err = gk20a_as_alloc_share(&g->as, &as_share); | ||
185 | if (err) { | ||
186 | gk20a_dbg_fn("failed to alloc share"); | ||
187 | gk20a_put_client(g); | ||
188 | return err; | ||
189 | } | ||
190 | |||
191 | filp->private_data = as_share; | ||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | int gk20a_as_dev_release(struct inode *inode, struct file *filp) | ||
196 | { | ||
197 | struct gk20a_as_share *as_share = filp->private_data; | ||
198 | int ret; | ||
199 | struct gk20a *g = gk20a_from_as(as_share->as); | ||
200 | |||
201 | gk20a_dbg_fn(""); | ||
202 | |||
203 | ret = gk20a_as_release_share(as_share); | ||
204 | |||
205 | gk20a_put_client(g); | ||
206 | |||
207 | return ret; | ||
208 | } | ||
209 | |||
210 | long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
211 | { | ||
212 | int err = 0; | ||
213 | struct gk20a_as_share *as_share = filp->private_data; | ||
214 | struct gk20a *g = gk20a_from_as(as_share->as); | ||
215 | |||
216 | u8 buf[NVHOST_AS_IOCTL_MAX_ARG_SIZE]; | ||
217 | |||
218 | if ((_IOC_TYPE(cmd) != NVHOST_AS_IOCTL_MAGIC) || | ||
219 | (_IOC_NR(cmd) == 0) || | ||
220 | (_IOC_NR(cmd) > NVHOST_AS_IOCTL_LAST)) | ||
221 | return -EFAULT; | ||
222 | |||
223 | BUG_ON(_IOC_SIZE(cmd) > NVHOST_AS_IOCTL_MAX_ARG_SIZE); | ||
224 | |||
225 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
226 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
227 | return -EFAULT; | ||
228 | } | ||
229 | |||
230 | err = gk20a_channel_busy(g->dev); | ||
231 | if (err) | ||
232 | return err; | ||
233 | |||
234 | switch (cmd) { | ||
235 | case NVHOST_AS_IOCTL_BIND_CHANNEL: | ||
236 | trace_gk20a_as_ioctl_bind_channel(dev_name(dev_from_gk20a(g))); | ||
237 | err = gk20a_as_ioctl_bind_channel(as_share, | ||
238 | (struct nvhost_as_bind_channel_args *)buf); | ||
239 | |||
240 | break; | ||
241 | case NVHOST32_AS_IOCTL_ALLOC_SPACE: | ||
242 | { | ||
243 | struct nvhost32_as_alloc_space_args *args32 = | ||
244 | (struct nvhost32_as_alloc_space_args *)buf; | ||
245 | struct nvhost_as_alloc_space_args args; | ||
246 | |||
247 | args.pages = args32->pages; | ||
248 | args.page_size = args32->page_size; | ||
249 | args.flags = args32->flags; | ||
250 | args.o_a.offset = args32->o_a.offset; | ||
251 | trace_gk20a_as_ioctl_alloc_space(dev_name(dev_from_gk20a(g))); | ||
252 | err = gk20a_as_ioctl_alloc_space(as_share, &args); | ||
253 | args32->o_a.offset = args.o_a.offset; | ||
254 | break; | ||
255 | } | ||
256 | case NVHOST_AS_IOCTL_ALLOC_SPACE: | ||
257 | trace_gk20a_as_ioctl_alloc_space(dev_name(dev_from_gk20a(g))); | ||
258 | err = gk20a_as_ioctl_alloc_space(as_share, | ||
259 | (struct nvhost_as_alloc_space_args *)buf); | ||
260 | break; | ||
261 | case NVHOST_AS_IOCTL_FREE_SPACE: | ||
262 | trace_gk20a_as_ioctl_free_space(dev_name(dev_from_gk20a(g))); | ||
263 | err = gk20a_as_ioctl_free_space(as_share, | ||
264 | (struct nvhost_as_free_space_args *)buf); | ||
265 | break; | ||
266 | case NVHOST_AS_IOCTL_MAP_BUFFER: | ||
267 | trace_gk20a_as_ioctl_map_buffer(dev_name(dev_from_gk20a(g))); | ||
268 | err = gk20a_as_ioctl_map_buffer(as_share, | ||
269 | (struct nvhost_as_map_buffer_args *)buf); | ||
270 | break; | ||
271 | case NVHOST_AS_IOCTL_MAP_BUFFER_EX: | ||
272 | trace_gk20a_as_ioctl_map_buffer(dev_name(dev_from_gk20a(g))); | ||
273 | err = gk20a_as_ioctl_map_buffer_ex(as_share, | ||
274 | (struct nvhost_as_map_buffer_ex_args *)buf); | ||
275 | break; | ||
276 | case NVHOST_AS_IOCTL_UNMAP_BUFFER: | ||
277 | trace_gk20a_as_ioctl_unmap_buffer(dev_name(dev_from_gk20a(g))); | ||
278 | err = gk20a_as_ioctl_unmap_buffer(as_share, | ||
279 | (struct nvhost_as_unmap_buffer_args *)buf); | ||
280 | break; | ||
281 | default: | ||
282 | dev_err(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd); | ||
283 | err = -ENOTTY; | ||
284 | break; | ||
285 | } | ||
286 | |||
287 | gk20a_channel_idle(g->dev); | ||
288 | |||
289 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
290 | err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); | ||
291 | |||
292 | return err; | ||
293 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.h b/drivers/gpu/nvgpu/gk20a/as_gk20a.h new file mode 100644 index 00000000..be0e9707 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.h | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/as_gk20a.h | ||
3 | * | ||
4 | * GK20A Address Space | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | #ifndef __GK20A_AS_H | ||
18 | #define __GK20A_AS_H | ||
19 | |||
20 | #include <linux/atomic.h> | ||
21 | #include <linux/cdev.h> | ||
22 | #include <linux/fs.h> | ||
23 | |||
24 | #include <linux/nvhost_as_ioctl.h> | ||
25 | |||
26 | struct gk20a_as; | ||
27 | struct gk20a_as_share; | ||
28 | struct vm_gk20a; | ||
29 | |||
/* One user-visible handle on an address space, created per open() of
 * the AS device node.  Channels that bind to the space take extra
 * references (see gk20a_as_release_share()). */
struct gk20a_as_share {
	struct gk20a_as *as;	/* back-pointer to the owning AS device */
	atomic_t ref_cnt;	/* device node + each bound channel */
	int id;			/* share id from the dummy allocator */
	struct vm_gk20a *vm;	/* GPU VM; set by gk20a_vm_alloc_share() */
};

/* Per-GPU address-space device state. */
struct gk20a_as {
	int last_share_id; /* dummy allocator for now */
	struct cdev cdev;	/* character device for the AS node */
	struct device *node;	/* device object backing the node */
};
42 | |||
43 | int gk20a_as_release_share(struct gk20a_as_share *as_share); | ||
44 | |||
45 | /* struct file_operations driver interface */ | ||
46 | int gk20a_as_dev_open(struct inode *inode, struct file *filp); | ||
47 | int gk20a_as_dev_release(struct inode *inode, struct file *filp); | ||
48 | long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); | ||
49 | |||
50 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c new file mode 100644 index 00000000..6056f558 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -0,0 +1,2111 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/channel_gk20a.c | ||
3 | * | ||
4 | * GK20A Graphics channel | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | |||
22 | #include <linux/nvhost.h> | ||
23 | #include <linux/list.h> | ||
24 | #include <linux/delay.h> | ||
25 | #include <linux/highmem.h> /* need for nvmap.h*/ | ||
26 | #include <trace/events/gk20a.h> | ||
27 | #include <linux/scatterlist.h> | ||
28 | #include <linux/file.h> | ||
29 | #include <linux/anon_inodes.h> | ||
30 | #include <linux/dma-buf.h> | ||
31 | |||
32 | #include "debug_gk20a.h" | ||
33 | |||
34 | #include "gk20a.h" | ||
35 | #include "dbg_gpu_gk20a.h" | ||
36 | |||
37 | #include "hw_ram_gk20a.h" | ||
38 | #include "hw_fifo_gk20a.h" | ||
39 | #include "hw_pbdma_gk20a.h" | ||
40 | #include "hw_ccsr_gk20a.h" | ||
41 | #include "hw_ltc_gk20a.h" | ||
42 | |||
43 | #define NVMAP_HANDLE_PARAM_SIZE 1 | ||
44 | |||
45 | static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f); | ||
46 | static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c); | ||
47 | |||
48 | static void free_priv_cmdbuf(struct channel_gk20a *c, | ||
49 | struct priv_cmd_entry *e); | ||
50 | static void recycle_priv_cmdbuf(struct channel_gk20a *c); | ||
51 | |||
52 | static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c); | ||
53 | static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); | ||
54 | |||
55 | static int channel_gk20a_commit_userd(struct channel_gk20a *c); | ||
56 | static int channel_gk20a_setup_userd(struct channel_gk20a *c); | ||
57 | static int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | ||
58 | u64 gpfifo_base, u32 gpfifo_entries); | ||
59 | |||
60 | static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a); | ||
61 | static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); | ||
62 | |||
63 | static int channel_gk20a_alloc_inst(struct gk20a *g, | ||
64 | struct channel_gk20a *ch); | ||
65 | static void channel_gk20a_free_inst(struct gk20a *g, | ||
66 | struct channel_gk20a *ch); | ||
67 | |||
68 | static int channel_gk20a_update_runlist(struct channel_gk20a *c, | ||
69 | bool add); | ||
70 | static void gk20a_free_error_notifiers(struct channel_gk20a *ch); | ||
71 | |||
72 | static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f) | ||
73 | { | ||
74 | struct channel_gk20a *ch = NULL; | ||
75 | int chid; | ||
76 | |||
77 | mutex_lock(&f->ch_inuse_mutex); | ||
78 | for (chid = 0; chid < f->num_channels; chid++) { | ||
79 | if (!f->channel[chid].in_use) { | ||
80 | f->channel[chid].in_use = true; | ||
81 | ch = &f->channel[chid]; | ||
82 | break; | ||
83 | } | ||
84 | } | ||
85 | mutex_unlock(&f->ch_inuse_mutex); | ||
86 | |||
87 | return ch; | ||
88 | } | ||
89 | |||
/* Return a channel to the free pool; counterpart of
 * acquire_unused_channel(). */
static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
{
	mutex_lock(&f->ch_inuse_mutex);
	f->channel[c->hw_chid].in_use = false;
	mutex_unlock(&f->ch_inuse_mutex);
}
96 | |||
/*
 * Program the channel's instance block with its VM's page-directory
 * base and address-space limit.
 *
 * Returns 0 on success, -ENOMEM if the instance block has no CPU
 * mapping.
 */
int channel_gk20a_commit_va(struct channel_gk20a *c)
{
	u64 addr;
	u32 addr_lo;
	u32 addr_hi;
	void *inst_ptr;

	gk20a_dbg_fn("");

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	/* hardware takes the PDE base pre-shifted by 12 in the low word */
	addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
	addr_lo = u64_lo32(addr >> 12);
	addr_hi = u64_hi32(addr);

	gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
		   (u64)addr, addr_lo, addr_hi);

	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
		ram_in_page_dir_base_target_vid_mem_f() |
		ram_in_page_dir_base_vol_true_f() |
		ram_in_page_dir_base_lo_f(addr_lo));

	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
		ram_in_page_dir_base_hi_f(addr_hi));

	/* NOTE(review): low limit word is OR'd with 0xFFF -- presumably
	 * the limit is inclusive of the last 4K page; confirm against
	 * the ram_in_adr_limit field definition */
	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
		 u64_lo32(c->vm->va_limit) | 0xFFF);

	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
		ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));

	/* L2 invalidate after the CPU-side instance-block writes */
	gk20a_mm_l2_invalidate(c->g);

	return 0;
}
135 | |||
/*
 * Write the channel's USERD address into the RAMFC portion of the
 * instance block.
 *
 * Returns 0 on success, -ENOMEM if the instance block is unmapped.
 */
static int channel_gk20a_commit_userd(struct channel_gk20a *c)
{
	u32 addr_lo;
	u32 addr_hi;
	void *inst_ptr;

	gk20a_dbg_fn("");

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	/* low word is pre-shifted by the USERD base alignment */
	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
	addr_hi = u64_hi32(c->userd_iova);

	gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
		c->hw_chid, (u64)c->userd_iova);

	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
		 pbdma_userd_target_vid_mem_f() |
		 pbdma_userd_addr_f(addr_lo));

	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
		 pbdma_userd_target_vid_mem_f() |
		 pbdma_userd_hi_addr_f(addr_hi));

	/* L2 invalidate after the CPU-side instance-block writes */
	gk20a_mm_l2_invalidate(c->g);

	return 0;
}
166 | |||
/*
 * Change the engine timeslice of a channel.  The requested timeout is
 * encoded into an 8-bit mantissa ("value") plus exponent ("shift");
 * the channel is disabled and preempted around the RAMFC update so the
 * hardware never runs with a half-written entry.
 *
 * Returns 0 on success, -ENOMEM if the instance block is unmapped.
 */
static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
				u32 timeslice_timeout)
{
	void *inst_ptr;
	int shift = 3;
	int value = timeslice_timeout;

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	/* disable channel */
	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
		ccsr_channel_enable_clr_true_f());

	/* preempt the channel */
	WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));

	/* flush GPU cache */
	gk20a_mm_l2_flush(c->g, true);

	/* value field is 8 bits long */
	while (value >= 1 << 8) {
		value >>= 1;
		shift++;
	}

	/* time slice register is only 18bits long */
	/* NOTE(review): the comment says 18 bits but the guard tests
	 * against 1<<19 -- confirm which limit the hardware actually
	 * imposes */
	if ((value << shift) >= 1<<19) {
		pr_err("Requested timeslice value is clamped to 18 bits\n");
		value = 255;
		shift = 10;
	}

	/* set new timeslice */
	gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
		value | (shift << 12) |
		fifo_eng_timeslice_enable_true_f());

	/* enable channel */
	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
		ccsr_channel_enable_set_true_f());

	gk20a_mm_l2_invalidate(c->g);

	return 0;
}
216 | |||
/*
 * Initialize the channel's RAMFC (the FIFO context held in the
 * instance block): GPFIFO base/size, PBDMA signature/format/header
 * defaults, acquire and timeslice settings, and the channel id.
 *
 * Returns 0 on success, -ENOMEM if the instance block is unmapped.
 */
static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
			u64 gpfifo_base, u32 gpfifo_entries)
{
	void *inst_ptr;

	gk20a_dbg_fn("");

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	/* start from a clean RAMFC */
	memset(inst_ptr, 0, ram_fc_size_val_v());

	/* GPFIFO base (reserved low bits dropped) and log2 entry count */
	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
		pbdma_gp_base_offset_f(
		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));

	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));

	gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
		 pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
		pbdma_formats_gp_fermi0_f() |
		pbdma_formats_pb_fermi1_f() |
		pbdma_formats_mp_fermi0_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
		pbdma_pb_header_priv_user_f() |
		pbdma_pb_header_method_zero_f() |
		pbdma_pb_header_subchannel_zero_f() |
		pbdma_pb_header_level_main_f() |
		pbdma_pb_header_first_true_f() |
		pbdma_pb_header_type_inc_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
		pbdma_subdevice_id_f(1) |
		pbdma_subdevice_status_active_f() |
		pbdma_subdevice_channel_dma_enable_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());

	/* acquire timeout disabled; retry/exponent fields at defaults */
	gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
		pbdma_acquire_retry_man_2_f() |
		pbdma_acquire_retry_exp_2_f() |
		pbdma_acquire_timeout_exp_max_f() |
		pbdma_acquire_timeout_man_max_f() |
		pbdma_acquire_timeout_en_disable_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
		fifo_eng_timeslice_timeout_128_f() |
		fifo_eng_timeslice_timescale_3_f() |
		fifo_eng_timeslice_enable_true_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
		fifo_pb_timeslice_timeout_16_f() |
		fifo_pb_timeslice_timescale_0_f() |
		fifo_pb_timeslice_enable_true_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));

	/* TBD: always priv mode? */
	gk20a_mem_wr32(inst_ptr, ram_fc_hce_ctrl_w(),
		pbdma_hce_ctrl_hce_priv_mode_yes_f());

	/* L2 invalidate after the CPU-side instance-block writes */
	gk20a_mm_l2_invalidate(c->g);

	return 0;
}
288 | |||
289 | static int channel_gk20a_setup_userd(struct channel_gk20a *c) | ||
290 | { | ||
291 | BUG_ON(!c->userd_cpu_va); | ||
292 | |||
293 | gk20a_dbg_fn(""); | ||
294 | |||
295 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0); | ||
296 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0); | ||
297 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0); | ||
298 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0); | ||
299 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0); | ||
300 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0); | ||
301 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0); | ||
302 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0); | ||
303 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0); | ||
304 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0); | ||
305 | |||
306 | gk20a_mm_l2_invalidate(c->g); | ||
307 | |||
308 | return 0; | ||
309 | } | ||
310 | |||
/*
 * Bind the channel's instance block to its hardware channel id and
 * enable it.  Order matters: runlist selection first, then the
 * instance pointer bind, then the enable.
 */
static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
{
	struct gk20a *g = ch_gk20a->g;
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_engine_info_gk20a *engine_info =
		f->engine_info + ENGINE_GR_GK20A;

	/* hardware takes the instance base shifted down by its alignment */
	u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
		>> ram_in_base_shift_v();

	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
		ch_gk20a->hw_chid, inst_ptr);

	ch_gk20a->bound = true;

	/* select the GR engine's runlist, preserving the other bits */
	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
		 ~ccsr_channel_runlist_f(~0)) |
		 ccsr_channel_runlist_f(engine_info->runlist_id));

	/* point the hardware channel at the instance block */
	gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
		ccsr_channel_inst_ptr_f(inst_ptr) |
		ccsr_channel_inst_target_vid_mem_f() |
		ccsr_channel_inst_bind_true_f());

	/* finally enable the channel */
	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
		 ~ccsr_channel_enable_set_f(~0)) |
		 ccsr_channel_enable_set_true_f());
}
341 | |||
342 | static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a) | ||
343 | { | ||
344 | struct gk20a *g = ch_gk20a->g; | ||
345 | |||
346 | gk20a_dbg_fn(""); | ||
347 | |||
348 | if (ch_gk20a->bound) | ||
349 | gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid), | ||
350 | ccsr_channel_inst_ptr_f(0) | | ||
351 | ccsr_channel_inst_bind_false_f()); | ||
352 | |||
353 | ch_gk20a->bound = false; | ||
354 | } | ||
355 | |||
/*
 * Allocate the channel's instance block from DMA-coherent memory and
 * resolve the physical address the hardware bind needs.
 *
 * Returns 0 on success, -ENOMEM on allocation or IOVA-translation
 * failure; partial state is torn down via channel_gk20a_free_inst().
 */
static int channel_gk20a_alloc_inst(struct gk20a *g,
				struct channel_gk20a *ch)
{
	struct device *d = dev_from_gk20a(g);
	int err = 0;
	dma_addr_t iova;

	gk20a_dbg_fn("");

	ch->inst_block.size = ram_in_alloc_size_v();
	ch->inst_block.cpuva = dma_alloc_coherent(d,
						ch->inst_block.size,
						&iova,
						GFP_KERNEL);
	if (!ch->inst_block.cpuva) {
		gk20a_err(d, "%s: memory allocation failed\n", __func__);
		err = -ENOMEM;
		goto clean_up;
	}

	ch->inst_block.iova = iova;
	/* the channel bind uses a physical address, not the IOVA */
	ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
							ch->inst_block.iova);
	if (!ch->inst_block.cpu_pa) {
		gk20a_err(d, "%s: failed to get physical address\n", __func__);
		err = -ENOMEM;
		goto clean_up;
	}

	gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
		ch->hw_chid, (u64)ch->inst_block.cpu_pa);

	gk20a_dbg_fn("done");
	return 0;

clean_up:
	gk20a_err(d, "fail");
	channel_gk20a_free_inst(g, ch);
	return err;
}
396 | |||
397 | static void channel_gk20a_free_inst(struct gk20a *g, | ||
398 | struct channel_gk20a *ch) | ||
399 | { | ||
400 | struct device *d = dev_from_gk20a(g); | ||
401 | |||
402 | if (ch->inst_block.cpuva) | ||
403 | dma_free_coherent(d, ch->inst_block.size, | ||
404 | ch->inst_block.cpuva, ch->inst_block.iova); | ||
405 | ch->inst_block.cpuva = NULL; | ||
406 | ch->inst_block.iova = 0; | ||
407 | memset(&ch->inst_block, 0, sizeof(struct inst_desc)); | ||
408 | } | ||
409 | |||
/* Add this channel to, or remove it from, runlist 0, waiting for the
 * runlist update to complete (last argument = wait_for_finish). */
static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
{
	return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
}
414 | |||
/*
 * Disable the channel in hardware without touching the runlist.
 * NOTE(review): set_min_eq_max presumably force-completes any pending
 * fences so waiters are not stranded -- confirm against the
 * channel_sync implementation.
 */
void gk20a_disable_channel_no_update(struct channel_gk20a *ch)
{
	/* ensure no fences are pending */
	if (ch->sync)
		ch->sync->set_min_eq_max(ch->sync);

	/* disable channel */
	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
		gk20a_readl(ch->g,
			ccsr_channel_r(ch->hw_chid)) |
		ccsr_channel_enable_clr_true_f());
}
427 | |||
428 | static int gk20a_wait_channel_idle(struct channel_gk20a *ch) | ||
429 | { | ||
430 | bool channel_idle = false; | ||
431 | unsigned long end_jiffies = jiffies + | ||
432 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g)); | ||
433 | |||
434 | do { | ||
435 | mutex_lock(&ch->jobs_lock); | ||
436 | channel_idle = list_empty(&ch->jobs); | ||
437 | mutex_unlock(&ch->jobs_lock); | ||
438 | if (channel_idle) | ||
439 | break; | ||
440 | |||
441 | usleep_range(1000, 3000); | ||
442 | } while (time_before(jiffies, end_jiffies) | ||
443 | || !tegra_platform_is_silicon()); | ||
444 | |||
445 | if (!channel_idle) | ||
446 | gk20a_err(dev_from_gk20a(ch->g), "channel jobs not freed"); | ||
447 | |||
448 | return 0; | ||
449 | } | ||
450 | |||
451 | void gk20a_disable_channel(struct channel_gk20a *ch, | ||
452 | bool finish, | ||
453 | unsigned long finish_timeout) | ||
454 | { | ||
455 | if (finish) { | ||
456 | int err = gk20a_channel_finish(ch, finish_timeout); | ||
457 | WARN_ON(err); | ||
458 | } | ||
459 | |||
460 | /* disable the channel from hw and increment syncpoints */ | ||
461 | gk20a_disable_channel_no_update(ch); | ||
462 | |||
463 | gk20a_wait_channel_idle(ch); | ||
464 | |||
465 | /* preempt the channel */ | ||
466 | gk20a_fifo_preempt_channel(ch->g, ch->hw_chid); | ||
467 | |||
468 | /* remove channel from runlist */ | ||
469 | channel_gk20a_update_runlist(ch, false); | ||
470 | } | ||
471 | |||
472 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
473 | |||
474 | static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch) | ||
475 | { | ||
476 | /* disable existing cyclestats buffer */ | ||
477 | mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex); | ||
478 | if (ch->cyclestate.cyclestate_buffer_handler) { | ||
479 | dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler, | ||
480 | ch->cyclestate.cyclestate_buffer); | ||
481 | dma_buf_put(ch->cyclestate.cyclestate_buffer_handler); | ||
482 | ch->cyclestate.cyclestate_buffer_handler = NULL; | ||
483 | ch->cyclestate.cyclestate_buffer = NULL; | ||
484 | ch->cyclestate.cyclestate_buffer_size = 0; | ||
485 | } | ||
486 | mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex); | ||
487 | } | ||
488 | |||
489 | static int gk20a_channel_cycle_stats(struct channel_gk20a *ch, | ||
490 | struct nvhost_cycle_stats_args *args) | ||
491 | { | ||
492 | struct dma_buf *dmabuf; | ||
493 | void *virtual_address; | ||
494 | |||
495 | if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) { | ||
496 | |||
497 | /* set up new cyclestats buffer */ | ||
498 | dmabuf = dma_buf_get(args->nvmap_handle); | ||
499 | if (IS_ERR(dmabuf)) | ||
500 | return PTR_ERR(dmabuf); | ||
501 | virtual_address = dma_buf_vmap(dmabuf); | ||
502 | if (!virtual_address) | ||
503 | return -ENOMEM; | ||
504 | |||
505 | ch->cyclestate.cyclestate_buffer_handler = dmabuf; | ||
506 | ch->cyclestate.cyclestate_buffer = virtual_address; | ||
507 | ch->cyclestate.cyclestate_buffer_size = dmabuf->size; | ||
508 | return 0; | ||
509 | |||
510 | } else if (!args->nvmap_handle && | ||
511 | ch->cyclestate.cyclestate_buffer_handler) { | ||
512 | gk20a_free_cycle_stats_buffer(ch); | ||
513 | return 0; | ||
514 | |||
515 | } else if (!args->nvmap_handle && | ||
516 | !ch->cyclestate.cyclestate_buffer_handler) { | ||
517 | /* no requst from GL */ | ||
518 | return 0; | ||
519 | |||
520 | } else { | ||
521 | pr_err("channel already has cyclestats buffer\n"); | ||
522 | return -EINVAL; | ||
523 | } | ||
524 | } | ||
525 | #endif | ||
526 | |||
527 | static int gk20a_init_error_notifier(struct channel_gk20a *ch, | ||
528 | struct nvhost_set_error_notifier *args) { | ||
529 | void *va; | ||
530 | |||
531 | struct dma_buf *dmabuf; | ||
532 | |||
533 | if (!args->mem) { | ||
534 | pr_err("gk20a_init_error_notifier: invalid memory handle\n"); | ||
535 | return -EINVAL; | ||
536 | } | ||
537 | |||
538 | dmabuf = dma_buf_get(args->mem); | ||
539 | |||
540 | if (ch->error_notifier_ref) | ||
541 | gk20a_free_error_notifiers(ch); | ||
542 | |||
543 | if (IS_ERR(dmabuf)) { | ||
544 | pr_err("Invalid handle: %d\n", args->mem); | ||
545 | return -EINVAL; | ||
546 | } | ||
547 | /* map handle */ | ||
548 | va = dma_buf_vmap(dmabuf); | ||
549 | if (!va) { | ||
550 | dma_buf_put(dmabuf); | ||
551 | pr_err("Cannot map notifier handle\n"); | ||
552 | return -ENOMEM; | ||
553 | } | ||
554 | |||
555 | /* set channel notifiers pointer */ | ||
556 | ch->error_notifier_ref = dmabuf; | ||
557 | ch->error_notifier = va + args->offset; | ||
558 | ch->error_notifier_va = va; | ||
559 | memset(ch->error_notifier, 0, sizeof(struct nvhost_notification)); | ||
560 | return 0; | ||
561 | } | ||
562 | |||
563 | void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error) | ||
564 | { | ||
565 | if (ch->error_notifier_ref) { | ||
566 | struct timespec time_data; | ||
567 | u64 nsec; | ||
568 | getnstimeofday(&time_data); | ||
569 | nsec = ((u64)time_data.tv_sec) * 1000000000u + | ||
570 | (u64)time_data.tv_nsec; | ||
571 | ch->error_notifier->time_stamp.nanoseconds[0] = | ||
572 | (u32)nsec; | ||
573 | ch->error_notifier->time_stamp.nanoseconds[1] = | ||
574 | (u32)(nsec >> 32); | ||
575 | ch->error_notifier->info32 = error; | ||
576 | ch->error_notifier->status = 0xffff; | ||
577 | gk20a_err(dev_from_gk20a(ch->g), | ||
578 | "error notifier set to %d\n", error); | ||
579 | } | ||
580 | } | ||
581 | |||
582 | static void gk20a_free_error_notifiers(struct channel_gk20a *ch) | ||
583 | { | ||
584 | if (ch->error_notifier_ref) { | ||
585 | dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va); | ||
586 | dma_buf_put(ch->error_notifier_ref); | ||
587 | ch->error_notifier_ref = 0; | ||
588 | ch->error_notifier = 0; | ||
589 | ch->error_notifier_va = 0; | ||
590 | } | ||
591 | } | ||
592 | |||
/*
 * Tear down a channel and return it to the free pool.
 *
 * The teardown order below is deliberate: deferred engine resets run
 * first, the channel is quiesced before its resources are unmapped, and
 * the hw channel id is released only at the very end ("ALWAYS last") so
 * no other path can reuse the channel while it is being dismantled.
 *
 * @ch:     channel to free
 * @finish: when true, wait for outstanding work before disabling
 */
void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
{
	struct gk20a *g = ch->g;
	struct device *d = dev_from_gk20a(g);
	struct fifo_gk20a *f = &g->fifo;
	struct gr_gk20a *gr = &g->gr;
	struct vm_gk20a *ch_vm = ch->vm;
	unsigned long timeout = gk20a_get_gr_idle_timeout(g);
	struct dbg_session_gk20a *dbg_s;

	gk20a_dbg_fn("");

	/* if engine reset was deferred, perform it now */
	mutex_lock(&f->deferred_reset_mutex);
	if (g->fifo.deferred_reset_pending) {
		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
			   " deferred, running now");
		fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
		g->fifo.mmu_fault_engines = 0;
		g->fifo.deferred_reset_pending = false;
	}
	mutex_unlock(&f->deferred_reset_mutex);

	/* never bound to hardware: nothing to undo */
	if (!ch->bound)
		return;

	/* no address space bound: skip all VM-dependent cleanup */
	if (!gk20a_channel_as_bound(ch))
		goto unbind;

	gk20a_dbg_info("freeing bound channel context, timeout=%ld",
			timeout);

	/* quiesce: finish (unless already timed out), disable, preempt,
	 * and remove from the runlist */
	gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout);

	gk20a_free_error_notifiers(ch);

	/* release channel ctx */
	gk20a_free_channel_ctx(ch);

	gk20a_gr_flush_channel_tlb(gr);

	memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));

	/* free gpfifo: GPU unmap first, then the coherent backing memory */
	if (ch->gpfifo.gpu_va)
		gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
			ch->gpfifo.size, gk20a_mem_flag_none);
	if (ch->gpfifo.cpu_va)
		dma_free_coherent(d, ch->gpfifo.size,
			ch->gpfifo.cpu_va, ch->gpfifo.iova);
	ch->gpfifo.cpu_va = NULL;
	ch->gpfifo.iova = 0;

	gk20a_mm_l2_invalidate(ch->g);

	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));

#if defined(CONFIG_GK20A_CYCLE_STATS)
	gk20a_free_cycle_stats_buffer(ch);
#endif

	channel_gk20a_free_priv_cmdbuf(ch);

	if (ch->sync) {
		ch->sync->destroy(ch->sync);
		ch->sync = NULL;
	}

	/* release channel binding to the as_share */
	gk20a_as_release_share(ch_vm->as_share);

unbind:
	channel_gk20a_unbind(ch);
	channel_gk20a_free_inst(g, ch);

	ch->vpr = false;
	ch->vm = NULL;
	WARN_ON(ch->sync);

	/* unlink all debug sessions */
	mutex_lock(&ch->dbg_s_lock);

	list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
		dbg_s->ch = NULL;
		list_del_init(&dbg_s->dbg_s_list_node);
	}

	mutex_unlock(&ch->dbg_s_lock);

	/* ALWAYS last */
	release_used_channel(f, ch);
}
685 | |||
686 | int gk20a_channel_release(struct inode *inode, struct file *filp) | ||
687 | { | ||
688 | struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data; | ||
689 | struct gk20a *g = ch->g; | ||
690 | |||
691 | trace_gk20a_channel_release(dev_name(&g->dev->dev)); | ||
692 | |||
693 | gk20a_channel_busy(ch->g->dev); | ||
694 | gk20a_free_channel(ch, true); | ||
695 | gk20a_channel_idle(ch->g->dev); | ||
696 | |||
697 | gk20a_put_client(g); | ||
698 | filp->private_data = NULL; | ||
699 | return 0; | ||
700 | } | ||
701 | |||
702 | static struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g) | ||
703 | { | ||
704 | struct fifo_gk20a *f = &g->fifo; | ||
705 | struct channel_gk20a *ch; | ||
706 | |||
707 | ch = acquire_unused_channel(f); | ||
708 | if (ch == NULL) { | ||
709 | /* TBD: we want to make this virtualizable */ | ||
710 | gk20a_err(dev_from_gk20a(g), "out of hw chids"); | ||
711 | return 0; | ||
712 | } | ||
713 | |||
714 | ch->g = g; | ||
715 | |||
716 | if (channel_gk20a_alloc_inst(g, ch)) { | ||
717 | ch->in_use = false; | ||
718 | gk20a_err(dev_from_gk20a(g), | ||
719 | "failed to open gk20a channel, out of inst mem"); | ||
720 | |||
721 | return 0; | ||
722 | } | ||
723 | g->ops.fifo.bind_channel(ch); | ||
724 | ch->pid = current->pid; | ||
725 | |||
726 | /* reset timeout counter and update timestamp */ | ||
727 | ch->timeout_accumulated_ms = 0; | ||
728 | ch->timeout_gpfifo_get = 0; | ||
729 | /* set gr host default timeout */ | ||
730 | ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g); | ||
731 | ch->timeout_debug_dump = true; | ||
732 | ch->has_timedout = false; | ||
733 | |||
734 | /* The channel is *not* runnable at this point. It still needs to have | ||
735 | * an address space bound and allocate a gpfifo and grctx. */ | ||
736 | |||
737 | init_waitqueue_head(&ch->notifier_wq); | ||
738 | init_waitqueue_head(&ch->semaphore_wq); | ||
739 | init_waitqueue_head(&ch->submit_wq); | ||
740 | |||
741 | return ch; | ||
742 | } | ||
743 | |||
744 | static int __gk20a_channel_open(struct gk20a *g, struct file *filp) | ||
745 | { | ||
746 | int err; | ||
747 | struct channel_gk20a *ch; | ||
748 | |||
749 | trace_gk20a_channel_open(dev_name(&g->dev->dev)); | ||
750 | |||
751 | err = gk20a_get_client(g); | ||
752 | if (err) { | ||
753 | gk20a_err(dev_from_gk20a(g), | ||
754 | "failed to get client ref"); | ||
755 | return err; | ||
756 | } | ||
757 | |||
758 | err = gk20a_channel_busy(g->dev); | ||
759 | if (err) { | ||
760 | gk20a_put_client(g); | ||
761 | gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err); | ||
762 | return err; | ||
763 | } | ||
764 | ch = gk20a_open_new_channel(g); | ||
765 | gk20a_channel_idle(g->dev); | ||
766 | if (!ch) { | ||
767 | gk20a_put_client(g); | ||
768 | gk20a_err(dev_from_gk20a(g), | ||
769 | "failed to get f"); | ||
770 | return -ENOMEM; | ||
771 | } | ||
772 | |||
773 | filp->private_data = ch; | ||
774 | return 0; | ||
775 | } | ||
776 | |||
777 | int gk20a_channel_open(struct inode *inode, struct file *filp) | ||
778 | { | ||
779 | struct gk20a *g = container_of(inode->i_cdev, | ||
780 | struct gk20a, channel.cdev); | ||
781 | return __gk20a_channel_open(g, filp); | ||
782 | } | ||
783 | |||
784 | /* allocate private cmd buffer. | ||
785 | used for inserting commands before/after user submitted buffers. */ | ||
786 | static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c) | ||
787 | { | ||
788 | struct device *d = dev_from_gk20a(c->g); | ||
789 | struct vm_gk20a *ch_vm = c->vm; | ||
790 | struct priv_cmd_queue *q = &c->priv_cmd_q; | ||
791 | struct priv_cmd_entry *e; | ||
792 | u32 i = 0, size; | ||
793 | int err = 0; | ||
794 | struct sg_table *sgt; | ||
795 | dma_addr_t iova; | ||
796 | |||
797 | /* Kernel can insert gpfifos before and after user gpfifos. | ||
798 | Before user gpfifos, kernel inserts fence_wait, which takes | ||
799 | syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords. | ||
800 | After user gpfifos, kernel inserts fence_get, which takes | ||
801 | wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords) | ||
802 | = 6 dwords. | ||
803 | Worse case if kernel adds both of them for every user gpfifo, | ||
804 | max size of priv_cmdbuf is : | ||
805 | (gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes */ | ||
806 | size = roundup_pow_of_two( | ||
807 | c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3); | ||
808 | |||
809 | q->mem.base_cpuva = dma_alloc_coherent(d, size, | ||
810 | &iova, | ||
811 | GFP_KERNEL); | ||
812 | if (!q->mem.base_cpuva) { | ||
813 | gk20a_err(d, "%s: memory allocation failed\n", __func__); | ||
814 | err = -ENOMEM; | ||
815 | goto clean_up; | ||
816 | } | ||
817 | |||
818 | q->mem.base_iova = iova; | ||
819 | q->mem.size = size; | ||
820 | |||
821 | err = gk20a_get_sgtable(d, &sgt, | ||
822 | q->mem.base_cpuva, q->mem.base_iova, size); | ||
823 | if (err) { | ||
824 | gk20a_err(d, "%s: failed to create sg table\n", __func__); | ||
825 | goto clean_up; | ||
826 | } | ||
827 | |||
828 | memset(q->mem.base_cpuva, 0, size); | ||
829 | |||
830 | q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt, | ||
831 | size, | ||
832 | 0, /* flags */ | ||
833 | gk20a_mem_flag_none); | ||
834 | if (!q->base_gpuva) { | ||
835 | gk20a_err(d, "ch %d : failed to map gpu va" | ||
836 | "for priv cmd buffer", c->hw_chid); | ||
837 | err = -ENOMEM; | ||
838 | goto clean_up_sgt; | ||
839 | } | ||
840 | |||
841 | q->size = q->mem.size / sizeof (u32); | ||
842 | |||
843 | INIT_LIST_HEAD(&q->head); | ||
844 | INIT_LIST_HEAD(&q->free); | ||
845 | |||
846 | /* pre-alloc 25% of priv cmdbuf entries and put them on free list */ | ||
847 | for (i = 0; i < q->size / 4; i++) { | ||
848 | e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL); | ||
849 | if (!e) { | ||
850 | gk20a_err(d, "ch %d: fail to pre-alloc cmd entry", | ||
851 | c->hw_chid); | ||
852 | err = -ENOMEM; | ||
853 | goto clean_up_sgt; | ||
854 | } | ||
855 | e->pre_alloc = true; | ||
856 | list_add(&e->list, &q->free); | ||
857 | } | ||
858 | |||
859 | gk20a_free_sgtable(&sgt); | ||
860 | |||
861 | return 0; | ||
862 | |||
863 | clean_up_sgt: | ||
864 | gk20a_free_sgtable(&sgt); | ||
865 | clean_up: | ||
866 | channel_gk20a_free_priv_cmdbuf(c); | ||
867 | return err; | ||
868 | } | ||
869 | |||
/*
 * Tear down the channel's private command buffer: unmap it from the
 * channel VM, free the coherent backing memory and release every entry
 * on both the in-flight (head) and free lists, then wipe the queue.
 */
static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
{
	struct device *d = dev_from_gk20a(c->g);
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	struct list_head *pos, *tmp, *head;

	/* never allocated; also guards against iterating the list heads
	 * before INIT_LIST_HEAD ran in the alloc path.
	 * NOTE(review): if allocation failed after dma_alloc_coherent but
	 * before q->size was assigned, this early-out leaks
	 * q->mem.base_cpuva -- verify against the allocation path. */
	if (q->size == 0)
		return;

	if (q->base_gpuva)
		gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
				q->mem.size, gk20a_mem_flag_none);
	if (q->mem.base_cpuva)
		dma_free_coherent(d, q->mem.size,
			q->mem.base_cpuva, q->mem.base_iova);
	q->mem.base_cpuva = NULL;
	q->mem.base_iova = 0;

	/* free used list */
	head = &q->head;
	list_for_each_safe(pos, tmp, head) {
		e = container_of(pos, struct priv_cmd_entry, list);
		free_priv_cmdbuf(c, e);
	}

	/* free free list */
	head = &q->free;
	list_for_each_safe(pos, tmp, head) {
		e = container_of(pos, struct priv_cmd_entry, list);
		/* clear pre_alloc so free_priv_cmdbuf() kfrees the entry
		 * instead of putting it back on the free list */
		e->pre_alloc = false;
		free_priv_cmdbuf(c, e);
	}

	memset(q, 0, sizeof(struct priv_cmd_queue));
}
907 | |||
/* allocate a cmd buffer with given size. size is number of u32 entries */
/*
 * Carve `orig_size` u32 words out of the channel's private command ring
 * and return them wrapped in a priv_cmd_entry.  The ring never hands out
 * a span that crosses the end of the buffer: if the tail is too short,
 * the whole tail is skipped and the allocation restarts at offset 0.
 * If space is short, completed entries are recycled once before giving
 * up with -EAGAIN.
 */
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
			     struct priv_cmd_entry **entry)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	struct list_head *node;
	u32 free_count;
	u32 size = orig_size;
	bool no_retry = false;

	gk20a_dbg_fn("size %d", orig_size);

	*entry = NULL;

	/* if free space in the end is less than requested, increase the size
	 * to make the real allocated space start from beginning. */
	if (q->put + size > q->size)
		size = orig_size + (q->size - q->put);

	gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
			c->hw_chid, q->get, q->put);

TRY_AGAIN:
	/* standard ring-buffer free count; one slot stays empty so that
	 * full and empty are distinguishable */
	free_count = (q->size - (q->put - q->get) - 1) % q->size;

	if (size > free_count) {
		if (!no_retry) {
			/* reclaim completed entries, then retry exactly once */
			recycle_priv_cmdbuf(c);
			no_retry = true;
			goto TRY_AGAIN;
		} else
			return -EAGAIN;
	}

	if (unlikely(list_empty(&q->free))) {

		gk20a_dbg_info("ch %d: run out of pre-alloc entries",
			c->hw_chid);

		/* free list exhausted: fall back to a fresh allocation */
		e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
		if (!e) {
			gk20a_err(dev_from_gk20a(c->g),
				"ch %d: fail to allocate priv cmd entry",
				c->hw_chid);
			return -ENOMEM;
		}
	} else {
		node = q->free.next;
		list_del(node);
		e = container_of(node, struct priv_cmd_entry, list);
	}

	/* snapshot the gpfifo state so recycle_priv_cmdbuf() can tell
	 * when this entry's commands have been consumed */
	e->size = orig_size;
	e->gp_get = c->gpfifo.get;
	e->gp_put = c->gpfifo.put;
	e->gp_wrap = c->gpfifo.wrap;

	/* if we have increased size to skip free space in the end, set put
	   to beginning of cmd buffer (0) + size */
	if (size != orig_size) {
		e->ptr = q->mem.base_cpuva;
		e->gva = q->base_gpuva;
		q->put = orig_size;
	} else {
		e->ptr = q->mem.base_cpuva + q->put;
		e->gva = q->base_gpuva + q->put * sizeof(u32);
		q->put = (q->put + orig_size) & (q->size - 1);
	}

	/* we already handled q->put + size > q->size so BUG_ON this */
	BUG_ON(q->put > q->size);

	/* add new entry to head since we free from head */
	list_add(&e->list, &q->head);

	*entry = e;

	gk20a_dbg_fn("done");

	return 0;
}
990 | |||
991 | /* Don't call this to free an explict cmd entry. | ||
992 | * It doesn't update priv_cmd_queue get/put */ | ||
993 | static void free_priv_cmdbuf(struct channel_gk20a *c, | ||
994 | struct priv_cmd_entry *e) | ||
995 | { | ||
996 | struct priv_cmd_queue *q = &c->priv_cmd_q; | ||
997 | |||
998 | if (!e) | ||
999 | return; | ||
1000 | |||
1001 | list_del(&e->list); | ||
1002 | |||
1003 | if (unlikely(!e->pre_alloc)) | ||
1004 | kfree(e); | ||
1005 | else { | ||
1006 | memset(e, 0, sizeof(struct priv_cmd_entry)); | ||
1007 | e->pre_alloc = true; | ||
1008 | list_add(&e->list, &q->free); | ||
1009 | } | ||
1010 | } | ||
1011 | |||
/* free entries if they're no longer being used */
/*
 * Walk the in-flight list (newest first) looking for the most recent
 * entry whose gpfifo span the GPU has already consumed, judged by the
 * cached gpfifo get pointer and the wrap flag snapshot taken at
 * allocation time.  Everything from that entry onward (i.e. everything
 * older) is freed, and q->get is advanced past the found entry.
 */
static void recycle_priv_cmdbuf(struct channel_gk20a *c)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e, *tmp;
	struct list_head *head = &q->head;
	bool wrap_around, found = false;

	gk20a_dbg_fn("");

	/* Find the most recent free entry. Free it and everything before it */
	list_for_each_entry(e, head, list) {

		gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
			"curr get:put:wrap %d:%d:%d",
			c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
			c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);

		/* has the gpfifo wrapped since this entry was recorded? */
		wrap_around = (c->gpfifo.wrap != e->gp_wrap);
		if (e->gp_get < e->gp_put) {
			/* entry's span did not wrap: consumed once get has
			 * passed gp_put, or on any wrap */
			if (c->gpfifo.get >= e->gp_put ||
				wrap_around) {
				found = true;
				break;
			} else
				e->gp_get = c->gpfifo.get;
		} else if (e->gp_get > e->gp_put) {
			/* entry's span wrapped: need both a wrap and get
			 * past gp_put */
			if (wrap_around &&
				c->gpfifo.get >= e->gp_put) {
				found = true;
				break;
			} else
				e->gp_get = c->gpfifo.get;
		}
	}

	if (found)
		/* advance the queue's get past the found entry */
		q->get = (e->ptr - q->mem.base_cpuva) + e->size;
	else {
		gk20a_dbg_info("no free entry recycled");
		return;
	}

	/* free the found entry and all entries after it (older ones) */
	list_for_each_entry_safe_continue(e, tmp, head, list) {
		free_priv_cmdbuf(c, e);
	}

	gk20a_dbg_fn("done");
}
1061 | |||
1062 | |||
1063 | static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | ||
1064 | struct nvhost_alloc_gpfifo_args *args) | ||
1065 | { | ||
1066 | struct gk20a *g = c->g; | ||
1067 | struct device *d = dev_from_gk20a(g); | ||
1068 | struct vm_gk20a *ch_vm; | ||
1069 | u32 gpfifo_size; | ||
1070 | int err = 0; | ||
1071 | struct sg_table *sgt; | ||
1072 | dma_addr_t iova; | ||
1073 | |||
1074 | /* Kernel can insert one extra gpfifo entry before user submitted gpfifos | ||
1075 | and another one after, for internal usage. Triple the requested size. */ | ||
1076 | gpfifo_size = roundup_pow_of_two(args->num_entries * 3); | ||
1077 | |||
1078 | if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED) | ||
1079 | c->vpr = true; | ||
1080 | |||
1081 | /* an address space needs to have been bound at this point. */ | ||
1082 | if (!gk20a_channel_as_bound(c)) { | ||
1083 | gk20a_err(d, | ||
1084 | "not bound to an address space at time of gpfifo" | ||
1085 | " allocation. Attempting to create and bind to" | ||
1086 | " one..."); | ||
1087 | return -EINVAL; | ||
1088 | } | ||
1089 | ch_vm = c->vm; | ||
1090 | |||
1091 | c->cmds_pending = false; | ||
1092 | c->last_submit_fence.valid = false; | ||
1093 | |||
1094 | c->ramfc.offset = 0; | ||
1095 | c->ramfc.size = ram_in_ramfc_s() / 8; | ||
1096 | |||
1097 | if (c->gpfifo.cpu_va) { | ||
1098 | gk20a_err(d, "channel %d :" | ||
1099 | "gpfifo already allocated", c->hw_chid); | ||
1100 | return -EEXIST; | ||
1101 | } | ||
1102 | |||
1103 | c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo); | ||
1104 | c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d, | ||
1105 | c->gpfifo.size, | ||
1106 | &iova, | ||
1107 | GFP_KERNEL); | ||
1108 | if (!c->gpfifo.cpu_va) { | ||
1109 | gk20a_err(d, "%s: memory allocation failed\n", __func__); | ||
1110 | err = -ENOMEM; | ||
1111 | goto clean_up; | ||
1112 | } | ||
1113 | |||
1114 | c->gpfifo.iova = iova; | ||
1115 | c->gpfifo.entry_num = gpfifo_size; | ||
1116 | |||
1117 | c->gpfifo.get = c->gpfifo.put = 0; | ||
1118 | |||
1119 | err = gk20a_get_sgtable(d, &sgt, | ||
1120 | c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size); | ||
1121 | if (err) { | ||
1122 | gk20a_err(d, "%s: failed to allocate sg table\n", __func__); | ||
1123 | goto clean_up; | ||
1124 | } | ||
1125 | |||
1126 | c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm, | ||
1127 | &sgt, | ||
1128 | c->gpfifo.size, | ||
1129 | 0, /* flags */ | ||
1130 | gk20a_mem_flag_none); | ||
1131 | if (!c->gpfifo.gpu_va) { | ||
1132 | gk20a_err(d, "channel %d : failed to map" | ||
1133 | " gpu_va for gpfifo", c->hw_chid); | ||
1134 | err = -ENOMEM; | ||
1135 | goto clean_up_sgt; | ||
1136 | } | ||
1137 | |||
1138 | gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d", | ||
1139 | c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num); | ||
1140 | |||
1141 | channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num); | ||
1142 | |||
1143 | channel_gk20a_setup_userd(c); | ||
1144 | channel_gk20a_commit_userd(c); | ||
1145 | |||
1146 | gk20a_mm_l2_invalidate(c->g); | ||
1147 | |||
1148 | /* TBD: setup engine contexts */ | ||
1149 | |||
1150 | err = channel_gk20a_alloc_priv_cmdbuf(c); | ||
1151 | if (err) | ||
1152 | goto clean_up_unmap; | ||
1153 | |||
1154 | err = channel_gk20a_update_runlist(c, true); | ||
1155 | if (err) | ||
1156 | goto clean_up_unmap; | ||
1157 | |||
1158 | gk20a_free_sgtable(&sgt); | ||
1159 | |||
1160 | gk20a_dbg_fn("done"); | ||
1161 | return 0; | ||
1162 | |||
1163 | clean_up_unmap: | ||
1164 | gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va, | ||
1165 | c->gpfifo.size, gk20a_mem_flag_none); | ||
1166 | clean_up_sgt: | ||
1167 | gk20a_free_sgtable(&sgt); | ||
1168 | clean_up: | ||
1169 | dma_free_coherent(d, c->gpfifo.size, | ||
1170 | c->gpfifo.cpu_va, c->gpfifo.iova); | ||
1171 | c->gpfifo.cpu_va = NULL; | ||
1172 | c->gpfifo.iova = 0; | ||
1173 | memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); | ||
1174 | gk20a_err(d, "fail"); | ||
1175 | return err; | ||
1176 | } | ||
1177 | |||
/* Size, in u32 words, of the WFI command emitted by add_wfi_cmd()
 * (opcode word + ignored handle word). */
static inline int wfi_cmd_size(void)
{
	return 2;
}
1182 | void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i) | ||
1183 | { | ||
1184 | /* wfi */ | ||
1185 | cmd->ptr[(*i)++] = 0x2001001E; | ||
1186 | /* handle, ignored */ | ||
1187 | cmd->ptr[(*i)++] = 0x00000000; | ||
1188 | } | ||
1189 | |||
1190 | static inline bool check_gp_put(struct gk20a *g, | ||
1191 | struct channel_gk20a *c) | ||
1192 | { | ||
1193 | u32 put; | ||
1194 | /* gp_put changed unexpectedly since last update? */ | ||
1195 | put = gk20a_bar1_readl(g, | ||
1196 | c->userd_gpu_va + 4 * ram_userd_gp_put_w()); | ||
1197 | if (c->gpfifo.put != put) { | ||
1198 | /*TBD: BUG_ON/teardown on this*/ | ||
1199 | gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly " | ||
1200 | "since last update"); | ||
1201 | c->gpfifo.put = put; | ||
1202 | return false; /* surprise! */ | ||
1203 | } | ||
1204 | return true; /* checked out ok */ | ||
1205 | } | ||
1206 | |||
1207 | /* Update with this periodically to determine how the gpfifo is draining. */ | ||
1208 | static inline u32 update_gp_get(struct gk20a *g, | ||
1209 | struct channel_gk20a *c) | ||
1210 | { | ||
1211 | u32 new_get = gk20a_bar1_readl(g, | ||
1212 | c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w()); | ||
1213 | if (new_get < c->gpfifo.get) | ||
1214 | c->gpfifo.wrap = !c->gpfifo.wrap; | ||
1215 | c->gpfifo.get = new_get; | ||
1216 | return new_get; | ||
1217 | } | ||
1218 | |||
1219 | static inline u32 gp_free_count(struct channel_gk20a *c) | ||
1220 | { | ||
1221 | return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) % | ||
1222 | c->gpfifo.entry_num; | ||
1223 | } | ||
1224 | |||
1225 | bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, | ||
1226 | u32 timeout_delta_ms) | ||
1227 | { | ||
1228 | u32 gpfifo_get = update_gp_get(ch->g, ch); | ||
1229 | /* Count consequent timeout isr */ | ||
1230 | if (gpfifo_get == ch->timeout_gpfifo_get) { | ||
1231 | /* we didn't advance since previous channel timeout check */ | ||
1232 | ch->timeout_accumulated_ms += timeout_delta_ms; | ||
1233 | } else { | ||
1234 | /* first timeout isr encountered */ | ||
1235 | ch->timeout_accumulated_ms = timeout_delta_ms; | ||
1236 | } | ||
1237 | |||
1238 | ch->timeout_gpfifo_get = gpfifo_get; | ||
1239 | |||
1240 | return ch->g->timeouts_enabled && | ||
1241 | ch->timeout_accumulated_ms > ch->timeout_ms_max; | ||
1242 | } | ||
1243 | |||
1244 | |||
/* Issue a syncpoint increment *preceded* by a wait-for-idle
 * command. All commands on the channel will have been
 * consumed at the time the fence syncpoint increment occurs.
 */
static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
{
	struct priv_cmd_entry *cmd = NULL;
	struct gk20a *g = c->g;
	u32 free_count;
	int err;

	/* a timed-out channel accepts no further submissions */
	if (c->has_timedout)
		return -ETIMEDOUT;

	/* lazily create the sync object on first use */
	if (!c->sync) {
		c->sync = gk20a_channel_sync_create(c);
		if (!c->sync)
			return -ENOMEM;
	}

	/* refresh the get pointer, then make sure one entry is free */
	update_gp_get(g, c);
	free_count = gp_free_count(c);
	if (unlikely(!free_count)) {
		gk20a_err(dev_from_gk20a(g),
			   "not enough gpfifo space");
		return -EAGAIN;
	}

	/* build the wfi+increment commands and the fence to wait on */
	err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence);
	if (unlikely(err))
		return err;

	WARN_ON(!c->last_submit_fence.wfi);

	/* write the gpfifo entry pointing at the priv cmd buffer */
	c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
	c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
		pbdma_gp_entry1_length_f(cmd->size);

	c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);

	/* save gp_put */
	cmd->gp_put = c->gpfifo.put;

	/* kick the hardware by publishing the new put via USERD */
	gk20a_bar1_writel(g,
		c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
		c->gpfifo.put);

	gk20a_dbg_info("post-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	return 0;
}
1297 | |||
/* Refresh the cached gpfifo get pointer from USERD, then return the
 * current number of free gpfifo entries. */
static u32 get_gp_free_count(struct channel_gk20a *c)
{
	update_gp_get(c->g, c);
	return gp_free_count(c);
}
1303 | |||
1304 | static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g) | ||
1305 | { | ||
1306 | void *mem = NULL; | ||
1307 | unsigned int words; | ||
1308 | u64 offset; | ||
1309 | struct dma_buf *dmabuf = NULL; | ||
1310 | |||
1311 | if (gk20a_debug_trace_cmdbuf) { | ||
1312 | u64 gpu_va = (u64)g->entry0 | | ||
1313 | (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32); | ||
1314 | int err; | ||
1315 | |||
1316 | words = pbdma_gp_entry1_length_v(g->entry1); | ||
1317 | err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset); | ||
1318 | if (!err) | ||
1319 | mem = dma_buf_vmap(dmabuf); | ||
1320 | } | ||
1321 | |||
1322 | if (mem) { | ||
1323 | u32 i; | ||
1324 | /* | ||
1325 | * Write in batches of 128 as there seems to be a limit | ||
1326 | * of how much you can output to ftrace at once. | ||
1327 | */ | ||
1328 | for (i = 0; i < words; i += 128U) { | ||
1329 | trace_gk20a_push_cmdbuf( | ||
1330 | c->g->dev->name, | ||
1331 | 0, | ||
1332 | min(words - i, 128U), | ||
1333 | offset + i * sizeof(u32), | ||
1334 | mem); | ||
1335 | } | ||
1336 | dma_buf_vunmap(dmabuf, mem); | ||
1337 | } | ||
1338 | } | ||
1339 | |||
1340 | static int gk20a_channel_add_job(struct channel_gk20a *c, | ||
1341 | struct gk20a_channel_fence *fence) | ||
1342 | { | ||
1343 | struct vm_gk20a *vm = c->vm; | ||
1344 | struct channel_gk20a_job *job = NULL; | ||
1345 | struct mapped_buffer_node **mapped_buffers = NULL; | ||
1346 | int err = 0, num_mapped_buffers; | ||
1347 | |||
1348 | /* job needs reference to this vm */ | ||
1349 | gk20a_vm_get(vm); | ||
1350 | |||
1351 | err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers); | ||
1352 | if (err) { | ||
1353 | gk20a_vm_put(vm); | ||
1354 | return err; | ||
1355 | } | ||
1356 | |||
1357 | job = kzalloc(sizeof(*job), GFP_KERNEL); | ||
1358 | if (!job) { | ||
1359 | gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); | ||
1360 | gk20a_vm_put(vm); | ||
1361 | return -ENOMEM; | ||
1362 | } | ||
1363 | |||
1364 | job->num_mapped_buffers = num_mapped_buffers; | ||
1365 | job->mapped_buffers = mapped_buffers; | ||
1366 | job->fence = *fence; | ||
1367 | |||
1368 | mutex_lock(&c->jobs_lock); | ||
1369 | list_add_tail(&job->list, &c->jobs); | ||
1370 | mutex_unlock(&c->jobs_lock); | ||
1371 | |||
1372 | return 0; | ||
1373 | } | ||
1374 | |||
/*
 * Completion-side bookkeeping: wake submit waiters, retire every job
 * whose fence has expired (dropping its buffer pins and vm reference),
 * and release one power/busy reference per completed submission.
 *
 * @c:            channel whose job list to scan
 * @nr_completed: number of newly completed submissions to idle for
 */
void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
{
	struct gk20a *g = c->g;
	struct vm_gk20a *vm = c->vm;
	struct channel_gk20a_job *job, *n;
	int i;

	wake_up(&c->submit_wq);

	mutex_lock(&c->jobs_lock);
	list_for_each_entry_safe(job, n, &c->jobs, list) {
		bool completed = WARN_ON(!c->sync) ||
			c->sync->is_expired(c->sync, &job->fence);
		/* jobs complete in order; stop at the first pending one */
		if (!completed)
			break;

		gk20a_vm_put_buffers(vm, job->mapped_buffers,
				job->num_mapped_buffers);

		/* job is done. release its reference to vm */
		gk20a_vm_put(vm);

		list_del_init(&job->list);
		kfree(job);
		gk20a_channel_idle(g->dev);
	}
	mutex_unlock(&c->jobs_lock);

	for (i = 0; i < nr_completed; i++)
		gk20a_channel_idle(c->g->dev);
}
1406 | |||
1407 | static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | ||
1408 | struct nvhost_gpfifo *gpfifo, | ||
1409 | u32 num_entries, | ||
1410 | struct nvhost_fence *fence, | ||
1411 | u32 flags) | ||
1412 | { | ||
1413 | struct gk20a *g = c->g; | ||
1414 | struct device *d = dev_from_gk20a(g); | ||
1415 | u32 err = 0; | ||
1416 | int i; | ||
1417 | struct priv_cmd_entry *wait_cmd = NULL; | ||
1418 | struct priv_cmd_entry *incr_cmd = NULL; | ||
1419 | /* we might need two extra gpfifo entries - one for pre fence | ||
1420 | * and one for post fence. */ | ||
1421 | const int extra_entries = 2; | ||
1422 | |||
1423 | if (c->has_timedout) | ||
1424 | return -ETIMEDOUT; | ||
1425 | |||
1426 | if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT | | ||
1427 | NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) && | ||
1428 | !fence) | ||
1429 | return -EINVAL; | ||
1430 | |||
1431 | if (!c->sync) { | ||
1432 | c->sync = gk20a_channel_sync_create(c); | ||
1433 | if (!c->sync) | ||
1434 | return -ENOMEM; | ||
1435 | } | ||
1436 | |||
1437 | #ifdef CONFIG_DEBUG_FS | ||
1438 | /* update debug settings */ | ||
1439 | if (g->ops.ltc.sync_debugfs) | ||
1440 | g->ops.ltc.sync_debugfs(g); | ||
1441 | #endif | ||
1442 | |||
1443 | gk20a_dbg_info("channel %d", c->hw_chid); | ||
1444 | |||
1445 | /* gk20a_channel_update releases this ref. */ | ||
1446 | gk20a_channel_busy(g->dev); | ||
1447 | |||
1448 | trace_gk20a_channel_submit_gpfifo(c->g->dev->name, | ||
1449 | c->hw_chid, | ||
1450 | num_entries, | ||
1451 | flags, | ||
1452 | fence->syncpt_id, fence->value); | ||
1453 | check_gp_put(g, c); | ||
1454 | update_gp_get(g, c); | ||
1455 | |||
1456 | gk20a_dbg_info("pre-submit put %d, get %d, size %d", | ||
1457 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
1458 | |||
1459 | /* Invalidate tlb if it's dirty... */ | ||
1460 | /* TBD: this should be done in the cmd stream, not with PRIs. */ | ||
1461 | /* We don't know what context is currently running... */ | ||
1462 | /* Note also: there can be more than one context associated with the */ | ||
1463 | /* address space (vm). */ | ||
1464 | gk20a_mm_tlb_invalidate(c->vm); | ||
1465 | |||
1466 | /* Make sure we have enough space for gpfifo entries. If not, | ||
1467 | * wait for signals from completed submits */ | ||
1468 | if (gp_free_count(c) < num_entries + extra_entries) { | ||
1469 | err = wait_event_interruptible(c->submit_wq, | ||
1470 | get_gp_free_count(c) >= num_entries + extra_entries || | ||
1471 | c->has_timedout); | ||
1472 | } | ||
1473 | |||
1474 | if (c->has_timedout) { | ||
1475 | err = -ETIMEDOUT; | ||
1476 | goto clean_up; | ||
1477 | } | ||
1478 | |||
1479 | if (err) { | ||
1480 | gk20a_err(d, "not enough gpfifo space"); | ||
1481 | err = -EAGAIN; | ||
1482 | goto clean_up; | ||
1483 | } | ||
1484 | |||
1485 | /* | ||
1486 | * optionally insert syncpt wait in the beginning of gpfifo submission | ||
1487 | * when user requested and the wait hasn't expired. | ||
1488 | * validate that the id makes sense, elide if not | ||
1489 | * the only reason this isn't being unceremoniously killed is to | ||
1490 | * keep running some tests which trigger this condition | ||
1491 | */ | ||
1492 | if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { | ||
1493 | if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) | ||
1494 | err = c->sync->wait_fd(c->sync, fence->syncpt_id, | ||
1495 | &wait_cmd); | ||
1496 | else | ||
1497 | err = c->sync->wait_syncpt(c->sync, fence->syncpt_id, | ||
1498 | fence->value, &wait_cmd); | ||
1499 | } | ||
1500 | if (err) | ||
1501 | goto clean_up; | ||
1502 | |||
1503 | |||
1504 | /* always insert syncpt increment at end of gpfifo submission | ||
1505 | to keep track of method completion for idle railgating */ | ||
1506 | if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET && | ||
1507 | flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) | ||
1508 | err = c->sync->incr_user_fd(c->sync, &incr_cmd, | ||
1509 | &c->last_submit_fence, | ||
1510 | &fence->syncpt_id); | ||
1511 | else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET) | ||
1512 | err = c->sync->incr_user_syncpt(c->sync, &incr_cmd, | ||
1513 | &c->last_submit_fence, | ||
1514 | &fence->syncpt_id, | ||
1515 | &fence->value); | ||
1516 | else | ||
1517 | err = c->sync->incr(c->sync, &incr_cmd, | ||
1518 | &c->last_submit_fence); | ||
1519 | if (err) | ||
1520 | goto clean_up; | ||
1521 | |||
1522 | if (wait_cmd) { | ||
1523 | c->gpfifo.cpu_va[c->gpfifo.put].entry0 = | ||
1524 | u64_lo32(wait_cmd->gva); | ||
1525 | c->gpfifo.cpu_va[c->gpfifo.put].entry1 = | ||
1526 | u64_hi32(wait_cmd->gva) | | ||
1527 | pbdma_gp_entry1_length_f(wait_cmd->size); | ||
1528 | trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]); | ||
1529 | |||
1530 | c->gpfifo.put = (c->gpfifo.put + 1) & | ||
1531 | (c->gpfifo.entry_num - 1); | ||
1532 | |||
1533 | /* save gp_put */ | ||
1534 | wait_cmd->gp_put = c->gpfifo.put; | ||
1535 | } | ||
1536 | |||
1537 | for (i = 0; i < num_entries; i++) { | ||
1538 | c->gpfifo.cpu_va[c->gpfifo.put].entry0 = | ||
1539 | gpfifo[i].entry0; /* cmd buf va low 32 */ | ||
1540 | c->gpfifo.cpu_va[c->gpfifo.put].entry1 = | ||
1541 | gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */ | ||
1542 | trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]); | ||
1543 | c->gpfifo.put = (c->gpfifo.put + 1) & | ||
1544 | (c->gpfifo.entry_num - 1); | ||
1545 | } | ||
1546 | |||
1547 | if (incr_cmd) { | ||
1548 | c->gpfifo.cpu_va[c->gpfifo.put].entry0 = | ||
1549 | u64_lo32(incr_cmd->gva); | ||
1550 | c->gpfifo.cpu_va[c->gpfifo.put].entry1 = | ||
1551 | u64_hi32(incr_cmd->gva) | | ||
1552 | pbdma_gp_entry1_length_f(incr_cmd->size); | ||
1553 | trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]); | ||
1554 | |||
1555 | c->gpfifo.put = (c->gpfifo.put + 1) & | ||
1556 | (c->gpfifo.entry_num - 1); | ||
1557 | |||
1558 | /* save gp_put */ | ||
1559 | incr_cmd->gp_put = c->gpfifo.put; | ||
1560 | } | ||
1561 | |||
1562 | /* Invalidate tlb if it's dirty... */ | ||
1563 | /* TBD: this should be done in the cmd stream, not with PRIs. */ | ||
1564 | /* We don't know what context is currently running... */ | ||
1565 | /* Note also: there can be more than one context associated with the */ | ||
1566 | /* address space (vm). */ | ||
1567 | gk20a_mm_tlb_invalidate(c->vm); | ||
1568 | |||
1569 | trace_gk20a_channel_submitted_gpfifo(c->g->dev->name, | ||
1570 | c->hw_chid, | ||
1571 | num_entries, | ||
1572 | flags, | ||
1573 | fence->syncpt_id, fence->value); | ||
1574 | |||
1575 | /* TODO! Check for errors... */ | ||
1576 | gk20a_channel_add_job(c, &c->last_submit_fence); | ||
1577 | |||
1578 | c->cmds_pending = true; | ||
1579 | gk20a_bar1_writel(g, | ||
1580 | c->userd_gpu_va + 4 * ram_userd_gp_put_w(), | ||
1581 | c->gpfifo.put); | ||
1582 | |||
1583 | gk20a_dbg_info("post-submit put %d, get %d, size %d", | ||
1584 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
1585 | |||
1586 | gk20a_dbg_fn("done"); | ||
1587 | return err; | ||
1588 | |||
1589 | clean_up: | ||
1590 | gk20a_err(d, "fail"); | ||
1591 | free_priv_cmdbuf(c, wait_cmd); | ||
1592 | free_priv_cmdbuf(c, incr_cmd); | ||
1593 | gk20a_channel_idle(g->dev); | ||
1594 | return err; | ||
1595 | } | ||
1596 | |||
/* Per-channel teardown hook, installed as c->remove_support in
 * gk20a_init_channel_support(); currently there is nothing to release. */
void gk20a_remove_channel_support(struct channel_gk20a *c)
{

}
1601 | |||
1602 | int gk20a_init_channel_support(struct gk20a *g, u32 chid) | ||
1603 | { | ||
1604 | struct channel_gk20a *c = g->fifo.channel+chid; | ||
1605 | c->g = g; | ||
1606 | c->in_use = false; | ||
1607 | c->hw_chid = chid; | ||
1608 | c->bound = false; | ||
1609 | c->remove_support = gk20a_remove_channel_support; | ||
1610 | mutex_init(&c->jobs_lock); | ||
1611 | INIT_LIST_HEAD(&c->jobs); | ||
1612 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
1613 | mutex_init(&c->cyclestate.cyclestate_buffer_mutex); | ||
1614 | #endif | ||
1615 | INIT_LIST_HEAD(&c->dbg_s_list); | ||
1616 | mutex_init(&c->dbg_s_lock); | ||
1617 | |||
1618 | return 0; | ||
1619 | } | ||
1620 | |||
1621 | int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout) | ||
1622 | { | ||
1623 | int err = 0; | ||
1624 | |||
1625 | if (!ch->cmds_pending) | ||
1626 | return 0; | ||
1627 | |||
1628 | /* Do not wait for a timedout channel */ | ||
1629 | if (ch->has_timedout) | ||
1630 | return -ETIMEDOUT; | ||
1631 | |||
1632 | if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) { | ||
1633 | gk20a_dbg_fn("issuing wfi, incr to finish the channel"); | ||
1634 | err = gk20a_channel_submit_wfi(ch); | ||
1635 | } | ||
1636 | if (err) | ||
1637 | return err; | ||
1638 | |||
1639 | BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)); | ||
1640 | |||
1641 | gk20a_dbg_fn("waiting for channel to finish thresh:%d", | ||
1642 | ch->last_submit_fence.thresh); | ||
1643 | |||
1644 | err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence, timeout); | ||
1645 | if (WARN_ON(err)) | ||
1646 | dev_warn(dev_from_gk20a(ch->g), | ||
1647 | "timed out waiting for gk20a channel to finish"); | ||
1648 | else | ||
1649 | ch->cmds_pending = false; | ||
1650 | |||
1651 | return err; | ||
1652 | } | ||
1653 | |||
/*
 * Block until the 32-bit semaphore word inside dma-buf @id at byte
 * @offset equals @payload, the channel times out, or @timeout (jiffies)
 * expires.
 *
 * NOTE(review): only the single page containing @offset is kmapped, so
 * this assumes the semaphore word does not straddle a page boundary —
 * confirm callers guarantee 4-byte alignment of @offset.
 *
 * Returns 0 on success, -ETIMEDOUT on timeout or timed-out channel,
 * -EINVAL on a bad handle/map, or -ERESTARTSYS if interrupted.
 */
static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
					ulong id, u32 offset,
					u32 payload, long timeout)
{
	struct platform_device *pdev = ch->g->dev;
	struct dma_buf *dmabuf;
	void *data;
	u32 *semaphore;
	int ret = 0;
	long remain;

	/* do not wait if channel has timed out */
	if (ch->has_timedout)
		return -ETIMEDOUT;

	dmabuf = dma_buf_get(id);
	if (IS_ERR(dmabuf)) {
		gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
			   id);
		return -EINVAL;
	}

	/* map only the page the semaphore lives in */
	data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
	if (!data) {
		gk20a_err(&pdev->dev, "failed to map notifier memory");
		ret = -EINVAL;
		goto cleanup_put;
	}

	/* in-page offset of the semaphore word */
	semaphore = data + (offset & ~PAGE_MASK);

	remain = wait_event_interruptible_timeout(
			ch->semaphore_wq,
			*semaphore == payload || ch->has_timedout,
			timeout);

	if (remain == 0 && *semaphore != payload)
		ret = -ETIMEDOUT;
	else if (remain < 0)
		ret = remain;

	dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
cleanup_put:
	dma_buf_put(dmabuf);
	return ret;
}
1700 | |||
1701 | static int gk20a_channel_wait(struct channel_gk20a *ch, | ||
1702 | struct nvhost_wait_args *args) | ||
1703 | { | ||
1704 | struct device *d = dev_from_gk20a(ch->g); | ||
1705 | struct dma_buf *dmabuf; | ||
1706 | struct notification *notif; | ||
1707 | struct timespec tv; | ||
1708 | u64 jiffies; | ||
1709 | ulong id; | ||
1710 | u32 offset; | ||
1711 | unsigned long timeout; | ||
1712 | int remain, ret = 0; | ||
1713 | |||
1714 | gk20a_dbg_fn(""); | ||
1715 | |||
1716 | if (ch->has_timedout) | ||
1717 | return -ETIMEDOUT; | ||
1718 | |||
1719 | if (args->timeout == NVHOST_NO_TIMEOUT) | ||
1720 | timeout = MAX_SCHEDULE_TIMEOUT; | ||
1721 | else | ||
1722 | timeout = (u32)msecs_to_jiffies(args->timeout); | ||
1723 | |||
1724 | switch (args->type) { | ||
1725 | case NVHOST_WAIT_TYPE_NOTIFIER: | ||
1726 | id = args->condition.notifier.nvmap_handle; | ||
1727 | offset = args->condition.notifier.offset; | ||
1728 | |||
1729 | dmabuf = dma_buf_get(id); | ||
1730 | if (IS_ERR(dmabuf)) { | ||
1731 | gk20a_err(d, "invalid notifier nvmap handle 0x%lx", | ||
1732 | id); | ||
1733 | return -EINVAL; | ||
1734 | } | ||
1735 | |||
1736 | notif = dma_buf_vmap(dmabuf); | ||
1737 | if (!notif) { | ||
1738 | gk20a_err(d, "failed to map notifier memory"); | ||
1739 | return -ENOMEM; | ||
1740 | } | ||
1741 | |||
1742 | notif = (struct notification *)((uintptr_t)notif + offset); | ||
1743 | |||
1744 | /* user should set status pending before | ||
1745 | * calling this ioctl */ | ||
1746 | remain = wait_event_interruptible_timeout( | ||
1747 | ch->notifier_wq, | ||
1748 | notif->status == 0 || ch->has_timedout, | ||
1749 | timeout); | ||
1750 | |||
1751 | if (remain == 0 && notif->status != 0) { | ||
1752 | ret = -ETIMEDOUT; | ||
1753 | goto notif_clean_up; | ||
1754 | } else if (remain < 0) { | ||
1755 | ret = -EINTR; | ||
1756 | goto notif_clean_up; | ||
1757 | } | ||
1758 | |||
1759 | /* TBD: fill in correct information */ | ||
1760 | jiffies = get_jiffies_64(); | ||
1761 | jiffies_to_timespec(jiffies, &tv); | ||
1762 | notif->timestamp.nanoseconds[0] = tv.tv_nsec; | ||
1763 | notif->timestamp.nanoseconds[1] = tv.tv_sec; | ||
1764 | notif->info32 = 0xDEADBEEF; /* should be object name */ | ||
1765 | notif->info16 = ch->hw_chid; /* should be method offset */ | ||
1766 | |||
1767 | notif_clean_up: | ||
1768 | dma_buf_vunmap(dmabuf, notif); | ||
1769 | return ret; | ||
1770 | |||
1771 | case NVHOST_WAIT_TYPE_SEMAPHORE: | ||
1772 | ret = gk20a_channel_wait_semaphore(ch, | ||
1773 | args->condition.semaphore.nvmap_handle, | ||
1774 | args->condition.semaphore.offset, | ||
1775 | args->condition.semaphore.payload, | ||
1776 | timeout); | ||
1777 | |||
1778 | break; | ||
1779 | |||
1780 | default: | ||
1781 | ret = -EINVAL; | ||
1782 | break; | ||
1783 | } | ||
1784 | |||
1785 | return ret; | ||
1786 | } | ||
1787 | |||
1788 | static int gk20a_channel_set_priority(struct channel_gk20a *ch, | ||
1789 | u32 priority) | ||
1790 | { | ||
1791 | u32 timeslice_timeout; | ||
1792 | /* set priority of graphics channel */ | ||
1793 | switch (priority) { | ||
1794 | case NVHOST_PRIORITY_LOW: | ||
1795 | /* 64 << 3 = 512us */ | ||
1796 | timeslice_timeout = 64; | ||
1797 | break; | ||
1798 | case NVHOST_PRIORITY_MEDIUM: | ||
1799 | /* 128 << 3 = 1024us */ | ||
1800 | timeslice_timeout = 128; | ||
1801 | break; | ||
1802 | case NVHOST_PRIORITY_HIGH: | ||
1803 | /* 255 << 3 = 2048us */ | ||
1804 | timeslice_timeout = 255; | ||
1805 | break; | ||
1806 | default: | ||
1807 | pr_err("Unsupported priority"); | ||
1808 | return -EINVAL; | ||
1809 | } | ||
1810 | channel_gk20a_set_schedule_params(ch, | ||
1811 | timeslice_timeout); | ||
1812 | return 0; | ||
1813 | } | ||
1814 | |||
/* NVHOST_IOCTL_CHANNEL_ZCULL_BIND handler: forwards the user-supplied
 * zcull GPU VA and mode to the graphics-context zcull bind routine.
 * Returns whatever gr_gk20a_bind_ctxsw_zcull() returns. */
static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
			    struct nvhost_zcull_bind_args *args)
{
	struct gk20a *g = ch->g;
	struct gr_gk20a *gr = &g->gr;

	gk20a_dbg_fn("");

	return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
				args->gpu_va, args->mode);
}
1826 | |||
/* Suspend all gk20a channels (the "channel" in this context is the
 * host1x channel which maps to *all* gk20a channels).
 *
 * First idles the engine by submitting a wait-for-idle on one in-use
 * non-KEPLER_C channel, then disables, preempts and unbinds every
 * in-use channel and removes them from the runlist. */
int gk20a_channel_suspend(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;
	struct device *d = dev_from_gk20a(g);
	int err;

	gk20a_dbg_fn("");

	/* idle the engine by submitting WFI on non-KEPLER_C channel */
	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *c = &f->channel[chid];
		if (c->in_use && c->obj_class != KEPLER_C) {
			err = gk20a_channel_submit_wfi(c);
			if (err) {
				gk20a_err(d, "cannot idle channel %d\n",
					chid);
				return err;
			}

			/* NOTE(review): c->sync is dereferenced without a
			 * NULL check here — presumably submit_wfi creates
			 * it; confirm for a channel that never submitted.
			 * The wait result is deliberately ignored
			 * (best-effort idle, bounded at 500000). */
			c->sync->wait_cpu(c->sync, &c->last_submit_fence,
					500000);
			break;
		}
	}

	for (chid = 0; chid < f->num_channels; chid++) {
		if (f->channel[chid].in_use) {

			gk20a_dbg_info("suspend channel %d", chid);
			/* disable channel */
			gk20a_writel(g, ccsr_channel_r(chid),
				gk20a_readl(g, ccsr_channel_r(chid)) |
				ccsr_channel_enable_clr_true_f());
			/* preempt the channel */
			gk20a_fifo_preempt_channel(g, chid);

			channels_in_use = true;
		}
	}

	if (channels_in_use) {
		/* remove all channels from the runlist, then unbind them */
		gk20a_fifo_update_runlist(g, 0, ~0, false, true);

		for (chid = 0; chid < f->num_channels; chid++) {
			if (f->channel[chid].in_use)
				channel_gk20a_unbind(&f->channel[chid]);
		}
	}

	gk20a_dbg_fn("done");
	return 0;
}
1883 | |||
1884 | /* in this context the "channel" is the host1x channel which | ||
1885 | * maps to *all* gk20a channels */ | ||
1886 | int gk20a_channel_resume(struct gk20a *g) | ||
1887 | { | ||
1888 | struct fifo_gk20a *f = &g->fifo; | ||
1889 | u32 chid; | ||
1890 | bool channels_in_use = false; | ||
1891 | |||
1892 | gk20a_dbg_fn(""); | ||
1893 | |||
1894 | for (chid = 0; chid < f->num_channels; chid++) { | ||
1895 | if (f->channel[chid].in_use) { | ||
1896 | gk20a_dbg_info("resume channel %d", chid); | ||
1897 | g->ops.fifo.bind_channel(&f->channel[chid]); | ||
1898 | channels_in_use = true; | ||
1899 | } | ||
1900 | } | ||
1901 | |||
1902 | if (channels_in_use) | ||
1903 | gk20a_fifo_update_runlist(g, 0, ~0, true, true); | ||
1904 | |||
1905 | gk20a_dbg_fn("done"); | ||
1906 | return 0; | ||
1907 | } | ||
1908 | |||
1909 | void gk20a_channel_semaphore_wakeup(struct gk20a *g) | ||
1910 | { | ||
1911 | struct fifo_gk20a *f = &g->fifo; | ||
1912 | u32 chid; | ||
1913 | |||
1914 | gk20a_dbg_fn(""); | ||
1915 | |||
1916 | for (chid = 0; chid < f->num_channels; chid++) { | ||
1917 | struct channel_gk20a *c = g->fifo.channel+chid; | ||
1918 | if (c->in_use) | ||
1919 | wake_up_interruptible_all(&c->semaphore_wq); | ||
1920 | } | ||
1921 | } | ||
1922 | |||
1923 | static int gk20a_ioctl_channel_submit_gpfifo( | ||
1924 | struct channel_gk20a *ch, | ||
1925 | struct nvhost_submit_gpfifo_args *args) | ||
1926 | { | ||
1927 | void *gpfifo; | ||
1928 | u32 size; | ||
1929 | int ret = 0; | ||
1930 | |||
1931 | gk20a_dbg_fn(""); | ||
1932 | |||
1933 | if (ch->has_timedout) | ||
1934 | return -ETIMEDOUT; | ||
1935 | |||
1936 | size = args->num_entries * sizeof(struct nvhost_gpfifo); | ||
1937 | |||
1938 | gpfifo = kzalloc(size, GFP_KERNEL); | ||
1939 | if (!gpfifo) | ||
1940 | return -ENOMEM; | ||
1941 | |||
1942 | if (copy_from_user(gpfifo, | ||
1943 | (void __user *)(uintptr_t)args->gpfifo, size)) { | ||
1944 | ret = -EINVAL; | ||
1945 | goto clean_up; | ||
1946 | } | ||
1947 | |||
1948 | ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries, | ||
1949 | &args->fence, args->flags); | ||
1950 | |||
1951 | clean_up: | ||
1952 | kfree(gpfifo); | ||
1953 | return ret; | ||
1954 | } | ||
1955 | |||
/* Populate the fifo HAL function pointers for the gk20a chip. */
void gk20a_init_fifo(struct gpu_ops *gops)
{
	gops->fifo.bind_channel = channel_gk20a_bind;
}
1960 | |||
1961 | long gk20a_channel_ioctl(struct file *filp, | ||
1962 | unsigned int cmd, unsigned long arg) | ||
1963 | { | ||
1964 | struct channel_gk20a *ch = filp->private_data; | ||
1965 | struct platform_device *dev = ch->g->dev; | ||
1966 | u8 buf[NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE]; | ||
1967 | int err = 0; | ||
1968 | |||
1969 | if ((_IOC_TYPE(cmd) != NVHOST_IOCTL_MAGIC) || | ||
1970 | (_IOC_NR(cmd) == 0) || | ||
1971 | (_IOC_NR(cmd) > NVHOST_IOCTL_CHANNEL_LAST) || | ||
1972 | (_IOC_SIZE(cmd) > NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE)) | ||
1973 | return -EFAULT; | ||
1974 | |||
1975 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
1976 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
1977 | return -EFAULT; | ||
1978 | } | ||
1979 | |||
1980 | switch (cmd) { | ||
1981 | case NVHOST_IOCTL_CHANNEL_OPEN: | ||
1982 | { | ||
1983 | int fd; | ||
1984 | struct file *file; | ||
1985 | char *name; | ||
1986 | |||
1987 | err = get_unused_fd_flags(O_RDWR); | ||
1988 | if (err < 0) | ||
1989 | break; | ||
1990 | fd = err; | ||
1991 | |||
1992 | name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d", | ||
1993 | dev_name(&dev->dev), fd); | ||
1994 | if (!name) { | ||
1995 | err = -ENOMEM; | ||
1996 | put_unused_fd(fd); | ||
1997 | break; | ||
1998 | } | ||
1999 | |||
2000 | file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR); | ||
2001 | kfree(name); | ||
2002 | if (IS_ERR(file)) { | ||
2003 | err = PTR_ERR(file); | ||
2004 | put_unused_fd(fd); | ||
2005 | break; | ||
2006 | } | ||
2007 | fd_install(fd, file); | ||
2008 | |||
2009 | err = __gk20a_channel_open(ch->g, file); | ||
2010 | if (err) { | ||
2011 | put_unused_fd(fd); | ||
2012 | fput(file); | ||
2013 | break; | ||
2014 | } | ||
2015 | |||
2016 | ((struct nvhost_channel_open_args *)buf)->channel_fd = fd; | ||
2017 | break; | ||
2018 | } | ||
2019 | case NVHOST_IOCTL_CHANNEL_SET_NVMAP_FD: | ||
2020 | break; | ||
2021 | case NVHOST_IOCTL_CHANNEL_ALLOC_OBJ_CTX: | ||
2022 | gk20a_channel_busy(dev); | ||
2023 | err = gk20a_alloc_obj_ctx(ch, | ||
2024 | (struct nvhost_alloc_obj_ctx_args *)buf); | ||
2025 | gk20a_channel_idle(dev); | ||
2026 | break; | ||
2027 | case NVHOST_IOCTL_CHANNEL_FREE_OBJ_CTX: | ||
2028 | gk20a_channel_busy(dev); | ||
2029 | err = gk20a_free_obj_ctx(ch, | ||
2030 | (struct nvhost_free_obj_ctx_args *)buf); | ||
2031 | gk20a_channel_idle(dev); | ||
2032 | break; | ||
2033 | case NVHOST_IOCTL_CHANNEL_ALLOC_GPFIFO: | ||
2034 | gk20a_channel_busy(dev); | ||
2035 | err = gk20a_alloc_channel_gpfifo(ch, | ||
2036 | (struct nvhost_alloc_gpfifo_args *)buf); | ||
2037 | gk20a_channel_idle(dev); | ||
2038 | break; | ||
2039 | case NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO: | ||
2040 | err = gk20a_ioctl_channel_submit_gpfifo(ch, | ||
2041 | (struct nvhost_submit_gpfifo_args *)buf); | ||
2042 | break; | ||
2043 | case NVHOST_IOCTL_CHANNEL_WAIT: | ||
2044 | gk20a_channel_busy(dev); | ||
2045 | err = gk20a_channel_wait(ch, | ||
2046 | (struct nvhost_wait_args *)buf); | ||
2047 | gk20a_channel_idle(dev); | ||
2048 | break; | ||
2049 | case NVHOST_IOCTL_CHANNEL_ZCULL_BIND: | ||
2050 | gk20a_channel_busy(dev); | ||
2051 | err = gk20a_channel_zcull_bind(ch, | ||
2052 | (struct nvhost_zcull_bind_args *)buf); | ||
2053 | gk20a_channel_idle(dev); | ||
2054 | break; | ||
2055 | case NVHOST_IOCTL_CHANNEL_SET_ERROR_NOTIFIER: | ||
2056 | gk20a_channel_busy(dev); | ||
2057 | err = gk20a_init_error_notifier(ch, | ||
2058 | (struct nvhost_set_error_notifier *)buf); | ||
2059 | gk20a_channel_idle(dev); | ||
2060 | break; | ||
2061 | #ifdef CONFIG_GK20A_CYCLE_STATS | ||
2062 | case NVHOST_IOCTL_CHANNEL_CYCLE_STATS: | ||
2063 | gk20a_channel_busy(dev); | ||
2064 | err = gk20a_channel_cycle_stats(ch, | ||
2065 | (struct nvhost_cycle_stats_args *)buf); | ||
2066 | gk20a_channel_idle(dev); | ||
2067 | break; | ||
2068 | #endif | ||
2069 | case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT: | ||
2070 | { | ||
2071 | u32 timeout = | ||
2072 | (u32)((struct nvhost_set_timeout_args *)buf)->timeout; | ||
2073 | gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", | ||
2074 | timeout, ch->hw_chid); | ||
2075 | ch->timeout_ms_max = timeout; | ||
2076 | break; | ||
2077 | } | ||
2078 | case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT_EX: | ||
2079 | { | ||
2080 | u32 timeout = | ||
2081 | (u32)((struct nvhost_set_timeout_args *)buf)->timeout; | ||
2082 | bool timeout_debug_dump = !((u32) | ||
2083 | ((struct nvhost_set_timeout_ex_args *)buf)->flags & | ||
2084 | (1 << NVHOST_TIMEOUT_FLAG_DISABLE_DUMP)); | ||
2085 | gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", | ||
2086 | timeout, ch->hw_chid); | ||
2087 | ch->timeout_ms_max = timeout; | ||
2088 | ch->timeout_debug_dump = timeout_debug_dump; | ||
2089 | break; | ||
2090 | } | ||
2091 | case NVHOST_IOCTL_CHANNEL_GET_TIMEDOUT: | ||
2092 | ((struct nvhost_get_param_args *)buf)->value = | ||
2093 | ch->has_timedout; | ||
2094 | break; | ||
2095 | case NVHOST_IOCTL_CHANNEL_SET_PRIORITY: | ||
2096 | gk20a_channel_busy(dev); | ||
2097 | gk20a_channel_set_priority(ch, | ||
2098 | ((struct nvhost_set_priority_args *)buf)->priority); | ||
2099 | gk20a_channel_idle(dev); | ||
2100 | break; | ||
2101 | default: | ||
2102 | dev_err(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd); | ||
2103 | err = -ENOTTY; | ||
2104 | break; | ||
2105 | } | ||
2106 | |||
2107 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
2108 | err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); | ||
2109 | |||
2110 | return err; | ||
2111 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h new file mode 100644 index 00000000..429db85d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -0,0 +1,172 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/channel_gk20a.h | ||
3 | * | ||
4 | * GK20A graphics channel | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #ifndef __CHANNEL_GK20A_H__ | ||
22 | #define __CHANNEL_GK20A_H__ | ||
23 | |||
24 | #include <linux/log2.h> | ||
25 | #include <linux/slab.h> | ||
26 | #include <linux/wait.h> | ||
27 | #include <linux/mutex.h> | ||
28 | #include <linux/nvhost_ioctl.h> | ||
29 | struct gk20a; | ||
30 | struct gr_gk20a; | ||
31 | struct dbg_session_gk20a; | ||
32 | |||
33 | #include "channel_sync_gk20a.h" | ||
34 | |||
35 | #include "mm_gk20a.h" | ||
36 | #include "gr_gk20a.h" | ||
37 | |||
/* One gpfifo ring entry: low 32 bits of the command buffer GPU VA in
 * entry0; high VA bits and the length field in entry1 (see the submit
 * path in channel_gk20a.c). */
struct gpfifo {
	u32 entry0;
	u32 entry1;
};

/* Userspace-visible notifier record written by gk20a_channel_wait();
 * userspace sets 'status' non-zero ("pending") before waiting. */
struct notification {
	struct {
		u32 nanoseconds[2];
	} timestamp;
	u32 info32;
	u16 info16;
	u16 status;
};

/* NOTE(review): not referenced in the visible code — presumably a
 * legacy syncpoint fence pair; confirm before removing. */
struct fence {
	u32 hw_chid;
	u32 syncpt_val;
};

/* contexts associated with a channel */
struct channel_ctx_gk20a {
	struct gr_ctx_desc	gr_ctx;
	struct pm_ctx_desc	pm_ctx;
	struct patch_desc	patch_ctx;
	struct zcull_ctx_desc	zcull_ctx;
	u64	global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
	u64	global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
	bool	global_ctx_buffer_mapped;
};

/* An in-flight submit: the buffers and fence tracked from
 * gk20a_channel_add_job() until reaped by gk20a_channel_update(). */
struct channel_gk20a_job {
	struct mapped_buffer_node **mapped_buffers;
	int num_mapped_buffers;
	struct gk20a_channel_fence fence;
	struct list_head list;
};
74 | |||
/* this is the priv element of struct nvhost_channel */
struct channel_gk20a {
	struct gk20a *g;
	bool in_use;
	int hw_chid;
	bool bound;
	bool first_init;
	bool vpr;
	pid_t pid;

	/* in-flight submits; filled by gk20a_channel_add_job(), reaped
	 * by gk20a_channel_update() under jobs_lock */
	struct list_head jobs;
	struct mutex jobs_lock;

	struct vm_gk20a *vm;

	struct gpfifo_desc gpfifo;

	struct channel_ctx_gk20a ch_ctx;

	struct inst_desc inst_block;
	struct mem_desc_sub ramfc;

	void *userd_cpu_va;
	u64 userd_iova;
	u64 userd_gpu_va;

	s32 num_objects;
	u32 obj_class;	/* we support only one obj per channel */

	struct priv_cmd_queue priv_cmd_q;

	/* waited on by gk20a_channel_wait() (notifier/semaphore) and by
	 * submitters needing gpfifo space (submit_wq) */
	wait_queue_head_t notifier_wq;
	wait_queue_head_t semaphore_wq;
	wait_queue_head_t submit_wq;

	u32 timeout_accumulated_ms;
	u32 timeout_gpfifo_get;

	/* set by submit, cleared when gk20a_channel_finish() sees the
	 * last fence expire */
	bool cmds_pending;
	struct gk20a_channel_fence last_submit_fence;

	void (*remove_support)(struct channel_gk20a *);
#if defined(CONFIG_GK20A_CYCLE_STATS)
	struct {
	void *cyclestate_buffer;
	u32 cyclestate_buffer_size;
	struct dma_buf *cyclestate_buffer_handler;
	struct mutex cyclestate_buffer_mutex;
	} cyclestate;
#endif
	struct mutex dbg_s_lock;
	struct list_head dbg_s_list;

	/* once set, most channel entry points short-circuit with
	 * -ETIMEDOUT; configured via the SET_TIMEOUT ioctls */
	bool has_timedout;
	u32 timeout_ms_max;
	bool timeout_debug_dump;

	struct dma_buf *error_notifier_ref;
	struct nvhost_notification *error_notifier;
	void *error_notifier_va;

	/* synchronization backend; lazily created on first submit */
	struct gk20a_channel_sync *sync;
};
138 | |||
139 | static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch) | ||
140 | { | ||
141 | return !!ch->vm; | ||
142 | } | ||
/* Channel lifecycle and control */
int channel_gk20a_commit_va(struct channel_gk20a *c);
int gk20a_init_channel_support(struct gk20a *, u32 chid);
void gk20a_free_channel(struct channel_gk20a *ch, bool finish);
bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
		u32 timeout_delta_ms);
void gk20a_disable_channel(struct channel_gk20a *ch,
			   bool wait_for_finish,
			   unsigned long finish_timeout);
void gk20a_disable_channel_no_update(struct channel_gk20a *ch);
int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout);
void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error);
void gk20a_channel_semaphore_wakeup(struct gk20a *g);
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
			     struct priv_cmd_entry **entry);

/* Power management: suspend/resume all channels */
int gk20a_channel_suspend(struct gk20a *g);
int gk20a_channel_resume(struct gk20a *g);

/* Channel file operations */
int gk20a_channel_open(struct inode *inode, struct file *filp);
long gk20a_channel_ioctl(struct file *filp,
			 unsigned int cmd,
			 unsigned long arg);
int gk20a_channel_release(struct inode *inode, struct file *filp);
struct channel_gk20a *gk20a_get_channel_from_file(int fd);
void gk20a_channel_update(struct channel_gk20a *c, int nr_completed);

/* HAL setup */
void gk20a_init_fifo(struct gpu_ops *gops);
171 | |||
172 | #endif /*__CHANNEL_GK20A_H__*/ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c new file mode 100644 index 00000000..9f9c3ba7 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -0,0 +1,356 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/channel_sync_gk20a.c | ||
3 | * | ||
4 | * GK20A Channel Synchronization Abstraction | ||
5 | * | ||
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #include <linux/gk20a.h> | ||
19 | |||
20 | #include "channel_sync_gk20a.h" | ||
21 | #include "gk20a.h" | ||
22 | |||
23 | #ifdef CONFIG_SYNC | ||
24 | #include "../../../staging/android/sync.h" | ||
25 | #endif | ||
26 | |||
27 | #ifdef CONFIG_TEGRA_GK20A | ||
28 | #include <linux/nvhost.h> | ||
29 | #endif | ||
30 | |||
31 | #ifdef CONFIG_TEGRA_GK20A | ||
32 | |||
33 | struct gk20a_channel_syncpt { | ||
34 | struct gk20a_channel_sync ops; | ||
35 | struct channel_gk20a *c; | ||
36 | struct platform_device *host1x_pdev; | ||
37 | u32 id; | ||
38 | }; | ||
39 | |||
/*
 * Fill a 4-word priv cmdbuf fragment with a host wait method that
 * blocks the channel until syncpoint 'id' reaches value 'thresh'.
 */
static void add_wait_cmd(u32 *out, u32 id, u32 thresh)
{
	/* syncpoint_a opcode followed by the payload to compare */
	out[0] = 0x2001001C;
	out[1] = thresh;
	/* syncpoint_b opcode followed by syncpt_id | switch_en | wait */
	out[2] = 0x2001001D;
	out[3] = (id << 8) | 0x10;
}
51 | |||
52 | int gk20a_channel_syncpt_wait_cpu(struct gk20a_channel_sync *s, | ||
53 | struct gk20a_channel_fence *fence, | ||
54 | int timeout) | ||
55 | { | ||
56 | struct gk20a_channel_syncpt *sp = | ||
57 | container_of(s, struct gk20a_channel_syncpt, ops); | ||
58 | if (!fence->valid) | ||
59 | return 0; | ||
60 | return nvhost_syncpt_wait_timeout_ext( | ||
61 | sp->host1x_pdev, sp->id, fence->thresh, | ||
62 | timeout, NULL, NULL); | ||
63 | } | ||
64 | |||
65 | bool gk20a_channel_syncpt_is_expired(struct gk20a_channel_sync *s, | ||
66 | struct gk20a_channel_fence *fence) | ||
67 | { | ||
68 | struct gk20a_channel_syncpt *sp = | ||
69 | container_of(s, struct gk20a_channel_syncpt, ops); | ||
70 | if (!fence->valid) | ||
71 | return true; | ||
72 | return nvhost_syncpt_is_expired_ext(sp->host1x_pdev, sp->id, | ||
73 | fence->thresh); | ||
74 | } | ||
75 | |||
76 | int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id, | ||
77 | u32 thresh, struct priv_cmd_entry **entry) | ||
78 | { | ||
79 | struct gk20a_channel_syncpt *sp = | ||
80 | container_of(s, struct gk20a_channel_syncpt, ops); | ||
81 | struct priv_cmd_entry *wait_cmd = NULL; | ||
82 | |||
83 | if (id >= nvhost_syncpt_nb_pts_ext(sp->host1x_pdev)) { | ||
84 | dev_warn(dev_from_gk20a(sp->c->g), | ||
85 | "invalid wait id in gpfifo submit, elided"); | ||
86 | return 0; | ||
87 | } | ||
88 | |||
89 | if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh)) | ||
90 | return 0; | ||
91 | |||
92 | gk20a_channel_alloc_priv_cmdbuf(sp->c, 4, &wait_cmd); | ||
93 | if (wait_cmd == NULL) { | ||
94 | gk20a_err(dev_from_gk20a(sp->c->g), | ||
95 | "not enough priv cmd buffer space"); | ||
96 | return -EAGAIN; | ||
97 | } | ||
98 | |||
99 | add_wait_cmd(&wait_cmd->ptr[0], id, thresh); | ||
100 | |||
101 | *entry = wait_cmd; | ||
102 | return 0; | ||
103 | } | ||
104 | |||
/*
 * Build a priv cmdbuf making the GPU wait for every sync point backing
 * the Android sync fd 'fd'.  One 4-word wait is emitted per sync_pt;
 * already-expired points are replaced by 4 zero words so the cmdbuf
 * size stays exactly 4 * num_wait_cmds.  Returns -EINVAL for a bad fd,
 * -EAGAIN when no priv cmdbuf space is available, and -ENODEV when the
 * kernel is built without CONFIG_SYNC.
 */
int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
		       struct priv_cmd_entry **entry)
{
#ifdef CONFIG_SYNC
	int i;
	int num_wait_cmds;
	struct sync_pt *pt;
	struct sync_fence *sync_fence;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;

	sync_fence = nvhost_sync_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	/* Reserve 4 words of cmdbuf per sync point up front. */
	num_wait_cmds = nvhost_sync_num_pts(sync_fence);
	gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd);
	if (wait_cmd == NULL) {
		gk20a_err(dev_from_gk20a(c->g),
				"not enough priv cmd buffer space");
		sync_fence_put(sync_fence);
		return -EAGAIN;
	}

	i = 0;
	list_for_each_entry(pt, &sync_fence->pt_list_head, pt_list) {
		u32 wait_id = nvhost_sync_pt_id(pt);
		u32 wait_value = nvhost_sync_pt_thresh(pt);

		if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev,
				wait_id, wait_value)) {
			/* Expired point: emit a no-op so the layout of
			 * the fixed-size cmdbuf is preserved. */
			wait_cmd->ptr[i * 4 + 0] = 0;
			wait_cmd->ptr[i * 4 + 1] = 0;
			wait_cmd->ptr[i * 4 + 2] = 0;
			wait_cmd->ptr[i * 4 + 3] = 0;
		} else
			add_wait_cmd(&wait_cmd->ptr[i * 4], wait_id,
					wait_value);
		i++;
	}
	WARN_ON(i != num_wait_cmds);
	sync_fence_put(sync_fence);

	*entry = wait_cmd;
	return 0;
#else
	return -ENODEV;
#endif
}
156 | |||
/* host1x completion callback: kick the channel's job-update path. */
static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
{
	gk20a_channel_update((struct channel_gk20a *)priv, nr_completed);
}
162 | |||
/*
 * Emit a priv cmdbuf that increments the channel's syncpoint once and
 * produce a fence for the resulting threshold.
 *
 * gfx_class: emit a KEPLER_C-class increment instead of host methods.
 * wfi_cmd:   prepend a wait-for-idle (host class only; combining it
 *            with gfx_class triggers a WARN and is not supported).
 *
 * On success a gk20a_channel_busy() reference is held (released later
 * by the nvhost completion path, see comment below), a host1x notifier
 * is registered so gk20a_channel_update() runs once the threshold is
 * reached, the cmdbuf is returned in *entry and the fence data in
 * *fence.  Returns -EAGAIN when priv cmdbuf space runs out.
 */
static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
				       bool gfx_class, bool wfi_cmd,
				       struct priv_cmd_entry **entry,
				       struct gk20a_channel_fence *fence)
{
	u32 thresh;
	int incr_cmd_size;
	int j = 0;
	int err;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;

	/* nvhost action_gpfifo_submit_complete releases this ref. */
	err = gk20a_channel_busy(c->g->dev);
	if (err)
		return err;

	/* 4 words for the increment, 2 more when a wfi precedes it. */
	incr_cmd_size = 4;
	if (wfi_cmd)
		incr_cmd_size += 2;

	gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
	if (incr_cmd == NULL) {
		gk20a_channel_idle(c->g->dev);
		gk20a_err(dev_from_gk20a(c->g),
			"not enough priv cmd buffer space");
		return -EAGAIN;
	}

	if (gfx_class) {
		WARN_ON(wfi_cmd); /* No sense to use gfx class + wfi. */
		/* setobject KEPLER_C */
		incr_cmd->ptr[j++] = 0x20010000;
		incr_cmd->ptr[j++] = KEPLER_C;
		/* syncpt incr */
		incr_cmd->ptr[j++] = 0x200100B2;
		incr_cmd->ptr[j++] = sp->id |
			(0x1 << 20) | (0x1 << 16);
	} else {
		if (wfi_cmd) {
			/* wfi */
			incr_cmd->ptr[j++] = 0x2001001E;
			/* handle, ignored */
			incr_cmd->ptr[j++] = 0x00000000;
		}
		/* syncpoint_a */
		incr_cmd->ptr[j++] = 0x2001001C;
		/* payload, ignored */
		incr_cmd->ptr[j++] = 0;
		/* syncpoint_b */
		incr_cmd->ptr[j++] = 0x2001001D;
		/* syncpt_id, incr */
		incr_cmd->ptr[j++] = (sp->id << 8) | 0x1;
	}
	WARN_ON(j != incr_cmd_size);

	/* Reserve the next syncpoint value; this is our fence threshold. */
	thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 1);

	err = nvhost_intr_register_notifier(sp->host1x_pdev, sp->id, thresh,
			gk20a_channel_syncpt_update, c);

	/* Adding interrupt action should never fail. A proper error handling
	 * here would require us to decrement the syncpt max back to its
	 * original value. */
	if (WARN(err, "failed to set submit complete interrupt")) {
		gk20a_channel_idle(c->g->dev);
		err = 0; /* Ignore this error. */
	}

	fence->thresh = thresh;
	fence->valid = true;
	fence->wfi = wfi_cmd;
	*entry = incr_cmd;
	return 0;
}
240 | |||
241 | int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s, | ||
242 | struct priv_cmd_entry **entry, | ||
243 | struct gk20a_channel_fence *fence) | ||
244 | { | ||
245 | return __gk20a_channel_syncpt_incr(s, | ||
246 | false /* use host class */, | ||
247 | true /* wfi */, | ||
248 | entry, fence); | ||
249 | } | ||
250 | |||
251 | int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, | ||
252 | struct priv_cmd_entry **entry, | ||
253 | struct gk20a_channel_fence *fence) | ||
254 | { | ||
255 | struct gk20a_channel_syncpt *sp = | ||
256 | container_of(s, struct gk20a_channel_syncpt, ops); | ||
257 | /* Don't put wfi cmd to this one since we're not returning | ||
258 | * a fence to user space. */ | ||
259 | return __gk20a_channel_syncpt_incr(s, | ||
260 | sp->c->obj_class == KEPLER_C /* may use gfx class */, | ||
261 | false /* no wfi */, | ||
262 | entry, fence); | ||
263 | } | ||
264 | |||
265 | int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s, | ||
266 | struct priv_cmd_entry **entry, | ||
267 | struct gk20a_channel_fence *fence, | ||
268 | u32 *id, u32 *thresh) | ||
269 | { | ||
270 | struct gk20a_channel_syncpt *sp = | ||
271 | container_of(s, struct gk20a_channel_syncpt, ops); | ||
272 | /* Need to do 'host incr + wfi' or 'gfx incr' since we return the fence | ||
273 | * to user space. */ | ||
274 | int err = __gk20a_channel_syncpt_incr(s, | ||
275 | sp->c->obj_class == KEPLER_C /* use gfx class? */, | ||
276 | sp->c->obj_class != KEPLER_C /* wfi if host class */, | ||
277 | entry, fence); | ||
278 | if (err) | ||
279 | return err; | ||
280 | *id = sp->id; | ||
281 | *thresh = fence->thresh; | ||
282 | return 0; | ||
283 | } | ||
284 | |||
285 | int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s, | ||
286 | struct priv_cmd_entry **entry, | ||
287 | struct gk20a_channel_fence *fence, | ||
288 | int *fd) | ||
289 | { | ||
290 | #ifdef CONFIG_SYNC | ||
291 | int err; | ||
292 | struct nvhost_ctrl_sync_fence_info pt; | ||
293 | struct gk20a_channel_syncpt *sp = | ||
294 | container_of(s, struct gk20a_channel_syncpt, ops); | ||
295 | err = gk20a_channel_syncpt_incr_user_syncpt(s, entry, fence, | ||
296 | &pt.id, &pt.thresh); | ||
297 | if (err) | ||
298 | return err; | ||
299 | return nvhost_sync_create_fence_fd(sp->host1x_pdev, &pt, 1, | ||
300 | "fence", fd); | ||
301 | #else | ||
302 | return -ENODEV; | ||
303 | #endif | ||
304 | } | ||
305 | |||
306 | void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s) | ||
307 | { | ||
308 | struct gk20a_channel_syncpt *sp = | ||
309 | container_of(s, struct gk20a_channel_syncpt, ops); | ||
310 | nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id); | ||
311 | } | ||
312 | |||
313 | static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s) | ||
314 | { | ||
315 | struct gk20a_channel_syncpt *sp = | ||
316 | container_of(s, struct gk20a_channel_syncpt, ops); | ||
317 | nvhost_free_syncpt(sp->id); | ||
318 | kfree(sp); | ||
319 | } | ||
320 | |||
321 | static struct gk20a_channel_sync * | ||
322 | gk20a_channel_syncpt_create(struct channel_gk20a *c) | ||
323 | { | ||
324 | struct gk20a_channel_syncpt *sp; | ||
325 | |||
326 | sp = kzalloc(sizeof(*sp), GFP_KERNEL); | ||
327 | if (!sp) | ||
328 | return NULL; | ||
329 | |||
330 | sp->c = c; | ||
331 | sp->host1x_pdev = to_platform_device(c->g->dev->dev.parent); | ||
332 | sp->id = nvhost_get_syncpt_host_managed(sp->host1x_pdev, c->hw_chid); | ||
333 | |||
334 | sp->ops.wait_cpu = gk20a_channel_syncpt_wait_cpu; | ||
335 | sp->ops.is_expired = gk20a_channel_syncpt_is_expired; | ||
336 | sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt; | ||
337 | sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd; | ||
338 | sp->ops.incr = gk20a_channel_syncpt_incr; | ||
339 | sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi; | ||
340 | sp->ops.incr_user_syncpt = gk20a_channel_syncpt_incr_user_syncpt; | ||
341 | sp->ops.incr_user_fd = gk20a_channel_syncpt_incr_user_fd; | ||
342 | sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max; | ||
343 | sp->ops.destroy = gk20a_channel_syncpt_destroy; | ||
344 | return &sp->ops; | ||
345 | } | ||
346 | #endif /* CONFIG_TEGRA_GK20A */ | ||
347 | |||
/*
 * Create the synchronization backend for channel 'c'.  Only the
 * host1x-syncpoint implementation exists here, so platforms without
 * syncpoints (or kernels without CONFIG_TEGRA_GK20A) WARN and return
 * NULL.
 */
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
{
#ifdef CONFIG_TEGRA_GK20A
	if (gk20a_platform_has_syncpoints(c->g->dev))
		return gk20a_channel_syncpt_create(c);
#endif
	WARN_ON(1);
	return NULL;
}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h new file mode 100644 index 00000000..69feb89f --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | |||
@@ -0,0 +1,102 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/channel_sync_gk20a.h | ||
3 | * | ||
4 | * GK20A Channel Synchronization Abstraction | ||
5 | * | ||
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #ifndef _GK20A_CHANNEL_SYNC_H_ | ||
19 | #define _GK20A_CHANNEL_SYNC_H_ | ||
20 | |||
21 | #include <linux/types.h> | ||
22 | |||
23 | struct gk20a_channel_sync; | ||
24 | struct priv_cmd_entry; | ||
25 | struct channel_gk20a; | ||
26 | |||
/*
 * Lightweight fence: a single syncpoint (or semaphore) threshold
 * marking completion of work submitted on a channel.
 */
struct gk20a_channel_fence {
	bool valid;	/* false until an incr op populates the fence */
	bool wfi; /* was issued with preceding wfi */
	u32 thresh; /* either semaphore or syncpoint value */
};
32 | |||
/*
 * Per-channel synchronization backend (ops table).  Instances are
 * created by gk20a_channel_sync_create() and released through
 * ops->destroy().  Cmdbuf-producing ops return entries meant to be
 * appended to the channel's gpfifo submission.
 */
struct gk20a_channel_sync {
	/* CPU wait for a fence returned by incr_syncpt() or incr_fd(). */
	int (*wait_cpu)(struct gk20a_channel_sync *s,
			struct gk20a_channel_fence *fence,
			int timeout);

	/* Test whether a fence returned by incr_syncpt() or incr_fd() is
	 * expired. */
	bool (*is_expired)(struct gk20a_channel_sync *s,
			   struct gk20a_channel_fence *fence);

	/* Generate a gpu wait cmdbuf from syncpoint. */
	int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh,
			   struct priv_cmd_entry **entry);

	/* Generate a gpu wait cmdbuf from sync fd. */
	int (*wait_fd)(struct gk20a_channel_sync *s, int fd,
		       struct priv_cmd_entry **entry);

	/* Increment syncpoint/semaphore.
	 * Returns
	 *  - a gpu cmdbuf that performs the increment when executed,
	 *  - a fence that can be passed to wait_cpu() and is_expired().
	 */
	int (*incr)(struct gk20a_channel_sync *s,
		    struct priv_cmd_entry **entry,
		    struct gk20a_channel_fence *fence);

	/* Increment syncpoint/semaphore, preceded by a wfi.
	 * Returns
	 *  - a gpu cmdbuf that performs the increment when executed,
	 *  - a fence that can be passed to wait_cpu() and is_expired().
	 */
	int (*incr_wfi)(struct gk20a_channel_sync *s,
			struct priv_cmd_entry **entry,
			struct gk20a_channel_fence *fence);

	/* Increment syncpoint, so that the returned fence represents
	 * work completion (may need wfi) and can be returned to user space.
	 * Returns
	 *  - a gpu cmdbuf that performs the increment when executed,
	 *  - a fence that can be passed to wait_cpu() and is_expired(),
	 *  - a syncpoint id/value pair that can be returned to user space.
	 */
	int (*incr_user_syncpt)(struct gk20a_channel_sync *s,
				struct priv_cmd_entry **entry,
				struct gk20a_channel_fence *fence,
				u32 *id, u32 *thresh);

	/* Increment syncpoint/semaphore, so that the returned fence represents
	 * work completion (may need wfi) and can be returned to user space.
	 * Returns
	 *  - a gpu cmdbuf that performs the increment when executed,
	 *  - a fence that can be passed to wait_cpu() and is_expired(),
	 *  - a sync fd that can be returned to user space.
	 */
	int (*incr_user_fd)(struct gk20a_channel_sync *s,
			    struct priv_cmd_entry **entry,
			    struct gk20a_channel_fence *fence,
			    int *fd);

	/* Reset the channel syncpoint/semaphore. */
	void (*set_min_eq_max)(struct gk20a_channel_sync *s);

	/* Free the resources allocated by gk20a_channel_sync_create. */
	void (*destroy)(struct gk20a_channel_sync *s);
};
100 | |||
101 | struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); | ||
102 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.c b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c new file mode 100644 index 00000000..151a332b --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c | |||
@@ -0,0 +1,865 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/clk_gk20a.c | ||
3 | * | ||
4 | * GK20A Clocks | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include <linux/clk.h> | ||
22 | #include <linux/delay.h> /* for mdelay */ | ||
23 | #include <linux/module.h> | ||
24 | #include <linux/debugfs.h> | ||
25 | #include <linux/clk/tegra.h> | ||
26 | #include <mach/thermal.h> | ||
27 | |||
28 | #include "gk20a.h" | ||
29 | #include "hw_trim_gk20a.h" | ||
30 | #include "hw_timer_gk20a.h" | ||
31 | |||
32 | #define gk20a_dbg_clk(fmt, arg...) \ | ||
33 | gk20a_dbg(gpu_dbg_clk, fmt, ##arg) | ||
34 | |||
/* from vbios PLL info table */
/* Rows are (min, max) pairs for the labeled quantity, matching the
 * min_*/max_* fields read by clk_config_pll(); freq and vco values
 * are in MHz (the PLL math in this file is MHz-based). */
struct pll_parms gpc_pll_params = {
	144, 2064, /* freq */
	1000, 2064, /* vco */
	12, 38, /* u */
	1, 255, /* M */
	8, 255, /* N */
	1, 32, /* PL */
};
44 | |||
/* GPU cooling frequency table exported to the Tegra thermal framework
 * via the getters below; presumably populated elsewhere in this file
 * during clock init — not visible in this chunk, confirm at caller. */
static int num_gpu_cooling_freq;
static struct gpufreq_table_data *gpu_cooling_freq;

/* Return the GPU cooling frequency table (NULL until populated). */
struct gpufreq_table_data *tegra_gpufreq_table_get(void)
{
	return gpu_cooling_freq;
}

/* Return the number of entries in the GPU cooling frequency table. */
unsigned int tegra_gpufreq_table_size_get(void)
{
	return num_gpu_cooling_freq;
}
57 | |||
/* Map from 15 PL index values to the actual post-divider value; note
 * the table is not monotonic (12, 16 repeat at indices 10-11). */
static u8 pl_to_div[] = {
/* PL: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 */
/* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32 };
61 | |||
62 | /* Calculate and update M/N/PL as well as pll->freq | ||
63 | ref_clk_f = clk_in_f / src_div = clk_in_f; (src_div = 1 on gk20a) | ||
64 | u_f = ref_clk_f / M; | ||
65 | PLL output = vco_f = u_f * N = ref_clk_f * N / M; | ||
66 | gpc2clk = target clock frequency = vco_f / PL; | ||
67 | gpcclk = gpc2clk / 2; */ | ||
68 | static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll, | ||
69 | struct pll_parms *pll_params, u32 *target_freq, bool best_fit) | ||
70 | { | ||
71 | u32 min_vco_f, max_vco_f; | ||
72 | u32 best_M, best_N; | ||
73 | u32 low_PL, high_PL, best_PL; | ||
74 | u32 m, n, n2; | ||
75 | u32 target_vco_f, vco_f; | ||
76 | u32 ref_clk_f, target_clk_f, u_f; | ||
77 | u32 delta, lwv, best_delta = ~0; | ||
78 | int pl; | ||
79 | |||
80 | BUG_ON(target_freq == NULL); | ||
81 | |||
82 | gk20a_dbg_fn("request target freq %d MHz", *target_freq); | ||
83 | |||
84 | ref_clk_f = pll->clk_in; | ||
85 | target_clk_f = *target_freq; | ||
86 | max_vco_f = pll_params->max_vco; | ||
87 | min_vco_f = pll_params->min_vco; | ||
88 | best_M = pll_params->max_M; | ||
89 | best_N = pll_params->min_N; | ||
90 | best_PL = pll_params->min_PL; | ||
91 | |||
92 | target_vco_f = target_clk_f + target_clk_f / 50; | ||
93 | if (max_vco_f < target_vco_f) | ||
94 | max_vco_f = target_vco_f; | ||
95 | |||
96 | high_PL = (max_vco_f + target_vco_f - 1) / target_vco_f; | ||
97 | high_PL = min(high_PL, pll_params->max_PL); | ||
98 | high_PL = max(high_PL, pll_params->min_PL); | ||
99 | |||
100 | low_PL = min_vco_f / target_vco_f; | ||
101 | low_PL = min(low_PL, pll_params->max_PL); | ||
102 | low_PL = max(low_PL, pll_params->min_PL); | ||
103 | |||
104 | /* Find Indices of high_PL and low_PL */ | ||
105 | for (pl = 0; pl < 14; pl++) { | ||
106 | if (pl_to_div[pl] >= low_PL) { | ||
107 | low_PL = pl; | ||
108 | break; | ||
109 | } | ||
110 | } | ||
111 | for (pl = 0; pl < 14; pl++) { | ||
112 | if (pl_to_div[pl] >= high_PL) { | ||
113 | high_PL = pl; | ||
114 | break; | ||
115 | } | ||
116 | } | ||
117 | gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)", | ||
118 | low_PL, pl_to_div[low_PL], high_PL, pl_to_div[high_PL]); | ||
119 | |||
120 | for (pl = low_PL; pl <= high_PL; pl++) { | ||
121 | target_vco_f = target_clk_f * pl_to_div[pl]; | ||
122 | |||
123 | for (m = pll_params->min_M; m <= pll_params->max_M; m++) { | ||
124 | u_f = ref_clk_f / m; | ||
125 | |||
126 | if (u_f < pll_params->min_u) | ||
127 | break; | ||
128 | if (u_f > pll_params->max_u) | ||
129 | continue; | ||
130 | |||
131 | n = (target_vco_f * m) / ref_clk_f; | ||
132 | n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f; | ||
133 | |||
134 | if (n > pll_params->max_N) | ||
135 | break; | ||
136 | |||
137 | for (; n <= n2; n++) { | ||
138 | if (n < pll_params->min_N) | ||
139 | continue; | ||
140 | if (n > pll_params->max_N) | ||
141 | break; | ||
142 | |||
143 | vco_f = ref_clk_f * n / m; | ||
144 | |||
145 | if (vco_f >= min_vco_f && vco_f <= max_vco_f) { | ||
146 | lwv = (vco_f + (pl_to_div[pl] / 2)) | ||
147 | / pl_to_div[pl]; | ||
148 | delta = abs(lwv - target_clk_f); | ||
149 | |||
150 | if (delta < best_delta) { | ||
151 | best_delta = delta; | ||
152 | best_M = m; | ||
153 | best_N = n; | ||
154 | best_PL = pl; | ||
155 | |||
156 | if (best_delta == 0 || | ||
157 | /* 0.45% for non best fit */ | ||
158 | (!best_fit && (vco_f / best_delta > 218))) { | ||
159 | goto found_match; | ||
160 | } | ||
161 | |||
162 | gk20a_dbg_info("delta %d @ M %d, N %d, PL %d", | ||
163 | delta, m, n, pl); | ||
164 | } | ||
165 | } | ||
166 | } | ||
167 | } | ||
168 | } | ||
169 | |||
170 | found_match: | ||
171 | BUG_ON(best_delta == ~0); | ||
172 | |||
173 | if (best_fit && best_delta != 0) | ||
174 | gk20a_dbg_clk("no best match for target @ %dMHz on gpc_pll", | ||
175 | target_clk_f); | ||
176 | |||
177 | pll->M = best_M; | ||
178 | pll->N = best_N; | ||
179 | pll->PL = best_PL; | ||
180 | |||
181 | /* save current frequency */ | ||
182 | pll->freq = ref_clk_f * pll->N / (pll->M * pl_to_div[pll->PL]); | ||
183 | |||
184 | *target_freq = pll->freq; | ||
185 | |||
186 | gk20a_dbg_clk("actual target freq %d MHz, M %d, N %d, PL %d(div%d)", | ||
187 | *target_freq, pll->M, pll->N, pll->PL, pl_to_div[pll->PL]); | ||
188 | |||
189 | gk20a_dbg_fn("done"); | ||
190 | |||
191 | return 0; | ||
192 | } | ||
193 | |||
/*
 * Dynamically ramp the GPC PLL feedback divider (NDIV) to 'n' without
 * bypassing the PLL: program ramp step coefficients, enter slowdown
 * mode, latch the new NDIV, trigger the dynamic ramp and poll for its
 * completion (~500us budget), then leave slowdown mode.  Returns 0 on
 * success or -ETIMEDOUT if the ramp never reports done.
 */
static int clk_slide_gpc_pll(struct gk20a *g, u32 n)
{
	u32 data, coeff;
	u32 nold;
	int ramp_timeout = 500;

	/* get old coefficients */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);

	/* do nothing if NDIV is same */
	if (n == nold)
		return 0;

	/* setup */
	data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
	data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
			trim_sys_gpcpll_cfg2_pll_stepa_f(0x2b));
	gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
	data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
	data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
			trim_sys_gpcpll_cfg3_pll_stepb_f(0xb));
	gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);

	/* pll slowdown mode */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	/* new ndiv ready for ramp */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
			trim_sys_gpcpll_coeff_ndiv_f(n));
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

	/* dynamic ramp to new ndiv */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	/* poll the broadcast debug register until the ramp completes */
	do {
		udelay(1);
		ramp_timeout--;
		data = gk20a_readl(
			g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
		if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
			break;
	} while (ramp_timeout > 0);

	/* exit slowdown mode */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
	/* read back to flush the posted write before returning */
	gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());

	if (ramp_timeout <= 0) {
		gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout");
		return -ETIMEDOUT;
	}
	return 0;
}
266 | |||
/*
 * Program the GPC PLL to the M/N/PL stored in clk->gpc_pll.
 *
 * If only NDIV changed and the PLL is enabled, a glitchless dynamic
 * NDIV slide is used (allow_slide).  Otherwise the PLL is taken
 * through the full sequence: slide down to NDIV_LO, halve the output
 * divider, bypass, power up out of IDDQ if needed, disable, write new
 * coefficients, re-enable, wait for lock, switch back to VCO, restore
 * the 1:1 output divider, then slide up to the target NDIV.
 * Silicon-only; a no-op on simulation/FPGA platforms.
 */
static int clk_program_gpc_pll(struct gk20a *g, struct clk_gk20a *clk,
			int allow_slide)
{
	u32 data, cfg, coeff, timeout;
	u32 m, n, pl;
	u32 nlo;

	gk20a_dbg_fn("");

	if (!tegra_platform_is_silicon())
		return 0;

	/* get old coefficients */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	m = trim_sys_gpcpll_coeff_mdiv_v(coeff);
	n = trim_sys_gpcpll_coeff_ndiv_v(coeff);
	pl = trim_sys_gpcpll_coeff_pldiv_v(coeff);

	/* do NDIV slide if there is no change in M and PL */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	if (allow_slide && clk->gpc_pll.M == m && clk->gpc_pll.PL == pl
		&& trim_sys_gpcpll_cfg_enable_v(cfg)) {
		return clk_slide_gpc_pll(g, clk->gpc_pll.N);
	}

	/* slide down to NDIV_LO */
	nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco, clk->gpc_pll.clk_in);
	if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
		int ret = clk_slide_gpc_pll(g, nlo);
		if (ret)
			return ret;
	}

	/* split FO-to-bypass jump in halfs by setting out divider 1:2 */
	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
		trim_sys_gpc2clk_out_vcodiv_f(2));
	gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);

	/* put PLL in bypass before programming it */
	data = gk20a_readl(g, trim_sys_sel_vco_r());
	data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
		trim_sys_sel_vco_gpc2clk_out_bypass_f());
	udelay(2);
	gk20a_writel(g, trim_sys_sel_vco_r(), data);

	/* get out from IDDQ */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	if (trim_sys_gpcpll_cfg_iddq_v(cfg)) {
		cfg = set_field(cfg, trim_sys_gpcpll_cfg_iddq_m(),
			trim_sys_gpcpll_cfg_iddq_power_on_v());
		gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
		gk20a_readl(g, trim_sys_gpcpll_cfg_r());
		udelay(2);
	}

	/* disable PLL before changing coefficients */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
			trim_sys_gpcpll_cfg_enable_no_f());
	gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
	gk20a_readl(g, trim_sys_gpcpll_cfg_r());

	/* change coefficients */
	nlo = DIV_ROUND_UP(clk->gpc_pll.M * gpc_pll_params.min_vco,
			clk->gpc_pll.clk_in);
	coeff = trim_sys_gpcpll_coeff_mdiv_f(clk->gpc_pll.M) |
		trim_sys_gpcpll_coeff_ndiv_f(allow_slide ?
			nlo : clk->gpc_pll.N) |
		trim_sys_gpcpll_coeff_pldiv_f(clk->gpc_pll.PL);
	gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

	/* enable PLL after changing coefficients */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
			trim_sys_gpcpll_cfg_enable_yes_f());
	gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);

	/* lock pll */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	if (cfg & trim_sys_gpcpll_cfg_enb_lckdet_power_off_f()){
		cfg = set_field(cfg, trim_sys_gpcpll_cfg_enb_lckdet_m(),
			trim_sys_gpcpll_cfg_enb_lckdet_power_on_f());
		gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
	}

	/* wait pll lock */
	timeout = clk->pll_delay / 2 + 1;
	do {
		cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
		if (cfg & trim_sys_gpcpll_cfg_pll_lock_true_f())
			goto pll_locked;
		udelay(2);
	} while (--timeout > 0);

	/* PLL is messed up. What can we do here? */
	BUG();
	return -EBUSY;

pll_locked:
	/* put PLL back on vco */
	data = gk20a_readl(g, trim_sys_sel_vco_r());
	data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
		trim_sys_sel_vco_gpc2clk_out_vco_f());
	gk20a_writel(g, trim_sys_sel_vco_r(), data);
	clk->gpc_pll.enabled = true;

	/* restore out divider 1:1 */
	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
		trim_sys_gpc2clk_out_vcodiv_by1_f());
	udelay(2);
	gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);

	/* slide up to target NDIV */
	return clk_slide_gpc_pll(g, clk->gpc_pll.N);
}
384 | |||
/*
 * Disable the GPC PLL.
 *
 * If @allow_slide is set and the PLL is currently enabled, first slide
 * NDIV down to the minimum-VCO setting so the frequency drop is
 * glitchless, then switch the gpc2clk output to bypass and power the
 * PLL off.  Always returns 0.
 */
static int clk_disable_gpcpll(struct gk20a *g, int allow_slide)
{
	u32 cfg, coeff, m, nlo;
	struct clk_gk20a *clk = &g->clk;

	/* slide to VCO min */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
		/* recompute NDIV-low from the live M divider so the slide
		   lands exactly on the minimum VCO frequency */
		coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
		m = trim_sys_gpcpll_coeff_mdiv_v(coeff);
		nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco,
			      clk->gpc_pll.clk_in);
		clk_slide_gpc_pll(g, nlo);
	}

	/* put PLL in bypass before disabling it */
	cfg = gk20a_readl(g, trim_sys_sel_vco_r());
	cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(),
			trim_sys_sel_vco_gpc2clk_out_bypass_f());
	gk20a_writel(g, trim_sys_sel_vco_r(), cfg);

	/* disable PLL */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
			trim_sys_gpcpll_cfg_enable_no_f());
	gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
	/* read back to make sure the disable write has posted */
	gk20a_readl(g, trim_sys_gpcpll_cfg_r());

	clk->gpc_pll.enabled = false;
	return 0;
}
416 | |||
/* HW reset/enable hook for the clock unit; nothing to do on gk20a. */
static int gk20a_init_clk_reset_enable_hw(struct gk20a *g)
{
	gk20a_dbg_fn("");
	return 0;
}
422 | |||
423 | struct clk *gk20a_clk_get(struct gk20a *g) | ||
424 | { | ||
425 | if (!g->clk.tegra_clk) { | ||
426 | struct clk *clk; | ||
427 | |||
428 | clk = clk_get_sys("tegra_gk20a", "gpu"); | ||
429 | if (IS_ERR(clk)) { | ||
430 | gk20a_err(dev_from_gk20a(g), | ||
431 | "fail to get tegra gpu clk tegra_gk20a/gpu"); | ||
432 | return NULL; | ||
433 | } | ||
434 | g->clk.tegra_clk = clk; | ||
435 | } | ||
436 | |||
437 | return g->clk.tegra_clk; | ||
438 | } | ||
439 | |||
440 | static int gk20a_init_clk_setup_sw(struct gk20a *g) | ||
441 | { | ||
442 | struct clk_gk20a *clk = &g->clk; | ||
443 | static int initialized; | ||
444 | unsigned long *freqs; | ||
445 | int err, num_freqs; | ||
446 | struct clk *ref; | ||
447 | unsigned long ref_rate; | ||
448 | |||
449 | gk20a_dbg_fn(""); | ||
450 | |||
451 | if (clk->sw_ready) { | ||
452 | gk20a_dbg_fn("skip init"); | ||
453 | return 0; | ||
454 | } | ||
455 | |||
456 | if (!gk20a_clk_get(g)) | ||
457 | return -EINVAL; | ||
458 | |||
459 | ref = clk_get_parent(clk_get_parent(clk->tegra_clk)); | ||
460 | if (IS_ERR(ref)) { | ||
461 | gk20a_err(dev_from_gk20a(g), | ||
462 | "failed to get GPCPLL reference clock"); | ||
463 | return -EINVAL; | ||
464 | } | ||
465 | ref_rate = clk_get_rate(ref); | ||
466 | |||
467 | clk->pll_delay = 300; /* usec */ | ||
468 | |||
469 | clk->gpc_pll.id = GK20A_GPC_PLL; | ||
470 | clk->gpc_pll.clk_in = ref_rate / 1000000; /* MHz */ | ||
471 | |||
472 | /* Decide initial frequency */ | ||
473 | if (!initialized) { | ||
474 | initialized = 1; | ||
475 | clk->gpc_pll.M = 1; | ||
476 | clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco, | ||
477 | clk->gpc_pll.clk_in); | ||
478 | clk->gpc_pll.PL = 1; | ||
479 | clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N; | ||
480 | clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL]; | ||
481 | } | ||
482 | |||
483 | err = tegra_dvfs_get_freqs(clk_get_parent(clk->tegra_clk), | ||
484 | &freqs, &num_freqs); | ||
485 | if (!err) { | ||
486 | int i, j; | ||
487 | |||
488 | /* init j for inverse traversal of frequencies */ | ||
489 | j = num_freqs - 1; | ||
490 | |||
491 | gpu_cooling_freq = kzalloc( | ||
492 | (1 + num_freqs) * sizeof(*gpu_cooling_freq), | ||
493 | GFP_KERNEL); | ||
494 | |||
495 | /* store frequencies in inverse order */ | ||
496 | for (i = 0; i < num_freqs; ++i, --j) { | ||
497 | gpu_cooling_freq[i].index = i; | ||
498 | gpu_cooling_freq[i].frequency = freqs[j]; | ||
499 | } | ||
500 | |||
501 | /* add 'end of table' marker */ | ||
502 | gpu_cooling_freq[i].index = i; | ||
503 | gpu_cooling_freq[i].frequency = GPUFREQ_TABLE_END; | ||
504 | |||
505 | /* store number of frequencies */ | ||
506 | num_gpu_cooling_freq = num_freqs + 1; | ||
507 | } | ||
508 | |||
509 | mutex_init(&clk->clk_mutex); | ||
510 | |||
511 | clk->sw_ready = true; | ||
512 | |||
513 | gk20a_dbg_fn("done"); | ||
514 | return 0; | ||
515 | } | ||
516 | |||
/*
 * One-time hardware setup for the gpc2clk output path: SDIV in
 * div4 mode, VCO divider 1:1, bypass divider 0.
 */
static int gk20a_init_clk_setup_hw(struct gk20a *g)
{
	u32 data;

	gk20a_dbg_fn("");

	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	data = set_field(data,
			trim_sys_gpc2clk_out_sdiv14_m() |
			trim_sys_gpc2clk_out_vcodiv_m() |
			trim_sys_gpc2clk_out_bypdiv_m(),
			trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() |
			trim_sys_gpc2clk_out_vcodiv_by1_f() |
			trim_sys_gpc2clk_out_bypdiv_f(0));
	gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);

	return 0;
}
535 | |||
536 | static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq) | ||
537 | { | ||
538 | struct clk_gk20a *clk = &g->clk; | ||
539 | |||
540 | if (freq > gpc_pll_params.max_freq) | ||
541 | freq = gpc_pll_params.max_freq; | ||
542 | else if (freq < gpc_pll_params.min_freq) | ||
543 | freq = gpc_pll_params.min_freq; | ||
544 | |||
545 | if (freq != old_freq) { | ||
546 | /* gpc_pll.freq is changed to new value here */ | ||
547 | if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params, | ||
548 | &freq, true)) { | ||
549 | gk20a_err(dev_from_gk20a(g), | ||
550 | "failed to set pll target for %d", freq); | ||
551 | return -EINVAL; | ||
552 | } | ||
553 | } | ||
554 | return 0; | ||
555 | } | ||
556 | |||
557 | static int set_pll_freq(struct gk20a *g, u32 freq, u32 old_freq) | ||
558 | { | ||
559 | struct clk_gk20a *clk = &g->clk; | ||
560 | int err = 0; | ||
561 | |||
562 | gk20a_dbg_fn("curr freq: %dMHz, target freq %dMHz", old_freq, freq); | ||
563 | |||
564 | if ((freq == old_freq) && clk->gpc_pll.enabled) | ||
565 | return 0; | ||
566 | |||
567 | /* change frequency only if power is on */ | ||
568 | if (g->clk.clk_hw_on) { | ||
569 | err = clk_program_gpc_pll(g, clk, 1); | ||
570 | if (err) | ||
571 | err = clk_program_gpc_pll(g, clk, 0); | ||
572 | } | ||
573 | |||
574 | /* Just report error but not restore PLL since dvfs could already change | ||
575 | voltage even when it returns error. */ | ||
576 | if (err) | ||
577 | gk20a_err(dev_from_gk20a(g), | ||
578 | "failed to set pll to %d", freq); | ||
579 | return err; | ||
580 | } | ||
581 | |||
582 | static int gk20a_clk_export_set_rate(void *data, unsigned long *rate) | ||
583 | { | ||
584 | u32 old_freq; | ||
585 | int ret = -ENODATA; | ||
586 | struct gk20a *g = data; | ||
587 | struct clk_gk20a *clk = &g->clk; | ||
588 | |||
589 | if (rate) { | ||
590 | mutex_lock(&clk->clk_mutex); | ||
591 | old_freq = clk->gpc_pll.freq; | ||
592 | ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq); | ||
593 | if (!ret && clk->gpc_pll.enabled) | ||
594 | ret = set_pll_freq(g, clk->gpc_pll.freq, old_freq); | ||
595 | if (!ret) | ||
596 | *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); | ||
597 | mutex_unlock(&clk->clk_mutex); | ||
598 | } | ||
599 | return ret; | ||
600 | } | ||
601 | |||
602 | static int gk20a_clk_export_enable(void *data) | ||
603 | { | ||
604 | int ret; | ||
605 | struct gk20a *g = data; | ||
606 | struct clk_gk20a *clk = &g->clk; | ||
607 | |||
608 | mutex_lock(&clk->clk_mutex); | ||
609 | ret = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq); | ||
610 | mutex_unlock(&clk->clk_mutex); | ||
611 | return ret; | ||
612 | } | ||
613 | |||
614 | static void gk20a_clk_export_disable(void *data) | ||
615 | { | ||
616 | struct gk20a *g = data; | ||
617 | struct clk_gk20a *clk = &g->clk; | ||
618 | |||
619 | mutex_lock(&clk->clk_mutex); | ||
620 | if (g->clk.clk_hw_on) | ||
621 | clk_disable_gpcpll(g, 1); | ||
622 | mutex_unlock(&clk->clk_mutex); | ||
623 | } | ||
624 | |||
625 | static void gk20a_clk_export_init(void *data, unsigned long *rate, bool *state) | ||
626 | { | ||
627 | struct gk20a *g = data; | ||
628 | struct clk_gk20a *clk = &g->clk; | ||
629 | |||
630 | mutex_lock(&clk->clk_mutex); | ||
631 | if (state) | ||
632 | *state = clk->gpc_pll.enabled; | ||
633 | if (rate) | ||
634 | *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); | ||
635 | mutex_unlock(&clk->clk_mutex); | ||
636 | } | ||
637 | |||
/* Callbacks exported to the Tegra clock framework for the GPU clock.
 * The .data member is filled in at registration time with the gk20a
 * instance (see gk20a_clk_register_export_ops). */
static struct tegra_clk_export_ops gk20a_clk_export_ops = {
	.init = gk20a_clk_export_init,
	.enable = gk20a_clk_export_enable,
	.disable = gk20a_clk_export_disable,
	.set_rate = gk20a_clk_export_set_rate,
};
644 | |||
645 | static int gk20a_clk_register_export_ops(struct gk20a *g) | ||
646 | { | ||
647 | int ret; | ||
648 | struct clk *c; | ||
649 | |||
650 | if (gk20a_clk_export_ops.data) | ||
651 | return 0; | ||
652 | |||
653 | gk20a_clk_export_ops.data = (void *)g; | ||
654 | c = g->clk.tegra_clk; | ||
655 | if (!c || !clk_get_parent(c)) | ||
656 | return -ENOSYS; | ||
657 | |||
658 | ret = tegra_clk_register_export_ops(clk_get_parent(c), | ||
659 | &gk20a_clk_export_ops); | ||
660 | |||
661 | return ret; | ||
662 | } | ||
663 | |||
664 | int gk20a_init_clk_support(struct gk20a *g) | ||
665 | { | ||
666 | struct clk_gk20a *clk = &g->clk; | ||
667 | u32 err; | ||
668 | |||
669 | gk20a_dbg_fn(""); | ||
670 | |||
671 | clk->g = g; | ||
672 | |||
673 | err = gk20a_init_clk_reset_enable_hw(g); | ||
674 | if (err) | ||
675 | return err; | ||
676 | |||
677 | err = gk20a_init_clk_setup_sw(g); | ||
678 | if (err) | ||
679 | return err; | ||
680 | |||
681 | mutex_lock(&clk->clk_mutex); | ||
682 | clk->clk_hw_on = true; | ||
683 | |||
684 | err = gk20a_init_clk_setup_hw(g); | ||
685 | mutex_unlock(&clk->clk_mutex); | ||
686 | if (err) | ||
687 | return err; | ||
688 | |||
689 | err = gk20a_clk_register_export_ops(g); | ||
690 | if (err) | ||
691 | return err; | ||
692 | |||
693 | /* FIXME: this effectively prevents host level clock gating */ | ||
694 | err = clk_enable(g->clk.tegra_clk); | ||
695 | if (err) | ||
696 | return err; | ||
697 | |||
698 | /* The prev call may not enable PLL if gbus is unbalanced - force it */ | ||
699 | mutex_lock(&clk->clk_mutex); | ||
700 | err = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq); | ||
701 | mutex_unlock(&clk->clk_mutex); | ||
702 | if (err) | ||
703 | return err; | ||
704 | |||
705 | return err; | ||
706 | } | ||
707 | |||
708 | unsigned long gk20a_clk_get_rate(struct gk20a *g) | ||
709 | { | ||
710 | struct clk_gk20a *clk = &g->clk; | ||
711 | return rate_gpc2clk_to_gpu(clk->gpc_pll.freq); | ||
712 | } | ||
713 | |||
/*
 * Round @rate (Hz) to the nearest rate the GPU clock's parent supports.
 * Falls back to returning @rate unchanged if the clock handle cannot
 * be acquired.
 */
long gk20a_clk_round_rate(struct gk20a *g, unsigned long rate)
{
	/* make sure the clock is available */
	if (!gk20a_clk_get(g))
		return rate;

	return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate);
}
722 | |||
/* Set the GPU clock to @rate (Hz) via the Tegra clock framework. */
int gk20a_clk_set_rate(struct gk20a *g, unsigned long rate)
{
	return clk_set_rate(g->clk.tegra_clk, rate);
}
727 | |||
728 | int gk20a_suspend_clk_support(struct gk20a *g) | ||
729 | { | ||
730 | int ret; | ||
731 | |||
732 | clk_disable(g->clk.tegra_clk); | ||
733 | |||
734 | /* The prev call may not disable PLL if gbus is unbalanced - force it */ | ||
735 | mutex_lock(&g->clk.clk_mutex); | ||
736 | ret = clk_disable_gpcpll(g, 1); | ||
737 | g->clk.clk_hw_on = false; | ||
738 | mutex_unlock(&g->clk.clk_mutex); | ||
739 | return ret; | ||
740 | } | ||
741 | |||
742 | #ifdef CONFIG_DEBUG_FS | ||
743 | |||
744 | static int rate_get(void *data, u64 *val) | ||
745 | { | ||
746 | struct gk20a *g = (struct gk20a *)data; | ||
747 | *val = (u64)gk20a_clk_get_rate(g); | ||
748 | return 0; | ||
749 | } | ||
750 | static int rate_set(void *data, u64 val) | ||
751 | { | ||
752 | struct gk20a *g = (struct gk20a *)data; | ||
753 | return gk20a_clk_set_rate(g, (u32)val); | ||
754 | } | ||
755 | DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n"); | ||
756 | |||
/*
 * debugfs "pll_reg" read: dump the GPCPLL cfg/coeff registers and the
 * frequency they encode.  Requires the clock hardware to be powered.
 */
static int pll_reg_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u32 reg, m, n, pl, f;

	mutex_lock(&g->clk.clk_mutex);
	if (!g->clk.clk_hw_on) {
		seq_printf(s, "gk20a powered down - no access to registers\n");
		mutex_unlock(&g->clk.clk_mutex);
		return 0;
	}

	reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	seq_printf(s, "cfg = 0x%x : %s : %s\n", reg,
		trim_sys_gpcpll_cfg_enable_v(reg) ? "enabled" : "disabled",
		trim_sys_gpcpll_cfg_pll_lock_v(reg) ? "locked" : "unlocked");

	reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	m = trim_sys_gpcpll_coeff_mdiv_v(reg);
	n = trim_sys_gpcpll_coeff_ndiv_v(reg);
	pl = trim_sys_gpcpll_coeff_pldiv_v(reg);
	/* PLL output MHz = clk_in * N / (M * post-divider) */
	f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div[pl]);
	seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
	/* gpc2clk runs at twice the GPU clock, hence f/2 for gpu_f */
	seq_printf(s, " : pll_f(gpu_f) = %u(%u) MHz\n", f, f/2);
	mutex_unlock(&g->clk.clk_mutex);
	return 0;
}
784 | |||
/* seq_file open wrapper for the pll_reg debugfs node. */
static int pll_reg_open(struct inode *inode, struct file *file)
{
	return single_open(file, pll_reg_show, inode->i_private);
}
789 | |||
/* Read-only seq_file fops for the pll_reg debugfs node. */
static const struct file_operations pll_reg_fops = {
	.open		= pll_reg_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
796 | |||
/*
 * debugfs "monitor" read: measure the live GPCCLK frequency with the
 * on-chip clock counter, which counts GPCCLK edges over a programmed
 * number of input-clock cycles.  Returns -EBUSY if the counter had not
 * settled between the two samples.
 */
static int monitor_get(void *data, u64 *val)
{
	struct gk20a *g = (struct gk20a *)data;
	struct clk_gk20a *clk = &g->clk;
	int err;

	u32 ncycle = 100; /* count GPCCLK for ncycle of clkin */
	u32 clkin = clk->gpc_pll.clk_in;
	u32 count1, count2;

	err = gk20a_busy(g->dev);
	if (err)
		return err;

	/* reset the counter, then arm it for ncycle input clocks */
	gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
		trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f());
	gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
		trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() |
		trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() |
		trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle));
	/* start */

	/* It should take about 8us to finish 100 cycle of 12MHz.
	   But longer than 100us delay is required here. */
	gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0));
	udelay(2000);

	/* sample twice; equal readings mean the count has settled */
	count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
	udelay(100);
	count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
	*val = (u64)(trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2) * clkin / ncycle);
	gk20a_idle(g->dev);

	if (count1 != count2)
		return -EBUSY;
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n");
835 | |||
836 | int clk_gk20a_debugfs_init(struct platform_device *dev) | ||
837 | { | ||
838 | struct dentry *d; | ||
839 | struct gk20a_platform *platform = platform_get_drvdata(dev); | ||
840 | struct gk20a *g = get_gk20a(dev); | ||
841 | |||
842 | d = debugfs_create_file( | ||
843 | "rate", S_IRUGO|S_IWUSR, platform->debugfs, g, &rate_fops); | ||
844 | if (!d) | ||
845 | goto err_out; | ||
846 | |||
847 | d = debugfs_create_file( | ||
848 | "pll_reg", S_IRUGO, platform->debugfs, g, &pll_reg_fops); | ||
849 | if (!d) | ||
850 | goto err_out; | ||
851 | |||
852 | d = debugfs_create_file( | ||
853 | "monitor", S_IRUGO, platform->debugfs, g, &monitor_fops); | ||
854 | if (!d) | ||
855 | goto err_out; | ||
856 | |||
857 | return 0; | ||
858 | |||
859 | err_out: | ||
860 | pr_err("%s: Failed to make debugfs node\n", __func__); | ||
861 | debugfs_remove_recursive(platform->debugfs); | ||
862 | return -ENOMEM; | ||
863 | } | ||
864 | |||
865 | #endif /* CONFIG_DEBUG_FS */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.h b/drivers/gpu/nvgpu/gk20a/clk_gk20a.h new file mode 100644 index 00000000..d2665259 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.h | |||
@@ -0,0 +1,94 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/clk_gk20a.h | ||
3 | * | ||
4 | * GK20A Graphics | ||
5 | * | ||
6 | * Copyright (c) 2011 - 2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #ifndef _NVHOST_CLK_GK20A_H_ | ||
22 | #define _NVHOST_CLK_GK20A_H_ | ||
23 | |||
24 | #include <linux/mutex.h> | ||
25 | |||
/* sentinel terminating the gpu_cooling_freq table */
#define GPUFREQ_TABLE_END ~(u32)1
enum {
	/* only one PLL for gk20a */
	GK20A_GPC_PLL = 0,
};
31 | |||
/* Software image of one PLL's configuration. */
struct pll {
	u32 id;		/* GK20A_GPC_PLL */
	u32 clk_in;	/* reference clock, MHz */
	u32 M;		/* input divider */
	u32 N;		/* feedback divider */
	u32 PL;		/* post-divider selector (index into pl_to_div) */
	u32 freq;	/* resulting output frequency, MHz */
	bool enabled;	/* PLL is powered and driving the output */
};
41 | |||
/* Static limits used when selecting PLL coefficients. */
struct pll_parms {
	u32 min_freq, max_freq;	/* output frequency range, MHz */
	u32 min_vco, max_vco;	/* VCO frequency range, MHz */
	u32 min_u, max_u;	/* MHz */
	u32 min_M, max_M;	/* input divider range */
	u32 min_N, max_N;	/* feedback divider range */
	u32 min_PL, max_PL;	/* post-divider range */
};
50 | |||
/* Per-GPU clock driver state. */
struct clk_gk20a {
	struct gk20a *g;	/* owning GPU instance */
	struct clk *tegra_clk;	/* handle into the Tegra clock framework */
	struct pll gpc_pll;	/* cached GPCPLL settings */
	u32 pll_delay;		/* default PLL settle time, usec */
	struct mutex clk_mutex;	/* guards PLL programming and this state */
	bool sw_ready;		/* gk20a_init_clk_setup_sw() completed */
	bool clk_hw_on;		/* clock hardware is powered/accessible */
};
60 | |||
/* One entry in the GPU cooling frequency table. */
struct gpufreq_table_data {
	unsigned int index;	/* position in the table */
	unsigned int frequency; /* MHz, or GPUFREQ_TABLE_END marker */
};
65 | |||
66 | struct gpufreq_table_data *tegra_gpufreq_table_get(void); | ||
67 | |||
68 | unsigned int tegra_gpufreq_table_size_get(void); | ||
69 | |||
70 | int gk20a_init_clk_support(struct gk20a *g); | ||
71 | |||
72 | unsigned long gk20a_clk_get_rate(struct gk20a *g); | ||
73 | int gk20a_clk_set_rate(struct gk20a *g, unsigned long rate); | ||
74 | int gk20a_suspend_clk_support(struct gk20a *g); | ||
75 | struct clk *gk20a_clk_get(struct gk20a *g); | ||
76 | long gk20a_clk_round_rate(struct gk20a *g, unsigned long rate); | ||
77 | |||
78 | extern struct pll_parms gpc_pll_params; | ||
79 | |||
80 | #define KHZ 1000 | ||
81 | #define MHZ 1000000 | ||
82 | |||
static inline unsigned long rate_gpc2clk_to_gpu(unsigned long rate)
{
	/* convert the MHz gpc2clk frequency to the Hz GPU clock rate;
	   gpc2clk runs at twice the GPU clock, hence the divide by 2 */
	return (rate * MHZ) / 2;
}
static inline unsigned long rate_gpu_to_gpc2clk(unsigned long rate)
{
	/* convert the Hz GPU clock rate to the MHz gpc2clk frequency */
	return (rate * 2) / MHZ;
}
93 | |||
94 | #endif /* _NVHOST_CLK_GK20A_H_ */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c new file mode 100644 index 00000000..9128959f --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | |||
@@ -0,0 +1,240 @@ | |||
1 | /* | ||
2 | * GK20A Ctrl | ||
3 | * | ||
4 | * Copyright (c) 2011-2014, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/highmem.h> | ||
20 | #include <linux/cdev.h> | ||
21 | #include <linux/nvhost_gpu_ioctl.h> | ||
22 | |||
23 | #include "gk20a.h" | ||
24 | |||
25 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) | ||
26 | { | ||
27 | int err; | ||
28 | struct gk20a *g; | ||
29 | |||
30 | gk20a_dbg_fn(""); | ||
31 | |||
32 | g = container_of(inode->i_cdev, | ||
33 | struct gk20a, ctrl.cdev); | ||
34 | |||
35 | filp->private_data = g->dev; | ||
36 | |||
37 | err = gk20a_get_client(g); | ||
38 | if (err) { | ||
39 | gk20a_dbg_fn("fail to get channel!"); | ||
40 | return err; | ||
41 | } | ||
42 | |||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp) | ||
47 | { | ||
48 | struct platform_device *dev = filp->private_data; | ||
49 | |||
50 | gk20a_dbg_fn(""); | ||
51 | |||
52 | gk20a_put_client(get_gk20a(dev)); | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | static long | ||
57 | gk20a_ctrl_ioctl_gpu_characteristics( | ||
58 | struct gk20a *g, | ||
59 | struct nvhost_gpu_get_characteristics *request) | ||
60 | { | ||
61 | struct nvhost_gpu_characteristics *pgpu = &g->gpu_characteristics; | ||
62 | long err = 0; | ||
63 | |||
64 | if (request->gpu_characteristics_buf_size > 0) { | ||
65 | size_t write_size = sizeof(*pgpu); | ||
66 | |||
67 | if (write_size > request->gpu_characteristics_buf_size) | ||
68 | write_size = request->gpu_characteristics_buf_size; | ||
69 | |||
70 | err = copy_to_user((void __user *)(uintptr_t) | ||
71 | request->gpu_characteristics_buf_addr, | ||
72 | pgpu, write_size); | ||
73 | } | ||
74 | |||
75 | if (err == 0) | ||
76 | request->gpu_characteristics_buf_size = sizeof(*pgpu); | ||
77 | |||
78 | return err; | ||
79 | } | ||
80 | |||
81 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
82 | { | ||
83 | struct platform_device *dev = filp->private_data; | ||
84 | struct gk20a *g = get_gk20a(dev); | ||
85 | struct nvhost_gpu_zcull_get_ctx_size_args *get_ctx_size_args; | ||
86 | struct nvhost_gpu_zcull_get_info_args *get_info_args; | ||
87 | struct nvhost_gpu_zbc_set_table_args *set_table_args; | ||
88 | struct nvhost_gpu_zbc_query_table_args *query_table_args; | ||
89 | u8 buf[NVHOST_GPU_IOCTL_MAX_ARG_SIZE]; | ||
90 | struct gr_zcull_info *zcull_info; | ||
91 | struct zbc_entry *zbc_val; | ||
92 | struct zbc_query_params *zbc_tbl; | ||
93 | int i, err = 0; | ||
94 | |||
95 | gk20a_dbg_fn(""); | ||
96 | |||
97 | if ((_IOC_TYPE(cmd) != NVHOST_GPU_IOCTL_MAGIC) || | ||
98 | (_IOC_NR(cmd) == 0) || | ||
99 | (_IOC_NR(cmd) > NVHOST_GPU_IOCTL_LAST)) | ||
100 | return -EFAULT; | ||
101 | |||
102 | BUG_ON(_IOC_SIZE(cmd) > NVHOST_GPU_IOCTL_MAX_ARG_SIZE); | ||
103 | |||
104 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
105 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
106 | return -EFAULT; | ||
107 | } | ||
108 | |||
109 | if (!g->gr.sw_ready) { | ||
110 | err = gk20a_busy(g->dev); | ||
111 | if (err) | ||
112 | return err; | ||
113 | |||
114 | gk20a_idle(g->dev); | ||
115 | } | ||
116 | |||
117 | switch (cmd) { | ||
118 | case NVHOST_GPU_IOCTL_ZCULL_GET_CTX_SIZE: | ||
119 | get_ctx_size_args = (struct nvhost_gpu_zcull_get_ctx_size_args *)buf; | ||
120 | |||
121 | get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr); | ||
122 | |||
123 | break; | ||
124 | case NVHOST_GPU_IOCTL_ZCULL_GET_INFO: | ||
125 | get_info_args = (struct nvhost_gpu_zcull_get_info_args *)buf; | ||
126 | |||
127 | memset(get_info_args, 0, sizeof(struct nvhost_gpu_zcull_get_info_args)); | ||
128 | |||
129 | zcull_info = kzalloc(sizeof(struct gr_zcull_info), GFP_KERNEL); | ||
130 | if (zcull_info == NULL) | ||
131 | return -ENOMEM; | ||
132 | |||
133 | err = gr_gk20a_get_zcull_info(g, &g->gr, zcull_info); | ||
134 | if (err) { | ||
135 | kfree(zcull_info); | ||
136 | break; | ||
137 | } | ||
138 | |||
139 | get_info_args->width_align_pixels = zcull_info->width_align_pixels; | ||
140 | get_info_args->height_align_pixels = zcull_info->height_align_pixels; | ||
141 | get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots; | ||
142 | get_info_args->aliquot_total = zcull_info->aliquot_total; | ||
143 | get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier; | ||
144 | get_info_args->region_header_size = zcull_info->region_header_size; | ||
145 | get_info_args->subregion_header_size = zcull_info->subregion_header_size; | ||
146 | get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels; | ||
147 | get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels; | ||
148 | get_info_args->subregion_count = zcull_info->subregion_count; | ||
149 | |||
150 | kfree(zcull_info); | ||
151 | break; | ||
152 | case NVHOST_GPU_IOCTL_ZBC_SET_TABLE: | ||
153 | set_table_args = (struct nvhost_gpu_zbc_set_table_args *)buf; | ||
154 | |||
155 | zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL); | ||
156 | if (zbc_val == NULL) | ||
157 | return -ENOMEM; | ||
158 | |||
159 | zbc_val->format = set_table_args->format; | ||
160 | zbc_val->type = set_table_args->type; | ||
161 | |||
162 | switch (zbc_val->type) { | ||
163 | case GK20A_ZBC_TYPE_COLOR: | ||
164 | for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { | ||
165 | zbc_val->color_ds[i] = set_table_args->color_ds[i]; | ||
166 | zbc_val->color_l2[i] = set_table_args->color_l2[i]; | ||
167 | } | ||
168 | break; | ||
169 | case GK20A_ZBC_TYPE_DEPTH: | ||
170 | zbc_val->depth = set_table_args->depth; | ||
171 | break; | ||
172 | default: | ||
173 | err = -EINVAL; | ||
174 | } | ||
175 | |||
176 | if (!err) { | ||
177 | gk20a_busy(dev); | ||
178 | err = gk20a_gr_zbc_set_table(g, &g->gr, zbc_val); | ||
179 | gk20a_idle(dev); | ||
180 | } | ||
181 | |||
182 | if (zbc_val) | ||
183 | kfree(zbc_val); | ||
184 | break; | ||
185 | case NVHOST_GPU_IOCTL_ZBC_QUERY_TABLE: | ||
186 | query_table_args = (struct nvhost_gpu_zbc_query_table_args *)buf; | ||
187 | |||
188 | zbc_tbl = kzalloc(sizeof(struct zbc_query_params), GFP_KERNEL); | ||
189 | if (zbc_tbl == NULL) | ||
190 | return -ENOMEM; | ||
191 | |||
192 | zbc_tbl->type = query_table_args->type; | ||
193 | zbc_tbl->index_size = query_table_args->index_size; | ||
194 | |||
195 | err = gr_gk20a_query_zbc(g, &g->gr, zbc_tbl); | ||
196 | |||
197 | if (!err) { | ||
198 | switch (zbc_tbl->type) { | ||
199 | case GK20A_ZBC_TYPE_COLOR: | ||
200 | for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { | ||
201 | query_table_args->color_ds[i] = zbc_tbl->color_ds[i]; | ||
202 | query_table_args->color_l2[i] = zbc_tbl->color_l2[i]; | ||
203 | } | ||
204 | break; | ||
205 | case GK20A_ZBC_TYPE_DEPTH: | ||
206 | query_table_args->depth = zbc_tbl->depth; | ||
207 | break; | ||
208 | case GK20A_ZBC_TYPE_INVALID: | ||
209 | query_table_args->index_size = zbc_tbl->index_size; | ||
210 | break; | ||
211 | default: | ||
212 | err = -EINVAL; | ||
213 | } | ||
214 | if (!err) { | ||
215 | query_table_args->format = zbc_tbl->format; | ||
216 | query_table_args->ref_cnt = zbc_tbl->ref_cnt; | ||
217 | } | ||
218 | } | ||
219 | |||
220 | if (zbc_tbl) | ||
221 | kfree(zbc_tbl); | ||
222 | break; | ||
223 | |||
224 | case NVHOST_GPU_IOCTL_GET_CHARACTERISTICS: | ||
225 | err = gk20a_ctrl_ioctl_gpu_characteristics( | ||
226 | g, (struct nvhost_gpu_get_characteristics *)buf); | ||
227 | break; | ||
228 | |||
229 | default: | ||
230 | gk20a_err(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); | ||
231 | err = -ENOTTY; | ||
232 | break; | ||
233 | } | ||
234 | |||
235 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
236 | err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); | ||
237 | |||
238 | return err; | ||
239 | } | ||
240 | |||
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h new file mode 100644 index 00000000..ac9c253e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h | |||
@@ -0,0 +1,28 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/gk20a_ctrl.h | ||
3 | * | ||
4 | * GK20A Ctrl | ||
5 | * | ||
6 | * Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #ifndef _NVHOST_GK20A_CTRL_H_ | ||
22 | #define _NVHOST_GK20A_CTRL_H_ | ||
23 | |||
24 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp); | ||
25 | int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp); | ||
26 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); | ||
27 | |||
28 | #endif /* _NVHOST_GK20A_CTRL_H_ */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c new file mode 100644 index 00000000..da7d733e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |||
@@ -0,0 +1,699 @@ | |||
1 | /* | ||
2 | * Tegra GK20A GPU Debugger/Profiler Driver | ||
3 | * | ||
4 | * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/fs.h> | ||
20 | #include <linux/file.h> | ||
21 | #include <linux/cdev.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/nvhost.h> | ||
24 | #include <linux/nvhost_dbg_gpu_ioctl.h> | ||
25 | |||
26 | #include "gk20a.h" | ||
27 | #include "gr_gk20a.h" | ||
28 | #include "dbg_gpu_gk20a.h" | ||
29 | #include "regops_gk20a.h" | ||
30 | #include "hw_therm_gk20a.h" | ||
31 | |||
/* gk20a implementation of the debug-session operations table; only the
 * register-operation batch entry point is provided here. */
struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = {
	.exec_reg_ops = exec_regops_gk20a,
};
35 | |||
36 | /* silly allocator - just increment session id */ | ||
37 | static atomic_t session_id = ATOMIC_INIT(0); | ||
38 | static int generate_session_id(void) | ||
39 | { | ||
40 | return atomic_add_return(1, &session_id); | ||
41 | } | ||
42 | |||
43 | static int alloc_session(struct dbg_session_gk20a **_dbg_s) | ||
44 | { | ||
45 | struct dbg_session_gk20a *dbg_s; | ||
46 | *_dbg_s = NULL; | ||
47 | |||
48 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | ||
49 | |||
50 | dbg_s = kzalloc(sizeof(*dbg_s), GFP_KERNEL); | ||
51 | if (!dbg_s) | ||
52 | return -ENOMEM; | ||
53 | |||
54 | dbg_s->id = generate_session_id(); | ||
55 | dbg_s->ops = &dbg_gpu_session_ops_gk20a; | ||
56 | *_dbg_s = dbg_s; | ||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | int gk20a_dbg_gpu_do_dev_open(struct inode *inode, struct file *filp, bool is_profiler) | ||
61 | { | ||
62 | struct dbg_session_gk20a *dbg_session; | ||
63 | struct gk20a *g; | ||
64 | |||
65 | struct platform_device *pdev; | ||
66 | struct device *dev; | ||
67 | |||
68 | int err; | ||
69 | |||
70 | if (!is_profiler) | ||
71 | g = container_of(inode->i_cdev, | ||
72 | struct gk20a, dbg.cdev); | ||
73 | else | ||
74 | g = container_of(inode->i_cdev, | ||
75 | struct gk20a, prof.cdev); | ||
76 | pdev = g->dev; | ||
77 | dev = &pdev->dev; | ||
78 | |||
79 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", dev_name(dev)); | ||
80 | |||
81 | err = alloc_session(&dbg_session); | ||
82 | if (err) | ||
83 | return err; | ||
84 | |||
85 | filp->private_data = dbg_session; | ||
86 | dbg_session->pdev = pdev; | ||
87 | dbg_session->dev = dev; | ||
88 | dbg_session->g = g; | ||
89 | dbg_session->is_profiler = is_profiler; | ||
90 | dbg_session->is_pg_disabled = false; | ||
91 | |||
92 | INIT_LIST_HEAD(&dbg_session->dbg_s_list_node); | ||
93 | init_waitqueue_head(&dbg_session->dbg_events.wait_queue); | ||
94 | dbg_session->dbg_events.events_enabled = false; | ||
95 | dbg_session->dbg_events.num_pending_events = 0; | ||
96 | |||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | /* used in scenarios where the debugger session can take just the inter-session | ||
101 | * lock for performance, but the profiler session must take the per-gpu lock | ||
102 | * since it might not have an associated channel. */ | ||
103 | static void gk20a_dbg_session_mutex_lock(struct dbg_session_gk20a *dbg_s) | ||
104 | { | ||
105 | if (dbg_s->is_profiler) | ||
106 | mutex_lock(&dbg_s->g->dbg_sessions_lock); | ||
107 | else | ||
108 | mutex_lock(&dbg_s->ch->dbg_s_lock); | ||
109 | } | ||
110 | |||
111 | static void gk20a_dbg_session_mutex_unlock(struct dbg_session_gk20a *dbg_s) | ||
112 | { | ||
113 | if (dbg_s->is_profiler) | ||
114 | mutex_unlock(&dbg_s->g->dbg_sessions_lock); | ||
115 | else | ||
116 | mutex_unlock(&dbg_s->ch->dbg_s_lock); | ||
117 | } | ||
118 | |||
119 | static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s) | ||
120 | { | ||
121 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | ||
122 | |||
123 | gk20a_dbg_session_mutex_lock(dbg_s); | ||
124 | |||
125 | dbg_s->dbg_events.events_enabled = true; | ||
126 | dbg_s->dbg_events.num_pending_events = 0; | ||
127 | |||
128 | gk20a_dbg_session_mutex_unlock(dbg_s); | ||
129 | } | ||
130 | |||
131 | static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s) | ||
132 | { | ||
133 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | ||
134 | |||
135 | gk20a_dbg_session_mutex_lock(dbg_s); | ||
136 | |||
137 | dbg_s->dbg_events.events_enabled = false; | ||
138 | dbg_s->dbg_events.num_pending_events = 0; | ||
139 | |||
140 | gk20a_dbg_session_mutex_unlock(dbg_s); | ||
141 | } | ||
142 | |||
143 | static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s) | ||
144 | { | ||
145 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | ||
146 | |||
147 | gk20a_dbg_session_mutex_lock(dbg_s); | ||
148 | |||
149 | if (dbg_s->dbg_events.events_enabled && | ||
150 | dbg_s->dbg_events.num_pending_events > 0) | ||
151 | dbg_s->dbg_events.num_pending_events--; | ||
152 | |||
153 | gk20a_dbg_session_mutex_unlock(dbg_s); | ||
154 | } | ||
155 | |||
156 | static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s, | ||
157 | struct nvhost_dbg_gpu_events_ctrl_args *args) | ||
158 | { | ||
159 | int ret = 0; | ||
160 | |||
161 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd); | ||
162 | |||
163 | if (!dbg_s->ch) { | ||
164 | gk20a_err(dev_from_gk20a(dbg_s->g), | ||
165 | "no channel bound to dbg session\n"); | ||
166 | return -EINVAL; | ||
167 | } | ||
168 | |||
169 | switch (args->cmd) { | ||
170 | case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_ENABLE: | ||
171 | gk20a_dbg_gpu_events_enable(dbg_s); | ||
172 | break; | ||
173 | |||
174 | case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_DISABLE: | ||
175 | gk20a_dbg_gpu_events_disable(dbg_s); | ||
176 | break; | ||
177 | |||
178 | case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_CLEAR: | ||
179 | gk20a_dbg_gpu_events_clear(dbg_s); | ||
180 | break; | ||
181 | |||
182 | default: | ||
183 | gk20a_err(dev_from_gk20a(dbg_s->g), | ||
184 | "unrecognized dbg gpu events ctrl cmd: 0x%x", | ||
185 | args->cmd); | ||
186 | ret = -EINVAL; | ||
187 | break; | ||
188 | } | ||
189 | |||
190 | return ret; | ||
191 | } | ||
192 | |||
193 | unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait) | ||
194 | { | ||
195 | unsigned int mask = 0; | ||
196 | struct dbg_session_gk20a *dbg_s = filep->private_data; | ||
197 | |||
198 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | ||
199 | |||
200 | poll_wait(filep, &dbg_s->dbg_events.wait_queue, wait); | ||
201 | |||
202 | gk20a_dbg_session_mutex_lock(dbg_s); | ||
203 | |||
204 | if (dbg_s->dbg_events.events_enabled && | ||
205 | dbg_s->dbg_events.num_pending_events > 0) { | ||
206 | gk20a_dbg(gpu_dbg_gpu_dbg, "found pending event on session id %d", | ||
207 | dbg_s->id); | ||
208 | gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending", | ||
209 | dbg_s->dbg_events.num_pending_events); | ||
210 | mask = (POLLPRI | POLLIN); | ||
211 | } | ||
212 | |||
213 | gk20a_dbg_session_mutex_unlock(dbg_s); | ||
214 | |||
215 | return mask; | ||
216 | } | ||
217 | |||
218 | int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp) | ||
219 | { | ||
220 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | ||
221 | return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */); | ||
222 | } | ||
223 | |||
224 | int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp) | ||
225 | { | ||
226 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | ||
227 | return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */); | ||
228 | } | ||
229 | |||
230 | void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch) | ||
231 | { | ||
232 | struct dbg_session_gk20a *dbg_s; | ||
233 | |||
234 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | ||
235 | |||
236 | /* guard against the session list being modified */ | ||
237 | mutex_lock(&ch->dbg_s_lock); | ||
238 | |||
239 | list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) { | ||
240 | if (dbg_s->dbg_events.events_enabled) { | ||
241 | gk20a_dbg(gpu_dbg_gpu_dbg, "posting event on session id %d", | ||
242 | dbg_s->id); | ||
243 | gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending", | ||
244 | dbg_s->dbg_events.num_pending_events); | ||
245 | |||
246 | dbg_s->dbg_events.num_pending_events++; | ||
247 | |||
248 | wake_up_interruptible_all(&dbg_s->dbg_events.wait_queue); | ||
249 | } | ||
250 | } | ||
251 | |||
252 | mutex_unlock(&ch->dbg_s_lock); | ||
253 | } | ||
254 | |||
255 | |||
256 | static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, | ||
257 | __u32 powermode); | ||
258 | |||
/*
 * Detach the session from its bound channel. Re-enables powergating on
 * behalf of this session (in case it disabled powergating and is being
 * torn down without the matching enable ioctl), drops the channel file
 * reference taken at bind time, and removes the session from the
 * channel's session list. Returns -ENODEV if no channel was bound.
 */
static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s)
{
	struct channel_gk20a *ch_gk20a = dbg_s->ch;
	struct gk20a *g = dbg_s->g;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

	/* wasn't bound to start with ? */
	if (!ch_gk20a) {
		gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "not bound already?");
		return -ENODEV;
	}

	/* same lock order as dbg_bind_channel_gk20a: per-gpu, then channel */
	mutex_lock(&g->dbg_sessions_lock);
	mutex_lock(&ch_gk20a->dbg_s_lock);

	--g->dbg_sessions;

	/* Powergate enable is called here as possibility of dbg_session
	 * which called powergate disable ioctl, to be killed without calling
	 * powergate enable ioctl
	 */
	dbg_set_powergate(dbg_s, NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE);

	dbg_s->ch = NULL;
	/* drop the fget() reference taken in dbg_bind_channel_gk20a() */
	fput(dbg_s->ch_f);
	dbg_s->ch_f = NULL;

	list_del_init(&dbg_s->dbg_s_list_node);

	mutex_unlock(&ch_gk20a->dbg_s_lock);
	mutex_unlock(&g->dbg_sessions_lock);

	return 0;
}
294 | |||
295 | int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) | ||
296 | { | ||
297 | struct dbg_session_gk20a *dbg_s = filp->private_data; | ||
298 | |||
299 | gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", dev_name(dbg_s->dev)); | ||
300 | |||
301 | /* unbind if it was bound */ | ||
302 | if (!dbg_s->ch) | ||
303 | return 0; | ||
304 | dbg_unbind_channel_gk20a(dbg_s); | ||
305 | |||
306 | kfree(dbg_s); | ||
307 | return 0; | ||
308 | } | ||
309 | |||
/*
 * BIND_CHANNEL ioctl handler: attach this debug session to the channel
 * behind @args->channel_fd. A channel_fd of ~0 means "unbind". On success
 * the session keeps a file reference on the channel fd so the channel
 * cannot disappear while the debugger is attached; that reference is
 * released in dbg_unbind_channel_gk20a().
 */
static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
			  struct nvhost_dbg_gpu_bind_channel_args *args)
{
	struct file *f;
	struct gk20a *g;
	struct channel_gk20a *ch;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
		   dev_name(dbg_s->dev), args->channel_fd);

	if (args->channel_fd == ~0)
		return dbg_unbind_channel_gk20a(dbg_s);

	/* even though get_file_channel is doing this it releases it as well */
	/* by holding it here we'll keep it from disappearing while the
	 * debugger is in session */
	f = fget(args->channel_fd);
	if (!f)
		return -ENODEV;

	ch = gk20a_get_channel_from_file(args->channel_fd);
	if (!ch) {
		gk20a_dbg_fn("no channel found for fd");
		fput(f);
		return -EINVAL;
	}

	g = dbg_s->g;
	gk20a_dbg_fn("%s hwchid=%d", dev_name(dbg_s->dev), ch->hw_chid);

	/* lock order: per-gpu session lock first, then the channel lock */
	mutex_lock(&g->dbg_sessions_lock);
	mutex_lock(&ch->dbg_s_lock);

	dbg_s->ch_f = f;
	dbg_s->ch = ch;
	list_add(&dbg_s->dbg_s_list_node, &dbg_s->ch->dbg_s_list);

	g->dbg_sessions++;

	mutex_unlock(&ch->dbg_s_lock);
	mutex_unlock(&g->dbg_sessions_lock);
	return 0;
}
353 | |||
354 | static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, | ||
355 | struct nvhost_dbg_gpu_exec_reg_ops_args *args); | ||
356 | |||
357 | static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, | ||
358 | struct nvhost_dbg_gpu_powergate_args *args); | ||
359 | |||
360 | static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
361 | struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args); | ||
362 | |||
363 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | ||
364 | unsigned long arg) | ||
365 | { | ||
366 | struct dbg_session_gk20a *dbg_s = filp->private_data; | ||
367 | struct gk20a *g = get_gk20a(dbg_s->pdev); | ||
368 | u8 buf[NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE]; | ||
369 | int err = 0; | ||
370 | |||
371 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | ||
372 | |||
373 | if ((_IOC_TYPE(cmd) != NVHOST_DBG_GPU_IOCTL_MAGIC) || | ||
374 | (_IOC_NR(cmd) == 0) || | ||
375 | (_IOC_NR(cmd) > NVHOST_DBG_GPU_IOCTL_LAST)) | ||
376 | return -EFAULT; | ||
377 | |||
378 | BUG_ON(_IOC_SIZE(cmd) > NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE); | ||
379 | |||
380 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
381 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
382 | return -EFAULT; | ||
383 | } | ||
384 | |||
385 | if (!g->gr.sw_ready) { | ||
386 | err = gk20a_busy(g->dev); | ||
387 | if (err) | ||
388 | return err; | ||
389 | |||
390 | gk20a_idle(g->dev); | ||
391 | } | ||
392 | |||
393 | switch (cmd) { | ||
394 | case NVHOST_DBG_GPU_IOCTL_BIND_CHANNEL: | ||
395 | err = dbg_bind_channel_gk20a(dbg_s, | ||
396 | (struct nvhost_dbg_gpu_bind_channel_args *)buf); | ||
397 | gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); | ||
398 | break; | ||
399 | |||
400 | case NVHOST_DBG_GPU_IOCTL_REG_OPS: | ||
401 | err = nvhost_ioctl_channel_reg_ops(dbg_s, | ||
402 | (struct nvhost_dbg_gpu_exec_reg_ops_args *)buf); | ||
403 | gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); | ||
404 | break; | ||
405 | |||
406 | case NVHOST_DBG_GPU_IOCTL_POWERGATE: | ||
407 | err = nvhost_ioctl_powergate_gk20a(dbg_s, | ||
408 | (struct nvhost_dbg_gpu_powergate_args *)buf); | ||
409 | gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); | ||
410 | break; | ||
411 | |||
412 | case NVHOST_DBG_GPU_IOCTL_EVENTS_CTRL: | ||
413 | err = gk20a_dbg_gpu_events_ctrl(dbg_s, | ||
414 | (struct nvhost_dbg_gpu_events_ctrl_args *)buf); | ||
415 | break; | ||
416 | |||
417 | case NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE: | ||
418 | err = nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s, | ||
419 | (struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *)buf); | ||
420 | break; | ||
421 | |||
422 | default: | ||
423 | gk20a_err(dev_from_gk20a(g), | ||
424 | "unrecognized dbg gpu ioctl cmd: 0x%x", | ||
425 | cmd); | ||
426 | err = -ENOTTY; | ||
427 | break; | ||
428 | } | ||
429 | |||
430 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
431 | err = copy_to_user((void __user *)arg, | ||
432 | buf, _IOC_SIZE(cmd)); | ||
433 | |||
434 | return err; | ||
435 | } | ||
436 | |||
437 | /* In order to perform a context relative op the context has | ||
438 | * to be created already... which would imply that the | ||
439 | * context switch mechanism has already been put in place. | ||
440 | * So by the time we perform such an opertation it should always | ||
441 | * be possible to query for the appropriate context offsets, etc. | ||
442 | * | ||
443 | * But note: while the dbg_gpu bind requires the a channel fd, | ||
444 | * it doesn't require an allocated gr/compute obj at that point... | ||
445 | */ | ||
446 | static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s, | ||
447 | struct gr_gk20a *gr) | ||
448 | { | ||
449 | int err; | ||
450 | |||
451 | mutex_lock(&gr->ctx_mutex); | ||
452 | err = !gr->ctx_vars.golden_image_initialized; | ||
453 | mutex_unlock(&gr->ctx_mutex); | ||
454 | if (err) | ||
455 | return false; | ||
456 | return true; | ||
457 | |||
458 | } | ||
459 | |||
460 | static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, | ||
461 | struct nvhost_dbg_gpu_exec_reg_ops_args *args) | ||
462 | { | ||
463 | int err; | ||
464 | struct device *dev = dbg_s->dev; | ||
465 | struct gk20a *g = get_gk20a(dbg_s->pdev); | ||
466 | struct nvhost_dbg_gpu_reg_op *ops; | ||
467 | u64 ops_size = sizeof(ops[0]) * args->num_ops; | ||
468 | |||
469 | gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size); | ||
470 | |||
471 | if (!dbg_s->ops) { | ||
472 | gk20a_err(dev, "can't call reg_ops on an unbound debugger session"); | ||
473 | return -EINVAL; | ||
474 | } | ||
475 | |||
476 | if (!dbg_s->is_profiler && !dbg_s->ch) { | ||
477 | gk20a_err(dev, "bind a channel before regops for a debugging session"); | ||
478 | return -EINVAL; | ||
479 | } | ||
480 | |||
481 | /* be sure that ctx info is in place */ | ||
482 | if (!gr_context_info_available(dbg_s, &g->gr)) { | ||
483 | gk20a_err(dev, "gr context data not available\n"); | ||
484 | return -ENODEV; | ||
485 | } | ||
486 | |||
487 | ops = kzalloc(ops_size, GFP_KERNEL); | ||
488 | if (!ops) { | ||
489 | gk20a_err(dev, "Allocating memory failed!"); | ||
490 | return -ENOMEM; | ||
491 | } | ||
492 | |||
493 | gk20a_dbg_fn("Copying regops from userspace"); | ||
494 | |||
495 | if (copy_from_user(ops, (void *)(uintptr_t)args->ops, ops_size)) { | ||
496 | dev_err(dev, "copy_from_user failed!"); | ||
497 | err = -EFAULT; | ||
498 | goto clean_up; | ||
499 | } | ||
500 | |||
501 | /* since exec_reg_ops sends methods to the ucode, it must take the | ||
502 | * global gpu lock to protect against mixing methods from debug sessions | ||
503 | * on other channels */ | ||
504 | mutex_lock(&g->dbg_sessions_lock); | ||
505 | |||
506 | err = dbg_s->ops->exec_reg_ops(dbg_s, ops, args->num_ops); | ||
507 | |||
508 | mutex_unlock(&g->dbg_sessions_lock); | ||
509 | |||
510 | if (err) { | ||
511 | gk20a_err(dev, "dbg regops failed"); | ||
512 | goto clean_up; | ||
513 | } | ||
514 | |||
515 | gk20a_dbg_fn("Copying result to userspace"); | ||
516 | |||
517 | if (copy_to_user((void *)(uintptr_t)args->ops, ops, ops_size)) { | ||
518 | dev_err(dev, "copy_to_user failed!"); | ||
519 | err = -EFAULT; | ||
520 | goto clean_up; | ||
521 | } | ||
522 | return 0; | ||
523 | clean_up: | ||
524 | kfree(ops); | ||
525 | return err; | ||
526 | } | ||
527 | |||
/*
 * Apply a powergate mode change on behalf of a debug session.
 *
 * Caller must hold g->dbg_sessions_lock. The global refcount
 * g->dbg_powergating_disabled_refcount tracks how many sessions have
 * powergating disabled; hardware state is only touched on the first
 * disable (refcount 0 -> 1) and the last enable (refcount 1 -> 0).
 * dbg_s->is_pg_disabled keeps the per-session accounting idempotent.
 */
static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
		__u32 powermode)
{
	int err = 0;
	struct gk20a *g = get_gk20a(dbg_s->pdev);

	/* This function must be called with g->dbg_sessions_lock held */

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %d",
		   dev_name(dbg_s->dev), powermode);

	switch (powermode) {
	case NVHOST_DBG_GPU_POWERGATE_MODE_DISABLE:
		/* save off current powergate, clk state.
		 * set gpu module's can_powergate = 0.
		 * set gpu module's clk to max.
		 * while *a* debug session is active there will be no power or
		 * clocking state changes allowed from mainline code (but they
		 * should be saved).
		 */
		/* Allow powergate disable if the current dbg_session doesn't
		 * call a powergate disable ioctl and the global
		 * powergating_disabled_refcount is zero
		 */

		if ((dbg_s->is_pg_disabled == false) &&
		    (g->dbg_powergating_disabled_refcount++ == 0)) {

			/* keep the module powered and busy for the whole
			 * disabled window */
			gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module busy");
			gk20a_busy(g->dev);
			gk20a_channel_busy(dbg_s->pdev);

			/* force clock gating off: SLCG first, then BLCG,
			 * ELCG, and finally ELPG */
			g->ops.clock_gating.slcg_gr_load_gating_prod(g,
					false);
			g->ops.clock_gating.slcg_perf_load_gating_prod(g,
					false);
			gr_gk20a_init_blcg_mode(g, BLCG_RUN, ENGINE_GR_GK20A);

			g->elcg_enabled = false;
			gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A);
			gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A);

			gk20a_pmu_disable_elpg(g);
		}

		dbg_s->is_pg_disabled = true;
		break;

	case NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE:
		/* restore (can) powergate, clk state */
		/* release pending exceptions to fault/be handled as usual */
		/*TBD: ordering of these? */

		/* Re-enabling powergate as no other sessions want
		 * powergate disabled and the current dbg-sessions had
		 * requested the powergate disable through ioctl
		 */
		if (dbg_s->is_pg_disabled &&
		    --g->dbg_powergating_disabled_refcount == 0) {

			/* re-enable in reverse order: ELCG, BLCG, SLCG,
			 * then ELPG */
			g->elcg_enabled = true;
			gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
			gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
			gr_gk20a_init_blcg_mode(g, BLCG_AUTO, ENGINE_GR_GK20A);

			g->ops.clock_gating.slcg_gr_load_gating_prod(g,
					g->slcg_enabled);
			g->ops.clock_gating.slcg_perf_load_gating_prod(g,
					g->slcg_enabled);

			gk20a_pmu_enable_elpg(g);

			/* drop the busy references taken on disable */
			gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle");
			gk20a_channel_idle(dbg_s->pdev);
			gk20a_idle(g->dev);
		}

		dbg_s->is_pg_disabled = false;
		break;

	default:
		gk20a_err(dev_from_gk20a(g),
			  "unrecognized dbg gpu powergate mode: 0x%x",
			  powermode);
		err = -ENOTTY;
		break;
	}

	return err;
}
618 | |||
619 | static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, | ||
620 | struct nvhost_dbg_gpu_powergate_args *args) | ||
621 | { | ||
622 | int err; | ||
623 | struct gk20a *g = get_gk20a(dbg_s->pdev); | ||
624 | gk20a_dbg_fn("%s powergate mode = %d", | ||
625 | dev_name(dbg_s->dev), args->mode); | ||
626 | |||
627 | mutex_lock(&g->dbg_sessions_lock); | ||
628 | err = dbg_set_powergate(dbg_s, args->mode); | ||
629 | mutex_unlock(&g->dbg_sessions_lock); | ||
630 | return err; | ||
631 | } | ||
632 | |||
/*
 * SMPC_CTXSW_MODE ioctl handler: enable or disable context-switching of
 * SMPC state for the bound channel, then issue a fixed batch of regops
 * as a workaround (see comment below). Requires a bound channel; runs
 * entirely under the global dbg_sessions_lock.
 */
static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
			  struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args)
{
	int err;
	struct gk20a *g = get_gk20a(dbg_s->pdev);
	struct channel_gk20a *ch_gk20a;

	gk20a_dbg_fn("%s smpc ctxsw mode = %d",
		     dev_name(dbg_s->dev), args->mode);

	/* Take the global lock, since we'll be doing global regops */
	mutex_lock(&g->dbg_sessions_lock);

	ch_gk20a = dbg_s->ch;

	if (!ch_gk20a) {
		gk20a_err(dev_from_gk20a(dbg_s->g),
			  "no bound channel for smpc ctxsw mode update\n");
		err = -EINVAL;
		goto clean_up;
	}

	err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a,
		      args->mode == NVHOST_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
	if (err) {
		gk20a_err(dev_from_gk20a(dbg_s->g),
			  "error (%d) during smpc ctxsw mode update\n", err);
		goto clean_up;
	}
	/* The following regops are a hack/war to make up for the fact that we
	 * just scribbled into the ctxsw image w/o really knowing whether
	 * it was already swapped out in/out once or not, etc.
	 */
	{
		struct nvhost_dbg_gpu_reg_op ops[4];
		int i;
		/* common fields: 32-bit GR-context writes, value_hi/masks 0 */
		for (i = 0; i < ARRAY_SIZE(ops); i++) {
			ops[i].op = NVHOST_DBG_GPU_REG_OP_WRITE_32;
			ops[i].type = NVHOST_DBG_GPU_REG_OP_TYPE_GR_CTX;
			ops[i].status = NVHOST_DBG_GPU_REG_OP_STATUS_SUCCESS;
			ops[i].value_hi = 0;
			ops[i].and_n_mask_lo = 0;
			ops[i].and_n_mask_hi = 0;
		}
		/* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control_sel1_r();*/
		ops[0].offset = 0x00419e08;
		ops[0].value_lo = 0x1d;

		/* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control5_r(); */
		ops[1].offset = 0x00419e58;
		ops[1].value_lo = 0x1;

		/* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control3_r(); */
		ops[2].offset = 0x00419e68;
		ops[2].value_lo = 0xaaaa;

		/* gr_pri_gpcs_tpcs_sm_dsm_perf_counter4_control_r(); */
		ops[3].offset = 0x00419f40;
		ops[3].value_lo = 0x18;

		err = dbg_s->ops->exec_reg_ops(dbg_s, ops, ARRAY_SIZE(ops));
	}

clean_up:
	mutex_unlock(&g->dbg_sessions_lock);
	return err;
}
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h new file mode 100644 index 00000000..49827608 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h | |||
@@ -0,0 +1,83 @@ | |||
1 | /* | ||
2 | * Tegra GK20A GPU Debugger Driver | ||
3 | * | ||
4 | * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
#ifndef __DBG_GPU_GK20A_H_
#define __DBG_GPU_GK20A_H_
#include <linux/poll.h>

/* module debug driver interface */
int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp);
int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp);
long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait);

/* used by profiler driver interface */
int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp);

/* used by the interrupt handler to post events */
void gk20a_dbg_gpu_post_events(struct channel_gk20a *fault_ch);

/* per-chip operations a debug session dispatches through; gk20a provides
 * dbg_gpu_session_ops_gk20a below */
struct dbg_gpu_session_ops {
	int (*exec_reg_ops)(struct dbg_session_gk20a *dbg_s,
			struct nvhost_dbg_gpu_reg_op *ops,
			u64 num_ops);
};

/* event-delivery state for one session, consumed via poll() */
struct dbg_gpu_session_events {
	wait_queue_head_t wait_queue;
	/* delivery armed via the EVENTS_CTRL ioctl */
	bool events_enabled;
	/* count of posted-but-unacknowledged events */
	int num_pending_events;
};

/* one open debugger or profiler file descriptor */
struct dbg_session_gk20a {
	/* dbg session id used for trace/prints */
	int id;

	/* profiler session, if any */
	bool is_profiler;

	/* power enabled or disabled */
	bool is_pg_disabled;

	/*
	 * There can be different versions of the whitelists
	 * between both global and per-context sets; as well
	 * as between debugger and profiler interfaces.
	 */
	struct regops_whitelist *global;
	struct regops_whitelist *per_context;

	/* gpu module vagaries */
	struct device *dev;
	struct platform_device *pdev;
	struct gk20a *g;

	/* bound channel, if any; ch_f holds the fget() reference taken
	 * at bind time */
	struct file *ch_f;
	struct channel_gk20a *ch;

	/* session operations */
	struct dbg_gpu_session_ops *ops;

	/* event support */
	struct dbg_gpu_session_events dbg_events;
	/* membership in the bound channel's dbg_s_list */
	struct list_head dbg_s_list_node;
};

extern struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a;

#endif /* __DBG_GPU_GK20A_H_ */
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c new file mode 100644 index 00000000..c5b6953c --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c | |||
@@ -0,0 +1,295 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/t20/debug_gk20a.c | ||
3 | * | ||
4 | * Copyright (C) 2011-2014 NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This software is licensed under the terms of the GNU General Public | ||
7 | * License version 2, as published by the Free Software Foundation, and | ||
8 | * may be copied, distributed, and modified under those terms. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | */ | ||
16 | |||
17 | #include <linux/nvhost.h> | ||
18 | #include <linux/debugfs.h> | ||
19 | #include <linux/seq_file.h> | ||
20 | |||
21 | #include <linux/io.h> | ||
22 | |||
23 | #include "gk20a.h" | ||
24 | #include "debug_gk20a.h" | ||
25 | |||
26 | #include "hw_ram_gk20a.h" | ||
27 | #include "hw_fifo_gk20a.h" | ||
28 | #include "hw_ccsr_gk20a.h" | ||
29 | #include "hw_pbdma_gk20a.h" | ||
30 | |||
/* nonzero => trace submitted command buffers; debugfs "trace_cmdbuf" knob */
unsigned int gk20a_debug_trace_cmdbuf;
/* first gk20a device seen by gk20a_debug_init(); fallback target for
 * gk20a_debug_dump_device(NULL) */
struct platform_device *gk20a_device;
33 | |||
/*
 * Sink for debug-dump text: fn() receives each formatted chunk, ctx is
 * sink-private state (e.g. a seq_file), buf is scratch for one chunk.
 */
struct gk20a_debug_output {
	void (*fn)(void *ctx, const char *str, size_t len);
	void *ctx;
	char buf[256];
};
39 | |||
/* Channel state names, indexed by ccsr_channel_status_v() */
static const char * const ccsr_chan_status_str[] = {
	"idle",
	"pending",
	"pending_ctx_reload",
	"pending_acquire",
	"pending_acq_ctx_reload",
	"on_pbdma",
	"on_pbdma_and_eng",
	"on_eng",
	"on_eng_pending_acquire",
	"on_eng_pending",
	"on_pbdma_ctx_reload",
	"on_pbdma_and_eng_ctx_reload",
	"on_eng_ctx_reload",
	"on_eng_pending_ctx_reload",
	"on_eng_pending_acq_ctx_reload",
};
57 | |||
/* PBDMA channel status names, indexed by fifo_pbdma_status_chan_status_v() */
static const char * const chan_status_str[] = {
	"invalid",
	"valid",
	"chsw_load",
	"chsw_save",
	"chsw_switch",
};
65 | |||
/* Engine context status names, indexed by
 * fifo_engine_status_ctx_status_v(); NULL holes are encodings the
 * hardware does not report here. */
static const char * const ctx_status_str[] = {
	"invalid",
	"valid",
	NULL,
	NULL,
	NULL,
	"ctxsw_load",
	"ctxsw_save",
	"ctxsw_switch",
};
76 | |||
/* Output sink: print one chunk to the kernel log. @ctx and @len are
 * unused; @str is NUL-terminated by gk20a_debug_output(). */
static inline void gk20a_debug_write_printk(void *ctx, const char *str,
					    size_t len)
{
	pr_info("%s", str);
}
82 | |||
/* Output sink: append one chunk to a seq_file (@ctx). */
static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
						size_t len)
{
	seq_write((struct seq_file *)ctx, str, len);
}
88 | |||
89 | void gk20a_debug_output(struct gk20a_debug_output *o, const char *fmt, ...) | ||
90 | { | ||
91 | va_list args; | ||
92 | int len; | ||
93 | |||
94 | va_start(args, fmt); | ||
95 | len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); | ||
96 | va_end(args); | ||
97 | o->fn(o->ctx, o->buf, len); | ||
98 | } | ||
99 | |||
/*
 * Print one channel's CCSR state plus its RAMFC contents (pushbuffer
 * pointers, syncpoint and semaphore method state) to @o. Channels whose
 * instance block is not CPU-mapped yet are skipped silently.
 */
static void gk20a_debug_show_channel(struct gk20a *g,
				     struct gk20a_debug_output *o,
				     struct channel_gk20a *ch)
{
	u32 channel = gk20a_readl(g, ccsr_channel_r(ch->hw_chid));
	u32 status = ccsr_channel_status_v(channel);
	u32 syncpointa, syncpointb;
	void *inst_ptr;

	/* RAMFC lives in the channel's instance block */
	inst_ptr = ch->inst_block.cpuva;
	if (!inst_ptr)
		return;

	syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w());
	syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w());

	gk20a_debug_output(o, "%d-%s, pid %d: ", ch->hw_chid,
			ch->g->dev->name,
			ch->pid);
	gk20a_debug_output(o, "%s in use %s %s\n",
			ccsr_channel_enable_v(channel) ? "" : "not",
			ccsr_chan_status_str[status],
			ccsr_channel_busy_v(channel) ? "busy" : "not busy");
	/* 64-bit pushbuffer pointers are split across lo/hi words in RAMFC */
	gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx "
			"FETCH: %016llx\nHEADER: %08x COUNT: %08x\n"
			"SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n",
		(u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_top_level_get_w()) +
		((u64)gk20a_mem_rd32(inst_ptr,
			ram_fc_pb_top_level_get_hi_w()) << 32ULL),
		(u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_w()) +
		((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()) << 32ULL),
		(u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_w()) +
		((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()) << 32ULL),
		(u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_w()) +
		((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()) << 32ULL),
		gk20a_mem_rd32(inst_ptr, ram_fc_pb_header_w()),
		gk20a_mem_rd32(inst_ptr, ram_fc_pb_count_w()),
		syncpointa,
		syncpointb,
		gk20a_mem_rd32(inst_ptr, ram_fc_semaphorea_w()),
		gk20a_mem_rd32(inst_ptr, ram_fc_semaphoreb_w()),
		gk20a_mem_rd32(inst_ptr, ram_fc_semaphorec_w()),
		gk20a_mem_rd32(inst_ptr, ram_fc_semaphored_w()));

	/* if the channel sits on a syncpoint-wait (with switch enabled),
	 * name the syncpoint it is waiting on and the target value */
	if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v())
		&& (pbdma_syncpointb_wait_switch_v(syncpointb) ==
			pbdma_syncpointb_wait_switch_en_v()))
		gk20a_debug_output(o, "Waiting on syncpt %u (%s) val %u\n",
			pbdma_syncpointb_syncpt_index_v(syncpointb),
			nvhost_syncpt_get_name(
				to_platform_device(g->dev->dev.parent),
				pbdma_syncpointb_syncpt_index_v(syncpointb)),
			pbdma_syncpointa_payload_v(syncpointa));

	gk20a_debug_output(o, "\n");
}
156 | |||
/*
 * Dump the full FIFO state for @pdev to @o: every pbdma's status, every
 * engine's status, and every in-use channel. Takes a busy (power)
 * reference around the register reads so the GPU stays on.
 */
void gk20a_debug_show_dump(struct platform_device *pdev,
		 struct gk20a_debug_output *o)
{
	struct gk20a_platform *platform = gk20a_get_platform(pdev);
	struct gk20a *g = platform->g;
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	int i;

	gk20a_busy(g->dev);
	/* per-pbdma status: bound id, next id, and channel state */
	for (i = 0; i < fifo_pbdma_status__size_1_v(); i++) {
		u32 status = gk20a_readl(g, fifo_pbdma_status_r(i));
		u32 chan_status = fifo_pbdma_status_chan_status_v(status);

		gk20a_debug_output(o, "%s pbdma %d: ", g->dev->name, i);
		gk20a_debug_output(o,
				"id: %d (%s), next_id: %d (%s) status: %s\n",
				fifo_pbdma_status_id_v(status),
				fifo_pbdma_status_id_type_v(status) ?
					"tsg" : "channel",
				fifo_pbdma_status_next_id_v(status),
				fifo_pbdma_status_next_id_type_v(status) ?
					"tsg" : "channel",
				chan_status_str[chan_status]);
		/* 64-bit GP pointers are split across lo/hi registers */
		gk20a_debug_output(o, "PUT: %016llx GET: %016llx "
				"FETCH: %08x HEADER: %08x\n",
				(u64)gk20a_readl(g, pbdma_put_r(i)) +
				((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL),
				(u64)gk20a_readl(g, pbdma_get_r(i)) +
				((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL),
				gk20a_readl(g, pbdma_gp_fetch_r(i)),
				gk20a_readl(g, pbdma_pb_header_r(i)));
	}
	gk20a_debug_output(o, "\n");

	/* per-engine status: bound id, next id, ctxsw state, fault/busy */
	for (i = 0; i < fifo_engine_status__size_1_v(); i++) {
		u32 status = gk20a_readl(g, fifo_engine_status_r(i));
		u32 ctx_status = fifo_engine_status_ctx_status_v(status);

		gk20a_debug_output(o, "%s eng %d: ", g->dev->name, i);
		gk20a_debug_output(o,
				"id: %d (%s), next_id: %d (%s), ctx: %s ",
				fifo_engine_status_id_v(status),
				fifo_engine_status_id_type_v(status) ?
					"tsg" : "channel",
				fifo_engine_status_next_id_v(status),
				fifo_engine_status_next_id_type_v(status) ?
					"tsg" : "channel",
				ctx_status_str[ctx_status]);

		if (fifo_engine_status_faulted_v(status))
			gk20a_debug_output(o, "faulted ");
		if (fifo_engine_status_engine_v(status))
			gk20a_debug_output(o, "busy ");
		gk20a_debug_output(o, "\n");
	}
	gk20a_debug_output(o, "\n");

	/* finally, every channel that is currently in use */
	for (chid = 0; chid < f->num_channels; chid++) {
		if (f->channel[chid].in_use) {
			struct channel_gk20a *gpu_ch = &f->channel[chid];
			gk20a_debug_show_channel(g, o, gpu_ch);
		}
	}
	gk20a_idle(g->dev);
}
223 | |||
224 | void gk20a_debug_dump(struct platform_device *pdev) | ||
225 | { | ||
226 | struct gk20a_platform *platform = gk20a_get_platform(pdev); | ||
227 | struct gk20a_debug_output o = { | ||
228 | .fn = gk20a_debug_write_printk | ||
229 | }; | ||
230 | |||
231 | if (platform->dump_platform_dependencies) | ||
232 | platform->dump_platform_dependencies(pdev); | ||
233 | |||
234 | gk20a_debug_show_dump(pdev, &o); | ||
235 | } | ||
236 | |||
237 | void gk20a_debug_dump_device(struct platform_device *pdev) | ||
238 | { | ||
239 | struct gk20a_debug_output o = { | ||
240 | .fn = gk20a_debug_write_printk | ||
241 | }; | ||
242 | |||
243 | /* Dump the first device if no info is provided */ | ||
244 | if (!pdev && gk20a_device) | ||
245 | pdev = gk20a_device; | ||
246 | |||
247 | gk20a_debug_show_dump(pdev, &o); | ||
248 | } | ||
249 | EXPORT_SYMBOL(gk20a_debug_dump_device); | ||
250 | |||
251 | static int gk20a_debug_show(struct seq_file *s, void *unused) | ||
252 | { | ||
253 | struct platform_device *pdev = s->private; | ||
254 | struct gk20a_debug_output o = { | ||
255 | .fn = gk20a_debug_write_to_seqfile, | ||
256 | .ctx = s, | ||
257 | }; | ||
258 | gk20a_debug_show_dump(pdev, &o); | ||
259 | return 0; | ||
260 | } | ||
261 | |||
/* debugfs open: bind the seq_file show routine to this device
 * (i_private carries the platform_device set in gk20a_debug_init()). */
static int gk20a_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_debug_show, inode->i_private);
}
266 | |||
/* fops for the read-only debugfs "status" file (seq_file based) */
static const struct file_operations gk20a_debug_fops = {
	.open = gk20a_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
273 | |||
/*
 * Create this device's debugfs directory and nodes: "status" (full GPU
 * state dump), "trace_cmdbuf", and -- on GK20A_DEBUG builds -- the
 * debug mask/ftrace knobs. Also records the first device seen as the
 * default target for gk20a_debug_dump_device().
 */
void gk20a_debug_init(struct platform_device *pdev)
{
	struct gk20a_platform *platform = platform_get_drvdata(pdev);

	/* Store the first device */
	if (!gk20a_device)
		gk20a_device = pdev;

	platform->debugfs = debugfs_create_dir(pdev->name, NULL);

	debugfs_create_file("status", S_IRUGO, platform->debugfs,
		pdev, &gk20a_debug_fops);
	debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, platform->debugfs,
		&gk20a_debug_trace_cmdbuf);

#if defined(GK20A_DEBUG)
	debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR, platform->debugfs,
		&gk20a_dbg_mask);
	debugfs_create_u32("dbg_ftrace", S_IRUGO|S_IWUSR, platform->debugfs,
		&gk20a_dbg_ftrace);
#endif
}
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.h b/drivers/gpu/nvgpu/gk20a/debug_gk20a.h new file mode 100644 index 00000000..cd2e09c3 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | * GK20A Debug functionality | ||
3 | * | ||
4 | * Copyright (C) 2011-2014 NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This software is licensed under the terms of the GNU General Public | ||
7 | * License version 2, as published by the Free Software Foundation, and | ||
8 | * may be copied, distributed, and modified under those terms. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | */ | ||
16 | |||
#ifndef _DEBUG_GK20A_H_
#define _DEBUG_GK20A_H_

/* forward declaration so this header is self-contained for includers
 * that have not pulled in <linux/platform_device.h> */
struct platform_device;

/* nonzero => trace submitted command buffers (debugfs "trace_cmdbuf") */
extern unsigned int gk20a_debug_trace_cmdbuf;

/* dump full GPU debug state for @pdev to the kernel log */
void gk20a_debug_dump(struct platform_device *pdev);
/* create per-device debugfs nodes; remembers the first device probed */
void gk20a_debug_init(struct platform_device *pdev);

#endif
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c new file mode 100644 index 00000000..52f2db4d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c | |||
@@ -0,0 +1,37 @@ | |||
1 | /* | ||
2 | * GK20A memory interface | ||
3 | * | ||
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/types.h> | ||
17 | |||
18 | #include "gk20a.h" | ||
19 | #include "kind_gk20a.h" | ||
20 | #include "hw_mc_gk20a.h" | ||
21 | |||
/* Reset the FB-side units together through the PMC enable register:
 * PFB, L2, XBAR and HUB. */
static void fb_gk20a_reset(struct gk20a *g)
{
	gk20a_dbg_info("reset gk20a fb");

	gk20a_reset(g, mc_enable_pfb_enabled_f()
			| mc_enable_l2_enabled_f()
			| mc_enable_xbar_enabled_f()
			| mc_enable_hub_enabled_f());
}
31 | |||
/* Install the gk20a FB reset op into the HAL table and initialize the
 * kind (memory-layout attribute) lookup tables. */
void gk20a_init_fb(struct gpu_ops *gops)
{
	gops->fb.reset = fb_gk20a_reset;
	gk20a_init_uncompressed_kind_map();
	gk20a_init_kind_attr();
}
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.h b/drivers/gpu/nvgpu/gk20a/fb_gk20a.h new file mode 100644 index 00000000..34c21c9b --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * GK20A FB | ||
3 | * | ||
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
#ifndef _NVHOST_GK20A_FB
#define _NVHOST_GK20A_FB

/* forward decls keep this header self-contained: the prototype below
 * only needs pointers, so no other gk20a header is required */
struct gk20a;
struct gpu_ops;

/* hook the gk20a FB reset op into @gops and init kind tables */
void gk20a_init_fb(struct gpu_ops *gops);
#endif
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c new file mode 100644 index 00000000..5575b995 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -0,0 +1,1836 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/fifo_gk20a.c | ||
3 | * | ||
4 | * GK20A Graphics FIFO (gr host) | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #include <linux/delay.h> | ||
22 | #include <linux/slab.h> | ||
23 | #include <linux/scatterlist.h> | ||
24 | #include <trace/events/gk20a.h> | ||
25 | #include <linux/dma-mapping.h> | ||
26 | #include <linux/nvhost.h> | ||
27 | |||
28 | #include "gk20a.h" | ||
29 | #include "debug_gk20a.h" | ||
30 | #include "hw_fifo_gk20a.h" | ||
31 | #include "hw_pbdma_gk20a.h" | ||
32 | #include "hw_ccsr_gk20a.h" | ||
33 | #include "hw_ram_gk20a.h" | ||
34 | #include "hw_proj_gk20a.h" | ||
35 | #include "hw_top_gk20a.h" | ||
36 | #include "hw_mc_gk20a.h" | ||
37 | #include "hw_gr_gk20a.h" | ||
38 | |||
39 | static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | ||
40 | u32 hw_chid, bool add, | ||
41 | bool wait_for_finish); | ||
42 | static void gk20a_fifo_handle_mmu_fault_thread(struct work_struct *work); | ||
43 | |||
44 | /* | ||
45 | * Link engine IDs to MMU IDs and vice versa. | ||
46 | */ | ||
47 | |||
48 | static inline u32 gk20a_engine_id_to_mmu_id(u32 engine_id) | ||
49 | { | ||
50 | switch (engine_id) { | ||
51 | case ENGINE_GR_GK20A: | ||
52 | return 0x00; | ||
53 | case ENGINE_CE2_GK20A: | ||
54 | return 0x1b; | ||
55 | default: | ||
56 | return ~0; | ||
57 | } | ||
58 | } | ||
59 | |||
60 | static inline u32 gk20a_mmu_id_to_engine_id(u32 engine_id) | ||
61 | { | ||
62 | switch (engine_id) { | ||
63 | case 0x00: | ||
64 | return ENGINE_GR_GK20A; | ||
65 | case 0x1b: | ||
66 | return ENGINE_CE2_GK20A; | ||
67 | default: | ||
68 | return ~0; | ||
69 | } | ||
70 | } | ||
71 | |||
72 | |||
/*
 * Walk the top_device_info_r() table and fill in the GR engine's
 * fifo_engine_info_gk20a record: its runlist id, engine id and the
 * pbdma unit that serves that runlist.
 *
 * Returns 0 on success, -EINVAL when the device info table or the
 * pbdma map is inconsistent.
 */
static int init_engine_info(struct fifo_gk20a *f)
{
	struct gk20a *g = f->g;
	struct device *d = dev_from_gk20a(g);
	struct fifo_engine_info_gk20a *gr_info;
	const u32 gr_sw_id = ENGINE_GR_GK20A;
	u32 i;
	u32 max_info_entries = top_device_info__size_1_v();

	gk20a_dbg_fn("");

	/* all we really care about finding is the graphics entry */
	/* especially early on in sim it probably thinks it has more */
	f->num_engines = 1;

	gr_info = f->engine_info + gr_sw_id;

	/* seed with invalid ids so the sanity check below can detect a
	 * device-info table that never mentioned the GR engine */
	gr_info->sw_id = gr_sw_id;
	gr_info->name = "gr";
	gr_info->dev_info_id = top_device_info_type_enum_graphics_v();
	gr_info->mmu_fault_id = fifo_intr_mmu_fault_eng_id_graphics_v();
	gr_info->runlist_id = ~0;
	gr_info->pbdma_id = ~0;
	gr_info->engine_id = ~0;

	for (i = 0; i < max_info_entries; i++) {
		u32 table_entry = gk20a_readl(f->g, top_device_info_r(i));
		u32 entry = top_device_info_entry_v(table_entry);
		u32 engine_enum = top_device_info_type_enum_v(table_entry);
		u32 table_entry2 = 0;

		if (entry == top_device_info_entry_not_valid_v())
			continue;

		/* a chained entry carries its type enum in the next row;
		 * note this consumes an extra table slot (++i) */
		if (top_device_info_chain_v(table_entry) ==
			top_device_info_chain_enable_v()) {

			table_entry2 = gk20a_readl(f->g,
				top_device_info_r(++i));

			engine_enum = top_device_info_type_enum_v(table_entry2);
		}

		/* we only care about GR engine here */
		if (entry == top_device_info_entry_enum_v() &&
		    engine_enum == gr_info->dev_info_id) {
			int pbdma_id;
			u32 runlist_bit;

			gr_info->runlist_id =
				top_device_info_runlist_enum_v(table_entry);
			gk20a_dbg_info("gr info: runlist_id %d", gr_info->runlist_id);

			gr_info->engine_id =
				top_device_info_engine_enum_v(table_entry);
			gk20a_dbg_info("gr info: engine_id %d", gr_info->engine_id);

			runlist_bit = 1 << gr_info->runlist_id;

			/* find the first pbdma whose map covers this runlist */
			for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) {
				gk20a_dbg_info("gr info: pbdma_map[%d]=%d",
					pbdma_id, f->pbdma_map[pbdma_id]);
				if (f->pbdma_map[pbdma_id] & runlist_bit)
					break;
			}

			if (pbdma_id == f->num_pbdma) {
				gk20a_err(d, "busted pbmda map");
				return -EINVAL;
			}
			gr_info->pbdma_id = pbdma_id;

			break;
		}
	}

	/* still ~0 => the table never described the GR engine */
	if (gr_info->runlist_id == ~0) {
		gk20a_err(d, "busted device info");
		return -EINVAL;
	}

	return 0;
}
156 | |||
/*
 * Tear down all fifo SW state, in reverse order of setup: per-channel
 * support, the userd region (bar1 unmap, sg table, backing memory) and
 * the GR engine's runlist buffers plus bookkeeping arrays.
 */
void gk20a_remove_fifo_support(struct fifo_gk20a *f)
{
	struct gk20a *g = f->g;
	struct device *d = dev_from_gk20a(g);
	struct fifo_engine_info_gk20a *engine_info;
	struct fifo_runlist_info_gk20a *runlist;
	u32 runlist_id;
	u32 i;

	gk20a_dbg_fn("");

	/* give each channel a chance to free its own resources first */
	if (f->channel) {
		int c;
		for (c = 0; c < f->num_channels; c++) {
			if (f->channel[c].remove_support)
				f->channel[c].remove_support(f->channel+c);
		}
		kfree(f->channel);
	}
	/* unmap userd from bar1 before freeing its backing memory */
	if (f->userd.gpu_va)
		gk20a_gmmu_unmap(&g->mm.bar1.vm,
				f->userd.gpu_va,
				f->userd.size,
				gk20a_mem_flag_none);

	if (f->userd.sgt)
		gk20a_free_sgtable(&f->userd.sgt);

	if (f->userd.cpuva)
		dma_free_coherent(d,
				f->userd_total_size,
				f->userd.cpuva,
				f->userd.iova);
	f->userd.cpuva = NULL;
	f->userd.iova = 0;

	/* only the GR engine's runlist was allocated (see init path) */
	engine_info = f->engine_info + ENGINE_GR_GK20A;
	runlist_id = engine_info->runlist_id;
	runlist = &f->runlist_info[runlist_id];

	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
		if (runlist->mem[i].cpuva)
			dma_free_coherent(d,
				runlist->mem[i].size,
				runlist->mem[i].cpuva,
				runlist->mem[i].iova);
		runlist->mem[i].cpuva = NULL;
		runlist->mem[i].iova = 0;
	}

	kfree(runlist->active_channels);

	kfree(f->runlist_info);
	kfree(f->pbdma_map);
	kfree(f->engine_info);
}
213 | |||
/* reads info from hardware and fills in the pbdma exception info record */
static inline void get_exception_pbdma_info(
	struct gk20a *g,
	struct fifo_engine_info_gk20a *eng_info)
{
	struct fifo_pbdma_exception_info_gk20a *e =
		&eng_info->pbdma_exception_info;

	/* single snapshot of the status register, decoded field by field */
	u32 pbdma_status_r = e->status_r = gk20a_readl(g,
		fifo_pbdma_status_r(eng_info->pbdma_id));
	e->id = fifo_pbdma_status_id_v(pbdma_status_r); /* vs. id_hw_v()? */
	/* id/next_id may name either a channel or a tsg */
	e->id_is_chid = fifo_pbdma_status_id_type_v(pbdma_status_r) ==
		fifo_pbdma_status_id_type_chid_v();
	e->chan_status_v = fifo_pbdma_status_chan_status_v(pbdma_status_r);
	e->next_id_is_chid =
		fifo_pbdma_status_next_id_type_v(pbdma_status_r) ==
		fifo_pbdma_status_next_id_type_chid_v();
	e->next_id = fifo_pbdma_status_next_id_v(pbdma_status_r);
	e->chsw_in_progress =
		fifo_pbdma_status_chsw_v(pbdma_status_r) ==
		fifo_pbdma_status_chsw_in_progress_v();
}
236 | |||
/* Snapshot the pbdma exception state for @eng_info and log it. */
static void fifo_pbdma_exception_status(struct gk20a *g,
	struct fifo_engine_info_gk20a *eng_info)
{
	struct fifo_pbdma_exception_info_gk20a *e;
	get_exception_pbdma_info(g, eng_info);
	e = &eng_info->pbdma_exception_info;

	gk20a_dbg_fn("pbdma_id %d, "
		      "id_type %s, id %d, chan_status %d, "
		      "next_id_type %s, next_id %d, "
		      "chsw_in_progress %d",
		      eng_info->pbdma_id,
		      e->id_is_chid ? "chid" : "tsgid", e->id, e->chan_status_v,
		      e->next_id_is_chid ? "chid" : "tsgid", e->next_id,
		      e->chsw_in_progress);
}
253 | |||
/* reads info from hardware and fills in the engine exception info record */
static inline void get_exception_engine_info(
	struct gk20a *g,
	struct fifo_engine_info_gk20a *eng_info)
{
	struct fifo_engine_exception_info_gk20a *e =
		&eng_info->engine_exception_info;
	/* single snapshot of the status register, decoded field by field */
	u32 engine_status_r = e->status_r =
		gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
	e->id = fifo_engine_status_id_v(engine_status_r); /* vs. id_hw_v()? */
	/* id may name either a channel or a tsg */
	e->id_is_chid = fifo_engine_status_id_type_v(engine_status_r) ==
		fifo_engine_status_id_type_chid_v();
	e->ctx_status_v = fifo_engine_status_ctx_status_v(engine_status_r);
	e->faulted =
		fifo_engine_status_faulted_v(engine_status_r) ==
		fifo_engine_status_faulted_true_v();
	e->idle =
		fifo_engine_status_engine_v(engine_status_r) ==
		fifo_engine_status_engine_idle_v();
	e->ctxsw_in_progress =
		fifo_engine_status_ctxsw_v(engine_status_r) ==
		fifo_engine_status_ctxsw_in_progress_v();
}
277 | |||
/* Snapshot the engine exception state for @eng_info and log it. */
static void fifo_engine_exception_status(struct gk20a *g,
			       struct fifo_engine_info_gk20a *eng_info)
{
	struct fifo_engine_exception_info_gk20a *e;
	get_exception_engine_info(g, eng_info);
	e = &eng_info->engine_exception_info;

	gk20a_dbg_fn("engine_id %d, id_type %s, id %d, ctx_status %d, "
		      "faulted %d, idle %d, ctxsw_in_progress %d, ",
		      eng_info->engine_id, e->id_is_chid ? "chid" : "tsgid",
		      e->id, e->ctx_status_v,
		      e->faulted, e->idle, e->ctxsw_in_progress);
}
291 | |||
292 | static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | ||
293 | { | ||
294 | struct fifo_engine_info_gk20a *engine_info; | ||
295 | struct fifo_runlist_info_gk20a *runlist; | ||
296 | struct device *d = dev_from_gk20a(g); | ||
297 | u32 runlist_id; | ||
298 | u32 i; | ||
299 | u64 runlist_size; | ||
300 | |||
301 | gk20a_dbg_fn(""); | ||
302 | |||
303 | f->max_runlists = fifo_eng_runlist_base__size_1_v(); | ||
304 | f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) * | ||
305 | f->max_runlists, GFP_KERNEL); | ||
306 | if (!f->runlist_info) | ||
307 | goto clean_up; | ||
308 | |||
309 | engine_info = f->engine_info + ENGINE_GR_GK20A; | ||
310 | runlist_id = engine_info->runlist_id; | ||
311 | runlist = &f->runlist_info[runlist_id]; | ||
312 | |||
313 | runlist->active_channels = | ||
314 | kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), | ||
315 | GFP_KERNEL); | ||
316 | if (!runlist->active_channels) | ||
317 | goto clean_up_runlist_info; | ||
318 | |||
319 | runlist_size = ram_rl_entry_size_v() * f->num_channels; | ||
320 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | ||
321 | dma_addr_t iova; | ||
322 | |||
323 | runlist->mem[i].cpuva = | ||
324 | dma_alloc_coherent(d, | ||
325 | runlist_size, | ||
326 | &iova, | ||
327 | GFP_KERNEL); | ||
328 | if (!runlist->mem[i].cpuva) { | ||
329 | dev_err(d, "memory allocation failed\n"); | ||
330 | goto clean_up_runlist; | ||
331 | } | ||
332 | runlist->mem[i].iova = iova; | ||
333 | runlist->mem[i].size = runlist_size; | ||
334 | } | ||
335 | mutex_init(&runlist->mutex); | ||
336 | init_waitqueue_head(&runlist->runlist_wq); | ||
337 | |||
338 | /* None of buffers is pinned if this value doesn't change. | ||
339 | Otherwise, one of them (cur_buffer) must have been pinned. */ | ||
340 | runlist->cur_buffer = MAX_RUNLIST_BUFFERS; | ||
341 | |||
342 | gk20a_dbg_fn("done"); | ||
343 | return 0; | ||
344 | |||
345 | clean_up_runlist: | ||
346 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | ||
347 | if (runlist->mem[i].cpuva) | ||
348 | dma_free_coherent(d, | ||
349 | runlist->mem[i].size, | ||
350 | runlist->mem[i].cpuva, | ||
351 | runlist->mem[i].iova); | ||
352 | runlist->mem[i].cpuva = NULL; | ||
353 | runlist->mem[i].iova = 0; | ||
354 | } | ||
355 | |||
356 | kfree(runlist->active_channels); | ||
357 | runlist->active_channels = NULL; | ||
358 | |||
359 | clean_up_runlist_info: | ||
360 | kfree(f->runlist_info); | ||
361 | f->runlist_info = NULL; | ||
362 | |||
363 | clean_up: | ||
364 | gk20a_dbg_fn("fail"); | ||
365 | return -ENOMEM; | ||
366 | } | ||
367 | |||
/* period (us) folded into fifo_eng_timeout_r() for engine-timeout checks */
#define GRFIFO_TIMEOUT_CHECK_PERIOD_US 100000

/*
 * Bring the FIFO block out of reset and program its interrupt and
 * timeout state:
 *  - PMC reset of PFIFO + CE2, then enable every pbdma unit
 *  - clear and enable pfifo interrupts; for each pbdma, clear pending
 *    interrupts and enable all sources except lbreq
 *  - max out the FB timeout, disable PB timeout detection on silicon,
 *    and enable engine timeout detection
 *
 * Always returns 0.
 */
int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
{
	u32 intr_stall;
	u32 mask;
	u32 timeout;
	int i;

	gk20a_dbg_fn("");
	/* enable pmc pfifo */
	gk20a_reset(g, mc_enable_pfifo_enabled_f()
			| mc_enable_ce2_enabled_f());

	/* enable pbdma */
	mask = 0;
	for (i = 0; i < proj_host_num_pbdma_v(); ++i)
		mask |= mc_enable_pb_sel_f(mc_enable_pb_0_enabled_v(), i);
	gk20a_writel(g, mc_enable_pb_r(), mask);

	/* enable pfifo interrupt (clear pending first) */
	gk20a_writel(g, fifo_intr_0_r(), 0xFFFFFFFF);
	gk20a_writel(g, fifo_intr_en_0_r(), 0x7FFFFFFF);
	gk20a_writel(g, fifo_intr_en_1_r(), 0x80000000);

	/* enable pbdma interrupt; lbreq is masked off in both the stall
	 * config and the enable mask */
	mask = 0;
	for (i = 0; i < proj_host_num_pbdma_v(); i++) {
		intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i));
		intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f();
		gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall);
		gk20a_writel(g, pbdma_intr_0_r(i), 0xFFFFFFFF);
		gk20a_writel(g, pbdma_intr_en_0_r(i),
			(~0) & ~pbdma_intr_en_0_lbreq_enabled_f());
		gk20a_writel(g, pbdma_intr_1_r(i), 0xFFFFFFFF);
		gk20a_writel(g, pbdma_intr_en_1_r(i), 0xFFFFFFFF);
	}

	/* TBD: apply overrides */

	/* TBD: BLCG prod */

	/* reset runlist interrupts */
	gk20a_writel(g, fifo_intr_runlist_r(), ~0);

	/* TBD: do we need those? */
	timeout = gk20a_readl(g, fifo_fb_timeout_r());
	timeout = set_field(timeout, fifo_fb_timeout_period_m(),
			fifo_fb_timeout_period_max_f());
	gk20a_writel(g, fifo_fb_timeout_r(), timeout);

	/* PB timeout detection is only disabled on real silicon */
	if (tegra_platform_is_silicon()) {
		timeout = gk20a_readl(g, fifo_pb_timeout_r());
		timeout &= ~fifo_pb_timeout_detection_enabled_f();
		gk20a_writel(g, fifo_pb_timeout_r(), timeout);
	}

	timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US |
			fifo_eng_timeout_detection_enabled_f();
	gk20a_writel(g, fifo_eng_timeout_r(), timeout);

	gk20a_dbg_fn("done");

	return 0;
}
433 | |||
/*
 * Classify the pbdma_intr_0 bits into the severity groups the ISR uses:
 * device-fatal, channel-fatal (recoverable by resetting the channel),
 * and restartable.
 */
static void gk20a_init_fifo_pbdma_intr_descs(struct fifo_gk20a *f)
{
	/* These are all errors which indicate something really wrong
	 * going on in the device. */
	f->intr.pbdma.device_fatal_0 =
		pbdma_intr_0_memreq_pending_f() |
		pbdma_intr_0_memack_timeout_pending_f() |
		pbdma_intr_0_memack_extra_pending_f() |
		pbdma_intr_0_memdat_timeout_pending_f() |
		pbdma_intr_0_memdat_extra_pending_f() |
		pbdma_intr_0_memflush_pending_f() |
		pbdma_intr_0_memop_pending_f() |
		pbdma_intr_0_lbconnect_pending_f() |
		pbdma_intr_0_lbreq_pending_f() |
		pbdma_intr_0_lback_timeout_pending_f() |
		pbdma_intr_0_lback_extra_pending_f() |
		pbdma_intr_0_lbdat_timeout_pending_f() |
		pbdma_intr_0_lbdat_extra_pending_f() |
		pbdma_intr_0_xbarconnect_pending_f() |
		pbdma_intr_0_pri_pending_f();

	/* These are data parsing, framing errors or others which can be
	 * recovered from with intervention... or just resetting the
	 * channel. */
	f->intr.pbdma.channel_fatal_0 =
		pbdma_intr_0_gpfifo_pending_f() |
		pbdma_intr_0_gpptr_pending_f() |
		pbdma_intr_0_gpentry_pending_f() |
		pbdma_intr_0_gpcrc_pending_f() |
		pbdma_intr_0_pbptr_pending_f() |
		pbdma_intr_0_pbentry_pending_f() |
		pbdma_intr_0_pbcrc_pending_f() |
		pbdma_intr_0_method_pending_f() |
		pbdma_intr_0_methodcrc_pending_f() |
		pbdma_intr_0_pbseg_pending_f() |
		pbdma_intr_0_signature_pending_f();

	/* Can be used for sw-methods, or represents
	 * a recoverable timeout. */
	f->intr.pbdma.restartable_0 =
		pbdma_intr_0_device_pending_f() |
		pbdma_intr_0_acquire_pending_f();
}
477 | |||
478 | static int gk20a_init_fifo_setup_sw(struct gk20a *g) | ||
479 | { | ||
480 | struct fifo_gk20a *f = &g->fifo; | ||
481 | struct device *d = dev_from_gk20a(g); | ||
482 | int chid, i, err = 0; | ||
483 | dma_addr_t iova; | ||
484 | |||
485 | gk20a_dbg_fn(""); | ||
486 | |||
487 | if (f->sw_ready) { | ||
488 | gk20a_dbg_fn("skip init"); | ||
489 | return 0; | ||
490 | } | ||
491 | |||
492 | f->g = g; | ||
493 | |||
494 | INIT_WORK(&f->fault_restore_thread, | ||
495 | gk20a_fifo_handle_mmu_fault_thread); | ||
496 | mutex_init(&f->intr.isr.mutex); | ||
497 | gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */ | ||
498 | |||
499 | f->num_channels = ccsr_channel__size_1_v(); | ||
500 | f->num_pbdma = proj_host_num_pbdma_v(); | ||
501 | f->max_engines = ENGINE_INVAL_GK20A; | ||
502 | |||
503 | f->userd_entry_size = 1 << ram_userd_base_shift_v(); | ||
504 | f->userd_total_size = f->userd_entry_size * f->num_channels; | ||
505 | |||
506 | f->userd.cpuva = dma_alloc_coherent(d, | ||
507 | f->userd_total_size, | ||
508 | &iova, | ||
509 | GFP_KERNEL); | ||
510 | if (!f->userd.cpuva) { | ||
511 | dev_err(d, "memory allocation failed\n"); | ||
512 | goto clean_up; | ||
513 | } | ||
514 | |||
515 | f->userd.iova = iova; | ||
516 | err = gk20a_get_sgtable(d, &f->userd.sgt, | ||
517 | f->userd.cpuva, f->userd.iova, | ||
518 | f->userd_total_size); | ||
519 | if (err) { | ||
520 | dev_err(d, "failed to create sg table\n"); | ||
521 | goto clean_up; | ||
522 | } | ||
523 | |||
524 | /* bar1 va */ | ||
525 | f->userd.gpu_va = gk20a_gmmu_map(&g->mm.bar1.vm, | ||
526 | &f->userd.sgt, | ||
527 | f->userd_total_size, | ||
528 | 0, /* flags */ | ||
529 | gk20a_mem_flag_none); | ||
530 | if (!f->userd.gpu_va) { | ||
531 | dev_err(d, "gmmu mapping failed\n"); | ||
532 | goto clean_up; | ||
533 | } | ||
534 | |||
535 | gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); | ||
536 | |||
537 | f->userd.size = f->userd_total_size; | ||
538 | |||
539 | f->channel = kzalloc(f->num_channels * sizeof(*f->channel), | ||
540 | GFP_KERNEL); | ||
541 | f->pbdma_map = kzalloc(f->num_pbdma * sizeof(*f->pbdma_map), | ||
542 | GFP_KERNEL); | ||
543 | f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info), | ||
544 | GFP_KERNEL); | ||
545 | |||
546 | if (!(f->channel && f->pbdma_map && f->engine_info)) { | ||
547 | err = -ENOMEM; | ||
548 | goto clean_up; | ||
549 | } | ||
550 | |||
551 | /* pbdma map needs to be in place before calling engine info init */ | ||
552 | for (i = 0; i < f->num_pbdma; ++i) | ||
553 | f->pbdma_map[i] = gk20a_readl(g, fifo_pbdma_map_r(i)); | ||
554 | |||
555 | init_engine_info(f); | ||
556 | |||
557 | init_runlist(g, f); | ||
558 | |||
559 | for (chid = 0; chid < f->num_channels; chid++) { | ||
560 | f->channel[chid].userd_cpu_va = | ||
561 | f->userd.cpuva + chid * f->userd_entry_size; | ||
562 | f->channel[chid].userd_iova = | ||
563 | NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova) | ||
564 | + chid * f->userd_entry_size; | ||
565 | f->channel[chid].userd_gpu_va = | ||
566 | f->userd.gpu_va + chid * f->userd_entry_size; | ||
567 | |||
568 | gk20a_init_channel_support(g, chid); | ||
569 | } | ||
570 | mutex_init(&f->ch_inuse_mutex); | ||
571 | |||
572 | f->remove_support = gk20a_remove_fifo_support; | ||
573 | |||
574 | f->deferred_reset_pending = false; | ||
575 | mutex_init(&f->deferred_reset_mutex); | ||
576 | |||
577 | f->sw_ready = true; | ||
578 | |||
579 | gk20a_dbg_fn("done"); | ||
580 | return 0; | ||
581 | |||
582 | clean_up: | ||
583 | gk20a_dbg_fn("fail"); | ||
584 | if (f->userd.gpu_va) | ||
585 | gk20a_gmmu_unmap(&g->mm.bar1.vm, | ||
586 | f->userd.gpu_va, | ||
587 | f->userd.size, | ||
588 | gk20a_mem_flag_none); | ||
589 | if (f->userd.sgt) | ||
590 | gk20a_free_sgtable(&f->userd.sgt); | ||
591 | if (f->userd.cpuva) | ||
592 | dma_free_coherent(d, | ||
593 | f->userd_total_size, | ||
594 | f->userd.cpuva, | ||
595 | f->userd.iova); | ||
596 | f->userd.cpuva = NULL; | ||
597 | f->userd.iova = 0; | ||
598 | |||
599 | memset(&f->userd, 0, sizeof(struct userd_desc)); | ||
600 | |||
601 | kfree(f->channel); | ||
602 | f->channel = NULL; | ||
603 | kfree(f->pbdma_map); | ||
604 | f->pbdma_map = NULL; | ||
605 | kfree(f->engine_info); | ||
606 | f->engine_info = NULL; | ||
607 | |||
608 | return err; | ||
609 | } | ||
610 | |||
611 | static void gk20a_fifo_handle_runlist_event(struct gk20a *g) | ||
612 | { | ||
613 | struct fifo_gk20a *f = &g->fifo; | ||
614 | struct fifo_runlist_info_gk20a *runlist; | ||
615 | unsigned long runlist_event; | ||
616 | u32 runlist_id; | ||
617 | |||
618 | runlist_event = gk20a_readl(g, fifo_intr_runlist_r()); | ||
619 | gk20a_writel(g, fifo_intr_runlist_r(), runlist_event); | ||
620 | |||
621 | for_each_set_bit(runlist_id, &runlist_event, f->max_runlists) { | ||
622 | runlist = &f->runlist_info[runlist_id]; | ||
623 | wake_up(&runlist->runlist_wq); | ||
624 | } | ||
625 | |||
626 | } | ||
627 | |||
628 | static int gk20a_init_fifo_setup_hw(struct gk20a *g) | ||
629 | { | ||
630 | struct fifo_gk20a *f = &g->fifo; | ||
631 | |||
632 | gk20a_dbg_fn(""); | ||
633 | |||
634 | /* test write, read through bar1 @ userd region before | ||
635 | * turning on the snooping */ | ||
636 | { | ||
637 | struct fifo_gk20a *f = &g->fifo; | ||
638 | u32 v, v1 = 0x33, v2 = 0x55; | ||
639 | |||
640 | u32 bar1_vaddr = f->userd.gpu_va; | ||
641 | volatile u32 *cpu_vaddr = f->userd.cpuva; | ||
642 | |||
643 | gk20a_dbg_info("test bar1 @ vaddr 0x%x", | ||
644 | bar1_vaddr); | ||
645 | |||
646 | v = gk20a_bar1_readl(g, bar1_vaddr); | ||
647 | |||
648 | *cpu_vaddr = v1; | ||
649 | smp_mb(); | ||
650 | |||
651 | if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) { | ||
652 | gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!"); | ||
653 | return -EINVAL; | ||
654 | } | ||
655 | |||
656 | gk20a_bar1_writel(g, bar1_vaddr, v2); | ||
657 | |||
658 | if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) { | ||
659 | gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!"); | ||
660 | return -EINVAL; | ||
661 | } | ||
662 | |||
663 | /* is it visible to the cpu? */ | ||
664 | if (*cpu_vaddr != v2) { | ||
665 | gk20a_err(dev_from_gk20a(g), | ||
666 | "cpu didn't see bar1 write @ %p!", | ||
667 | cpu_vaddr); | ||
668 | } | ||
669 | |||
670 | /* put it back */ | ||
671 | gk20a_bar1_writel(g, bar1_vaddr, v); | ||
672 | } | ||
673 | |||
674 | /*XXX all manner of flushes and caching worries, etc */ | ||
675 | |||
676 | /* set the base for the userd region now */ | ||
677 | gk20a_writel(g, fifo_bar1_base_r(), | ||
678 | fifo_bar1_base_ptr_f(f->userd.gpu_va >> 12) | | ||
679 | fifo_bar1_base_valid_true_f()); | ||
680 | |||
681 | gk20a_dbg_fn("done"); | ||
682 | |||
683 | return 0; | ||
684 | } | ||
685 | |||
/*
 * Initialize FIFO support: software structures first, then the
 * hardware programming that depends on them.
 * Returns 0 on success or a negative errno.
 */
int gk20a_init_fifo_support(struct gk20a *g)
{
	/* fix: err was u32, but the setup helpers return negative
	 * errno values as int */
	int err;

	err = gk20a_init_fifo_setup_sw(g);
	if (err)
		return err;

	return gk20a_init_fifo_setup_hw(g);
}
700 | |||
701 | static struct channel_gk20a * | ||
702 | channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr) | ||
703 | { | ||
704 | int ci; | ||
705 | if (unlikely(!f->channel)) | ||
706 | return NULL; | ||
707 | for (ci = 0; ci < f->num_channels; ci++) { | ||
708 | struct channel_gk20a *c = f->channel+ci; | ||
709 | if (c->inst_block.cpuva && | ||
710 | (inst_ptr == c->inst_block.cpu_pa)) | ||
711 | return f->channel+ci; | ||
712 | } | ||
713 | return NULL; | ||
714 | } | ||
715 | |||
/* Human-readable descriptions used when logging MMU faults.
 * Indices match the hw-defined values decoded from
 * fifo_intr_mmu_fault_info_r (see get_exception_mmu_fault_info).
 * tbd: move to setup
 * */
static const char * const fault_type_descs[] = {
	 "pde", /*fifo_intr_mmu_fault_info_type_pde_v() == 0 */
	 "pde size",
	 "pte",
	 "va limit viol",
	 "unbound inst",
	 "priv viol",
	 "ro viol",
	 "wo viol",
	 "pitch mask",
	 "work creation",
	 "bad aperture",
	 "compression failure",
	 "bad kind",
	 "region viol",
	 "dual ptes",
	 "poisoned",
};
/* engine subunit descriptions, indexed by engine_subid */
static const char * const engine_subid_descs[] = {
	"gpc",
	"hub",
};

/* client descriptions for hub-side faults, indexed by client id */
static const char * const hub_client_descs[] = {
	"vip", "ce0", "ce1", "dniso", "fe", "fecs", "host", "host cpu",
	"host cpu nb", "iso", "mmu", "mspdec", "msppp", "msvld",
	"niso", "p2p", "pd", "perf", "pmu", "raster twod", "scc",
	"scc nb", "sec", "ssync", "gr copy", "ce2", "xv", "mmu nb",
	"msenc", "d falcon", "sked", "a falcon", "n/a",
};

/* client descriptions for gpc-side faults, indexed by client id */
static const char * const gpc_client_descs[] = {
	"l1 0", "t1 0", "pe 0",
	"l1 1", "t1 1", "pe 1",
	"l1 2", "t1 2", "pe 2",
	"l1 3", "t1 3", "pe 3",
	"rast", "gcc", "gpccs",
	"prop 0", "prop 1", "prop 2", "prop 3",
	"l1 4", "t1 4", "pe 4",
	"l1 5", "t1 5", "pe 5",
	"l1 6", "t1 6", "pe 6",
	"l1 7", "t1 7", "pe 7",
	"gpm",
	"ltp utlb 0", "ltp utlb 1", "ltp utlb 2", "ltp utlb 3",
	"rgg utlb",
};
766 | |||
/* reads info from hardware and fills in mmu fault info record.
 * Decodes fault type/subunit/client from the fault-info register,
 * attaches the matching description strings, and reconstructs the
 * 40-bit faulting instance pointer from its aligned register form. */
static inline void get_exception_mmu_fault_info(
	struct gk20a *g, u32 engine_id,
	struct fifo_mmu_fault_info_gk20a *f)
{
	u32 fault_info_v;

	gk20a_dbg_fn("engine_id %d", engine_id);

	memset(f, 0, sizeof(*f));

	f->fault_info_v = fault_info_v = gk20a_readl(g,
	     fifo_intr_mmu_fault_info_r(engine_id));
	f->fault_type_v =
		fifo_intr_mmu_fault_info_type_v(fault_info_v);
	f->engine_subid_v =
		fifo_intr_mmu_fault_info_engine_subid_v(fault_info_v);
	f->client_v = fifo_intr_mmu_fault_info_client_v(fault_info_v);

	/* out-of-range values would index past the desc tables */
	BUG_ON(f->fault_type_v >= ARRAY_SIZE(fault_type_descs));
	f->fault_type_desc =  fault_type_descs[f->fault_type_v];

	BUG_ON(f->engine_subid_v >= ARRAY_SIZE(engine_subid_descs));
	f->engine_subid_desc = engine_subid_descs[f->engine_subid_v];

	/* client id namespaces differ between hub and gpc subunits */
	if (f->engine_subid_v ==
	    fifo_intr_mmu_fault_info_engine_subid_hub_v()) {

		BUG_ON(f->client_v >= ARRAY_SIZE(hub_client_descs));
		f->client_desc = hub_client_descs[f->client_v];
	} else if (f->engine_subid_v ==
		   fifo_intr_mmu_fault_info_engine_subid_gpc_v()) {
		BUG_ON(f->client_v >= ARRAY_SIZE(gpc_client_descs));
		f->client_desc = gpc_client_descs[f->client_v];
	} else {
		BUG_ON(1);
	}

	f->fault_hi_v = gk20a_readl(g, fifo_intr_mmu_fault_hi_r(engine_id));
	f->fault_lo_v = gk20a_readl(g, fifo_intr_mmu_fault_lo_r(engine_id));
	/* note:ignoring aperture on gk20a... */
	f->inst_ptr = fifo_intr_mmu_fault_inst_ptr_v(
		 gk20a_readl(g, fifo_intr_mmu_fault_inst_r(engine_id)));
	/* note: inst_ptr is a 40b phys addr.  */
	f->inst_ptr <<= fifo_intr_mmu_fault_inst_ptr_align_shift_v();
}
813 | |||
/* Reset a single engine after a fault.  GR gets a full re-init
 * sequence; the copy engine is reset through the mc enable register.
 * Unknown engine ids are silently ignored. */
static void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
{
	gk20a_dbg_fn("");

	if (engine_id == top_device_info_type_enum_graphics_v()) {
		/* resetting engine using mc_enable_r() is not enough,
		 * we do full init sequence */
		gk20a_gr_reset(g);
	}
	if (engine_id == top_device_info_type_enum_copy0_v())
		gk20a_reset(g, mc_enable_ce2_m());
}
826 | |||
/* Deferred second half of MMU fault recovery, scheduled from
 * fifo_gk20a_finish_mmu_fault_handling(): re-initializes FECS/GR,
 * re-enables ELPG, restores the runlists, and releases the runlist
 * mutexes that were taken in gk20a_fifo_handle_mmu_fault(). */
static void gk20a_fifo_handle_mmu_fault_thread(struct work_struct *work)
{
	struct fifo_gk20a *f = container_of(work, struct fifo_gk20a,
					    fault_restore_thread);
	struct gk20a *g = f->g;
	int i;

	/* Reinitialise FECS and GR */
	gk20a_init_pmu_setup_hw2(g);

	/* It is safe to enable ELPG again. */
	gk20a_pmu_enable_elpg(g);

	/* Restore the runlist */
	for (i = 0; i < g->fifo.max_runlists; i++)
		gk20a_fifo_update_runlist_locked(g, i, ~0, true, true);

	/* unlock all runlists */
	for (i = 0; i < g->fifo.max_runlists; i++)
		mutex_unlock(&g->fifo.runlist_info[i].mutex);

}
849 | |||
850 | static void gk20a_fifo_handle_chsw_fault(struct gk20a *g) | ||
851 | { | ||
852 | u32 intr; | ||
853 | |||
854 | intr = gk20a_readl(g, fifo_intr_chsw_error_r()); | ||
855 | gk20a_err(dev_from_gk20a(g), "chsw: %08x\n", intr); | ||
856 | gk20a_fecs_dump_falcon_stats(g); | ||
857 | gk20a_writel(g, fifo_intr_chsw_error_r(), intr); | ||
858 | } | ||
859 | |||
860 | static void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g) | ||
861 | { | ||
862 | struct device *dev = dev_from_gk20a(g); | ||
863 | u32 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r()); | ||
864 | gk20a_err(dev, "dropped mmu fault (0x%08x)", fault_id); | ||
865 | } | ||
866 | |||
867 | static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id, | ||
868 | struct fifo_mmu_fault_info_gk20a *f, bool fake_fault) | ||
869 | { | ||
870 | /* channel recovery is only deferred if an sm debugger | ||
871 | is attached and has MMU debug mode is enabled */ | ||
872 | if (!gk20a_gr_sm_debugger_attached(g) || | ||
873 | !gk20a_mm_mmu_debug_mode_enabled(g)) | ||
874 | return false; | ||
875 | |||
876 | /* if this fault is fake (due to RC recovery), don't defer recovery */ | ||
877 | if (fake_fault) | ||
878 | return false; | ||
879 | |||
880 | if (engine_id != ENGINE_GR_GK20A || | ||
881 | f->engine_subid_v != fifo_intr_mmu_fault_info_engine_subid_gpc_v()) | ||
882 | return false; | ||
883 | |||
884 | return true; | ||
885 | } | ||
886 | |||
/* Final, non-deferrable part of MMU fault recovery: reset the faulted
 * engines, clear the runlists, acknowledge the fault interrupt, resume
 * the scheduler, and schedule the restore work item (which re-enables
 * PMU/ELPG, restores runlists, and unlocks the runlist mutexes). */
void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
		unsigned long fault_id) {
	u32 engine_mmu_id;
	int i;

	/* reset engines */
	for_each_set_bit(engine_mmu_id, &fault_id, 32) {
		/* fault-id bits are in mmu-id space; translate first */
		u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id);
		if (engine_id != ~0)
			gk20a_fifo_reset_engine(g, engine_id);
	}

	/* CLEAR the runlists. Do not wait for runlist to start as
	 * some engines may not be available right now */
	for (i = 0; i < g->fifo.max_runlists; i++)
		gk20a_fifo_update_runlist_locked(g, i, ~0, false, false);

	/* clear interrupt */
	gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);

	/* resume scheduler */
	gk20a_writel(g, fifo_error_sched_disable_r(),
		     gk20a_readl(g, fifo_error_sched_disable_r()));

	/* Spawn a work to enable PMU and restore runlists */
	schedule_work(&g->fifo.fault_restore_thread);
}
914 | |||
/* Mark a channel as having generated an MMU fault: set the error
 * notifier (unless a prior error is already recorded), flag the
 * channel as timed out, and wake any pending waiters.  Returns
 * whether a verbose debug dump should be produced. */
static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
		struct channel_gk20a *ch) {
	bool verbose = true;
	if (!ch)
		return verbose;

	gk20a_err(dev_from_gk20a(g),
		"channel %d generated a mmu fault",
		ch->hw_chid);
	if (ch->error_notifier) {
		u32 err = ch->error_notifier->info32;
		/* status == 0xffff means a notifier was already posted */
		if (ch->error_notifier->status == 0xffff) {
			/* If error code is already set, this mmu fault
			 * was triggered as part of recovery from other
			 * error condition.
			 * Don't overwrite error flag. */
			/* Fifo timeout debug spew is controlled by user */
			if (err == NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT)
				verbose = ch->timeout_debug_dump;
		} else {
			gk20a_set_error_notifier(ch,
				NVHOST_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
		}
	}
	/* mark channel as faulted */
	ch->has_timedout = true;
	/* ensure the flag is visible before waking waiters */
	wmb();
	/* unblock pending waits */
	wake_up(&ch->semaphore_wq);
	wake_up(&ch->notifier_wq);
	wake_up(&ch->submit_wq);
	return verbose;
}
948 | |||
949 | |||
/* Handle an MMU fault interrupt, or a fake fault injected by the
 * RC-recovery path (gk20a_fifo_recover).  Identifies the faulting
 * engine(s) and channel, disables the channel, and either defers the
 * engine reset to channel-free time (SM debugger attached) or finishes
 * recovery immediately.  Returns whether a verbose debug dump is
 * warranted. */
static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
{
	bool fake_fault;
	unsigned long fault_id;
	unsigned long engine_mmu_id;
	int i;
	bool verbose = true;
	gk20a_dbg_fn("");

	g->fifo.deferred_reset_pending = false;

	/* Disable ELPG */
	gk20a_pmu_disable_elpg(g);

	/* If we have recovery in progress, MMU fault id is invalid */
	if (g->fifo.mmu_fault_engines) {
		fault_id = g->fifo.mmu_fault_engines;
		g->fifo.mmu_fault_engines = 0;
		fake_fault = true;
	} else {
		fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
		fake_fault = false;
		gk20a_debug_dump(g->dev);
	}

	/* lock all runlists. Note that locks are are released in
	 * gk20a_fifo_handle_mmu_fault_thread() */
	for (i = 0; i < g->fifo.max_runlists; i++)
		mutex_lock(&g->fifo.runlist_info[i].mutex);

	/* go through all faulted engines */
	for_each_set_bit(engine_mmu_id, &fault_id, 32) {
		/* bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to
		 * engines. Convert engine_mmu_id to engine_id */
		u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id);
		/* NOTE(review): this always points at runlist_info[0];
		 * presumably the faulting engine's own runlist should be
		 * used -- verify against the engine_info runlist id */
		struct fifo_runlist_info_gk20a *runlist = g->fifo.runlist_info;
		struct fifo_mmu_fault_info_gk20a f;
		struct channel_gk20a *ch = NULL;

		get_exception_mmu_fault_info(g, engine_mmu_id, &f);
		trace_gk20a_mmu_fault(f.fault_hi_v,
				      f.fault_lo_v,
				      f.fault_info_v,
				      f.inst_ptr,
				      engine_id,
				      f.engine_subid_desc,
				      f.client_desc,
				      f.fault_type_desc);
		gk20a_err(dev_from_gk20a(g), "mmu fault on engine %d, "
			   "engine subid %d (%s), client %d (%s), "
			   "addr 0x%08x:0x%08x, type %d (%s), info 0x%08x,"
			   "inst_ptr 0x%llx\n",
			   engine_id,
			   f.engine_subid_v, f.engine_subid_desc,
			   f.client_v, f.client_desc,
			   f.fault_hi_v, f.fault_lo_v,
			   f.fault_type_v, f.fault_type_desc,
			   f.fault_info_v, f.inst_ptr);

		/* get the channel */
		if (fake_fault) {
			/* read and parse engine status */
			u32 status = gk20a_readl(g,
				fifo_engine_status_r(engine_id));
			u32 ctx_status =
				fifo_engine_status_ctx_status_v(status);
			bool type_ch = fifo_pbdma_status_id_type_v(status) ==
				fifo_pbdma_status_id_type_chid_v();

			/* use next_id if context load is failing */
			u32 id = (ctx_status ==
				fifo_engine_status_ctx_status_ctxsw_load_v()) ?
				fifo_engine_status_next_id_v(status) :
				fifo_engine_status_id_v(status);

			if (type_ch) {
				ch = g->fifo.channel + id;
			} else {
				gk20a_err(dev_from_gk20a(g), "non-chid type not supported");
				WARN_ON(1);
			}
		} else {
			/* read channel based on instruction pointer */
			ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr);
		}

		if (ch) {
			if (ch->in_use) {
				/* disable the channel from hw and increment
				 * syncpoints */
				gk20a_disable_channel_no_update(ch);

				/* remove the channel from runlist */
				clear_bit(ch->hw_chid,
					  runlist->active_channels);
			}

			/* check if engine reset should be deferred */
			if (gk20a_fifo_should_defer_engine_reset(g, engine_id, &f, fake_fault)) {
				g->fifo.mmu_fault_engines = fault_id;

				/* handled during channel free */
				g->fifo.deferred_reset_pending = true;
			} else
				verbose = gk20a_fifo_set_ctx_mmu_error(g, ch);

		} else if (f.inst_ptr ==
				g->mm.bar1.inst_block.cpu_pa) {
			gk20a_err(dev_from_gk20a(g), "mmu fault from bar1");
		} else if (f.inst_ptr ==
				g->mm.pmu.inst_block.cpu_pa) {
			gk20a_err(dev_from_gk20a(g), "mmu fault from pmu");
		} else
			gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault");
	}

	if (g->fifo.deferred_reset_pending) {
		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "sm debugger attached,"
			   " deferring channel recovery to channel free");
		/* clear interrupt */
		gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);
		return verbose;
	}

	/* resetting the engines and clearing the runlists is done in
	   a separate function to allow deferred reset. */
	fifo_gk20a_finish_mmu_fault_handling(g, fault_id);
	return verbose;
}
1079 | |||
1080 | static void gk20a_fifo_get_faulty_channel(struct gk20a *g, int engine_id, | ||
1081 | u32 *chid, bool *type_ch) | ||
1082 | { | ||
1083 | u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id)); | ||
1084 | u32 ctx_status = fifo_engine_status_ctx_status_v(status); | ||
1085 | |||
1086 | *type_ch = fifo_pbdma_status_id_type_v(status) == | ||
1087 | fifo_pbdma_status_id_type_chid_v(); | ||
1088 | /* use next_id if context load is failing */ | ||
1089 | *chid = (ctx_status == | ||
1090 | fifo_engine_status_ctx_status_ctxsw_load_v()) ? | ||
1091 | fifo_engine_status_next_id_v(status) : | ||
1092 | fifo_engine_status_id_v(status); | ||
1093 | } | ||
1094 | |||
1095 | void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, | ||
1096 | bool verbose) | ||
1097 | { | ||
1098 | unsigned long end_jiffies = jiffies + | ||
1099 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | ||
1100 | unsigned long delay = GR_IDLE_CHECK_DEFAULT; | ||
1101 | unsigned long engine_id, i; | ||
1102 | unsigned long _engine_ids = __engine_ids; | ||
1103 | unsigned long engine_ids = 0; | ||
1104 | int ret; | ||
1105 | |||
1106 | if (verbose) | ||
1107 | gk20a_debug_dump(g->dev); | ||
1108 | |||
1109 | /* store faulted engines in advance */ | ||
1110 | g->fifo.mmu_fault_engines = 0; | ||
1111 | for_each_set_bit(engine_id, &_engine_ids, 32) { | ||
1112 | bool ref_type_ch; | ||
1113 | int ref_chid; | ||
1114 | gk20a_fifo_get_faulty_channel(g, engine_id, &ref_chid, | ||
1115 | &ref_type_ch); | ||
1116 | |||
1117 | /* Reset *all* engines that use the | ||
1118 | * same channel as faulty engine */ | ||
1119 | for (i = 0; i < g->fifo.max_engines; i++) { | ||
1120 | bool type_ch; | ||
1121 | u32 chid; | ||
1122 | gk20a_fifo_get_faulty_channel(g, i, &chid, &type_ch); | ||
1123 | if (ref_type_ch == type_ch && ref_chid == chid) { | ||
1124 | engine_ids |= BIT(i); | ||
1125 | g->fifo.mmu_fault_engines |= | ||
1126 | BIT(gk20a_engine_id_to_mmu_id(i)); | ||
1127 | } | ||
1128 | } | ||
1129 | |||
1130 | } | ||
1131 | |||
1132 | /* trigger faults for all bad engines */ | ||
1133 | for_each_set_bit(engine_id, &engine_ids, 32) { | ||
1134 | if (engine_id > g->fifo.max_engines) { | ||
1135 | WARN_ON(true); | ||
1136 | break; | ||
1137 | } | ||
1138 | |||
1139 | gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), | ||
1140 | fifo_trigger_mmu_fault_id_f( | ||
1141 | gk20a_engine_id_to_mmu_id(engine_id)) | | ||
1142 | fifo_trigger_mmu_fault_enable_f(1)); | ||
1143 | } | ||
1144 | |||
1145 | /* Wait for MMU fault to trigger */ | ||
1146 | ret = -EBUSY; | ||
1147 | do { | ||
1148 | if (gk20a_readl(g, fifo_intr_0_r()) & | ||
1149 | fifo_intr_0_mmu_fault_pending_f()) { | ||
1150 | ret = 0; | ||
1151 | break; | ||
1152 | } | ||
1153 | |||
1154 | usleep_range(delay, delay * 2); | ||
1155 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | ||
1156 | } while (time_before(jiffies, end_jiffies) || | ||
1157 | !tegra_platform_is_silicon()); | ||
1158 | |||
1159 | if (ret) | ||
1160 | gk20a_err(dev_from_gk20a(g), "mmu fault timeout"); | ||
1161 | |||
1162 | /* release mmu fault trigger */ | ||
1163 | for_each_set_bit(engine_id, &engine_ids, 32) | ||
1164 | gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), 0); | ||
1165 | } | ||
1166 | |||
1167 | |||
/* Handle a scheduler error interrupt.  Locates the busy engine stuck
 * mid context-switch; for a ctxsw-timeout on a regular channel it
 * either extends the channel's accumulated timeout or recovers the
 * engine.  Returns whether a verbose debug dump is warranted. */
static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
{
	u32 sched_error;
	u32 engine_id;
	int id = -1;
	bool non_chid = false;

	/* read and reset the scheduler error register */
	sched_error = gk20a_readl(g, fifo_intr_sched_error_r());
	gk20a_writel(g, fifo_intr_0_r(), fifo_intr_0_sched_error_reset_f());

	for (engine_id = 0; engine_id < g->fifo.max_engines; engine_id++) {
		u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
		u32 ctx_status = fifo_engine_status_ctx_status_v(status);
		bool failing_engine;

		/* we are interested in busy engines */
		failing_engine = fifo_engine_status_engine_v(status) ==
			fifo_engine_status_engine_busy_v();

		/* ..that are doing context switch */
		failing_engine = failing_engine &&
			(ctx_status ==
				fifo_engine_status_ctx_status_ctxsw_switch_v()
			|| ctx_status ==
				fifo_engine_status_ctx_status_ctxsw_save_v()
			|| ctx_status ==
				fifo_engine_status_ctx_status_ctxsw_load_v());

		if (failing_engine) {
			/* during a load, the incoming (next) id is at fault */
			id = (ctx_status ==
				fifo_engine_status_ctx_status_ctxsw_load_v()) ?
				fifo_engine_status_next_id_v(status) :
				fifo_engine_status_id_v(status);
			non_chid = fifo_pbdma_status_id_type_v(status) !=
				fifo_pbdma_status_id_type_chid_v();
			break;
		}
	}

	/* could not find the engine - should never happen */
	if (unlikely(engine_id >= g->fifo.max_engines))
		goto err;

	if (fifo_intr_sched_error_code_f(sched_error) ==
			fifo_intr_sched_error_code_ctxsw_timeout_v()) {
		struct fifo_gk20a *f = &g->fifo;
		struct channel_gk20a *ch = &f->channel[id];

		/* non-channel ids cannot be tracked; recover immediately */
		if (non_chid) {
			gk20a_fifo_recover(g, BIT(engine_id), true);
			goto err;
		}

		if (gk20a_channel_update_and_check_timeout(ch,
			GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000)) {
			gk20a_set_error_notifier(ch,
				NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
			gk20a_err(dev_from_gk20a(g),
				"fifo sched ctxsw timeout error:"
				"engine = %u, ch = %d", engine_id, id);
			gk20a_fifo_recover(g, BIT(engine_id),
				ch->timeout_debug_dump);
		} else {
			/* timeout budget not yet exhausted; keep waiting */
			gk20a_warn(dev_from_gk20a(g),
				"fifo is waiting for ctx switch for %d ms,"
				"ch = %d\n",
				ch->timeout_accumulated_ms,
				id);
		}
		return ch->timeout_debug_dump;
	}
err:
	gk20a_err(dev_from_gk20a(g), "fifo sched error : 0x%08x, engine=%u, %s=%d",
		sched_error, engine_id, non_chid ? "non-ch" : "ch", id);

	return true;
}
1246 | |||
/* Dispatch the error bits of fifo_intr_0: pio/bind/sched/chsw/mmu/
 * dropped-mmu faults.  Returns the mask of interrupt bits that were
 * handled so the caller can acknowledge them. */
static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
{
	bool print_channel_reset_log = false, reset_engine = false;
	struct device *dev = dev_from_gk20a(g);
	u32 handled = 0;

	gk20a_dbg_fn("");

	if (fifo_intr & fifo_intr_0_pio_error_pending_f()) {
		/* pio mode is unused.  this shouldn't happen, ever. */
		/* should we clear it or just leave it pending? */
		gk20a_err(dev, "fifo pio error!\n");
		BUG_ON(1);
	}

	if (fifo_intr & fifo_intr_0_bind_error_pending_f()) {
		u32 bind_error = gk20a_readl(g, fifo_intr_bind_error_r());
		gk20a_err(dev, "fifo bind error: 0x%08x", bind_error);
		print_channel_reset_log = true;
		handled |= fifo_intr_0_bind_error_pending_f();
	}

	if (fifo_intr & fifo_intr_0_sched_error_pending_f()) {
		print_channel_reset_log = gk20a_fifo_handle_sched_error(g);
		handled |= fifo_intr_0_sched_error_pending_f();
	}

	if (fifo_intr & fifo_intr_0_chsw_error_pending_f()) {
		gk20a_fifo_handle_chsw_fault(g);
		handled |= fifo_intr_0_chsw_error_pending_f();
	}

	if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) {
		print_channel_reset_log = gk20a_fifo_handle_mmu_fault(g);
		reset_engine  = true;
		handled |= fifo_intr_0_mmu_fault_pending_f();
	}

	if (fifo_intr & fifo_intr_0_dropped_mmu_fault_pending_f()) {
		gk20a_fifo_handle_dropped_mmu_fault(g);
		handled |= fifo_intr_0_dropped_mmu_fault_pending_f();
	}

	/* suppress the dump when recovery was deferred to channel free */
	print_channel_reset_log = !g->fifo.deferred_reset_pending
			&& print_channel_reset_log;

	if (print_channel_reset_log) {
		int engine_id;
		gk20a_err(dev_from_gk20a(g),
			   "channel reset initated from %s", __func__);
		for (engine_id = 0;
		     engine_id < g->fifo.max_engines;
		     engine_id++) {
			gk20a_dbg_fn("enum:%d -> engine_id:%d", engine_id,
				g->fifo.engine_info[engine_id].engine_id);
			fifo_pbdma_exception_status(g,
					&g->fifo.engine_info[engine_id]);
			fifo_engine_exception_status(g,
					&g->fifo.engine_info[engine_id]);
		}
	}

	return handled;
}
1311 | |||
1312 | |||
/* Handle the interrupts of one PBDMA unit.  INTR_0 bits are classified
 * via the groups built in gk20a_init_fifo_pbdma_intr_descs(); all
 * INTR_1 bits are treated as channel-fatal.  Returns the handled
 * INTR_0 mask. */
static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev,
					struct gk20a *g,
					struct fifo_gk20a *f,
					u32 pbdma_id)
{
	u32 pbdma_intr_0 = gk20a_readl(g, pbdma_intr_0_r(pbdma_id));
	u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
	u32 handled = 0;
	bool reset_device = false;
	bool reset_channel = false;

	gk20a_dbg_fn("");

	gk20a_dbg(gpu_dbg_intr, "pbdma id intr pending %d %08x %08x", pbdma_id,
			pbdma_intr_0, pbdma_intr_1);
	if (pbdma_intr_0) {
		if (f->intr.pbdma.device_fatal_0 & pbdma_intr_0) {
			dev_err(dev, "unrecoverable device error: "
				"pbdma_intr_0(%d):0x%08x", pbdma_id, pbdma_intr_0);
			reset_device = true;
			/* TODO: disable pbdma intrs */
			handled |= f->intr.pbdma.device_fatal_0 & pbdma_intr_0;
		}
		if (f->intr.pbdma.channel_fatal_0 & pbdma_intr_0) {
			dev_warn(dev, "channel error: "
				 "pbdma_intr_0(%d):0x%08x", pbdma_id, pbdma_intr_0);
			reset_channel = true;
			/* TODO: clear pbdma channel errors */
			handled |= f->intr.pbdma.channel_fatal_0 & pbdma_intr_0;
		}
		if (f->intr.pbdma.restartable_0 & pbdma_intr_0) {
			/* NOTE(review): the method registers are read and
			 * cleared with index 0, not pbdma_id -- benign if
			 * gk20a has a single pbdma, but verify */
			dev_warn(dev, "sw method: %08x %08x",
				gk20a_readl(g, pbdma_method0_r(0)),
				gk20a_readl(g, pbdma_method0_r(0)+4));
			gk20a_writel(g, pbdma_method0_r(0), 0);
			gk20a_writel(g, pbdma_method0_r(0)+4, 0);
			handled |= f->intr.pbdma.restartable_0 & pbdma_intr_0;
		}

		/* acknowledge everything that was pending */
		gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0);
	}

	/* all intrs in _intr_1 are "host copy engine" related,
	 * which gk20a doesn't have. for now just make them channel fatal. */
	if (pbdma_intr_1) {
		dev_err(dev, "channel hce error: pbdma_intr_1(%d): 0x%08x",
			pbdma_id, pbdma_intr_1);
		reset_channel = true;
		gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1);
	}



	return handled;
}
1368 | |||
1369 | static u32 fifo_channel_isr(struct gk20a *g, u32 fifo_intr) | ||
1370 | { | ||
1371 | gk20a_channel_semaphore_wakeup(g); | ||
1372 | return fifo_intr_0_channel_intr_pending_f(); | ||
1373 | } | ||
1374 | |||
1375 | |||
1376 | static u32 fifo_pbdma_isr(struct gk20a *g, u32 fifo_intr) | ||
1377 | { | ||
1378 | struct device *dev = dev_from_gk20a(g); | ||
1379 | struct fifo_gk20a *f = &g->fifo; | ||
1380 | u32 clear_intr = 0, i; | ||
1381 | u32 pbdma_pending = gk20a_readl(g, fifo_intr_pbdma_id_r()); | ||
1382 | |||
1383 | for (i = 0; i < fifo_intr_pbdma_id_status__size_1_v(); i++) { | ||
1384 | if (fifo_intr_pbdma_id_status_f(pbdma_pending, i)) { | ||
1385 | gk20a_dbg(gpu_dbg_intr, "pbdma id %d intr pending", i); | ||
1386 | clear_intr |= | ||
1387 | gk20a_fifo_handle_pbdma_intr(dev, g, f, i); | ||
1388 | } | ||
1389 | } | ||
1390 | return fifo_intr_0_pbdma_intr_pending_f(); | ||
1391 | } | ||
1392 | |||
/*
 * Stalling fifo interrupt entry point.  Dispatches runlist events,
 * per-pbdma interrupts and error interrupts, then clears the handled
 * bits in fifo_intr_0_r().  Serialized against itself via the fifo
 * isr mutex.
 */
void gk20a_fifo_isr(struct gk20a *g)
{
	/* every intr source treated as an error (vs. a normal event) */
	u32 error_intr_mask =
		fifo_intr_0_bind_error_pending_f() |
		fifo_intr_0_sched_error_pending_f() |
		fifo_intr_0_chsw_error_pending_f() |
		fifo_intr_0_fb_flush_timeout_pending_f() |
		fifo_intr_0_dropped_mmu_fault_pending_f() |
		fifo_intr_0_mmu_fault_pending_f() |
		fifo_intr_0_lb_error_pending_f() |
		fifo_intr_0_pio_error_pending_f();

	u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
	u32 clear_intr = 0;

	/* note we're not actually in an "isr", but rather
	 * in a threaded interrupt context... */
	mutex_lock(&g->fifo.intr.isr.mutex);

	gk20a_dbg(gpu_dbg_intr, "fifo isr %08x\n", fifo_intr);

	/* handle runlist update */
	if (fifo_intr & fifo_intr_0_runlist_event_pending_f()) {
		gk20a_fifo_handle_runlist_event(g);
		clear_intr |= fifo_intr_0_runlist_event_pending_f();
	}
	if (fifo_intr & fifo_intr_0_pbdma_intr_pending_f())
		clear_intr |= fifo_pbdma_isr(g, fifo_intr);

	/* NOTE(review): plain '=' discards the runlist/pbdma bits
	 * accumulated above, so those intr_0 bits are not cleared when an
	 * error is also pending — confirm whether '|=' was intended. */
	if (unlikely(fifo_intr & error_intr_mask))
		clear_intr = fifo_error_isr(g, fifo_intr);

	gk20a_writel(g, fifo_intr_0_r(), clear_intr);

	mutex_unlock(&g->fifo.intr.isr.mutex);

	return;
}
1431 | |||
1432 | void gk20a_fifo_nonstall_isr(struct gk20a *g) | ||
1433 | { | ||
1434 | u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r()); | ||
1435 | u32 clear_intr = 0; | ||
1436 | |||
1437 | gk20a_dbg(gpu_dbg_intr, "fifo nonstall isr %08x\n", fifo_intr); | ||
1438 | |||
1439 | if (fifo_intr & fifo_intr_0_channel_intr_pending_f()) | ||
1440 | clear_intr |= fifo_channel_isr(g, fifo_intr); | ||
1441 | |||
1442 | gk20a_writel(g, fifo_intr_0_r(), clear_intr); | ||
1443 | |||
1444 | return; | ||
1445 | } | ||
1446 | |||
1447 | int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid) | ||
1448 | { | ||
1449 | struct fifo_gk20a *f = &g->fifo; | ||
1450 | unsigned long end_jiffies = jiffies | ||
1451 | + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | ||
1452 | u32 delay = GR_IDLE_CHECK_DEFAULT; | ||
1453 | u32 ret = 0; | ||
1454 | u32 token = PMU_INVALID_MUTEX_OWNER_ID; | ||
1455 | u32 elpg_off = 0; | ||
1456 | u32 i; | ||
1457 | |||
1458 | gk20a_dbg_fn("%d", hw_chid); | ||
1459 | |||
1460 | /* we have no idea which runlist we are using. lock all */ | ||
1461 | for (i = 0; i < g->fifo.max_runlists; i++) | ||
1462 | mutex_lock(&f->runlist_info[i].mutex); | ||
1463 | |||
1464 | /* disable elpg if failed to acquire pmu mutex */ | ||
1465 | elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); | ||
1466 | if (elpg_off) | ||
1467 | gk20a_pmu_disable_elpg(g); | ||
1468 | |||
1469 | /* issue preempt */ | ||
1470 | gk20a_writel(g, fifo_preempt_r(), | ||
1471 | fifo_preempt_chid_f(hw_chid) | | ||
1472 | fifo_preempt_type_channel_f()); | ||
1473 | |||
1474 | /* wait for preempt */ | ||
1475 | ret = -EBUSY; | ||
1476 | do { | ||
1477 | if (!(gk20a_readl(g, fifo_preempt_r()) & | ||
1478 | fifo_preempt_pending_true_f())) { | ||
1479 | ret = 0; | ||
1480 | break; | ||
1481 | } | ||
1482 | |||
1483 | usleep_range(delay, delay * 2); | ||
1484 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | ||
1485 | } while (time_before(jiffies, end_jiffies) || | ||
1486 | !tegra_platform_is_silicon()); | ||
1487 | |||
1488 | if (ret) { | ||
1489 | int i; | ||
1490 | u32 engines = 0; | ||
1491 | struct fifo_gk20a *f = &g->fifo; | ||
1492 | struct channel_gk20a *ch = &f->channel[hw_chid]; | ||
1493 | |||
1494 | gk20a_err(dev_from_gk20a(g), "preempt channel %d timeout\n", | ||
1495 | hw_chid); | ||
1496 | |||
1497 | /* forcefully reset all busy engines using this channel */ | ||
1498 | for (i = 0; i < g->fifo.max_engines; i++) { | ||
1499 | u32 status = gk20a_readl(g, fifo_engine_status_r(i)); | ||
1500 | u32 ctx_status = | ||
1501 | fifo_engine_status_ctx_status_v(status); | ||
1502 | bool type_ch = fifo_pbdma_status_id_type_v(status) == | ||
1503 | fifo_pbdma_status_id_type_chid_v(); | ||
1504 | bool busy = fifo_engine_status_engine_v(status) == | ||
1505 | fifo_engine_status_engine_busy_v(); | ||
1506 | u32 id = (ctx_status == | ||
1507 | fifo_engine_status_ctx_status_ctxsw_load_v()) ? | ||
1508 | fifo_engine_status_next_id_v(status) : | ||
1509 | fifo_engine_status_id_v(status); | ||
1510 | |||
1511 | if (type_ch && busy && id == hw_chid) | ||
1512 | engines |= BIT(i); | ||
1513 | } | ||
1514 | gk20a_set_error_notifier(ch, | ||
1515 | NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); | ||
1516 | gk20a_fifo_recover(g, engines, true); | ||
1517 | } | ||
1518 | |||
1519 | /* re-enable elpg or release pmu mutex */ | ||
1520 | if (elpg_off) | ||
1521 | gk20a_pmu_enable_elpg(g); | ||
1522 | else | ||
1523 | pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); | ||
1524 | |||
1525 | for (i = 0; i < g->fifo.max_runlists; i++) | ||
1526 | mutex_unlock(&f->runlist_info[i].mutex); | ||
1527 | |||
1528 | return ret; | ||
1529 | } | ||
1530 | |||
/*
 * Re-enable runlist scheduling for the engine described by @eng_info by
 * clearing its runlist's bit in the scheduler-disable register.  elpg is
 * disabled for the duration if the pmu mutex cannot be acquired.
 *
 * Always returns 0.
 */
int gk20a_fifo_enable_engine_activity(struct gk20a *g,
			struct fifo_engine_info_gk20a *eng_info)
{
	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
	u32 elpg_off;
	u32 enable;

	gk20a_dbg_fn("");

	/* disable elpg if failed to acquire pmu mutex */
	elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
	if (elpg_off)
		gk20a_pmu_disable_elpg(g);

	/* NOTE(review): the right-shift of fifo_sched_disable_true_v() by
	 * runlist_id looks suspicious — the companion disable path uses
	 * set_field()/fifo_sched_disable_runlist_f() to position the bit.
	 * Confirm against the fifo_sched_disable register layout. */
	enable = gk20a_readl(g, fifo_sched_disable_r());
	enable &= ~(fifo_sched_disable_true_v() >> eng_info->runlist_id);
	gk20a_writel(g, fifo_sched_disable_r(), enable);

	/* re-enable elpg or release pmu mutex */
	if (elpg_off)
		gk20a_pmu_enable_elpg(g);
	else
		pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);

	gk20a_dbg_fn("done");
	return 0;
}
1558 | |||
1559 | int gk20a_fifo_disable_engine_activity(struct gk20a *g, | ||
1560 | struct fifo_engine_info_gk20a *eng_info, | ||
1561 | bool wait_for_idle) | ||
1562 | { | ||
1563 | u32 gr_stat, pbdma_stat, chan_stat, eng_stat, ctx_stat; | ||
1564 | u32 pbdma_chid = ~0, engine_chid = ~0, disable; | ||
1565 | u32 token = PMU_INVALID_MUTEX_OWNER_ID; | ||
1566 | u32 elpg_off; | ||
1567 | u32 err = 0; | ||
1568 | |||
1569 | gk20a_dbg_fn(""); | ||
1570 | |||
1571 | gr_stat = | ||
1572 | gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id)); | ||
1573 | if (fifo_engine_status_engine_v(gr_stat) == | ||
1574 | fifo_engine_status_engine_busy_v() && !wait_for_idle) | ||
1575 | return -EBUSY; | ||
1576 | |||
1577 | /* disable elpg if failed to acquire pmu mutex */ | ||
1578 | elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); | ||
1579 | if (elpg_off) | ||
1580 | gk20a_pmu_disable_elpg(g); | ||
1581 | |||
1582 | disable = gk20a_readl(g, fifo_sched_disable_r()); | ||
1583 | disable = set_field(disable, | ||
1584 | fifo_sched_disable_runlist_m(eng_info->runlist_id), | ||
1585 | fifo_sched_disable_runlist_f(fifo_sched_disable_true_v(), | ||
1586 | eng_info->runlist_id)); | ||
1587 | gk20a_writel(g, fifo_sched_disable_r(), disable); | ||
1588 | |||
1589 | /* chid from pbdma status */ | ||
1590 | pbdma_stat = gk20a_readl(g, fifo_pbdma_status_r(eng_info->pbdma_id)); | ||
1591 | chan_stat = fifo_pbdma_status_chan_status_v(pbdma_stat); | ||
1592 | if (chan_stat == fifo_pbdma_status_chan_status_valid_v() || | ||
1593 | chan_stat == fifo_pbdma_status_chan_status_chsw_save_v()) | ||
1594 | pbdma_chid = fifo_pbdma_status_id_v(pbdma_stat); | ||
1595 | else if (chan_stat == fifo_pbdma_status_chan_status_chsw_load_v() || | ||
1596 | chan_stat == fifo_pbdma_status_chan_status_chsw_switch_v()) | ||
1597 | pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat); | ||
1598 | |||
1599 | if (pbdma_chid != ~0) { | ||
1600 | err = gk20a_fifo_preempt_channel(g, pbdma_chid); | ||
1601 | if (err) | ||
1602 | goto clean_up; | ||
1603 | } | ||
1604 | |||
1605 | /* chid from engine status */ | ||
1606 | eng_stat = gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id)); | ||
1607 | ctx_stat = fifo_engine_status_ctx_status_v(eng_stat); | ||
1608 | if (ctx_stat == fifo_engine_status_ctx_status_valid_v() || | ||
1609 | ctx_stat == fifo_engine_status_ctx_status_ctxsw_save_v()) | ||
1610 | engine_chid = fifo_engine_status_id_v(eng_stat); | ||
1611 | else if (ctx_stat == fifo_engine_status_ctx_status_ctxsw_load_v() || | ||
1612 | ctx_stat == fifo_engine_status_ctx_status_ctxsw_switch_v()) | ||
1613 | engine_chid = fifo_engine_status_next_id_v(eng_stat); | ||
1614 | |||
1615 | if (engine_chid != ~0 && engine_chid != pbdma_chid) { | ||
1616 | err = gk20a_fifo_preempt_channel(g, engine_chid); | ||
1617 | if (err) | ||
1618 | goto clean_up; | ||
1619 | } | ||
1620 | |||
1621 | clean_up: | ||
1622 | /* re-enable elpg or release pmu mutex */ | ||
1623 | if (elpg_off) | ||
1624 | gk20a_pmu_enable_elpg(g); | ||
1625 | else | ||
1626 | pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); | ||
1627 | |||
1628 | if (err) { | ||
1629 | gk20a_dbg_fn("failed"); | ||
1630 | if (gk20a_fifo_enable_engine_activity(g, eng_info)) | ||
1631 | gk20a_err(dev_from_gk20a(g), | ||
1632 | "failed to enable gr engine activity\n"); | ||
1633 | } else { | ||
1634 | gk20a_dbg_fn("done"); | ||
1635 | } | ||
1636 | return err; | ||
1637 | } | ||
1638 | |||
1639 | static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id) | ||
1640 | { | ||
1641 | struct fifo_gk20a *f = &g->fifo; | ||
1642 | u32 engines = 0; | ||
1643 | int i; | ||
1644 | |||
1645 | for (i = 0; i < f->max_engines; i++) { | ||
1646 | u32 status = gk20a_readl(g, fifo_engine_status_r(i)); | ||
1647 | bool engine_busy = fifo_engine_status_engine_v(status) == | ||
1648 | fifo_engine_status_engine_busy_v(); | ||
1649 | |||
1650 | if (engine_busy && | ||
1651 | (f->engine_info[i].runlist_id == runlist_id)) | ||
1652 | engines |= BIT(i); | ||
1653 | } | ||
1654 | gk20a_fifo_recover(g, engines, true); | ||
1655 | } | ||
1656 | |||
/*
 * Wait (up to the gr idle timeout) for the hardware to finish fetching
 * the runlist most recently submitted for @runlist_id.
 *
 * Returns 0 once the pending flag clears, -ETIMEDOUT otherwise.
 */
static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
{
	struct fifo_runlist_info_gk20a *runlist;
	u32 remain;
	bool pending;

	runlist = &g->fifo.runlist_info[runlist_id];
	/* the condition is re-evaluated by wait_event_timeout() on every
	 * wakeup; the assignment inside it keeps the last sampled hardware
	 * state in 'pending' so it can be re-checked after a timeout. */
	remain = wait_event_timeout(runlist->runlist_wq,
		((pending = gk20a_readl(g, fifo_eng_runlist_r(runlist_id)) &
			fifo_eng_runlist_pending_true_f()) == 0),
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));

	if (remain == 0 && pending != 0)
		return -ETIMEDOUT;

	return 0;
}
1674 | |||
1675 | static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | ||
1676 | u32 hw_chid, bool add, | ||
1677 | bool wait_for_finish) | ||
1678 | { | ||
1679 | u32 ret = 0; | ||
1680 | struct device *d = dev_from_gk20a(g); | ||
1681 | struct fifo_gk20a *f = &g->fifo; | ||
1682 | struct fifo_runlist_info_gk20a *runlist = NULL; | ||
1683 | u32 *runlist_entry_base = NULL; | ||
1684 | u32 *runlist_entry = NULL; | ||
1685 | phys_addr_t runlist_pa; | ||
1686 | u32 old_buf, new_buf; | ||
1687 | u32 chid; | ||
1688 | u32 count = 0; | ||
1689 | runlist = &f->runlist_info[runlist_id]; | ||
1690 | |||
1691 | /* valid channel, add/remove it from active list. | ||
1692 | Otherwise, keep active list untouched for suspend/resume. */ | ||
1693 | if (hw_chid != ~0) { | ||
1694 | if (add) { | ||
1695 | if (test_and_set_bit(hw_chid, | ||
1696 | runlist->active_channels) == 1) | ||
1697 | return 0; | ||
1698 | } else { | ||
1699 | if (test_and_clear_bit(hw_chid, | ||
1700 | runlist->active_channels) == 0) | ||
1701 | return 0; | ||
1702 | } | ||
1703 | } | ||
1704 | |||
1705 | old_buf = runlist->cur_buffer; | ||
1706 | new_buf = !runlist->cur_buffer; | ||
1707 | |||
1708 | gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx", | ||
1709 | runlist_id, runlist->mem[new_buf].iova); | ||
1710 | |||
1711 | runlist_pa = gk20a_get_phys_from_iova(d, runlist->mem[new_buf].iova); | ||
1712 | if (!runlist_pa) { | ||
1713 | ret = -EINVAL; | ||
1714 | goto clean_up; | ||
1715 | } | ||
1716 | |||
1717 | runlist_entry_base = runlist->mem[new_buf].cpuva; | ||
1718 | if (!runlist_entry_base) { | ||
1719 | ret = -ENOMEM; | ||
1720 | goto clean_up; | ||
1721 | } | ||
1722 | |||
1723 | if (hw_chid != ~0 || /* add/remove a valid channel */ | ||
1724 | add /* resume to add all channels back */) { | ||
1725 | runlist_entry = runlist_entry_base; | ||
1726 | for_each_set_bit(chid, | ||
1727 | runlist->active_channels, f->num_channels) { | ||
1728 | gk20a_dbg_info("add channel %d to runlist", chid); | ||
1729 | runlist_entry[0] = chid; | ||
1730 | runlist_entry[1] = 0; | ||
1731 | runlist_entry += 2; | ||
1732 | count++; | ||
1733 | } | ||
1734 | } else /* suspend to remove all channels */ | ||
1735 | count = 0; | ||
1736 | |||
1737 | if (count != 0) { | ||
1738 | gk20a_writel(g, fifo_runlist_base_r(), | ||
1739 | fifo_runlist_base_ptr_f(u64_lo32(runlist_pa >> 12)) | | ||
1740 | fifo_runlist_base_target_vid_mem_f()); | ||
1741 | } | ||
1742 | |||
1743 | gk20a_writel(g, fifo_runlist_r(), | ||
1744 | fifo_runlist_engine_f(runlist_id) | | ||
1745 | fifo_eng_runlist_length_f(count)); | ||
1746 | |||
1747 | if (wait_for_finish) { | ||
1748 | ret = gk20a_fifo_runlist_wait_pending(g, runlist_id); | ||
1749 | |||
1750 | if (ret == -ETIMEDOUT) { | ||
1751 | gk20a_err(dev_from_gk20a(g), | ||
1752 | "runlist update timeout"); | ||
1753 | |||
1754 | gk20a_fifo_runlist_reset_engines(g, runlist_id); | ||
1755 | |||
1756 | /* engine reset needs the lock. drop it */ | ||
1757 | mutex_unlock(&runlist->mutex); | ||
1758 | /* wait until the runlist is active again */ | ||
1759 | ret = gk20a_fifo_runlist_wait_pending(g, runlist_id); | ||
1760 | /* get the lock back. at this point everything should | ||
1761 | * should be fine */ | ||
1762 | mutex_lock(&runlist->mutex); | ||
1763 | |||
1764 | if (ret) | ||
1765 | gk20a_err(dev_from_gk20a(g), | ||
1766 | "runlist update failed: %d", ret); | ||
1767 | } else if (ret == -EINTR) | ||
1768 | gk20a_err(dev_from_gk20a(g), | ||
1769 | "runlist update interrupted"); | ||
1770 | } | ||
1771 | |||
1772 | runlist->cur_buffer = new_buf; | ||
1773 | |||
1774 | clean_up: | ||
1775 | return ret; | ||
1776 | } | ||
1777 | |||
1778 | /* add/remove a channel from runlist | ||
1779 | special cases below: runlist->active_channels will NOT be changed. | ||
1780 | (hw_chid == ~0 && !add) means remove all active channels from runlist. | ||
1781 | (hw_chid == ~0 && add) means restore all active channels on runlist. */ | ||
1782 | int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid, | ||
1783 | bool add, bool wait_for_finish) | ||
1784 | { | ||
1785 | struct fifo_runlist_info_gk20a *runlist = NULL; | ||
1786 | struct fifo_gk20a *f = &g->fifo; | ||
1787 | u32 token = PMU_INVALID_MUTEX_OWNER_ID; | ||
1788 | u32 elpg_off; | ||
1789 | u32 ret = 0; | ||
1790 | |||
1791 | runlist = &f->runlist_info[runlist_id]; | ||
1792 | |||
1793 | mutex_lock(&runlist->mutex); | ||
1794 | |||
1795 | /* disable elpg if failed to acquire pmu mutex */ | ||
1796 | elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); | ||
1797 | if (elpg_off) | ||
1798 | gk20a_pmu_disable_elpg(g); | ||
1799 | |||
1800 | ret = gk20a_fifo_update_runlist_locked(g, runlist_id, hw_chid, add, | ||
1801 | wait_for_finish); | ||
1802 | |||
1803 | /* re-enable elpg or release pmu mutex */ | ||
1804 | if (elpg_off) | ||
1805 | gk20a_pmu_enable_elpg(g); | ||
1806 | else | ||
1807 | pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); | ||
1808 | |||
1809 | mutex_unlock(&runlist->mutex); | ||
1810 | return ret; | ||
1811 | } | ||
1812 | |||
/*
 * Quiesce the fifo unit before power-down: invalidate the bar1 base so
 * the hardware stops snooping bar1, then mask both fifo interrupt trees.
 * Always returns 0.
 */
int gk20a_fifo_suspend(struct gk20a *g)
{
	gk20a_dbg_fn("");

	/* stop bar1 snooping */
	gk20a_writel(g, fifo_bar1_base_r(), fifo_bar1_base_valid_false_f());

	/* disable fifo intr: both intr trees masked off */
	gk20a_writel(g, fifo_intr_en_0_r(), 0);
	gk20a_writel(g, fifo_intr_en_1_r(), 0);

	gk20a_dbg_fn("done");

	return 0;
}
1828 | |||
1829 | bool gk20a_fifo_mmu_fault_pending(struct gk20a *g) | ||
1830 | { | ||
1831 | if (gk20a_readl(g, fifo_intr_0_r()) & | ||
1832 | fifo_intr_0_mmu_fault_pending_f()) | ||
1833 | return true; | ||
1834 | else | ||
1835 | return false; | ||
1836 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h new file mode 100644 index 00000000..051acda2 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -0,0 +1,164 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/fifo_gk20a.h | ||
3 | * | ||
4 | * GK20A graphics fifo (gr host) | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #ifndef __FIFO_GK20A_H__ | ||
22 | #define __FIFO_GK20A_H__ | ||
23 | |||
24 | #include "channel_gk20a.h" | ||
25 | |||
26 | #define MAX_RUNLIST_BUFFERS 2 | ||
27 | |||
28 | /* generally corresponds to the "pbdma" engine */ | ||
29 | |||
/* Per-runlist software state: the active-channel set and the
 * double-buffered runlist memory submitted to hardware. */
struct fifo_runlist_info_gk20a {
	unsigned long *active_channels; /* bitmap of chids on this runlist */
	/* Each engine has its own SW and HW runlist buffer.*/
	struct runlist_mem_desc mem[MAX_RUNLIST_BUFFERS];
	u32 cur_buffer; /* index of the mem[] buffer currently in use */
	u32 total_entries;
	bool stopped;
	bool support_tsg;
	struct mutex mutex; /* protect channel preempt and runlist update */
	wait_queue_head_t runlist_wq; /* woken to re-poll runlist pending */
};
41 | |||
/* so far gk20a has two engines: gr and ce2(gr_copy) */
enum {
	ENGINE_GR_GK20A = 0,
	ENGINE_CE2_GK20A = 1,
	ENGINE_INVAL_GK20A	/* sentinel: first invalid engine id */
};
48 | |||
/* Decoded snapshot of a pbdma status register, kept for debug dumps. */
struct fifo_pbdma_exception_info_gk20a {
	u32 status_r; /* raw register value from hardware */
	u32 id, next_id; /* current/next context ids decoded from status_r */
	u32 chan_status_v; /* raw value from hardware */
	bool id_is_chid, next_id_is_chid; /* whether the ids are channel ids */
	bool chsw_in_progress; /* channel switch underway on this pbdma */
};
56 | |||
/* Decoded snapshot of an engine status register, kept for debug dumps. */
struct fifo_engine_exception_info_gk20a {
	u32 status_r; /* raw register value from hardware */
	u32 id, next_id; /* current/next context ids decoded from status_r */
	u32 ctx_status_v; /* raw value from hardware */
	bool id_is_chid, next_id_is_chid; /* whether the ids are channel ids */
	bool faulted, idle, ctxsw_in_progress; /* decoded engine state flags */
};
64 | |||
/* Raw and decoded fields of an mmu fault, captured at fault time for
 * recovery and reporting. */
struct fifo_mmu_fault_info_gk20a {
	u32 fault_info_v;
	u32 fault_type_v;
	u32 engine_subid_v;
	u32 client_v;
	u32 fault_hi_v; /* high/low words of the faulting address */
	u32 fault_lo_v;
	u64 inst_ptr; /* instance block pointer of the faulting context */
	const char *fault_type_desc; /* human-readable strings for logging */
	const char *engine_subid_desc;
	const char *client_desc;
};
77 | |||
/* Static description of one engine plus the last exception/fault state
 * captured for it. */
struct fifo_engine_info_gk20a {
	u32 sw_id; /* software engine id (see ENGINE_*_GK20A enum) */
	const char *name;
	u32 dev_info_id;
	u32 engine_id; /* hardware engine id used for status registers */
	u32 runlist_id; /* runlist serving this engine */
	u32 pbdma_id; /* pbdma unit serving this engine */
	u32 mmu_fault_id;
	u32 rc_mask;
	struct fifo_pbdma_exception_info_gk20a pbdma_exception_info;
	struct fifo_engine_exception_info_gk20a engine_exception_info;
	struct fifo_mmu_fault_info_gk20a mmu_fault_info;

};
92 | |||
/* Top-level fifo (host) unit state: channels, engines, runlists, userd
 * memory and interrupt bookkeeping. */
struct fifo_gk20a {
	struct gk20a *g; /* back-pointer to the owning device */
	int num_channels;

	int num_pbdma;
	u32 *pbdma_map;

	struct fifo_engine_info_gk20a *engine_info;
	u32 max_engines;
	u32 num_engines;

	struct fifo_runlist_info_gk20a *runlist_info; /* one per runlist */
	u32 max_runlists;

	struct userd_desc userd; /* backing memory for per-channel userd */
	u32 userd_entry_size;
	u32 userd_total_size;

	struct channel_gk20a *channel; /* array of num_channels entries */
	struct mutex ch_inuse_mutex; /* protect unused chid look up */

	void (*remove_support)(struct fifo_gk20a *);
	bool sw_ready; /* set once sw setup completed */
	struct {
		/* share info between isrs and non-isr code */
		struct {
			struct mutex mutex; /* serializes the fifo isrs */
		} isr;
		struct {
			/* pbdma_intr_0 bit masks, by severity class */
			u32 device_fatal_0;
			u32 channel_fatal_0;
			u32 restartable_0;
		} pbdma;
		struct {

		} engine;


	} intr;

	u32 mmu_fault_engines; /* engines implicated in the pending fault */
	bool deferred_reset_pending;
	struct mutex deferred_reset_mutex;

	struct work_struct fault_restore_thread;
};
139 | |||
/* one-time sw/hw setup of the fifo unit */
int gk20a_init_fifo_support(struct gk20a *g);

/* stalling and non-stalling interrupt service entry points */
void gk20a_fifo_isr(struct gk20a *g);
void gk20a_fifo_nonstall_isr(struct gk20a *g);

int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid);

int gk20a_fifo_enable_engine_activity(struct gk20a *g,
			struct fifo_engine_info_gk20a *eng_info);
int gk20a_fifo_disable_engine_activity(struct gk20a *g,
			struct fifo_engine_info_gk20a *eng_info,
			bool wait_for_idle);

/* NOTE(review): the implementation treats the second argument as a
 * runlist id (it indexes runlist_info), not an engine id — the parameter
 * name here is misleading. */
int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 hw_chid,
			      bool add, bool wait_for_finish);

int gk20a_fifo_suspend(struct gk20a *g);

bool gk20a_fifo_mmu_fault_pending(struct gk20a *g);
void gk20a_fifo_recover(struct gk20a *g, u32 engine_ids, bool verbose);
int gk20a_init_fifo_reset_enable_hw(struct gk20a *g);

void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
		unsigned long fault_id);
#endif /*__FIFO_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c new file mode 100644 index 00000000..4cc500de --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -0,0 +1,1681 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/gk20a.c | ||
3 | * | ||
4 | * GK20A Graphics | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #define CREATE_TRACE_POINTS | ||
22 | #include <trace/events/gk20a.h> | ||
23 | |||
24 | #include <linux/dma-mapping.h> | ||
25 | #include <linux/highmem.h> | ||
26 | #include <linux/string.h> | ||
27 | #include <linux/cdev.h> | ||
28 | #include <linux/delay.h> | ||
29 | #include <linux/firmware.h> | ||
30 | #include <linux/interrupt.h> | ||
31 | #include <linux/irq.h> | ||
32 | #include <linux/export.h> | ||
33 | #include <linux/file.h> | ||
34 | #include <linux/of.h> | ||
35 | #include <linux/of_device.h> | ||
36 | #include <linux/of_platform.h> | ||
37 | #include <linux/pm_runtime.h> | ||
38 | #include <linux/thermal.h> | ||
39 | #include <asm/cacheflush.h> | ||
40 | #include <linux/debugfs.h> | ||
41 | #include <linux/spinlock.h> | ||
42 | #include <linux/tegra-powergate.h> | ||
43 | |||
44 | #include <linux/sched.h> | ||
45 | #include <linux/input-cfboost.h> | ||
46 | |||
47 | #include <mach/pm_domains.h> | ||
48 | |||
49 | #include "gk20a.h" | ||
50 | #include "debug_gk20a.h" | ||
51 | #include "ctrl_gk20a.h" | ||
52 | #include "hw_mc_gk20a.h" | ||
53 | #include "hw_timer_gk20a.h" | ||
54 | #include "hw_bus_gk20a.h" | ||
55 | #include "hw_sim_gk20a.h" | ||
56 | #include "hw_top_gk20a.h" | ||
57 | #include "hw_ltc_gk20a.h" | ||
58 | #include "gk20a_scale.h" | ||
59 | #include "dbg_gpu_gk20a.h" | ||
60 | #include "hal.h" | ||
61 | |||
62 | #ifdef CONFIG_ARM64 | ||
63 | #define __cpuc_flush_dcache_area __flush_dcache_area | ||
64 | #endif | ||
65 | |||
66 | #define CLASS_NAME "nvidia-gpu" | ||
67 | /* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */ | ||
68 | #define INTERFACE_NAME "nvhost%s-gpu" | ||
69 | |||
70 | #define GK20A_NUM_CDEVS 5 | ||
71 | |||
72 | #if defined(GK20A_DEBUG) | ||
73 | u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK; | ||
74 | u32 gk20a_dbg_ftrace; | ||
75 | #endif | ||
76 | |||
77 | static int gk20a_pm_finalize_poweron(struct device *dev); | ||
78 | static int gk20a_pm_prepare_poweroff(struct device *dev); | ||
79 | |||
/* Stash the driver instance in the device's platform data so later
 * get_gk20a() calls on the same platform_device can retrieve it. */
static inline void set_gk20a(struct platform_device *dev, struct gk20a *gk20a)
{
	gk20a_get_platform(dev)->g = gk20a;
}
84 | |||
/* fops for per-channel device nodes: each open fd is one GPU channel.
 * The same ioctl handler serves native and 32-bit compat callers. */
static const struct file_operations gk20a_channel_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_channel_release,
	.open = gk20a_channel_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_channel_ioctl,
#endif
	.unlocked_ioctl = gk20a_channel_ioctl,
};
94 | |||
/* fops for the control device node: GPU-wide queries/operations are
 * dispatched through gk20a_ctrl_dev_ioctl (native and compat). */
static const struct file_operations gk20a_ctrl_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_ctrl_dev_release,
	.open = gk20a_ctrl_dev_open,
	.unlocked_ioctl = gk20a_ctrl_dev_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_ctrl_dev_ioctl,
#endif
};
104 | |||
/* fops for the debugger device node; .poll lets a debugger wait for
 * GPU events between ioctls. */
static const struct file_operations gk20a_dbg_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_dbg_gpu_dev_release,
	.open = gk20a_dbg_gpu_dev_open,
	.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
	.poll = gk20a_dbg_gpu_dev_poll,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};
115 | |||
/* fops for address-space device nodes: one GPU VA space per open fd. */
static const struct file_operations gk20a_as_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_as_dev_release,
	.open = gk20a_as_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_as_dev_ioctl,
#endif
	.unlocked_ioctl = gk20a_as_dev_ioctl,
};
125 | |||
126 | /* | ||
127 | * Note: We use a different 'open' to trigger handling of the profiler session. | ||
128 | * Most of the code is shared between them... Though, at some point if the | ||
129 | * code does get too tangled trying to handle each in the same path we can | ||
130 | * separate them cleanly. | ||
131 | */ | ||
132 | static const struct file_operations gk20a_prof_ops = { | ||
133 | .owner = THIS_MODULE, | ||
134 | .release = gk20a_dbg_gpu_dev_release, | ||
135 | .open = gk20a_prof_gpu_dev_open, | ||
136 | .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, | ||
137 | /* .mmap = gk20a_prof_gpu_dev_mmap,*/ | ||
138 | /*int (*mmap) (struct file *, struct vm_area_struct *);*/ | ||
139 | .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, | ||
140 | #ifdef CONFIG_COMPAT | ||
141 | .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, | ||
142 | #endif | ||
143 | }; | ||
144 | |||
/* MMIO write of v to the simulator aperture at byte offset r. */
static inline void sim_writel(struct gk20a *g, u32 r, u32 v)
{
	writel(v, g->sim.regs+r);
}
149 | |||
/* MMIO read from the simulator aperture at byte offset r. */
static inline u32 sim_readl(struct gk20a *g, u32 r)
{
	return readl(g->sim.regs+r);
}
154 | |||
155 | static void kunmap_and_free_iopage(void **kvaddr, struct page **page) | ||
156 | { | ||
157 | if (*kvaddr) { | ||
158 | kunmap(*kvaddr); | ||
159 | *kvaddr = 0; | ||
160 | } | ||
161 | if (*page) { | ||
162 | __free_page(*page); | ||
163 | *page = 0; | ||
164 | } | ||
165 | } | ||
166 | |||
167 | static void gk20a_free_sim_support(struct gk20a *g) | ||
168 | { | ||
169 | /* free sim mappings, bfrs */ | ||
170 | kunmap_and_free_iopage(&g->sim.send_bfr.kvaddr, | ||
171 | &g->sim.send_bfr.page); | ||
172 | |||
173 | kunmap_and_free_iopage(&g->sim.recv_bfr.kvaddr, | ||
174 | &g->sim.recv_bfr.page); | ||
175 | |||
176 | kunmap_and_free_iopage(&g->sim.msg_bfr.kvaddr, | ||
177 | &g->sim.msg_bfr.page); | ||
178 | } | ||
179 | |||
/* remove_support callback for the sim unit: switch the simulator
 * interface to disabled mode (when its registers are mapped) before
 * freeing the send/recv/msg buffers. */
static void gk20a_remove_sim_support(struct sim_gk20a *s)
{
	struct gk20a *g = s->g;
	if (g->sim.regs)
		sim_writel(g, sim_config_r(), sim_config_mode_disabled_v());
	gk20a_free_sim_support(g);
}
187 | |||
188 | static int alloc_and_kmap_iopage(struct device *d, | ||
189 | void **kvaddr, | ||
190 | phys_addr_t *phys, | ||
191 | struct page **page) | ||
192 | { | ||
193 | int err = 0; | ||
194 | *page = alloc_page(GFP_KERNEL); | ||
195 | |||
196 | if (!*page) { | ||
197 | err = -ENOMEM; | ||
198 | dev_err(d, "couldn't allocate io page\n"); | ||
199 | goto fail; | ||
200 | } | ||
201 | |||
202 | *kvaddr = kmap(*page); | ||
203 | if (!*kvaddr) { | ||
204 | err = -ENOMEM; | ||
205 | dev_err(d, "couldn't kmap io page\n"); | ||
206 | goto fail; | ||
207 | } | ||
208 | *phys = page_to_phys(*page); | ||
209 | return 0; | ||
210 | |||
211 | fail: | ||
212 | kunmap_and_free_iopage(kvaddr, page); | ||
213 | return err; | ||
214 | |||
215 | } | ||
216 | |||
217 | static void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i, | ||
218 | struct resource **out) | ||
219 | { | ||
220 | struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i); | ||
221 | if (!r) | ||
222 | return NULL; | ||
223 | if (out) | ||
224 | *out = r; | ||
225 | return devm_request_and_ioremap(&dev->dev, r); | ||
226 | } | ||
227 | |||
228 | /* TBD: strip from released */ | ||
229 | static int gk20a_init_sim_support(struct platform_device *dev) | ||
230 | { | ||
231 | int err = 0; | ||
232 | struct gk20a *g = get_gk20a(dev); | ||
233 | struct device *d = &dev->dev; | ||
234 | phys_addr_t phys; | ||
235 | |||
236 | g->sim.g = g; | ||
237 | g->sim.regs = gk20a_ioremap_resource(dev, GK20A_SIM_IORESOURCE_MEM, | ||
238 | &g->sim.reg_mem); | ||
239 | if (!g->sim.regs) { | ||
240 | dev_err(d, "failed to remap gk20a sim regs\n"); | ||
241 | err = -ENXIO; | ||
242 | goto fail; | ||
243 | } | ||
244 | |||
245 | /* allocate sim event/msg buffers */ | ||
246 | err = alloc_and_kmap_iopage(d, &g->sim.send_bfr.kvaddr, | ||
247 | &g->sim.send_bfr.phys, | ||
248 | &g->sim.send_bfr.page); | ||
249 | |||
250 | err = err || alloc_and_kmap_iopage(d, &g->sim.recv_bfr.kvaddr, | ||
251 | &g->sim.recv_bfr.phys, | ||
252 | &g->sim.recv_bfr.page); | ||
253 | |||
254 | err = err || alloc_and_kmap_iopage(d, &g->sim.msg_bfr.kvaddr, | ||
255 | &g->sim.msg_bfr.phys, | ||
256 | &g->sim.msg_bfr.page); | ||
257 | |||
258 | if (!(g->sim.send_bfr.kvaddr && g->sim.recv_bfr.kvaddr && | ||
259 | g->sim.msg_bfr.kvaddr)) { | ||
260 | dev_err(d, "couldn't allocate all sim buffers\n"); | ||
261 | goto fail; | ||
262 | } | ||
263 | |||
264 | /*mark send ring invalid*/ | ||
265 | sim_writel(g, sim_send_ring_r(), sim_send_ring_status_invalid_f()); | ||
266 | |||
267 | /*read get pointer and make equal to put*/ | ||
268 | g->sim.send_ring_put = sim_readl(g, sim_send_get_r()); | ||
269 | sim_writel(g, sim_send_put_r(), g->sim.send_ring_put); | ||
270 | |||
271 | /*write send ring address and make it valid*/ | ||
272 | /*TBD: work for >32b physmem*/ | ||
273 | phys = g->sim.send_bfr.phys; | ||
274 | sim_writel(g, sim_send_ring_hi_r(), 0); | ||
275 | sim_writel(g, sim_send_ring_r(), | ||
276 | sim_send_ring_status_valid_f() | | ||
277 | sim_send_ring_target_phys_pci_coherent_f() | | ||
278 | sim_send_ring_size_4kb_f() | | ||
279 | sim_send_ring_addr_lo_f(phys >> PAGE_SHIFT)); | ||
280 | |||
281 | /*repeat for recv ring (but swap put,get as roles are opposite) */ | ||
282 | sim_writel(g, sim_recv_ring_r(), sim_recv_ring_status_invalid_f()); | ||
283 | |||
284 | /*read put pointer and make equal to get*/ | ||
285 | g->sim.recv_ring_get = sim_readl(g, sim_recv_put_r()); | ||
286 | sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get); | ||
287 | |||
288 | /*write send ring address and make it valid*/ | ||
289 | /*TBD: work for >32b physmem*/ | ||
290 | phys = g->sim.recv_bfr.phys; | ||
291 | sim_writel(g, sim_recv_ring_hi_r(), 0); | ||
292 | sim_writel(g, sim_recv_ring_r(), | ||
293 | sim_recv_ring_status_valid_f() | | ||
294 | sim_recv_ring_target_phys_pci_coherent_f() | | ||
295 | sim_recv_ring_size_4kb_f() | | ||
296 | sim_recv_ring_addr_lo_f(phys >> PAGE_SHIFT)); | ||
297 | |||
298 | g->sim.remove_support = gk20a_remove_sim_support; | ||
299 | return 0; | ||
300 | |||
301 | fail: | ||
302 | gk20a_free_sim_support(g); | ||
303 | return err; | ||
304 | } | ||
305 | |||
306 | static inline u32 sim_msg_header_size(void) | ||
307 | { | ||
308 | return 24;/*TBD: fix the header to gt this from NV_VGPU_MSG_HEADER*/ | ||
309 | } | ||
310 | |||
311 | static inline u32 *sim_msg_bfr(struct gk20a *g, u32 byte_offset) | ||
312 | { | ||
313 | return (u32 *)(g->sim.msg_bfr.kvaddr + byte_offset); | ||
314 | } | ||
315 | |||
/* Header fields live at the very start of the message buffer. */
static inline u32 *sim_msg_hdr(struct gk20a *g, u32 byte_offset)
{
	return sim_msg_bfr(g, byte_offset); /*starts at 0*/
}
320 | |||
321 | static inline u32 *sim_msg_param(struct gk20a *g, u32 byte_offset) | ||
322 | { | ||
323 | /*starts after msg header/cmn*/ | ||
324 | return sim_msg_bfr(g, byte_offset + sim_msg_header_size()); | ||
325 | } | ||
326 | |||
/* Fill in the common RPC header for function 'func' with a payload of
 * 'size' bytes.  The result word is preset to "rpc pending" so that
 * issue_rpc_and_wait() can later distinguish success from failure. */
static inline void sim_write_hdr(struct gk20a *g, u32 func, u32 size)
{
	/*memset(g->sim.msg_bfr.kvaddr,0,min(PAGE_SIZE,size));*/
	*sim_msg_hdr(g, sim_msg_signature_r()) = sim_msg_signature_valid_v();
	*sim_msg_hdr(g, sim_msg_result_r()) = sim_msg_result_rpc_pending_v();
	*sim_msg_hdr(g, sim_msg_spare_r()) = sim_msg_spare__init_v();
	*sim_msg_hdr(g, sim_msg_function_r()) = func;
	*sim_msg_hdr(g, sim_msg_length_r()) = size + sim_msg_header_size();
}
336 | |||
337 | static inline u32 sim_escape_read_hdr_size(void) | ||
338 | { | ||
339 | return 12; /*TBD: fix NV_VGPU_SIM_ESCAPE_READ_HEADER*/ | ||
340 | } | ||
341 | |||
342 | static u32 *sim_send_ring_bfr(struct gk20a *g, u32 byte_offset) | ||
343 | { | ||
344 | return (u32 *)(g->sim.send_bfr.kvaddr + byte_offset); | ||
345 | } | ||
346 | |||
/*
 * Queue the current message buffer on the send ring and kick the
 * simulator.  Each ring entry is two u32s (DMA address lo/hi); writing
 * the PUT register traps into the host, which consumes the entry.
 */
static int rpc_send_message(struct gk20a *g)
{
	/* calculations done in units of u32s */
	u32 send_base = sim_send_put_pointer_v(g->sim.send_ring_put) * 2;
	u32 dma_offset = send_base + sim_dma_r()/sizeof(u32);
	u32 dma_hi_offset = send_base + sim_dma_hi_r()/sizeof(u32);

	*sim_send_ring_bfr(g, dma_offset*sizeof(u32)) =
		sim_dma_target_phys_pci_coherent_f() |
		sim_dma_status_valid_f() |
		sim_dma_size_4kb_f() |
		sim_dma_addr_lo_f(g->sim.msg_bfr.phys >> PAGE_SHIFT);

	*sim_send_ring_bfr(g, dma_hi_offset*sizeof(u32)) = 0; /*TBD >32b phys*/

	*sim_msg_hdr(g, sim_msg_sequence_r()) = g->sim.sequence_base++;

	/* advance PUT by one two-word entry, wrapping at the page size */
	g->sim.send_ring_put = (g->sim.send_ring_put + 2 * sizeof(u32)) %
		PAGE_SIZE;

	/* flush CPU-side writes so the simulator backend sees them */
	__cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE);
	__cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE);
	__cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE);

	/* Update the put pointer. This will trap into the host. */
	sim_writel(g, sim_send_put_r(), g->sim.send_ring_put);

	return 0;
}
376 | |||
377 | static inline u32 *sim_recv_ring_bfr(struct gk20a *g, u32 byte_offset) | ||
378 | { | ||
379 | return (u32 *)(g->sim.recv_bfr.kvaddr + byte_offset); | ||
380 | } | ||
381 | |||
382 | static int rpc_recv_poll(struct gk20a *g) | ||
383 | { | ||
384 | phys_addr_t recv_phys_addr; | ||
385 | |||
386 | /* XXX This read is not required (?) */ | ||
387 | /*pVGpu->recv_ring_get = VGPU_REG_RD32(pGpu, NV_VGPU_RECV_GET);*/ | ||
388 | |||
389 | /* Poll the recv ring get pointer in an infinite loop*/ | ||
390 | do { | ||
391 | g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r()); | ||
392 | } while (g->sim.recv_ring_put == g->sim.recv_ring_get); | ||
393 | |||
394 | /* process all replies */ | ||
395 | while (g->sim.recv_ring_put != g->sim.recv_ring_get) { | ||
396 | /* these are in u32 offsets*/ | ||
397 | u32 dma_lo_offset = | ||
398 | sim_recv_put_pointer_v(g->sim.recv_ring_get)*2 + 0; | ||
399 | /*u32 dma_hi_offset = dma_lo_offset + 1;*/ | ||
400 | u32 recv_phys_addr_lo = sim_dma_addr_lo_v(*sim_recv_ring_bfr(g, dma_lo_offset*4)); | ||
401 | |||
402 | /*u32 recv_phys_addr_hi = sim_dma_hi_addr_v( | ||
403 | (phys_addr_t)sim_recv_ring_bfr(g,dma_hi_offset*4));*/ | ||
404 | |||
405 | /*TBD >32b phys addr */ | ||
406 | recv_phys_addr = recv_phys_addr_lo << PAGE_SHIFT; | ||
407 | |||
408 | if (recv_phys_addr != g->sim.msg_bfr.phys) { | ||
409 | dev_err(dev_from_gk20a(g), "%s Error in RPC reply\n", | ||
410 | __func__); | ||
411 | return -1; | ||
412 | } | ||
413 | |||
414 | /* Update GET pointer */ | ||
415 | g->sim.recv_ring_get = (g->sim.recv_ring_get + 2*sizeof(u32)) % | ||
416 | PAGE_SIZE; | ||
417 | |||
418 | __cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE); | ||
419 | __cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE); | ||
420 | __cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE); | ||
421 | |||
422 | sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get); | ||
423 | |||
424 | g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r()); | ||
425 | } | ||
426 | |||
427 | return 0; | ||
428 | } | ||
429 | |||
430 | static int issue_rpc_and_wait(struct gk20a *g) | ||
431 | { | ||
432 | int err; | ||
433 | |||
434 | err = rpc_send_message(g); | ||
435 | if (err) { | ||
436 | dev_err(dev_from_gk20a(g), "%s failed rpc_send_message\n", | ||
437 | __func__); | ||
438 | return err; | ||
439 | } | ||
440 | |||
441 | err = rpc_recv_poll(g); | ||
442 | if (err) { | ||
443 | dev_err(dev_from_gk20a(g), "%s failed rpc_recv_poll\n", | ||
444 | __func__); | ||
445 | return err; | ||
446 | } | ||
447 | |||
448 | /* Now check if RPC really succeeded */ | ||
449 | if (*sim_msg_hdr(g, sim_msg_result_r()) != sim_msg_result_success_v()) { | ||
450 | dev_err(dev_from_gk20a(g), "%s received failed status!\n", | ||
451 | __func__); | ||
452 | return -(*sim_msg_hdr(g, sim_msg_result_r())); | ||
453 | } | ||
454 | return 0; | ||
455 | } | ||
456 | |||
/*
 * Read 'count' bytes of simulator escape data 'path'[index] into
 * 'data'.  Builds an escape-read RPC whose params are index, count and
 * the word-aligned offset of the reply payload, followed by the
 * NUL-terminated path string.
 *
 * NOTE(review): path and count are copied into the one-page message
 * buffer with no bounds check -- assumes callers pass short paths and
 * small counts; verify against callers.
 */
int gk20a_sim_esc_read(struct gk20a *g, char *path, u32 index, u32 count, u32 *data)
{
	int err;
	size_t pathlen = strlen(path);
	u32 data_offset;

	sim_write_hdr(g, sim_msg_function_sim_escape_read_v(),
		      sim_escape_read_hdr_size());
	*sim_msg_param(g, 0) = index;
	*sim_msg_param(g, 4) = count;
	data_offset = roundup(0xc +  pathlen + 1, sizeof(u32));
	*sim_msg_param(g, 8) = data_offset;
	strcpy((char *)sim_msg_param(g, 0xc), path);

	err = issue_rpc_and_wait(g);

	if (!err)
		memcpy(data, sim_msg_param(g, data_offset), count);
	return err;
}
477 | |||
/*
 * Top-half for the stalling interrupt line.  Verifies the GPU is
 * powered and actually has something pending (the line may be shared),
 * masks further stall interrupts, then defers the real work to the
 * threaded handler gk20a_intr_thread_stall().
 */
static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;
	u32 mc_intr_0;

	if (!g->power_on)
		return IRQ_NONE;

	/* not from gpu when sharing irq with others */
	mc_intr_0 = gk20a_readl(g, mc_intr_0_r());
	if (unlikely(!mc_intr_0))
		return IRQ_NONE;

	gk20a_writel(g, mc_intr_en_0_r(),
		mc_intr_en_0_inta_disabled_f());

	/* flush previous write */
	gk20a_readl(g, mc_intr_en_0_r());

	return IRQ_WAKE_THREAD;
}
499 | |||
/*
 * Top-half for the non-stalling interrupt line; mirror image of
 * gk20a_intr_isr_stall() operating on mc_intr_1.  Masks the line and
 * hands off to gk20a_intr_thread_nonstall().
 */
static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;
	u32 mc_intr_1;

	if (!g->power_on)
		return IRQ_NONE;

	/* not from gpu when sharing irq with others */
	mc_intr_1 = gk20a_readl(g, mc_intr_1_r());
	if (unlikely(!mc_intr_1))
		return IRQ_NONE;

	gk20a_writel(g, mc_intr_en_1_r(),
		mc_intr_en_1_inta_disabled_f());

	/* flush previous write */
	gk20a_readl(g, mc_intr_en_1_r());

	return IRQ_WAKE_THREAD;
}
521 | |||
/*
 * Handle (and acknowledge) pending PBUS interrupts.  PRI errors
 * (squash, fecserr, timeout) get extra register dumps to help identify
 * the failed priv access; any other pending bit is just logged as
 * unhandled.  Writing the raw value back clears what was seen.
 * NOTE(review): mixes dev_from_gk20a(g) and &g->dev->dev for logging --
 * presumably the same device; could be unified.
 */
static void gk20a_pbus_isr(struct gk20a *g)
{
	u32 val;
	val = gk20a_readl(g, bus_intr_0_r());
	if (val & (bus_intr_0_pri_squash_m() |
			bus_intr_0_pri_fecserr_m() |
			bus_intr_0_pri_timeout_m())) {
		gk20a_err(dev_from_gk20a(g), "top_fs_status_r : 0x%x",
			gk20a_readl(g, top_fs_status_r()));
		gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
			gk20a_readl(g, mc_enable_r()));
		gk20a_err(&g->dev->dev,
			"NV_PTIMER_PRI_TIMEOUT_SAVE_0: 0x%x\n",
			gk20a_readl(g, timer_pri_timeout_save_0_r()));
		gk20a_err(&g->dev->dev,
			"NV_PTIMER_PRI_TIMEOUT_SAVE_1: 0x%x\n",
			gk20a_readl(g, timer_pri_timeout_save_1_r()));
		gk20a_err(&g->dev->dev,
			"NV_PTIMER_PRI_TIMEOUT_FECS_ERRCODE: 0x%x\n",
			gk20a_readl(g, timer_pri_timeout_fecs_errcode_r()));
	}

	if (val)
		gk20a_err(&g->dev->dev,
			"Unhandled pending pbus interrupt\n");

	gk20a_writel(g, bus_intr_0_r(), val);
}
550 | |||
/*
 * Threaded (bottom-half) handler for the stalling interrupt: read the
 * pending mask, fan out to each unit's ISR, then re-enable the stall
 * interrupt line that the top half disabled.
 */
static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;
	u32 mc_intr_0;

	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");

	mc_intr_0 = gk20a_readl(g, mc_intr_0_r());

	gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);

	/* the GR isr runs under the ELPG-protected wrapper so the engine
	   stays powered while it is serviced */
	if (mc_intr_0 & mc_intr_0_pgraph_pending_f())
		gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g));
	if (mc_intr_0 & mc_intr_0_pfifo_pending_f())
		gk20a_fifo_isr(g);
	if (mc_intr_0 & mc_intr_0_pmu_pending_f())
		gk20a_pmu_isr(g);
	if (mc_intr_0 & mc_intr_0_priv_ring_pending_f())
		gk20a_priv_ring_isr(g);
	if (mc_intr_0 & mc_intr_0_ltc_pending_f())
		gk20a_mm_ltc_isr(g);
	if (mc_intr_0 & mc_intr_0_pbus_pending_f())
		gk20a_pbus_isr(g);

	gk20a_writel(g, mc_intr_en_0_r(),
		mc_intr_en_0_inta_hardware_f());

	/* flush previous write */
	gk20a_readl(g, mc_intr_en_0_r());

	return IRQ_HANDLED;
}
583 | |||
/*
 * Threaded handler for the non-stalling interrupt: notify fifo/gr and
 * re-enable the line.  The mc_intr_0_* pending fields are reused
 * against mc_intr_1 -- presumably the two registers share a bit
 * layout; TODO confirm against the mc manual.
 */
static irqreturn_t gk20a_intr_thread_nonstall(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;
	u32 mc_intr_1;

	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");

	mc_intr_1 = gk20a_readl(g, mc_intr_1_r());

	gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1);

	if (mc_intr_1 & mc_intr_0_pfifo_pending_f())
		gk20a_fifo_nonstall_isr(g);
	if (mc_intr_1 & mc_intr_0_pgraph_pending_f())
		gk20a_gr_nonstall_isr(g);

	gk20a_writel(g, mc_intr_en_1_r(),
		mc_intr_en_1_inta_hardware_f());

	/* flush previous write */
	gk20a_readl(g, mc_intr_en_1_r());

	return IRQ_HANDLED;
}
608 | |||
609 | static void gk20a_remove_support(struct platform_device *dev) | ||
610 | { | ||
611 | struct gk20a *g = get_gk20a(dev); | ||
612 | |||
613 | /* pmu support should already be removed when driver turns off | ||
614 | gpu power rail in prepapre_poweroff */ | ||
615 | if (g->gk20a_cdev.gk20a_cooling_dev) | ||
616 | thermal_cooling_device_unregister(g->gk20a_cdev.gk20a_cooling_dev); | ||
617 | |||
618 | if (g->gr.remove_support) | ||
619 | g->gr.remove_support(&g->gr); | ||
620 | |||
621 | if (g->fifo.remove_support) | ||
622 | g->fifo.remove_support(&g->fifo); | ||
623 | |||
624 | if (g->mm.remove_support) | ||
625 | g->mm.remove_support(&g->mm); | ||
626 | |||
627 | if (g->sim.remove_support) | ||
628 | g->sim.remove_support(&g->sim); | ||
629 | |||
630 | release_firmware(g->pmu_fw); | ||
631 | |||
632 | if (g->irq_requested) { | ||
633 | free_irq(g->irq_stall, g); | ||
634 | free_irq(g->irq_nonstall, g); | ||
635 | g->irq_requested = false; | ||
636 | } | ||
637 | |||
638 | /* free mappings to registers, etc*/ | ||
639 | |||
640 | if (g->regs) { | ||
641 | iounmap(g->regs); | ||
642 | g->regs = 0; | ||
643 | } | ||
644 | if (g->bar1) { | ||
645 | iounmap(g->bar1); | ||
646 | g->bar1 = 0; | ||
647 | } | ||
648 | } | ||
649 | |||
/*
 * Probe-time setup: map BAR0 (registers) and BAR1, look up the two
 * interrupt lines, bring up simulator support when running on ASIM and
 * initialize the driver-wide locks.  On any failure everything already
 * acquired is torn down via gk20a_remove_support().
 */
static int gk20a_init_support(struct platform_device *dev)
{
	int err = 0;
	struct gk20a *g = get_gk20a(dev);

	g->regs = gk20a_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM,
					 &g->reg_mem);
	if (!g->regs) {
		dev_err(dev_from_gk20a(g), "failed to remap gk20a registers\n");
		err = -ENXIO;
		goto fail;
	}

	g->bar1 = gk20a_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM,
					 &g->bar1_mem);
	if (!g->bar1) {
		dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n");
		err = -ENXIO;
		goto fail;
	}

	/* Get interrupt numbers */
	g->irq_stall = platform_get_irq(dev, 0);
	g->irq_nonstall = platform_get_irq(dev, 1);
	if (g->irq_stall < 0 || g->irq_nonstall < 0) {
		err = -ENXIO;
		goto fail;
	}

	if (tegra_cpu_is_asim()) {
		err = gk20a_init_sim_support(dev);
		if (err)
			goto fail;
	}

	mutex_init(&g->dbg_sessions_lock);
	mutex_init(&g->client_lock);

	g->remove_support = gk20a_remove_support;
	return 0;

 fail:
	gk20a_remove_support(dev);
	return err;
}
695 | |||
/*
 * First-reference initialization of the client path: power the GPU on
 * explicitly when runtime PM is unavailable, set up SW mm state and,
 * when devfreq scaling is enabled, its hardware counters.
 * NOTE(review): the gk20a_pm_finalize_poweron() return value is
 * ignored in the !CONFIG_PM_RUNTIME case -- a poweron failure would go
 * unnoticed here; verify intent.
 */
static int gk20a_init_client(struct platform_device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	int err;

	gk20a_dbg_fn("");

#ifndef CONFIG_PM_RUNTIME
	gk20a_pm_finalize_poweron(&dev->dev);
#endif

	err = gk20a_init_mm_setup_sw(g);
	if (err)
		return err;

	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
		gk20a_scale_hw_init(dev);
	return 0;
}
715 | |||
/* Counterpart of gk20a_init_client(): without runtime PM the GPU is
 * powered off explicitly when the last reference goes away. */
static void gk20a_deinit_client(struct platform_device *dev)
{
	gk20a_dbg_fn("");
#ifndef CONFIG_PM_RUNTIME
	gk20a_pm_prepare_poweroff(&dev->dev);
#endif
}
723 | |||
724 | int gk20a_get_client(struct gk20a *g) | ||
725 | { | ||
726 | int err = 0; | ||
727 | |||
728 | mutex_lock(&g->client_lock); | ||
729 | if (g->client_refcount == 0) | ||
730 | err = gk20a_init_client(g->dev); | ||
731 | if (!err) | ||
732 | g->client_refcount++; | ||
733 | mutex_unlock(&g->client_lock); | ||
734 | return err; | ||
735 | } | ||
736 | |||
737 | void gk20a_put_client(struct gk20a *g) | ||
738 | { | ||
739 | mutex_lock(&g->client_lock); | ||
740 | if (g->client_refcount == 1) | ||
741 | gk20a_deinit_client(g->dev); | ||
742 | g->client_refcount--; | ||
743 | mutex_unlock(&g->client_lock); | ||
744 | WARN_ON(g->client_refcount < 0); | ||
745 | } | ||
746 | |||
/*
 * Power-off path (runtime-PM suspend or explicit last-close): quiesce
 * channels, destroy the PMU (which disables ELPG) before suspending
 * gr/mm/fifo, release the interrupt lines and finally stop the clocks.
 * Individual step failures are OR-ed together so suspend proceeds as
 * far as possible; returns 0 only if every step succeeded.
 */
static int gk20a_pm_prepare_poweroff(struct device *_dev)
{
	struct platform_device *dev = to_platform_device(_dev);
	struct gk20a *g = get_gk20a(dev);
	int ret = 0;

	gk20a_dbg_fn("");

	if (!g->power_on)
		return 0;

	ret |= gk20a_channel_suspend(g);

	/* disable elpg before gr or fifo suspend */
	ret |= gk20a_pmu_destroy(g);
	ret |= gk20a_gr_suspend(g);
	ret |= gk20a_mm_suspend(g);
	ret |= gk20a_fifo_suspend(g);

	/*
	 * After this point, gk20a interrupts should not get
	 * serviced.
	 */
	if (g->irq_requested) {
		free_irq(g->irq_stall, g);
		free_irq(g->irq_nonstall, g);
		g->irq_requested = false;
	}

	/* Disable GPCPLL */
	ret |= gk20a_suspend_clk_support(g);
	g->power_on = false;

	return ret;
}
782 | |||
783 | static void gk20a_detect_chip(struct gk20a *g) | ||
784 | { | ||
785 | struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics; | ||
786 | |||
787 | u32 mc_boot_0_value = gk20a_readl(g, mc_boot_0_r()); | ||
788 | gpu->arch = mc_boot_0_architecture_v(mc_boot_0_value) << | ||
789 | NVHOST_GPU_ARCHITECTURE_SHIFT; | ||
790 | gpu->impl = mc_boot_0_implementation_v(mc_boot_0_value); | ||
791 | gpu->rev = | ||
792 | (mc_boot_0_major_revision_v(mc_boot_0_value) << 4) | | ||
793 | mc_boot_0_minor_revision_v(mc_boot_0_value); | ||
794 | |||
795 | gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n", | ||
796 | g->gpu_characteristics.arch, | ||
797 | g->gpu_characteristics.impl, | ||
798 | g->gpu_characteristics.rev); | ||
799 | } | ||
800 | |||
801 | static int gk20a_pm_finalize_poweron(struct device *_dev) | ||
802 | { | ||
803 | struct platform_device *dev = to_platform_device(_dev); | ||
804 | struct gk20a *g = get_gk20a(dev); | ||
805 | int err, nice_value; | ||
806 | |||
807 | gk20a_dbg_fn(""); | ||
808 | |||
809 | if (g->power_on) | ||
810 | return 0; | ||
811 | |||
812 | nice_value = task_nice(current); | ||
813 | set_user_nice(current, -20); | ||
814 | |||
815 | if (!g->irq_requested) { | ||
816 | err = request_threaded_irq(g->irq_stall, | ||
817 | gk20a_intr_isr_stall, | ||
818 | gk20a_intr_thread_stall, | ||
819 | 0, "gk20a_stall", g); | ||
820 | if (err) { | ||
821 | dev_err(dev_from_gk20a(g), | ||
822 | "failed to request stall intr irq @ %lld\n", | ||
823 | (u64)g->irq_stall); | ||
824 | goto done; | ||
825 | } | ||
826 | err = request_threaded_irq(g->irq_nonstall, | ||
827 | gk20a_intr_isr_nonstall, | ||
828 | gk20a_intr_thread_nonstall, | ||
829 | 0, "gk20a_nonstall", g); | ||
830 | if (err) { | ||
831 | dev_err(dev_from_gk20a(g), | ||
832 | "failed to request non-stall intr irq @ %lld\n", | ||
833 | (u64)g->irq_nonstall); | ||
834 | goto done; | ||
835 | } | ||
836 | g->irq_requested = true; | ||
837 | } | ||
838 | |||
839 | g->power_on = true; | ||
840 | |||
841 | gk20a_writel(g, mc_intr_mask_1_r(), | ||
842 | mc_intr_0_pfifo_pending_f() | ||
843 | | mc_intr_0_pgraph_pending_f()); | ||
844 | gk20a_writel(g, mc_intr_en_1_r(), | ||
845 | mc_intr_en_1_inta_hardware_f()); | ||
846 | |||
847 | gk20a_writel(g, mc_intr_mask_0_r(), | ||
848 | mc_intr_0_pgraph_pending_f() | ||
849 | | mc_intr_0_pfifo_pending_f() | ||
850 | | mc_intr_0_priv_ring_pending_f() | ||
851 | | mc_intr_0_ltc_pending_f() | ||
852 | | mc_intr_0_pbus_pending_f()); | ||
853 | gk20a_writel(g, mc_intr_en_0_r(), | ||
854 | mc_intr_en_0_inta_hardware_f()); | ||
855 | |||
856 | if (!tegra_platform_is_silicon()) | ||
857 | gk20a_writel(g, bus_intr_en_0_r(), 0x0); | ||
858 | else | ||
859 | gk20a_writel(g, bus_intr_en_0_r(), | ||
860 | bus_intr_en_0_pri_squash_m() | | ||
861 | bus_intr_en_0_pri_fecserr_m() | | ||
862 | bus_intr_en_0_pri_timeout_m()); | ||
863 | gk20a_reset_priv_ring(g); | ||
864 | |||
865 | gk20a_detect_chip(g); | ||
866 | err = gpu_init_hal(g); | ||
867 | if (err) | ||
868 | goto done; | ||
869 | |||
870 | /* TBD: move this after graphics init in which blcg/slcg is enabled. | ||
871 | This function removes SlowdownOnBoot which applies 32x divider | ||
872 | on gpcpll bypass path. The purpose of slowdown is to save power | ||
873 | during boot but it also significantly slows down gk20a init on | ||
874 | simulation and emulation. We should remove SOB after graphics power | ||
875 | saving features (blcg/slcg) are enabled. For now, do it here. */ | ||
876 | err = gk20a_init_clk_support(g); | ||
877 | if (err) { | ||
878 | gk20a_err(&dev->dev, "failed to init gk20a clk"); | ||
879 | goto done; | ||
880 | } | ||
881 | |||
882 | /* enable pri timeout only on silicon */ | ||
883 | if (tegra_platform_is_silicon()) { | ||
884 | gk20a_writel(g, | ||
885 | timer_pri_timeout_r(), | ||
886 | timer_pri_timeout_period_f(0x186A0) | | ||
887 | timer_pri_timeout_en_en_enabled_f()); | ||
888 | } else { | ||
889 | gk20a_writel(g, | ||
890 | timer_pri_timeout_r(), | ||
891 | timer_pri_timeout_period_f(0x186A0) | | ||
892 | timer_pri_timeout_en_en_disabled_f()); | ||
893 | } | ||
894 | |||
895 | err = gk20a_init_fifo_reset_enable_hw(g); | ||
896 | if (err) { | ||
897 | gk20a_err(&dev->dev, "failed to reset gk20a fifo"); | ||
898 | goto done; | ||
899 | } | ||
900 | |||
901 | err = gk20a_init_mm_support(g); | ||
902 | if (err) { | ||
903 | gk20a_err(&dev->dev, "failed to init gk20a mm"); | ||
904 | goto done; | ||
905 | } | ||
906 | |||
907 | err = gk20a_init_pmu_support(g); | ||
908 | if (err) { | ||
909 | gk20a_err(&dev->dev, "failed to init gk20a pmu"); | ||
910 | goto done; | ||
911 | } | ||
912 | |||
913 | err = gk20a_init_fifo_support(g); | ||
914 | if (err) { | ||
915 | gk20a_err(&dev->dev, "failed to init gk20a fifo"); | ||
916 | goto done; | ||
917 | } | ||
918 | |||
919 | err = gk20a_init_gr_support(g); | ||
920 | if (err) { | ||
921 | gk20a_err(&dev->dev, "failed to init gk20a gr"); | ||
922 | goto done; | ||
923 | } | ||
924 | |||
925 | err = gk20a_init_pmu_setup_hw2(g); | ||
926 | if (err) { | ||
927 | gk20a_err(&dev->dev, "failed to init gk20a pmu_hw2"); | ||
928 | goto done; | ||
929 | } | ||
930 | |||
931 | err = gk20a_init_therm_support(g); | ||
932 | if (err) { | ||
933 | gk20a_err(&dev->dev, "failed to init gk20a therm"); | ||
934 | goto done; | ||
935 | } | ||
936 | |||
937 | err = gk20a_init_gpu_characteristics(g); | ||
938 | if (err) { | ||
939 | gk20a_err(&dev->dev, "failed to init gk20a gpu characteristics"); | ||
940 | goto done; | ||
941 | } | ||
942 | |||
943 | gk20a_channel_resume(g); | ||
944 | set_user_nice(current, nice_value); | ||
945 | |||
946 | done: | ||
947 | return err; | ||
948 | } | ||
949 | |||
/* Device-tree match table: binds this driver to gk20a nodes and selects
 * the board/platform descriptor through the .data pointer. */
static struct of_device_id tegra_gk20a_of_match[] = {
#ifdef CONFIG_TEGRA_GK20A
	{ .compatible = "nvidia,tegra124-gk20a",
		.data = &gk20a_tegra_platform },
#endif
	{ .compatible = "nvidia,generic-gk20a",
		.data = &gk20a_generic_platform },
	{ },
};
959 | |||
960 | int tegra_gpu_get_max_state(struct thermal_cooling_device *cdev, | ||
961 | unsigned long *max_state) | ||
962 | { | ||
963 | struct cooling_device_gk20a *gk20a_gpufreq_device = cdev->devdata; | ||
964 | |||
965 | *max_state = gk20a_gpufreq_device->gk20a_freq_table_size - 1; | ||
966 | return 0; | ||
967 | } | ||
968 | |||
969 | int tegra_gpu_get_cur_state(struct thermal_cooling_device *cdev, | ||
970 | unsigned long *cur_state) | ||
971 | { | ||
972 | struct cooling_device_gk20a *gk20a_gpufreq_device = cdev->devdata; | ||
973 | |||
974 | *cur_state = gk20a_gpufreq_device->gk20a_freq_state; | ||
975 | return 0; | ||
976 | } | ||
977 | |||
978 | int tegra_gpu_set_cur_state(struct thermal_cooling_device *c_dev, | ||
979 | unsigned long cur_state) | ||
980 | { | ||
981 | u32 target_freq; | ||
982 | struct gk20a *g; | ||
983 | struct gpufreq_table_data *gpu_cooling_table; | ||
984 | struct cooling_device_gk20a *gk20a_gpufreq_device = c_dev->devdata; | ||
985 | |||
986 | BUG_ON(cur_state >= gk20a_gpufreq_device->gk20a_freq_table_size); | ||
987 | |||
988 | g = container_of(gk20a_gpufreq_device, struct gk20a, gk20a_cdev); | ||
989 | |||
990 | gpu_cooling_table = tegra_gpufreq_table_get(); | ||
991 | target_freq = gpu_cooling_table[cur_state].frequency; | ||
992 | |||
993 | /* ensure a query for state will get the proper value */ | ||
994 | gk20a_gpufreq_device->gk20a_freq_state = cur_state; | ||
995 | |||
996 | gk20a_clk_set_rate(g, target_freq); | ||
997 | |||
998 | return 0; | ||
999 | } | ||
1000 | |||
/* Thermal framework hooks registered in gk20a_probe() via
 * thermal_cooling_device_register(). */
static struct thermal_cooling_device_ops tegra_gpu_cooling_ops = {
	.get_max_state = tegra_gpu_get_max_state,
	.get_cur_state = tegra_gpu_get_cur_state,
	.set_cur_state = tegra_gpu_set_cur_state,
};
1006 | |||
1007 | static int gk20a_create_device( | ||
1008 | struct platform_device *pdev, int devno, const char *cdev_name, | ||
1009 | struct cdev *cdev, struct device **out, | ||
1010 | const struct file_operations *ops) | ||
1011 | { | ||
1012 | struct device *dev; | ||
1013 | int err; | ||
1014 | struct gk20a *g = get_gk20a(pdev); | ||
1015 | |||
1016 | gk20a_dbg_fn(""); | ||
1017 | |||
1018 | cdev_init(cdev, ops); | ||
1019 | cdev->owner = THIS_MODULE; | ||
1020 | |||
1021 | err = cdev_add(cdev, devno, 1); | ||
1022 | if (err) { | ||
1023 | dev_err(&pdev->dev, | ||
1024 | "failed to add %s cdev\n", cdev_name); | ||
1025 | return err; | ||
1026 | } | ||
1027 | |||
1028 | dev = device_create(g->class, NULL, devno, NULL, | ||
1029 | (pdev->id <= 0) ? INTERFACE_NAME : INTERFACE_NAME ".%d", | ||
1030 | cdev_name, pdev->id); | ||
1031 | |||
1032 | if (IS_ERR(dev)) { | ||
1033 | err = PTR_ERR(dev); | ||
1034 | cdev_del(cdev); | ||
1035 | dev_err(&pdev->dev, | ||
1036 | "failed to create %s device for %s\n", | ||
1037 | cdev_name, pdev->name); | ||
1038 | return err; | ||
1039 | } | ||
1040 | |||
1041 | *out = dev; | ||
1042 | return 0; | ||
1043 | } | ||
1044 | |||
1045 | static void gk20a_user_deinit(struct platform_device *dev) | ||
1046 | { | ||
1047 | struct gk20a *g = get_gk20a(dev); | ||
1048 | |||
1049 | if (g->channel.node) { | ||
1050 | device_destroy(g->class, g->channel.cdev.dev); | ||
1051 | cdev_del(&g->channel.cdev); | ||
1052 | } | ||
1053 | |||
1054 | if (g->as.node) { | ||
1055 | device_destroy(g->class, g->as.cdev.dev); | ||
1056 | cdev_del(&g->as.cdev); | ||
1057 | } | ||
1058 | |||
1059 | if (g->ctrl.node) { | ||
1060 | device_destroy(g->class, g->ctrl.cdev.dev); | ||
1061 | cdev_del(&g->ctrl.cdev); | ||
1062 | } | ||
1063 | |||
1064 | if (g->dbg.node) { | ||
1065 | device_destroy(g->class, g->dbg.cdev.dev); | ||
1066 | cdev_del(&g->dbg.cdev); | ||
1067 | } | ||
1068 | |||
1069 | if (g->prof.node) { | ||
1070 | device_destroy(g->class, g->prof.cdev.dev); | ||
1071 | cdev_del(&g->prof.cdev); | ||
1072 | } | ||
1073 | |||
1074 | if (g->cdev_region) | ||
1075 | unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS); | ||
1076 | |||
1077 | if (g->class) | ||
1078 | class_destroy(g->class); | ||
1079 | } | ||
1080 | |||
1081 | static int gk20a_user_init(struct platform_device *dev) | ||
1082 | { | ||
1083 | int err; | ||
1084 | dev_t devno; | ||
1085 | struct gk20a *g = get_gk20a(dev); | ||
1086 | |||
1087 | g->class = class_create(THIS_MODULE, CLASS_NAME); | ||
1088 | if (IS_ERR(g->class)) { | ||
1089 | err = PTR_ERR(g->class); | ||
1090 | g->class = NULL; | ||
1091 | dev_err(&dev->dev, | ||
1092 | "failed to create " CLASS_NAME " class\n"); | ||
1093 | goto fail; | ||
1094 | } | ||
1095 | |||
1096 | err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, CLASS_NAME); | ||
1097 | if (err) { | ||
1098 | dev_err(&dev->dev, "failed to allocate devno\n"); | ||
1099 | goto fail; | ||
1100 | } | ||
1101 | g->cdev_region = devno; | ||
1102 | |||
1103 | err = gk20a_create_device(dev, devno++, "", | ||
1104 | &g->channel.cdev, &g->channel.node, | ||
1105 | &gk20a_channel_ops); | ||
1106 | if (err) | ||
1107 | goto fail; | ||
1108 | |||
1109 | err = gk20a_create_device(dev, devno++, "-as", | ||
1110 | &g->as.cdev, &g->as.node, | ||
1111 | &gk20a_as_ops); | ||
1112 | if (err) | ||
1113 | goto fail; | ||
1114 | |||
1115 | err = gk20a_create_device(dev, devno++, "-ctrl", | ||
1116 | &g->ctrl.cdev, &g->ctrl.node, | ||
1117 | &gk20a_ctrl_ops); | ||
1118 | if (err) | ||
1119 | goto fail; | ||
1120 | |||
1121 | err = gk20a_create_device(dev, devno++, "-dbg", | ||
1122 | &g->dbg.cdev, &g->dbg.node, | ||
1123 | &gk20a_dbg_ops); | ||
1124 | if (err) | ||
1125 | goto fail; | ||
1126 | |||
1127 | err = gk20a_create_device(dev, devno++, "-prof", | ||
1128 | &g->prof.cdev, &g->prof.node, | ||
1129 | &gk20a_prof_ops); | ||
1130 | if (err) | ||
1131 | goto fail; | ||
1132 | |||
1133 | return 0; | ||
1134 | fail: | ||
1135 | gk20a_user_deinit(dev); | ||
1136 | return err; | ||
1137 | } | ||
1138 | |||
1139 | struct channel_gk20a *gk20a_get_channel_from_file(int fd) | ||
1140 | { | ||
1141 | struct channel_gk20a *ch; | ||
1142 | struct file *f = fget(fd); | ||
1143 | if (!f) | ||
1144 | return 0; | ||
1145 | |||
1146 | if (f->f_op != &gk20a_channel_ops) { | ||
1147 | fput(f); | ||
1148 | return 0; | ||
1149 | } | ||
1150 | |||
1151 | ch = (struct channel_gk20a *)f->private_data; | ||
1152 | fput(f); | ||
1153 | return ch; | ||
1154 | } | ||
1155 | |||
1156 | static int gk20a_pm_enable_clk(struct device *dev) | ||
1157 | { | ||
1158 | int index = 0; | ||
1159 | struct gk20a_platform *platform; | ||
1160 | |||
1161 | platform = dev_get_drvdata(dev); | ||
1162 | if (!platform) | ||
1163 | return -EINVAL; | ||
1164 | |||
1165 | for (index = 0; index < platform->num_clks; index++) { | ||
1166 | int err = clk_prepare_enable(platform->clk[index]); | ||
1167 | if (err) | ||
1168 | return -EINVAL; | ||
1169 | } | ||
1170 | |||
1171 | return 0; | ||
1172 | } | ||
1173 | |||
1174 | static int gk20a_pm_disable_clk(struct device *dev) | ||
1175 | { | ||
1176 | int index = 0; | ||
1177 | struct gk20a_platform *platform; | ||
1178 | |||
1179 | platform = dev_get_drvdata(dev); | ||
1180 | if (!platform) | ||
1181 | return -EINVAL; | ||
1182 | |||
1183 | for (index = 0; index < platform->num_clks; index++) | ||
1184 | clk_disable_unprepare(platform->clk[index]); | ||
1185 | |||
1186 | return 0; | ||
1187 | } | ||
1188 | |||
#ifdef CONFIG_PM
/*
 * Runtime-PM hooks: gate/ungate GPU clocks on runtime suspend/resume.
 * Only wired up when runtime PM is enabled and generic power domains
 * are not — with genpd, the domain's dev_ops carry the clock callbacks
 * instead (see gk20a_pm_initialise_domain()).
 */
const struct dev_pm_ops gk20a_pm_ops = {
#if defined(CONFIG_PM_RUNTIME) && !defined(CONFIG_PM_GENERIC_DOMAINS)
	.runtime_resume = gk20a_pm_enable_clk,
	.runtime_suspend = gk20a_pm_disable_clk,
#endif
};
#endif
1197 | |||
1198 | static int gk20a_pm_railgate(struct generic_pm_domain *domain) | ||
1199 | { | ||
1200 | struct gk20a *g = container_of(domain, struct gk20a, pd); | ||
1201 | struct gk20a_platform *platform = platform_get_drvdata(g->dev); | ||
1202 | int ret = 0; | ||
1203 | |||
1204 | if (platform->railgate) | ||
1205 | ret = platform->railgate(platform->g->dev); | ||
1206 | |||
1207 | return ret; | ||
1208 | } | ||
1209 | |||
1210 | static int gk20a_pm_unrailgate(struct generic_pm_domain *domain) | ||
1211 | { | ||
1212 | struct gk20a *g = container_of(domain, struct gk20a, pd); | ||
1213 | struct gk20a_platform *platform = platform_get_drvdata(g->dev); | ||
1214 | int ret = 0; | ||
1215 | |||
1216 | if (platform->unrailgate) | ||
1217 | ret = platform->unrailgate(platform->g->dev); | ||
1218 | |||
1219 | return ret; | ||
1220 | } | ||
1221 | |||
1222 | static int gk20a_pm_suspend(struct device *dev) | ||
1223 | { | ||
1224 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
1225 | int ret = 0; | ||
1226 | |||
1227 | if (atomic_read(&dev->power.usage_count) > 1) | ||
1228 | return -EBUSY; | ||
1229 | |||
1230 | ret = gk20a_pm_prepare_poweroff(dev); | ||
1231 | if (ret) | ||
1232 | return ret; | ||
1233 | |||
1234 | gk20a_scale_suspend(to_platform_device(dev)); | ||
1235 | |||
1236 | if (platform->suspend) | ||
1237 | platform->suspend(dev); | ||
1238 | |||
1239 | return 0; | ||
1240 | } | ||
1241 | |||
/* System-resume hook: power the GPU back on and restart scaling. */
static int gk20a_pm_resume(struct device *dev)
{
	int err = gk20a_pm_finalize_poweron(dev);

	if (err)
		return err;

	gk20a_scale_resume(to_platform_device(dev));
	return 0;
}
1254 | |||
1255 | static int gk20a_pm_initialise_domain(struct platform_device *pdev) | ||
1256 | { | ||
1257 | struct gk20a_platform *platform = platform_get_drvdata(pdev); | ||
1258 | struct dev_power_governor *pm_domain_gov = NULL; | ||
1259 | struct generic_pm_domain *domain = &platform->g->pd; | ||
1260 | int ret = 0; | ||
1261 | |||
1262 | domain->name = kstrdup(pdev->name, GFP_KERNEL); | ||
1263 | |||
1264 | if (!platform->can_railgate) | ||
1265 | pm_domain_gov = &pm_domain_always_on_gov; | ||
1266 | |||
1267 | pm_genpd_init(domain, pm_domain_gov, true); | ||
1268 | |||
1269 | domain->power_off = gk20a_pm_railgate; | ||
1270 | domain->power_on = gk20a_pm_unrailgate; | ||
1271 | domain->dev_ops.start = gk20a_pm_enable_clk; | ||
1272 | domain->dev_ops.stop = gk20a_pm_disable_clk; | ||
1273 | domain->dev_ops.save_state = gk20a_pm_prepare_poweroff; | ||
1274 | domain->dev_ops.restore_state = gk20a_pm_finalize_poweron; | ||
1275 | domain->dev_ops.suspend = gk20a_pm_suspend; | ||
1276 | domain->dev_ops.resume = gk20a_pm_resume; | ||
1277 | |||
1278 | device_set_wakeup_capable(&pdev->dev, 0); | ||
1279 | ret = pm_genpd_add_device(domain, &pdev->dev); | ||
1280 | |||
1281 | if (platform->railgate_delay) | ||
1282 | pm_genpd_set_poweroff_delay(domain, platform->railgate_delay); | ||
1283 | |||
1284 | return ret; | ||
1285 | } | ||
1286 | |||
/*
 * One-time power-management bring-up for the device: runtime PM with
 * optional autosuspend, initial rail state, and (when genpd is built
 * in) the generic power domain. The ordering here is deliberate:
 * runtime PM must be enabled before the rail state is decided, and the
 * domain is initialised last so its callbacks see a configured device.
 */
static int gk20a_pm_init(struct platform_device *dev)
{
	struct gk20a_platform *platform = platform_get_drvdata(dev);
	int err = 0;

	/* Initialise pm runtime */
	if (platform->clockgate_delay) {
		pm_runtime_set_autosuspend_delay(&dev->dev,
			platform->clockgate_delay);
		pm_runtime_use_autosuspend(&dev->dev);
	}

	pm_runtime_enable(&dev->dev);
	/* without runtime PM the clocks must be switched on permanently */
	if (!pm_runtime_enabled(&dev->dev))
		gk20a_pm_enable_clk(&dev->dev);

	/* Enable runtime railgating if possible. If not,
	 * turn on the rail now. */
	if (platform->can_railgate && IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
		platform->railgate(dev);
	else
		platform->unrailgate(dev);

	/* genpd will take care of runtime power management if it is enabled */
	if (IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
		err = gk20a_pm_initialise_domain(dev);

	return err;
}
1316 | |||
1317 | static int gk20a_probe(struct platform_device *dev) | ||
1318 | { | ||
1319 | struct gk20a *gk20a; | ||
1320 | int err; | ||
1321 | struct gk20a_platform *platform = NULL; | ||
1322 | struct cooling_device_gk20a *gpu_cdev = NULL; | ||
1323 | |||
1324 | if (dev->dev.of_node) { | ||
1325 | const struct of_device_id *match; | ||
1326 | |||
1327 | match = of_match_device(tegra_gk20a_of_match, &dev->dev); | ||
1328 | if (match) | ||
1329 | platform = (struct gk20a_platform *)match->data; | ||
1330 | } else | ||
1331 | platform = (struct gk20a_platform *)dev->dev.platform_data; | ||
1332 | |||
1333 | if (!platform) { | ||
1334 | dev_err(&dev->dev, "no platform data\n"); | ||
1335 | return -ENODATA; | ||
1336 | } | ||
1337 | |||
1338 | gk20a_dbg_fn(""); | ||
1339 | |||
1340 | platform_set_drvdata(dev, platform); | ||
1341 | |||
1342 | gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL); | ||
1343 | if (!gk20a) { | ||
1344 | dev_err(&dev->dev, "couldn't allocate gk20a support"); | ||
1345 | return -ENOMEM; | ||
1346 | } | ||
1347 | |||
1348 | set_gk20a(dev, gk20a); | ||
1349 | gk20a->dev = dev; | ||
1350 | |||
1351 | err = gk20a_user_init(dev); | ||
1352 | if (err) | ||
1353 | return err; | ||
1354 | |||
1355 | gk20a_init_support(dev); | ||
1356 | |||
1357 | spin_lock_init(&gk20a->mc_enable_lock); | ||
1358 | |||
1359 | /* Initialize the platform interface. */ | ||
1360 | err = platform->probe(dev); | ||
1361 | if (err) { | ||
1362 | dev_err(&dev->dev, "platform probe failed"); | ||
1363 | return err; | ||
1364 | } | ||
1365 | |||
1366 | err = gk20a_pm_init(dev); | ||
1367 | if (err) { | ||
1368 | dev_err(&dev->dev, "pm init failed"); | ||
1369 | return err; | ||
1370 | } | ||
1371 | |||
1372 | /* Initialise scaling */ | ||
1373 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) | ||
1374 | gk20a_scale_init(dev); | ||
1375 | |||
1376 | if (platform->late_probe) { | ||
1377 | err = platform->late_probe(dev); | ||
1378 | if (err) { | ||
1379 | dev_err(&dev->dev, "late probe failed"); | ||
1380 | return err; | ||
1381 | } | ||
1382 | } | ||
1383 | |||
1384 | gk20a_debug_init(dev); | ||
1385 | |||
1386 | /* Set DMA parameters to allow larger sgt lists */ | ||
1387 | dev->dev.dma_parms = &gk20a->dma_parms; | ||
1388 | dma_set_max_seg_size(&dev->dev, UINT_MAX); | ||
1389 | |||
1390 | gpu_cdev = &gk20a->gk20a_cdev; | ||
1391 | gpu_cdev->gk20a_freq_table_size = tegra_gpufreq_table_size_get(); | ||
1392 | gpu_cdev->gk20a_freq_state = 0; | ||
1393 | gpu_cdev->g = gk20a; | ||
1394 | gpu_cdev->gk20a_cooling_dev = thermal_cooling_device_register("gk20a_cdev", gpu_cdev, | ||
1395 | &tegra_gpu_cooling_ops); | ||
1396 | |||
1397 | gk20a->gr_idle_timeout_default = | ||
1398 | CONFIG_GK20A_DEFAULT_TIMEOUT; | ||
1399 | gk20a->timeouts_enabled = true; | ||
1400 | |||
1401 | /* Set up initial clock gating settings */ | ||
1402 | if (tegra_platform_is_silicon()) { | ||
1403 | gk20a->slcg_enabled = true; | ||
1404 | gk20a->blcg_enabled = true; | ||
1405 | gk20a->elcg_enabled = true; | ||
1406 | gk20a->elpg_enabled = true; | ||
1407 | gk20a->aelpg_enabled = true; | ||
1408 | } | ||
1409 | |||
1410 | gk20a_create_sysfs(dev); | ||
1411 | |||
1412 | #ifdef CONFIG_DEBUG_FS | ||
1413 | clk_gk20a_debugfs_init(dev); | ||
1414 | |||
1415 | spin_lock_init(&gk20a->debugfs_lock); | ||
1416 | gk20a->mm.ltc_enabled = true; | ||
1417 | gk20a->mm.ltc_enabled_debug = true; | ||
1418 | gk20a->debugfs_ltc_enabled = | ||
1419 | debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR, | ||
1420 | platform->debugfs, | ||
1421 | &gk20a->mm.ltc_enabled_debug); | ||
1422 | gk20a->mm.ltc_enabled_debug = true; | ||
1423 | gk20a->debugfs_gr_idle_timeout_default = | ||
1424 | debugfs_create_u32("gr_idle_timeout_default_us", | ||
1425 | S_IRUGO|S_IWUSR, platform->debugfs, | ||
1426 | &gk20a->gr_idle_timeout_default); | ||
1427 | gk20a->debugfs_timeouts_enabled = | ||
1428 | debugfs_create_bool("timeouts_enabled", | ||
1429 | S_IRUGO|S_IWUSR, | ||
1430 | platform->debugfs, | ||
1431 | &gk20a->timeouts_enabled); | ||
1432 | gk20a_pmu_debugfs_init(dev); | ||
1433 | #endif | ||
1434 | |||
1435 | #ifdef CONFIG_INPUT_CFBOOST | ||
1436 | cfb_add_device(&dev->dev); | ||
1437 | #endif | ||
1438 | |||
1439 | return 0; | ||
1440 | } | ||
1441 | |||
1442 | static int __exit gk20a_remove(struct platform_device *dev) | ||
1443 | { | ||
1444 | struct gk20a *g = get_gk20a(dev); | ||
1445 | gk20a_dbg_fn(""); | ||
1446 | |||
1447 | #ifdef CONFIG_INPUT_CFBOOST | ||
1448 | cfb_remove_device(&dev->dev); | ||
1449 | #endif | ||
1450 | |||
1451 | if (g->remove_support) | ||
1452 | g->remove_support(dev); | ||
1453 | |||
1454 | gk20a_user_deinit(dev); | ||
1455 | |||
1456 | set_gk20a(dev, 0); | ||
1457 | #ifdef CONFIG_DEBUG_FS | ||
1458 | debugfs_remove(g->debugfs_ltc_enabled); | ||
1459 | debugfs_remove(g->debugfs_gr_idle_timeout_default); | ||
1460 | debugfs_remove(g->debugfs_timeouts_enabled); | ||
1461 | #endif | ||
1462 | |||
1463 | kfree(g); | ||
1464 | |||
1465 | #ifdef CONFIG_PM_RUNTIME | ||
1466 | pm_runtime_put(&dev->dev); | ||
1467 | pm_runtime_disable(&dev->dev); | ||
1468 | #else | ||
1469 | nvhost_module_disable_clk(&dev->dev); | ||
1470 | #endif | ||
1471 | |||
1472 | return 0; | ||
1473 | } | ||
1474 | |||
/* Platform-driver glue; PM ops are only attached when CONFIG_PM is
 * set, and the DT match table only when CONFIG_OF is set. */
static struct platform_driver gk20a_driver = {
	.probe = gk20a_probe,
	.remove = __exit_p(gk20a_remove),
	.driver = {
		.owner = THIS_MODULE,
		.name = "gk20a",
#ifdef CONFIG_OF
		.of_match_table = tegra_gk20a_of_match,
#endif
#ifdef CONFIG_PM
		.pm = &gk20a_pm_ops,
#endif
	}
};
1489 | |||
/* Module entry point: register the gk20a platform driver. */
static int __init gk20a_init(void)
{
	return platform_driver_register(&gk20a_driver);
}
1494 | |||
/* Module exit point: unregister the gk20a platform driver. */
static void __exit gk20a_exit(void)
{
	platform_driver_unregister(&gk20a_driver);
}
1499 | |||
1500 | bool is_gk20a_module(struct platform_device *dev) | ||
1501 | { | ||
1502 | return &gk20a_driver.driver == dev->dev.driver; | ||
1503 | } | ||
1504 | |||
/* Take a runtime-PM reference without waking the GPU if it is idle. */
void gk20a_busy_noresume(struct platform_device *pdev)
{
	pm_runtime_get_noresume(&pdev->dev);
}
1509 | |||
/* Mark a channel busy: take the platform channel ref first, then the
 * device busy ref; roll back the platform ref if the latter fails. */
int gk20a_channel_busy(struct platform_device *pdev)
{
	int err;

	err = gk20a_platform_channel_busy(pdev);
	if (err)
		return err;

	err = gk20a_busy(pdev);
	if (err)
		gk20a_platform_channel_idle(pdev);

	return err;
}
1524 | |||
/* Drop the references taken by gk20a_channel_busy(): device busy ref
 * first, then the platform channel ref. */
void gk20a_channel_idle(struct platform_device *pdev)
{
	gk20a_idle(pdev);
	gk20a_platform_channel_idle(pdev);
}
1530 | |||
/* Take a busy reference, resuming the GPU synchronously when runtime
 * PM is enabled, and notify the scaling code. Returns 0 or a negative
 * errno from the runtime-PM resume. */
int gk20a_busy(struct platform_device *pdev)
{
	int ret = 0;

#ifdef CONFIG_PM_RUNTIME
	ret = pm_runtime_get_sync(&pdev->dev);
#endif
	gk20a_scale_notify_busy(pdev);

	if (ret < 0)
		return ret;
	return 0;
}
1542 | |||
/* Release a busy reference. With runtime PM, notify the scaling code
 * only when this is the last user (usage count dropping 1 -> 0), then
 * arm the autosuspend timer; without runtime PM, just notify idle. */
void gk20a_idle(struct platform_device *pdev)
{
#ifdef CONFIG_PM_RUNTIME
	if (atomic_read(&pdev->dev.power.usage_count) == 1)
		gk20a_scale_notify_idle(pdev);
	pm_runtime_mark_last_busy(&pdev->dev);
	pm_runtime_put_sync_autosuspend(&pdev->dev);
#else
	gk20a_scale_notify_idle(pdev);
#endif
}
1554 | |||
1555 | void gk20a_disable(struct gk20a *g, u32 units) | ||
1556 | { | ||
1557 | u32 pmc; | ||
1558 | |||
1559 | gk20a_dbg(gpu_dbg_info, "pmc disable: %08x\n", units); | ||
1560 | |||
1561 | spin_lock(&g->mc_enable_lock); | ||
1562 | pmc = gk20a_readl(g, mc_enable_r()); | ||
1563 | pmc &= ~units; | ||
1564 | gk20a_writel(g, mc_enable_r(), pmc); | ||
1565 | spin_unlock(&g->mc_enable_lock); | ||
1566 | } | ||
1567 | |||
1568 | void gk20a_enable(struct gk20a *g, u32 units) | ||
1569 | { | ||
1570 | u32 pmc; | ||
1571 | |||
1572 | gk20a_dbg(gpu_dbg_info, "pmc enable: %08x\n", units); | ||
1573 | |||
1574 | spin_lock(&g->mc_enable_lock); | ||
1575 | pmc = gk20a_readl(g, mc_enable_r()); | ||
1576 | pmc |= units; | ||
1577 | gk20a_writel(g, mc_enable_r(), pmc); | ||
1578 | spin_unlock(&g->mc_enable_lock); | ||
1579 | gk20a_readl(g, mc_enable_r()); | ||
1580 | |||
1581 | udelay(20); | ||
1582 | } | ||
1583 | |||
/* Pulse-reset @units via PMC: disable, let them settle 20us, re-enable. */
void gk20a_reset(struct gk20a *g, u32 units)
{
	gk20a_disable(g, units);
	udelay(20);
	gk20a_enable(g, units);
}
1590 | |||
1591 | int gk20a_init_gpu_characteristics(struct gk20a *g) | ||
1592 | { | ||
1593 | struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics; | ||
1594 | |||
1595 | gpu->L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); | ||
1596 | gpu->on_board_video_memory_size = 0; /* integrated GPU */ | ||
1597 | |||
1598 | gpu->num_gpc = g->gr.gpc_count; | ||
1599 | gpu->num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; | ||
1600 | |||
1601 | gpu->bus_type = NVHOST_GPU_BUS_TYPE_AXI; /* always AXI for now */ | ||
1602 | |||
1603 | gpu->big_page_size = g->mm.big_page_size; | ||
1604 | gpu->compression_page_size = g->mm.compression_page_size; | ||
1605 | |||
1606 | return 0; | ||
1607 | } | ||
1608 | |||
1609 | int nvhost_vpr_info_fetch(void) | ||
1610 | { | ||
1611 | struct gk20a *g = get_gk20a(to_platform_device( | ||
1612 | bus_find_device_by_name(&platform_bus_type, | ||
1613 | NULL, "gk20a.0"))); | ||
1614 | |||
1615 | if (!g) { | ||
1616 | pr_info("gk20a ins't ready yet\n"); | ||
1617 | return 0; | ||
1618 | } | ||
1619 | |||
1620 | return gk20a_mm_mmu_vpr_info_fetch(g); | ||
1621 | } | ||
1622 | |||
1623 | static const struct firmware * | ||
1624 | do_request_firmware(struct device *dev, const char *prefix, const char *fw_name) | ||
1625 | { | ||
1626 | const struct firmware *fw; | ||
1627 | char *fw_path = NULL; | ||
1628 | int path_len, err; | ||
1629 | |||
1630 | if (prefix) { | ||
1631 | path_len = strlen(prefix) + strlen(fw_name); | ||
1632 | path_len += 2; /* for the path separator and zero terminator*/ | ||
1633 | |||
1634 | fw_path = kzalloc(sizeof(*fw_path) * path_len, GFP_KERNEL); | ||
1635 | if (!fw_path) | ||
1636 | return NULL; | ||
1637 | |||
1638 | sprintf(fw_path, "%s/%s", prefix, fw_name); | ||
1639 | fw_name = fw_path; | ||
1640 | } | ||
1641 | |||
1642 | err = request_firmware(&fw, fw_name, dev); | ||
1643 | kfree(fw_path); | ||
1644 | if (err) | ||
1645 | return NULL; | ||
1646 | return fw; | ||
1647 | } | ||
1648 | |||
/* This is a simple wrapper around request_firmware that takes 'fw_name' and
 * applies an IP specific relative path prefix to it. The caller is
 * responsible for calling release_firmware later. Returns NULL on any
 * failure (no error code is propagated). */
const struct firmware *
gk20a_request_firmware(struct gk20a *g, const char *fw_name)
{
	struct device *dev = &g->dev->dev;
	const struct firmware *fw;

	/* current->fs is NULL when calling from SYS_EXIT.
	   Add a check here to prevent crash in request_firmware */
	if (!current->fs || !fw_name)
		return NULL;

	/* g->ops.name selects the chip-specific firmware subdirectory */
	BUG_ON(!g->ops.name);
	fw = do_request_firmware(dev, g->ops.name, fw_name);

#ifdef CONFIG_TEGRA_GK20A
	/* TO BE REMOVED - Support loading from legacy SOC specific path. */
	if (!fw)
		fw = nvhost_client_request_firmware(g->dev, fw_name);
#endif

	if (!fw) {
		dev_err(dev, "failed to get firmware\n");
		return NULL;
	}

	return fw;
}
1679 | |||
/* Register the module entry/exit points with the module loader. */
module_init(gk20a_init);
module_exit(gk20a_exit);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h new file mode 100644 index 00000000..a9081a9d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -0,0 +1,559 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/gk20a.h | ||
3 | * | ||
4 | * GK20A Graphics | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #ifndef _NVHOST_GK20A_H_ | ||
22 | #define _NVHOST_GK20A_H_ | ||
23 | |||
24 | |||
25 | struct gk20a; | ||
26 | struct fifo_gk20a; | ||
27 | struct channel_gk20a; | ||
28 | struct gr_gk20a; | ||
29 | struct sim_gk20a; | ||
30 | |||
31 | #include <linux/sched.h> | ||
32 | #include <linux/spinlock.h> | ||
33 | #include <linux/nvhost_gpu_ioctl.h> | ||
34 | #include <linux/tegra-soc.h> | ||
35 | |||
36 | #include "../../../arch/arm/mach-tegra/iomap.h" | ||
37 | |||
38 | #include "as_gk20a.h" | ||
39 | #include "clk_gk20a.h" | ||
40 | #include "fifo_gk20a.h" | ||
41 | #include "gr_gk20a.h" | ||
42 | #include "sim_gk20a.h" | ||
43 | #include "pmu_gk20a.h" | ||
44 | #include "priv_ring_gk20a.h" | ||
45 | #include "therm_gk20a.h" | ||
46 | #include "platform_gk20a.h" | ||
47 | |||
48 | extern struct platform_device tegra_gk20a_device; | ||
49 | |||
50 | bool is_gk20a_module(struct platform_device *dev); | ||
51 | |||
/*
 * Thermal cooling device state: ties a kernel thermal_cooling_device
 * to the gk20a instance it throttles.
 */
struct cooling_device_gk20a {
	struct thermal_cooling_device *gk20a_cooling_dev;
	unsigned int gk20a_freq_state;		/* presumably current throttle state index — confirm against cooling ops */
	unsigned int gk20a_freq_table_size;	/* number of usable frequency steps — TODO confirm */
	struct gk20a *g;			/* owning GPU instance */
};
58 | |||
/*
 * Per-chip HAL: tables of function pointers populated by chip specific
 * initialization code so that common code never needs to branch on the
 * chip id directly.
 */
struct gpu_ops {
	/* ltc: cache / comptag / zero-bandwidth-clear (ZBC) operations */
	struct {
		int (*determine_L2_size_bytes)(struct gk20a *gk20a);
		void (*set_max_ways_evict_last)(struct gk20a *g, u32 max_ways);
		int (*init_comptags)(struct gk20a *g, struct gr_gk20a *gr);
		int (*clear_comptags)(struct gk20a *g, u32 min, u32 max);
		void (*set_zbc_color_entry)(struct gk20a *g,
					    struct zbc_entry *color_val,
					    u32 index);
		void (*set_zbc_depth_entry)(struct gk20a *g,
					    struct zbc_entry *depth_val,
					    u32 index);
		void (*clear_zbc_color_entry)(struct gk20a *g, u32 index);
		void (*clear_zbc_depth_entry)(struct gk20a *g, u32 index);
		int (*init_zbc)(struct gk20a *g, struct gr_gk20a *gr);
		void (*init_cbc)(struct gk20a *g, struct gr_gk20a *gr);
		void (*sync_debugfs)(struct gk20a *g);
		void (*elpg_flush)(struct gk20a *g);
	} ltc;
	/* gr: graphics engine context setup and exception handling */
	struct {
		int (*init_fs_state)(struct gk20a *g);
		void (*access_smpc_reg)(struct gk20a *g, u32 quad, u32 offset);
		void (*bundle_cb_defaults)(struct gk20a *g);
		void (*cb_size_default)(struct gk20a *g);
		int (*calc_global_ctx_buffer_size)(struct gk20a *g);
		void (*commit_global_attrib_cb)(struct gk20a *g,
						struct channel_ctx_gk20a *ch_ctx,
						u64 addr, bool patch);
		void (*commit_global_bundle_cb)(struct gk20a *g,
						struct channel_ctx_gk20a *ch_ctx,
						u64 addr, u64 size, bool patch);
		int (*commit_global_cb_manager)(struct gk20a *g,
						struct channel_gk20a *ch,
						bool patch);
		void (*commit_global_pagepool)(struct gk20a *g,
					       struct channel_ctx_gk20a *ch_ctx,
					       u64 addr, u32 size, bool patch);
		void (*init_gpc_mmu)(struct gk20a *g);
		int (*handle_sw_method)(struct gk20a *g, u32 addr,
					u32 class_num, u32 offset, u32 data);
		void (*set_alpha_circular_buffer_size)(struct gk20a *g,
						       u32 data);
		void (*set_circular_buffer_size)(struct gk20a *g, u32 data);
		void (*enable_hww_exceptions)(struct gk20a *g);
		bool (*is_valid_class)(struct gk20a *g, u32 class_num);
		void (*get_sm_dsm_perf_regs)(struct gk20a *g,
					     u32 *num_sm_dsm_perf_regs,
					     u32 **sm_dsm_perf_regs,
					     u32 *perf_register_stride);
		void (*get_sm_dsm_perf_ctrl_regs)(struct gk20a *g,
						  u32 *num_sm_dsm_perf_regs,
						  u32 **sm_dsm_perf_regs,
						  u32 *perf_register_stride);
		void (*set_hww_esr_report_mask)(struct gk20a *g);
		int (*setup_alpha_beta_tables)(struct gk20a *g,
					       struct gr_gk20a *gr);
	} gr;
	/* chip name; used as the firmware path prefix in gk20a_request_firmware() */
	const char *name;
	/* fb: frame buffer / memory controller operations */
	struct {
		void (*init_fs_state)(struct gk20a *g);
		void (*reset)(struct gk20a *g);
		void (*init_uncompressed_kind_map)(struct gk20a *g);
		void (*init_kind_attr)(struct gk20a *g);
	} fb;
	/* clock_gating: load SLCG/BLCG/power-gating production settings */
	struct {
		void (*slcg_gr_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_perf_load_gating_prod)(struct gk20a *g, bool prod);
		void (*blcg_gr_load_gating_prod)(struct gk20a *g, bool prod);
		void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod);
		void (*slcg_therm_load_gating_prod)(struct gk20a *g, bool prod);
	} clock_gating;
	/* fifo: host channel operations */
	struct {
		void (*bind_channel)(struct channel_gk20a *ch_gk20a);
	} fifo;
	/* pmu_ver: PMU firmware ABI-version dependent accessors */
	struct pmu_v {
		/*used for change of enum zbc update cmd id from ver 0 to ver1*/
		u32 cmd_id_zbc_table_update;
		u32 (*get_pmu_cmdline_args_size)(struct pmu_gk20a *pmu);
		void (*set_pmu_cmdline_args_cpu_freq)(struct pmu_gk20a *pmu,
			u32 freq);
		void * (*get_pmu_cmdline_args_ptr)(struct pmu_gk20a *pmu);
		u32 (*get_pmu_allocation_struct_size)(struct pmu_gk20a *pmu);
		void (*set_pmu_allocation_ptr)(struct pmu_gk20a *pmu,
			void **pmu_alloc_ptr, void *assign_ptr);
		void (*pmu_allocation_set_dmem_size)(struct pmu_gk20a *pmu,
			void *pmu_alloc_ptr, u16 size);
		u16 (*pmu_allocation_get_dmem_size)(struct pmu_gk20a *pmu,
			void *pmu_alloc_ptr);
		u32 (*pmu_allocation_get_dmem_offset)(struct pmu_gk20a *pmu,
			void *pmu_alloc_ptr);
		u32 * (*pmu_allocation_get_dmem_offset_addr)(
			struct pmu_gk20a *pmu, void *pmu_alloc_ptr);
		void (*pmu_allocation_set_dmem_offset)(struct pmu_gk20a *pmu,
			void *pmu_alloc_ptr, u32 offset);
		void (*get_pmu_init_msg_pmu_queue_params)(
			struct pmu_queue *queue, u32 id,
			void *pmu_init_msg);
		void *(*get_pmu_msg_pmu_init_msg_ptr)(
			struct pmu_init_msg *init);
		u16 (*get_pmu_init_msg_pmu_sw_mg_off)(
			union pmu_init_msg_pmu *init_msg);
		u16 (*get_pmu_init_msg_pmu_sw_mg_size)(
			union pmu_init_msg_pmu *init_msg);
		u32 (*get_pmu_perfmon_cmd_start_size)(void);
		int (*get_perfmon_cmd_start_offsetofvar)(
			enum pmu_perfmon_cmd_start_fields field);
		void (*perfmon_start_set_cmd_type)(struct pmu_perfmon_cmd *pc,
			u8 value);
		void (*perfmon_start_set_group_id)(struct pmu_perfmon_cmd *pc,
			u8 value);
		void (*perfmon_start_set_state_id)(struct pmu_perfmon_cmd *pc,
			u8 value);
		void (*perfmon_start_set_flags)(struct pmu_perfmon_cmd *pc,
			u8 value);
		u8 (*perfmon_start_get_flags)(struct pmu_perfmon_cmd *pc);
		u32 (*get_pmu_perfmon_cmd_init_size)(void);
		int (*get_perfmon_cmd_init_offsetofvar)(
			enum pmu_perfmon_cmd_start_fields field);
		void (*perfmon_cmd_init_set_sample_buffer)(
			struct pmu_perfmon_cmd *pc, u16 value);
		void (*perfmon_cmd_init_set_dec_cnt)(
			struct pmu_perfmon_cmd *pc, u8 value);
		void (*perfmon_cmd_init_set_base_cnt_id)(
			struct pmu_perfmon_cmd *pc, u8 value);
		void (*perfmon_cmd_init_set_samp_period_us)(
			struct pmu_perfmon_cmd *pc, u32 value);
		void (*perfmon_cmd_init_set_num_cnt)(struct pmu_perfmon_cmd *pc,
			u8 value);
		void (*perfmon_cmd_init_set_mov_avg)(struct pmu_perfmon_cmd *pc,
			u8 value);
		void *(*get_pmu_seq_in_a_ptr)(
			struct pmu_sequence *seq);
		void *(*get_pmu_seq_out_a_ptr)(
			struct pmu_sequence *seq);
	} pmu_ver;
};
195 | |||
/* Top-level per-device state for one gk20a GPU instance. */
struct gk20a {
	struct platform_device *dev;

	/* BAR0 register aperture */
	struct resource *reg_mem;
	void __iomem *regs;

	/* BAR1 aperture */
	struct resource *bar1_mem;
	void __iomem *bar1;

	bool power_on;
	bool irq_requested;

	/* per-unit state blocks */
	struct clk_gk20a clk;
	struct fifo_gk20a fifo;
	struct gr_gk20a gr;
	struct sim_gk20a sim;
	struct mm_gk20a mm;
	struct pmu_gk20a pmu;
	struct cooling_device_gk20a gk20a_cdev;

	/* Save pmu fw here so that it lives cross suspend/resume.
	   pmu suspend destroys all pmu sw/hw states. Loading pmu
	   fw in resume crashes when the resume is from sys_exit. */
	const struct firmware *pmu_fw;

	/* see gk20a_get_gr_idle_timeout() for how these two interact */
	u32 gr_idle_timeout_default;
	u32 timeouts_enabled;

	/* clock/power gating feature enables */
	bool slcg_enabled;
	bool blcg_enabled;
	bool elcg_enabled;
	bool elpg_enabled;
	bool aelpg_enabled;

#ifdef CONFIG_DEBUG_FS
	spinlock_t debugfs_lock;
	struct dentry *debugfs_ltc_enabled;
	struct dentry *debugfs_timeouts_enabled;
	struct dentry *debugfs_gr_idle_timeout_default;
#endif
	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;

	/* held while manipulating # of debug/profiler sessions present */
	/* also prevents debug sessions from attaching until released */
	struct mutex dbg_sessions_lock;
	int dbg_sessions; /* number attached */
	int dbg_powergating_disabled_refcount; /*refcount for pg disable */

	void (*remove_support)(struct platform_device *);

	/* power-gating statistics */
	u64 pg_ingating_time_us;
	u64 pg_ungating_time_us;
	u32 pg_gating_cnt;

	spinlock_t mc_enable_lock;

	struct nvhost_gpu_characteristics gpu_characteristics;

	/* character devices exposed to user space */
	struct {
		struct cdev cdev;
		struct device *node;
	} channel;

	struct gk20a_as as;

	struct {
		struct cdev cdev;
		struct device *node;
	} ctrl;

	struct {
		struct cdev cdev;
		struct device *node;
	} dbg;

	struct {
		struct cdev cdev;
		struct device *node;
	} prof;

	struct mutex client_lock;
	int client_refcount; /* open channels and ctrl nodes */

	dev_t cdev_region;
	struct class *class;

	/* chip-specific HAL, see struct gpu_ops above */
	struct gpu_ops ops;

	int irq_stall;
	int irq_nonstall;

	struct generic_pm_domain pd;

	/* devfreq-based frequency scaling state */
	struct devfreq *devfreq;

	struct gk20a_scale_profile *scale_profile;

	struct device_dma_parameters dma_parms;
};
295 | |||
296 | static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g) | ||
297 | { | ||
298 | return g->timeouts_enabled ? | ||
299 | g->gr_idle_timeout_default : MAX_SCHEDULE_TIMEOUT; | ||
300 | } | ||
301 | |||
/* Resolve the gk20a instance backing a platform device. */
static inline struct gk20a *get_gk20a(struct platform_device *dev)
{
	return gk20a_get_platform(dev)->g;
}
306 | |||
/*
 * Opcodes for the shared-buffer BAR0 debug protocol.
 * NOTE(review): the values are multi-character constants ('DONE',
 * '0R32', '0W32') whose numeric value is implementation-defined in C;
 * both producer and consumer of the shared buffer must agree on the
 * same toolchain conventions — confirm.
 */
enum BAR0_DEBUG_OPERATION {
	BARO_ZERO_NOP = 0,
	OP_END = 'DONE',
	BAR0_READ32 = '0R32',
	BAR0_WRITE32 = '0W32',
};
313 | |||
/*
 * Header of one entry in the shared debug buffer; describes a single
 * BAR0 debug operation and its completion status.
 * NOTE(review): layout appears to be shared with user space (fixed
 * width fields, 64-bit handles) — keep it stable; confirm.
 */
struct share_buffer_head {
	enum BAR0_DEBUG_OPERATION operation;
	/* size of the operation item */
	u32 size;
	u32 completed;
	u32 failed;
	u64 context;
	u64 completion_callback;
};
323 | |||
/* One cycle-stats buffer element: a BAR0 read request and its result. */
struct gk20a_cyclestate_buffer_elem {
	struct share_buffer_head head;
	/* in */
	u64 p_data;
	u64 p_done;
	u32 offset_bar0;	/* BAR0 register offset to read */
	u16 first_bit;		/* bit-field range extracted from the value */
	u16 last_bit;
	/* out */
	/* keep 64 bits to be consistent */
	u64 data;
};
336 | |||
/* debug accessories */

#ifdef CONFIG_DEBUG_FS
/* debug info, default is compiled-in but effectively disabled (0 mask) */
#define GK20A_DEBUG
/*e.g: echo 1 > /d/tegra_host/dbg_mask */
#define GK20A_DEFAULT_DBG_MASK 0
#else
/* manually enable and turn it on the mask */
/*#define NVHOST_DEBUG*/
/* NOTE(review): 'dbg_info' does not match any enumerator in
 * enum gk20a_dbg_categories (which uses gpu_dbg_info); this branch is
 * only compiled when GK20A_DEBUG is defined by hand — confirm before
 * enabling. */
#define GK20A_DEFAULT_DBG_MASK (dbg_info)
#endif
349 | |||
/* Debug log categories; OR these bits into gk20a_dbg_mask to enable. */
enum gk20a_dbg_categories {
	gpu_dbg_info = BIT(0), /* lightly verbose info */
	/* NOTE(review): BIT(1) is unassigned — presumably reserved */
	gpu_dbg_fn = BIT(2), /* fn name tracing */
	gpu_dbg_reg = BIT(3), /* register accesses, very verbose */
	gpu_dbg_pte = BIT(4), /* gmmu ptes */
	gpu_dbg_intr = BIT(5), /* interrupts */
	gpu_dbg_pmu = BIT(6), /* gk20a pmu */
	gpu_dbg_clk = BIT(7), /* gk20a clk */
	gpu_dbg_map = BIT(8), /* mem mappings */
	gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */
	gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
};
362 | |||
/*
 * gk20a_dbg(): category-masked debug logging.  A message is emitted
 * only when dbg_mask intersects the runtime gk20a_dbg_mask; output goes
 * to ftrace when gk20a_dbg_ftrace is set, otherwise to printk.  When
 * GK20A_DEBUG is not defined the macro compiles to nothing while still
 * type-checking its arguments via the dead pr_info() call.
 */
#if defined(GK20A_DEBUG)
extern u32 gk20a_dbg_mask;
extern u32 gk20a_dbg_ftrace;
#define gk20a_dbg(dbg_mask, format, arg...)				\
do {									\
	if (unlikely((dbg_mask) & gk20a_dbg_mask)) {			\
		if (gk20a_dbg_ftrace)					\
			trace_printk(format "\n", ##arg);		\
		else							\
			pr_info("gk20a %s: " format "\n",		\
					__func__, ##arg);		\
	}								\
} while (0)

#else /* GK20A_DEBUG */
#define gk20a_dbg(dbg_mask, format, arg...)				\
do {									\
	if (0)								\
		pr_info("gk20a %s: " format "\n", __func__, ##arg);\
} while (0)

#endif

/* dev_err/dev_warn wrappers that prefix the calling function's name */
#define gk20a_err(d, fmt, arg...) \
	dev_err(d, "%s: " fmt "\n", __func__, ##arg)

#define gk20a_warn(d, fmt, arg...) \
	dev_warn(d, "%s: " fmt "\n", __func__, ##arg)

/* shorthands for the two most common debug categories */
#define gk20a_dbg_fn(fmt, arg...) \
	gk20a_dbg(gpu_dbg_fn, fmt, ##arg)

#define gk20a_dbg_info(fmt, arg...) \
	gk20a_dbg(gpu_dbg_info, fmt, ##arg)
397 | |||
398 | /* mem access with dbg_mem logging */ | ||
399 | static inline u8 gk20a_mem_rd08(void *ptr, int b) | ||
400 | { | ||
401 | u8 _b = ((const u8 *)ptr)[b]; | ||
402 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
403 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, _b); | ||
404 | #endif | ||
405 | return _b; | ||
406 | } | ||
407 | static inline u16 gk20a_mem_rd16(void *ptr, int s) | ||
408 | { | ||
409 | u16 _s = ((const u16 *)ptr)[s]; | ||
410 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
411 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, _s); | ||
412 | #endif | ||
413 | return _s; | ||
414 | } | ||
415 | static inline u32 gk20a_mem_rd32(void *ptr, int w) | ||
416 | { | ||
417 | u32 _w = ((const u32 *)ptr)[w]; | ||
418 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
419 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + sizeof(u32)*w, _w); | ||
420 | #endif | ||
421 | return _w; | ||
422 | } | ||
/* Write one byte at index b of ptr, tracing the access on simulation. */
static inline void gk20a_mem_wr08(void *ptr, int b, u8 data)
{
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
	gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, data);
#endif
	((u8 *)ptr)[b] = data;
}
/* Write a 16-bit word at index s of ptr, tracing on simulation. */
static inline void gk20a_mem_wr16(void *ptr, int s, u16 data)
{
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
	gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, data);
#endif
	((u16 *)ptr)[s] = data;
}
/* Write a 32-bit word at index w of ptr, tracing on simulation. */
static inline void gk20a_mem_wr32(void *ptr, int w, u32 data)
{
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
	gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u32)*w, data);
#endif
	((u32 *)ptr)[w] = data;
}
444 | |||
/* register accessors */

/* Write a BAR0 register and trace the access. */
static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v)
{
	gk20a_dbg(gpu_dbg_reg, " r=0x%x v=0x%x", r, v);
	writel(v, g->regs + r);
}
451 | static inline u32 gk20a_readl(struct gk20a *g, u32 r) | ||
452 | { | ||
453 | u32 v = readl(g->regs + r); | ||
454 | gk20a_dbg(gpu_dbg_reg, " r=0x%x v=0x%x", r, v); | ||
455 | return v; | ||
456 | } | ||
457 | |||
/* Write through the BAR1 aperture at offset b, tracing the access. */
static inline void gk20a_bar1_writel(struct gk20a *g, u32 b, u32 v)
{
	gk20a_dbg(gpu_dbg_reg, " b=0x%x v=0x%x", b, v);
	writel(v, g->bar1 + b);
}
463 | |||
464 | static inline u32 gk20a_bar1_readl(struct gk20a *g, u32 b) | ||
465 | { | ||
466 | u32 v = readl(g->bar1 + b); | ||
467 | gk20a_dbg(gpu_dbg_reg, " b=0x%x v=0x%x", b, v); | ||
468 | return v; | ||
469 | } | ||
470 | |||
/* convenience */

/* The struct device embedded in the gk20a's platform device. */
static inline struct device *dev_from_gk20a(struct gk20a *g)
{
	return &g->dev->dev;
}
/* Map an embedded gk20a_as back to its containing gk20a. */
static inline struct gk20a *gk20a_from_as(struct gk20a_as *as)
{
	return container_of(as, struct gk20a, as);
}
480 | static inline u32 u64_hi32(u64 n) | ||
481 | { | ||
482 | return (u32)((n >> 32) & ~(u32)0); | ||
483 | } | ||
484 | |||
485 | static inline u32 u64_lo32(u64 n) | ||
486 | { | ||
487 | return (u32)(n & ~(u32)0); | ||
488 | } | ||
489 | |||
490 | static inline u32 set_field(u32 val, u32 mask, u32 field) | ||
491 | { | ||
492 | return ((val & ~mask) | field); | ||
493 | } | ||
494 | |||
/* invalidate channel lookup tlb */
static inline void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr)
{
	/* zero every entry under the lock so concurrent lookups miss cleanly */
	spin_lock(&gr->ch_tlb_lock);
	memset(gr->chid_tlb, 0,
		sizeof(struct gr_channel_map_tlb_entry) *
		GR_CHANNEL_MAP_TLB_SIZE);
	spin_unlock(&gr->ch_tlb_lock);
}
504 | |||
/* classes that the device supports */
/* TBD: get these from an open-sourced SDK? */
enum {
	KEPLER_C = 0xA297,
	FERMI_TWOD_A = 0x902D,
	KEPLER_COMPUTE_A = 0xA0C0,
	KEPLER_INLINE_TO_MEMORY_A = 0xA040,
	KEPLER_DMA_COPY_A = 0xA0B5, /*not sure about this one*/
};
514 | |||
/* Compile-time switch: non-zero when PMU support is built in. */
#if defined(CONFIG_GK20A_PMU)
static inline int support_gk20a_pmu(void)
{
	return 1;
}
#else
static inline int support_gk20a_pmu(void)
{
	return 0;
}
#endif
523 | |||
/* sysfs node creation for the device */
void gk20a_create_sysfs(struct platform_device *dev);

#ifdef CONFIG_DEBUG_FS
int clk_gk20a_debugfs_init(struct platform_device *dev);
#endif

/* platform resource indices for the BAR0/BAR1/simulator apertures */
#define GK20A_BAR0_IORESOURCE_MEM 0
#define GK20A_BAR1_IORESOURCE_MEM 1
#define GK20A_SIM_IORESOURCE_MEM 2

/* runtime power management and client refcounting entry points */
void gk20a_busy_noresume(struct platform_device *pdev);
int gk20a_busy(struct platform_device *pdev);
void gk20a_idle(struct platform_device *pdev);
int gk20a_channel_busy(struct platform_device *pdev);
void gk20a_channel_idle(struct platform_device *pdev);
void gk20a_disable(struct gk20a *g, u32 units);
void gk20a_enable(struct gk20a *g, u32 units);
void gk20a_reset(struct gk20a *g, u32 units);
int gk20a_get_client(struct gk20a *g);
void gk20a_put_client(struct gk20a *g);

/* firmware loading helper; see gk20a_request_firmware() in gk20a.c */
const struct firmware *
gk20a_request_firmware(struct gk20a *g, const char *fw_name);

#define NVHOST_GPU_ARCHITECTURE_SHIFT 4

/* constructs unique and compact GPUID from nvhost_gpu_characteristics
 * arch/impl fields */
#define GK20A_GPUID(arch, impl) ((u32) ((arch) | (impl)))

#define GK20A_GPUID_GK20A \
	GK20A_GPUID(NVHOST_GPU_ARCH_GK100, NVHOST_GPU_IMPL_GK20A)

int gk20a_init_gpu_characteristics(struct gk20a *g);
558 | |||
559 | #endif /* _NVHOST_GK20A_H_ */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c new file mode 100644 index 00000000..32c003b6 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | |||
@@ -0,0 +1,1247 @@ | |||
1 | /* | ||
2 | * gk20a allocator | ||
3 | * | ||
4 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include "gk20a_allocator.h" | ||
20 | |||
21 | static inline void link_block_list(struct gk20a_allocator *allocator, | ||
22 | struct gk20a_alloc_block *block, | ||
23 | struct gk20a_alloc_block *prev, | ||
24 | struct rb_node *rb_parent); | ||
25 | static inline void link_block_rb(struct gk20a_allocator *allocator, | ||
26 | struct gk20a_alloc_block *block, | ||
27 | struct rb_node **rb_link, | ||
28 | struct rb_node *rb_parent); | ||
29 | static void link_block(struct gk20a_allocator *allocator, | ||
30 | struct gk20a_alloc_block *block, | ||
31 | struct gk20a_alloc_block *prev, struct rb_node **rb_link, | ||
32 | struct rb_node *rb_parent); | ||
33 | static void insert_block(struct gk20a_allocator *allocator, | ||
34 | struct gk20a_alloc_block *block); | ||
35 | |||
36 | static void unlink_block(struct gk20a_allocator *allocator, | ||
37 | struct gk20a_alloc_block *block, | ||
38 | struct gk20a_alloc_block *prev); | ||
39 | static struct gk20a_alloc_block *unlink_blocks( | ||
40 | struct gk20a_allocator *allocator, | ||
41 | struct gk20a_alloc_block *block, | ||
42 | struct gk20a_alloc_block *prev, u32 end); | ||
43 | |||
44 | static struct gk20a_alloc_block *find_block( | ||
45 | struct gk20a_allocator *allocator, u32 addr); | ||
46 | static struct gk20a_alloc_block *find_block_prev( | ||
47 | struct gk20a_allocator *allocator, u32 addr, | ||
48 | struct gk20a_alloc_block **pprev); | ||
49 | static struct gk20a_alloc_block *find_block_prepare( | ||
50 | struct gk20a_allocator *allocator, u32 addr, | ||
51 | struct gk20a_alloc_block **pprev, struct rb_node ***rb_link, | ||
52 | struct rb_node **rb_parent); | ||
53 | |||
54 | static u32 check_free_space(u32 addr, u32 limit, u32 len, u32 align); | ||
55 | static void update_free_addr_cache(struct gk20a_allocator *allocator, | ||
56 | struct gk20a_alloc_block *block, | ||
57 | u32 addr, u32 len, bool free); | ||
58 | static int find_free_area(struct gk20a_allocator *allocator, | ||
59 | u32 *addr, u32 len); | ||
60 | static int find_free_area_nc(struct gk20a_allocator *allocator, | ||
61 | u32 *addr, u32 *len); | ||
62 | |||
63 | static void adjust_block(struct gk20a_alloc_block *block, | ||
64 | u32 start, u32 end, | ||
65 | struct gk20a_alloc_block *insert); | ||
66 | static struct gk20a_alloc_block *merge_block( | ||
67 | struct gk20a_allocator *allocator, | ||
68 | struct gk20a_alloc_block *block, u32 addr, u32 end); | ||
69 | static int split_block(struct gk20a_allocator *allocator, | ||
70 | struct gk20a_alloc_block *block, | ||
71 | u32 addr, int new_below); | ||
72 | |||
73 | static int block_alloc_single_locked(struct gk20a_allocator *allocator, | ||
74 | u32 *addr, u32 len); | ||
75 | static int block_alloc_list_locked(struct gk20a_allocator *allocator, | ||
76 | u32 *addr, u32 len, | ||
77 | struct gk20a_alloc_block **pblock); | ||
78 | static int block_free_locked(struct gk20a_allocator *allocator, | ||
79 | u32 addr, u32 len); | ||
80 | static void block_free_list_locked(struct gk20a_allocator *allocator, | ||
81 | struct gk20a_alloc_block *list); | ||
82 | |||
83 | /* link a block into allocator block list */ | ||
84 | static inline void link_block_list(struct gk20a_allocator *allocator, | ||
85 | struct gk20a_alloc_block *block, | ||
86 | struct gk20a_alloc_block *prev, | ||
87 | struct rb_node *rb_parent) | ||
88 | { | ||
89 | struct gk20a_alloc_block *next; | ||
90 | |||
91 | block->prev = prev; | ||
92 | if (prev) { | ||
93 | next = prev->next; | ||
94 | prev->next = block; | ||
95 | } else { | ||
96 | allocator->block_first = block; | ||
97 | if (rb_parent) | ||
98 | next = rb_entry(rb_parent, | ||
99 | struct gk20a_alloc_block, rb); | ||
100 | else | ||
101 | next = NULL; | ||
102 | } | ||
103 | block->next = next; | ||
104 | if (next) | ||
105 | next->prev = block; | ||
106 | } | ||
107 | |||
108 | /* link a block into allocator rb tree */ | ||
109 | static inline void link_block_rb(struct gk20a_allocator *allocator, | ||
110 | struct gk20a_alloc_block *block, struct rb_node **rb_link, | ||
111 | struct rb_node *rb_parent) | ||
112 | { | ||
113 | rb_link_node(&block->rb, rb_parent, rb_link); | ||
114 | rb_insert_color(&block->rb, &allocator->rb_root); | ||
115 | } | ||
116 | |||
/*
 * Add @block to both the list and the rb tree at an already-located
 * position (@prev / @rb_link / @rb_parent from find_block_prepare()).
 */
static void link_block(struct gk20a_allocator *allocator,
		struct gk20a_alloc_block *block,
		struct gk20a_alloc_block *prev, struct rb_node **rb_link,
		struct rb_node *rb_parent)
{
	struct gk20a_alloc_block *next;

	link_block_list(allocator, block, prev, rb_parent);
	link_block_rb(allocator, block, rb_link, rb_parent);
	allocator->block_count++;

	next = block->next;
	allocator_dbg(allocator, "link new block %d:%d between block %d:%d and block %d:%d",
		block->start, block->end,
		prev ? prev->start : -1, prev ? prev->end : -1,
		next ? next->start : -1, next ? next->end : -1);
}
135 | |||
136 | /* add a block to allocator */ | ||
137 | static void insert_block(struct gk20a_allocator *allocator, | ||
138 | struct gk20a_alloc_block *block) | ||
139 | { | ||
140 | struct gk20a_alloc_block *prev; | ||
141 | struct rb_node **rb_link, *rb_parent; | ||
142 | |||
143 | find_block_prepare(allocator, block->start, | ||
144 | &prev, &rb_link, &rb_parent); | ||
145 | link_block(allocator, block, prev, rb_link, rb_parent); | ||
146 | } | ||
147 | |||
/*
 * Remove @block from both the linked list and the rb tree.  @prev must
 * be @block's list predecessor (NULL when @block is the head).  The
 * block itself is not freed; the caller owns it afterwards.
 */
static void unlink_block(struct gk20a_allocator *allocator,
		struct gk20a_alloc_block *block,
		struct gk20a_alloc_block *prev)
{
	struct gk20a_alloc_block *next = block->next;

	allocator_dbg(allocator, "unlink block %d:%d between block %d:%d and block %d:%d",
		block->start, block->end,
		prev ? prev->start : -1, prev ? prev->end : -1,
		next ? next->start : -1, next ? next->end : -1);

	/* a block outside [base, limit] indicates list corruption */
	BUG_ON(block->start < allocator->base);
	BUG_ON(block->end > allocator->limit);

	if (prev)
		prev->next = next;
	else
		allocator->block_first = next;

	if (next)
		next->prev = prev;
	rb_erase(&block->rb, &allocator->rb_root);
	/* drop the lookup cache if it pointed at the removed block */
	if (allocator->block_recent == block)
		allocator->block_recent = prev;

	allocator->block_count--;
}
176 | |||
/* remove a list of blocks from allocator. the list can contain both
   regular blocks and non-contiguous blocks. skip all non-contiguous
   blocks, remove regular blocks into a separate list, return list head */
static struct gk20a_alloc_block *
unlink_blocks(struct gk20a_allocator *allocator,
		struct gk20a_alloc_block *block,
		struct gk20a_alloc_block *prev,
		u32 end)
{
	struct gk20a_alloc_block **insertion_point;
	struct gk20a_alloc_block *last_unfreed_block = prev;
	struct gk20a_alloc_block *last_freed_block = NULL;
	struct gk20a_alloc_block *first_freed_block = NULL;

	/* retained (nc) blocks are re-linked into the list starting here */
	insertion_point = (prev ? &prev->next : &allocator->block_first);
	*insertion_point = NULL;

	/* walk [block, end), partitioning into freed vs retained chains */
	do {
		if (!block->nc_block) {
			/* regular block: detach and append to the freed list */
			allocator_dbg(allocator, "unlink block %d:%d",
				block->start, block->end);
			if (last_freed_block)
				last_freed_block->next = block;
			block->prev = last_freed_block;
			rb_erase(&block->rb, &allocator->rb_root);
			last_freed_block = block;
			allocator->block_count--;
			if (!first_freed_block)
				first_freed_block = block;
		} else {
			/* non-contiguous block: keep it in the allocator */
			allocator_dbg(allocator, "skip nc block %d:%d",
				block->start, block->end);
			if (!*insertion_point)
				*insertion_point = block;
			if (last_unfreed_block)
				last_unfreed_block->next = block;
			block->prev = last_unfreed_block;
			last_unfreed_block = block;
		}
		block = block->next;
	} while (block && block->start < end);

	/* reconnect the retained chain with the first block past @end */
	if (!*insertion_point)
		*insertion_point = block;

	if (block)
		block->prev = last_unfreed_block;
	if (last_unfreed_block)
		last_unfreed_block->next = block;
	if (last_freed_block)
		last_freed_block->next = NULL;

	/* the cache may point at an unlinked block; invalidate it */
	allocator->block_recent = NULL;

	return first_freed_block;
}
233 | |||
234 | /* Look up the first block which satisfies addr < block->end, | ||
235 | NULL if none */ | ||
236 | static struct gk20a_alloc_block * | ||
237 | find_block(struct gk20a_allocator *allocator, u32 addr) | ||
238 | { | ||
239 | struct gk20a_alloc_block *block = allocator->block_recent; | ||
240 | |||
241 | if (!(block && block->end > addr && block->start <= addr)) { | ||
242 | struct rb_node *rb_node; | ||
243 | |||
244 | rb_node = allocator->rb_root.rb_node; | ||
245 | block = NULL; | ||
246 | |||
247 | while (rb_node) { | ||
248 | struct gk20a_alloc_block *block_tmp; | ||
249 | |||
250 | block_tmp = rb_entry(rb_node, | ||
251 | struct gk20a_alloc_block, rb); | ||
252 | |||
253 | if (block_tmp->end > addr) { | ||
254 | block = block_tmp; | ||
255 | if (block_tmp->start <= addr) | ||
256 | break; | ||
257 | rb_node = rb_node->rb_left; | ||
258 | } else | ||
259 | rb_node = rb_node->rb_right; | ||
260 | if (block) | ||
261 | allocator->block_recent = block; | ||
262 | } | ||
263 | } | ||
264 | return block; | ||
265 | } | ||
266 | |||
/* Same as find_block, but also return a pointer to the previous block.
   Returns the first block with addr < block->end, or NULL/list head
   semantics as described at the bottom; *pprev receives the rightmost
   block whose end <= addr (NULL if addr precedes every block). */
static struct gk20a_alloc_block *
find_block_prev(struct gk20a_allocator *allocator, u32 addr,
		struct gk20a_alloc_block **pprev)
{
	struct gk20a_alloc_block *block = NULL, *prev = NULL;
	struct rb_node *rb_node;
	if (!allocator)
		goto out;

	/* fallback result when no block ends at or below addr */
	block = allocator->block_first;

	rb_node = allocator->rb_root.rb_node;

	/* binary-search for the rightmost block with end <= addr */
	while (rb_node) {
		struct gk20a_alloc_block *block_tmp;
		block_tmp = rb_entry(rb_node, struct gk20a_alloc_block, rb);

		if (addr < block_tmp->end)
			rb_node = rb_node->rb_left;
		else {
			prev = block_tmp;
			/* done once prev's successor covers/follows addr */
			if (!prev->next || addr < prev->next->end)
				break;
			rb_node = rb_node->rb_right;
		}
	}

out:
	*pprev = prev;
	/* block after prev, or the list head when prev == NULL */
	return prev ? prev->next : block;
}
299 | |||
/* Same as find_block, but also return a pointer to the previous block
   and return rb_node to prepare for rbtree insertion.
   On return, *rb_link is the edge where a new node covering addr would
   be attached and *rb_parent its parent node, suitable for
   rb_link_node()/rb_insert_color(). */
static struct gk20a_alloc_block *
find_block_prepare(struct gk20a_allocator *allocator, u32 addr,
		struct gk20a_alloc_block **pprev, struct rb_node ***rb_link,
		struct rb_node **rb_parent)
{
	struct gk20a_alloc_block *block;
	struct rb_node **__rb_link, *__rb_parent, *rb_prev;

	__rb_link = &allocator->rb_root.rb_node;
	rb_prev = __rb_parent = NULL;
	block = NULL;

	while (*__rb_link) {
		struct gk20a_alloc_block *block_tmp;

		__rb_parent = *__rb_link;
		block_tmp = rb_entry(__rb_parent,
				struct gk20a_alloc_block, rb);

		if (block_tmp->end > addr) {
			/* candidate: lowest block ending above addr */
			block = block_tmp;
			if (block_tmp->start <= addr)
				break;
			__rb_link = &__rb_parent->rb_left;
		} else {
			/* track the closest block entirely below addr */
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct gk20a_alloc_block, rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	return block;
}
339 | |||
/* Return the usable space of the hole [addr, limit) for a request of
   len bytes: 0 when addr is past the hole, len when it fits entirely,
   otherwise the remaining space rounded down to the alignment
   (align must be a power of two). */
static u32 check_free_space(u32 addr, u32 limit, u32 len, u32 align)
{
	if (addr >= limit)
		return 0;
	/* use (limit - addr) so a large addr + len cannot wrap around
	   u32 and falsely report the full len as available */
	if (limit - addr >= len)
		return len;
	return (limit - addr) & ~(align - 1);
}
349 | |||
350 | /* update first_free_addr/last_free_addr based on new free addr | ||
351 | called when free block(s) and allocate block(s) */ | ||
352 | static void update_free_addr_cache(struct gk20a_allocator *allocator, | ||
353 | struct gk20a_alloc_block *next, | ||
354 | u32 addr, u32 len, bool free) | ||
355 | { | ||
356 | /* update from block free */ | ||
357 | if (free) { | ||
358 | if (allocator->first_free_addr > addr) | ||
359 | allocator->first_free_addr = addr; | ||
360 | } else { /* update from block alloc */ | ||
361 | if (allocator->last_free_addr < addr + len) | ||
362 | allocator->last_free_addr = addr + len; | ||
363 | if (allocator->first_free_addr == addr) { | ||
364 | if (!next || next->start > addr + len) | ||
365 | allocator->first_free_addr = addr + len; | ||
366 | else | ||
367 | allocator->first_free_addr = next->end; | ||
368 | } | ||
369 | } | ||
370 | |||
371 | if (allocator->first_free_addr > allocator->last_free_addr) | ||
372 | allocator->first_free_addr = allocator->last_free_addr; | ||
373 | } | ||
374 | |||
/* find a free address range for a fixed len.
   *addr != 0 requests a fixed address; otherwise scan from the cached
   last_free_addr (or base) for the first hole of at least len bytes.
   Returns 0 with the chosen address in *addr, or -ENOMEM. */
static int find_free_area(struct gk20a_allocator *allocator,
		u32 *addr, u32 len)
{
	struct gk20a_alloc_block *block;
	u32 start_addr, search_base, search_limit;

	/* fixed addr allocation */
	/* note: constraints for fixed are handled by caller */
	if (*addr) {
		block = find_block(allocator, *addr);
		/* fits below the limit and before the covering block */
		if (allocator->limit - len >= *addr &&
			(!block || *addr + len <= block->start)) {
			update_free_addr_cache(allocator, block,
					*addr, len, false);
			return 0;
		} else
			return -ENOMEM;
	}

	if (!allocator->constraint.enable) {
		search_base = allocator->base;
		search_limit = allocator->limit;
	} else {
		start_addr = *addr = allocator->constraint.base;
		search_base = allocator->constraint.base;
		search_limit = allocator->constraint.limit;
	}

	/* cached_hole_size has max free space up to last_free_addr */
	/* NOTE(review): the branch below always overwrites *addr, so
	   the constraint.base assignment above only survives through
	   search_base — confirm this is the intended behavior */
	if (len > allocator->cached_hole_size)
		start_addr = *addr = allocator->last_free_addr;
	else {
		start_addr = *addr = allocator->base;
		allocator->cached_hole_size = 0;
	}

	allocator_dbg(allocator, "start search addr : %d", start_addr);

full_search:
	/* first-fit scan over the sorted block list */
	for (block = find_block(allocator, *addr);; block = block->next) {
		if (search_limit - len < *addr) {
			/* start a new search in case we missed any hole */
			if (start_addr != search_base) {
				start_addr = *addr = search_base;
				allocator->cached_hole_size = 0;
				allocator_dbg(allocator, "start a new search from base");
				goto full_search;
			}
			return -ENOMEM;
		}
		if (!block || *addr + len <= block->start) {
			/* hole before this block (or end of list) fits */
			update_free_addr_cache(allocator, block,
					*addr, len, false);
			allocator_dbg(allocator, "free space from %d, len %d",
				*addr, len);
			allocator_dbg(allocator, "next free addr: %d",
				allocator->last_free_addr);
			return 0;
		}
		/* remember the largest hole skipped during the scan */
		if (*addr + allocator->cached_hole_size < block->start)
			allocator->cached_hole_size = block->start - *addr;
		*addr = block->end;
	}
}
440 | |||
/* find a free address range for as long as it meets alignment or meet len.
   Unlike find_free_area, a partial hole is acceptable: the chosen length
   (possibly smaller than requested, aligned down) is returned in *len.
   Returns 0 with *addr/*len set, or -ENOMEM. */
static int find_free_area_nc(struct gk20a_allocator *allocator,
		u32 *addr, u32 *len)
{
	struct gk20a_alloc_block *block;
	u32 start_addr;
	u32 avail_len;

	/* fixed addr allocation */
	if (*addr) {
		block = find_block(allocator, *addr);
		if (allocator->limit - *len >= *addr) {
			/* no block ends above *addr: whole range free */
			if (!block)
				return 0;

			avail_len = check_free_space(*addr, block->start,
					*len, allocator->align);
			if (avail_len != 0) {
				update_free_addr_cache(allocator, block,
						*addr, avail_len, false);
				allocator_dbg(allocator,
					"free space between %d, %d, len %d",
					*addr, block->start, avail_len);
				allocator_dbg(allocator, "next free addr: %d",
					allocator->last_free_addr);
				*len = avail_len;
				return 0;
			} else
				return -ENOMEM;
		} else
			return -ENOMEM;
	}

	/* scan from the earliest known free address to pick up holes */
	start_addr = *addr = allocator->first_free_addr;

	allocator_dbg(allocator, "start search addr : %d", start_addr);

	for (block = find_block(allocator, *addr);; block = block->next) {
		if (allocator->limit - *len < *addr)
			return -ENOMEM;
		if (!block) {
			/* tail of the space is free; take full *len */
			update_free_addr_cache(allocator, block,
					*addr, *len, false);
			allocator_dbg(allocator, "free space from %d, len %d",
				*addr, *len);
			allocator_dbg(allocator, "next free addr: %d",
				allocator->first_free_addr);
			return 0;
		}

		avail_len = check_free_space(*addr, block->start,
				*len, allocator->align);
		if (avail_len != 0) {
			update_free_addr_cache(allocator, block,
					*addr, avail_len, false);
			allocator_dbg(allocator, "free space between %d, %d, len %d",
				*addr, block->start, avail_len);
			allocator_dbg(allocator, "next free addr: %d",
				allocator->first_free_addr);
			*len = avail_len;
			return 0;
		}
		/* track the largest hole skipped so far */
		if (*addr + allocator->cached_hole_size < block->start)
			allocator->cached_hole_size = block->start - *addr;
		*addr = block->end;
	}
}
508 | |||
/* expand/shrink a block with new start and new end
   split_block function provides insert block for shrink.
   Expand (insert == NULL): grow block to absorb the adjacent range
   [start, end), merging with the following block when they touch.
   Shrink (insert != NULL): trim block to [start, end) and insert the
   leftover piece as a new node. */
static void adjust_block(struct gk20a_alloc_block *block,
		u32 start, u32 end, struct gk20a_alloc_block *insert)
{
	struct gk20a_allocator *allocator = block->allocator;

	allocator_dbg(allocator, "curr block %d:%d, new start %d, new end %d",
		block->start, block->end, start, end);

	/* expand */
	if (!insert) {
		if (start == block->end) {
			struct gk20a_alloc_block *next = block->next;

			if (next && end == next->start) {
				/* ....AAAA.... */
				/* PPPP....NNNN */
				/* PPPPPPPPPPPP */
				/* new range bridges block and next:
				   absorb next entirely and free it */
				unlink_block(allocator, next, block);
				block->end = next->end;
				kmem_cache_free(allocator->block_cache, next);
			} else {
				/* ....AAAA.... */
				/* PPPP........ */
				/* PPPPPPPP.... */
				block->end = end;
			}
		}

		if (end == block->start) {
			/* ....AAAA.... */
			/* ........NNNN */
			/* PP..NNNNNNNN ....NNNNNNNN */
			/* new range abuts block from below */
			block->start = start;
		}
	} else { /* shrink */
		/* BBBBBBBB -> BBBBIIII OR BBBBBBBB -> IIIIBBBB */
		block->start = start;
		block->end = end;
		insert_block(allocator, insert);
	}
}
552 | |||
553 | /* given a range [addr, end], merge it with blocks before or after or both | ||
554 | if they can be combined into a contiguous block */ | ||
555 | static struct gk20a_alloc_block * | ||
556 | merge_block(struct gk20a_allocator *allocator, | ||
557 | struct gk20a_alloc_block *prev, u32 addr, u32 end) | ||
558 | { | ||
559 | struct gk20a_alloc_block *next; | ||
560 | |||
561 | if (prev) | ||
562 | next = prev->next; | ||
563 | else | ||
564 | next = allocator->block_first; | ||
565 | |||
566 | allocator_dbg(allocator, "curr block %d:%d", addr, end); | ||
567 | if (prev) | ||
568 | allocator_dbg(allocator, "prev block %d:%d", | ||
569 | prev->start, prev->end); | ||
570 | if (next) | ||
571 | allocator_dbg(allocator, "next block %d:%d", | ||
572 | next->start, next->end); | ||
573 | |||
574 | /* don't merge with non-contiguous allocation block */ | ||
575 | if (prev && prev->end == addr && !prev->nc_block) { | ||
576 | adjust_block(prev, addr, end, NULL); | ||
577 | return prev; | ||
578 | } | ||
579 | |||
580 | /* don't merge with non-contiguous allocation block */ | ||
581 | if (next && end == next->start && !next->nc_block) { | ||
582 | adjust_block(next, addr, end, NULL); | ||
583 | return next; | ||
584 | } | ||
585 | |||
586 | return NULL; | ||
587 | } | ||
588 | |||
589 | /* split a block based on addr. addr must be within (start, end). | ||
590 | if new_below == 1, link new block before adjusted current block */ | ||
591 | static int split_block(struct gk20a_allocator *allocator, | ||
592 | struct gk20a_alloc_block *block, u32 addr, int new_below) | ||
593 | { | ||
594 | struct gk20a_alloc_block *new_block; | ||
595 | |||
596 | allocator_dbg(allocator, "start %d, split %d, end %d, new_below %d", | ||
597 | block->start, addr, block->end, new_below); | ||
598 | |||
599 | BUG_ON(!(addr > block->start && addr < block->end)); | ||
600 | |||
601 | new_block = kmem_cache_alloc(allocator->block_cache, GFP_KERNEL); | ||
602 | if (!new_block) | ||
603 | return -ENOMEM; | ||
604 | |||
605 | *new_block = *block; | ||
606 | |||
607 | if (new_below) | ||
608 | new_block->end = addr; | ||
609 | else | ||
610 | new_block->start = addr; | ||
611 | |||
612 | if (new_below) | ||
613 | adjust_block(block, addr, block->end, new_block); | ||
614 | else | ||
615 | adjust_block(block, block->start, addr, new_block); | ||
616 | |||
617 | return 0; | ||
618 | } | ||
619 | |||
620 | /* free a list of blocks */ | ||
621 | static void free_blocks(struct gk20a_allocator *allocator, | ||
622 | struct gk20a_alloc_block *block) | ||
623 | { | ||
624 | struct gk20a_alloc_block *curr_block; | ||
625 | while (block) { | ||
626 | curr_block = block; | ||
627 | block = block->next; | ||
628 | kmem_cache_free(allocator->block_cache, curr_block); | ||
629 | } | ||
630 | } | ||
631 | |||
/* called with rw_sema acquired.
   Allocate one contiguous block of len bytes; *addr_req carries the
   requested fixed address in (0 for "anywhere") and the chosen address
   out (~0 on failure). Returns 0 or a negative errno. */
static int block_alloc_single_locked(struct gk20a_allocator *allocator,
		u32 *addr_req, u32 len)
{
	struct gk20a_alloc_block *block, *prev;
	struct rb_node **rb_link, *rb_parent;
	u32 addr = *addr_req;
	int err;

	/* report ~0 unless an address is committed below */
	*addr_req = ~0;

	err = find_free_area(allocator, &addr, len);
	if (err)
		return err;

	/* compute prev and the rb-tree insertion point for addr */
	find_block_prepare(allocator, addr, &prev, &rb_link, &rb_parent);

	/* merge requested free space with existing block(s)
	   if they can be combined into one contiguous block */
	block = merge_block(allocator, prev, addr, addr + len);
	if (block) {
		*addr_req = addr;
		return 0;
	}

	/* create a new block if cannot merge */
	block = kmem_cache_zalloc(allocator->block_cache, GFP_KERNEL);
	if (!block)
		return -ENOMEM;

	block->allocator = allocator;
	block->start = addr;
	block->end = addr + len;

	/* rb_link/rb_parent are still valid: merge_block inserted
	   nothing on this path */
	link_block(allocator, block, prev, rb_link, rb_parent);

	*addr_req = addr;

	return 0;
}
672 | |||
/* called with rw_sema acquired.
   Allocate nc_len bytes as a chain of one or more blocks linked via
   nc_next/nc_prev; each piece is placed by find_free_area_nc and may be
   shorter than requested. On success *pblock is the chain head and
   *addr_req the first piece's address; on failure the partial chain is
   torn down and *addr_req is ~0. */
static int block_alloc_list_locked(struct gk20a_allocator *allocator,
	u32 *addr_req, u32 nc_len, struct gk20a_alloc_block **pblock)
{
	struct gk20a_alloc_block *block;
	struct gk20a_alloc_block *nc_head = NULL, *nc_prev = NULL;
	u32 addr = *addr_req, len = nc_len;
	int err = 0;

	*addr_req = ~0;

	while (nc_len > 0) {
		err = find_free_area_nc(allocator, &addr, &len);
		if (err) {
			allocator_dbg(allocator, "not enough free space");
			goto clean_up;
		}

		/* never merge non-contiguous allocation block,
		   just create a new block */
		block = kmem_cache_zalloc(allocator->block_cache,
				GFP_KERNEL);
		if (!block) {
			err = -ENOMEM;
			goto clean_up;
		}

		block->allocator = allocator;
		block->start = addr;
		block->end = addr + len;

		insert_block(allocator, block);

		/* append to the nc chain */
		block->nc_prev = nc_prev;
		if (nc_prev)
			nc_prev->nc_next = block;
		nc_prev = block;
		block->nc_block = true;

		if (!nc_head)
			nc_head = block;

		/* report the address of the first piece only */
		if (*addr_req == ~0)
			*addr_req = addr;

		/* subsequent pieces may land anywhere */
		addr = 0;
		nc_len -= len;
		len = nc_len;
		allocator_dbg(allocator, "remaining length %d", nc_len);
	}

clean_up:
	if (err) {
		/* roll back: unlink and free every piece allocated */
		while (nc_head) {
			unlink_block(allocator, nc_head, nc_head->prev);
			nc_prev = nc_head;
			nc_head = nc_head->nc_next;
			kmem_cache_free(allocator->block_cache, nc_prev);
		}
		*pblock = NULL;
		*addr_req = ~0;
	} else {
		*pblock = nc_head;
	}

	return err;
}
739 | |||
740 | /* called with rw_sema acquired */ | ||
741 | static int block_free_locked(struct gk20a_allocator *allocator, | ||
742 | u32 addr, u32 len) | ||
743 | { | ||
744 | struct gk20a_alloc_block *block, *prev, *last; | ||
745 | u32 end; | ||
746 | int err; | ||
747 | |||
748 | /* no block has block->end > addr, already free */ | ||
749 | block = find_block_prev(allocator, addr, &prev); | ||
750 | if (!block) | ||
751 | return 0; | ||
752 | |||
753 | allocator_dbg(allocator, "first block in free range %d:%d", | ||
754 | block->start, block->end); | ||
755 | |||
756 | end = addr + len; | ||
757 | /* not in any block, already free */ | ||
758 | if (block->start >= end) | ||
759 | return 0; | ||
760 | |||
761 | /* don't touch nc_block in range free */ | ||
762 | if (addr > block->start && !block->nc_block) { | ||
763 | int err = split_block(allocator, block, addr, 0); | ||
764 | if (err) | ||
765 | return err; | ||
766 | prev = block; | ||
767 | } | ||
768 | |||
769 | last = find_block(allocator, end); | ||
770 | if (last && end > last->start && !last->nc_block) { | ||
771 | |||
772 | allocator_dbg(allocator, "last block in free range %d:%d", | ||
773 | last->start, last->end); | ||
774 | |||
775 | err = split_block(allocator, last, end, 1); | ||
776 | if (err) | ||
777 | return err; | ||
778 | } | ||
779 | |||
780 | block = prev ? prev->next : allocator->block_first; | ||
781 | |||
782 | allocator_dbg(allocator, "first block for free %d:%d", | ||
783 | block->start, block->end); | ||
784 | |||
785 | /* remove blocks between [addr, addr + len) from rb tree | ||
786 | and put them in a list */ | ||
787 | block = unlink_blocks(allocator, block, prev, end); | ||
788 | free_blocks(allocator, block); | ||
789 | |||
790 | update_free_addr_cache(allocator, NULL, addr, len, true); | ||
791 | |||
792 | return 0; | ||
793 | } | ||
794 | |||
/* called with rw_sema acquired.
   Free a non-contiguous chain (linked via nc_next): unlink each piece
   from the list/rb-tree and return it to the slab cache. Caller
   guarantees list != NULL.
   NOTE(review): the free-addr cache is updated from the first piece's
   extent only, not the whole chain — presumably later pieces sit at
   higher addresses; confirm against the allocation order. */
static void block_free_list_locked(struct gk20a_allocator *allocator,
		struct gk20a_alloc_block *list)
{
	struct gk20a_alloc_block *block;
	u32 len;

	update_free_addr_cache(allocator, NULL,
			list->start, list->end - list->start, true);

	while (list) {
		block = list;
		unlink_block(allocator, block, block->prev);

		/* each released piece is a hole at least this large */
		len = block->end - block->start;
		if (allocator->cached_hole_size < len)
			allocator->cached_hole_size = len;

		list = block->nc_next;
		kmem_cache_free(allocator->block_cache, block);
	}
}
817 | |||
818 | static int | ||
819 | gk20a_allocator_constrain(struct gk20a_allocator *a, | ||
820 | bool enable, u32 base, u32 limit) | ||
821 | { | ||
822 | if (enable) { | ||
823 | a->constraint.enable = (base >= a->base && | ||
824 | limit <= a->limit); | ||
825 | if (!a->constraint.enable) | ||
826 | return -EINVAL; | ||
827 | a->constraint.base = base; | ||
828 | a->constraint.limit = limit; | ||
829 | a->first_free_addr = a->last_free_addr = base; | ||
830 | |||
831 | } else { | ||
832 | a->constraint.enable = false; | ||
833 | a->first_free_addr = a->last_free_addr = a->base; | ||
834 | } | ||
835 | |||
836 | a->cached_hole_size = 0; | ||
837 | |||
838 | return 0; | ||
839 | } | ||
840 | |||
841 | /* init allocator struct */ | ||
842 | int gk20a_allocator_init(struct gk20a_allocator *allocator, | ||
843 | const char *name, u32 start, u32 len, u32 align) | ||
844 | { | ||
845 | memset(allocator, 0, sizeof(struct gk20a_allocator)); | ||
846 | |||
847 | strncpy(allocator->name, name, 32); | ||
848 | |||
849 | allocator->block_cache = | ||
850 | kmem_cache_create(allocator->name, | ||
851 | sizeof(struct gk20a_alloc_block), 0, | ||
852 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
853 | if (!allocator->block_cache) | ||
854 | return -ENOMEM; | ||
855 | |||
856 | allocator->rb_root = RB_ROOT; | ||
857 | |||
858 | allocator->base = start; | ||
859 | allocator->limit = start + len - 1; | ||
860 | allocator->align = align; | ||
861 | |||
862 | allocator_dbg(allocator, "%s : base %d, limit %d, align %d", | ||
863 | allocator->name, allocator->base, | ||
864 | allocator->limit, allocator->align); | ||
865 | |||
866 | allocator->first_free_addr = allocator->last_free_addr = start; | ||
867 | allocator->cached_hole_size = len; | ||
868 | |||
869 | init_rwsem(&allocator->rw_sema); | ||
870 | |||
871 | allocator->alloc = gk20a_allocator_block_alloc; | ||
872 | allocator->alloc_nc = gk20a_allocator_block_alloc_nc; | ||
873 | allocator->free = gk20a_allocator_block_free; | ||
874 | allocator->free_nc = gk20a_allocator_block_free_nc; | ||
875 | allocator->constrain = gk20a_allocator_constrain; | ||
876 | |||
877 | return 0; | ||
878 | } | ||
879 | |||
880 | /* destroy allocator, free all remaining blocks if any */ | ||
881 | void gk20a_allocator_destroy(struct gk20a_allocator *allocator) | ||
882 | { | ||
883 | struct gk20a_alloc_block *block, *next; | ||
884 | u32 free_count = 0; | ||
885 | |||
886 | down_write(&allocator->rw_sema); | ||
887 | |||
888 | for (block = allocator->block_first; block; ) { | ||
889 | allocator_dbg(allocator, "free remaining block %d:%d", | ||
890 | block->start, block->end); | ||
891 | next = block->next; | ||
892 | kmem_cache_free(allocator->block_cache, block); | ||
893 | free_count++; | ||
894 | block = next; | ||
895 | } | ||
896 | |||
897 | up_write(&allocator->rw_sema); | ||
898 | |||
899 | /* block_count doesn't match real number of blocks */ | ||
900 | BUG_ON(free_count != allocator->block_count); | ||
901 | |||
902 | kmem_cache_destroy(allocator->block_cache); | ||
903 | |||
904 | memset(allocator, 0, sizeof(struct gk20a_allocator)); | ||
905 | } | ||
906 | |||
/*
 * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is
 * returned to caller in *addr.
 *
 * contiguous allocation, which allocates one block of
 * contiguous address.
 *
 * Returns 0 on success, -EINVAL for bad address/length/alignment,
 * -ENOMEM when no suitable hole exists. Takes rw_sema for write.
 */
int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
		u32 *addr, u32 len)
{
	int ret;
#if defined(ALLOCATOR_DEBUG)
	struct gk20a_alloc_block *block;
	bool should_fail = false;
#endif

	allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);

	if (*addr + len > allocator->limit || /* check addr range */
	    *addr & (allocator->align - 1) || /* check addr alignment */
	    len == 0)                         /* check len */
		return -EINVAL;

	/* NOTE(review): this rejects any fixed addr above
	   constraint.base; presumably constraint.base is meant as a
	   lower bound (i.e. *addr < base should fail) — confirm */
	if (allocator->constraint.enable &&
	    (*addr + len > allocator->constraint.limit ||
	     *addr > allocator->constraint.base))
		return -EINVAL;

	len = ALIGN(len, allocator->align);
	if (!len)
		return -ENOMEM;

	down_write(&allocator->rw_sema);

#if defined(ALLOCATOR_DEBUG)
	/* a fixed request overlapping any existing block must fail */
	if (*addr) {
		for (block = allocator->block_first;
		     block; block = block->next) {
			if (block->end > *addr && block->start < *addr + len) {
				should_fail = true;
				break;
			}
		}
	}
#endif

	ret = block_alloc_single_locked(allocator, addr, len);

#if defined(ALLOCATOR_DEBUG)
	/* on success, a regular block must now cover [addr, addr+len) */
	if (!ret) {
		bool allocated = false;
		BUG_ON(should_fail);
		BUG_ON(*addr < allocator->base);
		BUG_ON(*addr + len > allocator->limit);
		for (block = allocator->block_first;
		     block; block = block->next) {
			if (!block->nc_block &&
			    block->start <= *addr &&
			    block->end >= *addr + len) {
				allocated = true;
				break;
			}
		}
		BUG_ON(!allocated);
	}
#endif

	up_write(&allocator->rw_sema);

	allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);

	return ret;
}
980 | |||
/*
 * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is
 * returned to caller in *addr.
 *
 * non-contiguous allocation, which returns a list of blocks with aggregated
 * size == len. Individual block size must meet alignment requirement.
 *
 * On success *pblock is the chain head (linked via nc_next). Takes
 * rw_sema for write. Returns 0, -EINVAL, or -ENOMEM.
 */
int gk20a_allocator_block_alloc_nc(struct gk20a_allocator *allocator,
		u32 *addr, u32 len, struct gk20a_alloc_block **pblock)
{
	int ret;

	allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);

	BUG_ON(pblock == NULL);
	*pblock = NULL;

	if (*addr + len > allocator->limit || /* check addr range */
	    *addr & (allocator->align - 1) || /* check addr alignment */
	    len == 0)                         /* check len */
		return -EINVAL;

	len = ALIGN(len, allocator->align);
	if (!len)
		return -ENOMEM;

	down_write(&allocator->rw_sema);

	ret = block_alloc_list_locked(allocator, addr, len, pblock);

#if defined(ALLOCATOR_DEBUG)
	/* chain sanity: inside limits and strictly ascending pieces */
	if (!ret) {
		struct gk20a_alloc_block *block = *pblock;
		BUG_ON(!block);
		BUG_ON(block->start < allocator->base);
		while (block->nc_next) {
			BUG_ON(block->end > block->nc_next->start);
			block = block->nc_next;
		}
		BUG_ON(block->end > allocator->limit);
	}
#endif

	up_write(&allocator->rw_sema);

	allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);

	return ret;
}
1030 | |||
/* free all blocks between start and end.
   Validates the range against base/limit/alignment, then delegates to
   block_free_locked under rw_sema. Returns 0 or a negative errno. */
int gk20a_allocator_block_free(struct gk20a_allocator *allocator,
		u32 addr, u32 len)
{
	int ret;

	allocator_dbg(allocator, "[in] addr %d, len %d", addr, len);

	if (addr + len > allocator->limit || /* check addr range */
	    addr < allocator->base ||
	    addr & (allocator->align - 1))   /* check addr alignment */
		return -EINVAL;

	len = ALIGN(len, allocator->align);
	if (!len)
		return -EINVAL;

	down_write(&allocator->rw_sema);

	ret = block_free_locked(allocator, addr, len);

#if defined(ALLOCATOR_DEBUG)
	/* no regular block fully inside the range may survive */
	if (!ret) {
		struct gk20a_alloc_block *block;
		for (block = allocator->block_first;
		     block; block = block->next) {
			if (!block->nc_block)
				BUG_ON(block->start >= addr &&
				       block->end <= addr + len);
		}
	}
#endif
	up_write(&allocator->rw_sema);

	allocator_dbg(allocator, "[out] addr %d, len %d", addr, len);

	return ret;
}
1069 | |||
1070 | /* free non-contiguous allocation block list */ | ||
1071 | void gk20a_allocator_block_free_nc(struct gk20a_allocator *allocator, | ||
1072 | struct gk20a_alloc_block *block) | ||
1073 | { | ||
1074 | /* nothing to free */ | ||
1075 | if (!block) | ||
1076 | return; | ||
1077 | |||
1078 | down_write(&allocator->rw_sema); | ||
1079 | block_free_list_locked(allocator, block); | ||
1080 | up_write(&allocator->rw_sema); | ||
1081 | } | ||
1082 | |||
1083 | #if defined(ALLOCATOR_DEBUG) | ||
1084 | |||
1085 | #include <linux/random.h> | ||
1086 | |||
/* test suite.
   Exercises alloc/free/merge/split paths on a tiny 10-unit space, then
   runs a randomized stress loop on a 4 MiB space.
   NOTE(review): the stress loop below is `for (;;)` — it never exits,
   so the final gk20a_allocator_destroy() is unreachable; confirm
   whether the loop should be bounded. */
void gk20a_allocator_test(void)
{
	struct gk20a_allocator allocator;
	struct gk20a_alloc_block *list[5];
	u32 addr, len;
	u32 count;
	int n;

	gk20a_allocator_init(&allocator, "test", 0, 10, 1);

	/* alloc/free a single block in the beginning */
	addr = 0;
	gk20a_allocator_block_alloc(&allocator, &addr, 2);
	gk20a_allocator_dump(&allocator);
	gk20a_allocator_block_free(&allocator, addr, 2);
	gk20a_allocator_dump(&allocator);
	/* alloc/free a single block in the middle */
	addr = 4;
	gk20a_allocator_block_alloc(&allocator, &addr, 2);
	gk20a_allocator_dump(&allocator);
	gk20a_allocator_block_free(&allocator, addr, 2);
	gk20a_allocator_dump(&allocator);
	/* alloc/free a single block in the end */
	addr = 8;
	gk20a_allocator_block_alloc(&allocator, &addr, 2);
	gk20a_allocator_dump(&allocator);
	gk20a_allocator_block_free(&allocator, addr, 2);
	gk20a_allocator_dump(&allocator);

	/* allocate contiguous blocks */
	addr = 0;
	gk20a_allocator_block_alloc(&allocator, &addr, 2);
	gk20a_allocator_dump(&allocator);
	addr = 0;
	gk20a_allocator_block_alloc(&allocator, &addr, 4);
	gk20a_allocator_dump(&allocator);
	addr = 0;
	gk20a_allocator_block_alloc(&allocator, &addr, 4);
	gk20a_allocator_dump(&allocator);

	/* no free space */
	addr = 0;
	gk20a_allocator_block_alloc(&allocator, &addr, 2);
	gk20a_allocator_dump(&allocator);

	/* free in the end */
	gk20a_allocator_block_free(&allocator, 8, 2);
	gk20a_allocator_dump(&allocator);
	/* free in the beginning */
	gk20a_allocator_block_free(&allocator, 0, 2);
	gk20a_allocator_dump(&allocator);
	/* free in the middle */
	gk20a_allocator_block_free(&allocator, 4, 2);
	gk20a_allocator_dump(&allocator);

	/* merge case PPPPAAAANNNN */
	addr = 4;
	gk20a_allocator_block_alloc(&allocator, &addr, 2);
	gk20a_allocator_dump(&allocator);
	/* merge case ....AAAANNNN */
	addr = 0;
	gk20a_allocator_block_alloc(&allocator, &addr, 2);
	gk20a_allocator_dump(&allocator);
	/* merge case PPPPAAAA.... */
	addr = 8;
	gk20a_allocator_block_alloc(&allocator, &addr, 2);
	gk20a_allocator_dump(&allocator);

	/* test free across multiple blocks and split */
	gk20a_allocator_block_free(&allocator, 2, 2);
	gk20a_allocator_dump(&allocator);
	gk20a_allocator_block_free(&allocator, 6, 2);
	gk20a_allocator_dump(&allocator);
	gk20a_allocator_block_free(&allocator, 1, 8);
	gk20a_allocator_dump(&allocator);

	/* test non-contiguous allocation */
	addr = 4;
	gk20a_allocator_block_alloc(&allocator, &addr, 2);
	gk20a_allocator_dump(&allocator);
	addr = 0;
	gk20a_allocator_block_alloc_nc(&allocator, &addr, 5, &list[0]);
	gk20a_allocator_dump(&allocator);
	gk20a_allocator_dump_nc_list(&allocator, list[0]);

	/* test free a range overlapping non-contiguous blocks */
	gk20a_allocator_block_free(&allocator, 2, 6);
	gk20a_allocator_dump(&allocator);

	/* test non-contiguous free */
	gk20a_allocator_block_free_nc(&allocator, list[0]);
	gk20a_allocator_dump(&allocator);

	gk20a_allocator_destroy(&allocator);

	/* random stress test */
	gk20a_allocator_init(&allocator, "test", 4096, 4096 * 1024, 4096);
	for (;;) {
		pr_debug("alloc tests...\n");
		for (count = 0; count < 50; count++) {
			addr = 0;
			len = random32() % (4096 * 1024 / 16);
			gk20a_allocator_block_alloc(&allocator, &addr, len);
			gk20a_allocator_dump(&allocator);
		}

		pr_debug("free tests...\n");
		for (count = 0; count < 30; count++) {
			addr = (random32() % (4096 * 1024)) & ~(4096 - 1);
			len = random32() % (4096 * 1024 / 16);
			gk20a_allocator_block_free(&allocator, addr, len);
			gk20a_allocator_dump(&allocator);
		}

		pr_debug("non-contiguous alloc tests...\n");
		for (n = 0; n < 5; n++) {
			addr = 0;
			len = random32() % (4096 * 1024 / 8);
			gk20a_allocator_block_alloc_nc(&allocator, &addr,
					len, &list[n]);
			gk20a_allocator_dump(&allocator);
			gk20a_allocator_dump_nc_list(&allocator, list[n]);
		}

		pr_debug("free tests...\n");
		for (count = 0; count < 10; count++) {
			addr = (random32() % (4096 * 1024)) & ~(4096 - 1);
			len = random32() % (4096 * 1024 / 16);
			gk20a_allocator_block_free(&allocator, addr, len);
			gk20a_allocator_dump(&allocator);
		}

		pr_debug("non-contiguous free tests...\n");
		for (n = 4; n >= 0; n--) {
			gk20a_allocator_dump_nc_list(&allocator, list[n]);
			gk20a_allocator_block_free_nc(&allocator, list[n]);
			gk20a_allocator_dump(&allocator);
		}

		pr_debug("fixed addr alloc tests...\n");
		for (count = 0; count < 10; count++) {
			addr = (random32() % (4096 * 1024)) & ~(4096 - 1);
			len = random32() % (4096 * 1024 / 32);
			gk20a_allocator_block_alloc(&allocator, &addr, len);
			gk20a_allocator_dump(&allocator);
		}

		pr_debug("free tests...\n");
		for (count = 0; count < 10; count++) {
			addr = (random32() % (4096 * 1024)) & ~(4096 - 1);
			len = random32() % (4096 * 1024 / 16);
			gk20a_allocator_block_free(&allocator, addr, len);
			gk20a_allocator_dump(&allocator);
		}
	}
	/* unreachable: see NOTE(review) above */
	gk20a_allocator_destroy(&allocator);
}
1245 | |||
1246 | #endif /* ALLOCATOR_DEBUG */ | ||
1247 | |||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h new file mode 100644 index 00000000..dba397e2 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | |||
@@ -0,0 +1,177 @@ | |||
1 | /* | ||
2 | * gk20a allocator | ||
3 | * | ||
4 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef __NVHOST_ALLOCATOR_H__ | ||
20 | #define __NVHOST_ALLOCATOR_H__ | ||
21 | |||
22 | #include <linux/rbtree.h> | ||
23 | #include <linux/rwsem.h> | ||
24 | #include <linux/slab.h> | ||
25 | |||
26 | /* #define ALLOCATOR_DEBUG */ | ||
27 | |||
28 | struct allocator_block; | ||
29 | |||
/*
 * Main allocator state.  Tracks allocated blocks of a linear address
 * space [base, limit) in an rb-tree (rb_root) mirrored by a sorted,
 * doubly-linked block list (block_first / prev / next), protected by
 * rw_sema.
 */
struct gk20a_allocator {

	char name[32];			/* name for allocator */
	struct rb_root rb_root;		/* rb tree root for blocks */

	u32 base;			/* min value of this linear space */
	u32 limit;			/* max value = limit - 1 */
	u32 align;			/* alignment size, power of 2 */

	struct gk20a_alloc_block *block_first;	/* first block in list */
	struct gk20a_alloc_block *block_recent; /* last visited block */

	u32 first_free_addr;		/* first free addr, non-contiguous
					   allocation preferred start,
					   in order to pick up small holes */
	u32 last_free_addr;		/* last free addr, contiguous
					   allocation preferred start */
	u32 cached_hole_size;		/* max free hole size up to
					   last_free_addr */
	u32 block_count;		/* number of blocks */

	struct rw_semaphore rw_sema;	/* lock */
	struct kmem_cache *block_cache;	/* slab cache for block structs */

	/* if enabled, constrain to [base, limit) */
	struct {
		bool enable;
		u32 base;
		u32 limit;
	} constraint;

	/* operation hooks -- presumably installed by gk20a_allocator_init();
	   verify against gk20a_allocator.c */
	int (*alloc)(struct gk20a_allocator *allocator,
			u32 *addr, u32 len);
	int (*alloc_nc)(struct gk20a_allocator *allocator,
			u32 *addr, u32 len,
			struct gk20a_alloc_block **pblock);
	int (*free)(struct gk20a_allocator *allocator,
			u32 addr, u32 len);
	void (*free_nc)(struct gk20a_allocator *allocator,
			struct gk20a_alloc_block *block);

	int (*constrain)(struct gk20a_allocator *a,
			bool enable,
			u32 base, u32 limit);
};
76 | |||
/* a block of linear space range [start, end) */
struct gk20a_alloc_block {
	struct gk20a_allocator *allocator;	/* parent allocator */
	struct rb_node rb;			/* rb tree node */

	u32 start;			/* linear space range
					   [start, end) */
	u32 end;

	void *priv;			/* backing structure for this
					   linear space block
					   page table, comp tag, etc */

	struct gk20a_alloc_block *prev;	/* prev block with lower address */
	struct gk20a_alloc_block *next;	/* next block with higher address */

	/* presumably set for blocks that belong to a non-contiguous
	   allocation chain (see nc_prev/nc_next) -- verify in
	   gk20a_allocator.c */
	bool nc_block;
	struct gk20a_alloc_block *nc_prev;	/* prev block for
						   non-contiguous allocation */
	struct gk20a_alloc_block *nc_next;	/* next block for
						   non-contiguous allocation */
};
99 | |||
/* Set up an allocator covering [base, base + size) with the given
 * power-of-two alignment. */
int gk20a_allocator_init(struct gk20a_allocator *allocator,
			const char *name, u32 base, u32 size, u32 align);
/* Tear down an allocator created by gk20a_allocator_init(). */
void gk20a_allocator_destroy(struct gk20a_allocator *allocator);

/* Allocate a contiguous range of length len; the chosen start address
 * is returned through *addr. */
int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
			u32 *addr, u32 len);
/* Allocate length len, possibly as several non-contiguous blocks
 * chained via nc_next; the chain head is returned through *pblock. */
int gk20a_allocator_block_alloc_nc(struct gk20a_allocator *allocator,
			u32 *addr, u32 len,
			struct gk20a_alloc_block **pblock);

/* Free the contiguous range [addr, addr + len). */
int gk20a_allocator_block_free(struct gk20a_allocator *allocator,
			u32 addr, u32 len);
/* Free a whole non-contiguous chain returned by *_block_alloc_nc(). */
void gk20a_allocator_block_free_nc(struct gk20a_allocator *allocator,
			struct gk20a_alloc_block *block);
114 | |||
115 | #if defined(ALLOCATOR_DEBUG) | ||
116 | |||
/*
 * Debug trace helper: prints "gk20a_allocator (<name>) <func>: <msg>".
 * Only available when ALLOCATOR_DEBUG is defined; the release build
 * provides a no-op variant in the #else branch below.
 *
 * Fixes vs. original: macro parameter was misspelled "alloctor", the
 * dead "if (1)" wrapper is dropped, and the argument is parenthesized
 * against operator-precedence surprises at the call site.
 */
#define allocator_dbg(allocator, format, arg...) \
	do { \
		pr_debug("gk20a_allocator (%s) %s: " format "\n",\
			(allocator)->name, __func__, ##arg);\
	} while (0)
123 | |||
124 | static inline void | ||
125 | gk20a_allocator_dump(struct gk20a_allocator *allocator) { | ||
126 | struct gk20a_alloc_block *block; | ||
127 | u32 count = 0; | ||
128 | |||
129 | down_read(&allocator->rw_sema); | ||
130 | for (block = allocator->block_first; block; block = block->next) { | ||
131 | allocator_dbg(allocator, "block %d - %d:%d, nc %d", | ||
132 | count++, block->start, block->end, block->nc_block); | ||
133 | |||
134 | if (block->prev) | ||
135 | BUG_ON(block->prev->end > block->start); | ||
136 | if (block->next) | ||
137 | BUG_ON(block->next->start < block->end); | ||
138 | } | ||
139 | allocator_dbg(allocator, "tracked count %d, actual count %d", | ||
140 | allocator->block_count, count); | ||
141 | allocator_dbg(allocator, "first block %d:%d", | ||
142 | allocator->block_first ? allocator->block_first->start : -1, | ||
143 | allocator->block_first ? allocator->block_first->end : -1); | ||
144 | allocator_dbg(allocator, "first free addr %d", | ||
145 | allocator->first_free_addr); | ||
146 | allocator_dbg(allocator, "last free addr %d", | ||
147 | allocator->last_free_addr); | ||
148 | allocator_dbg(allocator, "cached hole size %d", | ||
149 | allocator->cached_hole_size); | ||
150 | up_read(&allocator->rw_sema); | ||
151 | |||
152 | BUG_ON(count != allocator->block_count); | ||
153 | } | ||
154 | |||
155 | static inline void | ||
156 | gk20a_allocator_dump_nc_list( | ||
157 | struct gk20a_allocator *allocator, | ||
158 | struct gk20a_alloc_block *block) | ||
159 | { | ||
160 | down_read(&allocator->rw_sema); | ||
161 | while (block) { | ||
162 | pr_debug("non-contiguous block %d:%d\n", | ||
163 | block->start, block->end); | ||
164 | block = block->nc_next; | ||
165 | } | ||
166 | up_read(&allocator->rw_sema); | ||
167 | } | ||
168 | |||
169 | void gk20a_allocator_test(void); | ||
170 | |||
171 | #else /* ALLOCATOR_DEBUG */ | ||
172 | |||
173 | #define allocator_dbg(format, arg...) | ||
174 | |||
175 | #endif /* ALLOCATOR_DEBUG */ | ||
176 | |||
177 | #endif /*__NVHOST_ALLOCATOR_H__ */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c new file mode 100644 index 00000000..c6478a5e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c | |||
@@ -0,0 +1,374 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2014, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License along | ||
14 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
16 | * | ||
17 | * This file is autogenerated. Do not edit. | ||
18 | */ | ||
19 | |||
20 | #ifndef __gk20a_gating_reglist_h__ | ||
21 | #define __gk20a_gating_reglist_h__ | ||
22 | |||
23 | #include <linux/types.h> | ||
24 | #include "gk20a_gating_reglist.h" | ||
25 | |||
/*
 * One clock-gating register override: addr is the register offset,
 * prod the production (gating enabled) value, disable the value that
 * turns the gating feature off.
 */
struct gating_desc {
	u32 addr;
	u32 prod;
	u32 disable;
};
/* slcg gr: autogenerated {addr, prod, disable} table consumed by
 * gr_gk20a_slcg_gr_load_gating_prod() -- do not hand-edit values */
const struct gating_desc gk20a_slcg_gr[] = {
	{.addr = 0x004041f4, .prod = 0x00000000, .disable = 0x03fffffe},
	{.addr = 0x00409894, .prod = 0x00000040, .disable = 0x0003fffe},
	{.addr = 0x004078c4, .prod = 0x00000000, .disable = 0x000001fe},
	{.addr = 0x00406004, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x00405864, .prod = 0x00000000, .disable = 0x000001fe},
	{.addr = 0x00405910, .prod = 0x00000000, .disable = 0xfffffffe},
	{.addr = 0x00408044, .prod = 0x00000000, .disable = 0x000007fe},
	{.addr = 0x00407004, .prod = 0x00000000, .disable = 0x0000001e},
	{.addr = 0x0041a894, .prod = 0x00000000, .disable = 0x0003fffe},
	{.addr = 0x00418504, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x0041860c, .prod = 0x00000000, .disable = 0x000001fe},
	{.addr = 0x0041868c, .prod = 0x00000000, .disable = 0x0000001e},
	{.addr = 0x0041871c, .prod = 0x00000000, .disable = 0x0000003e},
	{.addr = 0x00418388, .prod = 0x00000000, .disable = 0x00000001},
	{.addr = 0x0041882c, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x00418bc0, .prod = 0x00000000, .disable = 0x000001fe},
	{.addr = 0x00418974, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x00418c74, .prod = 0x00000000, .disable = 0xfffffffe},
	{.addr = 0x00418cf4, .prod = 0x00000000, .disable = 0xfffffffe},
	{.addr = 0x00418d74, .prod = 0x00000000, .disable = 0xfffffffe},
	{.addr = 0x00418f10, .prod = 0x00000000, .disable = 0xfffffffe},
	{.addr = 0x00418e10, .prod = 0x00000000, .disable = 0xfffffffe},
	{.addr = 0x00419024, .prod = 0x00000000, .disable = 0x000001fe},
	{.addr = 0x00419a44, .prod = 0x00000000, .disable = 0x0000000e},
	{.addr = 0x00419a4c, .prod = 0x00000000, .disable = 0x000001fe},
	{.addr = 0x00419a54, .prod = 0x00000000, .disable = 0x0000003e},
	{.addr = 0x00419a5c, .prod = 0x00000000, .disable = 0x0000000e},
	{.addr = 0x00419a64, .prod = 0x00000000, .disable = 0x000001fe},
	{.addr = 0x00419a6c, .prod = 0x00000000, .disable = 0x0000000e},
	{.addr = 0x00419a74, .prod = 0x00000000, .disable = 0x0000000e},
	{.addr = 0x00419a7c, .prod = 0x00000000, .disable = 0x0000003e},
	{.addr = 0x00419a84, .prod = 0x00000000, .disable = 0x0000000e},
	{.addr = 0x00419ad0, .prod = 0x00000000, .disable = 0x0000000e},
	{.addr = 0x0041986c, .prod = 0x0000dfc0, .disable = 0x00fffffe},
	{.addr = 0x00419cd8, .prod = 0x00000000, .disable = 0x001ffffe},
	{.addr = 0x00419ce0, .prod = 0x00000000, .disable = 0x001ffffe},
	{.addr = 0x00419c74, .prod = 0x00000000, .disable = 0x0000001e},
	{.addr = 0x00419fd4, .prod = 0x00000000, .disable = 0x0003fffe},
	{.addr = 0x00419fdc, .prod = 0x00000000, .disable = 0xfffffffe},
	{.addr = 0x00419fe4, .prod = 0x00000000, .disable = 0x0000000e},
	{.addr = 0x00419ff4, .prod = 0x00000000, .disable = 0x00003ffe},
	{.addr = 0x00419ffc, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x0041be2c, .prod = 0x020bbfc0, .disable = 0xfffffffe},
	{.addr = 0x0041bfec, .prod = 0x00000000, .disable = 0xfffffffe},
	{.addr = 0x0041bed4, .prod = 0x00000000, .disable = 0xfffffffe},
	{.addr = 0x00408814, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x0040881c, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x00408a84, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x00408a8c, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x00408a94, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x00408a9c, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x00408aa4, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x00408aac, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x004089ac, .prod = 0x00000000, .disable = 0x0001fffe},
	{.addr = 0x00408a24, .prod = 0x00000000, .disable = 0x000001ff},
	{.addr = 0x0017e050, .prod = 0x00000000, .disable = 0x00fffffe},
	{.addr = 0x001200a8, .prod = 0x00000000, .disable = 0x00000001},
	{.addr = 0x0010e48c, .prod = 0x00000000, .disable = 0x0000003e},
	{.addr = 0x00001c04, .prod = 0x00000000, .disable = 0x000000fe},
	{.addr = 0x00106f28, .prod = 0x00000040, .disable = 0x000007fe},
	{.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f},
	{.addr = 0x0017ea98, .prod = 0x00000000, .disable = 0xfffffffe},
	{.addr = 0x00106f28, .prod = 0x00000040, .disable = 0x000007fe},
	{.addr = 0x00120048, .prod = 0x00000000, .disable = 0x00000049},
};
98 | |||
/* slcg perf: autogenerated {addr, prod, disable} table consumed by
 * gr_gk20a_slcg_perf_load_gating_prod() -- do not hand-edit values */
const struct gating_desc gk20a_slcg_perf[] = {
	{.addr = 0x001be018, .prod = 0x000001ff, .disable = 0x00000000},
	{.addr = 0x001bc018, .prod = 0x000001ff, .disable = 0x00000000},
	{.addr = 0x001b8018, .prod = 0x000001ff, .disable = 0x00000000},
	{.addr = 0x001b4124, .prod = 0x00000001, .disable = 0x00000000},
};
106 | |||
/* blcg gr: autogenerated {addr, prod, disable} table consumed by
 * gr_gk20a_blcg_gr_load_gating_prod() -- do not hand-edit values */
const struct gating_desc gk20a_blcg_gr[] = {
	{.addr = 0x004041f0, .prod = 0x00004046, .disable = 0x00000000},
	{.addr = 0x00409890, .prod = 0x0000007f, .disable = 0x00000000},
	{.addr = 0x004098b0, .prod = 0x0000007f, .disable = 0x00000000},
	{.addr = 0x004078c0, .prod = 0x00000042, .disable = 0x00000000},
	{.addr = 0x00406000, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x00405860, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x0040590c, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x00408040, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x00407000, .prod = 0x00004041, .disable = 0x00000000},
	{.addr = 0x00405bf0, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x0041a890, .prod = 0x0000007f, .disable = 0x00000000},
	{.addr = 0x0041a8b0, .prod = 0x0000007f, .disable = 0x00000000},
	{.addr = 0x00418500, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x00418608, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00418688, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00418718, .prod = 0x00000042, .disable = 0x00000000},
	{.addr = 0x00418828, .prod = 0x00000044, .disable = 0x00000000},
	{.addr = 0x00418bbc, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00418970, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00418c70, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x00418cf0, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x00418d70, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x00418f0c, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x00418e0c, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x00419020, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419038, .prod = 0x00000042, .disable = 0x00000000},
	{.addr = 0x00419a40, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419a48, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419a50, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419a58, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419a60, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419a68, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419a70, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419a78, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419a80, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419acc, .prod = 0x00004047, .disable = 0x00000000},
	{.addr = 0x00419868, .prod = 0x00000043, .disable = 0x00000000},
	{.addr = 0x00419cd4, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419cdc, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419c70, .prod = 0x00004045, .disable = 0x00000000},
	{.addr = 0x00419fd0, .prod = 0x00004043, .disable = 0x00000000},
	{.addr = 0x00419fd8, .prod = 0x00004045, .disable = 0x00000000},
	{.addr = 0x00419fe0, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419fe8, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419ff0, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x00419ff8, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00419f90, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x0041be28, .prod = 0x00000042, .disable = 0x00000000},
	{.addr = 0x0041bfe8, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x0041bed0, .prod = 0x00004044, .disable = 0x00000000},
	{.addr = 0x00408810, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00408818, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00408a80, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00408a88, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00408a90, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00408a98, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00408aa0, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x00408aa8, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x004089a8, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x004089b0, .prod = 0x00000042, .disable = 0x00000000},
	{.addr = 0x004089b8, .prod = 0x00004042, .disable = 0x00000000},
	{.addr = 0x0017ea60, .prod = 0x00000044, .disable = 0x00000000},
	{.addr = 0x0017ea68, .prod = 0x00000044, .disable = 0x00000000},
	{.addr = 0x00100d30, .prod = 0x0000c242, .disable = 0x00000000},
	{.addr = 0x00100d48, .prod = 0x0000c242, .disable = 0x00000000},
	{.addr = 0x00100d3c, .prod = 0x00000242, .disable = 0x00000000},
	{.addr = 0x0017ea78, .prod = 0x00000044, .disable = 0x00000000},
	{.addr = 0x0017e040, .prod = 0x00000044, .disable = 0x00000000},
	{.addr = 0x00100d1c, .prod = 0x00000042, .disable = 0x00000000},
	{.addr = 0x00106f24, .prod = 0x0000c242, .disable = 0x00000000},
	{.addr = 0x0041be00, .prod = 0x00000004, .disable = 0x00000007},
	{.addr = 0x00100d10, .prod = 0x0000c242, .disable = 0x00000000},
	{.addr = 0x0017ea70, .prod = 0x00000044, .disable = 0x00000000},
	{.addr = 0x00001c00, .prod = 0x00000042, .disable = 0x00000000},
	{.addr = 0x00100c98, .prod = 0x00000242, .disable = 0x00000000},
	{.addr = 0x0017e030, .prod = 0x00000044, .disable = 0x00000000},
};
186 | |||
/* pg gr: autogenerated {addr, prod, disable} table consumed by
 * gr_gk20a_pg_gr_load_gating_prod() -- do not hand-edit values */
const struct gating_desc gk20a_pg_gr[] = {
	{.addr = 0x004041f8, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x004041fc, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00409898, .prod = 0x10140000, .disable = 0x00000000},
	{.addr = 0x0040989c, .prod = 0xff00000a, .disable = 0x00000000},
	{.addr = 0x004078c8, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x004078cc, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00406008, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x0040600c, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00405868, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x0040586c, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00405914, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00405924, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00408048, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x0040804c, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00407008, .prod = 0x10140000, .disable = 0x00000000},
	{.addr = 0x0040700c, .prod = 0xff00000a, .disable = 0x00000000},
	{.addr = 0x00405bf8, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00405bfc, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x0041a898, .prod = 0x10140000, .disable = 0x00000000},
	{.addr = 0x0041a89c, .prod = 0xff00000a, .disable = 0x00000000},
	{.addr = 0x00418510, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00418514, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00418610, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00418614, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00418690, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00418694, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00418720, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00418724, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00418840, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00418844, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00418bc4, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00418bc8, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00418978, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x0041897c, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00418c78, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00418c7c, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00418cf8, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00418cfc, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00418d78, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00418d7c, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00418f14, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00418f18, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00418e14, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00418e18, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419030, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419050, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419a88, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419a8c, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419a90, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419a94, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419a98, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419a9c, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419aa0, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419aa4, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419ad4, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419ad8, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419870, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419874, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419ce4, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419cf0, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419c78, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419c7c, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419fa0, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419fa4, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419fa8, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419fac, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419fb0, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419fb4, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419fb8, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419fbc, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419fc0, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419fc4, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00419fc8, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00419fcc, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x0041be30, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x0041be34, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x0041bff0, .prod = 0x10747c00, .disable = 0x00000000},
	{.addr = 0x0041bff4, .prod = 0xff00000a, .disable = 0x00000000},
	{.addr = 0x0041bed8, .prod = 0x10240a00, .disable = 0x00000000},
	{.addr = 0x0041bee0, .prod = 0xff00000a, .disable = 0x00000000},
	{.addr = 0x00408820, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00408824, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00408828, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x0040882c, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00408ac0, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00408ac4, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00408ac8, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00408acc, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00408ad0, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00408ad4, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00408ad8, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00408adc, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00408ae0, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00408ae4, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x00408ae8, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x00408aec, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x004089c0, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x004089c4, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x004089c8, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x004089cc, .prod = 0xff00a725, .disable = 0x00000000},
	{.addr = 0x004089d0, .prod = 0x10940000, .disable = 0x00000000},
	{.addr = 0x004089d4, .prod = 0xff00a725, .disable = 0x00000000},
};
292 | |||
/* therm gr: autogenerated {addr, prod, disable} table consumed by
 * gr_gk20a_slcg_therm_load_gating_prod() -- do not hand-edit values */
const struct gating_desc gk20a_slcg_therm[] = {
	{.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f},
};
297 | |||
298 | /* static inline functions */ | ||
299 | void gr_gk20a_slcg_gr_load_gating_prod(struct gk20a *g, | ||
300 | bool prod) | ||
301 | { | ||
302 | u32 i; | ||
303 | u32 size = sizeof(gk20a_slcg_gr) / sizeof(struct gating_desc); | ||
304 | for (i = 0; i < size; i++) { | ||
305 | if (prod) | ||
306 | gk20a_writel(g, gk20a_slcg_gr[i].addr, | ||
307 | gk20a_slcg_gr[i].prod); | ||
308 | else | ||
309 | gk20a_writel(g, gk20a_slcg_gr[i].addr, | ||
310 | gk20a_slcg_gr[i].disable); | ||
311 | } | ||
312 | } | ||
313 | |||
314 | void gr_gk20a_slcg_perf_load_gating_prod(struct gk20a *g, | ||
315 | bool prod) | ||
316 | { | ||
317 | u32 i; | ||
318 | u32 size = sizeof(gk20a_slcg_perf) / sizeof(struct gating_desc); | ||
319 | for (i = 0; i < size; i++) { | ||
320 | if (prod) | ||
321 | gk20a_writel(g, gk20a_slcg_perf[i].addr, | ||
322 | gk20a_slcg_perf[i].prod); | ||
323 | else | ||
324 | gk20a_writel(g, gk20a_slcg_perf[i].addr, | ||
325 | gk20a_slcg_perf[i].disable); | ||
326 | } | ||
327 | } | ||
328 | |||
329 | void gr_gk20a_blcg_gr_load_gating_prod(struct gk20a *g, | ||
330 | bool prod) | ||
331 | { | ||
332 | u32 i; | ||
333 | u32 size = sizeof(gk20a_blcg_gr) / sizeof(struct gating_desc); | ||
334 | for (i = 0; i < size; i++) { | ||
335 | if (prod) | ||
336 | gk20a_writel(g, gk20a_blcg_gr[i].addr, | ||
337 | gk20a_blcg_gr[i].prod); | ||
338 | else | ||
339 | gk20a_writel(g, gk20a_blcg_gr[i].addr, | ||
340 | gk20a_blcg_gr[i].disable); | ||
341 | } | ||
342 | } | ||
343 | |||
344 | void gr_gk20a_pg_gr_load_gating_prod(struct gk20a *g, | ||
345 | bool prod) | ||
346 | { | ||
347 | u32 i; | ||
348 | u32 size = sizeof(gk20a_pg_gr) / sizeof(struct gating_desc); | ||
349 | for (i = 0; i < size; i++) { | ||
350 | if (prod) | ||
351 | gk20a_writel(g, gk20a_pg_gr[i].addr, | ||
352 | gk20a_pg_gr[i].prod); | ||
353 | else | ||
354 | gk20a_writel(g, gk20a_pg_gr[i].addr, | ||
355 | gk20a_pg_gr[i].disable); | ||
356 | } | ||
357 | } | ||
358 | |||
359 | void gr_gk20a_slcg_therm_load_gating_prod(struct gk20a *g, | ||
360 | bool prod) | ||
361 | { | ||
362 | u32 i; | ||
363 | u32 size = sizeof(gk20a_slcg_therm) / sizeof(struct gating_desc); | ||
364 | for (i = 0; i < size; i++) { | ||
365 | if (prod) | ||
366 | gk20a_writel(g, gk20a_slcg_therm[i].addr, | ||
367 | gk20a_slcg_therm[i].prod); | ||
368 | else | ||
369 | gk20a_writel(g, gk20a_slcg_therm[i].addr, | ||
370 | gk20a_slcg_therm[i].disable); | ||
371 | } | ||
372 | } | ||
373 | |||
374 | #endif /* __gk20a_gating_reglist_h__ */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h new file mode 100644 index 00000000..40a6c545 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/gk20a_gating_reglist.h | ||
3 | * | ||
4 | * Copyright (c) 2012, NVIDIA Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License along | ||
16 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
18 | * | ||
19 | * This file is autogenerated. Do not edit. | ||
20 | */ | ||
21 | |||
22 | #include "gk20a.h" | ||
23 | |||
24 | void gr_gk20a_slcg_gr_load_gating_prod(struct gk20a *g, | ||
25 | bool prod); | ||
26 | |||
27 | void gr_gk20a_slcg_perf_load_gating_prod(struct gk20a *g, | ||
28 | bool prod); | ||
29 | |||
30 | void gr_gk20a_blcg_gr_load_gating_prod(struct gk20a *g, | ||
31 | bool prod); | ||
32 | |||
33 | void gr_gk20a_pg_gr_load_gating_prod(struct gk20a *g, | ||
34 | bool prod); | ||
35 | |||
36 | void gr_gk20a_slcg_therm_load_gating_prod(struct gk20a *g, | ||
37 | bool prod); | ||
38 | |||
39 | |||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c new file mode 100644 index 00000000..d1fd71fe --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c | |||
@@ -0,0 +1,358 @@ | |||
1 | /* | ||
2 | * gk20a clock scaling profile | ||
3 | * | ||
4 | * Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/devfreq.h> | ||
20 | #include <linux/debugfs.h> | ||
21 | #include <linux/types.h> | ||
22 | #include <linux/clk.h> | ||
23 | #include <linux/export.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/clk/tegra.h> | ||
26 | #include <linux/tegra-soc.h> | ||
27 | #include <linux/platform_data/tegra_edp.h> | ||
28 | #include <linux/pm_qos.h> | ||
29 | |||
30 | #include <governor.h> | ||
31 | |||
32 | #include "gk20a.h" | ||
33 | #include "pmu_gk20a.h" | ||
34 | #include "clk_gk20a.h" | ||
35 | #include "gk20a_scale.h" | ||
36 | |||
37 | static ssize_t gk20a_scale_load_show(struct device *dev, | ||
38 | struct device_attribute *attr, | ||
39 | char *buf) | ||
40 | { | ||
41 | struct platform_device *pdev = to_platform_device(dev); | ||
42 | struct gk20a *g = get_gk20a(pdev); | ||
43 | u32 busy_time; | ||
44 | ssize_t res; | ||
45 | |||
46 | if (!g->power_on) { | ||
47 | busy_time = 0; | ||
48 | } else { | ||
49 | gk20a_busy(g->dev); | ||
50 | gk20a_pmu_load_norm(g, &busy_time); | ||
51 | gk20a_idle(g->dev); | ||
52 | } | ||
53 | |||
54 | res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time); | ||
55 | |||
56 | return res; | ||
57 | } | ||
58 | |||
59 | static DEVICE_ATTR(load, S_IRUGO, gk20a_scale_load_show, NULL); | ||
60 | |||
61 | /* | ||
62 | * gk20a_scale_qos_notify() | ||
63 | * | ||
64 | * This function is called when the minimum QoS requirement for the device | ||
65 | * has changed. The function calls postscaling callback if it is defined. | ||
66 | */ | ||
67 | |||
68 | static int gk20a_scale_qos_notify(struct notifier_block *nb, | ||
69 | unsigned long n, void *p) | ||
70 | { | ||
71 | struct gk20a_scale_profile *profile = | ||
72 | container_of(nb, struct gk20a_scale_profile, | ||
73 | qos_notify_block); | ||
74 | struct gk20a_platform *platform = platform_get_drvdata(profile->pdev); | ||
75 | struct gk20a *g = get_gk20a(profile->pdev); | ||
76 | unsigned long freq; | ||
77 | |||
78 | if (!platform->postscale) | ||
79 | return NOTIFY_OK; | ||
80 | |||
81 | /* get the frequency requirement. if devfreq is enabled, check if it | ||
82 | * has higher demand than qos */ | ||
83 | freq = gk20a_clk_round_rate(g, pm_qos_request(platform->qos_id)); | ||
84 | if (g->devfreq) | ||
85 | freq = max(g->devfreq->previous_freq, freq); | ||
86 | |||
87 | platform->postscale(profile->pdev, freq); | ||
88 | |||
89 | return NOTIFY_OK; | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * gk20a_scale_make_freq_table(profile) | ||
94 | * | ||
95 | * This function initialises the frequency table for the given device profile | ||
96 | */ | ||
97 | |||
98 | static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile) | ||
99 | { | ||
100 | struct gk20a *g = get_gk20a(profile->pdev); | ||
101 | unsigned long *freqs; | ||
102 | int num_freqs, err; | ||
103 | |||
104 | /* make sure the clock is available */ | ||
105 | if (!gk20a_clk_get(g)) | ||
106 | return -ENOSYS; | ||
107 | |||
108 | /* get gpu dvfs table */ | ||
109 | err = tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk), | ||
110 | &freqs, &num_freqs); | ||
111 | if (err) | ||
112 | return -ENOSYS; | ||
113 | |||
114 | profile->devfreq_profile.freq_table = (unsigned long *)freqs; | ||
115 | profile->devfreq_profile.max_state = num_freqs; | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | /* | ||
121 | * gk20a_scale_target(dev, *freq, flags) | ||
122 | * | ||
123 | * This function scales the clock | ||
124 | */ | ||
125 | |||
126 | static int gk20a_scale_target(struct device *dev, unsigned long *freq, | ||
127 | u32 flags) | ||
128 | { | ||
129 | struct gk20a *g = get_gk20a(to_platform_device(dev)); | ||
130 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
131 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
132 | unsigned long rounded_rate = gk20a_clk_round_rate(g, *freq); | ||
133 | |||
134 | if (gk20a_clk_get_rate(g) == rounded_rate) { | ||
135 | *freq = rounded_rate; | ||
136 | return 0; | ||
137 | } | ||
138 | |||
139 | gk20a_clk_set_rate(g, rounded_rate); | ||
140 | if (platform->postscale) | ||
141 | platform->postscale(profile->pdev, rounded_rate); | ||
142 | *freq = gk20a_clk_get_rate(g); | ||
143 | |||
144 | return 0; | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * update_load_estimate_gpmu(profile) | ||
149 | * | ||
150 | * Update load estimate using gpmu. The gpmu value is normalised | ||
151 | * based on the time it was asked last time. | ||
152 | */ | ||
153 | |||
154 | static void update_load_estimate_gpmu(struct platform_device *pdev) | ||
155 | { | ||
156 | struct gk20a *g = get_gk20a(pdev); | ||
157 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
158 | unsigned long dt; | ||
159 | u32 busy_time; | ||
160 | ktime_t t; | ||
161 | |||
162 | t = ktime_get(); | ||
163 | dt = ktime_us_delta(t, profile->last_event_time); | ||
164 | |||
165 | profile->dev_stat.total_time = dt; | ||
166 | profile->last_event_time = t; | ||
167 | gk20a_pmu_load_norm(g, &busy_time); | ||
168 | profile->dev_stat.busy_time = (busy_time * dt) / 1000; | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * gk20a_scale_suspend(pdev) | ||
173 | * | ||
174 | * This function informs devfreq of suspend | ||
175 | */ | ||
176 | |||
177 | void gk20a_scale_suspend(struct platform_device *pdev) | ||
178 | { | ||
179 | struct gk20a *g = get_gk20a(pdev); | ||
180 | struct devfreq *devfreq = g->devfreq; | ||
181 | |||
182 | if (!devfreq) | ||
183 | return; | ||
184 | |||
185 | devfreq_suspend_device(devfreq); | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * gk20a_scale_resume(pdev) | ||
190 | * | ||
191 | * This functions informs devfreq of resume | ||
192 | */ | ||
193 | |||
194 | void gk20a_scale_resume(struct platform_device *pdev) | ||
195 | { | ||
196 | struct gk20a *g = get_gk20a(pdev); | ||
197 | struct devfreq *devfreq = g->devfreq; | ||
198 | |||
199 | if (!devfreq) | ||
200 | return; | ||
201 | |||
202 | devfreq_resume_device(devfreq); | ||
203 | } | ||
204 | |||
205 | /* | ||
206 | * gk20a_scale_notify(pdev, busy) | ||
207 | * | ||
208 | * Calling this function informs that the device is idling (..or busy). This | ||
209 | * data is used to estimate the current load | ||
210 | */ | ||
211 | |||
212 | static void gk20a_scale_notify(struct platform_device *pdev, bool busy) | ||
213 | { | ||
214 | struct gk20a_platform *platform = platform_get_drvdata(pdev); | ||
215 | struct gk20a *g = get_gk20a(pdev); | ||
216 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
217 | struct devfreq *devfreq = g->devfreq; | ||
218 | |||
219 | /* inform edp about new constraint */ | ||
220 | if (platform->prescale) | ||
221 | platform->prescale(pdev); | ||
222 | |||
223 | /* Is the device profile initialised? */ | ||
224 | if (!(profile && devfreq)) | ||
225 | return; | ||
226 | |||
227 | mutex_lock(&devfreq->lock); | ||
228 | profile->dev_stat.busy = busy; | ||
229 | update_devfreq(devfreq); | ||
230 | mutex_unlock(&devfreq->lock); | ||
231 | } | ||
232 | |||
233 | void gk20a_scale_notify_idle(struct platform_device *pdev) | ||
234 | { | ||
235 | gk20a_scale_notify(pdev, false); | ||
236 | |||
237 | } | ||
238 | |||
/* Notify the scaling code that work was submitted (device is busy). */
void gk20a_scale_notify_busy(struct platform_device *pdev)
{
	gk20a_scale_notify(pdev, true);
}
243 | |||
244 | /* | ||
245 | * gk20a_scale_get_dev_status(dev, *stat) | ||
246 | * | ||
247 | * This function queries the current device status. | ||
248 | */ | ||
249 | |||
250 | static int gk20a_scale_get_dev_status(struct device *dev, | ||
251 | struct devfreq_dev_status *stat) | ||
252 | { | ||
253 | struct gk20a *g = get_gk20a(to_platform_device(dev)); | ||
254 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
255 | |||
256 | /* Make sure there are correct values for the current frequency */ | ||
257 | profile->dev_stat.current_frequency = gk20a_clk_get_rate(g); | ||
258 | |||
259 | /* Update load estimate */ | ||
260 | update_load_estimate_gpmu(to_platform_device(dev)); | ||
261 | |||
262 | /* Copy the contents of the current device status */ | ||
263 | *stat = profile->dev_stat; | ||
264 | |||
265 | /* Finally, clear out the local values */ | ||
266 | profile->dev_stat.total_time = 0; | ||
267 | profile->dev_stat.busy_time = 0; | ||
268 | |||
269 | return 0; | ||
270 | } | ||
271 | |||
272 | /* | ||
273 | * gk20a_scale_init(pdev) | ||
274 | */ | ||
275 | |||
276 | void gk20a_scale_init(struct platform_device *pdev) | ||
277 | { | ||
278 | struct gk20a_platform *platform = platform_get_drvdata(pdev); | ||
279 | struct gk20a *g = platform->g; | ||
280 | struct gk20a_scale_profile *profile; | ||
281 | int err; | ||
282 | |||
283 | if (g->scale_profile) | ||
284 | return; | ||
285 | |||
286 | profile = kzalloc(sizeof(*profile), GFP_KERNEL); | ||
287 | |||
288 | profile->pdev = pdev; | ||
289 | profile->dev_stat.busy = false; | ||
290 | |||
291 | /* Create frequency table */ | ||
292 | err = gk20a_scale_make_freq_table(profile); | ||
293 | if (err || !profile->devfreq_profile.max_state) | ||
294 | goto err_get_freqs; | ||
295 | |||
296 | if (device_create_file(&pdev->dev, &dev_attr_load)) | ||
297 | goto err_create_sysfs_entry; | ||
298 | |||
299 | /* Store device profile so we can access it if devfreq governor | ||
300 | * init needs that */ | ||
301 | g->scale_profile = profile; | ||
302 | |||
303 | if (platform->devfreq_governor) { | ||
304 | struct devfreq *devfreq; | ||
305 | |||
306 | profile->devfreq_profile.initial_freq = | ||
307 | profile->devfreq_profile.freq_table[0]; | ||
308 | profile->devfreq_profile.target = gk20a_scale_target; | ||
309 | profile->devfreq_profile.get_dev_status = | ||
310 | gk20a_scale_get_dev_status; | ||
311 | |||
312 | devfreq = devfreq_add_device(&pdev->dev, | ||
313 | &profile->devfreq_profile, | ||
314 | platform->devfreq_governor, NULL); | ||
315 | |||
316 | if (IS_ERR(devfreq)) | ||
317 | devfreq = NULL; | ||
318 | |||
319 | g->devfreq = devfreq; | ||
320 | } | ||
321 | |||
322 | /* Should we register QoS callback for this device? */ | ||
323 | if (platform->qos_id < PM_QOS_NUM_CLASSES && | ||
324 | platform->qos_id != PM_QOS_RESERVED && | ||
325 | platform->postscale) { | ||
326 | profile->qos_notify_block.notifier_call = | ||
327 | &gk20a_scale_qos_notify; | ||
328 | pm_qos_add_notifier(platform->qos_id, | ||
329 | &profile->qos_notify_block); | ||
330 | } | ||
331 | |||
332 | return; | ||
333 | |||
334 | err_get_freqs: | ||
335 | device_remove_file(&pdev->dev, &dev_attr_load); | ||
336 | err_create_sysfs_entry: | ||
337 | kfree(g->scale_profile); | ||
338 | g->scale_profile = NULL; | ||
339 | } | ||
340 | |||
341 | /* | ||
342 | * gk20a_scale_hw_init(dev) | ||
343 | * | ||
344 | * Initialize hardware portion of the device | ||
345 | */ | ||
346 | |||
/*
 * gk20a_scale_hw_init(dev)
 *
 * Initialize hardware portion of the device: reset the load-estimation
 * window so scaling starts cleanly after power-up.
 */
void gk20a_scale_hw_init(struct platform_device *pdev)
{
	struct gk20a_platform *platform = platform_get_drvdata(pdev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;

	/* make sure that scaling has been initialised */
	if (!profile)
		return;

	profile->dev_stat.total_time = 0;
	profile->last_event_time = ktime_get();
}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.h b/drivers/gpu/nvgpu/gk20a/gk20a_scale.h new file mode 100644 index 00000000..e76b1662 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.h | |||
@@ -0,0 +1,51 @@ | |||
1 | /* | ||
2 | * gk20a clock scaling profile | ||
3 | * | ||
4 | * Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef GK20A_SCALE_H | ||
20 | #define GK20A_SCALE_H | ||
21 | |||
22 | #include <linux/nvhost.h> | ||
23 | #include <linux/devfreq.h> | ||
24 | |||
25 | struct platform_device; | ||
26 | struct clk; | ||
27 | |||
/* Per-device clock scaling state for gk20a. */
struct gk20a_scale_profile {
	struct platform_device *pdev;	/* device being scaled */
	ktime_t last_event_time;	/* start of current load window */
	struct devfreq_dev_profile devfreq_profile; /* profile registered with devfreq */
	struct devfreq_dev_status dev_stat; /* accumulated busy/total time */
	struct notifier_block qos_notify_block; /* PM QoS min-freq notifier */
	void *private_data;	/* platform-specific state; unused in gk20a_scale.c */
};
36 | |||
37 | /* Initialization and de-initialization for module */ | ||
38 | void gk20a_scale_init(struct platform_device *); | ||
39 | void gk20a_scale_hw_init(struct platform_device *pdev); | ||
40 | |||
41 | /* | ||
42 | * call when performing submit to notify scaling mechanism that the module is | ||
43 | * in use | ||
44 | */ | ||
45 | void gk20a_scale_notify_busy(struct platform_device *); | ||
46 | void gk20a_scale_notify_idle(struct platform_device *); | ||
47 | |||
48 | void gk20a_scale_suspend(struct platform_device *); | ||
49 | void gk20a_scale_resume(struct platform_device *); | ||
50 | |||
51 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c new file mode 100644 index 00000000..f6b43f50 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c | |||
@@ -0,0 +1,335 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/gk20a_sysfs.c | ||
3 | * | ||
4 | * GK20A Graphics | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include <linux/platform_device.h> | ||
22 | #include <linux/pm_runtime.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/fb.h> | ||
25 | |||
26 | #include <mach/clk.h> | ||
27 | |||
28 | #include "gk20a.h" | ||
29 | #include "gr_gk20a.h" | ||
30 | #include "fifo_gk20a.h" | ||
31 | |||
32 | |||
33 | #define PTIMER_FP_FACTOR 1000000 | ||
34 | /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. 32 ns is | ||
35 | the resolution of ptimer. */ | ||
36 | #define PTIMER_REF_FREQ_HZ 31250000 | ||
37 | |||
38 | |||
39 | static ssize_t elcg_enable_store(struct device *device, | ||
40 | struct device_attribute *attr, const char *buf, size_t count) | ||
41 | { | ||
42 | struct platform_device *ndev = to_platform_device(device); | ||
43 | struct gk20a *g = get_gk20a(ndev); | ||
44 | unsigned long val = 0; | ||
45 | |||
46 | if (kstrtoul(buf, 10, &val) < 0) | ||
47 | return -EINVAL; | ||
48 | |||
49 | gk20a_busy(g->dev); | ||
50 | if (val) { | ||
51 | g->elcg_enabled = true; | ||
52 | gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A); | ||
53 | gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A); | ||
54 | } else { | ||
55 | g->elcg_enabled = false; | ||
56 | gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); | ||
57 | gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); | ||
58 | } | ||
59 | gk20a_idle(g->dev); | ||
60 | |||
61 | dev_info(device, "ELCG is %s.\n", g->elcg_enabled ? "enabled" : | ||
62 | "disabled"); | ||
63 | |||
64 | return count; | ||
65 | } | ||
66 | |||
67 | static ssize_t elcg_enable_read(struct device *device, | ||
68 | struct device_attribute *attr, char *buf) | ||
69 | { | ||
70 | struct platform_device *ndev = to_platform_device(device); | ||
71 | struct gk20a *g = get_gk20a(ndev); | ||
72 | |||
73 | return sprintf(buf, "%d\n", g->elcg_enabled ? 1 : 0); | ||
74 | } | ||
75 | |||
76 | static DEVICE_ATTR(elcg_enable, S_IRWXUGO, elcg_enable_read, elcg_enable_store); | ||
77 | |||
78 | static ssize_t blcg_enable_store(struct device *device, | ||
79 | struct device_attribute *attr, const char *buf, size_t count) | ||
80 | { | ||
81 | struct platform_device *ndev = to_platform_device(device); | ||
82 | struct gk20a *g = get_gk20a(ndev); | ||
83 | unsigned long val = 0; | ||
84 | |||
85 | if (kstrtoul(buf, 10, &val) < 0) | ||
86 | return -EINVAL; | ||
87 | |||
88 | if (val) | ||
89 | g->blcg_enabled = true; | ||
90 | else | ||
91 | g->blcg_enabled = false; | ||
92 | |||
93 | gk20a_busy(g->dev); | ||
94 | g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled); | ||
95 | gk20a_idle(g->dev); | ||
96 | |||
97 | dev_info(device, "BLCG is %s.\n", g->blcg_enabled ? "enabled" : | ||
98 | "disabled"); | ||
99 | |||
100 | return count; | ||
101 | } | ||
102 | |||
103 | static ssize_t blcg_enable_read(struct device *device, | ||
104 | struct device_attribute *attr, char *buf) | ||
105 | { | ||
106 | struct platform_device *ndev = to_platform_device(device); | ||
107 | struct gk20a *g = get_gk20a(ndev); | ||
108 | |||
109 | return sprintf(buf, "%d\n", g->blcg_enabled ? 1 : 0); | ||
110 | } | ||
111 | |||
112 | static DEVICE_ATTR(blcg_enable, S_IRWXUGO, blcg_enable_read, blcg_enable_store); | ||
113 | |||
114 | static ssize_t slcg_enable_store(struct device *device, | ||
115 | struct device_attribute *attr, const char *buf, size_t count) | ||
116 | { | ||
117 | struct platform_device *ndev = to_platform_device(device); | ||
118 | struct gk20a *g = get_gk20a(ndev); | ||
119 | unsigned long val = 0; | ||
120 | |||
121 | if (kstrtoul(buf, 10, &val) < 0) | ||
122 | return -EINVAL; | ||
123 | |||
124 | if (val) | ||
125 | g->slcg_enabled = true; | ||
126 | else | ||
127 | g->slcg_enabled = false; | ||
128 | |||
129 | /* | ||
130 | * TODO: slcg_therm_load_gating is not enabled anywhere during | ||
131 | * init. Therefore, it would be incongruous to add it here. Once | ||
132 | * it is added to init, we should add it here too. | ||
133 | */ | ||
134 | gk20a_busy(g->dev); | ||
135 | g->ops.clock_gating.slcg_gr_load_gating_prod(g, g->slcg_enabled); | ||
136 | g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled); | ||
137 | gk20a_idle(g->dev); | ||
138 | |||
139 | dev_info(device, "SLCG is %s.\n", g->slcg_enabled ? "enabled" : | ||
140 | "disabled"); | ||
141 | |||
142 | return count; | ||
143 | } | ||
144 | |||
145 | static ssize_t slcg_enable_read(struct device *device, | ||
146 | struct device_attribute *attr, char *buf) | ||
147 | { | ||
148 | struct platform_device *ndev = to_platform_device(device); | ||
149 | struct gk20a *g = get_gk20a(ndev); | ||
150 | |||
151 | return sprintf(buf, "%d\n", g->slcg_enabled ? 1 : 0); | ||
152 | } | ||
153 | |||
154 | static DEVICE_ATTR(slcg_enable, S_IRWXUGO, slcg_enable_read, slcg_enable_store); | ||
155 | |||
/*
 * Show the ptimer scale factor: the ratio between the ptimer reference
 * frequency (PTIMER_REF_FREQ_HZ, i.e. a 32 ns resolution) and the
 * current clk_m rate, printed as a fixed-point decimal with
 * PTIMER_FP_FACTOR fractional precision.
 *
 * NOTE(review): the inner integer division assumes clk_m runs at or
 * above PTIMER_FP_FACTOR (1 MHz); a slower rate would divide by zero --
 * confirm clk_m's valid range.
 */
static ssize_t ptimer_scale_factor_show(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	u32 tsc_freq_hz = clk_get_rate(clk_get_sys(NULL, "clk_m"));
	/* divide divisor first to keep the intermediate within u32 range */
	u32 scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) /
				((u32)(tsc_freq_hz) /
				(u32)(PTIMER_FP_FACTOR));
	ssize_t res = snprintf(buf,
				PAGE_SIZE,
				"%u.%u\n",
				scaling_factor_fp / PTIMER_FP_FACTOR,
				scaling_factor_fp % PTIMER_FP_FACTOR);

	return res;
}

/* read-only attribute (0444) */
static DEVICE_ATTR(ptimer_scale_factor,
			S_IRUGO,
			ptimer_scale_factor_show,
			NULL);
177 | |||
178 | static ssize_t railgate_delay_store(struct device *dev, | ||
179 | struct device_attribute *attr, | ||
180 | const char *buf, size_t count) | ||
181 | { | ||
182 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
183 | int railgate_delay = 0, ret = 0; | ||
184 | |||
185 | if (!platform->can_railgate) { | ||
186 | dev_info(dev, "does not support power-gating\n"); | ||
187 | return count; | ||
188 | } | ||
189 | |||
190 | ret = sscanf(buf, "%d", &railgate_delay); | ||
191 | if (ret == 1 && railgate_delay >= 0) { | ||
192 | struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); | ||
193 | platform->railgate_delay = railgate_delay; | ||
194 | pm_genpd_set_poweroff_delay(genpd, platform->railgate_delay); | ||
195 | } else | ||
196 | dev_err(dev, "Invalid powergate delay\n"); | ||
197 | |||
198 | return count; | ||
199 | } | ||
200 | static ssize_t railgate_delay_show(struct device *dev, | ||
201 | struct device_attribute *attr, char *buf) | ||
202 | { | ||
203 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
204 | return snprintf(buf, PAGE_SIZE, "%d\n", platform->railgate_delay); | ||
205 | } | ||
206 | static DEVICE_ATTR(railgate_delay, S_IRWXUGO, railgate_delay_show, | ||
207 | railgate_delay_store); | ||
208 | |||
209 | static ssize_t clockgate_delay_store(struct device *dev, | ||
210 | struct device_attribute *attr, | ||
211 | const char *buf, size_t count) | ||
212 | { | ||
213 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
214 | int clockgate_delay = 0, ret = 0; | ||
215 | |||
216 | ret = sscanf(buf, "%d", &clockgate_delay); | ||
217 | if (ret == 1 && clockgate_delay >= 0) { | ||
218 | platform->clockgate_delay = clockgate_delay; | ||
219 | pm_runtime_set_autosuspend_delay(dev, | ||
220 | platform->clockgate_delay); | ||
221 | } else | ||
222 | dev_err(dev, "Invalid clockgate delay\n"); | ||
223 | |||
224 | return count; | ||
225 | } | ||
226 | static ssize_t clockgate_delay_show(struct device *dev, | ||
227 | struct device_attribute *attr, char *buf) | ||
228 | { | ||
229 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
230 | return snprintf(buf, PAGE_SIZE, "%d\n", platform->clockgate_delay); | ||
231 | } | ||
232 | static DEVICE_ATTR(clockgate_delay, S_IRWXUGO, clockgate_delay_show, | ||
233 | clockgate_delay_store); | ||
234 | |||
235 | static ssize_t counters_show(struct device *dev, | ||
236 | struct device_attribute *attr, char *buf) | ||
237 | { | ||
238 | struct platform_device *pdev = to_platform_device(dev); | ||
239 | struct gk20a *g = get_gk20a(pdev); | ||
240 | u32 busy_cycles, total_cycles; | ||
241 | ssize_t res; | ||
242 | |||
243 | gk20a_pmu_get_load_counters(g, &busy_cycles, &total_cycles); | ||
244 | |||
245 | res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles); | ||
246 | |||
247 | return res; | ||
248 | } | ||
249 | |||
250 | static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL); | ||
251 | static ssize_t counters_show_reset(struct device *dev, | ||
252 | struct device_attribute *attr, char *buf) | ||
253 | { | ||
254 | ssize_t res = counters_show(dev, attr, buf); | ||
255 | struct platform_device *pdev = to_platform_device(dev); | ||
256 | struct gk20a *g = get_gk20a(pdev); | ||
257 | |||
258 | gk20a_pmu_reset_load_counters(g); | ||
259 | |||
260 | return res; | ||
261 | } | ||
262 | |||
263 | static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL); | ||
264 | |||
265 | static ssize_t elpg_enable_store(struct device *device, | ||
266 | struct device_attribute *attr, const char *buf, size_t count) | ||
267 | { | ||
268 | struct platform_device *ndev = to_platform_device(device); | ||
269 | struct gk20a *g = get_gk20a(ndev); | ||
270 | unsigned long val = 0; | ||
271 | |||
272 | if (kstrtoul(buf, 10, &val) < 0) | ||
273 | return -EINVAL; | ||
274 | |||
275 | /* | ||
276 | * Since elpg is refcounted, we should not unnecessarily call | ||
277 | * enable/disable if it is already so. | ||
278 | */ | ||
279 | gk20a_channel_busy(g->dev); | ||
280 | if (val && !g->elpg_enabled) { | ||
281 | g->elpg_enabled = true; | ||
282 | gk20a_pmu_enable_elpg(g); | ||
283 | } else if (!val && g->elpg_enabled) { | ||
284 | g->elpg_enabled = false; | ||
285 | gk20a_pmu_disable_elpg(g); | ||
286 | } | ||
287 | gk20a_channel_idle(g->dev); | ||
288 | |||
289 | dev_info(device, "ELPG is %s.\n", g->elpg_enabled ? "enabled" : | ||
290 | "disabled"); | ||
291 | |||
292 | return count; | ||
293 | } | ||
294 | |||
295 | static ssize_t elpg_enable_read(struct device *device, | ||
296 | struct device_attribute *attr, char *buf) | ||
297 | { | ||
298 | struct platform_device *ndev = to_platform_device(device); | ||
299 | struct gk20a *g = get_gk20a(ndev); | ||
300 | |||
301 | return sprintf(buf, "%d\n", g->elpg_enabled ? 1 : 0); | ||
302 | } | ||
303 | |||
304 | static DEVICE_ATTR(elpg_enable, S_IRWXUGO, elpg_enable_read, elpg_enable_store); | ||
305 | |||
306 | void gk20a_remove_sysfs(struct device *dev) | ||
307 | { | ||
308 | device_remove_file(dev, &dev_attr_elcg_enable); | ||
309 | device_remove_file(dev, &dev_attr_blcg_enable); | ||
310 | device_remove_file(dev, &dev_attr_slcg_enable); | ||
311 | device_remove_file(dev, &dev_attr_ptimer_scale_factor); | ||
312 | device_remove_file(dev, &dev_attr_elpg_enable); | ||
313 | device_remove_file(dev, &dev_attr_counters); | ||
314 | device_remove_file(dev, &dev_attr_counters_reset); | ||
315 | device_remove_file(dev, &dev_attr_railgate_delay); | ||
316 | device_remove_file(dev, &dev_attr_clockgate_delay); | ||
317 | } | ||
318 | |||
319 | void gk20a_create_sysfs(struct platform_device *dev) | ||
320 | { | ||
321 | int error = 0; | ||
322 | |||
323 | error |= device_create_file(&dev->dev, &dev_attr_elcg_enable); | ||
324 | error |= device_create_file(&dev->dev, &dev_attr_blcg_enable); | ||
325 | error |= device_create_file(&dev->dev, &dev_attr_slcg_enable); | ||
326 | error |= device_create_file(&dev->dev, &dev_attr_ptimer_scale_factor); | ||
327 | error |= device_create_file(&dev->dev, &dev_attr_elpg_enable); | ||
328 | error |= device_create_file(&dev->dev, &dev_attr_counters); | ||
329 | error |= device_create_file(&dev->dev, &dev_attr_counters_reset); | ||
330 | error |= device_create_file(&dev->dev, &dev_attr_railgate_delay); | ||
331 | error |= device_create_file(&dev->dev, &dev_attr_clockgate_delay); | ||
332 | |||
333 | if (error) | ||
334 | dev_err(&dev->dev, "Failed to create sysfs attributes!\n"); | ||
335 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c new file mode 100644 index 00000000..59404f1d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c | |||
@@ -0,0 +1,333 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/gr_ctx_gk20a.c | ||
3 | * | ||
4 | * GK20A Graphics Context | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | |||
#include <linux/firmware.h>
#include <linux/string.h>

#include "gk20a.h"
#include "gr_ctx_gk20a.h"
#include "hw_gr_gk20a.h"
27 | |||
28 | static int gr_gk20a_alloc_load_netlist_u32(u32 *src, u32 len, | ||
29 | struct u32_list_gk20a *u32_list) | ||
30 | { | ||
31 | u32_list->count = (len + sizeof(u32) - 1) / sizeof(u32); | ||
32 | if (!alloc_u32_list_gk20a(u32_list)) | ||
33 | return -ENOMEM; | ||
34 | |||
35 | memcpy(u32_list->l, src, len); | ||
36 | |||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | static int gr_gk20a_alloc_load_netlist_av(u32 *src, u32 len, | ||
41 | struct av_list_gk20a *av_list) | ||
42 | { | ||
43 | av_list->count = len / sizeof(struct av_gk20a); | ||
44 | if (!alloc_av_list_gk20a(av_list)) | ||
45 | return -ENOMEM; | ||
46 | |||
47 | memcpy(av_list->l, src, len); | ||
48 | |||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static int gr_gk20a_alloc_load_netlist_aiv(u32 *src, u32 len, | ||
53 | struct aiv_list_gk20a *aiv_list) | ||
54 | { | ||
55 | aiv_list->count = len / sizeof(struct aiv_gk20a); | ||
56 | if (!alloc_aiv_list_gk20a(aiv_list)) | ||
57 | return -ENOMEM; | ||
58 | |||
59 | memcpy(aiv_list->l, src, len); | ||
60 | |||
61 | return 0; | ||
62 | } | ||
63 | |||
/*
 * Resolve the firmware file name for a netlist slot.
 *
 * @index: NETLIST_FINAL or one of NETLIST_SLOT_A..D
 * @name:  output buffer, at least MAX_NETLIST_NAME bytes
 *
 * Returns 0 on success, -1 if no image is configured for the slot.
 *
 * Note: the previous implementation passed the configured file name to
 * sprintf() as the format string; a '%' in a netlist name would have
 * been misinterpreted.  strcpy() copies the name literally.
 */
static int gr_gk20a_get_netlist_name(int index, char *name)
{
	switch (index) {
#ifdef GK20A_NETLIST_IMAGE_FW_NAME
	case NETLIST_FINAL:
		strcpy(name, GK20A_NETLIST_IMAGE_FW_NAME);
		return 0;
#endif
#ifdef GK20A_NETLIST_IMAGE_A
	case NETLIST_SLOT_A:
		strcpy(name, GK20A_NETLIST_IMAGE_A);
		return 0;
#endif
#ifdef GK20A_NETLIST_IMAGE_B
	case NETLIST_SLOT_B:
		strcpy(name, GK20A_NETLIST_IMAGE_B);
		return 0;
#endif
#ifdef GK20A_NETLIST_IMAGE_C
	case NETLIST_SLOT_C:
		strcpy(name, GK20A_NETLIST_IMAGE_C);
		return 0;
#endif
#ifdef GK20A_NETLIST_IMAGE_D
	case NETLIST_SLOT_D:
		strcpy(name, GK20A_NETLIST_IMAGE_D);
		return 0;
#endif
	default:
		return -1;
	}
}
98 | |||
99 | static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr) | ||
100 | { | ||
101 | struct device *d = dev_from_gk20a(g); | ||
102 | const struct firmware *netlist_fw; | ||
103 | struct netlist_image *netlist = NULL; | ||
104 | char name[MAX_NETLIST_NAME]; | ||
105 | u32 i, major_v = ~0, major_v_hw, netlist_num; | ||
106 | int net, max, err = -ENOENT; | ||
107 | |||
108 | gk20a_dbg_fn(""); | ||
109 | |||
110 | #ifdef GK20A_NETLIST_IMAGE_FW_NAME | ||
111 | net = NETLIST_FINAL; | ||
112 | max = 0; | ||
113 | major_v_hw = ~0; | ||
114 | g->gr.ctx_vars.dynamic = false; | ||
115 | #else | ||
116 | net = NETLIST_SLOT_A; | ||
117 | max = MAX_NETLIST; | ||
118 | major_v_hw = gk20a_readl(g, gr_fecs_ctx_state_store_major_rev_id_r()); | ||
119 | g->gr.ctx_vars.dynamic = true; | ||
120 | #endif | ||
121 | |||
122 | for (; net < max; net++) { | ||
123 | |||
124 | if (gr_gk20a_get_netlist_name(net, name) != 0) { | ||
125 | gk20a_warn(d, "invalid netlist index %d", net); | ||
126 | continue; | ||
127 | } | ||
128 | |||
129 | netlist_fw = gk20a_request_firmware(g, name); | ||
130 | if (!netlist_fw) { | ||
131 | gk20a_warn(d, "failed to load netlist %s", name); | ||
132 | continue; | ||
133 | } | ||
134 | |||
135 | netlist = (struct netlist_image *)netlist_fw->data; | ||
136 | |||
137 | for (i = 0; i < netlist->header.regions; i++) { | ||
138 | u32 *src = (u32 *)((u8 *)netlist + netlist->regions[i].data_offset); | ||
139 | u32 size = netlist->regions[i].data_size; | ||
140 | |||
141 | switch (netlist->regions[i].region_id) { | ||
142 | case NETLIST_REGIONID_FECS_UCODE_DATA: | ||
143 | gk20a_dbg_info("NETLIST_REGIONID_FECS_UCODE_DATA"); | ||
144 | err = gr_gk20a_alloc_load_netlist_u32( | ||
145 | src, size, &g->gr.ctx_vars.ucode.fecs.data); | ||
146 | if (err) | ||
147 | goto clean_up; | ||
148 | break; | ||
149 | case NETLIST_REGIONID_FECS_UCODE_INST: | ||
150 | gk20a_dbg_info("NETLIST_REGIONID_FECS_UCODE_INST"); | ||
151 | err = gr_gk20a_alloc_load_netlist_u32( | ||
152 | src, size, &g->gr.ctx_vars.ucode.fecs.inst); | ||
153 | if (err) | ||
154 | goto clean_up; | ||
155 | break; | ||
156 | case NETLIST_REGIONID_GPCCS_UCODE_DATA: | ||
157 | gk20a_dbg_info("NETLIST_REGIONID_GPCCS_UCODE_DATA"); | ||
158 | err = gr_gk20a_alloc_load_netlist_u32( | ||
159 | src, size, &g->gr.ctx_vars.ucode.gpccs.data); | ||
160 | if (err) | ||
161 | goto clean_up; | ||
162 | break; | ||
163 | case NETLIST_REGIONID_GPCCS_UCODE_INST: | ||
164 | gk20a_dbg_info("NETLIST_REGIONID_GPCCS_UCODE_INST"); | ||
165 | err = gr_gk20a_alloc_load_netlist_u32( | ||
166 | src, size, &g->gr.ctx_vars.ucode.gpccs.inst); | ||
167 | if (err) | ||
168 | goto clean_up; | ||
169 | break; | ||
170 | case NETLIST_REGIONID_SW_BUNDLE_INIT: | ||
171 | gk20a_dbg_info("NETLIST_REGIONID_SW_BUNDLE_INIT"); | ||
172 | err = gr_gk20a_alloc_load_netlist_av( | ||
173 | src, size, &g->gr.ctx_vars.sw_bundle_init); | ||
174 | if (err) | ||
175 | goto clean_up; | ||
176 | break; | ||
177 | case NETLIST_REGIONID_SW_METHOD_INIT: | ||
178 | gk20a_dbg_info("NETLIST_REGIONID_SW_METHOD_INIT"); | ||
179 | err = gr_gk20a_alloc_load_netlist_av( | ||
180 | src, size, &g->gr.ctx_vars.sw_method_init); | ||
181 | if (err) | ||
182 | goto clean_up; | ||
183 | break; | ||
184 | case NETLIST_REGIONID_SW_CTX_LOAD: | ||
185 | gk20a_dbg_info("NETLIST_REGIONID_SW_CTX_LOAD"); | ||
186 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
187 | src, size, &g->gr.ctx_vars.sw_ctx_load); | ||
188 | if (err) | ||
189 | goto clean_up; | ||
190 | break; | ||
191 | case NETLIST_REGIONID_SW_NON_CTX_LOAD: | ||
192 | gk20a_dbg_info("NETLIST_REGIONID_SW_NON_CTX_LOAD"); | ||
193 | err = gr_gk20a_alloc_load_netlist_av( | ||
194 | src, size, &g->gr.ctx_vars.sw_non_ctx_load); | ||
195 | if (err) | ||
196 | goto clean_up; | ||
197 | break; | ||
198 | case NETLIST_REGIONID_CTXREG_SYS: | ||
199 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_SYS"); | ||
200 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
201 | src, size, &g->gr.ctx_vars.ctxsw_regs.sys); | ||
202 | if (err) | ||
203 | goto clean_up; | ||
204 | break; | ||
205 | case NETLIST_REGIONID_CTXREG_GPC: | ||
206 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_GPC"); | ||
207 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
208 | src, size, &g->gr.ctx_vars.ctxsw_regs.gpc); | ||
209 | if (err) | ||
210 | goto clean_up; | ||
211 | break; | ||
212 | case NETLIST_REGIONID_CTXREG_TPC: | ||
213 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_TPC"); | ||
214 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
215 | src, size, &g->gr.ctx_vars.ctxsw_regs.tpc); | ||
216 | if (err) | ||
217 | goto clean_up; | ||
218 | break; | ||
219 | case NETLIST_REGIONID_CTXREG_ZCULL_GPC: | ||
220 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_ZCULL_GPC"); | ||
221 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
222 | src, size, &g->gr.ctx_vars.ctxsw_regs.zcull_gpc); | ||
223 | if (err) | ||
224 | goto clean_up; | ||
225 | break; | ||
226 | case NETLIST_REGIONID_CTXREG_PPC: | ||
227 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PPC"); | ||
228 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
229 | src, size, &g->gr.ctx_vars.ctxsw_regs.ppc); | ||
230 | if (err) | ||
231 | goto clean_up; | ||
232 | break; | ||
233 | case NETLIST_REGIONID_CTXREG_PM_SYS: | ||
234 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PM_SYS"); | ||
235 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
236 | src, size, &g->gr.ctx_vars.ctxsw_regs.pm_sys); | ||
237 | if (err) | ||
238 | goto clean_up; | ||
239 | break; | ||
240 | case NETLIST_REGIONID_CTXREG_PM_GPC: | ||
241 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PM_GPC"); | ||
242 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
243 | src, size, &g->gr.ctx_vars.ctxsw_regs.pm_gpc); | ||
244 | if (err) | ||
245 | goto clean_up; | ||
246 | break; | ||
247 | case NETLIST_REGIONID_CTXREG_PM_TPC: | ||
248 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PM_TPC"); | ||
249 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
250 | src, size, &g->gr.ctx_vars.ctxsw_regs.pm_tpc); | ||
251 | if (err) | ||
252 | goto clean_up; | ||
253 | break; | ||
254 | case NETLIST_REGIONID_BUFFER_SIZE: | ||
255 | g->gr.ctx_vars.buffer_size = *src; | ||
256 | gk20a_dbg_info("NETLIST_REGIONID_BUFFER_SIZE : %d", | ||
257 | g->gr.ctx_vars.buffer_size); | ||
258 | break; | ||
259 | case NETLIST_REGIONID_CTXSW_REG_BASE_INDEX: | ||
260 | g->gr.ctx_vars.regs_base_index = *src; | ||
261 | gk20a_dbg_info("NETLIST_REGIONID_CTXSW_REG_BASE_INDEX : %d", | ||
262 | g->gr.ctx_vars.regs_base_index); | ||
263 | break; | ||
264 | case NETLIST_REGIONID_MAJORV: | ||
265 | major_v = *src; | ||
266 | gk20a_dbg_info("NETLIST_REGIONID_MAJORV : %d", | ||
267 | major_v); | ||
268 | break; | ||
269 | case NETLIST_REGIONID_NETLIST_NUM: | ||
270 | netlist_num = *src; | ||
271 | gk20a_dbg_info("NETLIST_REGIONID_NETLIST_NUM : %d", | ||
272 | netlist_num); | ||
273 | break; | ||
274 | case NETLIST_REGIONID_CTXREG_PMPPC: | ||
275 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMPPC skipped"); | ||
276 | break; | ||
277 | default: | ||
278 | gk20a_warn(d, "unrecognized region %d skipped", i); | ||
279 | break; | ||
280 | } | ||
281 | } | ||
282 | |||
283 | if (net != NETLIST_FINAL && major_v != major_v_hw) { | ||
284 | gk20a_dbg_info("skip %s: major_v 0x%08x doesn't match hw 0x%08x", | ||
285 | name, major_v, major_v_hw); | ||
286 | goto clean_up; | ||
287 | } | ||
288 | |||
289 | g->gr.ctx_vars.valid = true; | ||
290 | g->gr.netlist = net; | ||
291 | |||
292 | release_firmware(netlist_fw); | ||
293 | gk20a_dbg_fn("done"); | ||
294 | goto done; | ||
295 | |||
296 | clean_up: | ||
297 | kfree(g->gr.ctx_vars.ucode.fecs.inst.l); | ||
298 | kfree(g->gr.ctx_vars.ucode.fecs.data.l); | ||
299 | kfree(g->gr.ctx_vars.ucode.gpccs.inst.l); | ||
300 | kfree(g->gr.ctx_vars.ucode.gpccs.data.l); | ||
301 | kfree(g->gr.ctx_vars.sw_bundle_init.l); | ||
302 | kfree(g->gr.ctx_vars.sw_method_init.l); | ||
303 | kfree(g->gr.ctx_vars.sw_ctx_load.l); | ||
304 | kfree(g->gr.ctx_vars.sw_non_ctx_load.l); | ||
305 | kfree(g->gr.ctx_vars.ctxsw_regs.sys.l); | ||
306 | kfree(g->gr.ctx_vars.ctxsw_regs.gpc.l); | ||
307 | kfree(g->gr.ctx_vars.ctxsw_regs.tpc.l); | ||
308 | kfree(g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l); | ||
309 | kfree(g->gr.ctx_vars.ctxsw_regs.ppc.l); | ||
310 | kfree(g->gr.ctx_vars.ctxsw_regs.pm_sys.l); | ||
311 | kfree(g->gr.ctx_vars.ctxsw_regs.pm_gpc.l); | ||
312 | kfree(g->gr.ctx_vars.ctxsw_regs.pm_tpc.l); | ||
313 | release_firmware(netlist_fw); | ||
314 | err = -ENOENT; | ||
315 | } | ||
316 | |||
317 | done: | ||
318 | if (g->gr.ctx_vars.valid) { | ||
319 | gk20a_dbg_info("netlist image %s loaded", name); | ||
320 | return 0; | ||
321 | } else { | ||
322 | gk20a_err(d, "failed to load netlist image!!"); | ||
323 | return err; | ||
324 | } | ||
325 | } | ||
326 | |||
/*
 * Entry point for grctx loading: query the chiplib simulator on
 * linsim platforms, otherwise parse a netlist firmware image.
 */
int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr)
{
	if (tegra_platform_is_linsim())
		return gr_gk20a_init_ctx_vars_sim(g, gr);

	return gr_gk20a_init_ctx_vars_fw(g, gr);
}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h new file mode 100644 index 00000000..909a166a --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h | |||
@@ -0,0 +1,149 @@ | |||
1 | /* | ||
2 | * GK20A Graphics Context | ||
3 | * | ||
4 | * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | #ifndef __GR_CTX_GK20A_H__ | ||
19 | #define __GR_CTX_GK20A_H__ | ||
20 | |||
21 | |||
22 | /* production netlist, one and only one from below */ | ||
23 | /*#undef GK20A_NETLIST_IMAGE_FW_NAME*/ | ||
24 | #define GK20A_NETLIST_IMAGE_FW_NAME GK20A_NETLIST_IMAGE_B | ||
25 | /* emulation netlists, match majorV with HW */ | ||
26 | #define GK20A_NETLIST_IMAGE_A "NETA_img.bin" | ||
27 | #define GK20A_NETLIST_IMAGE_B "NETB_img.bin" | ||
28 | #define GK20A_NETLIST_IMAGE_C "NETC_img.bin" | ||
29 | #define GK20A_NETLIST_IMAGE_D "NETD_img.bin" | ||
30 | |||
/*
 * Dummy union whose only purpose is to have the size of the longest
 * configured netlist image file name (each member includes its NUL
 * terminator via sizeof on the string literal); it backs the
 * MAX_NETLIST_NAME buffer-size constant.
 */
union __max_name {
#ifdef GK20A_NETLIST_IMAGE_A
	char __name_a[sizeof(GK20A_NETLIST_IMAGE_A)];
#endif
#ifdef GK20A_NETLIST_IMAGE_B
	char __name_b[sizeof(GK20A_NETLIST_IMAGE_B)];
#endif
#ifdef GK20A_NETLIST_IMAGE_C
	char __name_c[sizeof(GK20A_NETLIST_IMAGE_C)];
#endif
#ifdef GK20A_NETLIST_IMAGE_D
	char __name_d[sizeof(GK20A_NETLIST_IMAGE_D)];
#endif
};
45 | |||
46 | #define MAX_NETLIST_NAME sizeof(union __max_name) | ||
47 | |||
48 | /* index for emulation netlists */ | ||
49 | #define NETLIST_FINAL -1 | ||
50 | #define NETLIST_SLOT_A 0 | ||
51 | #define NETLIST_SLOT_B 1 | ||
52 | #define NETLIST_SLOT_C 2 | ||
53 | #define NETLIST_SLOT_D 3 | ||
54 | #define MAX_NETLIST 4 | ||
55 | |||
56 | /* netlist regions */ | ||
57 | #define NETLIST_REGIONID_FECS_UCODE_DATA 0 | ||
58 | #define NETLIST_REGIONID_FECS_UCODE_INST 1 | ||
59 | #define NETLIST_REGIONID_GPCCS_UCODE_DATA 2 | ||
60 | #define NETLIST_REGIONID_GPCCS_UCODE_INST 3 | ||
61 | #define NETLIST_REGIONID_SW_BUNDLE_INIT 4 | ||
62 | #define NETLIST_REGIONID_SW_CTX_LOAD 5 | ||
63 | #define NETLIST_REGIONID_SW_NON_CTX_LOAD 6 | ||
64 | #define NETLIST_REGIONID_SW_METHOD_INIT 7 | ||
65 | #define NETLIST_REGIONID_CTXREG_SYS 8 | ||
66 | #define NETLIST_REGIONID_CTXREG_GPC 9 | ||
67 | #define NETLIST_REGIONID_CTXREG_TPC 10 | ||
68 | #define NETLIST_REGIONID_CTXREG_ZCULL_GPC 11 | ||
69 | #define NETLIST_REGIONID_CTXREG_PM_SYS 12 | ||
70 | #define NETLIST_REGIONID_CTXREG_PM_GPC 13 | ||
71 | #define NETLIST_REGIONID_CTXREG_PM_TPC 14 | ||
72 | #define NETLIST_REGIONID_MAJORV 15 | ||
73 | #define NETLIST_REGIONID_BUFFER_SIZE 16 | ||
74 | #define NETLIST_REGIONID_CTXSW_REG_BASE_INDEX 17 | ||
75 | #define NETLIST_REGIONID_NETLIST_NUM 18 | ||
76 | #define NETLIST_REGIONID_CTXREG_PPC 19 | ||
77 | #define NETLIST_REGIONID_CTXREG_PMPPC 20 | ||
78 | |||
/* One region descriptor inside a netlist firmware image. */
struct netlist_region {
	u32 region_id;		/* NETLIST_REGIONID_* discriminator */
	u32 data_size;		/* payload size in bytes */
	u32 data_offset;	/* payload offset in bytes from image start */
};

/* Fixed header at the start of a netlist firmware image. */
struct netlist_image_header {
	u32 version;
	u32 regions;		/* number of netlist_region entries following */
};

/*
 * In-memory view of a netlist firmware image: the header followed by
 * header.regions descriptors.  regions[1] is the pre-C99 trailing-
 * array idiom; the real element count is header.regions.
 */
struct netlist_image {
	struct netlist_image_header header;
	struct netlist_region regions[1];
};
94 | |||
/* Address/value pair (bundle and method init entries). */
struct av_gk20a {
	u32 addr;
	u32 value;
};
/* Address/index/value triple (context-switch register entries). */
struct aiv_gk20a {
	u32 addr;
	u32 index;
	u32 value;
};
/* Counted array of aiv_gk20a entries; l is heap-allocated by
 * alloc_aiv_list_gk20a() below. */
struct aiv_list_gk20a {
	struct aiv_gk20a *l;
	u32 count;
};
/* Counted array of av_gk20a entries; l is heap-allocated by
 * alloc_av_list_gk20a() below. */
struct av_list_gk20a {
	struct av_gk20a *l;
	u32 count;
};
/* Counted array of raw u32 words (ucode segments); l is heap-allocated
 * by alloc_u32_list_gk20a() below. */
struct u32_list_gk20a {
	u32 *l;
	u32 count;
};
116 | |||
117 | static inline | ||
118 | struct av_gk20a *alloc_av_list_gk20a(struct av_list_gk20a *avl) | ||
119 | { | ||
120 | avl->l = kzalloc(avl->count * sizeof(*avl->l), GFP_KERNEL); | ||
121 | return avl->l; | ||
122 | } | ||
123 | |||
124 | static inline | ||
125 | struct aiv_gk20a *alloc_aiv_list_gk20a(struct aiv_list_gk20a *aivl) | ||
126 | { | ||
127 | aivl->l = kzalloc(aivl->count * sizeof(*aivl->l), GFP_KERNEL); | ||
128 | return aivl->l; | ||
129 | } | ||
130 | |||
131 | static inline | ||
132 | u32 *alloc_u32_list_gk20a(struct u32_list_gk20a *u32l) | ||
133 | { | ||
134 | u32l->l = kzalloc(u32l->count * sizeof(*u32l->l), GFP_KERNEL); | ||
135 | return u32l->l; | ||
136 | } | ||
137 | |||
/* FECS and GPCCS falcon ucode, split into instruction and data
 * segments, as loaded from a netlist image (or queried from chiplib
 * on simulation platforms). */
struct gr_ucode_gk20a {
	struct {
		struct u32_list_gk20a inst;
		struct u32_list_gk20a data;
	} gpccs, fecs;
};
144 | |||
145 | /* main entry for grctx loading */ | ||
146 | int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr); | ||
147 | int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr); | ||
148 | |||
149 | #endif /*__GR_CTX_GK20A_H__*/ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c new file mode 100644 index 00000000..12bba1fd --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c | |||
@@ -0,0 +1,256 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/gr_ctx_sim_gk20a.c | ||
3 | * | ||
4 | * GK20A Graphics Context for Simulation | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | |||
22 | #include "gk20a.h" | ||
23 | #include "gr_ctx_gk20a.h" | ||
24 | |||
25 | int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr) | ||
26 | { | ||
27 | int err = 0; | ||
28 | u32 i, temp; | ||
29 | char *size_path = NULL; | ||
30 | char *reg_path = NULL; | ||
31 | char *value_path = NULL; | ||
32 | |||
33 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, | ||
34 | "querying grctx info from chiplib"); | ||
35 | |||
36 | g->gr.ctx_vars.dynamic = true; | ||
37 | g->gr.netlist = GR_NETLIST_DYNAMIC; | ||
38 | |||
39 | /* query sizes and counts */ | ||
40 | gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS_COUNT", 0, | ||
41 | &g->gr.ctx_vars.ucode.fecs.inst.count); | ||
42 | gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS_COUNT", 0, | ||
43 | &g->gr.ctx_vars.ucode.fecs.data.count); | ||
44 | gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS_COUNT", 0, | ||
45 | &g->gr.ctx_vars.ucode.gpccs.inst.count); | ||
46 | gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS_COUNT", 0, | ||
47 | &g->gr.ctx_vars.ucode.gpccs.data.count); | ||
48 | gk20a_sim_esc_readl(g, "GRCTX_ALL_CTX_TOTAL_WORDS", 0, &temp); | ||
49 | g->gr.ctx_vars.buffer_size = temp << 2; | ||
50 | gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT_SIZE", 0, | ||
51 | &g->gr.ctx_vars.sw_bundle_init.count); | ||
52 | gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT_SIZE", 0, | ||
53 | &g->gr.ctx_vars.sw_method_init.count); | ||
54 | gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD_SIZE", 0, | ||
55 | &g->gr.ctx_vars.sw_ctx_load.count); | ||
56 | |||
57 | switch (0) { /*g->gr.ctx_vars.reg_init_override)*/ | ||
58 | #if 0 | ||
59 | case NV_REG_STR_RM_GR_REG_INIT_OVERRIDE_PROD_DIFF: | ||
60 | sizePath = "GRCTX_NONCTXSW_PROD_DIFF_REG_SIZE"; | ||
61 | regPath = "GRCTX_NONCTXSW_PROD_DIFF_REG:REG"; | ||
62 | valuePath = "GRCTX_NONCTXSW_PROD_DIFF_REG:VALUE"; | ||
63 | break; | ||
64 | #endif | ||
65 | default: | ||
66 | size_path = "GRCTX_NONCTXSW_REG_SIZE"; | ||
67 | reg_path = "GRCTX_NONCTXSW_REG:REG"; | ||
68 | value_path = "GRCTX_NONCTXSW_REG:VALUE"; | ||
69 | break; | ||
70 | } | ||
71 | |||
72 | gk20a_sim_esc_readl(g, size_path, 0, | ||
73 | &g->gr.ctx_vars.sw_non_ctx_load.count); | ||
74 | |||
75 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS_COUNT", 0, | ||
76 | &g->gr.ctx_vars.ctxsw_regs.sys.count); | ||
77 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC_COUNT", 0, | ||
78 | &g->gr.ctx_vars.ctxsw_regs.gpc.count); | ||
79 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC_COUNT", 0, | ||
80 | &g->gr.ctx_vars.ctxsw_regs.tpc.count); | ||
81 | #if 0 | ||
82 | /* looks to be unused, actually chokes the sim */ | ||
83 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0, | ||
84 | &g->gr.ctx_vars.ctxsw_regs.ppc.count); | ||
85 | #endif | ||
86 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC_COUNT", 0, | ||
87 | &g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count); | ||
88 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS_COUNT", 0, | ||
89 | &g->gr.ctx_vars.ctxsw_regs.pm_sys.count); | ||
90 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC_COUNT", 0, | ||
91 | &g->gr.ctx_vars.ctxsw_regs.pm_gpc.count); | ||
92 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC_COUNT", 0, | ||
93 | &g->gr.ctx_vars.ctxsw_regs.pm_tpc.count); | ||
94 | |||
95 | err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.inst); | ||
96 | err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.data); | ||
97 | err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.inst); | ||
98 | err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.data); | ||
99 | err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_bundle_init); | ||
100 | err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_method_init); | ||
101 | err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.sw_ctx_load); | ||
102 | err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_non_ctx_load); | ||
103 | err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.sys); | ||
104 | err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.gpc); | ||
105 | err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.tpc); | ||
106 | err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.zcull_gpc); | ||
107 | err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.ppc); | ||
108 | err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_sys); | ||
109 | err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_gpc); | ||
110 | err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_tpc); | ||
111 | |||
112 | if (err) | ||
113 | goto fail; | ||
114 | |||
115 | for (i = 0; i < g->gr.ctx_vars.ucode.fecs.inst.count; i++) | ||
116 | gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS", | ||
117 | i, &g->gr.ctx_vars.ucode.fecs.inst.l[i]); | ||
118 | |||
119 | for (i = 0; i < g->gr.ctx_vars.ucode.fecs.data.count; i++) | ||
120 | gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS", | ||
121 | i, &g->gr.ctx_vars.ucode.fecs.data.l[i]); | ||
122 | |||
123 | for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.inst.count; i++) | ||
124 | gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS", | ||
125 | i, &g->gr.ctx_vars.ucode.gpccs.inst.l[i]); | ||
126 | |||
127 | for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.data.count; i++) | ||
128 | gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS", | ||
129 | i, &g->gr.ctx_vars.ucode.gpccs.data.l[i]); | ||
130 | |||
131 | for (i = 0; i < g->gr.ctx_vars.sw_bundle_init.count; i++) { | ||
132 | struct av_gk20a *l = g->gr.ctx_vars.sw_bundle_init.l; | ||
133 | gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:ADDR", | ||
134 | i, &l[i].addr); | ||
135 | gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:VALUE", | ||
136 | i, &l[i].value); | ||
137 | } | ||
138 | |||
139 | for (i = 0; i < g->gr.ctx_vars.sw_method_init.count; i++) { | ||
140 | struct av_gk20a *l = g->gr.ctx_vars.sw_method_init.l; | ||
141 | gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:ADDR", | ||
142 | i, &l[i].addr); | ||
143 | gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:VALUE", | ||
144 | i, &l[i].value); | ||
145 | } | ||
146 | |||
147 | for (i = 0; i < g->gr.ctx_vars.sw_ctx_load.count; i++) { | ||
148 | struct aiv_gk20a *l = g->gr.ctx_vars.sw_ctx_load.l; | ||
149 | gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:ADDR", | ||
150 | i, &l[i].addr); | ||
151 | gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:INDEX", | ||
152 | i, &l[i].index); | ||
153 | gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:VALUE", | ||
154 | i, &l[i].value); | ||
155 | } | ||
156 | |||
157 | for (i = 0; i < g->gr.ctx_vars.sw_non_ctx_load.count; i++) { | ||
158 | struct av_gk20a *l = g->gr.ctx_vars.sw_non_ctx_load.l; | ||
159 | gk20a_sim_esc_readl(g, reg_path, i, &l[i].addr); | ||
160 | gk20a_sim_esc_readl(g, value_path, i, &l[i].value); | ||
161 | } | ||
162 | |||
163 | for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) { | ||
164 | struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.sys.l; | ||
165 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:ADDR", | ||
166 | i, &l[i].addr); | ||
167 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:INDEX", | ||
168 | i, &l[i].index); | ||
169 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:VALUE", | ||
170 | i, &l[i].value); | ||
171 | } | ||
172 | |||
173 | for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) { | ||
174 | struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.gpc.l; | ||
175 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:ADDR", | ||
176 | i, &l[i].addr); | ||
177 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:INDEX", | ||
178 | i, &l[i].index); | ||
179 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:VALUE", | ||
180 | i, &l[i].value); | ||
181 | } | ||
182 | |||
183 | for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) { | ||
184 | struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.tpc.l; | ||
185 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:ADDR", | ||
186 | i, &l[i].addr); | ||
187 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:INDEX", | ||
188 | i, &l[i].index); | ||
189 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:VALUE", | ||
190 | i, &l[i].value); | ||
191 | } | ||
192 | |||
193 | for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) { | ||
194 | struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.ppc.l; | ||
195 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:ADDR", | ||
196 | i, &l[i].addr); | ||
197 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:INDEX", | ||
198 | i, &l[i].index); | ||
199 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:VALUE", | ||
200 | i, &l[i].value); | ||
201 | } | ||
202 | |||
203 | for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count; i++) { | ||
204 | struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l; | ||
205 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:ADDR", | ||
206 | i, &l[i].addr); | ||
207 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:INDEX", | ||
208 | i, &l[i].index); | ||
209 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:VALUE", | ||
210 | i, &l[i].value); | ||
211 | } | ||
212 | |||
213 | for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_sys.count; i++) { | ||
214 | struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_sys.l; | ||
215 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:ADDR", | ||
216 | i, &l[i].addr); | ||
217 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:INDEX", | ||
218 | i, &l[i].index); | ||
219 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:VALUE", | ||
220 | i, &l[i].value); | ||
221 | } | ||
222 | |||
223 | for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_gpc.count; i++) { | ||
224 | struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_gpc.l; | ||
225 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:ADDR", | ||
226 | i, &l[i].addr); | ||
227 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:INDEX", | ||
228 | i, &l[i].index); | ||
229 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:VALUE", | ||
230 | i, &l[i].value); | ||
231 | } | ||
232 | |||
233 | for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_tpc.count; i++) { | ||
234 | struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_tpc.l; | ||
235 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:ADDR", | ||
236 | i, &l[i].addr); | ||
237 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:INDEX", | ||
238 | i, &l[i].index); | ||
239 | gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:VALUE", | ||
240 | i, &l[i].value); | ||
241 | } | ||
242 | |||
243 | g->gr.ctx_vars.valid = true; | ||
244 | |||
245 | gk20a_sim_esc_readl(g, "GRCTX_GEN_CTX_REGS_BASE_INDEX", 0, | ||
246 | &g->gr.ctx_vars.regs_base_index); | ||
247 | |||
248 | gk20a_dbg(gpu_dbg_info | gpu_dbg_fn, "finished querying grctx info from chiplib"); | ||
249 | return 0; | ||
250 | fail: | ||
251 | gk20a_err(dev_from_gk20a(g), | ||
252 | "failed querying grctx info from chiplib"); | ||
253 | return err; | ||
254 | |||
255 | } | ||
256 | |||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c new file mode 100644 index 00000000..0f93940b --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -0,0 +1,6747 @@ | |||
1 | /* | ||
2 | * GK20A Graphics | ||
3 | * | ||
4 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License along with | ||
16 | * this program; if not, write to the Free Software Foundation, Inc., | ||
17 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
18 | */ | ||
19 | |||
20 | #include <linux/delay.h> /* for udelay */ | ||
21 | #include <linux/mm.h> /* for totalram_pages */ | ||
22 | #include <linux/scatterlist.h> | ||
23 | #include <linux/tegra-soc.h> | ||
24 | #include <linux/nvhost_dbg_gpu_ioctl.h> | ||
25 | #include <linux/vmalloc.h> | ||
26 | #include <linux/dma-mapping.h> | ||
27 | #include <linux/firmware.h> | ||
28 | #include <linux/nvhost.h> | ||
29 | |||
30 | #include "gk20a.h" | ||
31 | #include "kind_gk20a.h" | ||
32 | #include "gr_ctx_gk20a.h" | ||
33 | |||
34 | #include "hw_ccsr_gk20a.h" | ||
35 | #include "hw_ctxsw_prog_gk20a.h" | ||
36 | #include "hw_fifo_gk20a.h" | ||
37 | #include "hw_gr_gk20a.h" | ||
38 | #include "hw_gmmu_gk20a.h" | ||
39 | #include "hw_mc_gk20a.h" | ||
40 | #include "hw_ram_gk20a.h" | ||
41 | #include "hw_pri_ringmaster_gk20a.h" | ||
42 | #include "hw_pri_ringstation_sys_gk20a.h" | ||
43 | #include "hw_pri_ringstation_gpc_gk20a.h" | ||
44 | #include "hw_pri_ringstation_fbp_gk20a.h" | ||
45 | #include "hw_proj_gk20a.h" | ||
46 | #include "hw_top_gk20a.h" | ||
47 | #include "hw_ltc_gk20a.h" | ||
48 | #include "hw_fb_gk20a.h" | ||
49 | #include "hw_therm_gk20a.h" | ||
50 | #include "hw_pbdma_gk20a.h" | ||
51 | #include "gr_pri_gk20a.h" | ||
52 | #include "regops_gk20a.h" | ||
53 | #include "dbg_gpu_gk20a.h" | ||
54 | |||
55 | #define BLK_SIZE (256) | ||
56 | |||
57 | static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va); | ||
58 | |||
59 | /* global ctx buffer */ | ||
60 | static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g); | ||
61 | static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g); | ||
62 | static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | ||
63 | struct channel_gk20a *c); | ||
64 | static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c); | ||
65 | |||
66 | /* channel gr ctx buffer */ | ||
67 | static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, | ||
68 | struct channel_gk20a *c); | ||
69 | static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c); | ||
70 | |||
71 | /* channel patch ctx buffer */ | ||
72 | static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, | ||
73 | struct channel_gk20a *c); | ||
74 | static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c); | ||
75 | |||
76 | /* golden ctx image */ | ||
77 | static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | ||
78 | struct channel_gk20a *c); | ||
79 | static int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | ||
80 | struct channel_gk20a *c); | ||
81 | |||
/*
 * Dump FECS falcon state to the kernel log for debugging.
 *
 * Logs the externally visible falcon registers (cpuctl, idlestate,
 * mailboxes, irq state, debug registers, engctl, current/next context)
 * and then uses the falcon ICD (internal debug) command interface to
 * read out internal registers: IMB, DMB, CSW, CTX, EXCI, plus several
 * PC/SP samples to show where the falcon is executing.
 *
 * Purely diagnostic; intended to be called on ucode timeout/error paths.
 */
void gk20a_fecs_dump_falcon_stats(struct gk20a *g)
{
	int i;

	gk20a_err(dev_from_gk20a(g), "gr_fecs_os_r : %d",
		gk20a_readl(g, gr_fecs_os_r()));
	gk20a_err(dev_from_gk20a(g), "gr_fecs_cpuctl_r : 0x%x",
		gk20a_readl(g, gr_fecs_cpuctl_r()));
	gk20a_err(dev_from_gk20a(g), "gr_fecs_idlestate_r : 0x%x",
		gk20a_readl(g, gr_fecs_idlestate_r()));
	gk20a_err(dev_from_gk20a(g), "gr_fecs_mailbox0_r : 0x%x",
		gk20a_readl(g, gr_fecs_mailbox0_r()));
	gk20a_err(dev_from_gk20a(g), "gr_fecs_mailbox1_r : 0x%x",
		gk20a_readl(g, gr_fecs_mailbox1_r()));
	gk20a_err(dev_from_gk20a(g), "gr_fecs_irqstat_r : 0x%x",
		gk20a_readl(g, gr_fecs_irqstat_r()));
	gk20a_err(dev_from_gk20a(g), "gr_fecs_irqmode_r : 0x%x",
		gk20a_readl(g, gr_fecs_irqmode_r()));
	gk20a_err(dev_from_gk20a(g), "gr_fecs_irqmask_r : 0x%x",
		gk20a_readl(g, gr_fecs_irqmask_r()));
	gk20a_err(dev_from_gk20a(g), "gr_fecs_irqdest_r : 0x%x",
		gk20a_readl(g, gr_fecs_irqdest_r()));
	gk20a_err(dev_from_gk20a(g), "gr_fecs_debug1_r : 0x%x",
		gk20a_readl(g, gr_fecs_debug1_r()));
	gk20a_err(dev_from_gk20a(g), "gr_fecs_debuginfo_r : 0x%x",
		gk20a_readl(g, gr_fecs_debuginfo_r()));

	/* dump every ctxsw mailbox, not just the named ones above */
	for (i = 0; i < gr_fecs_ctxsw_mailbox__size_1_v(); i++)
		gk20a_err(dev_from_gk20a(g), "gr_fecs_ctxsw_mailbox_r(%d) : 0x%x",
			i, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(i)));

	gk20a_err(dev_from_gk20a(g), "gr_fecs_engctl_r : 0x%x",
		gk20a_readl(g, gr_fecs_engctl_r()));
	gk20a_err(dev_from_gk20a(g), "gr_fecs_curctx_r : 0x%x",
		gk20a_readl(g, gr_fecs_curctx_r()));
	gk20a_err(dev_from_gk20a(g), "gr_fecs_nxtctx_r : 0x%x",
		gk20a_readl(g, gr_fecs_nxtctx_r()));

	/* each internal register read: issue an ICD rreg command for the
	 * register index, then fetch the result from icd_rdata */
	gk20a_writel(g, gr_fecs_icd_cmd_r(),
		gr_fecs_icd_cmd_opc_rreg_f() |
		gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
	gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_IMB : 0x%x",
		gk20a_readl(g, gr_fecs_icd_rdata_r()));

	gk20a_writel(g, gr_fecs_icd_cmd_r(),
		gr_fecs_icd_cmd_opc_rreg_f() |
		gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
	gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_DMB : 0x%x",
		gk20a_readl(g, gr_fecs_icd_rdata_r()));

	gk20a_writel(g, gr_fecs_icd_cmd_r(),
		gr_fecs_icd_cmd_opc_rreg_f() |
		gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
	gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_CSW : 0x%x",
		gk20a_readl(g, gr_fecs_icd_rdata_r()));

	gk20a_writel(g, gr_fecs_icd_cmd_r(),
		gr_fecs_icd_cmd_opc_rreg_f() |
		gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
	gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_CTX : 0x%x",
		gk20a_readl(g, gr_fecs_icd_rdata_r()));

	gk20a_writel(g, gr_fecs_icd_cmd_r(),
		gr_fecs_icd_cmd_opc_rreg_f() |
		gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
	gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_EXCI : 0x%x",
		gk20a_readl(g, gr_fecs_icd_rdata_r()));

	/* sample PC/SP a few times to get a rough execution profile of
	 * the (still running) falcon */
	for (i = 0; i < 4; i++) {
		gk20a_writel(g, gr_fecs_icd_cmd_r(),
			gr_fecs_icd_cmd_opc_rreg_f() |
			gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_PC));
		gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_PC : 0x%x",
			gk20a_readl(g, gr_fecs_icd_rdata_r()));

		gk20a_writel(g, gr_fecs_icd_cmd_r(),
			gr_fecs_icd_cmd_opc_rreg_f() |
			gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_SP));
		gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_SP : 0x%x",
			gk20a_readl(g, gr_fecs_icd_rdata_r()));
	}
}
164 | |||
165 | static void gr_gk20a_load_falcon_dmem(struct gk20a *g) | ||
166 | { | ||
167 | u32 i, ucode_u32_size; | ||
168 | const u32 *ucode_u32_data; | ||
169 | u32 checksum; | ||
170 | |||
171 | gk20a_dbg_fn(""); | ||
172 | |||
173 | gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) | | ||
174 | gr_gpccs_dmemc_blk_f(0) | | ||
175 | gr_gpccs_dmemc_aincw_f(1))); | ||
176 | |||
177 | ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count; | ||
178 | ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l; | ||
179 | |||
180 | for (i = 0, checksum = 0; i < ucode_u32_size; i++) { | ||
181 | gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]); | ||
182 | checksum += ucode_u32_data[i]; | ||
183 | } | ||
184 | |||
185 | gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) | | ||
186 | gr_fecs_dmemc_blk_f(0) | | ||
187 | gr_fecs_dmemc_aincw_f(1))); | ||
188 | |||
189 | ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count; | ||
190 | ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l; | ||
191 | |||
192 | for (i = 0, checksum = 0; i < ucode_u32_size; i++) { | ||
193 | gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]); | ||
194 | checksum += ucode_u32_data[i]; | ||
195 | } | ||
196 | gk20a_dbg_fn("done"); | ||
197 | } | ||
198 | |||
199 | static void gr_gk20a_load_falcon_imem(struct gk20a *g) | ||
200 | { | ||
201 | u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size; | ||
202 | const u32 *ucode_u32_data; | ||
203 | u32 tag, i, pad_start, pad_end; | ||
204 | u32 checksum; | ||
205 | |||
206 | gk20a_dbg_fn(""); | ||
207 | |||
208 | cfg = gk20a_readl(g, gr_fecs_cfg_r()); | ||
209 | fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg); | ||
210 | |||
211 | cfg = gk20a_readl(g, gr_gpc0_cfg_r()); | ||
212 | gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg); | ||
213 | |||
214 | /* Use the broadcast address to access all of the GPCCS units. */ | ||
215 | gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) | | ||
216 | gr_gpccs_imemc_blk_f(0) | | ||
217 | gr_gpccs_imemc_aincw_f(1))); | ||
218 | |||
219 | /* Setup the tags for the instruction memory. */ | ||
220 | tag = 0; | ||
221 | gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag)); | ||
222 | |||
223 | ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count; | ||
224 | ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l; | ||
225 | |||
226 | for (i = 0, checksum = 0; i < ucode_u32_size; i++) { | ||
227 | if (i && ((i % (256/sizeof(u32))) == 0)) { | ||
228 | tag++; | ||
229 | gk20a_writel(g, gr_gpccs_imemt_r(0), | ||
230 | gr_gpccs_imemt_tag_f(tag)); | ||
231 | } | ||
232 | gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]); | ||
233 | checksum += ucode_u32_data[i]; | ||
234 | } | ||
235 | |||
236 | pad_start = i*4; | ||
237 | pad_end = pad_start+(256-pad_start%256)+256; | ||
238 | for (i = pad_start; | ||
239 | (i < gpccs_imem_size * 256) && (i < pad_end); | ||
240 | i += 4) { | ||
241 | if (i && ((i % 256) == 0)) { | ||
242 | tag++; | ||
243 | gk20a_writel(g, gr_gpccs_imemt_r(0), | ||
244 | gr_gpccs_imemt_tag_f(tag)); | ||
245 | } | ||
246 | gk20a_writel(g, gr_gpccs_imemd_r(0), 0); | ||
247 | } | ||
248 | |||
249 | gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) | | ||
250 | gr_fecs_imemc_blk_f(0) | | ||
251 | gr_fecs_imemc_aincw_f(1))); | ||
252 | |||
253 | /* Setup the tags for the instruction memory. */ | ||
254 | tag = 0; | ||
255 | gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag)); | ||
256 | |||
257 | ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count; | ||
258 | ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l; | ||
259 | |||
260 | for (i = 0, checksum = 0; i < ucode_u32_size; i++) { | ||
261 | if (i && ((i % (256/sizeof(u32))) == 0)) { | ||
262 | tag++; | ||
263 | gk20a_writel(g, gr_fecs_imemt_r(0), | ||
264 | gr_fecs_imemt_tag_f(tag)); | ||
265 | } | ||
266 | gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]); | ||
267 | checksum += ucode_u32_data[i]; | ||
268 | } | ||
269 | |||
270 | pad_start = i*4; | ||
271 | pad_end = pad_start+(256-pad_start%256)+256; | ||
272 | for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) { | ||
273 | if (i && ((i % 256) == 0)) { | ||
274 | tag++; | ||
275 | gk20a_writel(g, gr_fecs_imemt_r(0), | ||
276 | gr_fecs_imemt_tag_f(tag)); | ||
277 | } | ||
278 | gk20a_writel(g, gr_fecs_imemd_r(0), 0); | ||
279 | } | ||
280 | } | ||
281 | |||
282 | static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, | ||
283 | u32 expect_delay) | ||
284 | { | ||
285 | u32 delay = expect_delay; | ||
286 | bool gr_enabled; | ||
287 | bool ctxsw_active; | ||
288 | bool gr_busy; | ||
289 | |||
290 | gk20a_dbg_fn(""); | ||
291 | |||
292 | do { | ||
293 | /* fmodel: host gets fifo_engine_status(gr) from gr | ||
294 | only when gr_status is read */ | ||
295 | gk20a_readl(g, gr_status_r()); | ||
296 | |||
297 | gr_enabled = gk20a_readl(g, mc_enable_r()) & | ||
298 | mc_enable_pgraph_enabled_f(); | ||
299 | |||
300 | ctxsw_active = gk20a_readl(g, | ||
301 | fifo_engine_status_r(ENGINE_GR_GK20A)) & | ||
302 | fifo_engine_status_ctxsw_in_progress_f(); | ||
303 | |||
304 | gr_busy = gk20a_readl(g, gr_engine_status_r()) & | ||
305 | gr_engine_status_value_busy_f(); | ||
306 | |||
307 | if (!gr_enabled || (!gr_busy && !ctxsw_active)) { | ||
308 | gk20a_dbg_fn("done"); | ||
309 | return 0; | ||
310 | } | ||
311 | |||
312 | usleep_range(delay, delay * 2); | ||
313 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | ||
314 | |||
315 | } while (time_before(jiffies, end_jiffies) | ||
316 | || !tegra_platform_is_silicon()); | ||
317 | |||
318 | gk20a_err(dev_from_gk20a(g), | ||
319 | "timeout, ctxsw busy : %d, gr busy : %d", | ||
320 | ctxsw_active, gr_busy); | ||
321 | |||
322 | return -EAGAIN; | ||
323 | } | ||
324 | |||
/*
 * Pulse the FECS ctxsw reset controls to reset GR context state.
 *
 * Sequence:
 *   1. (non-linsim only) force GR clocks on and wait for the request
 *      to be acknowledged, so the reset writes actually land;
 *   2. assert the reset: either the caller-supplied rst_mask, or the
 *      default "context reset enabled, halts/engine resets disabled"
 *      combination;
 *   3. read back + udelay(20) to ensure the write propagated;
 *   4. deassert all resets, read back + udelay(20) again;
 *   5. (non-linsim only) return the power mode to auto and wait for
 *      the acknowledgement.
 *
 * Always returns 0; clock-force / power-mode timeouts only warn.
 *
 * NOTE(review): `delay` is not reset to GR_IDLE_CHECK_DEFAULT before
 * the second polling loop, so that loop starts at whatever back-off
 * value the first loop reached — presumably benign, but confirm.
 */
static int gr_gk20a_ctx_reset(struct gk20a *g, u32 rst_mask)
{
	u32 delay = GR_IDLE_CHECK_DEFAULT;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	u32 reg;

	gk20a_dbg_fn("");

	if (!tegra_platform_is_linsim()) {
		/* Force clocks on */
		gk20a_writel(g, gr_fe_pwr_mode_r(),
			     gr_fe_pwr_mode_req_send_f() |
			     gr_fe_pwr_mode_mode_force_on_f());

		/* Wait for the clocks to indicate that they are on */
		do {
			reg = gk20a_readl(g, gr_fe_pwr_mode_r());

			if (gr_fe_pwr_mode_req_v(reg) ==
					gr_fe_pwr_mode_req_done_v())
				break;

			usleep_range(delay, delay * 2);
			delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);

		} while (time_before(jiffies, end_jiffies));

		if (!time_before(jiffies, end_jiffies)) {
			gk20a_err(dev_from_gk20a(g),
				   "failed to force the clocks on\n");
			WARN_ON(1);
		}
	}
	if (rst_mask) {
		/* caller-specified subset of reset bits */
		gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(), rst_mask);
	} else {
		/* default: reset context state only; leave halts and
		 * engine resets disabled */
		gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
			     gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
			     gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
			     gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
			     gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
			     gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
			     gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
			     gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f() |
			     gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f() |
			     gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f());
	}

	/* we need to read the reset register *and* wait for a moment to ensure
	 * reset propagation */

	gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
	udelay(20);

	/* deassert all resets */
	gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
		     gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f());

	/* we need to readl the reset and then wait a small moment after that */
	gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
	udelay(20);

	if (!tegra_platform_is_linsim()) {
		/* Set power mode back to auto */
		gk20a_writel(g, gr_fe_pwr_mode_r(),
			     gr_fe_pwr_mode_req_send_f() |
			     gr_fe_pwr_mode_mode_auto_f());

		/* Wait for the request to complete */
		end_jiffies = jiffies +
			msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
		do {
			reg = gk20a_readl(g, gr_fe_pwr_mode_r());

			if (gr_fe_pwr_mode_req_v(reg) ==
					gr_fe_pwr_mode_req_done_v())
				break;

			usleep_range(delay, delay * 2);
			delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);

		} while (time_before(jiffies, end_jiffies));

		if (!time_before(jiffies, end_jiffies))
			gk20a_warn(dev_from_gk20a(g),
				   "failed to set power mode to auto\n");
	}

	return 0;
}
423 | |||
424 | static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, | ||
425 | u32 *mailbox_ret, u32 opc_success, | ||
426 | u32 mailbox_ok, u32 opc_fail, | ||
427 | u32 mailbox_fail) | ||
428 | { | ||
429 | unsigned long end_jiffies = jiffies + | ||
430 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | ||
431 | u32 delay = GR_IDLE_CHECK_DEFAULT; | ||
432 | u32 check = WAIT_UCODE_LOOP; | ||
433 | u32 reg; | ||
434 | |||
435 | gk20a_dbg_fn(""); | ||
436 | |||
437 | while (check == WAIT_UCODE_LOOP) { | ||
438 | if (!time_before(jiffies, end_jiffies) && | ||
439 | tegra_platform_is_silicon()) | ||
440 | check = WAIT_UCODE_TIMEOUT; | ||
441 | |||
442 | reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id)); | ||
443 | |||
444 | if (mailbox_ret) | ||
445 | *mailbox_ret = reg; | ||
446 | |||
447 | switch (opc_success) { | ||
448 | case GR_IS_UCODE_OP_EQUAL: | ||
449 | if (reg == mailbox_ok) | ||
450 | check = WAIT_UCODE_OK; | ||
451 | break; | ||
452 | case GR_IS_UCODE_OP_NOT_EQUAL: | ||
453 | if (reg != mailbox_ok) | ||
454 | check = WAIT_UCODE_OK; | ||
455 | break; | ||
456 | case GR_IS_UCODE_OP_AND: | ||
457 | if (reg & mailbox_ok) | ||
458 | check = WAIT_UCODE_OK; | ||
459 | break; | ||
460 | case GR_IS_UCODE_OP_LESSER: | ||
461 | if (reg < mailbox_ok) | ||
462 | check = WAIT_UCODE_OK; | ||
463 | break; | ||
464 | case GR_IS_UCODE_OP_LESSER_EQUAL: | ||
465 | if (reg <= mailbox_ok) | ||
466 | check = WAIT_UCODE_OK; | ||
467 | break; | ||
468 | case GR_IS_UCODE_OP_SKIP: | ||
469 | /* do no success check */ | ||
470 | break; | ||
471 | default: | ||
472 | gk20a_err(dev_from_gk20a(g), | ||
473 | "invalid success opcode 0x%x", opc_success); | ||
474 | |||
475 | check = WAIT_UCODE_ERROR; | ||
476 | break; | ||
477 | } | ||
478 | |||
479 | switch (opc_fail) { | ||
480 | case GR_IS_UCODE_OP_EQUAL: | ||
481 | if (reg == mailbox_fail) | ||
482 | check = WAIT_UCODE_ERROR; | ||
483 | break; | ||
484 | case GR_IS_UCODE_OP_NOT_EQUAL: | ||
485 | if (reg != mailbox_fail) | ||
486 | check = WAIT_UCODE_ERROR; | ||
487 | break; | ||
488 | case GR_IS_UCODE_OP_AND: | ||
489 | if (reg & mailbox_fail) | ||
490 | check = WAIT_UCODE_ERROR; | ||
491 | break; | ||
492 | case GR_IS_UCODE_OP_LESSER: | ||
493 | if (reg < mailbox_fail) | ||
494 | check = WAIT_UCODE_ERROR; | ||
495 | break; | ||
496 | case GR_IS_UCODE_OP_LESSER_EQUAL: | ||
497 | if (reg <= mailbox_fail) | ||
498 | check = WAIT_UCODE_ERROR; | ||
499 | break; | ||
500 | case GR_IS_UCODE_OP_SKIP: | ||
501 | /* do no check on fail*/ | ||
502 | break; | ||
503 | default: | ||
504 | gk20a_err(dev_from_gk20a(g), | ||
505 | "invalid fail opcode 0x%x", opc_fail); | ||
506 | check = WAIT_UCODE_ERROR; | ||
507 | break; | ||
508 | } | ||
509 | |||
510 | usleep_range(delay, delay * 2); | ||
511 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | ||
512 | } | ||
513 | |||
514 | if (check == WAIT_UCODE_TIMEOUT) { | ||
515 | gk20a_err(dev_from_gk20a(g), | ||
516 | "timeout waiting on ucode response"); | ||
517 | gk20a_fecs_dump_falcon_stats(g); | ||
518 | return -1; | ||
519 | } else if (check == WAIT_UCODE_ERROR) { | ||
520 | gk20a_err(dev_from_gk20a(g), | ||
521 | "ucode method failed on mailbox=%d value=0x%08x", | ||
522 | mailbox_id, reg); | ||
523 | gk20a_fecs_dump_falcon_stats(g); | ||
524 | return -1; | ||
525 | } | ||
526 | |||
527 | gk20a_dbg_fn("done"); | ||
528 | return 0; | ||
529 | } | ||
530 | |||
/* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...)
 * We should replace most, if not all, fecs method calls to this instead. */
struct fecs_method_op_gk20a {
	struct {
		u32 addr;	/* gr_fecs_method_push_adr_* method id */
		u32 data;	/* written to gr_fecs_method_data before push */
	} method;

	struct {
		u32 id;		/* ctxsw mailbox index to use/poll */
		u32 data;	/* value written to the mailbox before the
				 * method is pushed (only when id != 0) */
		u32 clr;	/* bits cleared in mailbox 0 before push */
		u32 *ret;	/* optional: receives the final mailbox value */
		u32 ok;		/* operand for the success condition */
		u32 fail;	/* operand for the failure condition */
	} mailbox;

	struct {
		u32 ok;		/* GR_IS_UCODE_OP_* success comparison */
		u32 fail;	/* GR_IS_UCODE_OP_* failure comparison */
	} cond;

};
554 | |||
/*
 * Submit one FECS method described by @op and wait for completion.
 *
 * Serialized against other FECS methods via gr->fecs_mutex. The write
 * order is significant: optional mailbox preload, mailbox-0 clear,
 * method data, then the method push (which triggers execution).
 * Completion is then polled via gr_gk20a_ctx_wait_ucode() using the
 * conditions in op.cond / op.mailbox.
 *
 * Returns 0 on success, negative on timeout/ucode failure.
 */
int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
				   struct fecs_method_op_gk20a op)
{
	struct gr_gk20a *gr = &g->gr;
	int ret;

	mutex_lock(&gr->fecs_mutex);

	if (op.mailbox.id != 0)
		gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id),
			     op.mailbox.data);

	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
		gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr));

	gk20a_writel(g, gr_fecs_method_data_r(), op.method.data);
	gk20a_writel(g, gr_fecs_method_push_r(),
		gr_fecs_method_push_adr_f(op.method.addr));

	/* op.mb.id == 4 cases require waiting for completion on
	 * for op.mb.id == 0 */
	if (op.mailbox.id == 4)
		op.mailbox.id = 0;

	ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret,
				      op.cond.ok, op.mailbox.ok,
				      op.cond.fail, op.mailbox.fail);

	mutex_unlock(&gr->fecs_mutex);

	return ret;
}
587 | |||
588 | int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret) | ||
589 | { | ||
590 | return gr_gk20a_submit_fecs_method_op(g, | ||
591 | (struct fecs_method_op_gk20a) { | ||
592 | .method.addr = fecs_method, | ||
593 | .method.data = ~0, | ||
594 | .mailbox = { .id = 1, /*sideband?*/ | ||
595 | .data = ~0, .clr = ~0, .ret = ret, | ||
596 | .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), | ||
597 | .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), }, | ||
598 | .cond.ok = GR_IS_UCODE_OP_EQUAL, | ||
599 | .cond.fail = GR_IS_UCODE_OP_EQUAL }); | ||
600 | } | ||
601 | |||
602 | /* Stop processing (stall) context switches at FECS. | ||
603 | * The caller must hold the dbg_sessions_lock, else if mutliple stop methods | ||
604 | * are sent to the ucode in sequence, it can get into an undefined state. */ | ||
605 | int gr_gk20a_disable_ctxsw(struct gk20a *g) | ||
606 | { | ||
607 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | ||
608 | return gr_gk20a_ctrl_ctxsw(g, gr_fecs_method_push_adr_stop_ctxsw_v(), 0); | ||
609 | } | ||
610 | |||
611 | /* Start processing (continue) context switches at FECS */ | ||
612 | int gr_gk20a_enable_ctxsw(struct gk20a *g) | ||
613 | { | ||
614 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | ||
615 | return gr_gk20a_ctrl_ctxsw(g, gr_fecs_method_push_adr_start_ctxsw_v(), 0); | ||
616 | } | ||
617 | |||
618 | |||
/*
 * Write the GR context GPU VA into the channel's instance block.
 *
 * The FB/L2 flushes run before the CPU writes so that any pending GPU
 * writes to the instance block have landed; the L2 invalidate after
 * the writes makes the updated pointer visible to the GPU.
 *
 * Returns 0 on success, -ENOMEM if the instance block has no CPU
 * mapping.
 */
static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
{
	u32 addr_lo;
	u32 addr_hi;
	void *inst_ptr = NULL;

	gk20a_dbg_fn("");

	/* flush gpu_va before commit */
	gk20a_mm_fb_flush(c->g);
	gk20a_mm_l2_flush(c->g, true);

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	/* the wfi pointer is stored 4k-aligned: low word drops 12 bits */
	addr_lo = u64_lo32(gpu_va) >> 12;
	addr_hi = u64_hi32(gpu_va);

	gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(),
		 ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() |
		 ram_in_gr_wfi_ptr_lo_f(addr_lo));

	gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(),
		 ram_in_gr_wfi_ptr_hi_f(addr_hi));

	gk20a_mm_l2_invalidate(c->g);

	return 0;
}
649 | |||
650 | /* | ||
651 | * Context state can be written directly or "patched" at times. | ||
652 | * So that code can be used in either situation it is written | ||
653 | * using a series _ctx_patch_write(..., patch) statements. | ||
654 | * However any necessary cpu map/unmap and gpu l2 invalidates | ||
655 | * should be minimized (to avoid doing it once per patch write). | ||
656 | * Before a sequence of these set up with "_ctx_patch_write_begin" | ||
657 | * and close with "_ctx_patch_write_end." | ||
658 | */ | ||
659 | int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, | ||
660 | struct channel_ctx_gk20a *ch_ctx) | ||
661 | { | ||
662 | /* being defensive still... */ | ||
663 | if (ch_ctx->patch_ctx.cpu_va) { | ||
664 | gk20a_err(dev_from_gk20a(g), "nested ctx patch begin?"); | ||
665 | return -EBUSY; | ||
666 | } | ||
667 | |||
668 | ch_ctx->patch_ctx.cpu_va = vmap(ch_ctx->patch_ctx.pages, | ||
669 | PAGE_ALIGN(ch_ctx->patch_ctx.size) >> PAGE_SHIFT, | ||
670 | 0, pgprot_dmacoherent(PAGE_KERNEL)); | ||
671 | |||
672 | if (!ch_ctx->patch_ctx.cpu_va) | ||
673 | return -ENOMEM; | ||
674 | |||
675 | return 0; | ||
676 | } | ||
677 | |||
678 | int gr_gk20a_ctx_patch_write_end(struct gk20a *g, | ||
679 | struct channel_ctx_gk20a *ch_ctx) | ||
680 | { | ||
681 | /* being defensive still... */ | ||
682 | if (!ch_ctx->patch_ctx.cpu_va) { | ||
683 | gk20a_err(dev_from_gk20a(g), "dangling ctx patch end?"); | ||
684 | return -EINVAL; | ||
685 | } | ||
686 | |||
687 | vunmap(ch_ctx->patch_ctx.cpu_va); | ||
688 | ch_ctx->patch_ctx.cpu_va = NULL; | ||
689 | |||
690 | gk20a_mm_l2_invalidate(g); | ||
691 | return 0; | ||
692 | } | ||
693 | |||
694 | int gr_gk20a_ctx_patch_write(struct gk20a *g, | ||
695 | struct channel_ctx_gk20a *ch_ctx, | ||
696 | u32 addr, u32 data, bool patch) | ||
697 | { | ||
698 | u32 patch_slot = 0; | ||
699 | void *patch_ptr = NULL; | ||
700 | bool mapped_here = false; | ||
701 | |||
702 | BUG_ON(patch != 0 && ch_ctx == NULL); | ||
703 | |||
704 | if (patch) { | ||
705 | if (!ch_ctx) | ||
706 | return -EINVAL; | ||
707 | /* we added an optimization prolog, epilog | ||
708 | * to get rid of unnecessary maps and l2 invals. | ||
709 | * but be defensive still... */ | ||
710 | if (!ch_ctx->patch_ctx.cpu_va) { | ||
711 | int err; | ||
712 | gk20a_err(dev_from_gk20a(g), | ||
713 | "per-write ctx patch begin?"); | ||
714 | /* yes, gr_gk20a_ctx_patch_smpc causes this one */ | ||
715 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | ||
716 | if (err) | ||
717 | return err; | ||
718 | mapped_here = true; | ||
719 | } else | ||
720 | mapped_here = false; | ||
721 | |||
722 | patch_ptr = ch_ctx->patch_ctx.cpu_va; | ||
723 | patch_slot = ch_ctx->patch_ctx.data_count * 2; | ||
724 | |||
725 | gk20a_mem_wr32(patch_ptr, patch_slot++, addr); | ||
726 | gk20a_mem_wr32(patch_ptr, patch_slot++, data); | ||
727 | |||
728 | ch_ctx->patch_ctx.data_count++; | ||
729 | |||
730 | if (mapped_here) | ||
731 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); | ||
732 | |||
733 | } else | ||
734 | gk20a_writel(g, addr, data); | ||
735 | |||
736 | return 0; | ||
737 | } | ||
738 | |||
739 | static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, | ||
740 | struct channel_gk20a *c) | ||
741 | { | ||
742 | u32 inst_base_ptr = u64_lo32(c->inst_block.cpu_pa | ||
743 | >> ram_in_base_shift_v()); | ||
744 | u32 ret; | ||
745 | |||
746 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", | ||
747 | c->hw_chid, inst_base_ptr); | ||
748 | |||
749 | ret = gr_gk20a_submit_fecs_method_op(g, | ||
750 | (struct fecs_method_op_gk20a) { | ||
751 | .method.addr = gr_fecs_method_push_adr_bind_pointer_v(), | ||
752 | .method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) | | ||
753 | gr_fecs_current_ctx_target_vid_mem_f() | | ||
754 | gr_fecs_current_ctx_valid_f(1)), | ||
755 | .mailbox = { .id = 0, .data = 0, | ||
756 | .clr = 0x30, | ||
757 | .ret = NULL, | ||
758 | .ok = 0x10, | ||
759 | .fail = 0x20, }, | ||
760 | .cond.ok = GR_IS_UCODE_OP_AND, | ||
761 | .cond.fail = GR_IS_UCODE_OP_AND}); | ||
762 | if (ret) | ||
763 | gk20a_err(dev_from_gk20a(g), | ||
764 | "bind channel instance failed"); | ||
765 | |||
766 | return ret; | ||
767 | } | ||
768 | |||
/*
 * Program the zcull mode and buffer pointer into a channel's gr_ctx
 * image.
 *
 * Maps the gr_ctx pages into the kernel, validates that a separate
 * zcull buffer actually has a GPU VA when separate-buffer mode is
 * selected, optionally quiesces the GR engine around the update
 * (@disable_fifo), flushes FB/L2 before the CPU writes (the gr_ctx
 * buffer is GPU cacheable) and invalidates L2 afterwards so the GPU
 * sees the new values.
 *
 * The zcull pointer is packed into one word: low VA bits 8..31 in the
 * low 24 bits, high VA bits in the top 8 bits.
 *
 * Returns 0 on success, -EINVAL for a missing zcull VA, -ENOMEM if the
 * vmap fails, or the error from disabling/enabling engine activity.
 */
static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
			bool disable_fifo)
{
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
	u32 va_lo, va_hi, va;
	int ret = 0;
	void *ctx_ptr = NULL;

	gk20a_dbg_fn("");

	ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
			PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
			0, pgprot_dmacoherent(PAGE_KERNEL));
	if (!ctx_ptr)
		return -ENOMEM;

	/* separate-buffer mode is meaningless without a zcull buffer VA */
	if (ch_ctx->zcull_ctx.gpu_va == 0 &&
	    ch_ctx->zcull_ctx.ctx_sw_mode ==
		ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
		ret = -EINVAL;
		goto clean_up;
	}

	va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va);
	va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va);
	/* pack the 40-bit, 256-byte-aligned VA into a single word */
	va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);

	if (disable_fifo) {
		ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
		if (ret) {
			gk20a_err(dev_from_gk20a(g),
				"failed to disable gr engine activity\n");
			goto clean_up;
		}
	}

	/* Channel gr_ctx buffer is gpu cacheable.
	   Flush and invalidate before cpu update. */
	gk20a_mm_fb_flush(g);
	gk20a_mm_l2_flush(g, true);

	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0,
		 ch_ctx->zcull_ctx.ctx_sw_mode);

	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, va);

	if (disable_fifo) {
		ret = gk20a_fifo_enable_engine_activity(g, gr_info);
		if (ret) {
			gk20a_err(dev_from_gk20a(g),
				"failed to enable gr engine activity\n");
			/* NOTE(review): this path skips the L2 invalidate
			 * below — presumably acceptable since the enable
			 * itself failed, but confirm */
			goto clean_up;
		}
	}
	gk20a_mm_l2_invalidate(g);

clean_up:
	vunmap(ctx_ptr);

	return ret;
}
832 | |||
/*
 * Program the circular-buffer (beta/attrib and alpha) manager state:
 * the global constraint-logic sizes, the PD alpha-beta max output, and
 * per-GPC/per-PPC CB start offsets and sizes within the global attrib
 * buffer chunk.
 *
 * When @patch is true the writes are appended to the channel's patch
 * context (bracketed by a single begin/end pair here); otherwise they
 * go directly to the hardware (@c may then be irrelevant — ch_ctx
 * stays NULL and gr_gk20a_ctx_patch_write ignores it).
 *
 * Layout of the chunk: attrib (beta) CB regions for all TPCs first,
 * then the alpha CB regions, tracked by the two *_offset_in_chunk
 * cursors which advance by cb_size * TPCs-behind-this-PES per PPC.
 *
 * NOTE(review): pes_tpc_count appears to be indexed
 * [ppc_index][gpc_index] — confirm against the struct definition.
 *
 * Returns 0 on success or the error from the patch-begin mapping.
 */
static int gr_gk20a_commit_global_cb_manager(struct gk20a *g,
			struct channel_gk20a *c, bool patch)
{
	struct gr_gk20a *gr = &g->gr;
	struct channel_ctx_gk20a *ch_ctx = NULL;
	u32 attrib_offset_in_chunk = 0;
	u32 alpha_offset_in_chunk = 0;
	u32 pd_ab_max_output;
	u32 gpc_index, ppc_index;
	u32 temp;
	u32 cbm_cfg_size1, cbm_cfg_size2;

	gk20a_dbg_fn("");

	if (patch) {
		int err;
		ch_ctx = &c->ch_ctx;
		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
		if (err)
			return err;
	}

	/* global default beta/alpha CB sizes for the constraint logic */
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(),
		gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
		gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
		patch);

	/* convert the alpha CB size into PD max-output granules */
	pd_ab_max_output = (gr->alpha_cb_default_size *
		gr_gpc0_ppc0_cbm_cfg_size_granularity_v()) /
		gr_pd_ab_dist_cfg1_max_output_granularity_v();

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);

	/* alpha regions start after all attrib regions in the chunk */
	alpha_offset_in_chunk = attrib_offset_in_chunk +
		gr->tpc_count * gr->attrib_cb_size;

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		/* register-offset base for this GPC */
		temp = proj_gpc_stride_v() * gpc_index;
		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
		     ppc_index++) {
			/* per-PPC sizes scale with the TPCs behind the PES */
			cbm_cfg_size1 = gr->attrib_cb_default_size *
				gr->pes_tpc_count[ppc_index][gpc_index];
			cbm_cfg_size2 = gr->alpha_cb_default_size *
				gr->pes_tpc_count[ppc_index][gpc_index];

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_cfg_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) |
				gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) |
				gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch);

			attrib_offset_in_chunk += gr->attrib_cb_size *
				gr->pes_tpc_count[ppc_index][gpc_index];

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_cfg2_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) |
				gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch);

			alpha_offset_in_chunk += gr->alpha_cb_size *
				gr->pes_tpc_count[ppc_index][gpc_index];
		}
	}

	if (patch)
		gr_gk20a_ctx_patch_write_end(g, ch_ctx);

	return 0;
}
906 | |||
907 | static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, | ||
908 | struct channel_gk20a *c, bool patch) | ||
909 | { | ||
910 | struct gr_gk20a *gr = &g->gr; | ||
911 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
912 | u64 addr; | ||
913 | u32 size; | ||
914 | |||
915 | gk20a_dbg_fn(""); | ||
916 | if (patch) { | ||
917 | int err; | ||
918 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | ||
919 | if (err) | ||
920 | return err; | ||
921 | } | ||
922 | |||
923 | /* global pagepool buffer */ | ||
924 | addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >> | ||
925 | gr_scc_pagepool_base_addr_39_8_align_bits_v()) | | ||
926 | (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) << | ||
927 | (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v())); | ||
928 | |||
929 | size = gr->global_ctx_buffer[PAGEPOOL].size / | ||
930 | gr_scc_pagepool_total_pages_byte_granularity_v(); | ||
931 | |||
932 | if (size == gr_scc_pagepool_total_pages_hwmax_value_v()) | ||
933 | size = gr_scc_pagepool_total_pages_hwmax_v(); | ||
934 | |||
935 | gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d", | ||
936 | addr, size); | ||
937 | |||
938 | g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, patch); | ||
939 | |||
940 | /* global bundle cb */ | ||
941 | addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >> | ||
942 | gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) | | ||
943 | (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) << | ||
944 | (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v())); | ||
945 | |||
946 | size = gr->bundle_cb_default_size; | ||
947 | |||
948 | gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d", | ||
949 | addr, size); | ||
950 | |||
951 | g->ops.gr.commit_global_bundle_cb(g, ch_ctx, addr, size, patch); | ||
952 | |||
953 | /* global attrib cb */ | ||
954 | addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >> | ||
955 | gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) | | ||
956 | (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) << | ||
957 | (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); | ||
958 | |||
959 | gk20a_dbg_info("attrib cb addr : 0x%016llx", addr); | ||
960 | g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, patch); | ||
961 | |||
962 | if (patch) | ||
963 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); | ||
964 | |||
965 | return 0; | ||
966 | } | ||
967 | |||
968 | static void gr_gk20a_commit_global_attrib_cb(struct gk20a *g, | ||
969 | struct channel_ctx_gk20a *ch_ctx, | ||
970 | u64 addr, bool patch) | ||
971 | { | ||
972 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), | ||
973 | gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) | | ||
974 | gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch); | ||
975 | |||
976 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(), | ||
977 | gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) | | ||
978 | gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch); | ||
979 | } | ||
980 | |||
981 | static void gr_gk20a_commit_global_bundle_cb(struct gk20a *g, | ||
982 | struct channel_ctx_gk20a *ch_ctx, | ||
983 | u64 addr, u64 size, bool patch) | ||
984 | { | ||
985 | u32 data; | ||
986 | |||
987 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), | ||
988 | gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); | ||
989 | |||
990 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), | ||
991 | gr_scc_bundle_cb_size_div_256b_f(size) | | ||
992 | gr_scc_bundle_cb_size_valid_true_f(), patch); | ||
993 | |||
994 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_base_r(), | ||
995 | gr_gpcs_setup_bundle_cb_base_addr_39_8_f(addr), patch); | ||
996 | |||
997 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_size_r(), | ||
998 | gr_gpcs_setup_bundle_cb_size_div_256b_f(size) | | ||
999 | gr_gpcs_setup_bundle_cb_size_valid_true_f(), patch); | ||
1000 | |||
1001 | /* data for state_limit */ | ||
1002 | data = (g->gr.bundle_cb_default_size * | ||
1003 | gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) / | ||
1004 | gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(); | ||
1005 | |||
1006 | data = min_t(u32, data, g->gr.min_gpm_fifo_depth); | ||
1007 | |||
1008 | gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", | ||
1009 | g->gr.bundle_cb_token_limit, data); | ||
1010 | |||
1011 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), | ||
1012 | gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | | ||
1013 | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); | ||
1014 | |||
1015 | } | ||
1016 | |||
/*
 * gr_gk20a_commit_global_timeslice - enable/disable GR timeslicing
 *
 * Reads the current GPM PD cfg, PD alpha-beta dist cfg0, DS debug and MPC
 * VTG debug registers, ORs in the enable or disable field for each based on
 * gr->timeslice_mode, and writes them back (directly or via the channel's
 * ctx patch buffer when @patch is true).  In the enable case the PE VAF /
 * VSC-VPC fast-mode-switch bits are set as well.
 *
 * Returns 0 on success, or the error from gr_gk20a_ctx_patch_write_begin().
 */
static int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, bool patch)
{
	struct gr_gk20a *gr = &g->gr;
	struct channel_ctx_gk20a *ch_ctx = NULL;
	u32 gpm_pd_cfg;
	u32 pd_ab_dist_cfg0;
	u32 ds_debug;
	u32 mpc_vtg_debug;
	u32 pe_vaf;
	u32 pe_vsc_vpc;

	gk20a_dbg_fn("");

	/* read-modify-write: start from the live register values */
	gpm_pd_cfg = gk20a_readl(g, gr_gpcs_gpm_pd_cfg_r());
	pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r());
	ds_debug = gk20a_readl(g, gr_ds_debug_r());
	mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());

	if (patch) {
		int err;
		ch_ctx = &c->ch_ctx;
		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
		if (err)
			return err;
	}

	if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) {
		/* enable path additionally flips PE fast-mode-switch bits */
		pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
		pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());

		gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f() | gpm_pd_cfg;
		pe_vaf = gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f() | pe_vaf;
		pe_vsc_vpc = gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f() | pe_vsc_vpc;
		pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_en_f() | pd_ab_dist_cfg0;
		ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;

		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, patch);
		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, patch);
		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
	} else {
		/* NOTE(review): the disable path ORs in the *_disable_f()
		 * fields without clearing the enable bits; presumably the
		 * disable field values are zero or override — confirm against
		 * the hw headers. */
		gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
		pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
		ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;

		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
	}

	if (patch)
		gr_gk20a_ctx_patch_write_end(g, ch_ctx);

	return 0;
}
1077 | |||
/*
 * gr_gk20a_setup_rop_mapping - program the screen-tile to GPC mapping
 *
 * Writes the gr->map_tiles[] tile-to-GPC table (36 tile slots packed six per
 * register; only the first 32 entries come from the table, the rest are 0)
 * into the CRSTR, WWDX and RSTR2D map registers, plus the normalization
 * shift/entries and the per-power-of-two modulo coefficients used by WWDX.
 *
 * Returns 0 on success, -1 if gr->map_tiles has not been allocated.
 */
int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
{
	u32 norm_entries, norm_shift;
	u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod;
	u32 map0, map1, map2, map3, map4, map5;

	if (!gr->map_tiles)
		return -1;

	gk20a_dbg_fn("");

	gk20a_writel(g, gr_crstr_map_table_cfg_r(),
		gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) |
		gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count));

	/* pack six map_tiles entries per 32-bit map register */
	map0 = gr_crstr_gpc_map0_tile0_f(gr->map_tiles[0]) |
		gr_crstr_gpc_map0_tile1_f(gr->map_tiles[1]) |
		gr_crstr_gpc_map0_tile2_f(gr->map_tiles[2]) |
		gr_crstr_gpc_map0_tile3_f(gr->map_tiles[3]) |
		gr_crstr_gpc_map0_tile4_f(gr->map_tiles[4]) |
		gr_crstr_gpc_map0_tile5_f(gr->map_tiles[5]);

	map1 = gr_crstr_gpc_map1_tile6_f(gr->map_tiles[6]) |
		gr_crstr_gpc_map1_tile7_f(gr->map_tiles[7]) |
		gr_crstr_gpc_map1_tile8_f(gr->map_tiles[8]) |
		gr_crstr_gpc_map1_tile9_f(gr->map_tiles[9]) |
		gr_crstr_gpc_map1_tile10_f(gr->map_tiles[10]) |
		gr_crstr_gpc_map1_tile11_f(gr->map_tiles[11]);

	map2 = gr_crstr_gpc_map2_tile12_f(gr->map_tiles[12]) |
		gr_crstr_gpc_map2_tile13_f(gr->map_tiles[13]) |
		gr_crstr_gpc_map2_tile14_f(gr->map_tiles[14]) |
		gr_crstr_gpc_map2_tile15_f(gr->map_tiles[15]) |
		gr_crstr_gpc_map2_tile16_f(gr->map_tiles[16]) |
		gr_crstr_gpc_map2_tile17_f(gr->map_tiles[17]);

	map3 = gr_crstr_gpc_map3_tile18_f(gr->map_tiles[18]) |
		gr_crstr_gpc_map3_tile19_f(gr->map_tiles[19]) |
		gr_crstr_gpc_map3_tile20_f(gr->map_tiles[20]) |
		gr_crstr_gpc_map3_tile21_f(gr->map_tiles[21]) |
		gr_crstr_gpc_map3_tile22_f(gr->map_tiles[22]) |
		gr_crstr_gpc_map3_tile23_f(gr->map_tiles[23]);

	map4 = gr_crstr_gpc_map4_tile24_f(gr->map_tiles[24]) |
		gr_crstr_gpc_map4_tile25_f(gr->map_tiles[25]) |
		gr_crstr_gpc_map4_tile26_f(gr->map_tiles[26]) |
		gr_crstr_gpc_map4_tile27_f(gr->map_tiles[27]) |
		gr_crstr_gpc_map4_tile28_f(gr->map_tiles[28]) |
		gr_crstr_gpc_map4_tile29_f(gr->map_tiles[29]);

	/* tiles 32-35 are hardwired to zero; only 32 table entries are used */
	map5 = gr_crstr_gpc_map5_tile30_f(gr->map_tiles[30]) |
		gr_crstr_gpc_map5_tile31_f(gr->map_tiles[31]) |
		gr_crstr_gpc_map5_tile32_f(0) |
		gr_crstr_gpc_map5_tile33_f(0) |
		gr_crstr_gpc_map5_tile34_f(0) |
		gr_crstr_gpc_map5_tile35_f(0);

	gk20a_writel(g, gr_crstr_gpc_map0_r(), map0);
	gk20a_writel(g, gr_crstr_gpc_map1_r(), map1);
	gk20a_writel(g, gr_crstr_gpc_map2_r(), map2);
	gk20a_writel(g, gr_crstr_gpc_map3_r(), map3);
	gk20a_writel(g, gr_crstr_gpc_map4_r(), map4);
	gk20a_writel(g, gr_crstr_gpc_map5_r(), map5);

	/* choose norm_shift so norm_entries = tpc_count << norm_shift lands
	 * in [16, 32) for tpc_count 1..15, 0 otherwise */
	switch (gr->tpc_count) {
	case 1:
		norm_shift = 4;
		break;
	case 2:
	case 3:
		norm_shift = 3;
		break;
	case 4:
	case 5:
	case 6:
	case 7:
		norm_shift = 2;
		break;
	case 8:
	case 9:
	case 10:
	case 11:
	case 12:
	case 13:
	case 14:
	case 15:
		norm_shift = 1;
		break;
	default:
		norm_shift = 0;
		break;
	}

	norm_entries = gr->tpc_count << norm_shift;
	/* precomputed (2^k mod norm_entries) for k = 5..11, used by WWDX */
	coeff5_mod = (1 << 5) % norm_entries;
	coeff6_mod = (1 << 6) % norm_entries;
	coeff7_mod = (1 << 7) % norm_entries;
	coeff8_mod = (1 << 8) % norm_entries;
	coeff9_mod = (1 << 9) % norm_entries;
	coeff10_mod = (1 << 10) % norm_entries;
	coeff11_mod = (1 << 11) % norm_entries;

	gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
		gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) |
		gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
		gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
		gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
		gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count));

	gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
		gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
		gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
		gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
		gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
		gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
		gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));

	/* WWDX sees the same tile-to-GPC map */
	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);

	gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
		gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) |
		gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count));

	/* RSTR2D sees the same tile-to-GPC map */
	gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0);
	gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1);
	gk20a_writel(g, gr_rstr2d_gpc_map2_r(), map2);
	gk20a_writel(g, gr_rstr2d_gpc_map3_r(), map3);
	gk20a_writel(g, gr_rstr2d_gpc_map4_r(), map4);
	gk20a_writel(g, gr_rstr2d_gpc_map5_r(), map5);

	return 0;
}
1215 | |||
1216 | static inline u32 count_bits(u32 mask) | ||
1217 | { | ||
1218 | u32 temp = mask; | ||
1219 | u32 count; | ||
1220 | for (count = 0; temp != 0; count++) | ||
1221 | temp &= temp - 1; | ||
1222 | |||
1223 | return count; | ||
1224 | } | ||
1225 | |||
1226 | static inline u32 clear_count_bits(u32 num, u32 clear_count) | ||
1227 | { | ||
1228 | u32 count = clear_count; | ||
1229 | for (; (num != 0) && (count != 0); count--) | ||
1230 | num &= num - 1; | ||
1231 | |||
1232 | return num; | ||
1233 | } | ||
1234 | |||
/*
 * gr_gk20a_setup_alpha_beta_tables - build the PD alpha/beta ratio tables
 *
 * For each of 32 table rows, splits the chip's TPCs into an "alpha" and a
 * "beta" group; the target alpha size grows linearly with the row index
 * (tpc_count * row / rows, at least 1).  Within each GPC the split is made
 * per-PES using the TPC floorsweep masks, alternating which group is filled
 * first to balance the assignment.  The resulting per-GPC alpha/beta TPC
 * masks are packed four GPCs per register and written to the
 * gr_pd_alpha/beta_ratio_table registers.  Always returns 0.
 */
static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g,
					struct gr_gk20a *gr)
{
	u32 table_index_bits = 5;
	u32 rows = (1 << table_index_bits);
	u32 row_stride = gr_pd_alpha_ratio_table__size_1_v() / rows;

	u32 row;
	u32 index;
	u32 gpc_index;
	u32 gpcs_per_reg = 4;
	u32 pes_index;
	u32 tpc_count_pes;
	u32 num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();

	u32 alpha_target, beta_target;
	u32 alpha_bits, beta_bits;
	u32 alpha_mask, beta_mask, partial_mask;
	u32 reg_offset;
	bool assign_alpha;

	/* scratch images of the ratio-table registers, built row by row */
	u32 map_alpha[gr_pd_alpha_ratio_table__size_1_v()];
	u32 map_beta[gr_pd_alpha_ratio_table__size_1_v()];
	u32 map_reg_used[gr_pd_alpha_ratio_table__size_1_v()];

	gk20a_dbg_fn("");

	memset(map_alpha, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
	memset(map_beta, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
	memset(map_reg_used, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));

	for (row = 0; row < rows; ++row) {
		/* alpha share grows with the row; always at least one TPC */
		alpha_target = max_t(u32, gr->tpc_count * row / rows, 1);
		beta_target = gr->tpc_count - alpha_target;

		/* start filling whichever group needs fewer TPCs */
		assign_alpha = (alpha_target < beta_target);

		for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
			reg_offset = (row * row_stride) + (gpc_index / gpcs_per_reg);
			alpha_mask = beta_mask = 0;

			for (pes_index = 0; pes_index < num_pes_per_gpc; pes_index++) {
				tpc_count_pes = gr->pes_tpc_count[pes_index][gpc_index];

				/* give this PES's TPCs wholly to one group
				 * (unless that group's target is already 0) */
				if (assign_alpha) {
					alpha_bits = (alpha_target == 0) ? 0 : tpc_count_pes;
					beta_bits = tpc_count_pes - alpha_bits;
				} else {
					beta_bits = (beta_target == 0) ? 0 : tpc_count_pes;
					alpha_bits = tpc_count_pes - beta_bits;
				}

				/* alpha gets the highest set bits of the PES
				 * mask; beta gets the remainder */
				partial_mask = gr->pes_tpc_mask[pes_index][gpc_index];
				partial_mask = clear_count_bits(partial_mask, tpc_count_pes - alpha_bits);
				alpha_mask |= partial_mask;

				partial_mask = gr->pes_tpc_mask[pes_index][gpc_index] ^ partial_mask;
				beta_mask |= partial_mask;

				alpha_target -= min(alpha_bits, alpha_target);
				beta_target -= min(beta_bits, beta_target);

				/* alternate groups whenever we assigned bits */
				if ((alpha_bits > 0) || (beta_bits > 0))
					assign_alpha = !assign_alpha;
			}

			/* pack this GPC's masks into its 8-bit register slot */
			switch (gpc_index % gpcs_per_reg) {
			case 0:
				map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n0_mask_f(alpha_mask);
				map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n0_mask_f(beta_mask);
				break;
			case 1:
				map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n1_mask_f(alpha_mask);
				map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n1_mask_f(beta_mask);
				break;
			case 2:
				map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n2_mask_f(alpha_mask);
				map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n2_mask_f(beta_mask);
				break;
			case 3:
				map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n3_mask_f(alpha_mask);
				map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n3_mask_f(beta_mask);
				break;
			}
			map_reg_used[reg_offset] = true;
		}
	}

	/* flush only the register slots actually populated above */
	for (index = 0; index < gr_pd_alpha_ratio_table__size_1_v(); index++) {
		if (map_reg_used[index]) {
			gk20a_writel(g, gr_pd_alpha_ratio_table_r(index), map_alpha[index]);
			gk20a_writel(g, gr_pd_beta_ratio_table_r(index), map_beta[index]);
		}
	}

	return 0;
}
1332 | |||
1333 | static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g) | ||
1334 | { | ||
1335 | struct gr_gk20a *gr = &g->gr; | ||
1336 | u32 tpc_index, gpc_index; | ||
1337 | u32 tpc_offset, gpc_offset; | ||
1338 | u32 sm_id = 0, gpc_id = 0; | ||
1339 | u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()]; | ||
1340 | u32 tpc_per_gpc; | ||
1341 | u32 max_ways_evict = INVALID_MAX_WAYS; | ||
1342 | u32 l1c_dbg_reg_val; | ||
1343 | |||
1344 | gk20a_dbg_fn(""); | ||
1345 | |||
1346 | for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { | ||
1347 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
1348 | gpc_offset = proj_gpc_stride_v() * gpc_index; | ||
1349 | if (tpc_index < gr->gpc_tpc_count[gpc_index]) { | ||
1350 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; | ||
1351 | |||
1352 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
1353 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | ||
1354 | gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1355 | gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id)); | ||
1356 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset, | ||
1357 | gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); | ||
1358 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1359 | gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); | ||
1360 | |||
1361 | sm_id_to_gpc_id[sm_id] = gpc_index; | ||
1362 | sm_id++; | ||
1363 | } | ||
1364 | |||
1365 | gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset, | ||
1366 | gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1367 | gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset, | ||
1368 | gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1369 | } | ||
1370 | } | ||
1371 | |||
1372 | for (tpc_index = 0, gpc_id = 0; | ||
1373 | tpc_index < gr_pd_num_tpc_per_gpc__size_1_v(); | ||
1374 | tpc_index++, gpc_id += 8) { | ||
1375 | |||
1376 | if (gpc_id >= gr->gpc_count) | ||
1377 | gpc_id = 0; | ||
1378 | |||
1379 | tpc_per_gpc = | ||
1380 | gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) | | ||
1381 | gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) | | ||
1382 | gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) | | ||
1383 | gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) | | ||
1384 | gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) | | ||
1385 | gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) | | ||
1386 | gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) | | ||
1387 | gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]); | ||
1388 | |||
1389 | gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc); | ||
1390 | gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc); | ||
1391 | } | ||
1392 | |||
1393 | /* gr__setup_pd_mapping stubbed for gk20a */ | ||
1394 | gr_gk20a_setup_rop_mapping(g, gr); | ||
1395 | if (g->ops.gr.setup_alpha_beta_tables) | ||
1396 | g->ops.gr.setup_alpha_beta_tables(g, gr); | ||
1397 | |||
1398 | if (gr->num_fbps == 1) | ||
1399 | max_ways_evict = 9; | ||
1400 | |||
1401 | if (max_ways_evict != INVALID_MAX_WAYS) | ||
1402 | g->ops.ltc.set_max_ways_evict_last(g, max_ways_evict); | ||
1403 | |||
1404 | for (gpc_index = 0; | ||
1405 | gpc_index < gr_pd_dist_skip_table__size_1_v() * 4; | ||
1406 | gpc_index += 4) { | ||
1407 | |||
1408 | gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4), | ||
1409 | gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) || | ||
1410 | gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) || | ||
1411 | gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) || | ||
1412 | gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); | ||
1413 | } | ||
1414 | |||
1415 | gk20a_writel(g, gr_cwd_fs_r(), | ||
1416 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | ||
1417 | gr_cwd_fs_num_tpcs_f(gr->tpc_count)); | ||
1418 | |||
1419 | gk20a_writel(g, gr_bes_zrop_settings_r(), | ||
1420 | gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps)); | ||
1421 | gk20a_writel(g, gr_bes_crop_settings_r(), | ||
1422 | gr_bes_crop_settings_num_active_fbps_f(gr->num_fbps)); | ||
1423 | |||
1424 | /* turn on cya15 bit for a default val that missed the cut */ | ||
1425 | l1c_dbg_reg_val = gk20a_readl(g, gr_gpc0_tpc0_l1c_dbg_r()); | ||
1426 | l1c_dbg_reg_val |= gr_gpc0_tpc0_l1c_dbg_cya15_en_f(); | ||
1427 | gk20a_writel(g, gr_gpc0_tpc0_l1c_dbg_r(), l1c_dbg_reg_val); | ||
1428 | |||
1429 | return 0; | ||
1430 | } | ||
1431 | |||
1432 | static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type) | ||
1433 | { | ||
1434 | struct gk20a *g = c->g; | ||
1435 | int ret; | ||
1436 | |||
1437 | u32 inst_base_ptr = | ||
1438 | u64_lo32(c->inst_block.cpu_pa | ||
1439 | >> ram_in_base_shift_v()); | ||
1440 | |||
1441 | |||
1442 | gk20a_dbg_fn(""); | ||
1443 | |||
1444 | ret = gr_gk20a_submit_fecs_method_op(g, | ||
1445 | (struct fecs_method_op_gk20a) { | ||
1446 | .method.addr = save_type, | ||
1447 | .method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) | | ||
1448 | gr_fecs_current_ctx_target_vid_mem_f() | | ||
1449 | gr_fecs_current_ctx_valid_f(1)), | ||
1450 | .mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL, | ||
1451 | .ok = 1, .fail = 2, | ||
1452 | }, | ||
1453 | .cond.ok = GR_IS_UCODE_OP_AND, | ||
1454 | .cond.fail = GR_IS_UCODE_OP_AND, | ||
1455 | }); | ||
1456 | |||
1457 | if (ret) | ||
1458 | gk20a_err(dev_from_gk20a(g), "save context image failed"); | ||
1459 | |||
1460 | return ret; | ||
1461 | } | ||
1462 | |||
/*
 * gk20a_init_sw_bundle - load the SW bundle init list into the GR pipe
 *
 * With pipe-mode override enabled, streams each (addr, value) pair of
 * ctx_vars.sw_bundle_init through the bundle data/address registers,
 * skipping redundant data writes when the value repeats, and waits for GR
 * idle after the GO_IDLE bundle.
 *
 * Returns 0 on success; otherwise the OR of the gr_gk20a_wait_idle()
 * results accumulated across the loop (non-zero on any timeout).
 * NOTE(review): the return type is u32 while wait_idle presumably returns a
 * negative errno; callers only test for non-zero — confirm if ever
 * propagated as an errno.
 */
static u32 gk20a_init_sw_bundle(struct gk20a *g)
{
	struct av_list_gk20a *sw_bundle_init = &g->gr.ctx_vars.sw_bundle_init;
	u32 last_bundle_data = 0;
	u32 err = 0;
	int i;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));

	/* enable pipe mode override */
	gk20a_writel(g, gr_pipe_bundle_config_r(),
		gr_pipe_bundle_config_override_pipe_mode_enabled_f());

	/* load bundle init */
	for (i = 0; i < sw_bundle_init->count; i++) {

		/* only rewrite the data register when the value changes */
		if (i == 0 || last_bundle_data != sw_bundle_init->l[i].value) {
			gk20a_writel(g, gr_pipe_bundle_data_r(),
				sw_bundle_init->l[i].value);
			last_bundle_data = sw_bundle_init->l[i].value;
		}

		/* writing the address register issues the bundle */
		gk20a_writel(g, gr_pipe_bundle_address_r(),
			sw_bundle_init->l[i].addr);

		/* the GO_IDLE bundle requires GR to drain before continuing */
		if (gr_pipe_bundle_address_value_v(sw_bundle_init->l[i].addr) ==
		    GR_GO_IDLE_BUNDLE)
			err |= gr_gk20a_wait_idle(g, end_jiffies,
					GR_IDLE_CHECK_DEFAULT);
	}

	/* disable pipe mode override */
	gk20a_writel(g, gr_pipe_bundle_config_r(),
		gr_pipe_bundle_config_override_pipe_mode_disabled_f());

	return err;
}
1500 | |||
1501 | /* init global golden image from a fresh gr_ctx in channel ctx. | ||
1502 | save a copy in local_golden_image in ctx_vars */ | ||
1503 | static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | ||
1504 | struct channel_gk20a *c) | ||
1505 | { | ||
1506 | struct gr_gk20a *gr = &g->gr; | ||
1507 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
1508 | u32 ctx_header_bytes = ctxsw_prog_fecs_header_v(); | ||
1509 | u32 ctx_header_words; | ||
1510 | u32 i; | ||
1511 | u32 data; | ||
1512 | void *ctx_ptr = NULL; | ||
1513 | void *gold_ptr = NULL; | ||
1514 | u32 err = 0; | ||
1515 | |||
1516 | gk20a_dbg_fn(""); | ||
1517 | |||
1518 | /* golden ctx is global to all channels. Although only the first | ||
1519 | channel initializes golden image, driver needs to prevent multiple | ||
1520 | channels from initializing golden ctx at the same time */ | ||
1521 | mutex_lock(&gr->ctx_mutex); | ||
1522 | |||
1523 | if (gr->ctx_vars.golden_image_initialized) | ||
1524 | goto clean_up; | ||
1525 | |||
1526 | err = gr_gk20a_fecs_ctx_bind_channel(g, c); | ||
1527 | if (err) | ||
1528 | goto clean_up; | ||
1529 | |||
1530 | err = gk20a_init_sw_bundle(g); | ||
1531 | if (err) | ||
1532 | goto clean_up; | ||
1533 | |||
1534 | err = gr_gk20a_elpg_protected_call(g, | ||
1535 | gr_gk20a_commit_global_ctx_buffers(g, c, false)); | ||
1536 | if (err) | ||
1537 | goto clean_up; | ||
1538 | |||
1539 | gold_ptr = vmap(gr->global_ctx_buffer[GOLDEN_CTX].pages, | ||
1540 | PAGE_ALIGN(gr->global_ctx_buffer[GOLDEN_CTX].size) >> | ||
1541 | PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL)); | ||
1542 | if (!gold_ptr) | ||
1543 | goto clean_up; | ||
1544 | |||
1545 | ctx_ptr = vmap(ch_ctx->gr_ctx.pages, | ||
1546 | PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, | ||
1547 | 0, pgprot_dmacoherent(PAGE_KERNEL)); | ||
1548 | if (!ctx_ptr) | ||
1549 | goto clean_up; | ||
1550 | |||
1551 | ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); | ||
1552 | ctx_header_words >>= 2; | ||
1553 | |||
1554 | /* Channel gr_ctx buffer is gpu cacheable. | ||
1555 | Flush before cpu read. */ | ||
1556 | gk20a_mm_fb_flush(g); | ||
1557 | gk20a_mm_l2_flush(g, false); | ||
1558 | |||
1559 | for (i = 0; i < ctx_header_words; i++) { | ||
1560 | data = gk20a_mem_rd32(ctx_ptr, i); | ||
1561 | gk20a_mem_wr32(gold_ptr, i, data); | ||
1562 | } | ||
1563 | |||
1564 | gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_o(), 0, | ||
1565 | ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); | ||
1566 | |||
1567 | gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, 0); | ||
1568 | |||
1569 | gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); | ||
1570 | |||
1571 | gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v()); | ||
1572 | |||
1573 | if (gr->ctx_vars.local_golden_image == NULL) { | ||
1574 | |||
1575 | gr->ctx_vars.local_golden_image = | ||
1576 | kzalloc(gr->ctx_vars.golden_image_size, GFP_KERNEL); | ||
1577 | |||
1578 | if (gr->ctx_vars.local_golden_image == NULL) { | ||
1579 | err = -ENOMEM; | ||
1580 | goto clean_up; | ||
1581 | } | ||
1582 | |||
1583 | for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) | ||
1584 | gr->ctx_vars.local_golden_image[i] = | ||
1585 | gk20a_mem_rd32(gold_ptr, i); | ||
1586 | } | ||
1587 | |||
1588 | gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va); | ||
1589 | |||
1590 | gr->ctx_vars.golden_image_initialized = true; | ||
1591 | |||
1592 | gk20a_mm_l2_invalidate(g); | ||
1593 | |||
1594 | gk20a_writel(g, gr_fecs_current_ctx_r(), | ||
1595 | gr_fecs_current_ctx_valid_false_f()); | ||
1596 | |||
1597 | clean_up: | ||
1598 | if (err) | ||
1599 | gk20a_err(dev_from_gk20a(g), "fail"); | ||
1600 | else | ||
1601 | gk20a_dbg_fn("done"); | ||
1602 | |||
1603 | if (gold_ptr) | ||
1604 | vunmap(gold_ptr); | ||
1605 | if (ctx_ptr) | ||
1606 | vunmap(ctx_ptr); | ||
1607 | |||
1608 | mutex_unlock(&gr->ctx_mutex); | ||
1609 | return err; | ||
1610 | } | ||
1611 | |||
1612 | int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | ||
1613 | struct channel_gk20a *c, | ||
1614 | bool enable_smpc_ctxsw) | ||
1615 | { | ||
1616 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
1617 | void *ctx_ptr = NULL; | ||
1618 | u32 data; | ||
1619 | |||
1620 | /*XXX caller responsible for making sure the channel is quiesced? */ | ||
1621 | |||
1622 | /* Channel gr_ctx buffer is gpu cacheable. | ||
1623 | Flush and invalidate before cpu update. */ | ||
1624 | gk20a_mm_fb_flush(g); | ||
1625 | gk20a_mm_l2_flush(g, true); | ||
1626 | |||
1627 | ctx_ptr = vmap(ch_ctx->gr_ctx.pages, | ||
1628 | PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, | ||
1629 | 0, pgprot_dmacoherent(PAGE_KERNEL)); | ||
1630 | if (!ctx_ptr) | ||
1631 | return -ENOMEM; | ||
1632 | |||
1633 | data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); | ||
1634 | data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); | ||
1635 | data |= enable_smpc_ctxsw ? | ||
1636 | ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : | ||
1637 | ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); | ||
1638 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, | ||
1639 | data); | ||
1640 | |||
1641 | vunmap(ctx_ptr); | ||
1642 | |||
1643 | gk20a_mm_l2_invalidate(g); | ||
1644 | |||
1645 | return 0; | ||
1646 | } | ||
1647 | |||
/* Load the saved copy of the golden context image into a channel's gr_ctx.
 *
 * Copies gr->ctx_vars.local_golden_image (captured at golden-context init
 * time) into the channel's context buffer via a CPU mapping, then patches
 * per-channel state: save/restore op counters, patch-context pointer and
 * count, PM mode (forced to no_ctxsw), the priv access map pointer, and
 * the verif-features disable bit.
 *
 * Returns 0 on success, -1 if no golden image has been captured yet,
 * -ENOMEM if the context buffer cannot be mapped, or the status of the
 * linsim-only FECS restore call.
 */
static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
					struct channel_gk20a *c)
{
	struct gr_gk20a *gr = &g->gr;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	u32 virt_addr_lo;
	u32 virt_addr_hi;
	u32 i, v, data;
	int ret = 0;
	void *ctx_ptr = NULL;

	gk20a_dbg_fn("");

	/* Nothing to load until the golden image has been captured. */
	if (gr->ctx_vars.local_golden_image == NULL)
		return -1;

	/* Channel gr_ctx buffer is gpu cacheable.
	   Flush and invalidate before cpu update. */
	gk20a_mm_fb_flush(g);
	gk20a_mm_l2_flush(g, true);

	ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
			PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
			0, pgprot_dmacoherent(PAGE_KERNEL));
	if (!ctx_ptr)
		return -ENOMEM;

	/* Bulk-copy the golden image one 32-bit word at a time. */
	for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
		gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]);

	/* Fresh context: reset the ctxsw save/restore operation counters. */
	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0);
	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0);

	/* Point the image at this channel's patch-context buffer. */
	virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
	virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);

	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0,
		 ch_ctx->patch_ctx.data_count);
	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_o(), 0,
		 virt_addr_lo);
	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0,
		 virt_addr_hi);

	/* no user for client managed performance counter ctx */
	ch_ctx->pm_ctx.ctx_sw_mode =
		ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
	data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
	data = data & ~ctxsw_prog_main_image_pm_mode_m();
	data |= ch_ctx->pm_ctx.ctx_sw_mode;
	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0,
		 data);

	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, 0);

	/* set priv access map */
	virt_addr_lo =
		 u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
	virt_addr_hi =
		 u64_hi32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);

	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_config_o(), 0,
		 ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f());
	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 0,
		 virt_addr_lo);
	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 0,
		 virt_addr_hi);
	/* disable verif features */
	v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0);
	v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
	v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v);


	vunmap(ctx_ptr);

	/* CPU wrote the image; drop stale L2 lines so the GPU sees it. */
	gk20a_mm_l2_invalidate(g);

	if (tegra_platform_is_linsim()) {
		/* Simulation only: explicitly ask FECS to restore the golden
		 * context for this channel's instance block. */
		u32 inst_base_ptr =
			u64_lo32(c->inst_block.cpu_pa
			>> ram_in_base_shift_v());

		ret = gr_gk20a_submit_fecs_method_op(g,
			(struct fecs_method_op_gk20a) {
				.method.data =
					(gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
					 gr_fecs_current_ctx_target_vid_mem_f() |
					 gr_fecs_current_ctx_valid_f(1)),
				.method.addr =
					gr_fecs_method_push_adr_restore_golden_v(),
				.mailbox = {
					.id = 0, .data = 0,
					.clr = ~0, .ret = NULL,
					.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
					.fail = 0},
				.cond.ok = GR_IS_UCODE_OP_EQUAL,
				.cond.fail = GR_IS_UCODE_OP_SKIP});

		if (ret)
			gk20a_err(dev_from_gk20a(g),
				"restore context image failed");
	}

	return ret;
}
1754 | |||
/* Start both ctxsw falcons (FECS and GPCCS) running ucode that is
 * already resident in their IMEM/DMEM.
 *
 * Clears FECS mailbox 0, drops the require-ctx constraint on both
 * falcon DMA engines, then starts both falcon CPUs.
 */
static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
{
	gk20a_dbg_fn("");

	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
			gr_fecs_ctxsw_mailbox_clear_value_f(~0));

	gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0));
	gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0));

	gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1));
	gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1));

	gk20a_dbg_fn("done");
}
1770 | |||
1771 | static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) | ||
1772 | { | ||
1773 | struct mm_gk20a *mm = &g->mm; | ||
1774 | struct vm_gk20a *vm = &mm->pmu.vm; | ||
1775 | struct device *d = dev_from_gk20a(g); | ||
1776 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; | ||
1777 | void *inst_ptr; | ||
1778 | u32 pde_addr_lo; | ||
1779 | u32 pde_addr_hi; | ||
1780 | u64 pde_addr; | ||
1781 | dma_addr_t iova; | ||
1782 | |||
1783 | /* Alloc mem of inst block */ | ||
1784 | ucode_info->inst_blk_desc.size = ram_in_alloc_size_v(); | ||
1785 | ucode_info->inst_blk_desc.cpuva = dma_alloc_coherent(d, | ||
1786 | ucode_info->inst_blk_desc.size, | ||
1787 | &iova, | ||
1788 | GFP_KERNEL); | ||
1789 | if (!ucode_info->inst_blk_desc.cpuva) { | ||
1790 | gk20a_err(d, "failed to allocate memory\n"); | ||
1791 | return -ENOMEM; | ||
1792 | } | ||
1793 | |||
1794 | ucode_info->inst_blk_desc.iova = iova; | ||
1795 | ucode_info->inst_blk_desc.cpu_pa = gk20a_get_phys_from_iova(d, | ||
1796 | ucode_info->inst_blk_desc.iova); | ||
1797 | |||
1798 | inst_ptr = ucode_info->inst_blk_desc.cpuva; | ||
1799 | |||
1800 | /* Set inst block */ | ||
1801 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), | ||
1802 | u64_lo32(vm->va_limit) | 0xFFF); | ||
1803 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), | ||
1804 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); | ||
1805 | |||
1806 | pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); | ||
1807 | pde_addr_lo = u64_lo32(pde_addr >> 12); | ||
1808 | pde_addr_hi = u64_hi32(pde_addr); | ||
1809 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | ||
1810 | ram_in_page_dir_base_target_vid_mem_f() | | ||
1811 | ram_in_page_dir_base_vol_true_f() | | ||
1812 | ram_in_page_dir_base_lo_f(pde_addr_lo)); | ||
1813 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), | ||
1814 | ram_in_page_dir_base_hi_f(pde_addr_hi)); | ||
1815 | |||
1816 | /* Map ucode surface to GMMU */ | ||
1817 | ucode_info->ucode_gpuva = gk20a_gmmu_map(vm, | ||
1818 | &ucode_info->surface_desc.sgt, | ||
1819 | ucode_info->surface_desc.size, | ||
1820 | 0, /* flags */ | ||
1821 | gk20a_mem_flag_read_only); | ||
1822 | if (!ucode_info->ucode_gpuva) { | ||
1823 | gk20a_err(d, "failed to update gmmu ptes\n"); | ||
1824 | return -ENOMEM; | ||
1825 | } | ||
1826 | |||
1827 | return 0; | ||
1828 | } | ||
1829 | |||
1830 | static void gr_gk20a_init_ctxsw_ucode_segment( | ||
1831 | struct gk20a_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size) | ||
1832 | { | ||
1833 | p_seg->offset = *offset; | ||
1834 | p_seg->size = size; | ||
1835 | *offset = ALIGN(*offset + size, BLK_SIZE); | ||
1836 | } | ||
1837 | |||
1838 | static void gr_gk20a_init_ctxsw_ucode_segments( | ||
1839 | struct gk20a_ctxsw_ucode_segments *segments, u32 *offset, | ||
1840 | struct gk20a_ctxsw_bootloader_desc *bootdesc, | ||
1841 | u32 code_size, u32 data_size) | ||
1842 | { | ||
1843 | u32 boot_size = ALIGN(bootdesc->size, sizeof(u32)); | ||
1844 | segments->boot_entry = bootdesc->entry_point; | ||
1845 | segments->boot_imem_offset = bootdesc->imem_offset; | ||
1846 | gr_gk20a_init_ctxsw_ucode_segment(&segments->boot, offset, boot_size); | ||
1847 | gr_gk20a_init_ctxsw_ucode_segment(&segments->code, offset, code_size); | ||
1848 | gr_gk20a_init_ctxsw_ucode_segment(&segments->data, offset, data_size); | ||
1849 | } | ||
1850 | |||
1851 | static int gr_gk20a_copy_ctxsw_ucode_segments( | ||
1852 | u8 *buf, | ||
1853 | struct gk20a_ctxsw_ucode_segments *segments, | ||
1854 | u32 *bootimage, | ||
1855 | u32 *code, u32 *data) | ||
1856 | { | ||
1857 | memcpy(buf + segments->boot.offset, bootimage, segments->boot.size); | ||
1858 | memcpy(buf + segments->code.offset, code, segments->code.size); | ||
1859 | memcpy(buf + segments->data.offset, data, segments->data.size); | ||
1860 | return 0; | ||
1861 | } | ||
1862 | |||
/* Build the ctxsw ucode surface: load the FECS and GPCCS firmware
 * blobs, lay their boot/code/data segments out in a single read-only
 * DMA surface, copy the images in, and map the surface into the PMU VM
 * for the falcons.
 *
 * On success the firmware blobs are released and the temporary sg table
 * is freed; only the DMA surface and its GPU mapping remain.  Returns 0
 * on success or a negative errno, with all partial allocations undone.
 */
static int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
{
	struct device *d = dev_from_gk20a(g);
	struct mm_gk20a *mm = &g->mm;
	struct vm_gk20a *vm = &mm->pmu.vm;
	struct gk20a_ctxsw_bootloader_desc *fecs_boot_desc;
	struct gk20a_ctxsw_bootloader_desc *gpccs_boot_desc;
	const struct firmware *fecs_fw;
	const struct firmware *gpccs_fw;
	u32 *fecs_boot_image;
	u32 *gpccs_boot_image;
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	u8 *buf;
	u32 ucode_size;
	int err = 0;
	dma_addr_t iova;
	DEFINE_DMA_ATTRS(attrs);

	fecs_fw = gk20a_request_firmware(g, GK20A_FECS_UCODE_IMAGE);
	if (!fecs_fw) {
		gk20a_err(d, "failed to load fecs ucode!!");
		return -ENOENT;
	}

	/* Firmware blob layout: bootloader descriptor, then the image. */
	fecs_boot_desc = (void *)fecs_fw->data;
	fecs_boot_image = (void *)(fecs_fw->data +
				sizeof(struct gk20a_ctxsw_bootloader_desc));

	gpccs_fw = gk20a_request_firmware(g, GK20A_GPCCS_UCODE_IMAGE);
	if (!gpccs_fw) {
		release_firmware(fecs_fw);
		gk20a_err(d, "failed to load gpccs ucode!!");
		return -ENOENT;
	}

	gpccs_boot_desc = (void *)gpccs_fw->data;
	gpccs_boot_image = (void *)(gpccs_fw->data +
				sizeof(struct gk20a_ctxsw_bootloader_desc));

	/* Compute segment layout; ucode_size accumulates the total. */
	ucode_size = 0;
	gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->fecs, &ucode_size,
		fecs_boot_desc,
		g->gr.ctx_vars.ucode.fecs.inst.count * sizeof(u32),
		g->gr.ctx_vars.ucode.fecs.data.count * sizeof(u32));
	gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->gpccs, &ucode_size,
		gpccs_boot_desc,
		g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32),
		g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32));

	/* The falcons only read the surface, so allocate it read-only. */
	ucode_info->surface_desc.size = ucode_size;
	dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
	ucode_info->surface_desc.cpuva = dma_alloc_attrs(d,
					ucode_info->surface_desc.size,
					&iova,
					GFP_KERNEL,
					&attrs);
	if (!ucode_info->surface_desc.cpuva) {
		gk20a_err(d, "memory allocation failed\n");
		err = -ENOMEM;
		goto clean_up;
	}

	ucode_info->surface_desc.iova = iova;
	err = gk20a_get_sgtable(d, &ucode_info->surface_desc.sgt,
				ucode_info->surface_desc.cpuva,
				ucode_info->surface_desc.iova,
				ucode_info->surface_desc.size);
	if (err) {
		gk20a_err(d, "failed to create sg table\n");
		goto clean_up;
	}

	buf = (u8 *)ucode_info->surface_desc.cpuva;
	if (!buf) {
		gk20a_err(d, "failed to map surface desc buffer");
		err = -ENOMEM;
		goto clean_up;
	}

	/* Copy each firmware into its segments, releasing each source
	 * blob as soon as it has been consumed. */
	gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->fecs,
		fecs_boot_image,
		g->gr.ctx_vars.ucode.fecs.inst.l,
		g->gr.ctx_vars.ucode.fecs.data.l);

	release_firmware(fecs_fw);
	fecs_fw = NULL;

	gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->gpccs,
		gpccs_boot_image,
		g->gr.ctx_vars.ucode.gpccs.inst.l,
		g->gr.ctx_vars.ucode.gpccs.data.l);

	release_firmware(gpccs_fw);
	gpccs_fw = NULL;

	err = gr_gk20a_init_ctxsw_ucode_vaspace(g);
	if (err)
		goto clean_up;

	/* GPU mapping exists now; the temporary sg table can go. */
	gk20a_free_sgtable(&ucode_info->surface_desc.sgt);

	return 0;

clean_up:
	if (ucode_info->ucode_gpuva)
		gk20a_gmmu_unmap(vm, ucode_info->ucode_gpuva,
			ucode_info->surface_desc.size, gk20a_mem_flag_none);
	if (ucode_info->surface_desc.sgt)
		gk20a_free_sgtable(&ucode_info->surface_desc.sgt);
	if (ucode_info->surface_desc.cpuva)
		dma_free_attrs(d, ucode_info->surface_desc.size,
				ucode_info->surface_desc.cpuva,
				ucode_info->surface_desc.iova,
				&attrs);
	ucode_info->surface_desc.cpuva = NULL;
	ucode_info->surface_desc.iova = 0;

	/* release_firmware(NULL) is a no-op, so this is safe on all paths. */
	release_firmware(gpccs_fw);
	gpccs_fw = NULL;
	release_firmware(fecs_fw);
	fecs_fw = NULL;

	return err;
}
1987 | |||
/* Bind the ctxsw ucode instance block to the FECS context arbiter.
 *
 * Waits for the arbiter to go idle, programs the new/arb/current
 * context pointers with the physical address of the ucode instance
 * block (in 4K-page units), and issues arbiter commands, polling each
 * for completion.  Poll timeouts are logged but not treated as fatal.
 */
static void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
{
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	int retries = 20;
	phys_addr_t inst_ptr;
	u32 val;

	/* Wait for any in-flight arbiter operation to finish. */
	while ((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
			gr_fecs_ctxsw_status_1_arb_busy_m()) && retries) {
		udelay(2);
		retries--;
	}
	if (!retries)
		gk20a_err(dev_from_gk20a(g), "arbiter idle timeout");

	gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);

	/* Context pointer registers take the address in 4K-page units. */
	inst_ptr = ucode_info->inst_blk_desc.cpu_pa;
	gk20a_writel(g, gr_fecs_new_ctx_r(),
			gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
			gr_fecs_new_ctx_target_m() |
			gr_fecs_new_ctx_valid_m());

	gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
			gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
			gr_fecs_arb_ctx_ptr_target_m());

	gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);

	/* Wait for arbiter command to complete */
	retries = 20;
	val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
	while (gr_fecs_arb_ctx_cmd_cmd_v(val) && retries) {
		udelay(2);
		retries--;
		val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
	}
	if (!retries)
		gk20a_err(dev_from_gk20a(g), "arbiter complete timeout");

	gk20a_writel(g, gr_fecs_current_ctx_r(),
			gr_fecs_current_ctx_ptr_f(inst_ptr >> 12) |
			gr_fecs_current_ctx_target_m() |
			gr_fecs_current_ctx_valid_m());
	/* Send command to arbiter to flush */
	gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s());

	retries = 20;
	val = (gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()));
	while (gr_fecs_arb_ctx_cmd_cmd_v(val) && retries) {
		udelay(2);
		retries--;
		val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
	}
	if (!retries)
		gk20a_err(dev_from_gk20a(g), "arbiter complete timeout");
}
2045 | |||
/* Load one falcon's bootloader header into DMEM, DMA its bootloader
 * into IMEM, and start the falcon.
 *
 * @addr_base: GPU VA of the shared ucode surface.
 * @segments: this falcon's boot/code/data segment layout.
 * @reg_offset: 0 for FECS; the FECS->GPCCS register delta for GPCCS.
 *
 * Always returns 0.
 */
static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
	u32 addr_code32;
	u32 addr_data32;
	u32 addr_load32;
	u32 dst = 0;
	u32 blocks;
	u32 b;

	/* Falcon DMA addresses are expressed in 256-byte units. */
	addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
	addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);
	addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);

	gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
			gr_fecs_dmactl_require_ctx_f(0));

	/*
	 * Copy falcon bootloader header into dmem at offset 0.
	 * Configure dmem port 0 for auto-incrementing writes starting at dmem
	 * offset 0.
	 */
	gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0),
			gr_fecs_dmemc_offs_f(0) |
			gr_fecs_dmemc_blk_f(0) |
			gr_fecs_dmemc_aincw_f(1));

	/* Write out the actual data */
	gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
	gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32);
	gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
	gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->code.size);
	gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
	gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_data32);
	gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->data.size);
	gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32);
	gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
	gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);

	/* Number of 256-byte IMEM blocks the bootloader occupies. */
	blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8;

	/*
	 * Set the base FB address for the DMA transfer. Subtract off the 256
	 * byte IMEM block offset such that the relative FB and IMEM offsets
	 * match, allowing the IMEM tags to be properly created.
	 */

	dst = segments->boot_imem_offset;
	gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
			(addr_load32 - (dst >> 8)));

	for (b = 0; b < blocks; b++) {
		/* Setup destination IMEM offset */
		gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
				dst + (b << 8));

		/* Setup source offset (relative to BASE) */
		gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
				dst + (b << 8));

		gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
				gr_fecs_dmatrfcmd_imem_f(0x01) |
				gr_fecs_dmatrfcmd_write_f(0x00) |
				gr_fecs_dmatrfcmd_size_f(0x06) |
				gr_fecs_dmatrfcmd_ctxdma_f(0));
	}

	/* Specify the falcon boot vector */
	gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
			gr_fecs_bootvec_vec_f(segments->boot_entry));

	/* Write to CPUCTL to start the falcon */
	gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
			gr_fecs_cpuctl_startcpu_f(0x01));

	return 0;
}
2123 | |||
2124 | static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) | ||
2125 | { | ||
2126 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; | ||
2127 | u64 addr_base = ucode_info->ucode_gpuva; | ||
2128 | |||
2129 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0); | ||
2130 | |||
2131 | gr_gk20a_load_falcon_bind_instblk(g); | ||
2132 | |||
2133 | gr_gk20a_load_ctxsw_ucode_segments(g, addr_base, | ||
2134 | &g->ctxsw_ucode_info.fecs, 0); | ||
2135 | |||
2136 | gr_gk20a_load_ctxsw_ucode_segments(g, addr_base, | ||
2137 | &g->ctxsw_ucode_info.gpccs, | ||
2138 | gr_gpcs_gpccs_falcon_hwcfg_r() - | ||
2139 | gr_fecs_falcon_hwcfg_r()); | ||
2140 | } | ||
2141 | |||
2142 | static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr) | ||
2143 | { | ||
2144 | u32 ret; | ||
2145 | |||
2146 | gk20a_dbg_fn(""); | ||
2147 | |||
2148 | if (tegra_platform_is_linsim()) { | ||
2149 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7), | ||
2150 | gr_fecs_ctxsw_mailbox_value_f(0xc0de7777)); | ||
2151 | gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7), | ||
2152 | gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); | ||
2153 | } | ||
2154 | |||
2155 | /* | ||
2156 | * In case the gPMU falcon is not being used, revert to the old way of | ||
2157 | * loading gr ucode, without the faster bootstrap routine. | ||
2158 | */ | ||
2159 | if (!support_gk20a_pmu()) { | ||
2160 | gr_gk20a_load_falcon_dmem(g); | ||
2161 | gr_gk20a_load_falcon_imem(g); | ||
2162 | gr_gk20a_start_falcon_ucode(g); | ||
2163 | } else { | ||
2164 | if (!gr->skip_ucode_init) | ||
2165 | gr_gk20a_init_ctxsw_ucode(g); | ||
2166 | gr_gk20a_load_falcon_with_bootloader(g); | ||
2167 | gr->skip_ucode_init = true; | ||
2168 | } | ||
2169 | |||
2170 | ret = gr_gk20a_ctx_wait_ucode(g, 0, 0, | ||
2171 | GR_IS_UCODE_OP_EQUAL, | ||
2172 | eUcodeHandshakeInitComplete, | ||
2173 | GR_IS_UCODE_OP_SKIP, 0); | ||
2174 | if (ret) { | ||
2175 | gk20a_err(dev_from_gk20a(g), "falcon ucode init timeout"); | ||
2176 | return ret; | ||
2177 | } | ||
2178 | |||
2179 | if (support_gk20a_pmu()) | ||
2180 | gk20a_writel(g, gr_fecs_current_ctx_r(), | ||
2181 | gr_fecs_current_ctx_valid_false_f()); | ||
2182 | |||
2183 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff); | ||
2184 | gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff); | ||
2185 | gk20a_writel(g, gr_fecs_method_push_r(), | ||
2186 | gr_fecs_method_push_adr_set_watchdog_timeout_f()); | ||
2187 | |||
2188 | gk20a_dbg_fn("done"); | ||
2189 | return 0; | ||
2190 | } | ||
2191 | |||
2192 | static int gr_gk20a_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr) | ||
2193 | { | ||
2194 | u32 golden_ctx_image_size = 0; | ||
2195 | u32 zcull_ctx_image_size = 0; | ||
2196 | u32 pm_ctx_image_size = 0; | ||
2197 | u32 ret; | ||
2198 | struct fecs_method_op_gk20a op = { | ||
2199 | .mailbox = { .id = 0, .data = 0, | ||
2200 | .clr = ~0, .ok = 0, .fail = 0}, | ||
2201 | .method.data = 0, | ||
2202 | .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL, | ||
2203 | .cond.fail = GR_IS_UCODE_OP_SKIP, | ||
2204 | }; | ||
2205 | |||
2206 | gk20a_dbg_fn(""); | ||
2207 | op.method.addr = gr_fecs_method_push_adr_discover_image_size_v(); | ||
2208 | op.mailbox.ret = &golden_ctx_image_size; | ||
2209 | ret = gr_gk20a_submit_fecs_method_op(g, op); | ||
2210 | if (ret) { | ||
2211 | gk20a_err(dev_from_gk20a(g), | ||
2212 | "query golden image size failed"); | ||
2213 | return ret; | ||
2214 | } | ||
2215 | op.method.addr = gr_fecs_method_push_adr_discover_zcull_image_size_v(); | ||
2216 | op.mailbox.ret = &zcull_ctx_image_size; | ||
2217 | ret = gr_gk20a_submit_fecs_method_op(g, op); | ||
2218 | if (ret) { | ||
2219 | gk20a_err(dev_from_gk20a(g), | ||
2220 | "query zcull ctx image size failed"); | ||
2221 | return ret; | ||
2222 | } | ||
2223 | op.method.addr = gr_fecs_method_push_adr_discover_pm_image_size_v(); | ||
2224 | op.mailbox.ret = &pm_ctx_image_size; | ||
2225 | ret = gr_gk20a_submit_fecs_method_op(g, op); | ||
2226 | if (ret) { | ||
2227 | gk20a_err(dev_from_gk20a(g), | ||
2228 | "query pm ctx image size failed"); | ||
2229 | return ret; | ||
2230 | } | ||
2231 | |||
2232 | if (!g->gr.ctx_vars.golden_image_size && | ||
2233 | !g->gr.ctx_vars.zcull_ctxsw_image_size) { | ||
2234 | g->gr.ctx_vars.golden_image_size = golden_ctx_image_size; | ||
2235 | g->gr.ctx_vars.zcull_ctxsw_image_size = zcull_ctx_image_size; | ||
2236 | } else { | ||
2237 | /* hw is different after railgating? */ | ||
2238 | BUG_ON(g->gr.ctx_vars.golden_image_size != golden_ctx_image_size); | ||
2239 | BUG_ON(g->gr.ctx_vars.zcull_ctxsw_image_size != zcull_ctx_image_size); | ||
2240 | } | ||
2241 | |||
2242 | g->gr.ctx_vars.priv_access_map_size = 512 * 1024; | ||
2243 | |||
2244 | gk20a_dbg_fn("done"); | ||
2245 | return 0; | ||
2246 | } | ||
2247 | |||
/* Free one global context buffer: release its sg table and the DMA
 * allocation made by gk20a_gr_alloc_ctx_buffer().
 */
static void gk20a_gr_destroy_ctx_buffer(struct platform_device *pdev,
			struct gr_ctx_buffer_desc *desc)
{
	struct device *dev = &pdev->dev;
	gk20a_free_sgtable(&desc->sgt);
	dma_free_attrs(dev, desc->size, desc->pages,
			desc->iova, &desc->attrs);
}
2256 | |||
2257 | static int gk20a_gr_alloc_ctx_buffer(struct platform_device *pdev, | ||
2258 | struct gr_ctx_buffer_desc *desc, | ||
2259 | size_t size) | ||
2260 | { | ||
2261 | struct device *dev = &pdev->dev; | ||
2262 | DEFINE_DMA_ATTRS(attrs); | ||
2263 | dma_addr_t iova; | ||
2264 | int err = 0; | ||
2265 | |||
2266 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
2267 | |||
2268 | desc->pages = dma_alloc_attrs(&pdev->dev, size, &iova, | ||
2269 | GFP_KERNEL, &attrs); | ||
2270 | if (!desc->pages) | ||
2271 | return -ENOMEM; | ||
2272 | |||
2273 | desc->iova = iova; | ||
2274 | desc->size = size; | ||
2275 | desc->attrs = attrs; | ||
2276 | desc->destroy = gk20a_gr_destroy_ctx_buffer; | ||
2277 | err = gk20a_get_sgtable_from_pages(&pdev->dev, &desc->sgt, desc->pages, | ||
2278 | desc->iova, desc->size); | ||
2279 | if (err) { | ||
2280 | dma_free_attrs(dev, desc->size, desc->pages, | ||
2281 | desc->iova, &desc->attrs); | ||
2282 | memset(desc, 0, sizeof(*desc)); | ||
2283 | } | ||
2284 | |||
2285 | return err; | ||
2286 | } | ||
2287 | |||
/* Allocate the per-GPU global context buffers: circular (bundle CB),
 * pagepool, attribute, golden image and priv access map, plus VPR
 * variants of the first three when the platform provides a secure
 * allocator.
 *
 * Returns 0 on success, -ENOMEM after destroying any partial
 * allocations on failure.
 */
static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
{
	struct gk20a_platform *platform = platform_get_drvdata(g->dev);
	struct gr_gk20a *gr = &g->gr;
	int i, attr_buffer_size, err;
	struct platform_device *pdev = g->dev;

	u32 cb_buffer_size = gr->bundle_cb_default_size *
		gr_scc_bundle_cb_size_div_256b_byte_granularity_v();

	u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() *
		gr_scc_pagepool_total_pages_byte_granularity_v();

	gk20a_dbg_fn("");

	attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g);

	gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size);

	err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[CIRCULAR],
					cb_buffer_size);
	if (err)
		goto clean_up;

	/* secure_alloc results are not checked: the map path falls back to
	 * the non-VPR buffer when a VPR sgt is NULL, so VPR allocation is
	 * best-effort. */
	if (platform->secure_alloc)
		platform->secure_alloc(pdev,
				       &gr->global_ctx_buffer[CIRCULAR_VPR],
				       cb_buffer_size);

	gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);

	err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[PAGEPOOL],
					pagepool_buffer_size);
	if (err)
		goto clean_up;

	if (platform->secure_alloc)
		platform->secure_alloc(pdev,
				       &gr->global_ctx_buffer[PAGEPOOL_VPR],
				       pagepool_buffer_size);

	gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size);

	err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[ATTRIBUTE],
					attr_buffer_size);
	if (err)
		goto clean_up;

	if (platform->secure_alloc)
		platform->secure_alloc(pdev,
				       &gr->global_ctx_buffer[ATTRIBUTE_VPR],
				       attr_buffer_size);

	gk20a_dbg_info("golden_image_size : %d",
		gr->ctx_vars.golden_image_size);

	err = gk20a_gr_alloc_ctx_buffer(pdev,
					&gr->global_ctx_buffer[GOLDEN_CTX],
					gr->ctx_vars.golden_image_size);
	if (err)
		goto clean_up;

	gk20a_dbg_info("priv_access_map_size : %d",
		gr->ctx_vars.priv_access_map_size);

	err = gk20a_gr_alloc_ctx_buffer(pdev,
					&gr->global_ctx_buffer[PRIV_ACCESS_MAP],
					gr->ctx_vars.priv_access_map_size);

	if (err)
		goto clean_up;

	gk20a_dbg_fn("done");
	return 0;

clean_up:
	gk20a_err(dev_from_gk20a(g), "fail");
	/* Destroy whatever was successfully allocated so far; buffers
	 * never allocated have no destroy callback. */
	for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
		if (gr->global_ctx_buffer[i].destroy) {
			gr->global_ctx_buffer[i].destroy(pdev,
					&gr->global_ctx_buffer[i]);
		}
	}
	return -ENOMEM;
}
2373 | |||
2374 | static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g) | ||
2375 | { | ||
2376 | struct platform_device *pdev = g->dev; | ||
2377 | struct gr_gk20a *gr = &g->gr; | ||
2378 | DEFINE_DMA_ATTRS(attrs); | ||
2379 | u32 i; | ||
2380 | |||
2381 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
2382 | |||
2383 | for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) { | ||
2384 | gr->global_ctx_buffer[i].destroy(pdev, | ||
2385 | &gr->global_ctx_buffer[i]); | ||
2386 | } | ||
2387 | |||
2388 | gk20a_dbg_fn("done"); | ||
2389 | } | ||
2390 | |||
2391 | static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | ||
2392 | struct channel_gk20a *c) | ||
2393 | { | ||
2394 | struct vm_gk20a *ch_vm = c->vm; | ||
2395 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | ||
2396 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | ||
2397 | struct gr_gk20a *gr = &g->gr; | ||
2398 | struct sg_table *sgt; | ||
2399 | u64 size; | ||
2400 | u64 gpu_va; | ||
2401 | u32 i; | ||
2402 | gk20a_dbg_fn(""); | ||
2403 | |||
2404 | /* Circular Buffer */ | ||
2405 | if (!c->vpr || (gr->global_ctx_buffer[CIRCULAR_VPR].sgt == NULL)) { | ||
2406 | sgt = gr->global_ctx_buffer[CIRCULAR].sgt; | ||
2407 | size = gr->global_ctx_buffer[CIRCULAR].size; | ||
2408 | } else { | ||
2409 | sgt = gr->global_ctx_buffer[CIRCULAR_VPR].sgt; | ||
2410 | size = gr->global_ctx_buffer[CIRCULAR_VPR].size; | ||
2411 | } | ||
2412 | |||
2413 | gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, | ||
2414 | NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
2415 | gk20a_mem_flag_none); | ||
2416 | if (!gpu_va) | ||
2417 | goto clean_up; | ||
2418 | g_bfr_va[CIRCULAR_VA] = gpu_va; | ||
2419 | g_bfr_size[CIRCULAR_VA] = size; | ||
2420 | |||
2421 | /* Attribute Buffer */ | ||
2422 | if (!c->vpr || (gr->global_ctx_buffer[ATTRIBUTE_VPR].sgt == NULL)) { | ||
2423 | sgt = gr->global_ctx_buffer[ATTRIBUTE].sgt; | ||
2424 | size = gr->global_ctx_buffer[ATTRIBUTE].size; | ||
2425 | } else { | ||
2426 | sgt = gr->global_ctx_buffer[ATTRIBUTE_VPR].sgt; | ||
2427 | size = gr->global_ctx_buffer[ATTRIBUTE_VPR].size; | ||
2428 | } | ||
2429 | |||
2430 | gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, | ||
2431 | NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
2432 | gk20a_mem_flag_none); | ||
2433 | if (!gpu_va) | ||
2434 | goto clean_up; | ||
2435 | g_bfr_va[ATTRIBUTE_VA] = gpu_va; | ||
2436 | g_bfr_size[ATTRIBUTE_VA] = size; | ||
2437 | |||
2438 | /* Page Pool */ | ||
2439 | if (!c->vpr || (gr->global_ctx_buffer[PAGEPOOL_VPR].sgt == NULL)) { | ||
2440 | sgt = gr->global_ctx_buffer[PAGEPOOL].sgt; | ||
2441 | size = gr->global_ctx_buffer[PAGEPOOL].size; | ||
2442 | } else { | ||
2443 | sgt = gr->global_ctx_buffer[PAGEPOOL_VPR].sgt; | ||
2444 | size = gr->global_ctx_buffer[PAGEPOOL_VPR].size; | ||
2445 | } | ||
2446 | |||
2447 | gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, | ||
2448 | NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
2449 | gk20a_mem_flag_none); | ||
2450 | if (!gpu_va) | ||
2451 | goto clean_up; | ||
2452 | g_bfr_va[PAGEPOOL_VA] = gpu_va; | ||
2453 | g_bfr_size[PAGEPOOL_VA] = size; | ||
2454 | |||
2455 | /* Golden Image */ | ||
2456 | sgt = gr->global_ctx_buffer[GOLDEN_CTX].sgt; | ||
2457 | size = gr->global_ctx_buffer[GOLDEN_CTX].size; | ||
2458 | gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0, | ||
2459 | gk20a_mem_flag_none); | ||
2460 | if (!gpu_va) | ||
2461 | goto clean_up; | ||
2462 | g_bfr_va[GOLDEN_CTX_VA] = gpu_va; | ||
2463 | g_bfr_size[GOLDEN_CTX_VA] = size; | ||
2464 | |||
2465 | /* Priv register Access Map */ | ||
2466 | sgt = gr->global_ctx_buffer[PRIV_ACCESS_MAP].sgt; | ||
2467 | size = gr->global_ctx_buffer[PRIV_ACCESS_MAP].size; | ||
2468 | gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0, | ||
2469 | gk20a_mem_flag_none); | ||
2470 | if (!gpu_va) | ||
2471 | goto clean_up; | ||
2472 | g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va; | ||
2473 | g_bfr_size[PRIV_ACCESS_MAP_VA] = size; | ||
2474 | |||
2475 | c->ch_ctx.global_ctx_buffer_mapped = true; | ||
2476 | return 0; | ||
2477 | |||
2478 | clean_up: | ||
2479 | for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { | ||
2480 | if (g_bfr_va[i]) { | ||
2481 | gk20a_gmmu_unmap(ch_vm, g_bfr_va[i], | ||
2482 | gr->global_ctx_buffer[i].size, | ||
2483 | gk20a_mem_flag_none); | ||
2484 | g_bfr_va[i] = 0; | ||
2485 | } | ||
2486 | } | ||
2487 | return -ENOMEM; | ||
2488 | } | ||
2489 | |||
2490 | static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) | ||
2491 | { | ||
2492 | struct vm_gk20a *ch_vm = c->vm; | ||
2493 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | ||
2494 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | ||
2495 | u32 i; | ||
2496 | |||
2497 | gk20a_dbg_fn(""); | ||
2498 | |||
2499 | for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { | ||
2500 | if (g_bfr_va[i]) { | ||
2501 | gk20a_gmmu_unmap(ch_vm, g_bfr_va[i], | ||
2502 | g_bfr_size[i], | ||
2503 | gk20a_mem_flag_none); | ||
2504 | g_bfr_va[i] = 0; | ||
2505 | g_bfr_size[i] = 0; | ||
2506 | } | ||
2507 | } | ||
2508 | c->ch_ctx.global_ctx_buffer_mapped = false; | ||
2509 | } | ||
2510 | |||
2511 | static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, | ||
2512 | struct channel_gk20a *c) | ||
2513 | { | ||
2514 | struct gr_gk20a *gr = &g->gr; | ||
2515 | struct gr_ctx_desc *gr_ctx = &c->ch_ctx.gr_ctx; | ||
2516 | struct vm_gk20a *ch_vm = c->vm; | ||
2517 | struct device *d = dev_from_gk20a(g); | ||
2518 | struct sg_table *sgt; | ||
2519 | DEFINE_DMA_ATTRS(attrs); | ||
2520 | int err = 0; | ||
2521 | dma_addr_t iova; | ||
2522 | |||
2523 | gk20a_dbg_fn(""); | ||
2524 | |||
2525 | if (gr->ctx_vars.buffer_size == 0) | ||
2526 | return 0; | ||
2527 | |||
2528 | /* alloc channel gr ctx buffer */ | ||
2529 | gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; | ||
2530 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; | ||
2531 | |||
2532 | gr_ctx->size = gr->ctx_vars.buffer_total_size; | ||
2533 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
2534 | gr_ctx->pages = dma_alloc_attrs(d, gr_ctx->size, | ||
2535 | &iova, GFP_KERNEL, &attrs); | ||
2536 | if (!gr_ctx->pages) | ||
2537 | return -ENOMEM; | ||
2538 | |||
2539 | gr_ctx->iova = iova; | ||
2540 | err = gk20a_get_sgtable_from_pages(d, &sgt, gr_ctx->pages, | ||
2541 | gr_ctx->iova, gr_ctx->size); | ||
2542 | if (err) | ||
2543 | goto err_free; | ||
2544 | |||
2545 | gr_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, gr_ctx->size, | ||
2546 | NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
2547 | gk20a_mem_flag_none); | ||
2548 | if (!gr_ctx->gpu_va) | ||
2549 | goto err_free_sgt; | ||
2550 | |||
2551 | gk20a_free_sgtable(&sgt); | ||
2552 | |||
2553 | return 0; | ||
2554 | |||
2555 | err_free_sgt: | ||
2556 | gk20a_free_sgtable(&sgt); | ||
2557 | err_free: | ||
2558 | dma_free_attrs(d, gr_ctx->size, | ||
2559 | gr_ctx->pages, gr_ctx->iova, &attrs); | ||
2560 | gr_ctx->pages = NULL; | ||
2561 | gr_ctx->iova = 0; | ||
2562 | |||
2563 | return err; | ||
2564 | } | ||
2565 | |||
2566 | static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c) | ||
2567 | { | ||
2568 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
2569 | struct vm_gk20a *ch_vm = c->vm; | ||
2570 | struct gk20a *g = c->g; | ||
2571 | struct device *d = dev_from_gk20a(g); | ||
2572 | DEFINE_DMA_ATTRS(attrs); | ||
2573 | |||
2574 | gk20a_dbg_fn(""); | ||
2575 | |||
2576 | if (!ch_ctx->gr_ctx.gpu_va) | ||
2577 | return; | ||
2578 | |||
2579 | gk20a_gmmu_unmap(ch_vm, ch_ctx->gr_ctx.gpu_va, | ||
2580 | ch_ctx->gr_ctx.size, gk20a_mem_flag_none); | ||
2581 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
2582 | dma_free_attrs(d, ch_ctx->gr_ctx.size, | ||
2583 | ch_ctx->gr_ctx.pages, ch_ctx->gr_ctx.iova, &attrs); | ||
2584 | ch_ctx->gr_ctx.pages = NULL; | ||
2585 | ch_ctx->gr_ctx.iova = 0; | ||
2586 | } | ||
2587 | |||
2588 | static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, | ||
2589 | struct channel_gk20a *c) | ||
2590 | { | ||
2591 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | ||
2592 | struct device *d = dev_from_gk20a(g); | ||
2593 | struct vm_gk20a *ch_vm = c->vm; | ||
2594 | DEFINE_DMA_ATTRS(attrs); | ||
2595 | struct sg_table *sgt; | ||
2596 | int err = 0; | ||
2597 | dma_addr_t iova; | ||
2598 | |||
2599 | gk20a_dbg_fn(""); | ||
2600 | |||
2601 | patch_ctx->size = 128 * sizeof(u32); | ||
2602 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
2603 | patch_ctx->pages = dma_alloc_attrs(d, patch_ctx->size, | ||
2604 | &iova, GFP_KERNEL, | ||
2605 | &attrs); | ||
2606 | if (!patch_ctx->pages) | ||
2607 | return -ENOMEM; | ||
2608 | |||
2609 | patch_ctx->iova = iova; | ||
2610 | err = gk20a_get_sgtable_from_pages(d, &sgt, patch_ctx->pages, | ||
2611 | patch_ctx->iova, patch_ctx->size); | ||
2612 | if (err) | ||
2613 | goto err_free; | ||
2614 | |||
2615 | patch_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, patch_ctx->size, | ||
2616 | 0, gk20a_mem_flag_none); | ||
2617 | if (!patch_ctx->gpu_va) | ||
2618 | goto err_free_sgtable; | ||
2619 | |||
2620 | gk20a_free_sgtable(&sgt); | ||
2621 | |||
2622 | gk20a_dbg_fn("done"); | ||
2623 | return 0; | ||
2624 | |||
2625 | err_free_sgtable: | ||
2626 | gk20a_free_sgtable(&sgt); | ||
2627 | err_free: | ||
2628 | dma_free_attrs(d, patch_ctx->size, | ||
2629 | patch_ctx->pages, patch_ctx->iova, &attrs); | ||
2630 | patch_ctx->pages = NULL; | ||
2631 | patch_ctx->iova = 0; | ||
2632 | gk20a_err(dev_from_gk20a(g), "fail"); | ||
2633 | return err; | ||
2634 | } | ||
2635 | |||
2636 | static void gr_gk20a_unmap_channel_patch_ctx(struct channel_gk20a *c) | ||
2637 | { | ||
2638 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | ||
2639 | struct vm_gk20a *ch_vm = c->vm; | ||
2640 | |||
2641 | gk20a_dbg_fn(""); | ||
2642 | |||
2643 | if (patch_ctx->gpu_va) | ||
2644 | gk20a_gmmu_unmap(ch_vm, patch_ctx->gpu_va, | ||
2645 | patch_ctx->size, gk20a_mem_flag_none); | ||
2646 | patch_ctx->gpu_va = 0; | ||
2647 | patch_ctx->data_count = 0; | ||
2648 | } | ||
2649 | |||
2650 | static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c) | ||
2651 | { | ||
2652 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | ||
2653 | struct gk20a *g = c->g; | ||
2654 | struct device *d = dev_from_gk20a(g); | ||
2655 | DEFINE_DMA_ATTRS(attrs); | ||
2656 | |||
2657 | gk20a_dbg_fn(""); | ||
2658 | |||
2659 | gr_gk20a_unmap_channel_patch_ctx(c); | ||
2660 | |||
2661 | if (patch_ctx->pages) { | ||
2662 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
2663 | dma_free_attrs(d, patch_ctx->size, | ||
2664 | patch_ctx->pages, patch_ctx->iova, &attrs); | ||
2665 | patch_ctx->pages = NULL; | ||
2666 | patch_ctx->iova = 0; | ||
2667 | } | ||
2668 | } | ||
2669 | |||
/*
 * Tear down all per-channel graphics context state: unmap the global
 * context buffers, free the patch context and the gr context buffer,
 * then clear the whole ch_ctx bookkeeping structure and reset the
 * object/init counters. Teardown order mirrors allocation in
 * gk20a_alloc_obj_ctx.
 */
void gk20a_free_channel_ctx(struct channel_gk20a *c)
{
	gr_gk20a_unmap_global_ctx_buffers(c);
	gr_gk20a_free_channel_patch_ctx(c);
	gr_gk20a_free_channel_gr_ctx(c);

	/* zcull_ctx, pm_ctx */

	memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));

	c->num_objects = 0;
	c->first_init = false;
}
2683 | |||
2684 | static bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num) | ||
2685 | { | ||
2686 | bool valid = false; | ||
2687 | |||
2688 | switch (class_num) { | ||
2689 | case KEPLER_COMPUTE_A: | ||
2690 | case KEPLER_C: | ||
2691 | case FERMI_TWOD_A: | ||
2692 | case KEPLER_DMA_COPY_A: | ||
2693 | valid = true; | ||
2694 | break; | ||
2695 | |||
2696 | default: | ||
2697 | break; | ||
2698 | } | ||
2699 | |||
2700 | return valid; | ||
2701 | } | ||
2702 | |||
/*
 * Allocate and initialize everything needed to run objects of the given
 * class on this channel: the gr context buffer, the patch buffer, the
 * global context buffer mappings, and the golden context image.
 *
 * Returns 0 on success, negative errno on failure. On failure, resources
 * already allocated are deliberately kept (see the comment at the "out"
 * label): they can be reused on a retry, and the golden image steps are
 * one-time operations that need no undo once they succeed.
 */
int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
			struct nvhost_alloc_obj_ctx_args *args)
{
	struct gk20a *g = c->g;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	int err = 0;

	gk20a_dbg_fn("");

	/* an address space needs to have been bound at this point.*/
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(dev_from_gk20a(g),
			   "not bound to address space at time"
			   " of grctx allocation");
		return -EINVAL;
	}

	if (!g->ops.gr.is_valid_class(g, args->class_num)) {
		gk20a_err(dev_from_gk20a(g),
			   "invalid obj class 0x%x", args->class_num);
		err = -EINVAL;
		goto out;
	}

	/* allocate gr ctx buffer */
	if (ch_ctx->gr_ctx.pages == NULL) {
		err = gr_gk20a_alloc_channel_gr_ctx(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to allocate gr ctx buffer");
			goto out;
		}
		c->obj_class = args->class_num;
	} else {
		/*TBD: needs to be more subtle about which is being allocated
		* as some are allowed to be allocated along same channel */
		gk20a_err(dev_from_gk20a(g),
			"too many classes alloc'd on same channel");
		err = -EINVAL;
		goto out;
	}

	/* commit gr ctx buffer */
	err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to commit gr ctx buffer");
		goto out;
	}

	/* allocate patch buffer */
	if (ch_ctx->patch_ctx.pages == NULL) {
		err = gr_gk20a_alloc_channel_patch_ctx(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to allocate patch buffer");
			goto out;
		}
	}

	/* map global buffer to channel gpu_va and commit */
	if (!ch_ctx->global_ctx_buffer_mapped) {
		err = gr_gk20a_map_global_ctx_buffers(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to map global ctx buffer");
			goto out;
		}
		/* commit runs with ELPG protection held */
		gr_gk20a_elpg_protected_call(g,
			gr_gk20a_commit_global_ctx_buffers(g, c, true));
	}

	/* init golden image, ELPG enabled after this is done */
	err = gr_gk20a_init_golden_ctx_image(g, c);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to init golden ctx image");
		goto out;
	}

	/* load golden image */
	if (!c->first_init) {
		err = gr_gk20a_elpg_protected_call(g,
			gr_gk20a_load_golden_ctx_image(g, c));
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to load golden ctx image");
			goto out;
		}
		c->first_init = true;
	}
	gk20a_mm_l2_invalidate(g);

	c->num_objects++;

	gk20a_dbg_fn("done");
	return 0;
out:
	/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
	   can be reused so no need to release them.
	   2. golden image init and load is a one time thing so if
	   they pass, no need to undo. */
	gk20a_err(dev_from_gk20a(g), "fail");
	return err;
}
2808 | |||
2809 | int gk20a_free_obj_ctx(struct channel_gk20a *c, | ||
2810 | struct nvhost_free_obj_ctx_args *args) | ||
2811 | { | ||
2812 | unsigned long timeout = gk20a_get_gr_idle_timeout(c->g); | ||
2813 | |||
2814 | gk20a_dbg_fn(""); | ||
2815 | |||
2816 | if (c->num_objects == 0) | ||
2817 | return 0; | ||
2818 | |||
2819 | c->num_objects--; | ||
2820 | |||
2821 | if (c->num_objects == 0) { | ||
2822 | c->first_init = false; | ||
2823 | gk20a_disable_channel(c, | ||
2824 | !c->has_timedout, | ||
2825 | timeout); | ||
2826 | gr_gk20a_unmap_channel_patch_ctx(c); | ||
2827 | } | ||
2828 | |||
2829 | return 0; | ||
2830 | } | ||
2831 | |||
/*
 * Free every resource owned by the gr unit: global context buffers, the
 * MMU write/read debug buffers, the compbit store, all per-GPC
 * configuration arrays, the cached ctxsw ucode/register lists, the local
 * golden image copy, and the comptag allocator.
 */
static void gk20a_remove_gr_support(struct gr_gk20a *gr)
{
	struct gk20a *g = gr->g;
	struct device *d = dev_from_gk20a(g);
	DEFINE_DMA_ATTRS(attrs);

	gk20a_dbg_fn("");

	gr_gk20a_free_global_ctx_buffers(g);

	dma_free_coherent(d, gr->mmu_wr_mem.size,
		gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova);
	gr->mmu_wr_mem.cpuva = NULL;
	gr->mmu_wr_mem.iova = 0;
	dma_free_coherent(d, gr->mmu_rd_mem.size,
		gr->mmu_rd_mem.cpuva, gr->mmu_rd_mem.iova);
	gr->mmu_rd_mem.cpuva = NULL;
	gr->mmu_rd_mem.iova = 0;

	/* compbit store was allocated with NO_KERNEL_MAPPING; the free
	 * must pass the same attribute */
	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
	dma_free_attrs(d, gr->compbit_store.size, gr->compbit_store.pages,
		       gr->compbit_store.base_iova, &attrs);

	memset(&gr->mmu_wr_mem, 0, sizeof(struct mmu_desc));
	memset(&gr->mmu_rd_mem, 0, sizeof(struct mmu_desc));
	memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));

	kfree(gr->gpc_tpc_count);
	kfree(gr->gpc_zcb_count);
	kfree(gr->gpc_ppc_count);
	kfree(gr->pes_tpc_count[0]);
	kfree(gr->pes_tpc_count[1]);
	kfree(gr->pes_tpc_mask[0]);
	kfree(gr->pes_tpc_mask[1]);
	kfree(gr->gpc_skip_mask);
	kfree(gr->map_tiles);
	gr->gpc_tpc_count = NULL;
	gr->gpc_zcb_count = NULL;
	gr->gpc_ppc_count = NULL;
	gr->pes_tpc_count[0] = NULL;
	gr->pes_tpc_count[1] = NULL;
	gr->pes_tpc_mask[0] = NULL;
	gr->pes_tpc_mask[1] = NULL;
	gr->gpc_skip_mask = NULL;
	gr->map_tiles = NULL;

	kfree(gr->ctx_vars.ucode.fecs.inst.l);
	kfree(gr->ctx_vars.ucode.fecs.data.l);
	kfree(gr->ctx_vars.ucode.gpccs.inst.l);
	kfree(gr->ctx_vars.ucode.gpccs.data.l);
	kfree(gr->ctx_vars.sw_bundle_init.l);
	kfree(gr->ctx_vars.sw_method_init.l);
	kfree(gr->ctx_vars.sw_ctx_load.l);
	kfree(gr->ctx_vars.sw_non_ctx_load.l);
	kfree(gr->ctx_vars.ctxsw_regs.sys.l);
	kfree(gr->ctx_vars.ctxsw_regs.gpc.l);
	kfree(gr->ctx_vars.ctxsw_regs.tpc.l);
	kfree(gr->ctx_vars.ctxsw_regs.zcull_gpc.l);
	kfree(gr->ctx_vars.ctxsw_regs.ppc.l);
	kfree(gr->ctx_vars.ctxsw_regs.pm_sys.l);
	kfree(gr->ctx_vars.ctxsw_regs.pm_gpc.l);
	kfree(gr->ctx_vars.ctxsw_regs.pm_tpc.l);

	kfree(gr->ctx_vars.local_golden_image);
	gr->ctx_vars.local_golden_image = NULL;

	gk20a_allocator_destroy(&gr->comp_tags);
}
2900 | |||
2901 | static void gr_gk20a_bundle_cb_defaults(struct gk20a *g) | ||
2902 | { | ||
2903 | struct gr_gk20a *gr = &g->gr; | ||
2904 | |||
2905 | gr->bundle_cb_default_size = | ||
2906 | gr_scc_bundle_cb_size_div_256b__prod_v(); | ||
2907 | gr->min_gpm_fifo_depth = | ||
2908 | gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); | ||
2909 | gr->bundle_cb_token_limit = | ||
2910 | gr_pd_ab_dist_cfg2_token_limit_init_v(); | ||
2911 | } | ||
2912 | |||
/*
 * Read the GPU's floorsweeping configuration (GPC/TPC/FBP/ZCB counts)
 * from hardware registers, allocate the per-GPC bookkeeping arrays, and
 * compute the per-GPC skip masks used for PD work distribution.
 *
 * Returns 0 on success, -ENOMEM on allocation failure or when the
 * hardware reports zero GPCs. NOTE(review): on the error path the
 * partially allocated arrays are not freed here — presumably
 * gk20a_remove_gr_support handles that; confirm against the caller.
 */
static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
{
	u32 gpc_index, pes_index;
	u32 pes_tpc_mask;
	u32 pes_tpc_count;
	u32 pes_heavy_index;
	u32 gpc_new_skip_mask;
	u32 tmp;

	tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r());
	gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp);

	tmp = gk20a_readl(g, top_num_gpcs_r());
	gr->max_gpc_count = top_num_gpcs_value_v(tmp);

	tmp = gk20a_readl(g, top_num_fbps_r());
	gr->max_fbps_count = top_num_fbps_value_v(tmp);

	tmp = gk20a_readl(g, top_tpc_per_gpc_r());
	gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);

	gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count;

	/* NOTE(review): re-reads top_num_fbps_r; sys_count intentionally
	 * tracks the FBP count here — confirm */
	tmp = gk20a_readl(g, top_num_fbps_r());
	gr->sys_count = top_num_fbps_value_v(tmp);

	tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r());
	gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);

	gr->pe_count_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
	gr->max_zcull_per_gpc_count = proj_scal_litter_num_zcull_banks_v();

	if (!gr->gpc_count) {
		gk20a_err(dev_from_gk20a(g), "gpc_count==0!");
		goto clean_up;
	}

	gr->gpc_tpc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
	gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
	gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
	gr->pes_tpc_count[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
	gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
	gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
	gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
	gr->gpc_skip_mask =
		kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
			GFP_KERNEL);

	if (!gr->gpc_tpc_count || !gr->gpc_zcb_count || !gr->gpc_ppc_count ||
	    !gr->pes_tpc_count[0] || !gr->pes_tpc_count[1] ||
	    !gr->pes_tpc_mask[0] || !gr->pes_tpc_mask[1] || !gr->gpc_skip_mask)
		goto clean_up;

	gr->ppc_count = 0;
	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		/* NOTE(review): reads GPC0's fs register on every
		 * iteration (no gpc stride applied) — confirm whether a
		 * per-GPC offset was intended */
		tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r());

		gr->gpc_tpc_count[gpc_index] =
			gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
		gr->tpc_count += gr->gpc_tpc_count[gpc_index];

		gr->gpc_zcb_count[gpc_index] =
			gr_gpc0_fs_gpc_num_available_zculls_v(tmp);
		gr->zcb_count += gr->gpc_zcb_count[gpc_index];

		gr->gpc_ppc_count[gpc_index] = gr->pe_count_per_gpc;
		gr->ppc_count += gr->gpc_ppc_count[gpc_index];
		for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {

			tmp = gk20a_readl(g,
				gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
				gpc_index * proj_gpc_stride_v());

			pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp);
			pes_tpc_count = count_bits(pes_tpc_mask);

			gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
			gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
		}

		/* For unbalanced PES pairs (5 TPCs total, or 4 split
		 * unevenly), skip one TPC from the heavier PES.
		 * x ^ (x & (x - 1)) isolates the lowest set bit of the
		 * heavier PES's TPC mask. */
		gpc_new_skip_mask = 0;
		if (gr->pes_tpc_count[0][gpc_index] +
		    gr->pes_tpc_count[1][gpc_index] == 5) {
			pes_heavy_index =
				gr->pes_tpc_count[0][gpc_index] >
				gr->pes_tpc_count[1][gpc_index] ? 0 : 1;

			gpc_new_skip_mask =
				gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
				   (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
				   (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));

		} else if ((gr->pes_tpc_count[0][gpc_index] +
			    gr->pes_tpc_count[1][gpc_index] == 4) &&
			   (gr->pes_tpc_count[0][gpc_index] !=
			    gr->pes_tpc_count[1][gpc_index])) {
				pes_heavy_index =
				    gr->pes_tpc_count[0][gpc_index] >
				    gr->pes_tpc_count[1][gpc_index] ? 0 : 1;

			gpc_new_skip_mask =
				gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
				   (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
				   (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
		}
		gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
	}

	gk20a_dbg_info("fbps: %d", gr->num_fbps);
	gk20a_dbg_info("max_gpc_count: %d", gr->max_gpc_count);
	gk20a_dbg_info("max_fbps_count: %d", gr->max_fbps_count);
	gk20a_dbg_info("max_tpc_per_gpc_count: %d", gr->max_tpc_per_gpc_count);
	gk20a_dbg_info("max_zcull_per_gpc_count: %d", gr->max_zcull_per_gpc_count);
	gk20a_dbg_info("max_tpc_count: %d", gr->max_tpc_count);
	gk20a_dbg_info("sys_count: %d", gr->sys_count);
	gk20a_dbg_info("gpc_count: %d", gr->gpc_count);
	gk20a_dbg_info("pe_count_per_gpc: %d", gr->pe_count_per_gpc);
	gk20a_dbg_info("tpc_count: %d", gr->tpc_count);
	gk20a_dbg_info("ppc_count: %d", gr->ppc_count);

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
		gk20a_dbg_info("gpc_tpc_count[%d] : %d",
			   gpc_index, gr->gpc_tpc_count[gpc_index]);
	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
		gk20a_dbg_info("gpc_zcb_count[%d] : %d",
			   gpc_index, gr->gpc_zcb_count[gpc_index]);
	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
		gk20a_dbg_info("gpc_ppc_count[%d] : %d",
			   gpc_index, gr->gpc_ppc_count[gpc_index]);
	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
		gk20a_dbg_info("gpc_skip_mask[%d] : %d",
			   gpc_index, gr->gpc_skip_mask[gpc_index]);
	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
		for (pes_index = 0;
		     pes_index < gr->pe_count_per_gpc;
		     pes_index++)
			gk20a_dbg_info("pes_tpc_count[%d][%d] : %d",
				   pes_index, gpc_index,
				   gr->pes_tpc_count[pes_index][gpc_index]);

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
		for (pes_index = 0;
		     pes_index < gr->pe_count_per_gpc;
		     pes_index++)
			gk20a_dbg_info("pes_tpc_mask[%d][%d] : %d",
				   pes_index, gpc_index,
				   gr->pes_tpc_mask[pes_index][gpc_index]);

	g->ops.gr.bundle_cb_defaults(g);
	g->ops.gr.cb_size_default(g);
	g->ops.gr.calc_global_ctx_buffer_size(g);
	gr->timeslice_mode = gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v();

	gk20a_dbg_info("bundle_cb_default_size: %d",
		   gr->bundle_cb_default_size);
	gk20a_dbg_info("min_gpm_fifo_depth: %d", gr->min_gpm_fifo_depth);
	gk20a_dbg_info("bundle_cb_token_limit: %d", gr->bundle_cb_token_limit);
	gk20a_dbg_info("attrib_cb_default_size: %d",
		   gr->attrib_cb_default_size);
	gk20a_dbg_info("attrib_cb_size: %d", gr->attrib_cb_size);
	gk20a_dbg_info("alpha_cb_default_size: %d", gr->alpha_cb_default_size);
	gk20a_dbg_info("alpha_cb_size: %d", gr->alpha_cb_size);
	gk20a_dbg_info("timeslice_mode: %d", gr->timeslice_mode);

	return 0;

clean_up:
	return -ENOMEM;
}
3082 | |||
3083 | static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr) | ||
3084 | { | ||
3085 | struct device *d = dev_from_gk20a(g); | ||
3086 | dma_addr_t iova; | ||
3087 | |||
3088 | gr->mmu_wr_mem_size = gr->mmu_rd_mem_size = 0x1000; | ||
3089 | |||
3090 | gr->mmu_wr_mem.size = gr->mmu_wr_mem_size; | ||
3091 | gr->mmu_wr_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_wr_mem_size, | ||
3092 | &iova, GFP_KERNEL); | ||
3093 | if (!gr->mmu_wr_mem.cpuva) | ||
3094 | goto err; | ||
3095 | |||
3096 | gr->mmu_wr_mem.iova = iova; | ||
3097 | |||
3098 | gr->mmu_rd_mem.size = gr->mmu_rd_mem_size; | ||
3099 | gr->mmu_rd_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_rd_mem_size, | ||
3100 | &iova, GFP_KERNEL); | ||
3101 | if (!gr->mmu_rd_mem.cpuva) | ||
3102 | goto err_free_wr_mem; | ||
3103 | |||
3104 | gr->mmu_rd_mem.iova = iova; | ||
3105 | return 0; | ||
3106 | |||
3107 | err_free_wr_mem: | ||
3108 | dma_free_coherent(d, gr->mmu_wr_mem.size, | ||
3109 | gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova); | ||
3110 | gr->mmu_wr_mem.cpuva = NULL; | ||
3111 | gr->mmu_wr_mem.iova = 0; | ||
3112 | err: | ||
3113 | return -ENOMEM; | ||
3114 | } | ||
3115 | |||
/* The first 18 primes; used by gr_gk20a_init_map_tiles to pick a
 * map_row_offset that does not divide tpc_count. NOTE(review): the
 * search there starts at index 1, so 2 is never selected — presumably
 * intentional; confirm. */
static u32 prime_set[18] = {
	2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
3118 | |||
/*
 * Build the screen-tile-to-GPC map: choose a map_row_offset for the
 * TPC count, then (re)build gr->map_tiles, distributing tile slots
 * across GPCs in proportion to each GPC's TPC count using an
 * accumulated-error scheme. An existing map is kept if it is still
 * consistent with the current tpc_count.
 *
 * Returns 0 on success, -ENOMEM if any temporary or the map allocation
 * fails.
 */
static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
{
	s32 comm_denom;
	s32 mul_factor;
	s32 *init_frac = NULL;
	s32 *init_err = NULL;
	s32 *run_err = NULL;
	s32 *sorted_num_tpcs = NULL;
	s32 *sorted_to_unsorted_gpc_map = NULL;
	u32 gpc_index;
	u32 gpc_mark = 0;
	u32 num_tpc;
	u32 max_tpc_count = 0;
	u32 swap;
	u32 tile_count;
	u32 index;
	bool delete_map = false;
	bool gpc_sorted;
	int ret = 0;

	/* scratch arrays sized for the maximum possible GPC count */
	init_frac = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
	init_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
	run_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
	sorted_num_tpcs =
		kzalloc(proj_scal_max_gpcs_v() *
			proj_scal_max_tpc_per_gpc_v() * sizeof(s32),
			GFP_KERNEL);
	sorted_to_unsorted_gpc_map =
		kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);

	if (!(init_frac && init_err && run_err && sorted_num_tpcs &&
	      sorted_to_unsorted_gpc_map)) {
		ret = -ENOMEM;
		goto clean_up;
	}

	gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET;

	/* default: smallest prime (>= 3, see prime_set note) that does
	 * not divide tpc_count */
	if (gr->tpc_count == 3)
		gr->map_row_offset = 2;
	else if (gr->tpc_count < 3)
		gr->map_row_offset = 1;
	else {
		gr->map_row_offset = 3;

		for (index = 1; index < 18; index++) {
			u32 prime = prime_set[index];
			if ((gr->tpc_count % prime) != 0) {
				gr->map_row_offset = prime;
				break;
			}
		}
	}

	/* hand-tuned overrides for specific TPC counts */
	switch (gr->tpc_count) {
	case 15:
		gr->map_row_offset = 6;
		break;
	case 14:
		gr->map_row_offset = 5;
		break;
	case 13:
		gr->map_row_offset = 2;
		break;
	case 11:
		gr->map_row_offset = 7;
		break;
	case 10:
		gr->map_row_offset = 6;
		break;
	case 7:
	case 5:
		gr->map_row_offset = 1;
		break;
	default:
		break;
	}

	/* discard an existing map that no longer matches the TPC config */
	if (gr->map_tiles) {
		if (gr->map_tile_count != gr->tpc_count)
			delete_map = true;

		for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) {
			if ((u32)gr->map_tiles[tile_count] >= gr->tpc_count)
				delete_map = true;
		}

		if (delete_map) {
			kfree(gr->map_tiles);
			gr->map_tiles = NULL;
			gr->map_tile_count = 0;
		}
	}

	if (gr->map_tiles == NULL) {
		gr->map_tile_count = proj_scal_max_gpcs_v();

		gr->map_tiles = kzalloc(proj_scal_max_gpcs_v() * sizeof(u8), GFP_KERNEL);
		if (gr->map_tiles == NULL) {
			ret = -ENOMEM;
			goto clean_up;
		}

		for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
			sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index];
			sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
		}

		/* bubble sort GPCs by descending TPC count, remembering
		 * the original GPC index of each sorted slot */
		gpc_sorted = false;
		while (!gpc_sorted) {
			gpc_sorted = true;
			for (gpc_index = 0; gpc_index < gr->gpc_count - 1; gpc_index++) {
				if (sorted_num_tpcs[gpc_index + 1] > sorted_num_tpcs[gpc_index]) {
					gpc_sorted = false;
					swap = sorted_num_tpcs[gpc_index];
					sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1];
					sorted_num_tpcs[gpc_index + 1] = swap;
					swap = sorted_to_unsorted_gpc_map[gpc_index];
					sorted_to_unsorted_gpc_map[gpc_index] =
						sorted_to_unsorted_gpc_map[gpc_index + 1];
					sorted_to_unsorted_gpc_map[gpc_index + 1] = swap;
				}
			}
		}

		for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
			if (gr->gpc_tpc_count[gpc_index] > max_tpc_count)
				max_tpc_count = gr->gpc_tpc_count[gpc_index];

		/* keep comm_denom even so comm_denom/2 below is exact */
		mul_factor = gr->gpc_count * max_tpc_count;
		if (mul_factor & 0x1)
			mul_factor = 2;
		else
			mul_factor = 1;

		comm_denom = gr->gpc_count * max_tpc_count * mul_factor;

		for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
			num_tpc = sorted_num_tpcs[gpc_index];

			init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor;

			if (num_tpc != 0)
				init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2;
			else
				init_err[gpc_index] = 0;

			run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
		}

		/* emit tile entries: each pass accumulates every GPC's
		 * fraction; a GPC whose accumulator crosses half the
		 * denominator gets the next tile slot */
		while (gpc_mark < gr->tpc_count) {
			for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
				if ((run_err[gpc_index] * 2) >= comm_denom) {
					gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
					run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
				} else
					run_err[gpc_index] += init_frac[gpc_index];
			}
		}
	}

clean_up:
	kfree(init_frac);
	kfree(init_err);
	kfree(run_err);
	kfree(sorted_num_tpcs);
	kfree(sorted_to_unsorted_gpc_map);

	if (ret)
		gk20a_err(dev_from_gk20a(g), "fail");
	else
		gk20a_dbg_fn("done");

	return ret;
}
3294 | |||
3295 | static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr) | ||
3296 | { | ||
3297 | struct gr_zcull_gk20a *zcull = &gr->zcull; | ||
3298 | |||
3299 | zcull->aliquot_width = gr->tpc_count * 16; | ||
3300 | zcull->aliquot_height = 16; | ||
3301 | |||
3302 | zcull->width_align_pixels = gr->tpc_count * 16; | ||
3303 | zcull->height_align_pixels = 32; | ||
3304 | |||
3305 | zcull->aliquot_size = | ||
3306 | zcull->aliquot_width * zcull->aliquot_height; | ||
3307 | |||
3308 | /* assume no floor sweeping since we only have 1 tpc in 1 gpc */ | ||
3309 | zcull->pixel_squares_by_aliquots = | ||
3310 | gr->zcb_count * 16 * 16 * gr->tpc_count / | ||
3311 | (gr->gpc_count * gr->gpc_tpc_count[0]); | ||
3312 | |||
3313 | zcull->total_aliquots = | ||
3314 | gr_gpc0_zcull_total_ram_size_num_aliquots_f( | ||
3315 | gk20a_readl(g, gr_gpc0_zcull_total_ram_size_r())); | ||
3316 | |||
3317 | return 0; | ||
3318 | } | ||
3319 | |||
3320 | u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr) | ||
3321 | { | ||
3322 | /* assuming gr has already been initialized */ | ||
3323 | return gr->ctx_vars.zcull_ctxsw_image_size; | ||
3324 | } | ||
3325 | |||
3326 | int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, | ||
3327 | struct channel_gk20a *c, u64 zcull_va, u32 mode) | ||
3328 | { | ||
3329 | struct zcull_ctx_desc *zcull_ctx = &c->ch_ctx.zcull_ctx; | ||
3330 | |||
3331 | zcull_ctx->ctx_sw_mode = mode; | ||
3332 | zcull_ctx->gpu_va = zcull_va; | ||
3333 | |||
3334 | /* TBD: don't disable channel in sw method processing */ | ||
3335 | return gr_gk20a_ctx_zcull_setup(g, c, true); | ||
3336 | } | ||
3337 | |||
3338 | int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, | ||
3339 | struct gr_zcull_info *zcull_params) | ||
3340 | { | ||
3341 | struct gr_zcull_gk20a *zcull = &gr->zcull; | ||
3342 | |||
3343 | zcull_params->width_align_pixels = zcull->width_align_pixels; | ||
3344 | zcull_params->height_align_pixels = zcull->height_align_pixels; | ||
3345 | zcull_params->pixel_squares_by_aliquots = | ||
3346 | zcull->pixel_squares_by_aliquots; | ||
3347 | zcull_params->aliquot_total = zcull->total_aliquots; | ||
3348 | |||
3349 | zcull_params->region_byte_multiplier = | ||
3350 | gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v(); | ||
3351 | zcull_params->region_header_size = | ||
3352 | proj_scal_litter_num_gpcs_v() * | ||
3353 | gr_zcull_save_restore_header_bytes_per_gpc_v(); | ||
3354 | |||
3355 | zcull_params->subregion_header_size = | ||
3356 | proj_scal_litter_num_gpcs_v() * | ||
3357 | gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(); | ||
3358 | |||
3359 | zcull_params->subregion_width_align_pixels = | ||
3360 | gr->tpc_count * gr_gpc0_zcull_zcsize_width_subregion__multiple_v(); | ||
3361 | zcull_params->subregion_height_align_pixels = | ||
3362 | gr_gpc0_zcull_zcsize_height_subregion__multiple_v(); | ||
3363 | zcull_params->subregion_count = gr_zcull_subregion_qty_v(); | ||
3364 | |||
3365 | return 0; | ||
3366 | } | ||
3367 | |||
3368 | static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, | ||
3369 | struct zbc_entry *color_val, u32 index) | ||
3370 | { | ||
3371 | struct fifo_gk20a *f = &g->fifo; | ||
3372 | struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A; | ||
3373 | u32 i; | ||
3374 | unsigned long end_jiffies = jiffies + | ||
3375 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | ||
3376 | u32 ret; | ||
3377 | |||
3378 | ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); | ||
3379 | if (ret) { | ||
3380 | gk20a_err(dev_from_gk20a(g), | ||
3381 | "failed to disable gr engine activity\n"); | ||
3382 | return ret; | ||
3383 | } | ||
3384 | |||
3385 | ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); | ||
3386 | if (ret) { | ||
3387 | gk20a_err(dev_from_gk20a(g), | ||
3388 | "failed to idle graphics\n"); | ||
3389 | goto clean_up; | ||
3390 | } | ||
3391 | |||
3392 | /* update l2 table */ | ||
3393 | g->ops.ltc.set_zbc_color_entry(g, color_val, index); | ||
3394 | |||
3395 | /* update ds table */ | ||
3396 | gk20a_writel(g, gr_ds_zbc_color_r_r(), | ||
3397 | gr_ds_zbc_color_r_val_f(color_val->color_ds[0])); | ||
3398 | gk20a_writel(g, gr_ds_zbc_color_g_r(), | ||
3399 | gr_ds_zbc_color_g_val_f(color_val->color_ds[1])); | ||
3400 | gk20a_writel(g, gr_ds_zbc_color_b_r(), | ||
3401 | gr_ds_zbc_color_b_val_f(color_val->color_ds[2])); | ||
3402 | gk20a_writel(g, gr_ds_zbc_color_a_r(), | ||
3403 | gr_ds_zbc_color_a_val_f(color_val->color_ds[3])); | ||
3404 | |||
3405 | gk20a_writel(g, gr_ds_zbc_color_fmt_r(), | ||
3406 | gr_ds_zbc_color_fmt_val_f(color_val->format)); | ||
3407 | |||
3408 | gk20a_writel(g, gr_ds_zbc_tbl_index_r(), | ||
3409 | gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE)); | ||
3410 | |||
3411 | /* trigger the write */ | ||
3412 | gk20a_writel(g, gr_ds_zbc_tbl_ld_r(), | ||
3413 | gr_ds_zbc_tbl_ld_select_c_f() | | ||
3414 | gr_ds_zbc_tbl_ld_action_write_f() | | ||
3415 | gr_ds_zbc_tbl_ld_trigger_active_f()); | ||
3416 | |||
3417 | /* update local copy */ | ||
3418 | for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { | ||
3419 | gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i]; | ||
3420 | gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i]; | ||
3421 | } | ||
3422 | gr->zbc_col_tbl[index].format = color_val->format; | ||
3423 | gr->zbc_col_tbl[index].ref_cnt++; | ||
3424 | |||
3425 | clean_up: | ||
3426 | ret = gk20a_fifo_enable_engine_activity(g, gr_info); | ||
3427 | if (ret) { | ||
3428 | gk20a_err(dev_from_gk20a(g), | ||
3429 | "failed to enable gr engine activity\n"); | ||
3430 | } | ||
3431 | |||
3432 | return ret; | ||
3433 | } | ||
3434 | |||
3435 | static int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, | ||
3436 | struct zbc_entry *depth_val, u32 index) | ||
3437 | { | ||
3438 | struct fifo_gk20a *f = &g->fifo; | ||
3439 | struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A; | ||
3440 | unsigned long end_jiffies = jiffies + | ||
3441 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | ||
3442 | u32 ret; | ||
3443 | |||
3444 | ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); | ||
3445 | if (ret) { | ||
3446 | gk20a_err(dev_from_gk20a(g), | ||
3447 | "failed to disable gr engine activity\n"); | ||
3448 | return ret; | ||
3449 | } | ||
3450 | |||
3451 | ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); | ||
3452 | if (ret) { | ||
3453 | gk20a_err(dev_from_gk20a(g), | ||
3454 | "failed to idle graphics\n"); | ||
3455 | goto clean_up; | ||
3456 | } | ||
3457 | |||
3458 | /* update l2 table */ | ||
3459 | g->ops.ltc.set_zbc_depth_entry(g, depth_val, index); | ||
3460 | |||
3461 | /* update ds table */ | ||
3462 | gk20a_writel(g, gr_ds_zbc_z_r(), | ||
3463 | gr_ds_zbc_z_val_f(depth_val->depth)); | ||
3464 | |||
3465 | gk20a_writel(g, gr_ds_zbc_z_fmt_r(), | ||
3466 | gr_ds_zbc_z_fmt_val_f(depth_val->format)); | ||
3467 | |||
3468 | gk20a_writel(g, gr_ds_zbc_tbl_index_r(), | ||
3469 | gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE)); | ||
3470 | |||
3471 | /* trigger the write */ | ||
3472 | gk20a_writel(g, gr_ds_zbc_tbl_ld_r(), | ||
3473 | gr_ds_zbc_tbl_ld_select_z_f() | | ||
3474 | gr_ds_zbc_tbl_ld_action_write_f() | | ||
3475 | gr_ds_zbc_tbl_ld_trigger_active_f()); | ||
3476 | |||
3477 | /* update local copy */ | ||
3478 | gr->zbc_dep_tbl[index].depth = depth_val->depth; | ||
3479 | gr->zbc_dep_tbl[index].format = depth_val->format; | ||
3480 | gr->zbc_dep_tbl[index].ref_cnt++; | ||
3481 | |||
3482 | clean_up: | ||
3483 | ret = gk20a_fifo_enable_engine_activity(g, gr_info); | ||
3484 | if (ret) { | ||
3485 | gk20a_err(dev_from_gk20a(g), | ||
3486 | "failed to enable gr engine activity\n"); | ||
3487 | } | ||
3488 | |||
3489 | return ret; | ||
3490 | } | ||
3491 | |||
3492 | int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr, | ||
3493 | struct zbc_entry *zbc_val) | ||
3494 | { | ||
3495 | struct zbc_color_table *c_tbl; | ||
3496 | struct zbc_depth_table *d_tbl; | ||
3497 | u32 i, ret = -ENOMEM; | ||
3498 | bool added = false; | ||
3499 | u32 entries; | ||
3500 | |||
3501 | /* no endian swap ? */ | ||
3502 | |||
3503 | switch (zbc_val->type) { | ||
3504 | case GK20A_ZBC_TYPE_COLOR: | ||
3505 | /* search existing tables */ | ||
3506 | for (i = 0; i < gr->max_used_color_index; i++) { | ||
3507 | |||
3508 | c_tbl = &gr->zbc_col_tbl[i]; | ||
3509 | |||
3510 | if (c_tbl->ref_cnt && c_tbl->format == zbc_val->format && | ||
3511 | memcmp(c_tbl->color_ds, zbc_val->color_ds, | ||
3512 | sizeof(zbc_val->color_ds)) == 0) { | ||
3513 | |||
3514 | if (memcmp(c_tbl->color_l2, zbc_val->color_l2, | ||
3515 | sizeof(zbc_val->color_l2))) { | ||
3516 | gk20a_err(dev_from_gk20a(g), | ||
3517 | "zbc l2 and ds color don't match with existing entries"); | ||
3518 | return -EINVAL; | ||
3519 | } | ||
3520 | added = true; | ||
3521 | c_tbl->ref_cnt++; | ||
3522 | ret = 0; | ||
3523 | break; | ||
3524 | } | ||
3525 | } | ||
3526 | /* add new table */ | ||
3527 | if (!added && | ||
3528 | gr->max_used_color_index < GK20A_ZBC_TABLE_SIZE) { | ||
3529 | |||
3530 | c_tbl = | ||
3531 | &gr->zbc_col_tbl[gr->max_used_color_index]; | ||
3532 | WARN_ON(c_tbl->ref_cnt != 0); | ||
3533 | |||
3534 | ret = gr_gk20a_add_zbc_color(g, gr, | ||
3535 | zbc_val, gr->max_used_color_index); | ||
3536 | |||
3537 | if (!ret) | ||
3538 | gr->max_used_color_index++; | ||
3539 | } | ||
3540 | break; | ||
3541 | case GK20A_ZBC_TYPE_DEPTH: | ||
3542 | /* search existing tables */ | ||
3543 | for (i = 0; i < gr->max_used_depth_index; i++) { | ||
3544 | |||
3545 | d_tbl = &gr->zbc_dep_tbl[i]; | ||
3546 | |||
3547 | if (d_tbl->ref_cnt && | ||
3548 | d_tbl->depth == zbc_val->depth && | ||
3549 | d_tbl->format == zbc_val->format) { | ||
3550 | added = true; | ||
3551 | d_tbl->ref_cnt++; | ||
3552 | ret = 0; | ||
3553 | break; | ||
3554 | } | ||
3555 | } | ||
3556 | /* add new table */ | ||
3557 | if (!added && | ||
3558 | gr->max_used_depth_index < GK20A_ZBC_TABLE_SIZE) { | ||
3559 | |||
3560 | d_tbl = | ||
3561 | &gr->zbc_dep_tbl[gr->max_used_depth_index]; | ||
3562 | WARN_ON(d_tbl->ref_cnt != 0); | ||
3563 | |||
3564 | ret = gr_gk20a_add_zbc_depth(g, gr, | ||
3565 | zbc_val, gr->max_used_depth_index); | ||
3566 | |||
3567 | if (!ret) | ||
3568 | gr->max_used_depth_index++; | ||
3569 | } | ||
3570 | break; | ||
3571 | default: | ||
3572 | gk20a_err(dev_from_gk20a(g), | ||
3573 | "invalid zbc table type %d", zbc_val->type); | ||
3574 | return -EINVAL; | ||
3575 | } | ||
3576 | |||
3577 | if (!added && ret == 0) { | ||
3578 | /* update zbc for elpg only when new entry is added */ | ||
3579 | entries = max(gr->max_used_color_index, | ||
3580 | gr->max_used_depth_index); | ||
3581 | gk20a_pmu_save_zbc(g, entries); | ||
3582 | } | ||
3583 | |||
3584 | return ret; | ||
3585 | } | ||
3586 | |||
3587 | int gr_gk20a_clear_zbc_table(struct gk20a *g, struct gr_gk20a *gr) | ||
3588 | { | ||
3589 | struct fifo_gk20a *f = &g->fifo; | ||
3590 | struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A; | ||
3591 | u32 i, j; | ||
3592 | unsigned long end_jiffies = jiffies + | ||
3593 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | ||
3594 | u32 ret; | ||
3595 | |||
3596 | ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); | ||
3597 | if (ret) { | ||
3598 | gk20a_err(dev_from_gk20a(g), | ||
3599 | "failed to disable gr engine activity\n"); | ||
3600 | return ret; | ||
3601 | } | ||
3602 | |||
3603 | ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); | ||
3604 | if (ret) { | ||
3605 | gk20a_err(dev_from_gk20a(g), | ||
3606 | "failed to idle graphics\n"); | ||
3607 | goto clean_up; | ||
3608 | } | ||
3609 | |||
3610 | for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) { | ||
3611 | gr->zbc_col_tbl[i].format = 0; | ||
3612 | gr->zbc_col_tbl[i].ref_cnt = 0; | ||
3613 | |||
3614 | gk20a_writel(g, gr_ds_zbc_color_fmt_r(), | ||
3615 | gr_ds_zbc_color_fmt_val_invalid_f()); | ||
3616 | gk20a_writel(g, gr_ds_zbc_tbl_index_r(), | ||
3617 | gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE)); | ||
3618 | |||
3619 | /* trigger the write */ | ||
3620 | gk20a_writel(g, gr_ds_zbc_tbl_ld_r(), | ||
3621 | gr_ds_zbc_tbl_ld_select_c_f() | | ||
3622 | gr_ds_zbc_tbl_ld_action_write_f() | | ||
3623 | gr_ds_zbc_tbl_ld_trigger_active_f()); | ||
3624 | |||
3625 | /* clear l2 table */ | ||
3626 | g->ops.ltc.clear_zbc_color_entry(g, i); | ||
3627 | |||
3628 | for (j = 0; j < GK20A_ZBC_COLOR_VALUE_SIZE; j++) { | ||
3629 | gr->zbc_col_tbl[i].color_l2[j] = 0; | ||
3630 | gr->zbc_col_tbl[i].color_ds[j] = 0; | ||
3631 | } | ||
3632 | } | ||
3633 | gr->max_used_color_index = 0; | ||
3634 | gr->max_default_color_index = 0; | ||
3635 | |||
3636 | for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) { | ||
3637 | gr->zbc_dep_tbl[i].depth = 0; | ||
3638 | gr->zbc_dep_tbl[i].format = 0; | ||
3639 | gr->zbc_dep_tbl[i].ref_cnt = 0; | ||
3640 | |||
3641 | gk20a_writel(g, gr_ds_zbc_z_fmt_r(), | ||
3642 | gr_ds_zbc_z_fmt_val_invalid_f()); | ||
3643 | gk20a_writel(g, gr_ds_zbc_tbl_index_r(), | ||
3644 | gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE)); | ||
3645 | |||
3646 | /* trigger the write */ | ||
3647 | gk20a_writel(g, gr_ds_zbc_tbl_ld_r(), | ||
3648 | gr_ds_zbc_tbl_ld_select_z_f() | | ||
3649 | gr_ds_zbc_tbl_ld_action_write_f() | | ||
3650 | gr_ds_zbc_tbl_ld_trigger_active_f()); | ||
3651 | |||
3652 | /* clear l2 table */ | ||
3653 | g->ops.ltc.clear_zbc_depth_entry(g, i); | ||
3654 | } | ||
3655 | gr->max_used_depth_index = 0; | ||
3656 | gr->max_default_depth_index = 0; | ||
3657 | |||
3658 | clean_up: | ||
3659 | ret = gk20a_fifo_enable_engine_activity(g, gr_info); | ||
3660 | if (ret) { | ||
3661 | gk20a_err(dev_from_gk20a(g), | ||
3662 | "failed to enable gr engine activity\n"); | ||
3663 | } | ||
3664 | |||
3665 | /* elpg stuff */ | ||
3666 | |||
3667 | return ret; | ||
3668 | } | ||
3669 | |||
3670 | /* get a zbc table entry specified by index | ||
3671 | * return table size when type is invalid */ | ||
3672 | int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr, | ||
3673 | struct zbc_query_params *query_params) | ||
3674 | { | ||
3675 | u32 index = query_params->index_size; | ||
3676 | u32 i; | ||
3677 | |||
3678 | switch (query_params->type) { | ||
3679 | case GK20A_ZBC_TYPE_INVALID: | ||
3680 | query_params->index_size = GK20A_ZBC_TABLE_SIZE; | ||
3681 | break; | ||
3682 | case GK20A_ZBC_TYPE_COLOR: | ||
3683 | if (index >= GK20A_ZBC_TABLE_SIZE) { | ||
3684 | gk20a_err(dev_from_gk20a(g), | ||
3685 | "invalid zbc color table index\n"); | ||
3686 | return -EINVAL; | ||
3687 | } | ||
3688 | for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { | ||
3689 | query_params->color_l2[i] = | ||
3690 | gr->zbc_col_tbl[index].color_l2[i]; | ||
3691 | query_params->color_ds[i] = | ||
3692 | gr->zbc_col_tbl[index].color_ds[i]; | ||
3693 | } | ||
3694 | query_params->format = gr->zbc_col_tbl[index].format; | ||
3695 | query_params->ref_cnt = gr->zbc_col_tbl[index].ref_cnt; | ||
3696 | break; | ||
3697 | case GK20A_ZBC_TYPE_DEPTH: | ||
3698 | if (index >= GK20A_ZBC_TABLE_SIZE) { | ||
3699 | gk20a_err(dev_from_gk20a(g), | ||
3700 | "invalid zbc depth table index\n"); | ||
3701 | return -EINVAL; | ||
3702 | } | ||
3703 | query_params->depth = gr->zbc_dep_tbl[index].depth; | ||
3704 | query_params->format = gr->zbc_dep_tbl[index].format; | ||
3705 | query_params->ref_cnt = gr->zbc_dep_tbl[index].ref_cnt; | ||
3706 | break; | ||
3707 | default: | ||
3708 | gk20a_err(dev_from_gk20a(g), | ||
3709 | "invalid zbc table type\n"); | ||
3710 | return -EINVAL; | ||
3711 | } | ||
3712 | |||
3713 | return 0; | ||
3714 | } | ||
3715 | |||
3716 | int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr) | ||
3717 | { | ||
3718 | struct zbc_entry zbc_val; | ||
3719 | u32 i, err; | ||
3720 | |||
3721 | /* load default color table */ | ||
3722 | zbc_val.type = GK20A_ZBC_TYPE_COLOR; | ||
3723 | |||
3724 | zbc_val.format = gr_ds_zbc_color_fmt_val_zero_v(); | ||
3725 | for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { | ||
3726 | zbc_val.color_ds[i] = 0; | ||
3727 | zbc_val.color_l2[i] = 0; | ||
3728 | } | ||
3729 | err = gr_gk20a_add_zbc(g, gr, &zbc_val); | ||
3730 | |||
3731 | zbc_val.format = gr_ds_zbc_color_fmt_val_unorm_one_v(); | ||
3732 | for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { | ||
3733 | zbc_val.color_ds[i] = 0xffffffff; | ||
3734 | zbc_val.color_l2[i] = 0x3f800000; | ||
3735 | } | ||
3736 | err |= gr_gk20a_add_zbc(g, gr, &zbc_val); | ||
3737 | |||
3738 | zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v(); | ||
3739 | for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { | ||
3740 | zbc_val.color_ds[i] = 0; | ||
3741 | zbc_val.color_l2[i] = 0; | ||
3742 | } | ||
3743 | err |= gr_gk20a_add_zbc(g, gr, &zbc_val); | ||
3744 | |||
3745 | zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v(); | ||
3746 | for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { | ||
3747 | zbc_val.color_ds[i] = 0x3f800000; | ||
3748 | zbc_val.color_l2[i] = 0x3f800000; | ||
3749 | } | ||
3750 | err |= gr_gk20a_add_zbc(g, gr, &zbc_val); | ||
3751 | |||
3752 | if (!err) | ||
3753 | gr->max_default_color_index = 4; | ||
3754 | else { | ||
3755 | gk20a_err(dev_from_gk20a(g), | ||
3756 | "fail to load default zbc color table\n"); | ||
3757 | return err; | ||
3758 | } | ||
3759 | |||
3760 | /* load default depth table */ | ||
3761 | zbc_val.type = GK20A_ZBC_TYPE_DEPTH; | ||
3762 | |||
3763 | zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v(); | ||
3764 | zbc_val.depth = 0; | ||
3765 | err = gr_gk20a_add_zbc(g, gr, &zbc_val); | ||
3766 | |||
3767 | zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v(); | ||
3768 | zbc_val.depth = 0x3f800000; | ||
3769 | err |= gr_gk20a_add_zbc(g, gr, &zbc_val); | ||
3770 | |||
3771 | if (!err) | ||
3772 | gr->max_default_depth_index = 2; | ||
3773 | else { | ||
3774 | gk20a_err(dev_from_gk20a(g), | ||
3775 | "fail to load default zbc depth table\n"); | ||
3776 | return err; | ||
3777 | } | ||
3778 | |||
3779 | return 0; | ||
3780 | } | ||
3781 | |||
/* Add a zbc entry with elpg protection wrapped around the hw update. */
int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
			struct zbc_entry *zbc_val)
{
	int ret;

	gk20a_dbg_fn("");

	ret = gr_gk20a_elpg_protected_call(g,
		gr_gk20a_add_zbc(g, gr, zbc_val));

	return ret;
}
3790 | |||
3791 | void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine) | ||
3792 | { | ||
3793 | u32 gate_ctrl; | ||
3794 | |||
3795 | gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine)); | ||
3796 | |||
3797 | switch (mode) { | ||
3798 | case BLCG_RUN: | ||
3799 | gate_ctrl = set_field(gate_ctrl, | ||
3800 | therm_gate_ctrl_blk_clk_m(), | ||
3801 | therm_gate_ctrl_blk_clk_run_f()); | ||
3802 | break; | ||
3803 | case BLCG_AUTO: | ||
3804 | gate_ctrl = set_field(gate_ctrl, | ||
3805 | therm_gate_ctrl_blk_clk_m(), | ||
3806 | therm_gate_ctrl_blk_clk_auto_f()); | ||
3807 | break; | ||
3808 | default: | ||
3809 | gk20a_err(dev_from_gk20a(g), | ||
3810 | "invalid blcg mode %d", mode); | ||
3811 | return; | ||
3812 | } | ||
3813 | |||
3814 | gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl); | ||
3815 | } | ||
3816 | |||
3817 | void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) | ||
3818 | { | ||
3819 | u32 gate_ctrl, idle_filter; | ||
3820 | |||
3821 | gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine)); | ||
3822 | |||
3823 | switch (mode) { | ||
3824 | case ELCG_RUN: | ||
3825 | gate_ctrl = set_field(gate_ctrl, | ||
3826 | therm_gate_ctrl_eng_clk_m(), | ||
3827 | therm_gate_ctrl_eng_clk_run_f()); | ||
3828 | gate_ctrl = set_field(gate_ctrl, | ||
3829 | therm_gate_ctrl_eng_pwr_m(), | ||
3830 | /* set elpg to auto to meet hw expectation */ | ||
3831 | therm_gate_ctrl_eng_pwr_auto_f()); | ||
3832 | break; | ||
3833 | case ELCG_STOP: | ||
3834 | gate_ctrl = set_field(gate_ctrl, | ||
3835 | therm_gate_ctrl_eng_clk_m(), | ||
3836 | therm_gate_ctrl_eng_clk_stop_f()); | ||
3837 | break; | ||
3838 | case ELCG_AUTO: | ||
3839 | gate_ctrl = set_field(gate_ctrl, | ||
3840 | therm_gate_ctrl_eng_clk_m(), | ||
3841 | therm_gate_ctrl_eng_clk_auto_f()); | ||
3842 | break; | ||
3843 | default: | ||
3844 | gk20a_err(dev_from_gk20a(g), | ||
3845 | "invalid elcg mode %d", mode); | ||
3846 | } | ||
3847 | |||
3848 | if (tegra_platform_is_linsim()) { | ||
3849 | gate_ctrl = set_field(gate_ctrl, | ||
3850 | therm_gate_ctrl_eng_delay_after_m(), | ||
3851 | therm_gate_ctrl_eng_delay_after_f(4)); | ||
3852 | } | ||
3853 | |||
3854 | /* 2 * (1 << 9) = 1024 clks */ | ||
3855 | gate_ctrl = set_field(gate_ctrl, | ||
3856 | therm_gate_ctrl_eng_idle_filt_exp_m(), | ||
3857 | therm_gate_ctrl_eng_idle_filt_exp_f(9)); | ||
3858 | gate_ctrl = set_field(gate_ctrl, | ||
3859 | therm_gate_ctrl_eng_idle_filt_mant_m(), | ||
3860 | therm_gate_ctrl_eng_idle_filt_mant_f(2)); | ||
3861 | gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl); | ||
3862 | |||
3863 | /* default fecs_idle_filter to 0 */ | ||
3864 | idle_filter = gk20a_readl(g, therm_fecs_idle_filter_r()); | ||
3865 | idle_filter &= ~therm_fecs_idle_filter_value_m(); | ||
3866 | gk20a_writel(g, therm_fecs_idle_filter_r(), idle_filter); | ||
3867 | /* default hubmmu_idle_filter to 0 */ | ||
3868 | idle_filter = gk20a_readl(g, therm_hubmmu_idle_filter_r()); | ||
3869 | idle_filter &= ~therm_hubmmu_idle_filter_value_m(); | ||
3870 | gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter); | ||
3871 | } | ||
3872 | |||
/*
 * Program the zcull hardware from the sw map built earlier
 * (gr->map_tiles): the 32-entry SM-in-GPC number map, then per-GPC
 * zcull RAM addressing, bank counts, and the SM reciprocal.
 *
 * Returns 0 on success, -1 if the tile map was never built, -ENOMEM on
 * allocation failure, -EINVAL on an inconsistent floorsweeping config.
 */
static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
{
	u32 gpc_index, gpc_tpc_count, gpc_zcull_count;
	u32 *zcull_map_tiles, *zcull_bank_counters;
	u32 map_counter;
	u32 rcp_conserv;
	u32 offset;
	bool floorsweep = false;

	/* gr_gk20a_init_map_tiles() must have run first */
	if (!gr->map_tiles)
		return -1;

	zcull_map_tiles = kzalloc(proj_scal_max_gpcs_v() *
			proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
	if (!zcull_map_tiles) {
		gk20a_err(dev_from_gk20a(g),
			"failed to allocate zcull temp buffers");
		return -ENOMEM;
	}
	zcull_bank_counters = kzalloc(proj_scal_max_gpcs_v() *
			proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);

	if (!zcull_bank_counters) {
		gk20a_err(dev_from_gk20a(g),
			"failed to allocate zcull temp buffers");
		kfree(zcull_map_tiles);
		return -ENOMEM;
	}

	/* For each tile, assign the next free bank number within its GPC:
	 * bank_counters[gpc] counts how many tiles of that GPC were seen
	 * so far, and that running count becomes the tile's map value. */
	for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) {
		zcull_map_tiles[map_counter] =
			zcull_bank_counters[gr->map_tiles[map_counter]];
		zcull_bank_counters[gr->map_tiles[map_counter]]++;
	}

	/* Tiles 0..31 are packed into four map registers, 8 tiles each.
	 * Entries past gr->tpc_count are zero from kzalloc. */
	gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(),
		gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(zcull_map_tiles[0]) |
		gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(zcull_map_tiles[1]) |
		gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(zcull_map_tiles[2]) |
		gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(zcull_map_tiles[3]) |
		gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(zcull_map_tiles[4]) |
		gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(zcull_map_tiles[5]) |
		gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(zcull_map_tiles[6]) |
		gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(zcull_map_tiles[7]));

	gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(),
		gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(zcull_map_tiles[8]) |
		gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(zcull_map_tiles[9]) |
		gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(zcull_map_tiles[10]) |
		gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(zcull_map_tiles[11]) |
		gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(zcull_map_tiles[12]) |
		gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(zcull_map_tiles[13]) |
		gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(zcull_map_tiles[14]) |
		gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(zcull_map_tiles[15]));

	gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(),
		gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(zcull_map_tiles[16]) |
		gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(zcull_map_tiles[17]) |
		gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(zcull_map_tiles[18]) |
		gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(zcull_map_tiles[19]) |
		gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(zcull_map_tiles[20]) |
		gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(zcull_map_tiles[21]) |
		gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(zcull_map_tiles[22]) |
		gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(zcull_map_tiles[23]));

	gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(),
		gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(zcull_map_tiles[24]) |
		gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(zcull_map_tiles[25]) |
		gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(zcull_map_tiles[26]) |
		gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(zcull_map_tiles[27]) |
		gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(zcull_map_tiles[28]) |
		gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(zcull_map_tiles[29]) |
		gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(zcull_map_tiles[30]) |
		gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(zcull_map_tiles[31]));

	/* temp buffers no longer needed past this point */
	kfree(zcull_map_tiles);
	kfree(zcull_bank_counters);

	/* Sanity-check the per-GPC zcull bank counts and detect
	 * floorsweeping (any GPC with a non-max, non-zero bank count). */
	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		gpc_tpc_count = gr->gpc_tpc_count[gpc_index];
		gpc_zcull_count = gr->gpc_zcb_count[gpc_index];

		if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
		    gpc_zcull_count < gpc_tpc_count) {
			gk20a_err(dev_from_gk20a(g),
				"zcull_banks (%d) less than tpcs (%d) for gpc (%d)",
				gpc_zcull_count, gpc_tpc_count, gpc_index);
			return -EINVAL;
		}
		if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
		    gpc_zcull_count != 0)
			floorsweep = true;
	}

	/* 1.0f / 1.0f * gr_gpc0_zcull_sm_num_rcp_conservative__max_v() */
	rcp_conserv = gr_gpc0_zcull_sm_num_rcp_conservative__max_v();

	/* Program each GPC's zcull RAM addressing, active bank count and
	 * SM reciprocal; registers are at gpc0 base + gpc stride * index. */
	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		offset = gpc_index * proj_gpc_stride_v();

		if (floorsweep) {
			gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
				gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
				gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
					gr->max_zcull_per_gpc_count));
		} else {
			gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
				gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
				gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
					gr->gpc_tpc_count[gpc_index]));
		}

		gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset,
			gr_gpc0_zcull_fs_num_active_banks_f(gr->gpc_zcb_count[gpc_index]) |
			gr_gpc0_zcull_fs_num_sms_f(gr->tpc_count));

		gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset,
			gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv));
	}

	gk20a_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(),
		gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv));

	return 0;
}
3998 | |||
/*
 * Route SM exceptions up to the host: TPC forwards SM exceptions, and
 * the GPC forwards TPC0's exceptions. Only tpc0/gpc0 registers are
 * touched here.
 */
static void gk20a_gr_enable_gpc_exceptions(struct gk20a *g)
{
	/* enable tpc exception forwarding */
	gk20a_writel(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r(),
		gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f());

	/* enable gpc exception forwarding */
	gk20a_writel(g, gr_gpc0_gpccs_gpc_exception_en_r(),
		gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f());
}
4009 | |||
/*
 * Enable (and reset any pending) hardware-warning exceptions in the GR
 * front-end units (FE, MEMFMT, SCC, MME, PD, SKED, DS), then unmask all
 * 24 SPH error reports in the DS report mask.
 */
void gr_gk20a_enable_hww_exceptions(struct gk20a *g)
{
	/* enable exceptions */
	gk20a_writel(g, gr_fe_hww_esr_r(),
		gr_fe_hww_esr_en_enable_f() |
		gr_fe_hww_esr_reset_active_f());
	gk20a_writel(g, gr_memfmt_hww_esr_r(),
		gr_memfmt_hww_esr_en_enable_f() |
		gr_memfmt_hww_esr_reset_active_f());
	gk20a_writel(g, gr_scc_hww_esr_r(),
		gr_scc_hww_esr_en_enable_f() |
		gr_scc_hww_esr_reset_active_f());
	gk20a_writel(g, gr_mme_hww_esr_r(),
		gr_mme_hww_esr_en_enable_f() |
		gr_mme_hww_esr_reset_active_f());
	gk20a_writel(g, gr_pd_hww_esr_r(),
		gr_pd_hww_esr_en_enable_f() |
		gr_pd_hww_esr_reset_active_f());
	gk20a_writel(g, gr_sked_hww_esr_r(), /* enabled by default */
		gr_sked_hww_esr_reset_active_f());
	gk20a_writel(g, gr_ds_hww_esr_r(),
		gr_ds_hww_esr_en_enabled_f() |
		gr_ds_hww_esr_reset_task_f());
	/* report every SPH (0..23) error through the DS unit */
	gk20a_writel(g, gr_ds_hww_report_mask_r(),
		gr_ds_hww_report_mask_sph0_err_report_f() |
		gr_ds_hww_report_mask_sph1_err_report_f() |
		gr_ds_hww_report_mask_sph2_err_report_f() |
		gr_ds_hww_report_mask_sph3_err_report_f() |
		gr_ds_hww_report_mask_sph4_err_report_f() |
		gr_ds_hww_report_mask_sph5_err_report_f() |
		gr_ds_hww_report_mask_sph6_err_report_f() |
		gr_ds_hww_report_mask_sph7_err_report_f() |
		gr_ds_hww_report_mask_sph8_err_report_f() |
		gr_ds_hww_report_mask_sph9_err_report_f() |
		gr_ds_hww_report_mask_sph10_err_report_f() |
		gr_ds_hww_report_mask_sph11_err_report_f() |
		gr_ds_hww_report_mask_sph12_err_report_f() |
		gr_ds_hww_report_mask_sph13_err_report_f() |
		gr_ds_hww_report_mask_sph14_err_report_f() |
		gr_ds_hww_report_mask_sph15_err_report_f() |
		gr_ds_hww_report_mask_sph16_err_report_f() |
		gr_ds_hww_report_mask_sph17_err_report_f() |
		gr_ds_hww_report_mask_sph18_err_report_f() |
		gr_ds_hww_report_mask_sph19_err_report_f() |
		gr_ds_hww_report_mask_sph20_err_report_f() |
		gr_ds_hww_report_mask_sph21_err_report_f() |
		gr_ds_hww_report_mask_sph22_err_report_f() |
		gr_ds_hww_report_mask_sph23_err_report_f());
}
4059 | |||
/*
 * Select which SM warp and SM global error conditions get reported
 * through the hww ESR path (broadcast to all GPC/TPC SMs).
 */
static void gr_gk20a_set_hww_esr_report_mask(struct gk20a *g)
{
	/* setup sm warp esr report masks */
	gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
		gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());

	/* setup sm global esr report mask */
	gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
		gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
		gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() |
		gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() |
		gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() |
		gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() |
		gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() |
		gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f());
}
4095 | |||
4096 | static int gk20a_init_gr_setup_hw(struct gk20a *g) | ||
4097 | { | ||
4098 | struct gr_gk20a *gr = &g->gr; | ||
4099 | struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load; | ||
4100 | struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init; | ||
4101 | u32 data; | ||
4102 | u32 addr_lo, addr_hi; | ||
4103 | u64 addr; | ||
4104 | unsigned long end_jiffies = jiffies + | ||
4105 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | ||
4106 | u32 fe_go_idle_timeout_save; | ||
4107 | u32 last_method_data = 0; | ||
4108 | u32 i, err; | ||
4109 | |||
4110 | gk20a_dbg_fn(""); | ||
4111 | |||
4112 | /* slcg prod values */ | ||
4113 | g->ops.clock_gating.slcg_gr_load_gating_prod(g, g->slcg_enabled); | ||
4114 | g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled); | ||
4115 | |||
4116 | /* init mmu debug buffer */ | ||
4117 | addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_wr_mem.iova); | ||
4118 | addr_lo = u64_lo32(addr); | ||
4119 | addr_hi = u64_hi32(addr); | ||
4120 | addr = (addr_lo >> fb_mmu_debug_wr_addr_alignment_v()) | | ||
4121 | (addr_hi << (32 - fb_mmu_debug_wr_addr_alignment_v())); | ||
4122 | |||
4123 | gk20a_writel(g, fb_mmu_debug_wr_r(), | ||
4124 | fb_mmu_debug_wr_aperture_vid_mem_f() | | ||
4125 | fb_mmu_debug_wr_vol_false_f() | | ||
4126 | fb_mmu_debug_wr_addr_v(addr)); | ||
4127 | |||
4128 | addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_rd_mem.iova); | ||
4129 | addr_lo = u64_lo32(addr); | ||
4130 | addr_hi = u64_hi32(addr); | ||
4131 | addr = (addr_lo >> fb_mmu_debug_rd_addr_alignment_v()) | | ||
4132 | (addr_hi << (32 - fb_mmu_debug_rd_addr_alignment_v())); | ||
4133 | |||
4134 | gk20a_writel(g, fb_mmu_debug_rd_r(), | ||
4135 | fb_mmu_debug_rd_aperture_vid_mem_f() | | ||
4136 | fb_mmu_debug_rd_vol_false_f() | | ||
4137 | fb_mmu_debug_rd_addr_v(addr)); | ||
4138 | |||
4139 | /* load gr floorsweeping registers */ | ||
4140 | data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r()); | ||
4141 | data = set_field(data, gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(), | ||
4142 | gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f()); | ||
4143 | gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data); | ||
4144 | |||
4145 | gr_gk20a_zcull_init_hw(g, gr); | ||
4146 | |||
4147 | g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled); | ||
4148 | g->ops.clock_gating.pg_gr_load_gating_prod(g, true); | ||
4149 | |||
4150 | if (g->elcg_enabled) { | ||
4151 | gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A); | ||
4152 | gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A); | ||
4153 | } else { | ||
4154 | gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); | ||
4155 | gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); | ||
4156 | } | ||
4157 | |||
4158 | /* Bug 1340570: increase the clock timeout to avoid potential | ||
4159 | * operation failure at high gpcclk rate. Default values are 0x400. | ||
4160 | */ | ||
4161 | gk20a_writel(g, pri_ringstation_sys_master_config_r(0x15), 0x800); | ||
4162 | gk20a_writel(g, pri_ringstation_gpc_master_config_r(0xa), 0x800); | ||
4163 | gk20a_writel(g, pri_ringstation_fbp_master_config_r(0x8), 0x800); | ||
4164 | |||
4165 | /* enable fifo access */ | ||
4166 | gk20a_writel(g, gr_gpfifo_ctl_r(), | ||
4167 | gr_gpfifo_ctl_access_enabled_f() | | ||
4168 | gr_gpfifo_ctl_semaphore_access_enabled_f()); | ||
4169 | |||
4170 | /* TBD: reload gr ucode when needed */ | ||
4171 | |||
4172 | /* enable interrupts */ | ||
4173 | gk20a_writel(g, gr_intr_r(), 0xFFFFFFFF); | ||
4174 | gk20a_writel(g, gr_intr_en_r(), 0xFFFFFFFF); | ||
4175 | |||
4176 | /* enable fecs error interrupts */ | ||
4177 | gk20a_writel(g, gr_fecs_host_int_enable_r(), | ||
4178 | gr_fecs_host_int_enable_fault_during_ctxsw_enable_f() | | ||
4179 | gr_fecs_host_int_enable_umimp_firmware_method_enable_f() | | ||
4180 | gr_fecs_host_int_enable_umimp_illegal_method_enable_f() | | ||
4181 | gr_fecs_host_int_enable_watchdog_enable_f()); | ||
4182 | |||
4183 | g->ops.gr.enable_hww_exceptions(g); | ||
4184 | g->ops.gr.set_hww_esr_report_mask(g); | ||
4185 | |||
4186 | /* enable per GPC exceptions */ | ||
4187 | gk20a_gr_enable_gpc_exceptions(g); | ||
4188 | |||
4189 | /* TBD: ECC for L1/SM */ | ||
4190 | /* TBD: enable per BE exceptions */ | ||
4191 | |||
4192 | /* reset and enable all exceptions */ | ||
4193 | gk20a_writel(g, gr_exception_r(), 0xFFFFFFFF); | ||
4194 | gk20a_writel(g, gr_exception_en_r(), 0xFFFFFFFF); | ||
4195 | gk20a_writel(g, gr_exception1_r(), 0xFFFFFFFF); | ||
4196 | gk20a_writel(g, gr_exception1_en_r(), 0xFFFFFFFF); | ||
4197 | gk20a_writel(g, gr_exception2_r(), 0xFFFFFFFF); | ||
4198 | gk20a_writel(g, gr_exception2_en_r(), 0xFFFFFFFF); | ||
4199 | |||
4200 | /* ignore status from some units */ | ||
4201 | data = gk20a_readl(g, gr_status_mask_r()); | ||
4202 | gk20a_writel(g, gr_status_mask_r(), data & gr->status_disable_mask); | ||
4203 | |||
4204 | g->ops.ltc.init_zbc(g, gr); | ||
4205 | g->ops.ltc.init_cbc(g, gr); | ||
4206 | |||
4207 | /* load ctx init */ | ||
4208 | for (i = 0; i < sw_ctx_load->count; i++) | ||
4209 | gk20a_writel(g, sw_ctx_load->l[i].addr, | ||
4210 | sw_ctx_load->l[i].value); | ||
4211 | |||
4212 | err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); | ||
4213 | if (err) | ||
4214 | goto out; | ||
4215 | |||
4216 | /* save and disable fe_go_idle */ | ||
4217 | fe_go_idle_timeout_save = | ||
4218 | gk20a_readl(g, gr_fe_go_idle_timeout_r()); | ||
4219 | gk20a_writel(g, gr_fe_go_idle_timeout_r(), | ||
4220 | (fe_go_idle_timeout_save & gr_fe_go_idle_timeout_count_f(0)) | | ||
4221 | gr_fe_go_idle_timeout_count_disabled_f()); | ||
4222 | |||
4223 | /* override a few ctx state registers */ | ||
4224 | g->ops.gr.commit_global_cb_manager(g, NULL, false); | ||
4225 | gr_gk20a_commit_global_timeslice(g, NULL, false); | ||
4226 | |||
4227 | /* floorsweep anything left */ | ||
4228 | g->ops.gr.init_fs_state(g); | ||
4229 | |||
4230 | err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); | ||
4231 | if (err) | ||
4232 | goto restore_fe_go_idle; | ||
4233 | |||
4234 | restore_fe_go_idle: | ||
4235 | /* restore fe_go_idle */ | ||
4236 | gk20a_writel(g, gr_fe_go_idle_timeout_r(), fe_go_idle_timeout_save); | ||
4237 | |||
4238 | if (err || gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT)) | ||
4239 | goto out; | ||
4240 | |||
4241 | /* load method init */ | ||
4242 | if (sw_method_init->count) { | ||
4243 | gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(), | ||
4244 | sw_method_init->l[0].value); | ||
4245 | gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(), | ||
4246 | gr_pri_mme_shadow_raw_index_write_trigger_f() | | ||
4247 | sw_method_init->l[0].addr); | ||
4248 | last_method_data = sw_method_init->l[0].value; | ||
4249 | } | ||
4250 | for (i = 1; i < sw_method_init->count; i++) { | ||
4251 | if (sw_method_init->l[i].value != last_method_data) { | ||
4252 | gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(), | ||
4253 | sw_method_init->l[i].value); | ||
4254 | last_method_data = sw_method_init->l[i].value; | ||
4255 | } | ||
4256 | gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(), | ||
4257 | gr_pri_mme_shadow_raw_index_write_trigger_f() | | ||
4258 | sw_method_init->l[i].addr); | ||
4259 | } | ||
4260 | |||
4261 | gk20a_mm_l2_invalidate(g); | ||
4262 | |||
4263 | err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); | ||
4264 | if (err) | ||
4265 | goto out; | ||
4266 | |||
4267 | out: | ||
4268 | gk20a_dbg_fn("done"); | ||
4269 | return 0; | ||
4270 | } | ||
4271 | |||
4272 | static int gk20a_init_gr_prepare(struct gk20a *g) | ||
4273 | { | ||
4274 | u32 gpfifo_ctrl, pmc_en; | ||
4275 | u32 err = 0; | ||
4276 | |||
4277 | /* disable fifo access */ | ||
4278 | pmc_en = gk20a_readl(g, mc_enable_r()); | ||
4279 | if (pmc_en & mc_enable_pgraph_enabled_f()) { | ||
4280 | gpfifo_ctrl = gk20a_readl(g, gr_gpfifo_ctl_r()); | ||
4281 | gpfifo_ctrl &= ~gr_gpfifo_ctl_access_enabled_f(); | ||
4282 | gk20a_writel(g, gr_gpfifo_ctl_r(), gpfifo_ctrl); | ||
4283 | } | ||
4284 | |||
4285 | /* reset gr engine */ | ||
4286 | gk20a_reset(g, mc_enable_pgraph_enabled_f() | ||
4287 | | mc_enable_blg_enabled_f() | ||
4288 | | mc_enable_perfmon_enabled_f()); | ||
4289 | |||
4290 | /* enable fifo access */ | ||
4291 | gk20a_writel(g, gr_gpfifo_ctl_r(), | ||
4292 | gr_gpfifo_ctl_access_enabled_f() | | ||
4293 | gr_gpfifo_ctl_semaphore_access_enabled_f()); | ||
4294 | |||
4295 | if (!g->gr.ctx_vars.valid) { | ||
4296 | err = gr_gk20a_init_ctx_vars(g, &g->gr); | ||
4297 | if (err) | ||
4298 | gk20a_err(dev_from_gk20a(g), | ||
4299 | "fail to load gr init ctx"); | ||
4300 | } | ||
4301 | return err; | ||
4302 | } | ||
4303 | |||
4304 | static int gr_gk20a_wait_mem_scrubbing(struct gk20a *g) | ||
4305 | { | ||
4306 | int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT; | ||
4307 | bool fecs_scrubbing; | ||
4308 | bool gpccs_scrubbing; | ||
4309 | |||
4310 | gk20a_dbg_fn(""); | ||
4311 | |||
4312 | do { | ||
4313 | fecs_scrubbing = gk20a_readl(g, gr_fecs_dmactl_r()) & | ||
4314 | (gr_fecs_dmactl_imem_scrubbing_m() | | ||
4315 | gr_fecs_dmactl_dmem_scrubbing_m()); | ||
4316 | |||
4317 | gpccs_scrubbing = gk20a_readl(g, gr_gpccs_dmactl_r()) & | ||
4318 | (gr_gpccs_dmactl_imem_scrubbing_m() | | ||
4319 | gr_gpccs_dmactl_imem_scrubbing_m()); | ||
4320 | |||
4321 | if (!fecs_scrubbing && !gpccs_scrubbing) { | ||
4322 | gk20a_dbg_fn("done"); | ||
4323 | return 0; | ||
4324 | } | ||
4325 | |||
4326 | udelay(GR_IDLE_CHECK_DEFAULT); | ||
4327 | } while (--retries || !tegra_platform_is_silicon()); | ||
4328 | |||
4329 | gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout"); | ||
4330 | return -ETIMEDOUT; | ||
4331 | } | ||
4332 | |||
4333 | static int gk20a_init_gr_reset_enable_hw(struct gk20a *g) | ||
4334 | { | ||
4335 | struct gr_gk20a *gr = &g->gr; | ||
4336 | struct av_list_gk20a *sw_non_ctx_load = &g->gr.ctx_vars.sw_non_ctx_load; | ||
4337 | unsigned long end_jiffies = jiffies + | ||
4338 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | ||
4339 | u32 i, err = 0; | ||
4340 | |||
4341 | gk20a_dbg_fn(""); | ||
4342 | |||
4343 | /* enable interrupts */ | ||
4344 | gk20a_writel(g, gr_intr_r(), ~0); | ||
4345 | gk20a_writel(g, gr_intr_en_r(), ~0); | ||
4346 | |||
4347 | /* reset ctx switch state */ | ||
4348 | gr_gk20a_ctx_reset(g, 0); | ||
4349 | |||
4350 | /* clear scc ram */ | ||
4351 | gk20a_writel(g, gr_scc_init_r(), | ||
4352 | gr_scc_init_ram_trigger_f()); | ||
4353 | |||
4354 | /* load non_ctx init */ | ||
4355 | for (i = 0; i < sw_non_ctx_load->count; i++) | ||
4356 | gk20a_writel(g, sw_non_ctx_load->l[i].addr, | ||
4357 | sw_non_ctx_load->l[i].value); | ||
4358 | |||
4359 | err = gr_gk20a_wait_mem_scrubbing(g); | ||
4360 | if (err) | ||
4361 | goto out; | ||
4362 | |||
4363 | err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); | ||
4364 | if (err) | ||
4365 | goto out; | ||
4366 | |||
4367 | err = gr_gk20a_load_ctxsw_ucode(g, gr); | ||
4368 | if (err) | ||
4369 | goto out; | ||
4370 | |||
4371 | /* this appears query for sw states but fecs actually init | ||
4372 | ramchain, etc so this is hw init */ | ||
4373 | err = gr_gk20a_init_ctx_state(g, gr); | ||
4374 | if (err) | ||
4375 | goto out; | ||
4376 | |||
4377 | out: | ||
4378 | if (err) | ||
4379 | gk20a_err(dev_from_gk20a(g), "fail"); | ||
4380 | else | ||
4381 | gk20a_dbg_fn("done"); | ||
4382 | |||
4383 | return 0; | ||
4384 | } | ||
4385 | |||
4386 | /* | ||
4387 | * XXX Merge this list with the debugger/profiler | ||
4388 | * session regops whitelists? | ||
4389 | */ | ||
4390 | static u32 wl_addr_gk20a[] = { | ||
4391 | /* this list must be sorted (low to high) */ | ||
4392 | 0x404468, /* gr_pri_mme_max_instructions */ | ||
4393 | 0x418800, /* gr_pri_gpcs_setup_debug */ | ||
4394 | 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */ | ||
4395 | 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */ | ||
4396 | 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */ | ||
4397 | 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */ | ||
4398 | }; | ||
4399 | |||
4400 | static int gr_gk20a_init_access_map(struct gk20a *g) | ||
4401 | { | ||
4402 | struct gr_gk20a *gr = &g->gr; | ||
4403 | void *data; | ||
4404 | int err = 0; | ||
4405 | u32 w, nr_pages = | ||
4406 | DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size, | ||
4407 | PAGE_SIZE); | ||
4408 | |||
4409 | data = vmap(gr->global_ctx_buffer[PRIV_ACCESS_MAP].pages, | ||
4410 | PAGE_ALIGN(gr->global_ctx_buffer[PRIV_ACCESS_MAP].size) >> | ||
4411 | PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL)); | ||
4412 | if (!data) { | ||
4413 | gk20a_err(dev_from_gk20a(g), | ||
4414 | "failed to map priv access map memory"); | ||
4415 | err = -ENOMEM; | ||
4416 | goto clean_up; | ||
4417 | } | ||
4418 | |||
4419 | memset(data, 0x0, PAGE_SIZE * nr_pages); | ||
4420 | |||
4421 | for (w = 0; w < ARRAY_SIZE(wl_addr_gk20a); w++) { | ||
4422 | u32 map_bit, map_byte, map_shift; | ||
4423 | map_bit = wl_addr_gk20a[w] >> 2; | ||
4424 | map_byte = map_bit >> 3; | ||
4425 | map_shift = map_bit & 0x7; /* i.e. 0-7 */ | ||
4426 | gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d", | ||
4427 | wl_addr_gk20a[w], map_byte, map_shift); | ||
4428 | ((u8 *)data)[map_byte] |= 1 << map_shift; | ||
4429 | } | ||
4430 | |||
4431 | clean_up: | ||
4432 | if (data) | ||
4433 | vunmap(data); | ||
4434 | return 0; | ||
4435 | } | ||
4436 | |||
/*
 * One-time SW-side gr initialization: discover the GPU configuration,
 * allocate the MMU debug buffers, tile map, comptags, zcull state and
 * global context buffers, and build the priv access map. Idempotent:
 * returns immediately once gr->sw_ready is set. On any failure all
 * partially-initialized state is torn down via gk20a_remove_gr_support().
 */
static int gk20a_init_gr_setup_sw(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	int err;

	gk20a_dbg_fn("");

	if (gr->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	gr->g = g;

	err = gr_gk20a_init_gr_config(g, gr);
	if (err)
		goto clean_up;

	err = gr_gk20a_init_mmu_sw(g, gr);
	if (err)
		goto clean_up;

	err = gr_gk20a_init_map_tiles(g, gr);
	if (err)
		goto clean_up;

	/* size comptag backing store: tiny fixed budget on simulation,
	 * otherwise scaled from total system RAM (pages -> MB) */
	if (tegra_cpu_is_asim())
		gr->max_comptag_mem = 1; /* MBs worth of comptag coverage */
	else {
		gk20a_dbg_info("total ram pages : %lu", totalram_pages);
		gr->max_comptag_mem = totalram_pages
					 >> (10 - (PAGE_SHIFT - 10));
	}
	err = g->ops.ltc.init_comptags(g, gr);
	if (err)
		goto clean_up;

	err = gr_gk20a_init_zcull(g, gr);
	if (err)
		goto clean_up;

	err = gr_gk20a_alloc_global_ctx_buffers(g);
	if (err)
		goto clean_up;

	err = gr_gk20a_init_access_map(g);
	if (err)
		goto clean_up;

	mutex_init(&gr->ctx_mutex);
	spin_lock_init(&gr->ch_tlb_lock);

	gr->remove_support = gk20a_remove_gr_support;
	gr->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;

clean_up:
	gk20a_err(dev_from_gk20a(g), "fail");
	gk20a_remove_gr_support(gr);
	return err;
}
4500 | |||
4501 | int gk20a_init_gr_support(struct gk20a *g) | ||
4502 | { | ||
4503 | u32 err; | ||
4504 | |||
4505 | gk20a_dbg_fn(""); | ||
4506 | |||
4507 | err = gk20a_init_gr_prepare(g); | ||
4508 | if (err) | ||
4509 | return err; | ||
4510 | |||
4511 | /* this is required before gr_gk20a_init_ctx_state */ | ||
4512 | mutex_init(&g->gr.fecs_mutex); | ||
4513 | |||
4514 | err = gk20a_init_gr_reset_enable_hw(g); | ||
4515 | if (err) | ||
4516 | return err; | ||
4517 | |||
4518 | err = gk20a_init_gr_setup_sw(g); | ||
4519 | if (err) | ||
4520 | return err; | ||
4521 | |||
4522 | err = gk20a_init_gr_setup_hw(g); | ||
4523 | if (err) | ||
4524 | return err; | ||
4525 | |||
4526 | return 0; | ||
4527 | } | ||
4528 | |||
/* KEPLER_C class sw-method offsets (byte offsets within the class) */
#define NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc
#define NVA297_SET_CIRCULAR_BUFFER_SIZE 0x1280
#define NVA297_SET_SHADER_EXCEPTIONS 0x1528
/* KEPLER_COMPUTE_A class sw-method offset */
#define NVA0C0_SET_SHADER_EXCEPTIONS 0x1528

/* method data value that disables shader exception reporting */
#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0

/*
 * Decoded state of a trapped gr method, filled in by the gr ISR and
 * passed to the individual interrupt handlers below.
 */
struct gr_isr_data {
	u32 addr;	/* trapped method address */
	u32 data_lo;	/* low 32 bits of the method data */
	u32 data_hi;	/* high 32 bits of the method data */
	u32 curr_ctx;	/* context (instance block) at trap time */
	u32 chid;	/* channel id resolved from curr_ctx */
	u32 offset;	/* method offset within the class (in words) */
	u32 sub_chan;	/* subchannel the method was sent on */
	u32 class_num;	/* class bound to that subchannel */
};
4546 | |||
/*
 * SW-method handler for *_SET_SHADER_EXCEPTIONS.
 *
 * data == NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE clears both SM ESR
 * report masks (no SM errors reported); any other value restores the full
 * default warp-level and SM-global report masks (the same set programmed
 * by gr_gk20a_set_hww_esr_report_mask()).
 */
void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data)
{
	gk20a_dbg_fn("");

	if (data == NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE) {
		gk20a_writel(g,
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), 0);
		gk20a_writel(g,
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(), 0);
	} else {
		/* setup sm warp esr report masks */
		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());

		/* setup sm global esr report mask */
		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() |
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() |
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() |
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() |
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() |
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f());
	}
}
4591 | |||
/*
 * SW-method handler for NVA297_SET_CIRCULAR_BUFFER_SIZE.
 *
 * Programs the beta circular-buffer size (data * 4, clamped to
 * gr->attrib_cb_size) into the DS constraint logic and into every PPC's
 * CBM config register, scaled by the number of TPCs behind each PES.
 */
static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index, ppc_index, stride, val, offset;
	u32 cb_size = data * 4;

	gk20a_dbg_fn("");

	/* clamp to the size the attrib cb was allocated with */
	if (cb_size > gr->attrib_cb_size)
		cb_size = gr->attrib_cb_size;

	/* update the beta cb size field, preserving the other bits */
	gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
		(gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
		 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
		 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		stride = proj_gpc_stride_v() * gpc_index;

		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
			ppc_index++) {

			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index);

			offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val);

			val = set_field(val,
				gr_gpc0_ppc0_cbm_cfg_size_m(),
				gr_gpc0_ppc0_cbm_cfg_size_f(cb_size *
					gr->pes_tpc_count[ppc_index][gpc_index]));
			val = set_field(val,
				gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
				(offset + 1));

			/* write once with start_offset bumped, then again
			 * with the original offset — presumably this forces
			 * the unit to latch the new size; TODO confirm
			 * against CBM hardware documentation */
			gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index, val);

			val = set_field(val,
				gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
				offset);

			gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index, val);
		}
	}
}
4642 | |||
/*
 * SW-method handler for NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE.
 *
 * Programs the alpha circular-buffer size (data * 4, clamped to
 * gr->alpha_cb_size) into the DS constraint logic, derives and programs
 * the PD alpha-beta max-output limit, and updates every PPC's CBM cfg2
 * register scaled by the TPC count behind each PES.
 */
static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index, ppc_index, stride, val;
	u32 pd_ab_max_output;
	u32 alpha_cb_size = data * 4;

	gk20a_dbg_fn("");
	/* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
		return; */

	/* clamp to the size the alpha cb was allocated with */
	if (alpha_cb_size > gr->alpha_cb_size)
		alpha_cb_size = gr->alpha_cb_size;

	/* update the alpha cb size field, preserving the other bits */
	gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
		(gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
		 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
		 gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));

	/* convert cb size to the pd max-output granularity */
	pd_ab_max_output = alpha_cb_size *
		gr_gpc0_ppc0_cbm_cfg_size_granularity_v() /
		gr_pd_ab_dist_cfg1_max_output_granularity_v();

	gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output));

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		stride = proj_gpc_stride_v() * gpc_index;

		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
			ppc_index++) {

			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index);

			val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(),
					gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size *
						gr->pes_tpc_count[ppc_index][gpc_index]));

			gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index, val);
		}
	}
}
4689 | |||
/*
 * Full gr engine reset: re-run the prepare, post-reset HW init and full
 * HW setup stages. Any stage failing here is considered fatal (BUG).
 */
void gk20a_gr_reset(struct gk20a *g)
{
	int ret;

	ret = gk20a_init_gr_prepare(g);
	BUG_ON(ret);

	ret = gk20a_init_gr_reset_enable_hw(g);
	BUG_ON(ret);

	ret = gk20a_init_gr_setup_hw(g);
	BUG_ON(ret);
}
4700 | |||
4701 | static int gr_gk20a_handle_sw_method(struct gk20a *g, u32 addr, | ||
4702 | u32 class_num, u32 offset, u32 data) | ||
4703 | { | ||
4704 | gk20a_dbg_fn(""); | ||
4705 | |||
4706 | if (class_num == KEPLER_COMPUTE_A) { | ||
4707 | switch (offset << 2) { | ||
4708 | case NVA0C0_SET_SHADER_EXCEPTIONS: | ||
4709 | gk20a_gr_set_shader_exceptions(g, data); | ||
4710 | break; | ||
4711 | default: | ||
4712 | goto fail; | ||
4713 | } | ||
4714 | } | ||
4715 | |||
4716 | if (class_num == KEPLER_C) { | ||
4717 | switch (offset << 2) { | ||
4718 | case NVA297_SET_SHADER_EXCEPTIONS: | ||
4719 | gk20a_gr_set_shader_exceptions(g, data); | ||
4720 | break; | ||
4721 | case NVA297_SET_CIRCULAR_BUFFER_SIZE: | ||
4722 | g->ops.gr.set_circular_buffer_size(g, data); | ||
4723 | break; | ||
4724 | case NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE: | ||
4725 | g->ops.gr.set_alpha_circular_buffer_size(g, data); | ||
4726 | break; | ||
4727 | default: | ||
4728 | goto fail; | ||
4729 | } | ||
4730 | } | ||
4731 | return 0; | ||
4732 | |||
4733 | fail: | ||
4734 | return -EINVAL; | ||
4735 | } | ||
4736 | |||
4737 | static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g, | ||
4738 | struct gr_isr_data *isr_data) | ||
4739 | { | ||
4740 | struct fifo_gk20a *f = &g->fifo; | ||
4741 | struct channel_gk20a *ch = &f->channel[isr_data->chid]; | ||
4742 | gk20a_dbg_fn(""); | ||
4743 | gk20a_set_error_notifier(ch, | ||
4744 | NVHOST_CHANNEL_GR_SEMAPHORE_TIMEOUT); | ||
4745 | gk20a_err(dev_from_gk20a(g), | ||
4746 | "gr semaphore timeout\n"); | ||
4747 | return -EINVAL; | ||
4748 | } | ||
4749 | |||
4750 | static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g, | ||
4751 | struct gr_isr_data *isr_data) | ||
4752 | { | ||
4753 | struct fifo_gk20a *f = &g->fifo; | ||
4754 | struct channel_gk20a *ch = &f->channel[isr_data->chid]; | ||
4755 | gk20a_dbg_fn(""); | ||
4756 | gk20a_set_error_notifier(ch, | ||
4757 | NVHOST_CHANNEL_GR_ILLEGAL_NOTIFY); | ||
4758 | /* This is an unrecoverable error, reset is needed */ | ||
4759 | gk20a_err(dev_from_gk20a(g), | ||
4760 | "gr semaphore timeout\n"); | ||
4761 | return -EINVAL; | ||
4762 | } | ||
4763 | |||
4764 | static int gk20a_gr_handle_illegal_method(struct gk20a *g, | ||
4765 | struct gr_isr_data *isr_data) | ||
4766 | { | ||
4767 | int ret = g->ops.gr.handle_sw_method(g, isr_data->addr, | ||
4768 | isr_data->class_num, isr_data->offset, | ||
4769 | isr_data->data_lo); | ||
4770 | if (ret) | ||
4771 | gk20a_err(dev_from_gk20a(g), "invalid method class 0x%08x" | ||
4772 | ", offset 0x%08x address 0x%08x\n", | ||
4773 | isr_data->class_num, isr_data->offset, isr_data->addr); | ||
4774 | |||
4775 | return ret; | ||
4776 | } | ||
4777 | |||
4778 | static int gk20a_gr_handle_illegal_class(struct gk20a *g, | ||
4779 | struct gr_isr_data *isr_data) | ||
4780 | { | ||
4781 | struct fifo_gk20a *f = &g->fifo; | ||
4782 | struct channel_gk20a *ch = &f->channel[isr_data->chid]; | ||
4783 | gk20a_dbg_fn(""); | ||
4784 | gk20a_set_error_notifier(ch, | ||
4785 | NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY); | ||
4786 | gk20a_err(dev_from_gk20a(g), | ||
4787 | "invalid class 0x%08x, offset 0x%08x", | ||
4788 | isr_data->class_num, isr_data->offset); | ||
4789 | return -EINVAL; | ||
4790 | } | ||
4791 | |||
4792 | static int gk20a_gr_handle_class_error(struct gk20a *g, | ||
4793 | struct gr_isr_data *isr_data) | ||
4794 | { | ||
4795 | struct fifo_gk20a *f = &g->fifo; | ||
4796 | struct channel_gk20a *ch = &f->channel[isr_data->chid]; | ||
4797 | gk20a_dbg_fn(""); | ||
4798 | |||
4799 | gk20a_set_error_notifier(ch, | ||
4800 | NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY); | ||
4801 | gk20a_err(dev_from_gk20a(g), | ||
4802 | "class error 0x%08x, offset 0x%08x", | ||
4803 | isr_data->class_num, isr_data->offset); | ||
4804 | return -EINVAL; | ||
4805 | } | ||
4806 | |||
4807 | static int gk20a_gr_handle_semaphore_pending(struct gk20a *g, | ||
4808 | struct gr_isr_data *isr_data) | ||
4809 | { | ||
4810 | struct fifo_gk20a *f = &g->fifo; | ||
4811 | struct channel_gk20a *ch = &f->channel[isr_data->chid]; | ||
4812 | |||
4813 | wake_up(&ch->semaphore_wq); | ||
4814 | |||
4815 | return 0; | ||
4816 | } | ||
4817 | |||
4818 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
4819 | static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g, | ||
4820 | u32 offset) | ||
4821 | { | ||
4822 | /* support only 24-bit 4-byte aligned offsets */ | ||
4823 | bool valid = !(offset & 0xFF000003); | ||
4824 | /* whitelist check */ | ||
4825 | valid = valid && | ||
4826 | is_bar0_global_offset_whitelisted_gk20a(offset); | ||
4827 | /* resource size check in case there was a problem | ||
4828 | * with allocating the assumed size of bar0 */ | ||
4829 | valid = valid && | ||
4830 | offset < resource_size(g->reg_mem); | ||
4831 | return valid; | ||
4832 | } | ||
4833 | #endif | ||
4834 | |||
/*
 * Handle a "notify" trap from the engine. On CONFIG_GK20A_CYCLE_STATS
 * builds this walks the channel's shared cyclestats command buffer
 * (a list of variable-size ops terminated by OP_END, starting at the
 * offset given in the method payload) and performs the requested
 * whitelisted BAR0 reads/writes. Finally any waiters on the channel's
 * notifier queue are woken. Always returns 0.
 */
static int gk20a_gr_handle_notify_pending(struct gk20a *g,
					  struct gr_isr_data *isr_data)
{
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = &f->channel[isr_data->chid];

#if defined(CONFIG_GK20A_CYCLE_STATS)
	void *virtual_address;
	u32 buffer_size;
	u32 offset;
	u32 new_offset;
	bool exit;
	struct share_buffer_head *sh_hdr;
	u32 raw_reg;
	u64 mask_orig;
	u64 v = 0;
	struct gk20a_cyclestate_buffer_elem *op_elem;
	/* GL will never use payload 0 for cycle state */
	if ((ch->cyclestate.cyclestate_buffer == NULL) || (isr_data->data_lo == 0))
		return 0;

	mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);

	virtual_address = ch->cyclestate.cyclestate_buffer;
	buffer_size = ch->cyclestate.cyclestate_buffer_size;
	offset = isr_data->data_lo;
	exit = false;
	while (!exit) {
		/* reject an op header that starts outside the buffer */
		if (offset >= buffer_size) {
			WARN_ON(1);
			break;
		}

		sh_hdr = (struct share_buffer_head *)
			((char *)virtual_address + offset);

		/* every op must at least contain a full header */
		if (sh_hdr->size < sizeof(struct share_buffer_head)) {
			WARN_ON(1);
			break;
		}
		new_offset = offset + sh_hdr->size;

		switch (sh_hdr->operation) {
		case OP_END:
			exit = true;
			break;

		case BAR0_READ32:
		case BAR0_WRITE32:
		{
			bool valid;
			op_elem =
				(struct gk20a_cyclestate_buffer_elem *)
					sh_hdr;
			/* only whitelisted, in-range BAR0 offsets allowed */
			valid = is_valid_cyclestats_bar0_offset_gk20a(g,
							op_elem->offset_bar0);
			if (!valid) {
				gk20a_err(dev_from_gk20a(g),
					   "invalid cycletstats op offset: 0x%x\n",
					   op_elem->offset_bar0);

				sh_hdr->failed = exit = true;
				break;
			}

			/* bit mask covering [first_bit, last_bit] */
			mask_orig =
				((1ULL <<
				  (op_elem->last_bit + 1))
				 -1)&~((1ULL <<
					op_elem->first_bit)-1);

			raw_reg =
				gk20a_readl(g,
					    op_elem->offset_bar0);

			switch (sh_hdr->operation) {
			case BAR0_READ32:
				/* extract the requested field */
				op_elem->data =
					(raw_reg & mask_orig)
						>> op_elem->first_bit;
				break;

			case BAR0_WRITE32:
				/* read-modify-write: preserve bits outside
				 * the mask unless the mask covers all 32 */
				v = 0;
				if ((unsigned int)mask_orig !=
				    (unsigned int)~0) {
					v = (unsigned int)
						(raw_reg & ~mask_orig);
				}

				v |= ((op_elem->data
				       << op_elem->first_bit)
				      & mask_orig);

				gk20a_writel(g,
					     op_elem->offset_bar0,
					     (unsigned int)v);
				break;
			default:
				/* nop ok?*/
				break;
			}
		}
		break;

		default:
			/* no operation content case */
			exit = true;
			break;
		}
		sh_hdr->completed = true;
		offset = new_offset;
	}
	mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
#endif
	gk20a_dbg_fn("");
	wake_up(&ch->notifier_wq);
	return 0;
}
4955 | |||
/* Used by the sw interrupt thread to translate the trapped context
 * pointer (gr_fecs_current_ctx) into a hw channel id. For performance,
 * we don't want to scan every channel on each interrupt, so a small TLB
 * caches recent translations. Protected by gr->ch_tlb_lock.
 *
 * Returns the channel id, or -1 if no in-use channel matches curr_ctx.
 */
static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
{
	struct fifo_gk20a *f = &g->fifo;
	struct gr_gk20a *gr = &g->gr;
	u32 chid = -1;
	u32 i;

	spin_lock(&gr->ch_tlb_lock);

	/* check cache first */
	for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
		if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
			chid = gr->chid_tlb[i].hw_chid;
			goto unlock;
		}
	}

	/* slow path: compare each in-use channel's instance block address
	 * against the context pointer field of curr_ctx */
	for (chid = 0; chid < f->num_channels; chid++)
		if (f->channel[chid].in_use) {
			if ((u32)(f->channel[chid].inst_block.cpu_pa >>
				ram_in_base_shift_v()) ==
				gr_fecs_current_ctx_ptr_v(curr_ctx))
				break;
		}

	if (chid >= f->num_channels) {
		chid = -1;
		goto unlock;
	}

	/* add to free tlb entry (curr_ctx == 0 marks a free slot) */
	for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
		if (gr->chid_tlb[i].curr_ctx == 0) {
			gr->chid_tlb[i].curr_ctx = curr_ctx;
			gr->chid_tlb[i].hw_chid = chid;
			goto unlock;
		}
	}

	/* no free entry, flush one in round-robin order */
	gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
	gr->chid_tlb[gr->channel_tlb_flush_index].hw_chid = chid;

	/* the mask below relies on GR_CHANNEL_MAP_TLB_SIZE being a
	 * power of two */
	gr->channel_tlb_flush_index =
		(gr->channel_tlb_flush_index + 1) &
		(GR_CHANNEL_MAP_TLB_SIZE - 1);

unlock:
	spin_unlock(&gr->ch_tlb_lock);
	return chid;
}
5011 | |||
/* Assert the SM stop trigger (gpc0/tpc0) and poll until the SM reports
 * locked down. Global errors covered by global_esr_mask are ignored
 * when deciding whether an error is still pending, so lockdown also
 * completes once only masked errors remain.
 *
 * Returns 0 once locked down (stop trigger is then de-asserted),
 * -EFAULT if a pending MMU fault makes lockdown impossible, or
 * -EAGAIN on timeout.
 */
static int gk20a_gr_lock_down_sm(struct gk20a *g, u32 global_esr_mask)
{
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	u32 delay = GR_IDLE_CHECK_DEFAULT;
	bool mmu_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled(g);
	u32 dbgr_control0;

	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "locking down SM");

	/* assert stop trigger */
	dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
	dbgr_control0 |= gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
	gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r(), dbgr_control0);

	/* wait for the sm to lock down */
	do {
		u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
		u32 warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r());
		u32 dbgr_status0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_status0_r());
		bool locked_down =
		    (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
		     gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
		bool error_pending =
		    (gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) !=
		     gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) ||
		    ((global_esr & ~global_esr_mask) != 0);

		if (locked_down || !error_pending) {
			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "locked down SM");

			/* de-assert stop trigger */
			dbgr_control0 &= ~gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
			gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r(), dbgr_control0);

			return 0;
		}

		/* if an mmu fault is pending and mmu debug mode is not
		 * enabled, the sm will never lock down. */
		if (!mmu_debug_mode_enabled && gk20a_fifo_mmu_fault_pending(g)) {
			gk20a_err(dev_from_gk20a(g), "mmu fault pending, sm will"
				" never lock down!");
			return -EFAULT;
		}

		/* exponential backoff between polls, capped at
		 * GR_IDLE_CHECK_MAX */
		usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);

	/* on pre-silicon (simulation) platforms, poll without a timeout */
	} while (time_before(jiffies, end_jiffies)
			|| !tegra_platform_is_silicon());

	gk20a_err(dev_from_gk20a(g), "timed out while trying to lock down SM");

	return -EAGAIN;
}
5068 | |||
5069 | bool gk20a_gr_sm_debugger_attached(struct gk20a *g) | ||
5070 | { | ||
5071 | u32 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r()); | ||
5072 | |||
5073 | /* check if an sm debugger is attached */ | ||
5074 | if (gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(dbgr_control0) == | ||
5075 | gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v()) | ||
5076 | return true; | ||
5077 | |||
5078 | return false; | ||
5079 | } | ||
5080 | |||
/* Clear SM hardware-warning state on gpc0/tpc0. global_esr is expected
 * to be a value previously read from the global esr register; writing
 * it back clears exactly the error bits that were observed. */
static void gk20a_gr_clear_sm_hww(struct gk20a *g, u32 global_esr)
{
	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r(), global_esr);

	/* clear the warp hww by writing the "no error" value */
	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r(),
			gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f());
}
5089 | |||
5090 | static struct channel_gk20a * | ||
5091 | channel_from_hw_chid(struct gk20a *g, u32 hw_chid) | ||
5092 | { | ||
5093 | return g->fifo.channel+hw_chid; | ||
5094 | } | ||
5095 | |||
/* Handle an SM exception on gpc0/tpc0.
 *
 * Breakpoint/pause/single-step interrupts (global_mask) are non-fatal
 * and left for the usermode debugger. If a debugger is attached, SM
 * exception forwarding is disabled (the debugger re-enables it after
 * servicing), and any other pending warp or global error triggers an
 * SM lockdown before debugger clients on the faulting channel are
 * posted.
 *
 * Returns 0 on success or the error from gk20a_gr_lock_down_sm().
 */
static int gk20a_gr_handle_sm_exception(struct gk20a *g,
		struct gr_isr_data *isr_data)
{
	int ret = 0;
	bool do_warp_sync = false;
	/* these three interrupts don't require locking down the SM. They can
	 * be handled by usermode clients as they aren't fatal. Additionally,
	 * usermode clients may wish to allow some warps to execute while others
	 * are at breakpoints, as opposed to fatal errors where all warps should
	 * halt. */
	u32 global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()   |
			  gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
			  gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
	u32 global_esr, warp_esr;
	bool sm_debugger_attached = gk20a_gr_sm_debugger_attached(g);
	struct channel_gk20a *fault_ch;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

	global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
	warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r());

	/* if an sm debugger is attached, disable forwarding of tpc exceptions.
	 * the debugger will reenable exceptions after servicing them. */
	if (sm_debugger_attached) {
		u32 tpc_exception_en = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r());
		tpc_exception_en &= ~gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f();
		gk20a_writel(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r(), tpc_exception_en);
		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM debugger attached");
	}

	/* if a debugger is present and an error has occurred, do a warp sync */
	if (sm_debugger_attached && ((warp_esr != 0) || ((global_esr & ~global_mask) != 0))) {
		gk20a_dbg(gpu_dbg_intr, "warp sync needed");
		do_warp_sync = true;
	}

	if (do_warp_sync) {
		ret = gk20a_gr_lock_down_sm(g, global_mask);
		if (ret) {
			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
			return ret;
		}
	}

	/* finally, signal any client waiting on an event */
	fault_ch = channel_from_hw_chid(g, isr_data->chid);
	if (fault_ch)
		gk20a_dbg_gpu_post_events(fault_ch);

	return ret;
}
5148 | |||
5149 | static int gk20a_gr_handle_tpc_exception(struct gk20a *g, | ||
5150 | struct gr_isr_data *isr_data) | ||
5151 | { | ||
5152 | int ret = 0; | ||
5153 | u32 tpc_exception = gk20a_readl(g, gr_gpcs_tpcs_tpccs_tpc_exception_r()); | ||
5154 | |||
5155 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, ""); | ||
5156 | |||
5157 | /* check if an sm exeption is pending */ | ||
5158 | if (gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(tpc_exception) == | ||
5159 | gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v()) { | ||
5160 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM exception pending"); | ||
5161 | ret = gk20a_gr_handle_sm_exception(g, isr_data); | ||
5162 | } | ||
5163 | |||
5164 | return ret; | ||
5165 | } | ||
5166 | |||
5167 | static int gk20a_gr_handle_gpc_exception(struct gk20a *g, | ||
5168 | struct gr_isr_data *isr_data) | ||
5169 | { | ||
5170 | int ret = 0; | ||
5171 | u32 gpc_exception = gk20a_readl(g, gr_gpcs_gpccs_gpc_exception_r()); | ||
5172 | |||
5173 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, ""); | ||
5174 | |||
5175 | /* check if tpc 0 has an exception */ | ||
5176 | if (gr_gpcs_gpccs_gpc_exception_tpc_v(gpc_exception) == | ||
5177 | gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v()) { | ||
5178 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "TPC exception pending"); | ||
5179 | ret = gk20a_gr_handle_tpc_exception(g, isr_data); | ||
5180 | } | ||
5181 | |||
5182 | return ret; | ||
5183 | } | ||
5184 | |||
/* Main (stalling) graphics engine interrupt service routine.
 *
 * Latches the trapped method state into isr_data, translates the
 * current context to a channel id, then dispatches each pending
 * interrupt bit to its handler, acking each serviced bit in
 * gr_intr_r() and clearing it from the local gr_intr copy. If any
 * handler requests it, the GR engine is recovered via the fifo. Any
 * bit still set in gr_intr at the end is reported as unhandled.
 *
 * Always returns 0.
 */
int gk20a_gr_isr(struct gk20a *g)
{
	struct gr_isr_data isr_data;
	u32 grfifo_ctl;
	u32 obj_table;
	/* boolean accumulator: any non-zero value (handlers may OR in
	 * negative errnos) triggers engine recovery below */
	int need_reset = 0;
	u32 gr_intr = gk20a_readl(g, gr_intr_r());

	gk20a_dbg_fn("");
	gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr);

	if (!gr_intr)
		return 0;

	/* disable fifo access to gr while servicing; re-enabled at
	 * clean_up */
	grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
	grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
	grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);

	gk20a_writel(g, gr_gpfifo_ctl_r(),
		grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
		gr_gpfifo_ctl_semaphore_access_f(0));

	/* latch the trapped method state for the handlers */
	isr_data.addr = gk20a_readl(g, gr_trapped_addr_r());
	isr_data.data_lo = gk20a_readl(g, gr_trapped_data_lo_r());
	isr_data.data_hi = gk20a_readl(g, gr_trapped_data_hi_r());
	isr_data.curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
	isr_data.offset = gr_trapped_addr_mthd_v(isr_data.addr);
	isr_data.sub_chan = gr_trapped_addr_subch_v(isr_data.addr);
	obj_table = gk20a_readl(g,
		gr_fe_object_table_r(isr_data.sub_chan));
	isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);

	isr_data.chid =
		gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx);
	/* without a channel we cannot service anything; remaining bits
	 * are reported as unhandled at clean_up */
	if (isr_data.chid == -1) {
		gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
			   isr_data.curr_ctx);
		goto clean_up;
	}

	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
		"channel %d: addr 0x%08x, "
		"data 0x%08x 0x%08x,"
		"ctx 0x%08x, offset 0x%08x, "
		"subchannel 0x%08x, class 0x%08x",
		isr_data.chid, isr_data.addr,
		isr_data.data_hi, isr_data.data_lo,
		isr_data.curr_ctx, isr_data.offset,
		isr_data.sub_chan, isr_data.class_num);

	if (gr_intr & gr_intr_notify_pending_f()) {
		gk20a_gr_handle_notify_pending(g, &isr_data);
		gk20a_writel(g, gr_intr_r(),
			gr_intr_notify_reset_f());
		gr_intr &= ~gr_intr_notify_pending_f();
	}

	if (gr_intr & gr_intr_semaphore_pending_f()) {
		gk20a_gr_handle_semaphore_pending(g, &isr_data);
		gk20a_writel(g, gr_intr_r(),
			gr_intr_semaphore_reset_f());
		gr_intr &= ~gr_intr_semaphore_pending_f();
	}

	if (gr_intr & gr_intr_semaphore_timeout_pending_f()) {
		need_reset |= gk20a_gr_handle_semaphore_timeout_pending(g,
			&isr_data);
		/* NOTE(review): this branch acks and clears the plain
		 * semaphore bit, not a timeout-specific one; it looks
		 * like the timeout bit may remain set in gr_intr and be
		 * reported as unhandled below -- confirm against the
		 * gr_intr_* accessor definitions. */
		gk20a_writel(g, gr_intr_r(),
			gr_intr_semaphore_reset_f());
		gr_intr &= ~gr_intr_semaphore_pending_f();
	}

	if (gr_intr & gr_intr_illegal_notify_pending_f()) {
		need_reset |= gk20a_gr_intr_illegal_notify_pending(g,
			&isr_data);
		gk20a_writel(g, gr_intr_r(),
			gr_intr_illegal_notify_reset_f());
		gr_intr &= ~gr_intr_illegal_notify_pending_f();
	}

	if (gr_intr & gr_intr_illegal_method_pending_f()) {
		need_reset |= gk20a_gr_handle_illegal_method(g, &isr_data);
		gk20a_writel(g, gr_intr_r(),
			gr_intr_illegal_method_reset_f());
		gr_intr &= ~gr_intr_illegal_method_pending_f();
	}

	if (gr_intr & gr_intr_illegal_class_pending_f()) {
		need_reset |= gk20a_gr_handle_illegal_class(g, &isr_data);
		gk20a_writel(g, gr_intr_r(),
			gr_intr_illegal_class_reset_f());
		gr_intr &= ~gr_intr_illegal_class_pending_f();
	}

	if (gr_intr & gr_intr_class_error_pending_f()) {
		need_reset |= gk20a_gr_handle_class_error(g, &isr_data);
		gk20a_writel(g, gr_intr_r(),
			gr_intr_class_error_reset_f());
		gr_intr &= ~gr_intr_class_error_pending_f();
	}

	/* this one happens if someone tries to hit a non-whitelisted
	 * register using set_falcon[4] */
	if (gr_intr & gr_intr_firmware_method_pending_f()) {
		need_reset |= true;
		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "firmware method intr pending\n");
		gk20a_writel(g, gr_intr_r(),
			gr_intr_firmware_method_reset_f());
		gr_intr &= ~gr_intr_firmware_method_pending_f();
	}

	if (gr_intr & gr_intr_exception_pending_f()) {
		u32 exception = gk20a_readl(g, gr_exception_r());
		struct fifo_gk20a *f = &g->fifo;
		struct channel_gk20a *ch = &f->channel[isr_data.chid];

		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception);

		/* FE warnings are logged and acked but not fatal */
		if (exception & gr_exception_fe_m()) {
			u32 fe = gk20a_readl(g, gr_fe_hww_esr_r());
			gk20a_dbg(gpu_dbg_intr, "fe warning %08x\n", fe);
			gk20a_writel(g, gr_fe_hww_esr_r(), fe);
		}

		/* check if a gpc exception has occurred */
		if (exception & gr_exception_gpc_m() && need_reset == 0) {
			u32 exception1 = gk20a_readl(g, gr_exception1_r());
			u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());

			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "GPC exception pending");

			/* if no sm debugger is present, clean up the channel */
			if (!gk20a_gr_sm_debugger_attached(g)) {
				gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
					   "SM debugger not attached, clearing interrupt");
				/* any non-zero value marks reset needed */
				need_reset |= -EFAULT;
			} else {
				/* check if gpc 0 has an exception */
				if (exception1 & gr_exception1_gpc_0_pending_f())
					need_reset |= gk20a_gr_handle_gpc_exception(g, &isr_data);
				/* clear the hwws, also causes tpc and gpc
				 * exceptions to be cleared */
				gk20a_gr_clear_sm_hww(g, global_esr);
			}

			if (need_reset)
				gk20a_set_error_notifier(ch,
					NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY);
		}

		gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f());
		gr_intr &= ~gr_intr_exception_pending_f();
	}

	if (need_reset)
		gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), true);

clean_up:
	/* restore fifo access to gr */
	gk20a_writel(g, gr_gpfifo_ctl_r(),
		grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
		gr_gpfifo_ctl_semaphore_access_f(1));

	if (gr_intr)
		gk20a_err(dev_from_gk20a(g),
			   "unhandled gr interrupt 0x%08x", gr_intr);

	return 0;
}
5353 | |||
5354 | int gk20a_gr_nonstall_isr(struct gk20a *g) | ||
5355 | { | ||
5356 | u32 gr_intr = gk20a_readl(g, gr_intr_nonstall_r()); | ||
5357 | u32 clear_intr = 0; | ||
5358 | |||
5359 | gk20a_dbg(gpu_dbg_intr, "pgraph nonstall intr %08x", gr_intr); | ||
5360 | |||
5361 | if (gr_intr & gr_intr_nonstall_trap_pending_f()) { | ||
5362 | gk20a_channel_semaphore_wakeup(g); | ||
5363 | clear_intr |= gr_intr_nonstall_trap_pending_f(); | ||
5364 | } | ||
5365 | |||
5366 | gk20a_writel(g, gr_intr_nonstall_r(), clear_intr); | ||
5367 | |||
5368 | return 0; | ||
5369 | } | ||
5370 | |||
5371 | int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size) | ||
5372 | { | ||
5373 | BUG_ON(size == NULL); | ||
5374 | return gr_gk20a_submit_fecs_method_op(g, | ||
5375 | (struct fecs_method_op_gk20a) { | ||
5376 | .mailbox.id = 0, | ||
5377 | .mailbox.data = 0, | ||
5378 | .mailbox.clr = ~0, | ||
5379 | .method.data = 1, | ||
5380 | .method.addr = gr_fecs_method_push_adr_discover_reglist_image_size_v(), | ||
5381 | .mailbox.ret = size, | ||
5382 | .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL, | ||
5383 | .mailbox.ok = 0, | ||
5384 | .cond.fail = GR_IS_UCODE_OP_SKIP, | ||
5385 | .mailbox.fail = 0}); | ||
5386 | } | ||
5387 | |||
5388 | int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr) | ||
5389 | { | ||
5390 | return gr_gk20a_submit_fecs_method_op(g, | ||
5391 | (struct fecs_method_op_gk20a){ | ||
5392 | .mailbox.id = 4, | ||
5393 | .mailbox.data = (gr_fecs_current_ctx_ptr_f(addr >> 12) | | ||
5394 | gr_fecs_current_ctx_valid_f(1) | | ||
5395 | gr_fecs_current_ctx_target_vid_mem_f()), | ||
5396 | .mailbox.clr = ~0, | ||
5397 | .method.data = 1, | ||
5398 | .method.addr = gr_fecs_method_push_adr_set_reglist_bind_instance_v(), | ||
5399 | .mailbox.ret = NULL, | ||
5400 | .cond.ok = GR_IS_UCODE_OP_EQUAL, | ||
5401 | .mailbox.ok = 1, | ||
5402 | .cond.fail = GR_IS_UCODE_OP_SKIP, | ||
5403 | .mailbox.fail = 0}); | ||
5404 | } | ||
5405 | |||
5406 | int gr_gk20a_fecs_set_reglist_virual_addr(struct gk20a *g, u64 pmu_va) | ||
5407 | { | ||
5408 | return gr_gk20a_submit_fecs_method_op(g, | ||
5409 | (struct fecs_method_op_gk20a) { | ||
5410 | .mailbox.id = 4, | ||
5411 | .mailbox.data = u64_lo32(pmu_va >> 8), | ||
5412 | .mailbox.clr = ~0, | ||
5413 | .method.data = 1, | ||
5414 | .method.addr = gr_fecs_method_push_adr_set_reglist_virtual_address_v(), | ||
5415 | .mailbox.ret = NULL, | ||
5416 | .cond.ok = GR_IS_UCODE_OP_EQUAL, | ||
5417 | .mailbox.ok = 1, | ||
5418 | .cond.fail = GR_IS_UCODE_OP_SKIP, | ||
5419 | .mailbox.fail = 0}); | ||
5420 | } | ||
5421 | |||
/* Quiesce the graphics engine for suspend: wait for gr to go idle,
 * disable gpfifo access, mask and clear all gr interrupts and
 * exceptions, and drop cached ctx->chid TLB translations.
 *
 * Returns 0 on success, or the error from the idle wait (in which
 * case nothing is disabled).
 */
int gk20a_gr_suspend(struct gk20a *g)
{
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	u32 ret = 0;

	gk20a_dbg_fn("");

	ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
	if (ret)
		return ret;

	gk20a_writel(g, gr_gpfifo_ctl_r(),
		gr_gpfifo_ctl_access_disabled_f());

	/* disable gr intr */
	gk20a_writel(g, gr_intr_r(), 0);
	gk20a_writel(g, gr_intr_en_r(), 0);

	/* disable all exceptions */
	gk20a_writel(g, gr_exception_r(), 0);
	gk20a_writel(g, gr_exception_en_r(), 0);
	gk20a_writel(g, gr_exception1_r(), 0);
	gk20a_writel(g, gr_exception1_en_r(), 0);
	gk20a_writel(g, gr_exception2_r(), 0);
	gk20a_writel(g, gr_exception2_en_r(), 0);

	/* cached translations reference instance blocks that may be
	 * torn down across suspend */
	gk20a_gr_flush_channel_tlb(&g->gr);

	gk20a_dbg_fn("done");
	return ret;
}
5454 | |||
5455 | static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | ||
5456 | u32 addr, | ||
5457 | bool is_quad, u32 quad, | ||
5458 | u32 *context_buffer, | ||
5459 | u32 context_buffer_size, | ||
5460 | u32 *priv_offset); | ||
5461 | |||
5462 | /* This function will decode a priv address and return the partition type and numbers. */ | ||
5463 | int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, | ||
5464 | int *addr_type, /* enum ctxsw_addr_type */ | ||
5465 | u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num, | ||
5466 | u32 *broadcast_flags) | ||
5467 | { | ||
5468 | u32 gpc_addr; | ||
5469 | u32 ppc_address; | ||
5470 | u32 ppc_broadcast_addr; | ||
5471 | |||
5472 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | ||
5473 | |||
5474 | /* setup defaults */ | ||
5475 | ppc_address = 0; | ||
5476 | ppc_broadcast_addr = 0; | ||
5477 | *addr_type = CTXSW_ADDR_TYPE_SYS; | ||
5478 | *broadcast_flags = PRI_BROADCAST_FLAGS_NONE; | ||
5479 | *gpc_num = 0; | ||
5480 | *tpc_num = 0; | ||
5481 | *ppc_num = 0; | ||
5482 | *be_num = 0; | ||
5483 | |||
5484 | if (pri_is_gpc_addr(addr)) { | ||
5485 | *addr_type = CTXSW_ADDR_TYPE_GPC; | ||
5486 | gpc_addr = pri_gpccs_addr_mask(addr); | ||
5487 | if (pri_is_gpc_addr_shared(addr)) { | ||
5488 | *addr_type = CTXSW_ADDR_TYPE_GPC; | ||
5489 | *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC; | ||
5490 | } else | ||
5491 | *gpc_num = pri_get_gpc_num(addr); | ||
5492 | |||
5493 | if (pri_is_tpc_addr(gpc_addr)) { | ||
5494 | *addr_type = CTXSW_ADDR_TYPE_TPC; | ||
5495 | if (pri_is_tpc_addr_shared(gpc_addr)) { | ||
5496 | *broadcast_flags |= PRI_BROADCAST_FLAGS_TPC; | ||
5497 | return 0; | ||
5498 | } | ||
5499 | *tpc_num = pri_get_tpc_num(gpc_addr); | ||
5500 | } | ||
5501 | return 0; | ||
5502 | } else if (pri_is_be_addr(addr)) { | ||
5503 | *addr_type = CTXSW_ADDR_TYPE_BE; | ||
5504 | if (pri_is_be_addr_shared(addr)) { | ||
5505 | *broadcast_flags |= PRI_BROADCAST_FLAGS_BE; | ||
5506 | return 0; | ||
5507 | } | ||
5508 | *be_num = pri_get_be_num(addr); | ||
5509 | return 0; | ||
5510 | } else { | ||
5511 | *addr_type = CTXSW_ADDR_TYPE_SYS; | ||
5512 | return 0; | ||
5513 | } | ||
5514 | /* PPC!?!?!?! */ | ||
5515 | |||
5516 | /*NOTREACHED*/ | ||
5517 | return -EINVAL; | ||
5518 | } | ||
5519 | |||
5520 | static int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, | ||
5521 | u32 gpc_num, | ||
5522 | u32 *priv_addr_table, u32 *t) | ||
5523 | { | ||
5524 | u32 ppc_num; | ||
5525 | |||
5526 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | ||
5527 | |||
5528 | for (ppc_num = 0; ppc_num < g->gr.pe_count_per_gpc; ppc_num++) | ||
5529 | priv_addr_table[(*t)++] = pri_ppc_addr(pri_ppccs_addr_mask(addr), | ||
5530 | gpc_num, ppc_num); | ||
5531 | |||
5532 | return 0; | ||
5533 | } | ||
5534 | |||
5535 | /* | ||
5536 | * The context buffer is indexed using BE broadcast addresses and GPC/TPC | ||
5537 | * unicast addresses. This function will convert a BE unicast address to a BE | ||
5538 | * broadcast address and split a GPC/TPC broadcast address into a table of | ||
5539 | * GPC/TPC addresses. The addresses generated by this function can be | ||
5540 | * successfully processed by gr_gk20a_find_priv_offset_in_buffer | ||
5541 | */ | ||
5542 | static int gr_gk20a_create_priv_addr_table(struct gk20a *g, | ||
5543 | u32 addr, | ||
5544 | u32 *priv_addr_table, | ||
5545 | u32 *num_registers) | ||
5546 | { | ||
5547 | int addr_type; /*enum ctxsw_addr_type */ | ||
5548 | u32 gpc_num, tpc_num, ppc_num, be_num; | ||
5549 | u32 broadcast_flags; | ||
5550 | u32 t; | ||
5551 | int err; | ||
5552 | |||
5553 | t = 0; | ||
5554 | *num_registers = 0; | ||
5555 | |||
5556 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | ||
5557 | |||
5558 | err = gr_gk20a_decode_priv_addr(g, addr, &addr_type, | ||
5559 | &gpc_num, &tpc_num, &ppc_num, &be_num, | ||
5560 | &broadcast_flags); | ||
5561 | gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type = %d", addr_type); | ||
5562 | if (err) | ||
5563 | return err; | ||
5564 | |||
5565 | if ((addr_type == CTXSW_ADDR_TYPE_SYS) || | ||
5566 | (addr_type == CTXSW_ADDR_TYPE_BE)) { | ||
5567 | /* The BE broadcast registers are included in the compressed PRI | ||
5568 | * table. Convert a BE unicast address to a broadcast address | ||
5569 | * so that we can look up the offset. */ | ||
5570 | if ((addr_type == CTXSW_ADDR_TYPE_BE) && | ||
5571 | !(broadcast_flags & PRI_BROADCAST_FLAGS_BE)) | ||
5572 | priv_addr_table[t++] = pri_be_shared_addr(addr); | ||
5573 | else | ||
5574 | priv_addr_table[t++] = addr; | ||
5575 | |||
5576 | *num_registers = t; | ||
5577 | return 0; | ||
5578 | } | ||
5579 | |||
5580 | /* The GPC/TPC unicast registers are included in the compressed PRI | ||
5581 | * tables. Convert a GPC/TPC broadcast address to unicast addresses so | ||
5582 | * that we can look up the offsets. */ | ||
5583 | if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) { | ||
5584 | for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { | ||
5585 | |||
5586 | if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) | ||
5587 | for (tpc_num = 0; | ||
5588 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | ||
5589 | tpc_num++) | ||
5590 | priv_addr_table[t++] = | ||
5591 | pri_tpc_addr(pri_tpccs_addr_mask(addr), | ||
5592 | gpc_num, tpc_num); | ||
5593 | |||
5594 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) { | ||
5595 | err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num, | ||
5596 | priv_addr_table, &t); | ||
5597 | if (err) | ||
5598 | return err; | ||
5599 | } else | ||
5600 | priv_addr_table[t++] = | ||
5601 | pri_gpc_addr(pri_gpccs_addr_mask(addr), | ||
5602 | gpc_num); | ||
5603 | } | ||
5604 | } else { | ||
5605 | if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) | ||
5606 | for (tpc_num = 0; | ||
5607 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | ||
5608 | tpc_num++) | ||
5609 | priv_addr_table[t++] = | ||
5610 | pri_tpc_addr(pri_tpccs_addr_mask(addr), | ||
5611 | gpc_num, tpc_num); | ||
5612 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) | ||
5613 | err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num, | ||
5614 | priv_addr_table, &t); | ||
5615 | else | ||
5616 | priv_addr_table[t++] = addr; | ||
5617 | } | ||
5618 | |||
5619 | *num_registers = t; | ||
5620 | return 0; | ||
5621 | } | ||
5622 | |||
5623 | int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, | ||
5624 | u32 addr, | ||
5625 | u32 max_offsets, | ||
5626 | u32 *offsets, u32 *offset_addrs, | ||
5627 | u32 *num_offsets, | ||
5628 | bool is_quad, u32 quad) | ||
5629 | { | ||
5630 | u32 i; | ||
5631 | u32 priv_offset = 0; | ||
5632 | u32 *priv_registers; | ||
5633 | u32 num_registers = 0; | ||
5634 | int err = 0; | ||
5635 | u32 potential_offsets = proj_scal_litter_num_gpcs_v() * | ||
5636 | proj_scal_litter_num_tpc_per_gpc_v(); | ||
5637 | |||
5638 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | ||
5639 | |||
5640 | /* implementation is crossed-up if either of these happen */ | ||
5641 | if (max_offsets > potential_offsets) | ||
5642 | return -EINVAL; | ||
5643 | |||
5644 | if (!g->gr.ctx_vars.golden_image_initialized) | ||
5645 | return -ENODEV; | ||
5646 | |||
5647 | priv_registers = kzalloc(sizeof(u32) * potential_offsets, GFP_KERNEL); | ||
5648 | if (IS_ERR_OR_NULL(priv_registers)) { | ||
5649 | gk20a_dbg_fn("failed alloc for potential_offsets=%d", potential_offsets); | ||
5650 | err = PTR_ERR(priv_registers); | ||
5651 | goto cleanup; | ||
5652 | } | ||
5653 | memset(offsets, 0, sizeof(u32) * max_offsets); | ||
5654 | memset(offset_addrs, 0, sizeof(u32) * max_offsets); | ||
5655 | *num_offsets = 0; | ||
5656 | |||
5657 | gr_gk20a_create_priv_addr_table(g, addr, &priv_registers[0], &num_registers); | ||
5658 | |||
5659 | if ((max_offsets > 1) && (num_registers > max_offsets)) { | ||
5660 | err = -EINVAL; | ||
5661 | goto cleanup; | ||
5662 | } | ||
5663 | |||
5664 | if ((max_offsets == 1) && (num_registers > 1)) | ||
5665 | num_registers = 1; | ||
5666 | |||
5667 | if (!g->gr.ctx_vars.local_golden_image) { | ||
5668 | gk20a_dbg_fn("no context switch header info to work with"); | ||
5669 | err = -EINVAL; | ||
5670 | goto cleanup; | ||
5671 | } | ||
5672 | |||
5673 | for (i = 0; i < num_registers; i++) { | ||
5674 | err = gr_gk20a_find_priv_offset_in_buffer(g, | ||
5675 | priv_registers[i], | ||
5676 | is_quad, quad, | ||
5677 | g->gr.ctx_vars.local_golden_image, | ||
5678 | g->gr.ctx_vars.golden_image_size, | ||
5679 | &priv_offset); | ||
5680 | if (err) { | ||
5681 | gk20a_dbg_fn("Could not determine priv_offset for addr:0x%x", | ||
5682 | addr); /*, grPriRegStr(addr)));*/ | ||
5683 | goto cleanup; | ||
5684 | } | ||
5685 | |||
5686 | offsets[i] = priv_offset; | ||
5687 | offset_addrs[i] = priv_registers[i]; | ||
5688 | } | ||
5689 | |||
5690 | *num_offsets = num_registers; | ||
5691 | |||
5692 | cleanup: | ||
5693 | |||
5694 | if (!IS_ERR_OR_NULL(priv_registers)) | ||
5695 | kfree(priv_registers); | ||
5696 | |||
5697 | return err; | ||
5698 | } | ||
5699 | |||
/* Setup some register tables. This looks hacky; our
 * register/offset functions are just that, functions.
 * So they can't be used as initializers... TBD: fix to
 * generate consts at least on an as-needed basis.
 * The tables are filled in lazily by init_sm_dsm_reg_info();
 * _ovr_perf_regs[0] != 0 is used as the "already initialized" flag.
 */
static const u32 _num_ovr_perf_regs = 17;
static u32 _ovr_perf_regs[17] = { 0, };
/* Following are the blocks of registers that the ucode
 stores in the extended region.*/
/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */
static const u32 _num_sm_dsm_perf_regs = 5;
/* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/
static const u32 _num_sm_dsm_perf_ctrl_regs = 4;
static u32 _sm_dsm_perf_regs[5];
static u32 _sm_dsm_perf_ctrl_regs[4];
5715 | |||
/*
 * Lazily populate the SM DSM register tables with the GPC0/TPC0 base
 * addresses.  The address accessors are functions, not constants, so the
 * tables cannot be statically initialized; _ovr_perf_regs[0] serves as
 * the "done" sentinel and is never zero once the tables are filled.
 */
static void init_sm_dsm_reg_info(void)
{
	/* already initialized? */
	if (_ovr_perf_regs[0] != 0)
		return;

	_ovr_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r();
	_ovr_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r();
	_ovr_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r();
	_ovr_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
	_ovr_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r();
	_ovr_perf_regs[5] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r();
	_ovr_perf_regs[6] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r();
	_ovr_perf_regs[7] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r();
	_ovr_perf_regs[8] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r();
	_ovr_perf_regs[9] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r();
	_ovr_perf_regs[10] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r();
	_ovr_perf_regs[11] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r();
	_ovr_perf_regs[12] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r();
	_ovr_perf_regs[13] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_r();
	_ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r();
	_ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r();
	_ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r();


	_sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r();
	_sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r();
	_sm_dsm_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_r();
	_sm_dsm_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_r();
	_sm_dsm_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_r();

	_sm_dsm_perf_ctrl_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r();
	_sm_dsm_perf_ctrl_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r();
	_sm_dsm_perf_ctrl_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r();
	_sm_dsm_perf_ctrl_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r();

}
5752 | |||
5753 | /* TBD: would like to handle this elsewhere, at a higher level. | ||
5754 | * these are currently constructed in a "test-then-write" style | ||
5755 | * which makes it impossible to know externally whether a ctx | ||
5756 | * write will actually occur. so later we should put a lazy, | ||
5757 | * map-and-hold system in the patch write state */ | ||
5758 | int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | ||
5759 | struct channel_ctx_gk20a *ch_ctx, | ||
5760 | u32 addr, u32 data, | ||
5761 | u8 *context) | ||
5762 | { | ||
5763 | u32 num_gpc = g->gr.gpc_count; | ||
5764 | u32 num_tpc; | ||
5765 | u32 tpc, gpc, reg; | ||
5766 | u32 chk_addr; | ||
5767 | u32 vaddr_lo; | ||
5768 | u32 vaddr_hi; | ||
5769 | u32 tmp; | ||
5770 | |||
5771 | init_sm_dsm_reg_info(); | ||
5772 | |||
5773 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | ||
5774 | |||
5775 | for (reg = 0; reg < _num_ovr_perf_regs; reg++) { | ||
5776 | for (gpc = 0; gpc < num_gpc; gpc++) { | ||
5777 | num_tpc = g->gr.gpc_tpc_count[gpc]; | ||
5778 | for (tpc = 0; tpc < num_tpc; tpc++) { | ||
5779 | chk_addr = ((proj_gpc_stride_v() * gpc) + | ||
5780 | (proj_tpc_in_gpc_stride_v() * tpc) + | ||
5781 | _ovr_perf_regs[reg]); | ||
5782 | if (chk_addr != addr) | ||
5783 | continue; | ||
5784 | /* reset the patch count from previous | ||
5785 | runs,if ucode has already processed | ||
5786 | it */ | ||
5787 | tmp = gk20a_mem_rd32(context + | ||
5788 | ctxsw_prog_main_image_patch_count_o(), 0); | ||
5789 | |||
5790 | if (!tmp) | ||
5791 | ch_ctx->patch_ctx.data_count = 0; | ||
5792 | |||
5793 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
5794 | addr, data, true); | ||
5795 | |||
5796 | vaddr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va); | ||
5797 | vaddr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va); | ||
5798 | |||
5799 | gk20a_mem_wr32(context + | ||
5800 | ctxsw_prog_main_image_patch_count_o(), | ||
5801 | 0, ch_ctx->patch_ctx.data_count); | ||
5802 | gk20a_mem_wr32(context + | ||
5803 | ctxsw_prog_main_image_patch_adr_lo_o(), | ||
5804 | 0, vaddr_lo); | ||
5805 | gk20a_mem_wr32(context + | ||
5806 | ctxsw_prog_main_image_patch_adr_hi_o(), | ||
5807 | 0, vaddr_hi); | ||
5808 | |||
5809 | /* we're not caching these on cpu side, | ||
5810 | but later watch for it */ | ||
5811 | |||
5812 | /* the l2 invalidate in the patch_write | ||
5813 | * would be too early for this? */ | ||
5814 | gk20a_mm_l2_invalidate(g); | ||
5815 | return 0; | ||
5816 | } | ||
5817 | } | ||
5818 | } | ||
5819 | |||
5820 | return 0; | ||
5821 | } | ||
5822 | |||
5823 | static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) | ||
5824 | { | ||
5825 | u32 reg; | ||
5826 | u32 quad_ctrl; | ||
5827 | u32 half_ctrl; | ||
5828 | u32 tpc, gpc; | ||
5829 | u32 gpc_tpc_addr; | ||
5830 | u32 gpc_tpc_stride; | ||
5831 | |||
5832 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset); | ||
5833 | |||
5834 | gpc = pri_get_gpc_num(offset); | ||
5835 | gpc_tpc_addr = pri_gpccs_addr_mask(offset); | ||
5836 | tpc = pri_get_tpc_num(gpc_tpc_addr); | ||
5837 | |||
5838 | quad_ctrl = quad & 0x1; /* first bit tells us quad */ | ||
5839 | half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */ | ||
5840 | |||
5841 | gpc_tpc_stride = gpc * proj_gpc_stride_v() + | ||
5842 | tpc * proj_tpc_in_gpc_stride_v(); | ||
5843 | gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride; | ||
5844 | |||
5845 | reg = gk20a_readl(g, gpc_tpc_addr); | ||
5846 | reg = set_field(reg, | ||
5847 | gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(), | ||
5848 | gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(quad_ctrl)); | ||
5849 | |||
5850 | gk20a_writel(g, gpc_tpc_addr, reg); | ||
5851 | |||
5852 | gpc_tpc_addr = gr_gpc0_tpc0_sm_debug_sfe_control_r() + gpc_tpc_stride; | ||
5853 | reg = gk20a_readl(g, gpc_tpc_addr); | ||
5854 | reg = set_field(reg, | ||
5855 | gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(), | ||
5856 | gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(half_ctrl)); | ||
5857 | gk20a_writel(g, gpc_tpc_addr, reg); | ||
5858 | } | ||
5859 | |||
/* sentinel (all bits set) meaning "no matching register id found" */
#define ILLEGAL_ID (~0)
5861 | |||
5862 | static inline bool check_main_image_header_magic(void *context) | ||
5863 | { | ||
5864 | u32 magic = gk20a_mem_rd32(context + | ||
5865 | ctxsw_prog_main_image_magic_value_o(), 0); | ||
5866 | gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic); | ||
5867 | return magic == ctxsw_prog_main_image_magic_value_v_value_v(); | ||
5868 | } | ||
5869 | static inline bool check_local_header_magic(void *context) | ||
5870 | { | ||
5871 | u32 magic = gk20a_mem_rd32(context + | ||
5872 | ctxsw_prog_local_magic_value_o(), 0); | ||
5873 | gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic); | ||
5874 | return magic == ctxsw_prog_local_magic_value_v_value_v(); | ||
5875 | |||
5876 | } | ||
5877 | |||
/* most likely dupe of ctxsw_gpccs_header__size_1_v() */
/* Size in bytes of one ctxsw ucode segment header. */
static inline int ctxsw_prog_ucode_header_size_in_bytes(void)
{
	return 256;
}
5883 | |||
5884 | void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g, | ||
5885 | u32 *num_sm_dsm_perf_regs, | ||
5886 | u32 **sm_dsm_perf_regs, | ||
5887 | u32 *perf_register_stride) | ||
5888 | { | ||
5889 | *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs; | ||
5890 | *sm_dsm_perf_regs = _sm_dsm_perf_regs; | ||
5891 | *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); | ||
5892 | } | ||
5893 | |||
5894 | void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
5895 | u32 *num_sm_dsm_perf_ctrl_regs, | ||
5896 | u32 **sm_dsm_perf_ctrl_regs, | ||
5897 | u32 *ctrl_register_stride) | ||
5898 | { | ||
5899 | *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs; | ||
5900 | *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs; | ||
5901 | *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); | ||
5902 | } | ||
5903 | |||
/*
 * Locate, inside the "extended" section of a golden context image, the
 * save slot for a per-TPC SM DSM perf (or perf-control) register.
 *
 * Only TPC registers are stored in the extended region, so any address
 * that does not decode to a GPC/TPC register returns -EINVAL and the
 * caller falls back to the regular priv segments.  On success
 * *priv_offset receives the byte offset of the register's slot within
 * @context_buffer.
 *
 * NOTE(review): @is_quad and @context_buffer_size are not referenced in
 * this body; @quad is only used on the perf-counter (non-ctrl) path.
 */
static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
						   u32 addr,
						   bool is_quad, u32 quad,
						   u32 *context_buffer,
						   u32 context_buffer_size,
						   u32 *priv_offset)
{
	u32 i, data32;
	u32 gpc_num, tpc_num;
	u32 num_gpcs, num_tpcs;
	u32 chk_addr;
	u32 ext_priv_offset, ext_priv_size;
	void *context;
	u32 offset_to_segment, offset_to_segment_end;
	u32 sm_dsm_perf_reg_id = ILLEGAL_ID;
	u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID;
	u32 num_ext_gpccs_ext_buffer_segments;
	u32 inter_seg_offset;
	u32 tpc_gpc_mask = (proj_tpc_in_gpc_stride_v() - 1);
	u32 max_tpc_count;
	u32 *sm_dsm_perf_ctrl_regs = NULL;
	u32 num_sm_dsm_perf_ctrl_regs = 0;
	u32 *sm_dsm_perf_regs = NULL;
	u32 num_sm_dsm_perf_regs = 0;
	u32 buffer_segments_size = 0;
	u32 marker_size = 0;
	u32 control_register_stride = 0;
	u32 perf_register_stride = 0;

	/* Only have TPC registers in extended region, so if not a TPC reg,
	   then return error so caller can look elsewhere. */
	if (pri_is_gpc_addr(addr)) {
		u32 gpc_addr = 0;
		gpc_num = pri_get_gpc_num(addr);
		gpc_addr = pri_gpccs_addr_mask(addr);
		if (pri_is_tpc_addr(gpc_addr))
			tpc_num = pri_get_tpc_num(gpc_addr);
		else
			return -EINVAL;

		gk20a_dbg_info(" gpc = %d tpc = %d",
				gpc_num, tpc_num);
	} else
		return -EINVAL;

	buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
	/* note below is in words/num_registers */
	marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2;

	context = context_buffer;
	/* sanity check main header */
	if (!check_main_image_header_magic(context)) {
		gk20a_err(dev_from_gk20a(g),
			   "Invalid main header: magic value");
		return -EINVAL;
	}
	num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0);
	if (gpc_num >= num_gpcs) {
		gk20a_err(dev_from_gk20a(g),
		   "GPC 0x%08x is greater than total count 0x%08x!\n",
			   gpc_num, num_gpcs);
		return -EINVAL;
	}

	/* the main header tells us where the extended section lives and
	 * how many segments it holds */
	data32 = gk20a_mem_rd32(context + ctxsw_prog_main_extended_buffer_ctl_o(), 0);
	ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32);
	if (0 == ext_priv_size) {
		gk20a_dbg_info(" No extended memory in context buffer");
		return -EINVAL;
	}
	ext_priv_offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data32);

	/* byte offset of the extended section and its end, used for the
	 * final bounds check */
	offset_to_segment = ext_priv_offset * ctxsw_prog_ucode_header_size_in_bytes();
	offset_to_segment_end = offset_to_segment +
		(ext_priv_size * buffer_segments_size);

	/* check local header magic */
	context += ctxsw_prog_ucode_header_size_in_bytes();
	if (!check_local_header_magic(context)) {
		gk20a_err(dev_from_gk20a(g),
			   "Invalid local header: magic value\n");
		return -EINVAL;
	}

	/*
	 * See if the incoming register address is in the first table of
	 * registers. We check this by decoding only the TPC addr portion.
	 * If we get a hit on the TPC bit, we then double check the address
	 * by computing it from the base gpc/tpc strides. Then make sure
	 * it is a real match.
	 */
	g->ops.gr.get_sm_dsm_perf_regs(g, &num_sm_dsm_perf_regs,
				       &sm_dsm_perf_regs,
				       &perf_register_stride);

	init_sm_dsm_reg_info();

	for (i = 0; i < num_sm_dsm_perf_regs; i++) {
		if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) {
			sm_dsm_perf_reg_id = i;

			gk20a_dbg_info("register match: 0x%08x",
					sm_dsm_perf_regs[i]);

			/* reconstruct the full unicast address and verify
			 * it matches exactly, not just in the TPC bits */
			chk_addr = (proj_gpc_base_v() +
				   (proj_gpc_stride_v() * gpc_num) +
				   proj_tpc_in_gpc_base_v() +
				   (proj_tpc_in_gpc_stride_v() * tpc_num) +
				   (sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask));

			if (chk_addr != addr) {
				gk20a_err(dev_from_gk20a(g),
				   "Oops addr miss-match! : 0x%08x != 0x%08x\n",
					   addr, chk_addr);
				return -EINVAL;
			}
			break;
		}
	}

	/* Didn't find reg in supported group 1.
	 * so try the second group now */
	g->ops.gr.get_sm_dsm_perf_ctrl_regs(g, &num_sm_dsm_perf_ctrl_regs,
				       &sm_dsm_perf_ctrl_regs,
				       &control_register_stride);

	if (ILLEGAL_ID == sm_dsm_perf_reg_id) {
		for (i = 0; i < num_sm_dsm_perf_ctrl_regs; i++) {
			if ((addr & tpc_gpc_mask) ==
			    (sm_dsm_perf_ctrl_regs[i] & tpc_gpc_mask)) {
				sm_dsm_perf_ctrl_reg_id = i;

				gk20a_dbg_info("register match: 0x%08x",
						sm_dsm_perf_ctrl_regs[i]);

				chk_addr = (proj_gpc_base_v() +
					   (proj_gpc_stride_v() * gpc_num) +
					   proj_tpc_in_gpc_base_v() +
					   (proj_tpc_in_gpc_stride_v() * tpc_num) +
					   (sm_dsm_perf_ctrl_regs[sm_dsm_perf_ctrl_reg_id] &
					    tpc_gpc_mask));

				if (chk_addr != addr) {
					gk20a_err(dev_from_gk20a(g),
					   "Oops addr miss-match! : 0x%08x != 0x%08x\n",
						   addr, chk_addr);
					return -EINVAL;

				}

				break;
			}
		}
	}

	/* not found in either table -> not an extended-region register */
	if ((ILLEGAL_ID == sm_dsm_perf_ctrl_reg_id) &&
	    (ILLEGAL_ID == sm_dsm_perf_reg_id))
		return -EINVAL;

	/* Skip the FECS extended header, nothing there for us now. */
	offset_to_segment += buffer_segments_size;

	/* skip through the GPCCS extended headers until we get to the data for
	 * our GPC. The size of each gpc extended segment is enough to hold the
	 * max tpc count for the gpcs,in 256b chunks.
	 */

	max_tpc_count = proj_scal_litter_num_tpc_per_gpc_v();

	num_ext_gpccs_ext_buffer_segments = (u32)((max_tpc_count + 1) / 2);

	offset_to_segment += (num_ext_gpccs_ext_buffer_segments *
			      buffer_segments_size * gpc_num);

	num_tpcs = g->gr.gpc_tpc_count[gpc_num];

	/* skip the head marker to start with */
	inter_seg_offset = marker_size;

	if (ILLEGAL_ID != sm_dsm_perf_ctrl_reg_id) {
		/* skip over control regs of TPC's before the one we want.
		 * then skip to the register in this tpc */
		inter_seg_offset = inter_seg_offset +
			(tpc_num * control_register_stride) +
			sm_dsm_perf_ctrl_reg_id;
	} else {
		/* skip all the control registers */
		inter_seg_offset = inter_seg_offset +
			(num_tpcs * control_register_stride);

		/* skip the marker between control and counter segments */
		inter_seg_offset += marker_size;

		/* skip over counter regs of TPCs before the one we want */
		inter_seg_offset = inter_seg_offset +
			(tpc_num * perf_register_stride) *
			ctxsw_prog_extended_num_smpc_quadrants_v();

		/* skip over the register for the quadrants we do not want.
		 * then skip to the register in this tpc */
		inter_seg_offset = inter_seg_offset +
			(perf_register_stride * quad) +
			sm_dsm_perf_reg_id;
	}

	/* set the offset to the segment offset plus the inter segment offset to
	 * our register */
	offset_to_segment += (inter_seg_offset * 4);

	/* last sanity check: did we somehow compute an offset outside the
	 * extended buffer? */
	if (offset_to_segment > offset_to_segment_end) {
		gk20a_err(dev_from_gk20a(g),
			   "Overflow ctxsw buffer! 0x%08x > 0x%08x\n",
			   offset_to_segment, offset_to_segment_end);
		return -EINVAL;
	}

	*priv_offset = offset_to_segment;

	return 0;
}
6126 | |||
6127 | |||
/*
 * Map a priv register address @pri_addr to its byte offset within one
 * priv segment (SYS/BE, TPC, PPC or GPC) of the context buffer, using
 * the netlist-provided ctxsw register lists in g->gr.ctx_vars.
 *
 * Returns 0 with *priv_offset set on a match, -EINVAL when the register
 * is not present in the segment or the ctx_vars are not valid.
 * NOTE(review): @ppc_mask is currently unused here.
 */
static int
gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
					     int addr_type,/* enum ctxsw_addr_type */
					     u32 pri_addr,
					     u32 gpc_num, u32 num_tpcs,
					     u32 num_ppcs, u32 ppc_mask,
					     u32 *priv_offset)
{
	u32 i;
	u32 address, base_address;
	u32 sys_offset, gpc_offset, tpc_offset, ppc_offset;
	u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr;
	struct aiv_gk20a *reg;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr);

	if (!g->gr.ctx_vars.valid)
		return -EINVAL;

	/* Process the SYS/BE segment. */
	if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
	    (addr_type == CTXSW_ADDR_TYPE_BE)) {
		for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
			reg = &g->gr.ctx_vars.ctxsw_regs.sys.l[i];
			address = reg->addr;
			/* sys regs are stored sequentially; the list
			 * index field is already the byte offset */
			sys_offset = reg->index;

			if (pri_addr == address) {
				*priv_offset = sys_offset;
				return 0;
			}
		}
	}

	/* Process the TPC segment. */
	if (addr_type == CTXSW_ADDR_TYPE_TPC) {
		for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
			for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
				reg = &g->gr.ctx_vars.ctxsw_regs.tpc.l[i];
				address = reg->addr;
				tpc_addr = pri_tpccs_addr_mask(address);
				/* rebase the list's TPC0 address onto the
				 * candidate gpc_num/tpc_num unicast address */
				base_address = proj_gpc_base_v() +
					(gpc_num * proj_gpc_stride_v()) +
					proj_tpc_in_gpc_base_v() +
					(tpc_num * proj_tpc_in_gpc_stride_v());
				address = base_address + tpc_addr;
				/*
				 * The data for the TPCs is interleaved in the context buffer.
				 * Example with num_tpcs = 2
				 * 0    1    2    3    4    5    6    7    8    9    10   11 ...
				 * 0-0  1-0  0-1  1-1  0-2  1-2  0-3  1-3  0-4  1-4  0-5  1-5 ...
				 */
				tpc_offset = (reg->index * num_tpcs) + (tpc_num * 4);

				if (pri_addr == address) {
					*priv_offset = tpc_offset;
					return 0;
				}
			}
		}
	}

	/* Process the PPC segment. */
	if (addr_type == CTXSW_ADDR_TYPE_PPC) {
		for (ppc_num = 0; ppc_num < num_ppcs; ppc_num++) {
			for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
				reg = &g->gr.ctx_vars.ctxsw_regs.ppc.l[i];
				address = reg->addr;
				ppc_addr = pri_ppccs_addr_mask(address);
				base_address = proj_gpc_base_v() +
					(gpc_num * proj_gpc_stride_v()) +
					proj_ppc_in_gpc_base_v() +
					(ppc_num * proj_ppc_in_gpc_stride_v());
				address = base_address + ppc_addr;
				/*
				 * The data for the PPCs is interleaved in the context buffer.
				 * Example with numPpcs = 2
				 * 0    1    2    3    4    5    6    7    8    9    10   11 ...
				 * 0-0  1-0  0-1  1-1  0-2  1-2  0-3  1-3  0-4  1-4  0-5  1-5 ...
				 */
				ppc_offset = (reg->index * num_ppcs) + (ppc_num * 4);

				if (pri_addr == address) {
					*priv_offset = ppc_offset;
					return 0;
				}
			}
		}
	}


	/* Process the GPC segment. */
	if (addr_type == CTXSW_ADDR_TYPE_GPC) {
		for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
			reg = &g->gr.ctx_vars.ctxsw_regs.gpc.l[i];

			address = reg->addr;
			gpc_addr = pri_gpccs_addr_mask(address);
			gpc_offset = reg->index;

			base_address = proj_gpc_base_v() +
				(gpc_num * proj_gpc_stride_v());
			address = base_address + gpc_addr;

			if (pri_addr == address) {
				*priv_offset = gpc_offset;
				return 0;
			}
		}
	}

	/* no list contained this register */
	return -EINVAL;
}
6241 | |||
6242 | static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | ||
6243 | void *context, | ||
6244 | u32 *num_ppcs, u32 *ppc_mask, | ||
6245 | u32 *reg_ppc_count) | ||
6246 | { | ||
6247 | u32 data32; | ||
6248 | u32 litter_num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v(); | ||
6249 | |||
6250 | /* | ||
6251 | * if there is only 1 PES_PER_GPC, then we put the PES registers | ||
6252 | * in the GPC reglist, so we can't error out if ppc.count == 0 | ||
6253 | */ | ||
6254 | if ((!g->gr.ctx_vars.valid) || | ||
6255 | ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) && | ||
6256 | (litter_num_pes_per_gpc > 1))) | ||
6257 | return -EINVAL; | ||
6258 | |||
6259 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0); | ||
6260 | |||
6261 | *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32); | ||
6262 | *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32); | ||
6263 | |||
6264 | *reg_ppc_count = g->gr.ctx_vars.ctxsw_regs.ppc.count; | ||
6265 | |||
6266 | return 0; | ||
6267 | } | ||
6268 | |||
6269 | |||
6270 | |||
6271 | /* | ||
6272 | * This function will return the 32 bit offset for a priv register if it is | ||
6273 | * present in the context buffer. | ||
6274 | */ | ||
6275 | static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | ||
6276 | u32 addr, | ||
6277 | bool is_quad, u32 quad, | ||
6278 | u32 *context_buffer, | ||
6279 | u32 context_buffer_size, | ||
6280 | u32 *priv_offset) | ||
6281 | { | ||
6282 | struct gr_gk20a *gr = &g->gr; | ||
6283 | u32 i, data32; | ||
6284 | int err; | ||
6285 | int addr_type; /*enum ctxsw_addr_type */ | ||
6286 | u32 broadcast_flags; | ||
6287 | u32 gpc_num, tpc_num, ppc_num, be_num; | ||
6288 | u32 num_gpcs, num_tpcs, num_ppcs; | ||
6289 | u32 offset; | ||
6290 | u32 sys_priv_offset, gpc_priv_offset; | ||
6291 | u32 ppc_mask, reg_list_ppc_count; | ||
6292 | void *context; | ||
6293 | u32 offset_to_segment; | ||
6294 | |||
6295 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | ||
6296 | |||
6297 | err = gr_gk20a_decode_priv_addr(g, addr, &addr_type, | ||
6298 | &gpc_num, &tpc_num, &ppc_num, &be_num, | ||
6299 | &broadcast_flags); | ||
6300 | if (err) | ||
6301 | return err; | ||
6302 | |||
6303 | context = context_buffer; | ||
6304 | if (!check_main_image_header_magic(context)) { | ||
6305 | gk20a_err(dev_from_gk20a(g), | ||
6306 | "Invalid main header: magic value"); | ||
6307 | return -EINVAL; | ||
6308 | } | ||
6309 | num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); | ||
6310 | |||
6311 | /* Parse the FECS local header. */ | ||
6312 | context += ctxsw_prog_ucode_header_size_in_bytes(); | ||
6313 | if (!check_local_header_magic(context)) { | ||
6314 | gk20a_err(dev_from_gk20a(g), | ||
6315 | "Invalid FECS local header: magic value\n"); | ||
6316 | return -EINVAL; | ||
6317 | } | ||
6318 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); | ||
6319 | sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); | ||
6320 | |||
6321 | /* If found in Ext buffer, ok. | ||
6322 | * If it failed and we expected to find it there (quad offset) | ||
6323 | * then return the error. Otherwise continue on. | ||
6324 | */ | ||
6325 | err = gr_gk20a_find_priv_offset_in_ext_buffer(g, | ||
6326 | addr, is_quad, quad, context_buffer, | ||
6327 | context_buffer_size, priv_offset); | ||
6328 | if (!err || (err && is_quad)) | ||
6329 | return err; | ||
6330 | |||
6331 | if ((addr_type == CTXSW_ADDR_TYPE_SYS) || | ||
6332 | (addr_type == CTXSW_ADDR_TYPE_BE)) { | ||
6333 | /* Find the offset in the FECS segment. */ | ||
6334 | offset_to_segment = sys_priv_offset * | ||
6335 | ctxsw_prog_ucode_header_size_in_bytes(); | ||
6336 | |||
6337 | err = gr_gk20a_process_context_buffer_priv_segment(g, | ||
6338 | addr_type, addr, | ||
6339 | 0, 0, 0, 0, | ||
6340 | &offset); | ||
6341 | if (err) | ||
6342 | return err; | ||
6343 | |||
6344 | *priv_offset = (offset_to_segment + offset); | ||
6345 | return 0; | ||
6346 | } | ||
6347 | |||
6348 | if ((gpc_num + 1) > num_gpcs) { | ||
6349 | gk20a_err(dev_from_gk20a(g), | ||
6350 | "GPC %d not in this context buffer.\n", | ||
6351 | gpc_num); | ||
6352 | return -EINVAL; | ||
6353 | } | ||
6354 | |||
6355 | /* Parse the GPCCS local header(s).*/ | ||
6356 | for (i = 0; i < num_gpcs; i++) { | ||
6357 | context += ctxsw_prog_ucode_header_size_in_bytes(); | ||
6358 | if (!check_local_header_magic(context)) { | ||
6359 | gk20a_err(dev_from_gk20a(g), | ||
6360 | "Invalid GPCCS local header: magic value\n"); | ||
6361 | return -EINVAL; | ||
6362 | |||
6363 | } | ||
6364 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); | ||
6365 | gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); | ||
6366 | |||
6367 | err = gr_gk20a_determine_ppc_configuration(g, context, | ||
6368 | &num_ppcs, &ppc_mask, | ||
6369 | ®_list_ppc_count); | ||
6370 | if (err) | ||
6371 | return err; | ||
6372 | |||
6373 | num_tpcs = gk20a_mem_rd32(context + ctxsw_prog_local_image_num_tpcs_o(), 0); | ||
6374 | |||
6375 | if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) { | ||
6376 | gk20a_err(dev_from_gk20a(g), | ||
6377 | "GPC %d TPC %d not in this context buffer.\n", | ||
6378 | gpc_num, tpc_num); | ||
6379 | return -EINVAL; | ||
6380 | } | ||
6381 | |||
6382 | /* Find the offset in the GPCCS segment.*/ | ||
6383 | if (i == gpc_num) { | ||
6384 | offset_to_segment = gpc_priv_offset * | ||
6385 | ctxsw_prog_ucode_header_size_in_bytes(); | ||
6386 | |||
6387 | if (addr_type == CTXSW_ADDR_TYPE_TPC) { | ||
6388 | /*reg = gr->ctx_vars.ctxsw_regs.tpc.l;*/ | ||
6389 | } else if (addr_type == CTXSW_ADDR_TYPE_PPC) { | ||
6390 | /* The ucode stores TPC data before PPC data. | ||
6391 | * Advance offset past TPC data to PPC data. */ | ||
6392 | offset_to_segment += | ||
6393 | ((gr->ctx_vars.ctxsw_regs.tpc.count * | ||
6394 | num_tpcs) << 2); | ||
6395 | } else if (addr_type == CTXSW_ADDR_TYPE_GPC) { | ||
6396 | /* The ucode stores TPC/PPC data before GPC data. | ||
6397 | * Advance offset past TPC/PPC data to GPC data. */ | ||
6398 | /* note 1 PES_PER_GPC case */ | ||
6399 | u32 litter_num_pes_per_gpc = | ||
6400 | proj_scal_litter_num_pes_per_gpc_v(); | ||
6401 | if (litter_num_pes_per_gpc > 1) { | ||
6402 | offset_to_segment += | ||
6403 | (((gr->ctx_vars.ctxsw_regs.tpc.count * | ||
6404 | num_tpcs) << 2) + | ||
6405 | ((reg_list_ppc_count * num_ppcs) << 2)); | ||
6406 | } else { | ||
6407 | offset_to_segment += | ||
6408 | ((gr->ctx_vars.ctxsw_regs.tpc.count * | ||
6409 | num_tpcs) << 2); | ||
6410 | } | ||
6411 | } else { | ||
6412 | gk20a_err(dev_from_gk20a(g), | ||
6413 | " Unknown address type.\n"); | ||
6414 | return -EINVAL; | ||
6415 | } | ||
6416 | err = gr_gk20a_process_context_buffer_priv_segment(g, | ||
6417 | addr_type, addr, | ||
6418 | i, num_tpcs, | ||
6419 | num_ppcs, ppc_mask, | ||
6420 | &offset); | ||
6421 | if (err) | ||
6422 | return -EINVAL; | ||
6423 | |||
6424 | *priv_offset = offset_to_segment + offset; | ||
6425 | return 0; | ||
6426 | } | ||
6427 | } | ||
6428 | |||
6429 | return -EINVAL; | ||
6430 | } | ||
6431 | |||
6432 | |||
6433 | int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | ||
6434 | struct nvhost_dbg_gpu_reg_op *ctx_ops, u32 num_ops, | ||
6435 | u32 num_ctx_wr_ops, u32 num_ctx_rd_ops) | ||
6436 | { | ||
6437 | struct gk20a *g = ch->g; | ||
6438 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | ||
6439 | void *ctx_ptr = NULL; | ||
6440 | int curr_gr_chid, curr_gr_ctx; | ||
6441 | bool ch_is_curr_ctx, restart_gr_ctxsw = false; | ||
6442 | u32 i, j, offset, v; | ||
6443 | u32 max_offsets = proj_scal_litter_num_gpcs_v() * | ||
6444 | proj_scal_litter_num_tpc_per_gpc_v(); | ||
6445 | u32 *offsets = NULL; | ||
6446 | u32 *offset_addrs = NULL; | ||
6447 | u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops}; | ||
6448 | int err, pass; | ||
6449 | |||
6450 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", | ||
6451 | num_ctx_wr_ops, num_ctx_rd_ops); | ||
6452 | |||
6453 | /* disable channel switching. | ||
6454 | * at that point the hardware state can be inspected to | ||
6455 | * determine if the context we're interested in is current. | ||
6456 | */ | ||
6457 | err = gr_gk20a_disable_ctxsw(g); | ||
6458 | if (err) { | ||
6459 | gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw"); | ||
6460 | /* this should probably be ctx-fatal... */ | ||
6461 | goto cleanup; | ||
6462 | } | ||
6463 | |||
6464 | restart_gr_ctxsw = true; | ||
6465 | |||
6466 | curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r()); | ||
6467 | curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx); | ||
6468 | ch_is_curr_ctx = (curr_gr_chid != -1) && (ch->hw_chid == curr_gr_chid); | ||
6469 | |||
6470 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx); | ||
6471 | if (ch_is_curr_ctx) { | ||
6472 | for (pass = 0; pass < 2; pass++) { | ||
6473 | ctx_op_nr = 0; | ||
6474 | for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) { | ||
6475 | /* only do ctx ops and only on the right pass */ | ||
6476 | if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) || | ||
6477 | (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) || | ||
6478 | ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) | ||
6479 | continue; | ||
6480 | |||
6481 | /* if this is a quad access, setup for special access*/ | ||
6482 | if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) | ||
6483 | && g->ops.gr.access_smpc_reg) | ||
6484 | g->ops.gr.access_smpc_reg(g, | ||
6485 | ctx_ops[i].quad, | ||
6486 | ctx_ops[i].offset); | ||
6487 | offset = ctx_ops[i].offset; | ||
6488 | |||
6489 | if (pass == 0) { /* write pass */ | ||
6490 | v = gk20a_readl(g, offset); | ||
6491 | v &= ~ctx_ops[i].and_n_mask_lo; | ||
6492 | v |= ctx_ops[i].value_lo; | ||
6493 | gk20a_writel(g, offset, v); | ||
6494 | |||
6495 | gk20a_dbg(gpu_dbg_gpu_dbg, | ||
6496 | "direct wr: offset=0x%x v=0x%x", | ||
6497 | offset, v); | ||
6498 | |||
6499 | if (ctx_ops[i].op == REGOP(WRITE_64)) { | ||
6500 | v = gk20a_readl(g, offset + 4); | ||
6501 | v &= ~ctx_ops[i].and_n_mask_hi; | ||
6502 | v |= ctx_ops[i].value_hi; | ||
6503 | gk20a_writel(g, offset + 4, v); | ||
6504 | |||
6505 | gk20a_dbg(gpu_dbg_gpu_dbg, | ||
6506 | "direct wr: offset=0x%x v=0x%x", | ||
6507 | offset + 4, v); | ||
6508 | } | ||
6509 | |||
6510 | } else { /* read pass */ | ||
6511 | ctx_ops[i].value_lo = | ||
6512 | gk20a_readl(g, offset); | ||
6513 | |||
6514 | gk20a_dbg(gpu_dbg_gpu_dbg, | ||
6515 | "direct rd: offset=0x%x v=0x%x", | ||
6516 | offset, ctx_ops[i].value_lo); | ||
6517 | |||
6518 | if (ctx_ops[i].op == REGOP(READ_64)) { | ||
6519 | ctx_ops[i].value_hi = | ||
6520 | gk20a_readl(g, offset + 4); | ||
6521 | |||
6522 | gk20a_dbg(gpu_dbg_gpu_dbg, | ||
6523 | "direct rd: offset=0x%x v=0x%x", | ||
6524 | offset, ctx_ops[i].value_lo); | ||
6525 | } else | ||
6526 | ctx_ops[i].value_hi = 0; | ||
6527 | } | ||
6528 | ctx_op_nr++; | ||
6529 | } | ||
6530 | } | ||
6531 | goto cleanup; | ||
6532 | } | ||
6533 | |||
6534 | /* they're the same size, so just use one alloc for both */ | ||
6535 | offsets = kzalloc(2 * sizeof(u32) * max_offsets, GFP_KERNEL); | ||
6536 | if (!offsets) { | ||
6537 | err = -ENOMEM; | ||
6538 | goto cleanup; | ||
6539 | } | ||
6540 | offset_addrs = offsets + max_offsets; | ||
6541 | |||
6542 | /* would have been a variant of gr_gk20a_apply_instmem_overrides */ | ||
6543 | /* recoded in-place instead.*/ | ||
6544 | ctx_ptr = vmap(ch_ctx->gr_ctx.pages, | ||
6545 | PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, | ||
6546 | 0, pgprot_dmacoherent(PAGE_KERNEL)); | ||
6547 | if (!ctx_ptr) { | ||
6548 | err = -ENOMEM; | ||
6549 | goto cleanup; | ||
6550 | } | ||
6551 | |||
6552 | /* Channel gr_ctx buffer is gpu cacheable; so flush and invalidate. | ||
6553 | * There should be no on-going/in-flight references by the gpu now. */ | ||
6554 | gk20a_mm_fb_flush(g); | ||
6555 | gk20a_mm_l2_flush(g, true); | ||
6556 | |||
6557 | /* write to appropriate place in context image, | ||
6558 | * first have to figure out where that really is */ | ||
6559 | |||
6560 | /* first pass is writes, second reads */ | ||
6561 | for (pass = 0; pass < 2; pass++) { | ||
6562 | ctx_op_nr = 0; | ||
6563 | for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) { | ||
6564 | u32 num_offsets; | ||
6565 | |||
6566 | /* only do ctx ops and only on the right pass */ | ||
6567 | if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) || | ||
6568 | (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) || | ||
6569 | ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) | ||
6570 | continue; | ||
6571 | |||
6572 | err = gr_gk20a_get_ctx_buffer_offsets(g, | ||
6573 | ctx_ops[i].offset, | ||
6574 | max_offsets, | ||
6575 | offsets, offset_addrs, | ||
6576 | &num_offsets, | ||
6577 | ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), | ||
6578 | ctx_ops[i].quad); | ||
6579 | if (err) { | ||
6580 | gk20a_dbg(gpu_dbg_gpu_dbg, | ||
6581 | "ctx op invalid offset: offset=0x%x", | ||
6582 | ctx_ops[i].offset); | ||
6583 | ctx_ops[i].status = | ||
6584 | NVHOST_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; | ||
6585 | continue; | ||
6586 | } | ||
6587 | |||
6588 | /* if this is a quad access, setup for special access*/ | ||
6589 | if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) && | ||
6590 | g->ops.gr.access_smpc_reg) | ||
6591 | g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad, | ||
6592 | ctx_ops[i].offset); | ||
6593 | |||
6594 | for (j = 0; j < num_offsets; j++) { | ||
6595 | /* sanity check, don't write outside, worst case */ | ||
6596 | if (offsets[j] >= g->gr.ctx_vars.golden_image_size) | ||
6597 | continue; | ||
6598 | if (pass == 0) { /* write pass */ | ||
6599 | v = gk20a_mem_rd32(ctx_ptr + offsets[j], 0); | ||
6600 | v &= ~ctx_ops[i].and_n_mask_lo; | ||
6601 | v |= ctx_ops[i].value_lo; | ||
6602 | gk20a_mem_wr32(ctx_ptr + offsets[j], 0, v); | ||
6603 | |||
6604 | gk20a_dbg(gpu_dbg_gpu_dbg, | ||
6605 | "context wr: offset=0x%x v=0x%x", | ||
6606 | offsets[j], v); | ||
6607 | |||
6608 | if (ctx_ops[i].op == REGOP(WRITE_64)) { | ||
6609 | v = gk20a_mem_rd32(ctx_ptr + offsets[j] + 4, 0); | ||
6610 | v &= ~ctx_ops[i].and_n_mask_hi; | ||
6611 | v |= ctx_ops[i].value_hi; | ||
6612 | gk20a_mem_wr32(ctx_ptr + offsets[j] + 4, 0, v); | ||
6613 | |||
6614 | gk20a_dbg(gpu_dbg_gpu_dbg, | ||
6615 | "context wr: offset=0x%x v=0x%x", | ||
6616 | offsets[j] + 4, v); | ||
6617 | } | ||
6618 | |||
6619 | /* check to see if we need to add a special WAR | ||
6620 | for some of the SMPC perf regs */ | ||
6621 | gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], | ||
6622 | v, ctx_ptr); | ||
6623 | |||
6624 | } else { /* read pass */ | ||
6625 | ctx_ops[i].value_lo = | ||
6626 | gk20a_mem_rd32(ctx_ptr + offsets[0], 0); | ||
6627 | |||
6628 | gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", | ||
6629 | offsets[0], ctx_ops[i].value_lo); | ||
6630 | |||
6631 | if (ctx_ops[i].op == REGOP(READ_64)) { | ||
6632 | ctx_ops[i].value_hi = | ||
6633 | gk20a_mem_rd32(ctx_ptr + offsets[0] + 4, 0); | ||
6634 | |||
6635 | gk20a_dbg(gpu_dbg_gpu_dbg, | ||
6636 | "context rd: offset=0x%x v=0x%x", | ||
6637 | offsets[0] + 4, ctx_ops[i].value_hi); | ||
6638 | } else | ||
6639 | ctx_ops[i].value_hi = 0; | ||
6640 | } | ||
6641 | } | ||
6642 | ctx_op_nr++; | ||
6643 | } | ||
6644 | } | ||
6645 | #if 0 | ||
6646 | /* flush cpu caches for the ctx buffer? only if cpu cached, of course. | ||
6647 | * they aren't, yet */ | ||
6648 | if (cached) { | ||
6649 | FLUSH_CPU_DCACHE(ctx_ptr, | ||
6650 | sg_phys(ch_ctx->gr_ctx.mem.ref), size); | ||
6651 | } | ||
6652 | #endif | ||
6653 | |||
6654 | cleanup: | ||
6655 | if (offsets) | ||
6656 | kfree(offsets); | ||
6657 | |||
6658 | if (ctx_ptr) | ||
6659 | vunmap(ctx_ptr); | ||
6660 | |||
6661 | if (restart_gr_ctxsw) { | ||
6662 | int tmp_err = gr_gk20a_enable_ctxsw(g); | ||
6663 | if (tmp_err) { | ||
6664 | gk20a_err(dev_from_gk20a(g), "unable to restart ctxsw!\n"); | ||
6665 | err = tmp_err; | ||
6666 | } | ||
6667 | } | ||
6668 | |||
6669 | return err; | ||
6670 | } | ||
6671 | |||
6672 | static void gr_gk20a_cb_size_default(struct gk20a *g) | ||
6673 | { | ||
6674 | struct gr_gk20a *gr = &g->gr; | ||
6675 | |||
6676 | gr->attrib_cb_default_size = | ||
6677 | gr_gpc0_ppc0_cbm_cfg_size_default_v(); | ||
6678 | gr->alpha_cb_default_size = | ||
6679 | gr_gpc0_ppc0_cbm_cfg2_size_default_v(); | ||
6680 | } | ||
6681 | |||
6682 | static int gr_gk20a_calc_global_ctx_buffer_size(struct gk20a *g) | ||
6683 | { | ||
6684 | struct gr_gk20a *gr = &g->gr; | ||
6685 | int size; | ||
6686 | |||
6687 | gr->attrib_cb_size = gr->attrib_cb_default_size; | ||
6688 | gr->alpha_cb_size = gr->alpha_cb_default_size | ||
6689 | + (gr->alpha_cb_default_size >> 1); | ||
6690 | |||
6691 | size = gr->attrib_cb_size * | ||
6692 | gr_gpc0_ppc0_cbm_cfg_size_granularity_v() * | ||
6693 | gr->max_tpc_count; | ||
6694 | |||
6695 | size += gr->alpha_cb_size * | ||
6696 | gr_gpc0_ppc0_cbm_cfg2_size_granularity_v() * | ||
6697 | gr->max_tpc_count; | ||
6698 | |||
6699 | return size; | ||
6700 | } | ||
6701 | |||
/*
 * Commit the global pagepool buffer into a channel's gr context.
 *
 * Programs the pagepool base address (bits 39:8, per the
 * *_addr_39_8_f helpers) and the total page count into the SCC, GCC
 * and PD pagepool registers.  @patch is forwarded to
 * gr_gk20a_ctx_patch_write(), which presumably selects patch-buffer
 * vs direct register writes — see that function for the exact
 * semantics.
 */
void gr_gk20a_commit_global_pagepool(struct gk20a *g,
					struct channel_ctx_gk20a *ch_ctx,
					u64 addr, u32 size, bool patch)
{
	/* SCC: base, then page count with the valid bit set */
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(),
		gr_scc_pagepool_base_addr_39_8_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(),
		gr_scc_pagepool_total_pages_f(size) |
		gr_scc_pagepool_valid_true_f(), patch);

	/* GCC (broadcast to all GPCs): base and page count */
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(),
		gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(),
		gr_gpcs_gcc_pagepool_total_pages_f(size), patch);

	/* PD: page count with the valid bit set */
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_pagepool_r(),
		gr_pd_pagepool_total_pages_f(size) |
		gr_pd_pagepool_valid_true_f(), patch);
}
6723 | |||
6724 | void gk20a_init_gr(struct gpu_ops *gops) | ||
6725 | { | ||
6726 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; | ||
6727 | gops->gr.bundle_cb_defaults = gr_gk20a_bundle_cb_defaults; | ||
6728 | gops->gr.cb_size_default = gr_gk20a_cb_size_default; | ||
6729 | gops->gr.calc_global_ctx_buffer_size = | ||
6730 | gr_gk20a_calc_global_ctx_buffer_size; | ||
6731 | gops->gr.commit_global_attrib_cb = gr_gk20a_commit_global_attrib_cb; | ||
6732 | gops->gr.commit_global_bundle_cb = gr_gk20a_commit_global_bundle_cb; | ||
6733 | gops->gr.commit_global_cb_manager = gr_gk20a_commit_global_cb_manager; | ||
6734 | gops->gr.commit_global_pagepool = gr_gk20a_commit_global_pagepool; | ||
6735 | gops->gr.handle_sw_method = gr_gk20a_handle_sw_method; | ||
6736 | gops->gr.set_alpha_circular_buffer_size = | ||
6737 | gk20a_gr_set_circular_buffer_size; | ||
6738 | gops->gr.set_circular_buffer_size = | ||
6739 | gk20a_gr_set_alpha_circular_buffer_size; | ||
6740 | gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; | ||
6741 | gops->gr.is_valid_class = gr_gk20a_is_valid_class; | ||
6742 | gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; | ||
6743 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; | ||
6744 | gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep; | ||
6745 | gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask; | ||
6746 | gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; | ||
6747 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h new file mode 100644 index 00000000..7eb2923a --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -0,0 +1,406 @@ | |||
1 | /* | ||
2 | * GK20A Graphics Engine | ||
3 | * | ||
4 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | #ifndef __GR_GK20A_H__ | ||
19 | #define __GR_GK20A_H__ | ||
20 | |||
21 | #include <linux/slab.h> | ||
22 | |||
23 | #include "gr_ctx_gk20a.h" | ||
24 | |||
25 | #define GR_IDLE_CHECK_DEFAULT 100 /* usec */ | ||
26 | #define GR_IDLE_CHECK_MAX 5000 /* usec */ | ||
27 | |||
28 | #define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF | ||
29 | #define INVALID_MAX_WAYS 0xFFFFFFFF | ||
30 | |||
31 | #define GK20A_FECS_UCODE_IMAGE "fecs.bin" | ||
32 | #define GK20A_GPCCS_UCODE_IMAGE "gpccs.bin" | ||
33 | |||
/* Indices into gr_gk20a::global_ctx_buffer.  The *_VPR entries are
 * presumably the protected-memory (VPR) variants of the first three
 * buffers — TODO confirm against the allocation code. */
enum /* global_ctx_buffer */ {
	CIRCULAR		= 0,
	PAGEPOOL		= 1,
	ATTRIBUTE		= 2,
	CIRCULAR_VPR		= 3,
	PAGEPOOL_VPR		= 4,
	ATTRIBUTE_VPR		= 5,
	GOLDEN_CTX		= 6,
	PRIV_ACCESS_MAP		= 7,
	NR_GLOBAL_CTX_BUF	= 8
};

/* either ATTRIBUTE or ATTRIBUTE_VPR maps to ATTRIBUTE_VA */
enum /*global_ctx_buffer_va */ {
	CIRCULAR_VA		= 0,
	PAGEPOOL_VA		= 1,
	ATTRIBUTE_VA		= 2,
	GOLDEN_CTX_VA		= 3,
	PRIV_ACCESS_MAP_VA	= 4,
	NR_GLOBAL_CTX_BUF_VA	= 5
};
55 | |||
/* States for the ctxsw ucode wait loop. */
enum {
	WAIT_UCODE_LOOP,	/* not finished yet, keep polling */
	WAIT_UCODE_TIMEOUT,
	WAIT_UCODE_ERROR,
	WAIT_UCODE_OK
};

/* Comparison operators used when waiting on a ucode-reported value. */
enum {
	GR_IS_UCODE_OP_EQUAL,
	GR_IS_UCODE_OP_NOT_EQUAL,
	GR_IS_UCODE_OP_AND,		/* bitwise-AND test */
	GR_IS_UCODE_OP_LESSER,
	GR_IS_UCODE_OP_LESSER_EQUAL,
	GR_IS_UCODE_OP_SKIP		/* no check performed */
};

/* Handshake values exchanged with the ctxsw ucode.
 * NOTE(review): semantics inferred from the names — confirm against
 * the gr ucode wait code. */
enum {
	eUcodeHandshakeInitComplete = 1,
	eUcodeHandshakeMethodFinished
};

/* Engine-level clock gating (ELCG) modes. */
enum {
	ELCG_RUN,	/* clk always run, i.e. disable elcg */
	ELCG_STOP,	/* clk is stopped */
	ELCG_AUTO	/* clk will run when non-idle, standard elcg mode */
};

/* Block-level clock gating (BLCG) modes. */
enum {
	BLCG_RUN,	/* clk always run, i.e. disable blcg */
	BLCG_AUTO	/* clk will run when non-idle, standard blcg mode */
};
87 | |||
88 | #ifndef GR_GO_IDLE_BUNDLE | ||
89 | #define GR_GO_IDLE_BUNDLE 0x0000e100 /* --V-B */ | ||
90 | #endif | ||
91 | |||
/* One entry of the small curr_ctx -> hw_chid lookup cache kept in
 * gr_gk20a::chid_tlb. */
struct gr_channel_map_tlb_entry {
	u32 curr_ctx;
	u32 hw_chid;
};

/* ZCULL geometry parameters for this GPU. */
struct gr_zcull_gk20a {
	u32 aliquot_width;
	u32 aliquot_height;
	u32 aliquot_size;
	u32 total_aliquots;

	u32 width_align_pixels;
	u32 height_align_pixels;
	u32 pixel_squares_by_aliquots;
};

/* ZCULL properties filled in by gr_gk20a_get_zcull_info(). */
struct gr_zcull_info {
	u32 width_align_pixels;
	u32 height_align_pixels;
	u32 pixel_squares_by_aliquots;
	u32 aliquot_total;
	u32 region_byte_multiplier;
	u32 region_header_size;
	u32 subregion_header_size;
	u32 subregion_width_align_pixels;
	u32 subregion_height_align_pixels;
	u32 subregion_count;
};
120 | |||
#define GK20A_ZBC_COLOR_VALUE_SIZE	4	/* RGBA */

#define GK20A_STARTOF_ZBC_TABLE	1	/* index zero reserved to indicate "not ZBCd" */
#define GK20A_SIZEOF_ZBC_TABLE	16	/* match ltcs_ltss_dstg_zbc_index_address width (4) */
#define GK20A_ZBC_TABLE_SIZE	(16 - 1)	/* usable slots (index 0 reserved) */

#define GK20A_ZBC_TYPE_INVALID	0
#define GK20A_ZBC_TYPE_COLOR	1
#define GK20A_ZBC_TYPE_DEPTH	2

/* Driver-side shadow of one ZBC color table slot. */
struct zbc_color_table {
	u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];	/* per-component, ds variant */
	u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];	/* per-component, l2 variant */
	u32 format;
	u32 ref_cnt;	/* number of users referencing this slot */
};

/* Driver-side shadow of one ZBC depth table slot. */
struct zbc_depth_table {
	u32 depth;
	u32 format;
	u32 ref_cnt;
};

/* One color-or-depth value to install; see gr_gk20a_add_zbc(). */
struct zbc_entry {
	u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 depth;
	u32 type;	/* color or depth */
	u32 format;
};

/* In/out parameters for gr_gk20a_query_zbc(). */
struct zbc_query_params {
	u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
	u32 depth;
	u32 ref_cnt;
	u32 format;
	u32 type;		/* color or depth */
	u32 index_size;		/* [out] size, [in] index */
};
161 | |||
/* Per-GPU graphics engine state. */
struct gr_gk20a {
	struct gk20a *g;	/* back-pointer to the owning device */

	/* Context-switch state derived from the netlist/ucode image. */
	struct {
		bool dynamic;	/* netlist loaded at runtime (GR_NETLIST_DYNAMIC) */

		u32 buffer_size;
		u32 buffer_total_size;

		/* golden (reference) context image; initialized once
		 * under ctx_mutex */
		bool golden_image_initialized;
		u32 golden_image_size;
		u32 *local_golden_image;

		u32 zcull_ctxsw_image_size;

		u32 buffer_header_size;

		u32 priv_access_map_size;

		struct gr_ucode_gk20a ucode;

		/* address/value (av) and address/index/value (aiv)
		 * register lists from the netlist */
		struct av_list_gk20a sw_bundle_init;
		struct av_list_gk20a sw_method_init;
		struct aiv_list_gk20a sw_ctx_load;
		struct av_list_gk20a sw_non_ctx_load;
		struct {
			struct aiv_list_gk20a sys;
			struct aiv_list_gk20a gpc;
			struct aiv_list_gk20a tpc;
			struct aiv_list_gk20a zcull_gpc;
			struct aiv_list_gk20a ppc;
			struct aiv_list_gk20a pm_sys;
			struct aiv_list_gk20a pm_gpc;
			struct aiv_list_gk20a pm_tpc;
		} ctxsw_regs;
		int regs_base_index;
		bool valid;
	} ctx_vars;

	struct mutex ctx_mutex; /* protect golden ctx init */
	struct mutex fecs_mutex; /* protect fecs method */

#define GR_NETLIST_DYNAMIC	-1
#define GR_NETLIST_STATIC_A	'A'
	int netlist;

	int initialized;
	u32 num_fbps;

	/* chip family maximums */
	u32 max_gpc_count;
	u32 max_fbps_count;
	u32 max_tpc_per_gpc_count;
	u32 max_zcull_per_gpc_count;
	u32 max_tpc_count;

	/* counts discovered on this particular chip; the pointer
	 * members are per-GPC (or per-PES) arrays */
	u32 sys_count;
	u32 gpc_count;
	u32 pe_count_per_gpc;
	u32 ppc_count;
	u32 *gpc_ppc_count;
	u32 tpc_count;
	u32 *gpc_tpc_count;
	u32 zcb_count;
	u32 *gpc_zcb_count;
	u32 *pes_tpc_count[2];
	u32 *pes_tpc_mask[2];
	u32 *gpc_skip_mask;

	/* circular buffer sizing; see
	 * gr_gk20a_calc_global_ctx_buffer_size() */
	u32 bundle_cb_default_size;
	u32 min_gpm_fifo_depth;
	u32 bundle_cb_token_limit;
	u32 attrib_cb_default_size;
	u32 attrib_cb_size;
	u32 alpha_cb_default_size;
	u32 alpha_cb_size;
	u32 timeslice_mode;

	struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];

	struct mmu_desc mmu_wr_mem;
	u32 mmu_wr_mem_size;
	struct mmu_desc mmu_rd_mem;
	u32 mmu_rd_mem_size;

	u8 *map_tiles;
	u32 map_tile_count;
	u32 map_row_offset;

#define COMP_TAG_LINE_SIZE_SHIFT	(17)	/* one tag covers 128K */
#define COMP_TAG_LINE_SIZE		(1 << COMP_TAG_LINE_SIZE_SHIFT)

	u32 max_comptag_mem; /* max memory size (MB) for comptag */
	struct compbit_store_desc compbit_store;
	struct gk20a_allocator comp_tags;

	struct gr_zcull_gk20a zcull;

	/* ZBC (zero bandwidth clear) table shadows */
	struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE];
	struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE];

	s32 max_default_color_index;
	s32 max_default_depth_index;

	s32 max_used_color_index;
	s32 max_used_depth_index;

	u32 status_disable_mask;

#define GR_CHANNEL_MAP_TLB_SIZE		2 /* must be a power of 2 */
	struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
	u32 channel_tlb_flush_index;
	spinlock_t ch_tlb_lock;	/* protects chid_tlb */

	void (*remove_support)(struct gr_gk20a *gr);
	bool sw_ready;
	bool skip_ucode_init;
};
278 | |||
279 | void gk20a_fecs_dump_falcon_stats(struct gk20a *g); | ||
280 | |||
/* One segment (offset/size) within the ctxsw ucode surface. */
struct gk20a_ctxsw_ucode_segment {
	u32 offset;
	u32 size;
};

/* Boot/code/data layout for one falcon's (FECS or GPCCS) ucode. */
struct gk20a_ctxsw_ucode_segments {
	u32 boot_entry;
	u32 boot_imem_offset;
	struct gk20a_ctxsw_ucode_segment boot;
	struct gk20a_ctxsw_ucode_segment code;
	struct gk20a_ctxsw_ucode_segment data;
};

/* State needed to load the context-switch firmware. */
struct gk20a_ctxsw_ucode_info {
	u64 *p_va;
	struct inst_desc inst_blk_desc;
	struct surface_mem_desc surface_desc;
	u64 ucode_gpuva;	/* GPU VA of the mapped ucode surface */
	struct gk20a_ctxsw_ucode_segments fecs;
	struct gk20a_ctxsw_ucode_segments gpccs;
};

/* Descriptor of the ctxsw bootloader blob. */
struct gk20a_ctxsw_bootloader_desc {
	u32 start_offset;
	u32 size;
	u32 imem_offset;
	u32 entry_point;
};
309 | |||
310 | struct gpu_ops; | ||
311 | void gk20a_init_gr(struct gpu_ops *gops); | ||
312 | int gk20a_init_gr_support(struct gk20a *g); | ||
313 | void gk20a_gr_reset(struct gk20a *g); | ||
314 | |||
315 | int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a); | ||
316 | |||
317 | int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr); | ||
318 | |||
319 | struct nvhost_alloc_obj_ctx_args; | ||
320 | struct nvhost_free_obj_ctx_args; | ||
321 | |||
322 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | ||
323 | struct nvhost_alloc_obj_ctx_args *args); | ||
324 | int gk20a_free_obj_ctx(struct channel_gk20a *c, | ||
325 | struct nvhost_free_obj_ctx_args *args); | ||
326 | void gk20a_free_channel_ctx(struct channel_gk20a *c); | ||
327 | |||
328 | int gk20a_gr_isr(struct gk20a *g); | ||
329 | int gk20a_gr_nonstall_isr(struct gk20a *g); | ||
330 | |||
331 | /* zcull */ | ||
332 | u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr); | ||
333 | int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, | ||
334 | struct channel_gk20a *c, u64 zcull_va, u32 mode); | ||
335 | int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, | ||
336 | struct gr_zcull_info *zcull_params); | ||
337 | /* zbc */ | ||
338 | int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr, | ||
339 | struct zbc_entry *zbc_val); | ||
340 | int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr, | ||
341 | struct zbc_query_params *query_params); | ||
342 | int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr, | ||
343 | struct zbc_entry *zbc_val); | ||
344 | int gr_gk20a_clear_zbc_table(struct gk20a *g, struct gr_gk20a *gr); | ||
345 | int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr); | ||
346 | |||
347 | /* pmu */ | ||
348 | int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size); | ||
349 | int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr); | ||
350 | int gr_gk20a_fecs_set_reglist_virual_addr(struct gk20a *g, u64 pmu_va); | ||
351 | |||
352 | void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine); | ||
353 | void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine); | ||
354 | |||
355 | /* sm */ | ||
356 | bool gk20a_gr_sm_debugger_attached(struct gk20a *g); | ||
357 | |||
/*
 * Evaluate @func with ELPG (engine-level power gating) disabled so the
 * engine stays powered for the duration of the call, then re-enable
 * ELPG; yields func's int result.
 *
 * NOTE(review): this is a GCC statement-expression macro.  The local
 * 'err' shadows any same-named variable in the caller, so @func must
 * not reference a caller-scope 'err'.
 */
#define gr_gk20a_elpg_protected_call(g, func) \
	({ \
		int err; \
		if (support_gk20a_pmu()) \
			gk20a_pmu_disable_elpg(g); \
		err = func; \
		if (support_gk20a_pmu()) \
			gk20a_pmu_enable_elpg(g); \
		err; \
	})
368 | |||
369 | int gk20a_gr_suspend(struct gk20a *g); | ||
370 | |||
371 | struct nvhost_dbg_gpu_reg_op; | ||
372 | int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | ||
373 | struct nvhost_dbg_gpu_reg_op *ctx_ops, u32 num_ops, | ||
374 | u32 num_ctx_wr_ops, u32 num_ctx_rd_ops); | ||
375 | int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, | ||
376 | u32 addr, | ||
377 | u32 max_offsets, | ||
378 | u32 *offsets, u32 *offset_addrs, | ||
379 | u32 *num_offsets, | ||
380 | bool is_quad, u32 quad); | ||
381 | int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | ||
382 | struct channel_gk20a *c, | ||
383 | bool enable_smpc_ctxsw); | ||
384 | |||
385 | struct channel_ctx_gk20a; | ||
386 | int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, | ||
387 | u32 addr, u32 data, bool patch); | ||
388 | int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, | ||
389 | struct channel_ctx_gk20a *ch_ctx); | ||
390 | int gr_gk20a_ctx_patch_write_end(struct gk20a *g, | ||
391 | struct channel_ctx_gk20a *ch_ctx); | ||
392 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, | ||
393 | struct channel_ctx_gk20a *ch_ctx, | ||
394 | u64 addr, u32 size, bool patch); | ||
395 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); | ||
396 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); | ||
397 | void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g, | ||
398 | u32 *num_sm_dsm_perf_regs, | ||
399 | u32 **sm_dsm_perf_regs, | ||
400 | u32 *perf_register_stride); | ||
401 | void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
402 | u32 *num_sm_dsm_perf_regs, | ||
403 | u32 **sm_dsm_perf_regs, | ||
404 | u32 *perf_register_stride); | ||
405 | int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); | ||
406 | #endif /*__GR_GK20A_H__*/ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h new file mode 100644 index 00000000..a82a1ee7 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h | |||
@@ -0,0 +1,179 @@ | |||
1 | /* | ||
2 | * GK20A Graphics Context Pri Register Addressing | ||
3 | * | ||
4 | * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | #ifndef _NVHOST_GR_PRI_GK20A_H_ | ||
19 | #define _NVHOST_GR_PRI_GK20A_H_ | ||
20 | |||
21 | /* | ||
22 | * These convenience macros are generally for use in the management/modificaiton | ||
23 | * of the context state store for gr/compute contexts. | ||
24 | */ | ||
25 | |||
26 | /* | ||
27 | * GPC pri addressing | ||
28 | */ | ||
29 | static inline u32 pri_gpccs_addr_width(void) | ||
30 | { | ||
31 | return 15; /*from where?*/ | ||
32 | } | ||
33 | static inline u32 pri_gpccs_addr_mask(u32 addr) | ||
34 | { | ||
35 | return addr & ((1 << pri_gpccs_addr_width()) - 1); | ||
36 | } | ||
37 | static inline u32 pri_gpc_addr(u32 addr, u32 gpc) | ||
38 | { | ||
39 | return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + addr; | ||
40 | } | ||
41 | static inline bool pri_is_gpc_addr_shared(u32 addr) | ||
42 | { | ||
43 | return (addr >= proj_gpc_shared_base_v()) && | ||
44 | (addr < proj_gpc_shared_base_v() + proj_gpc_stride_v()); | ||
45 | } | ||
46 | static inline bool pri_is_gpc_addr(u32 addr) | ||
47 | { | ||
48 | return ((addr >= proj_gpc_base_v()) && | ||
49 | (addr < proj_gpc_base_v() + | ||
50 | proj_scal_litter_num_gpcs_v() * proj_gpc_stride_v())) || | ||
51 | pri_is_gpc_addr_shared(addr); | ||
52 | } | ||
53 | static inline u32 pri_get_gpc_num(u32 addr) | ||
54 | { | ||
55 | u32 i, start; | ||
56 | u32 num_gpcs = proj_scal_litter_num_gpcs_v(); | ||
57 | |||
58 | for (i = 0; i < num_gpcs; i++) { | ||
59 | start = proj_gpc_base_v() + (i * proj_gpc_stride_v()); | ||
60 | if ((addr >= start) && (addr < (start + proj_gpc_stride_v()))) | ||
61 | return i; | ||
62 | } | ||
63 | return 0; | ||
64 | } | ||
65 | /* | ||
66 | * TPC pri addressing | ||
67 | */ | ||
68 | static inline u32 pri_tpccs_addr_width(void) | ||
69 | { | ||
70 | return 11; /* from where? */ | ||
71 | } | ||
72 | static inline u32 pri_tpccs_addr_mask(u32 addr) | ||
73 | { | ||
74 | return addr & ((1 << pri_tpccs_addr_width()) - 1); | ||
75 | } | ||
76 | static inline u32 pri_tpc_addr(u32 addr, u32 gpc, u32 tpc) | ||
77 | { | ||
78 | return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + | ||
79 | proj_tpc_in_gpc_base_v() + (tpc * proj_tpc_in_gpc_stride_v()) + | ||
80 | addr; | ||
81 | } | ||
82 | static inline bool pri_is_tpc_addr_shared(u32 addr) | ||
83 | { | ||
84 | return (addr >= proj_tpc_in_gpc_shared_base_v()) && | ||
85 | (addr < (proj_tpc_in_gpc_shared_base_v() + | ||
86 | proj_tpc_in_gpc_stride_v())); | ||
87 | } | ||
88 | static inline bool pri_is_tpc_addr(u32 addr) | ||
89 | { | ||
90 | return ((addr >= proj_tpc_in_gpc_base_v()) && | ||
91 | (addr < proj_tpc_in_gpc_base_v() + (proj_scal_litter_num_tpc_per_gpc_v() * | ||
92 | proj_tpc_in_gpc_stride_v()))) | ||
93 | || | ||
94 | pri_is_tpc_addr_shared(addr); | ||
95 | } | ||
96 | static inline u32 pri_get_tpc_num(u32 addr) | ||
97 | { | ||
98 | u32 i, start; | ||
99 | u32 num_tpcs = proj_scal_litter_num_tpc_per_gpc_v(); | ||
100 | |||
101 | for (i = 0; i < num_tpcs; i++) { | ||
102 | start = proj_tpc_in_gpc_base_v() + (i * proj_tpc_in_gpc_stride_v()); | ||
103 | if ((addr >= start) && (addr < (start + proj_tpc_in_gpc_stride_v()))) | ||
104 | return i; | ||
105 | } | ||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * BE pri addressing | ||
111 | */ | ||
112 | static inline u32 pri_becs_addr_width(void) | ||
113 | { | ||
114 | return 10;/* from where? */ | ||
115 | } | ||
116 | static inline u32 pri_becs_addr_mask(u32 addr) | ||
117 | { | ||
118 | return addr & ((1 << pri_becs_addr_width()) - 1); | ||
119 | } | ||
120 | static inline bool pri_is_be_addr_shared(u32 addr) | ||
121 | { | ||
122 | return (addr >= proj_rop_shared_base_v()) && | ||
123 | (addr < proj_rop_shared_base_v() + proj_rop_stride_v()); | ||
124 | } | ||
125 | static inline u32 pri_be_shared_addr(u32 addr) | ||
126 | { | ||
127 | return proj_rop_shared_base_v() + pri_becs_addr_mask(addr); | ||
128 | } | ||
129 | static inline bool pri_is_be_addr(u32 addr) | ||
130 | { | ||
131 | return ((addr >= proj_rop_base_v()) && | ||
132 | (addr < proj_rop_base_v()+proj_scal_litter_num_fbps_v() * proj_rop_stride_v())) || | ||
133 | pri_is_be_addr_shared(addr); | ||
134 | } | ||
135 | |||
136 | static inline u32 pri_get_be_num(u32 addr) | ||
137 | { | ||
138 | u32 i, start; | ||
139 | u32 num_fbps = proj_scal_litter_num_fbps_v(); | ||
140 | for (i = 0; i < num_fbps; i++) { | ||
141 | start = proj_rop_base_v() + (i * proj_rop_stride_v()); | ||
142 | if ((addr >= start) && (addr < (start + proj_rop_stride_v()))) | ||
143 | return i; | ||
144 | } | ||
145 | return 0; | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * PPC pri addressing | ||
150 | */ | ||
151 | static inline u32 pri_ppccs_addr_width(void) | ||
152 | { | ||
153 | return 9; /* from where? */ | ||
154 | } | ||
155 | static inline u32 pri_ppccs_addr_mask(u32 addr) | ||
156 | { | ||
157 | return addr & ((1 << pri_ppccs_addr_width()) - 1); | ||
158 | } | ||
159 | static inline u32 pri_ppc_addr(u32 addr, u32 gpc, u32 ppc) | ||
160 | { | ||
161 | return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + | ||
162 | proj_ppc_in_gpc_base_v() + (ppc * proj_ppc_in_gpc_stride_v()) + addr; | ||
163 | } | ||
164 | |||
/* Classification of a pri address by the hardware unit it targets. */
enum ctxsw_addr_type {
	CTXSW_ADDR_TYPE_SYS = 0,
	CTXSW_ADDR_TYPE_GPC = 1,
	CTXSW_ADDR_TYPE_TPC = 2,
	CTXSW_ADDR_TYPE_BE = 3,
	CTXSW_ADDR_TYPE_PPC = 4
};
172 | |||
173 | #define PRI_BROADCAST_FLAGS_NONE 0 | ||
174 | #define PRI_BROADCAST_FLAGS_GPC BIT(0) | ||
175 | #define PRI_BROADCAST_FLAGS_TPC BIT(1) | ||
176 | #define PRI_BROADCAST_FLAGS_BE BIT(2) | ||
177 | #define PRI_BROADCAST_FLAGS_PPC BIT(3) | ||
178 | |||
179 | #endif /*_NVHOST_GR_PRI_GK20A_H_ */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hal.c b/drivers/gpu/nvgpu/gk20a/hal.c new file mode 100644 index 00000000..dea740c2 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hal.c | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
2 | * NVIDIA GPU HAL interface. | ||
3 | * | ||
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include "gk20a.h" | ||
17 | #include "hal_gk20a.h" | ||
18 | |||
19 | int gpu_init_hal(struct gk20a *g) | ||
20 | { | ||
21 | u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl; | ||
22 | switch (ver) { | ||
23 | case GK20A_GPUID_GK20A: | ||
24 | gk20a_dbg_info("gk20a detected"); | ||
25 | gk20a_init_hal(&g->ops); | ||
26 | break; | ||
27 | default: | ||
28 | gk20a_err(&g->dev->dev, "no support for %x", ver); | ||
29 | return -ENODEV; | ||
30 | } | ||
31 | |||
32 | return 0; | ||
33 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hal.h b/drivers/gpu/nvgpu/gk20a/hal.h new file mode 100644 index 00000000..da02cf5f --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hal.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | * NVIDIA GPU Hardware Abstraction Layer functions definitions. | ||
3 | * | ||
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
/*
 * NOTE(review): the previous guard __HAL_GPU__ began with a double
 * underscore, which is reserved for the implementation (C11 7.1.3);
 * use a plain project-prefixed guard instead.
 */
#ifndef NVGPU_GK20A_HAL_H
#define NVGPU_GK20A_HAL_H

#include <linux/kernel.h>

struct gk20a;

/* Probe the GPU version and install the matching HAL; -ENODEV if unknown. */
int gpu_init_hal(struct gk20a *g);

#endif /* NVGPU_GK20A_HAL_H */
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c new file mode 100644 index 00000000..b3e9b0e6 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/hal_gk20a.c | ||
3 | * | ||
4 | * GK20A Tegra HAL interface. | ||
5 | * | ||
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #include "hal_gk20a.h" | ||
19 | #include "ltc_gk20a.h" | ||
20 | #include "fb_gk20a.h" | ||
21 | #include "gk20a.h" | ||
22 | #include "gk20a_gating_reglist.h" | ||
23 | #include "channel_gk20a.h" | ||
24 | |||
/*
 * Baseline HAL table for GK20A.  Only the clock-gating hooks are filled
 * in statically here; the per-unit ops (ltc, gr, fb, fifo) are installed
 * at runtime by gk20a_init_hal().
 */
struct gpu_ops gk20a_ops = {
	.clock_gating = {
		.slcg_gr_load_gating_prod =
			gr_gk20a_slcg_gr_load_gating_prod,
		.slcg_perf_load_gating_prod =
			gr_gk20a_slcg_perf_load_gating_prod,
		.blcg_gr_load_gating_prod =
			gr_gk20a_blcg_gr_load_gating_prod,
		.pg_gr_load_gating_prod =
			gr_gk20a_pg_gr_load_gating_prod,
		.slcg_therm_load_gating_prod =
			gr_gk20a_slcg_therm_load_gating_prod,
	}
};
39 | |||
/*
 * Populate @gops with the GK20A implementation of the HAL.
 *
 * Starts from the static gk20a_ops table (clock gating hooks), then lets
 * each unit install its own function pointers.  Always returns 0.
 */
int gk20a_init_hal(struct gpu_ops *gops)
{
	*gops = gk20a_ops;
	gk20a_init_ltc(gops);
	gk20a_init_gr(gops);
	gk20a_init_fb(gops);
	gk20a_init_fifo(gops);
	gops->name = "gk20a";

	return 0;
}
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.h b/drivers/gpu/nvgpu/gk20a/hal_gk20a.h new file mode 100644 index 00000000..db77a4a7 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.h | |||
@@ -0,0 +1,28 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/hal_gk20a.h | ||
3 | * | ||
4 | * GK20A Hardware Abstraction Layer functions definitions. | ||
5 | * | ||
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
/*
 * NOTE(review): the previous guard __HAL_GK20A__ began with a double
 * underscore, which is reserved for the implementation (C11 7.1.3);
 * use a plain project-prefixed guard instead.
 */
#ifndef NVGPU_GK20A_HAL_GK20A_H
#define NVGPU_GK20A_HAL_GK20A_H

#include <linux/kernel.h>

struct gpu_ops;
struct gk20a;

/* Fill @gops with the GK20A HAL implementation; always returns 0. */
int gk20a_init_hal(struct gpu_ops *gops);

#endif /* NVGPU_GK20A_HAL_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h new file mode 100644 index 00000000..ebf8a873 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h | |||
@@ -0,0 +1,105 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
#ifndef _hw_bus_gk20a_h_
#define _hw_bus_gk20a_h_

/* BAR1 instance-block bind register: pointer, target aperture, mode. */
static inline u32 bus_bar1_block_r(void)
{
	return 0x00001704;
}
/* 28-bit instance-block pointer field (bits 27:0). */
static inline u32 bus_bar1_block_ptr_f(u32 v)
{
	return (v & 0xfffffff) << 0;
}
static inline u32 bus_bar1_block_target_vid_mem_f(void)
{
	return 0x0;
}
static inline u32 bus_bar1_block_mode_virtual_f(void)
{
	return 0x80000000;
}
/* Shift applied to an address before writing it into the ptr field (12). */
static inline u32 bus_bar1_block_ptr_shift_v(void)
{
	return 0x0000000c;
}
/* Bus interrupt status register and its PRI error bits. */
static inline u32 bus_intr_0_r(void)
{
	return 0x00001100;
}
static inline u32 bus_intr_0_pri_squash_m(void)
{
	return 0x1 << 1;
}
static inline u32 bus_intr_0_pri_fecserr_m(void)
{
	return 0x1 << 2;
}
static inline u32 bus_intr_0_pri_timeout_m(void)
{
	return 0x1 << 3;
}
/* Bus interrupt enable register; bit layout matches bus_intr_0 above. */
static inline u32 bus_intr_en_0_r(void)
{
	return 0x00001140;
}
static inline u32 bus_intr_en_0_pri_squash_m(void)
{
	return 0x1 << 1;
}
static inline u32 bus_intr_en_0_pri_fecserr_m(void)
{
	return 0x1 << 2;
}
static inline u32 bus_intr_en_0_pri_timeout_m(void)
{
	return 0x1 << 3;
}
#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h new file mode 100644 index 00000000..573329f1 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h | |||
@@ -0,0 +1,113 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
#ifndef _hw_ccsr_gk20a_h_
#define _hw_ccsr_gk20a_h_

/* Per-channel instance register, indexed by channel id i (8-byte stride). */
static inline u32 ccsr_channel_inst_r(u32 i)
{
	return 0x00800000 + i*8;
}
/* Number of channel slots: 0x80 (128). */
static inline u32 ccsr_channel_inst__size_1_v(void)
{
	return 0x00000080;
}
/* 28-bit instance-block pointer field. */
static inline u32 ccsr_channel_inst_ptr_f(u32 v)
{
	return (v & 0xfffffff) << 0;
}
static inline u32 ccsr_channel_inst_target_vid_mem_f(void)
{
	return 0x0;
}
static inline u32 ccsr_channel_inst_bind_false_f(void)
{
	return 0x0;
}
static inline u32 ccsr_channel_inst_bind_true_f(void)
{
	return 0x80000000;
}
/* Per-channel control/status register, indexed by channel id i. */
static inline u32 ccsr_channel_r(u32 i)
{
	return 0x00800004 + i*8;
}
static inline u32 ccsr_channel__size_1_v(void)
{
	return 0x00000080;
}
static inline u32 ccsr_channel_enable_v(u32 r)
{
	return (r >> 0) & 0x1;
}
/* Write-1-to-set / write-1-to-clear enable bits (bits 10 and 11). */
static inline u32 ccsr_channel_enable_set_f(u32 v)
{
	return (v & 0x1) << 10;
}
static inline u32 ccsr_channel_enable_set_true_f(void)
{
	return 0x400;
}
static inline u32 ccsr_channel_enable_clr_true_f(void)
{
	return 0x800;
}
/* Runlist id field (bits 19:16). */
static inline u32 ccsr_channel_runlist_f(u32 v)
{
	return (v & 0xf) << 16;
}
/* Channel status (bits 27:24) and busy flag (bit 28). */
static inline u32 ccsr_channel_status_v(u32 r)
{
	return (r >> 24) & 0xf;
}
static inline u32 ccsr_channel_busy_v(u32 r)
{
	return (r >> 28) & 0x1;
}
#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h new file mode 100644 index 00000000..66bf01b0 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h | |||
@@ -0,0 +1,85 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
#ifndef _hw_chiplet_pwr_gk20a_h_
#define _hw_chiplet_pwr_gk20a_h_

/* GPC broadcast power-sensing weight and config registers. */
static inline u32 chiplet_pwr_gpcs_weight_6_r(void)
{
	return 0x0010e018;
}
static inline u32 chiplet_pwr_gpcs_weight_7_r(void)
{
	return 0x0010e01c;
}
static inline u32 chiplet_pwr_gpcs_config_1_r(void)
{
	return 0x0010e03c;
}
static inline u32 chiplet_pwr_gpcs_config_1_ba_enable_yes_f(void)
{
	return 0x1;
}
/* FBP broadcast power-sensing weight and config registers. */
static inline u32 chiplet_pwr_fbps_weight_0_r(void)
{
	return 0x0010e100;
}
static inline u32 chiplet_pwr_fbps_weight_1_r(void)
{
	return 0x0010e104;
}
static inline u32 chiplet_pwr_fbps_config_1_r(void)
{
	return 0x0010e13c;
}
static inline u32 chiplet_pwr_fbps_config_1_ba_enable_yes_f(void)
{
	return 0x1;
}
#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h new file mode 100644 index 00000000..e2a4f2f2 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h | |||
@@ -0,0 +1,245 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
#ifndef _hw_ctxsw_prog_gk20a_h_
#define _hw_ctxsw_prog_gk20a_h_

/* Size in bytes of the FECS (main) context header. */
static inline u32 ctxsw_prog_fecs_header_v(void)
{
	return 0x00000100;
}
/* Byte offsets of fields inside the main context image header. */
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
{
	return 0x00000008;
}
static inline u32 ctxsw_prog_main_image_patch_count_o(void)
{
	return 0x00000010;
}
static inline u32 ctxsw_prog_main_image_patch_adr_lo_o(void)
{
	return 0x00000014;
}
static inline u32 ctxsw_prog_main_image_patch_adr_hi_o(void)
{
	return 0x00000018;
}
static inline u32 ctxsw_prog_main_image_zcull_o(void)
{
	return 0x0000001c;
}
static inline u32 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v(void)
{
	return 0x00000001;
}
static inline u32 ctxsw_prog_main_image_zcull_mode_separate_buffer_v(void)
{
	return 0x00000002;
}
static inline u32 ctxsw_prog_main_image_zcull_ptr_o(void)
{
	return 0x00000020;
}
static inline u32 ctxsw_prog_main_image_pm_o(void)
{
	return 0x00000028;
}
/* PM mode field occupies bits 2:0 of the pm word. */
static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
{
	return 0x7 << 0;
}
static inline u32 ctxsw_prog_main_image_pm_mode_v(u32 r)
{
	return (r >> 0) & 0x7;
}
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
{
	return 0x0;
}
/* SMPC mode field occupies bits 5:3 of the pm word. */
static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void)
{
	return 0x7 << 3;
}
static inline u32 ctxsw_prog_main_image_pm_smpc_mode_v(u32 r)
{
	return (r >> 3) & 0x7;
}
static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void)
{
	return 0x0;
}
static inline u32 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f(void)
{
	return 0x8;
}
static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
{
	return 0x0000002c;
}
static inline u32 ctxsw_prog_main_image_num_save_ops_o(void)
{
	return 0x000000f4;
}
static inline u32 ctxsw_prog_main_image_num_restore_ops_o(void)
{
	return 0x000000f8;
}
/* Magic marker word at the end of the main image header. */
static inline u32 ctxsw_prog_main_image_magic_value_o(void)
{
	return 0x000000fc;
}
static inline u32 ctxsw_prog_main_image_magic_value_v_value_v(void)
{
	return 0x600dc0de;
}
/* Privileged-register access map: config mode and map buffer address. */
static inline u32 ctxsw_prog_main_image_priv_access_map_config_o(void)
{
	return 0x000000a0;
}
static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(void)
{
	return 0x0;
}
static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_allow_none_f(void)
{
	return 0x1;
}
static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(void)
{
	return 0x2;
}
static inline u32 ctxsw_prog_main_image_priv_access_map_addr_lo_o(void)
{
	return 0x000000a4;
}
static inline u32 ctxsw_prog_main_image_priv_access_map_addr_hi_o(void)
{
	return 0x000000a8;
}
static inline u32 ctxsw_prog_main_image_misc_options_o(void)
{
	return 0x0000003c;
}
/* verif_features flag lives in bit 3 of the misc options word. */
static inline u32 ctxsw_prog_main_image_misc_options_verif_features_m(void)
{
	return 0x1 << 3;
}
static inline u32 ctxsw_prog_main_image_misc_options_verif_features_disabled_f(void)
{
	return 0x0;
}
static inline u32 ctxsw_prog_main_image_misc_options_verif_features_enabled_f(void)
{
	return 0x8;
}
/* Byte offsets of fields inside the local context image header. */
static inline u32 ctxsw_prog_local_priv_register_ctl_o(void)
{
	return 0x0000000c;
}
static inline u32 ctxsw_prog_local_priv_register_ctl_offset_v(u32 r)
{
	return (r >> 0) & 0xffff;
}
static inline u32 ctxsw_prog_local_image_ppc_info_o(void)
{
	return 0x000000f4;
}
static inline u32 ctxsw_prog_local_image_ppc_info_num_ppcs_v(u32 r)
{
	return (r >> 0) & 0xffff;
}
static inline u32 ctxsw_prog_local_image_ppc_info_ppc_mask_v(u32 r)
{
	return (r >> 16) & 0xffff;
}
static inline u32 ctxsw_prog_local_image_num_tpcs_o(void)
{
	return 0x000000f8;
}
/* Magic marker word at the end of the local image header. */
static inline u32 ctxsw_prog_local_magic_value_o(void)
{
	return 0x000000fc;
}
static inline u32 ctxsw_prog_local_magic_value_v_value_v(void)
{
	return 0xad0becab;
}
/* Extended buffer control word: offset (bits 15:0), size (bits 23:16). */
static inline u32 ctxsw_prog_main_extended_buffer_ctl_o(void)
{
	return 0x000000ec;
}
static inline u32 ctxsw_prog_main_extended_buffer_ctl_offset_v(u32 r)
{
	return (r >> 0) & 0xffff;
}
static inline u32 ctxsw_prog_main_extended_buffer_ctl_size_v(u32 r)
{
	return (r >> 16) & 0xff;
}
/* Geometry constants for the extended (perf counter) buffer layout. */
static inline u32 ctxsw_prog_extended_buffer_segments_size_in_bytes_v(void)
{
	return 0x00000100;
}
static inline u32 ctxsw_prog_extended_marker_size_in_bytes_v(void)
{
	return 0x00000004;
}
static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(void)
{
	return 0x00000005;
}
static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(void)
{
	return 0x00000004;
}
static inline u32 ctxsw_prog_extended_num_smpc_quadrants_v(void)
{
	return 0x00000004;
}
#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h new file mode 100644 index 00000000..b7edc29d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h | |||
@@ -0,0 +1,213 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
#ifndef _hw_fb_gk20a_h_
#define _hw_fb_gk20a_h_

/* MMU control register: VM page size and PRI fifo status fields. */
static inline u32 fb_mmu_ctrl_r(void)
{
	return 0x00100c80;
}
static inline u32 fb_mmu_ctrl_vm_pg_size_f(u32 v)
{
	return (v & 0x1) << 0;
}
static inline u32 fb_mmu_ctrl_vm_pg_size_128kb_f(void)
{
	return 0x0;
}
static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r)
{
	return (r >> 15) & 0x1;
}
static inline u32 fb_mmu_ctrl_pri_fifo_empty_false_f(void)
{
	return 0x0;
}
static inline u32 fb_mmu_ctrl_pri_fifo_space_v(u32 r)
{
	return (r >> 16) & 0xff;
}
/* MMU invalidate: PDB address/aperture and the invalidate command word. */
static inline u32 fb_mmu_invalidate_pdb_r(void)
{
	return 0x00100cb8;
}
static inline u32 fb_mmu_invalidate_pdb_aperture_vid_mem_f(void)
{
	return 0x0;
}
/* 28-bit PDB address field, shifted into bits 31:4. */
static inline u32 fb_mmu_invalidate_pdb_addr_f(u32 v)
{
	return (v & 0xfffffff) << 4;
}
static inline u32 fb_mmu_invalidate_r(void)
{
	return 0x00100cbc;
}
static inline u32 fb_mmu_invalidate_all_va_true_f(void)
{
	return 0x1;
}
static inline u32 fb_mmu_invalidate_all_pdb_true_f(void)
{
	return 0x2;
}
/* Trigger bit (bit 31) kicks off the invalidate. */
static inline u32 fb_mmu_invalidate_trigger_s(void)
{
	return 1;
}
static inline u32 fb_mmu_invalidate_trigger_f(u32 v)
{
	return (v & 0x1) << 31;
}
/* Mask for the invalidate trigger field (bit 31). */
static inline u32 fb_mmu_invalidate_trigger_m(void)
{
	/*
	 * Use an unsigned constant: left-shifting the signed int 0x1 by 31
	 * shifts into the sign bit, which is undefined behavior in C
	 * (CERT INT34-C).  The value is unchanged: 0x80000000.
	 */
	return 0x1U << 31;
}
static inline u32 fb_mmu_invalidate_trigger_v(u32 r)
{
	return (r >> 31) & 0x1;
}
static inline u32 fb_mmu_invalidate_trigger_true_f(void)
{
	return 0x80000000;
}
/* MMU debug write access register: aperture, volatile flag, address. */
static inline u32 fb_mmu_debug_wr_r(void)
{
	return 0x00100cc8;
}
static inline u32 fb_mmu_debug_wr_aperture_s(void)
{
	return 2;
}
static inline u32 fb_mmu_debug_wr_aperture_f(u32 v)
{
	return (v & 0x3) << 0;
}
static inline u32 fb_mmu_debug_wr_aperture_m(void)
{
	return 0x3 << 0;
}
static inline u32 fb_mmu_debug_wr_aperture_v(u32 r)
{
	return (r >> 0) & 0x3;
}
static inline u32 fb_mmu_debug_wr_aperture_vid_mem_f(void)
{
	return 0x0;
}
static inline u32 fb_mmu_debug_wr_vol_false_f(void)
{
	return 0x0;
}
static inline u32 fb_mmu_debug_wr_vol_true_v(void)
{
	return 0x00000001;
}
static inline u32 fb_mmu_debug_wr_vol_true_f(void)
{
	return 0x4;
}
/* 28-bit address field in bits 31:4; alignment shift is 12 (0xc). */
static inline u32 fb_mmu_debug_wr_addr_v(u32 r)
{
	return (r >> 4) & 0xfffffff;
}
static inline u32 fb_mmu_debug_wr_addr_alignment_v(void)
{
	return 0x0000000c;
}
/* MMU debug read access register; layout mirrors fb_mmu_debug_wr. */
static inline u32 fb_mmu_debug_rd_r(void)
{
	return 0x00100ccc;
}
static inline u32 fb_mmu_debug_rd_aperture_vid_mem_f(void)
{
	return 0x0;
}
static inline u32 fb_mmu_debug_rd_vol_false_f(void)
{
	return 0x0;
}
static inline u32 fb_mmu_debug_rd_addr_v(u32 r)
{
	return (r >> 4) & 0xfffffff;
}
static inline u32 fb_mmu_debug_rd_addr_alignment_v(void)
{
	return 0x0000000c;
}
/* MMU debug control register: debug enable flag in bit 16. */
static inline u32 fb_mmu_debug_ctrl_r(void)
{
	return 0x00100cc4;
}
static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r)
{
	return (r >> 16) & 0x1;
}
static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
{
	return 0x00000001;
}
/* VPR info register: fetch status flag in bit 2. */
static inline u32 fb_mmu_vpr_info_r(void)
{
	return 0x00100cd0;
}
static inline u32 fb_mmu_vpr_info_fetch_v(u32 r)
{
	return (r >> 2) & 0x1;
}
static inline u32 fb_mmu_vpr_info_fetch_false_v(void)
{
	return 0x00000000;
}
static inline u32 fb_mmu_vpr_info_fetch_true_v(void)
{
	return 0x00000001;
}
#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h new file mode 100644 index 00000000..a39d3c51 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h | |||
@@ -0,0 +1,565 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_fifo_gk20a_h_ | ||
51 | #define _hw_fifo_gk20a_h_ | ||
52 | |||
53 | static inline u32 fifo_bar1_base_r(void) | ||
54 | { | ||
55 | return 0x00002254; | ||
56 | } | ||
57 | static inline u32 fifo_bar1_base_ptr_f(u32 v) | ||
58 | { | ||
59 | return (v & 0xfffffff) << 0; | ||
60 | } | ||
61 | static inline u32 fifo_bar1_base_ptr_align_shift_v(void) | ||
62 | { | ||
63 | return 0x0000000c; | ||
64 | } | ||
65 | static inline u32 fifo_bar1_base_valid_false_f(void) | ||
66 | { | ||
67 | return 0x0; | ||
68 | } | ||
69 | static inline u32 fifo_bar1_base_valid_true_f(void) | ||
70 | { | ||
71 | return 0x10000000; | ||
72 | } | ||
73 | static inline u32 fifo_runlist_base_r(void) | ||
74 | { | ||
75 | return 0x00002270; | ||
76 | } | ||
77 | static inline u32 fifo_runlist_base_ptr_f(u32 v) | ||
78 | { | ||
79 | return (v & 0xfffffff) << 0; | ||
80 | } | ||
81 | static inline u32 fifo_runlist_base_target_vid_mem_f(void) | ||
82 | { | ||
83 | return 0x0; | ||
84 | } | ||
85 | static inline u32 fifo_runlist_r(void) | ||
86 | { | ||
87 | return 0x00002274; | ||
88 | } | ||
89 | static inline u32 fifo_runlist_engine_f(u32 v) | ||
90 | { | ||
91 | return (v & 0xf) << 20; | ||
92 | } | ||
93 | static inline u32 fifo_eng_runlist_base_r(u32 i) | ||
94 | { | ||
95 | return 0x00002280 + i*8; | ||
96 | } | ||
97 | static inline u32 fifo_eng_runlist_base__size_1_v(void) | ||
98 | { | ||
99 | return 0x00000001; | ||
100 | } | ||
101 | static inline u32 fifo_eng_runlist_r(u32 i) | ||
102 | { | ||
103 | return 0x00002284 + i*8; | ||
104 | } | ||
105 | static inline u32 fifo_eng_runlist__size_1_v(void) | ||
106 | { | ||
107 | return 0x00000001; | ||
108 | } | ||
109 | static inline u32 fifo_eng_runlist_length_f(u32 v) | ||
110 | { | ||
111 | return (v & 0xffff) << 0; | ||
112 | } | ||
113 | static inline u32 fifo_eng_runlist_pending_true_f(void) | ||
114 | { | ||
115 | return 0x100000; | ||
116 | } | ||
117 | static inline u32 fifo_eng_timeslice_r(u32 i) | ||
118 | { | ||
119 | return 0x00002310 + i*4; | ||
120 | } | ||
121 | static inline u32 fifo_eng_timeslice_timeout_128_f(void) | ||
122 | { | ||
123 | return 0x80; | ||
124 | } | ||
125 | static inline u32 fifo_eng_timeslice_timescale_3_f(void) | ||
126 | { | ||
127 | return 0x3000; | ||
128 | } | ||
129 | static inline u32 fifo_eng_timeslice_enable_true_f(void) | ||
130 | { | ||
131 | return 0x10000000; | ||
132 | } | ||
133 | static inline u32 fifo_pb_timeslice_r(u32 i) | ||
134 | { | ||
135 | return 0x00002350 + i*4; | ||
136 | } | ||
137 | static inline u32 fifo_pb_timeslice_timeout_16_f(void) | ||
138 | { | ||
139 | return 0x10; | ||
140 | } | ||
141 | static inline u32 fifo_pb_timeslice_timescale_0_f(void) | ||
142 | { | ||
143 | return 0x0; | ||
144 | } | ||
145 | static inline u32 fifo_pb_timeslice_enable_true_f(void) | ||
146 | { | ||
147 | return 0x10000000; | ||
148 | } | ||
149 | static inline u32 fifo_pbdma_map_r(u32 i) | ||
150 | { | ||
151 | return 0x00002390 + i*4; | ||
152 | } | ||
153 | static inline u32 fifo_intr_0_r(void) | ||
154 | { | ||
155 | return 0x00002100; | ||
156 | } | ||
157 | static inline u32 fifo_intr_0_bind_error_pending_f(void) | ||
158 | { | ||
159 | return 0x1; | ||
160 | } | ||
161 | static inline u32 fifo_intr_0_bind_error_reset_f(void) | ||
162 | { | ||
163 | return 0x1; | ||
164 | } | ||
165 | static inline u32 fifo_intr_0_pio_error_pending_f(void) | ||
166 | { | ||
167 | return 0x10; | ||
168 | } | ||
169 | static inline u32 fifo_intr_0_pio_error_reset_f(void) | ||
170 | { | ||
171 | return 0x10; | ||
172 | } | ||
173 | static inline u32 fifo_intr_0_sched_error_pending_f(void) | ||
174 | { | ||
175 | return 0x100; | ||
176 | } | ||
177 | static inline u32 fifo_intr_0_sched_error_reset_f(void) | ||
178 | { | ||
179 | return 0x100; | ||
180 | } | ||
181 | static inline u32 fifo_intr_0_chsw_error_pending_f(void) | ||
182 | { | ||
183 | return 0x10000; | ||
184 | } | ||
185 | static inline u32 fifo_intr_0_chsw_error_reset_f(void) | ||
186 | { | ||
187 | return 0x10000; | ||
188 | } | ||
189 | static inline u32 fifo_intr_0_fb_flush_timeout_pending_f(void) | ||
190 | { | ||
191 | return 0x800000; | ||
192 | } | ||
193 | static inline u32 fifo_intr_0_fb_flush_timeout_reset_f(void) | ||
194 | { | ||
195 | return 0x800000; | ||
196 | } | ||
197 | static inline u32 fifo_intr_0_lb_error_pending_f(void) | ||
198 | { | ||
199 | return 0x1000000; | ||
200 | } | ||
201 | static inline u32 fifo_intr_0_lb_error_reset_f(void) | ||
202 | { | ||
203 | return 0x1000000; | ||
204 | } | ||
205 | static inline u32 fifo_intr_0_dropped_mmu_fault_pending_f(void) | ||
206 | { | ||
207 | return 0x8000000; | ||
208 | } | ||
209 | static inline u32 fifo_intr_0_dropped_mmu_fault_reset_f(void) | ||
210 | { | ||
211 | return 0x8000000; | ||
212 | } | ||
213 | static inline u32 fifo_intr_0_mmu_fault_pending_f(void) | ||
214 | { | ||
215 | return 0x10000000; | ||
216 | } | ||
217 | static inline u32 fifo_intr_0_pbdma_intr_pending_f(void) | ||
218 | { | ||
219 | return 0x20000000; | ||
220 | } | ||
221 | static inline u32 fifo_intr_0_runlist_event_pending_f(void) | ||
222 | { | ||
223 | return 0x40000000; | ||
224 | } | ||
225 | static inline u32 fifo_intr_0_channel_intr_pending_f(void) | ||
226 | { | ||
227 | return 0x80000000; | ||
228 | } | ||
229 | static inline u32 fifo_intr_en_0_r(void) | ||
230 | { | ||
231 | return 0x00002140; | ||
232 | } | ||
233 | static inline u32 fifo_intr_en_1_r(void) | ||
234 | { | ||
235 | return 0x00002528; | ||
236 | } | ||
237 | static inline u32 fifo_intr_bind_error_r(void) | ||
238 | { | ||
239 | return 0x0000252c; | ||
240 | } | ||
241 | static inline u32 fifo_intr_sched_error_r(void) | ||
242 | { | ||
243 | return 0x0000254c; | ||
244 | } | ||
245 | static inline u32 fifo_intr_sched_error_code_f(u32 v) | ||
246 | { | ||
247 | return (v & 0xff) << 0; | ||
248 | } | ||
249 | static inline u32 fifo_intr_sched_error_code_ctxsw_timeout_v(void) | ||
250 | { | ||
251 | return 0x0000000a; | ||
252 | } | ||
253 | static inline u32 fifo_intr_chsw_error_r(void) | ||
254 | { | ||
255 | return 0x0000256c; | ||
256 | } | ||
257 | static inline u32 fifo_intr_mmu_fault_id_r(void) | ||
258 | { | ||
259 | return 0x0000259c; | ||
260 | } | ||
261 | static inline u32 fifo_intr_mmu_fault_eng_id_graphics_v(void) | ||
262 | { | ||
263 | return 0x00000000; | ||
264 | } | ||
265 | static inline u32 fifo_intr_mmu_fault_eng_id_graphics_f(void) | ||
266 | { | ||
267 | return 0x0; | ||
268 | } | ||
269 | static inline u32 fifo_intr_mmu_fault_inst_r(u32 i) | ||
270 | { | ||
271 | return 0x00002800 + i*16; | ||
272 | } | ||
273 | static inline u32 fifo_intr_mmu_fault_inst_ptr_v(u32 r) | ||
274 | { | ||
275 | return (r >> 0) & 0xfffffff; | ||
276 | } | ||
277 | static inline u32 fifo_intr_mmu_fault_inst_ptr_align_shift_v(void) | ||
278 | { | ||
279 | return 0x0000000c; | ||
280 | } | ||
281 | static inline u32 fifo_intr_mmu_fault_lo_r(u32 i) | ||
282 | { | ||
283 | return 0x00002804 + i*16; | ||
284 | } | ||
285 | static inline u32 fifo_intr_mmu_fault_hi_r(u32 i) | ||
286 | { | ||
287 | return 0x00002808 + i*16; | ||
288 | } | ||
289 | static inline u32 fifo_intr_mmu_fault_info_r(u32 i) | ||
290 | { | ||
291 | return 0x0000280c + i*16; | ||
292 | } | ||
293 | static inline u32 fifo_intr_mmu_fault_info_type_v(u32 r) | ||
294 | { | ||
295 | return (r >> 0) & 0xf; | ||
296 | } | ||
297 | static inline u32 fifo_intr_mmu_fault_info_engine_subid_v(u32 r) | ||
298 | { | ||
299 | return (r >> 6) & 0x1; | ||
300 | } | ||
301 | static inline u32 fifo_intr_mmu_fault_info_engine_subid_gpc_v(void) | ||
302 | { | ||
303 | return 0x00000000; | ||
304 | } | ||
305 | static inline u32 fifo_intr_mmu_fault_info_engine_subid_hub_v(void) | ||
306 | { | ||
307 | return 0x00000001; | ||
308 | } | ||
309 | static inline u32 fifo_intr_mmu_fault_info_client_v(u32 r) | ||
310 | { | ||
311 | return (r >> 8) & 0x1f; | ||
312 | } | ||
313 | static inline u32 fifo_intr_pbdma_id_r(void) | ||
314 | { | ||
315 | return 0x000025a0; | ||
316 | } | ||
317 | static inline u32 fifo_intr_pbdma_id_status_f(u32 v, u32 i) | ||
318 | { | ||
319 | return (v & 0x1) << (0 + i*1); | ||
320 | } | ||
321 | static inline u32 fifo_intr_pbdma_id_status__size_1_v(void) | ||
322 | { | ||
323 | return 0x00000001; | ||
324 | } | ||
325 | static inline u32 fifo_intr_runlist_r(void) | ||
326 | { | ||
327 | return 0x00002a00; | ||
328 | } | ||
329 | static inline u32 fifo_fb_timeout_r(void) | ||
330 | { | ||
331 | return 0x00002a04; | ||
332 | } | ||
333 | static inline u32 fifo_fb_timeout_period_m(void) | ||
334 | { | ||
335 | return 0x3fffffff << 0; | ||
336 | } | ||
337 | static inline u32 fifo_fb_timeout_period_max_f(void) | ||
338 | { | ||
339 | return 0x3fffffff; | ||
340 | } | ||
341 | static inline u32 fifo_pb_timeout_r(void) | ||
342 | { | ||
343 | return 0x00002a08; | ||
344 | } | ||
345 | static inline u32 fifo_pb_timeout_detection_enabled_f(void) | ||
346 | { | ||
347 | return 0x80000000; | ||
348 | } | ||
349 | static inline u32 fifo_eng_timeout_r(void) | ||
350 | { | ||
351 | return 0x00002a0c; | ||
352 | } | ||
353 | static inline u32 fifo_eng_timeout_period_m(void) | ||
354 | { | ||
355 | return 0x7fffffff << 0; | ||
356 | } | ||
357 | static inline u32 fifo_eng_timeout_period_max_f(void) | ||
358 | { | ||
359 | return 0x7fffffff; | ||
360 | } | ||
361 | static inline u32 fifo_eng_timeout_detection_m(void) | ||
362 | { | ||
363 | return 0x1 << 31; | ||
364 | } | ||
365 | static inline u32 fifo_eng_timeout_detection_enabled_f(void) | ||
366 | { | ||
367 | return 0x80000000; | ||
368 | } | ||
369 | static inline u32 fifo_eng_timeout_detection_disabled_f(void) | ||
370 | { | ||
371 | return 0x0; | ||
372 | } | ||
373 | static inline u32 fifo_error_sched_disable_r(void) | ||
374 | { | ||
375 | return 0x0000262c; | ||
376 | } | ||
377 | static inline u32 fifo_sched_disable_r(void) | ||
378 | { | ||
379 | return 0x00002630; | ||
380 | } | ||
381 | static inline u32 fifo_sched_disable_runlist_f(u32 v, u32 i) | ||
382 | { | ||
383 | return (v & 0x1) << (0 + i*1); | ||
384 | } | ||
385 | static inline u32 fifo_sched_disable_runlist_m(u32 i) | ||
386 | { | ||
387 | return 0x1 << (0 + i*1); | ||
388 | } | ||
389 | static inline u32 fifo_sched_disable_true_v(void) | ||
390 | { | ||
391 | return 0x00000001; | ||
392 | } | ||
393 | static inline u32 fifo_preempt_r(void) | ||
394 | { | ||
395 | return 0x00002634; | ||
396 | } | ||
397 | static inline u32 fifo_preempt_pending_true_f(void) | ||
398 | { | ||
399 | return 0x100000; | ||
400 | } | ||
401 | static inline u32 fifo_preempt_type_channel_f(void) | ||
402 | { | ||
403 | return 0x0; | ||
404 | } | ||
405 | static inline u32 fifo_preempt_chid_f(u32 v) | ||
406 | { | ||
407 | return (v & 0xfff) << 0; | ||
408 | } | ||
409 | static inline u32 fifo_trigger_mmu_fault_r(u32 i) | ||
410 | { | ||
411 | return 0x00002a30 + i*4; | ||
412 | } | ||
413 | static inline u32 fifo_trigger_mmu_fault_id_f(u32 v) | ||
414 | { | ||
415 | return (v & 0x1f) << 0; | ||
416 | } | ||
417 | static inline u32 fifo_trigger_mmu_fault_enable_f(u32 v) | ||
418 | { | ||
419 | return (v & 0x1) << 8; | ||
420 | } | ||
421 | static inline u32 fifo_engine_status_r(u32 i) | ||
422 | { | ||
423 | return 0x00002640 + i*8; | ||
424 | } | ||
425 | static inline u32 fifo_engine_status__size_1_v(void) | ||
426 | { | ||
427 | return 0x00000002; | ||
428 | } | ||
429 | static inline u32 fifo_engine_status_id_v(u32 r) | ||
430 | { | ||
431 | return (r >> 0) & 0xfff; | ||
432 | } | ||
433 | static inline u32 fifo_engine_status_id_type_v(u32 r) | ||
434 | { | ||
435 | return (r >> 12) & 0x1; | ||
436 | } | ||
437 | static inline u32 fifo_engine_status_id_type_chid_v(void) | ||
438 | { | ||
439 | return 0x00000000; | ||
440 | } | ||
441 | static inline u32 fifo_engine_status_ctx_status_v(u32 r) | ||
442 | { | ||
443 | return (r >> 13) & 0x7; | ||
444 | } | ||
445 | static inline u32 fifo_engine_status_ctx_status_valid_v(void) | ||
446 | { | ||
447 | return 0x00000001; | ||
448 | } | ||
449 | static inline u32 fifo_engine_status_ctx_status_ctxsw_load_v(void) | ||
450 | { | ||
451 | return 0x00000005; | ||
452 | } | ||
453 | static inline u32 fifo_engine_status_ctx_status_ctxsw_save_v(void) | ||
454 | { | ||
455 | return 0x00000006; | ||
456 | } | ||
457 | static inline u32 fifo_engine_status_ctx_status_ctxsw_switch_v(void) | ||
458 | { | ||
459 | return 0x00000007; | ||
460 | } | ||
461 | static inline u32 fifo_engine_status_next_id_v(u32 r) | ||
462 | { | ||
463 | return (r >> 16) & 0xfff; | ||
464 | } | ||
465 | static inline u32 fifo_engine_status_next_id_type_v(u32 r) | ||
466 | { | ||
467 | return (r >> 28) & 0x1; | ||
468 | } | ||
469 | static inline u32 fifo_engine_status_next_id_type_chid_v(void) | ||
470 | { | ||
471 | return 0x00000000; | ||
472 | } | ||
473 | static inline u32 fifo_engine_status_faulted_v(u32 r) | ||
474 | { | ||
475 | return (r >> 30) & 0x1; | ||
476 | } | ||
477 | static inline u32 fifo_engine_status_faulted_true_v(void) | ||
478 | { | ||
479 | return 0x00000001; | ||
480 | } | ||
481 | static inline u32 fifo_engine_status_engine_v(u32 r) | ||
482 | { | ||
483 | return (r >> 31) & 0x1; | ||
484 | } | ||
485 | static inline u32 fifo_engine_status_engine_idle_v(void) | ||
486 | { | ||
487 | return 0x00000000; | ||
488 | } | ||
489 | static inline u32 fifo_engine_status_engine_busy_v(void) | ||
490 | { | ||
491 | return 0x00000001; | ||
492 | } | ||
493 | static inline u32 fifo_engine_status_ctxsw_v(u32 r) | ||
494 | { | ||
495 | return (r >> 15) & 0x1; | ||
496 | } | ||
497 | static inline u32 fifo_engine_status_ctxsw_in_progress_v(void) | ||
498 | { | ||
499 | return 0x00000001; | ||
500 | } | ||
501 | static inline u32 fifo_engine_status_ctxsw_in_progress_f(void) | ||
502 | { | ||
503 | return 0x8000; | ||
504 | } | ||
505 | static inline u32 fifo_pbdma_status_r(u32 i) | ||
506 | { | ||
507 | return 0x00003080 + i*4; | ||
508 | } | ||
509 | static inline u32 fifo_pbdma_status__size_1_v(void) | ||
510 | { | ||
511 | return 0x00000001; | ||
512 | } | ||
513 | static inline u32 fifo_pbdma_status_id_v(u32 r) | ||
514 | { | ||
515 | return (r >> 0) & 0xfff; | ||
516 | } | ||
517 | static inline u32 fifo_pbdma_status_id_type_v(u32 r) | ||
518 | { | ||
519 | return (r >> 12) & 0x1; | ||
520 | } | ||
521 | static inline u32 fifo_pbdma_status_id_type_chid_v(void) | ||
522 | { | ||
523 | return 0x00000000; | ||
524 | } | ||
525 | static inline u32 fifo_pbdma_status_chan_status_v(u32 r) | ||
526 | { | ||
527 | return (r >> 13) & 0x7; | ||
528 | } | ||
529 | static inline u32 fifo_pbdma_status_chan_status_valid_v(void) | ||
530 | { | ||
531 | return 0x00000001; | ||
532 | } | ||
533 | static inline u32 fifo_pbdma_status_chan_status_chsw_load_v(void) | ||
534 | { | ||
535 | return 0x00000005; | ||
536 | } | ||
537 | static inline u32 fifo_pbdma_status_chan_status_chsw_save_v(void) | ||
538 | { | ||
539 | return 0x00000006; | ||
540 | } | ||
541 | static inline u32 fifo_pbdma_status_chan_status_chsw_switch_v(void) | ||
542 | { | ||
543 | return 0x00000007; | ||
544 | } | ||
545 | static inline u32 fifo_pbdma_status_next_id_v(u32 r) | ||
546 | { | ||
547 | return (r >> 16) & 0xfff; | ||
548 | } | ||
549 | static inline u32 fifo_pbdma_status_next_id_type_v(u32 r) | ||
550 | { | ||
551 | return (r >> 28) & 0x1; | ||
552 | } | ||
553 | static inline u32 fifo_pbdma_status_next_id_type_chid_v(void) | ||
554 | { | ||
555 | return 0x00000000; | ||
556 | } | ||
557 | static inline u32 fifo_pbdma_status_chsw_v(u32 r) | ||
558 | { | ||
559 | return (r >> 15) & 0x1; | ||
560 | } | ||
561 | static inline u32 fifo_pbdma_status_chsw_in_progress_v(void) | ||
562 | { | ||
563 | return 0x00000001; | ||
564 | } | ||
565 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h new file mode 100644 index 00000000..0aeb11f9 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h | |||
@@ -0,0 +1,141 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_flush_gk20a_h_ | ||
51 | #define _hw_flush_gk20a_h_ | ||
52 | |||
53 | static inline u32 flush_l2_system_invalidate_r(void) | ||
54 | { | ||
55 | return 0x00070004; | ||
56 | } | ||
57 | static inline u32 flush_l2_system_invalidate_pending_v(u32 r) | ||
58 | { | ||
59 | return (r >> 0) & 0x1; | ||
60 | } | ||
61 | static inline u32 flush_l2_system_invalidate_pending_busy_v(void) | ||
62 | { | ||
63 | return 0x00000001; | ||
64 | } | ||
65 | static inline u32 flush_l2_system_invalidate_pending_busy_f(void) | ||
66 | { | ||
67 | return 0x1; | ||
68 | } | ||
69 | static inline u32 flush_l2_system_invalidate_outstanding_v(u32 r) | ||
70 | { | ||
71 | return (r >> 1) & 0x1; | ||
72 | } | ||
73 | static inline u32 flush_l2_system_invalidate_outstanding_true_v(void) | ||
74 | { | ||
75 | return 0x00000001; | ||
76 | } | ||
77 | static inline u32 flush_l2_flush_dirty_r(void) | ||
78 | { | ||
79 | return 0x00070010; | ||
80 | } | ||
81 | static inline u32 flush_l2_flush_dirty_pending_v(u32 r) | ||
82 | { | ||
83 | return (r >> 0) & 0x1; | ||
84 | } | ||
85 | static inline u32 flush_l2_flush_dirty_pending_empty_v(void) | ||
86 | { | ||
87 | return 0x00000000; | ||
88 | } | ||
89 | static inline u32 flush_l2_flush_dirty_pending_empty_f(void) | ||
90 | { | ||
91 | return 0x0; | ||
92 | } | ||
93 | static inline u32 flush_l2_flush_dirty_pending_busy_v(void) | ||
94 | { | ||
95 | return 0x00000001; | ||
96 | } | ||
97 | static inline u32 flush_l2_flush_dirty_pending_busy_f(void) | ||
98 | { | ||
99 | return 0x1; | ||
100 | } | ||
101 | static inline u32 flush_l2_flush_dirty_outstanding_v(u32 r) | ||
102 | { | ||
103 | return (r >> 1) & 0x1; | ||
104 | } | ||
105 | static inline u32 flush_l2_flush_dirty_outstanding_false_v(void) | ||
106 | { | ||
107 | return 0x00000000; | ||
108 | } | ||
109 | static inline u32 flush_l2_flush_dirty_outstanding_false_f(void) | ||
110 | { | ||
111 | return 0x0; | ||
112 | } | ||
113 | static inline u32 flush_l2_flush_dirty_outstanding_true_v(void) | ||
114 | { | ||
115 | return 0x00000001; | ||
116 | } | ||
117 | static inline u32 flush_fb_flush_r(void) | ||
118 | { | ||
119 | return 0x00070000; | ||
120 | } | ||
121 | static inline u32 flush_fb_flush_pending_v(u32 r) | ||
122 | { | ||
123 | return (r >> 0) & 0x1; | ||
124 | } | ||
125 | static inline u32 flush_fb_flush_pending_busy_v(void) | ||
126 | { | ||
127 | return 0x00000001; | ||
128 | } | ||
129 | static inline u32 flush_fb_flush_pending_busy_f(void) | ||
130 | { | ||
131 | return 0x1; | ||
132 | } | ||
133 | static inline u32 flush_fb_flush_outstanding_v(u32 r) | ||
134 | { | ||
135 | return (r >> 1) & 0x1; | ||
136 | } | ||
137 | static inline u32 flush_fb_flush_outstanding_true_v(void) | ||
138 | { | ||
139 | return 0x00000001; | ||
140 | } | ||
141 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h new file mode 100644 index 00000000..e0118946 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h | |||
@@ -0,0 +1,1141 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_gmmu_gk20a_h_ | ||
51 | #define _hw_gmmu_gk20a_h_ | ||
52 | |||
53 | static inline u32 gmmu_pde_aperture_big_w(void) | ||
54 | { | ||
55 | return 0; | ||
56 | } | ||
57 | static inline u32 gmmu_pde_aperture_big_invalid_f(void) | ||
58 | { | ||
59 | return 0x0; | ||
60 | } | ||
61 | static inline u32 gmmu_pde_aperture_big_video_memory_f(void) | ||
62 | { | ||
63 | return 0x1; | ||
64 | } | ||
65 | static inline u32 gmmu_pde_size_w(void) | ||
66 | { | ||
67 | return 0; | ||
68 | } | ||
69 | static inline u32 gmmu_pde_size_full_f(void) | ||
70 | { | ||
71 | return 0x0; | ||
72 | } | ||
73 | static inline u32 gmmu_pde_address_big_sys_f(u32 v) | ||
74 | { | ||
75 | return (v & 0xfffffff) << 4; | ||
76 | } | ||
77 | static inline u32 gmmu_pde_address_big_sys_w(void) | ||
78 | { | ||
79 | return 0; | ||
80 | } | ||
81 | static inline u32 gmmu_pde_aperture_small_w(void) | ||
82 | { | ||
83 | return 1; | ||
84 | } | ||
85 | static inline u32 gmmu_pde_aperture_small_invalid_f(void) | ||
86 | { | ||
87 | return 0x0; | ||
88 | } | ||
89 | static inline u32 gmmu_pde_aperture_small_video_memory_f(void) | ||
90 | { | ||
91 | return 0x1; | ||
92 | } | ||
93 | static inline u32 gmmu_pde_vol_small_w(void) | ||
94 | { | ||
95 | return 1; | ||
96 | } | ||
97 | static inline u32 gmmu_pde_vol_small_true_f(void) | ||
98 | { | ||
99 | return 0x4; | ||
100 | } | ||
101 | static inline u32 gmmu_pde_vol_small_false_f(void) | ||
102 | { | ||
103 | return 0x0; | ||
104 | } | ||
105 | static inline u32 gmmu_pde_vol_big_w(void) | ||
106 | { | ||
107 | return 1; | ||
108 | } | ||
109 | static inline u32 gmmu_pde_vol_big_true_f(void) | ||
110 | { | ||
111 | return 0x8; | ||
112 | } | ||
113 | static inline u32 gmmu_pde_vol_big_false_f(void) | ||
114 | { | ||
115 | return 0x0; | ||
116 | } | ||
117 | static inline u32 gmmu_pde_address_small_sys_f(u32 v) | ||
118 | { | ||
119 | return (v & 0xfffffff) << 4; | ||
120 | } | ||
121 | static inline u32 gmmu_pde_address_small_sys_w(void) | ||
122 | { | ||
123 | return 1; | ||
124 | } | ||
125 | static inline u32 gmmu_pde_address_shift_v(void) | ||
126 | { | ||
127 | return 0x0000000c; | ||
128 | } | ||
129 | static inline u32 gmmu_pde__size_v(void) | ||
130 | { | ||
131 | return 0x00000008; | ||
132 | } | ||
133 | static inline u32 gmmu_pte__size_v(void) | ||
134 | { | ||
135 | return 0x00000008; | ||
136 | } | ||
137 | static inline u32 gmmu_pte_valid_w(void) | ||
138 | { | ||
139 | return 0; | ||
140 | } | ||
141 | static inline u32 gmmu_pte_valid_true_f(void) | ||
142 | { | ||
143 | return 0x1; | ||
144 | } | ||
145 | static inline u32 gmmu_pte_address_sys_f(u32 v) | ||
146 | { | ||
147 | return (v & 0xfffffff) << 4; | ||
148 | } | ||
149 | static inline u32 gmmu_pte_address_sys_w(void) | ||
150 | { | ||
151 | return 0; | ||
152 | } | ||
153 | static inline u32 gmmu_pte_vol_w(void) | ||
154 | { | ||
155 | return 1; | ||
156 | } | ||
157 | static inline u32 gmmu_pte_vol_true_f(void) | ||
158 | { | ||
159 | return 0x1; | ||
160 | } | ||
161 | static inline u32 gmmu_pte_vol_false_f(void) | ||
162 | { | ||
163 | return 0x0; | ||
164 | } | ||
165 | static inline u32 gmmu_pte_aperture_w(void) | ||
166 | { | ||
167 | return 1; | ||
168 | } | ||
169 | static inline u32 gmmu_pte_aperture_video_memory_f(void) | ||
170 | { | ||
171 | return 0x0; | ||
172 | } | ||
173 | static inline u32 gmmu_pte_read_only_w(void) | ||
174 | { | ||
175 | return 0; | ||
176 | } | ||
177 | static inline u32 gmmu_pte_read_only_true_f(void) | ||
178 | { | ||
179 | return 0x4; | ||
180 | } | ||
181 | static inline u32 gmmu_pte_write_disable_w(void) | ||
182 | { | ||
183 | return 1; | ||
184 | } | ||
185 | static inline u32 gmmu_pte_write_disable_true_f(void) | ||
186 | { | ||
187 | return 0x80000000; | ||
188 | } | ||
189 | static inline u32 gmmu_pte_read_disable_w(void) | ||
190 | { | ||
191 | return 1; | ||
192 | } | ||
193 | static inline u32 gmmu_pte_read_disable_true_f(void) | ||
194 | { | ||
195 | return 0x40000000; | ||
196 | } | ||
197 | static inline u32 gmmu_pte_comptagline_f(u32 v) | ||
198 | { | ||
199 | return (v & 0x1ffff) << 12; | ||
200 | } | ||
201 | static inline u32 gmmu_pte_comptagline_w(void) | ||
202 | { | ||
203 | return 1; | ||
204 | } | ||
205 | static inline u32 gmmu_pte_address_shift_v(void) | ||
206 | { | ||
207 | return 0x0000000c; | ||
208 | } | ||
209 | static inline u32 gmmu_pte_kind_f(u32 v) | ||
210 | { | ||
211 | return (v & 0xff) << 4; | ||
212 | } | ||
213 | static inline u32 gmmu_pte_kind_w(void) | ||
214 | { | ||
215 | return 1; | ||
216 | } | ||
217 | static inline u32 gmmu_pte_kind_invalid_v(void) | ||
218 | { | ||
219 | return 0x000000ff; | ||
220 | } | ||
221 | static inline u32 gmmu_pte_kind_pitch_v(void) | ||
222 | { | ||
223 | return 0x00000000; | ||
224 | } | ||
225 | static inline u32 gmmu_pte_kind_z16_v(void) | ||
226 | { | ||
227 | return 0x00000001; | ||
228 | } | ||
229 | static inline u32 gmmu_pte_kind_z16_2c_v(void) | ||
230 | { | ||
231 | return 0x00000002; | ||
232 | } | ||
233 | static inline u32 gmmu_pte_kind_z16_ms2_2c_v(void) | ||
234 | { | ||
235 | return 0x00000003; | ||
236 | } | ||
237 | static inline u32 gmmu_pte_kind_z16_ms4_2c_v(void) | ||
238 | { | ||
239 | return 0x00000004; | ||
240 | } | ||
241 | static inline u32 gmmu_pte_kind_z16_ms8_2c_v(void) | ||
242 | { | ||
243 | return 0x00000005; | ||
244 | } | ||
245 | static inline u32 gmmu_pte_kind_z16_ms16_2c_v(void) | ||
246 | { | ||
247 | return 0x00000006; | ||
248 | } | ||
249 | static inline u32 gmmu_pte_kind_z16_2z_v(void) | ||
250 | { | ||
251 | return 0x00000007; | ||
252 | } | ||
253 | static inline u32 gmmu_pte_kind_z16_ms2_2z_v(void) | ||
254 | { | ||
255 | return 0x00000008; | ||
256 | } | ||
257 | static inline u32 gmmu_pte_kind_z16_ms4_2z_v(void) | ||
258 | { | ||
259 | return 0x00000009; | ||
260 | } | ||
261 | static inline u32 gmmu_pte_kind_z16_ms8_2z_v(void) | ||
262 | { | ||
263 | return 0x0000000a; | ||
264 | } | ||
265 | static inline u32 gmmu_pte_kind_z16_ms16_2z_v(void) | ||
266 | { | ||
267 | return 0x0000000b; | ||
268 | } | ||
269 | static inline u32 gmmu_pte_kind_z16_4cz_v(void) | ||
270 | { | ||
271 | return 0x0000000c; | ||
272 | } | ||
273 | static inline u32 gmmu_pte_kind_z16_ms2_4cz_v(void) | ||
274 | { | ||
275 | return 0x0000000d; | ||
276 | } | ||
277 | static inline u32 gmmu_pte_kind_z16_ms4_4cz_v(void) | ||
278 | { | ||
279 | return 0x0000000e; | ||
280 | } | ||
281 | static inline u32 gmmu_pte_kind_z16_ms8_4cz_v(void) | ||
282 | { | ||
283 | return 0x0000000f; | ||
284 | } | ||
285 | static inline u32 gmmu_pte_kind_z16_ms16_4cz_v(void) | ||
286 | { | ||
287 | return 0x00000010; | ||
288 | } | ||
289 | static inline u32 gmmu_pte_kind_s8z24_v(void) | ||
290 | { | ||
291 | return 0x00000011; | ||
292 | } | ||
293 | static inline u32 gmmu_pte_kind_s8z24_1z_v(void) | ||
294 | { | ||
295 | return 0x00000012; | ||
296 | } | ||
297 | static inline u32 gmmu_pte_kind_s8z24_ms2_1z_v(void) | ||
298 | { | ||
299 | return 0x00000013; | ||
300 | } | ||
301 | static inline u32 gmmu_pte_kind_s8z24_ms4_1z_v(void) | ||
302 | { | ||
303 | return 0x00000014; | ||
304 | } | ||
305 | static inline u32 gmmu_pte_kind_s8z24_ms8_1z_v(void) | ||
306 | { | ||
307 | return 0x00000015; | ||
308 | } | ||
309 | static inline u32 gmmu_pte_kind_s8z24_ms16_1z_v(void) | ||
310 | { | ||
311 | return 0x00000016; | ||
312 | } | ||
313 | static inline u32 gmmu_pte_kind_s8z24_2cz_v(void) | ||
314 | { | ||
315 | return 0x00000017; | ||
316 | } | ||
317 | static inline u32 gmmu_pte_kind_s8z24_ms2_2cz_v(void) | ||
318 | { | ||
319 | return 0x00000018; | ||
320 | } | ||
321 | static inline u32 gmmu_pte_kind_s8z24_ms4_2cz_v(void) | ||
322 | { | ||
323 | return 0x00000019; | ||
324 | } | ||
325 | static inline u32 gmmu_pte_kind_s8z24_ms8_2cz_v(void) | ||
326 | { | ||
327 | return 0x0000001a; | ||
328 | } | ||
329 | static inline u32 gmmu_pte_kind_s8z24_ms16_2cz_v(void) | ||
330 | { | ||
331 | return 0x0000001b; | ||
332 | } | ||
333 | static inline u32 gmmu_pte_kind_s8z24_2cs_v(void) | ||
334 | { | ||
335 | return 0x0000001c; | ||
336 | } | ||
337 | static inline u32 gmmu_pte_kind_s8z24_ms2_2cs_v(void) | ||
338 | { | ||
339 | return 0x0000001d; | ||
340 | } | ||
341 | static inline u32 gmmu_pte_kind_s8z24_ms4_2cs_v(void) | ||
342 | { | ||
343 | return 0x0000001e; | ||
344 | } | ||
345 | static inline u32 gmmu_pte_kind_s8z24_ms8_2cs_v(void) | ||
346 | { | ||
347 | return 0x0000001f; | ||
348 | } | ||
349 | static inline u32 gmmu_pte_kind_s8z24_ms16_2cs_v(void) | ||
350 | { | ||
351 | return 0x00000020; | ||
352 | } | ||
353 | static inline u32 gmmu_pte_kind_s8z24_4cszv_v(void) | ||
354 | { | ||
355 | return 0x00000021; | ||
356 | } | ||
357 | static inline u32 gmmu_pte_kind_s8z24_ms2_4cszv_v(void) | ||
358 | { | ||
359 | return 0x00000022; | ||
360 | } | ||
361 | static inline u32 gmmu_pte_kind_s8z24_ms4_4cszv_v(void) | ||
362 | { | ||
363 | return 0x00000023; | ||
364 | } | ||
365 | static inline u32 gmmu_pte_kind_s8z24_ms8_4cszv_v(void) | ||
366 | { | ||
367 | return 0x00000024; | ||
368 | } | ||
369 | static inline u32 gmmu_pte_kind_s8z24_ms16_4cszv_v(void) | ||
370 | { | ||
371 | return 0x00000025; | ||
372 | } | ||
373 | static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_v(void) | ||
374 | { | ||
375 | return 0x00000026; | ||
376 | } | ||
377 | static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_v(void) | ||
378 | { | ||
379 | return 0x00000027; | ||
380 | } | ||
381 | static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_v(void) | ||
382 | { | ||
383 | return 0x00000028; | ||
384 | } | ||
385 | static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_v(void) | ||
386 | { | ||
387 | return 0x00000029; | ||
388 | } | ||
389 | static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_1zv_v(void) | ||
390 | { | ||
391 | return 0x0000002e; | ||
392 | } | ||
393 | static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_1zv_v(void) | ||
394 | { | ||
395 | return 0x0000002f; | ||
396 | } | ||
397 | static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_1zv_v(void) | ||
398 | { | ||
399 | return 0x00000030; | ||
400 | } | ||
401 | static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_1zv_v(void) | ||
402 | { | ||
403 | return 0x00000031; | ||
404 | } | ||
405 | static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2cs_v(void) | ||
406 | { | ||
407 | return 0x00000032; | ||
408 | } | ||
409 | static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2cs_v(void) | ||
410 | { | ||
411 | return 0x00000033; | ||
412 | } | ||
413 | static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2cs_v(void) | ||
414 | { | ||
415 | return 0x00000034; | ||
416 | } | ||
417 | static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2cs_v(void) | ||
418 | { | ||
419 | return 0x00000035; | ||
420 | } | ||
421 | static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2czv_v(void) | ||
422 | { | ||
423 | return 0x0000003a; | ||
424 | } | ||
425 | static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2czv_v(void) | ||
426 | { | ||
427 | return 0x0000003b; | ||
428 | } | ||
429 | static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2czv_v(void) | ||
430 | { | ||
431 | return 0x0000003c; | ||
432 | } | ||
433 | static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2czv_v(void) | ||
434 | { | ||
435 | return 0x0000003d; | ||
436 | } | ||
437 | static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2zv_v(void) | ||
438 | { | ||
439 | return 0x0000003e; | ||
440 | } | ||
441 | static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2zv_v(void) | ||
442 | { | ||
443 | return 0x0000003f; | ||
444 | } | ||
445 | static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2zv_v(void) | ||
446 | { | ||
447 | return 0x00000040; | ||
448 | } | ||
449 | static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2zv_v(void) | ||
450 | { | ||
451 | return 0x00000041; | ||
452 | } | ||
453 | static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_4cszv_v(void) | ||
454 | { | ||
455 | return 0x00000042; | ||
456 | } | ||
457 | static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_4cszv_v(void) | ||
458 | { | ||
459 | return 0x00000043; | ||
460 | } | ||
461 | static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_4cszv_v(void) | ||
462 | { | ||
463 | return 0x00000044; | ||
464 | } | ||
465 | static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v(void) | ||
466 | { | ||
467 | return 0x00000045; | ||
468 | } | ||
469 | static inline u32 gmmu_pte_kind_z24s8_v(void) | ||
470 | { | ||
471 | return 0x00000046; | ||
472 | } | ||
473 | static inline u32 gmmu_pte_kind_z24s8_1z_v(void) | ||
474 | { | ||
475 | return 0x00000047; | ||
476 | } | ||
477 | static inline u32 gmmu_pte_kind_z24s8_ms2_1z_v(void) | ||
478 | { | ||
479 | return 0x00000048; | ||
480 | } | ||
481 | static inline u32 gmmu_pte_kind_z24s8_ms4_1z_v(void) | ||
482 | { | ||
483 | return 0x00000049; | ||
484 | } | ||
485 | static inline u32 gmmu_pte_kind_z24s8_ms8_1z_v(void) | ||
486 | { | ||
487 | return 0x0000004a; | ||
488 | } | ||
489 | static inline u32 gmmu_pte_kind_z24s8_ms16_1z_v(void) | ||
490 | { | ||
491 | return 0x0000004b; | ||
492 | } | ||
493 | static inline u32 gmmu_pte_kind_z24s8_2cs_v(void) | ||
494 | { | ||
495 | return 0x0000004c; | ||
496 | } | ||
497 | static inline u32 gmmu_pte_kind_z24s8_ms2_2cs_v(void) | ||
498 | { | ||
499 | return 0x0000004d; | ||
500 | } | ||
501 | static inline u32 gmmu_pte_kind_z24s8_ms4_2cs_v(void) | ||
502 | { | ||
503 | return 0x0000004e; | ||
504 | } | ||
505 | static inline u32 gmmu_pte_kind_z24s8_ms8_2cs_v(void) | ||
506 | { | ||
507 | return 0x0000004f; | ||
508 | } | ||
509 | static inline u32 gmmu_pte_kind_z24s8_ms16_2cs_v(void) | ||
510 | { | ||
511 | return 0x00000050; | ||
512 | } | ||
513 | static inline u32 gmmu_pte_kind_z24s8_2cz_v(void) | ||
514 | { | ||
515 | return 0x00000051; | ||
516 | } | ||
517 | static inline u32 gmmu_pte_kind_z24s8_ms2_2cz_v(void) | ||
518 | { | ||
519 | return 0x00000052; | ||
520 | } | ||
521 | static inline u32 gmmu_pte_kind_z24s8_ms4_2cz_v(void) | ||
522 | { | ||
523 | return 0x00000053; | ||
524 | } | ||
525 | static inline u32 gmmu_pte_kind_z24s8_ms8_2cz_v(void) | ||
526 | { | ||
527 | return 0x00000054; | ||
528 | } | ||
529 | static inline u32 gmmu_pte_kind_z24s8_ms16_2cz_v(void) | ||
530 | { | ||
531 | return 0x00000055; | ||
532 | } | ||
533 | static inline u32 gmmu_pte_kind_z24s8_4cszv_v(void) | ||
534 | { | ||
535 | return 0x00000056; | ||
536 | } | ||
537 | static inline u32 gmmu_pte_kind_z24s8_ms2_4cszv_v(void) | ||
538 | { | ||
539 | return 0x00000057; | ||
540 | } | ||
541 | static inline u32 gmmu_pte_kind_z24s8_ms4_4cszv_v(void) | ||
542 | { | ||
543 | return 0x00000058; | ||
544 | } | ||
545 | static inline u32 gmmu_pte_kind_z24s8_ms8_4cszv_v(void) | ||
546 | { | ||
547 | return 0x00000059; | ||
548 | } | ||
549 | static inline u32 gmmu_pte_kind_z24s8_ms16_4cszv_v(void) | ||
550 | { | ||
551 | return 0x0000005a; | ||
552 | } | ||
553 | static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_v(void) | ||
554 | { | ||
555 | return 0x0000005b; | ||
556 | } | ||
557 | static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_v(void) | ||
558 | { | ||
559 | return 0x0000005c; | ||
560 | } | ||
561 | static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_v(void) | ||
562 | { | ||
563 | return 0x0000005d; | ||
564 | } | ||
565 | static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_v(void) | ||
566 | { | ||
567 | return 0x0000005e; | ||
568 | } | ||
569 | static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_1zv_v(void) | ||
570 | { | ||
571 | return 0x00000063; | ||
572 | } | ||
573 | static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_1zv_v(void) | ||
574 | { | ||
575 | return 0x00000064; | ||
576 | } | ||
577 | static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_1zv_v(void) | ||
578 | { | ||
579 | return 0x00000065; | ||
580 | } | ||
581 | static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_1zv_v(void) | ||
582 | { | ||
583 | return 0x00000066; | ||
584 | } | ||
585 | static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2cs_v(void) | ||
586 | { | ||
587 | return 0x00000067; | ||
588 | } | ||
589 | static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2cs_v(void) | ||
590 | { | ||
591 | return 0x00000068; | ||
592 | } | ||
593 | static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2cs_v(void) | ||
594 | { | ||
595 | return 0x00000069; | ||
596 | } | ||
597 | static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2cs_v(void) | ||
598 | { | ||
599 | return 0x0000006a; | ||
600 | } | ||
601 | static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2czv_v(void) | ||
602 | { | ||
603 | return 0x0000006f; | ||
604 | } | ||
605 | static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2czv_v(void) | ||
606 | { | ||
607 | return 0x00000070; | ||
608 | } | ||
609 | static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2czv_v(void) | ||
610 | { | ||
611 | return 0x00000071; | ||
612 | } | ||
613 | static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2czv_v(void) | ||
614 | { | ||
615 | return 0x00000072; | ||
616 | } | ||
617 | static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2zv_v(void) | ||
618 | { | ||
619 | return 0x00000073; | ||
620 | } | ||
621 | static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2zv_v(void) | ||
622 | { | ||
623 | return 0x00000074; | ||
624 | } | ||
625 | static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2zv_v(void) | ||
626 | { | ||
627 | return 0x00000075; | ||
628 | } | ||
629 | static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2zv_v(void) | ||
630 | { | ||
631 | return 0x00000076; | ||
632 | } | ||
633 | static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_4cszv_v(void) | ||
634 | { | ||
635 | return 0x00000077; | ||
636 | } | ||
637 | static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_4cszv_v(void) | ||
638 | { | ||
639 | return 0x00000078; | ||
640 | } | ||
641 | static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_4cszv_v(void) | ||
642 | { | ||
643 | return 0x00000079; | ||
644 | } | ||
645 | static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v(void) | ||
646 | { | ||
647 | return 0x0000007a; | ||
648 | } | ||
649 | static inline u32 gmmu_pte_kind_zf32_v(void) | ||
650 | { | ||
651 | return 0x0000007b; | ||
652 | } | ||
653 | static inline u32 gmmu_pte_kind_zf32_1z_v(void) | ||
654 | { | ||
655 | return 0x0000007c; | ||
656 | } | ||
657 | static inline u32 gmmu_pte_kind_zf32_ms2_1z_v(void) | ||
658 | { | ||
659 | return 0x0000007d; | ||
660 | } | ||
661 | static inline u32 gmmu_pte_kind_zf32_ms4_1z_v(void) | ||
662 | { | ||
663 | return 0x0000007e; | ||
664 | } | ||
665 | static inline u32 gmmu_pte_kind_zf32_ms8_1z_v(void) | ||
666 | { | ||
667 | return 0x0000007f; | ||
668 | } | ||
669 | static inline u32 gmmu_pte_kind_zf32_ms16_1z_v(void) | ||
670 | { | ||
671 | return 0x00000080; | ||
672 | } | ||
673 | static inline u32 gmmu_pte_kind_zf32_2cs_v(void) | ||
674 | { | ||
675 | return 0x00000081; | ||
676 | } | ||
677 | static inline u32 gmmu_pte_kind_zf32_ms2_2cs_v(void) | ||
678 | { | ||
679 | return 0x00000082; | ||
680 | } | ||
681 | static inline u32 gmmu_pte_kind_zf32_ms4_2cs_v(void) | ||
682 | { | ||
683 | return 0x00000083; | ||
684 | } | ||
685 | static inline u32 gmmu_pte_kind_zf32_ms8_2cs_v(void) | ||
686 | { | ||
687 | return 0x00000084; | ||
688 | } | ||
689 | static inline u32 gmmu_pte_kind_zf32_ms16_2cs_v(void) | ||
690 | { | ||
691 | return 0x00000085; | ||
692 | } | ||
693 | static inline u32 gmmu_pte_kind_zf32_2cz_v(void) | ||
694 | { | ||
695 | return 0x00000086; | ||
696 | } | ||
697 | static inline u32 gmmu_pte_kind_zf32_ms2_2cz_v(void) | ||
698 | { | ||
699 | return 0x00000087; | ||
700 | } | ||
701 | static inline u32 gmmu_pte_kind_zf32_ms4_2cz_v(void) | ||
702 | { | ||
703 | return 0x00000088; | ||
704 | } | ||
705 | static inline u32 gmmu_pte_kind_zf32_ms8_2cz_v(void) | ||
706 | { | ||
707 | return 0x00000089; | ||
708 | } | ||
709 | static inline u32 gmmu_pte_kind_zf32_ms16_2cz_v(void) | ||
710 | { | ||
711 | return 0x0000008a; | ||
712 | } | ||
713 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v(void) | ||
714 | { | ||
715 | return 0x0000008b; | ||
716 | } | ||
717 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v(void) | ||
718 | { | ||
719 | return 0x0000008c; | ||
720 | } | ||
721 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v(void) | ||
722 | { | ||
723 | return 0x0000008d; | ||
724 | } | ||
725 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v(void) | ||
726 | { | ||
727 | return 0x0000008e; | ||
728 | } | ||
729 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v(void) | ||
730 | { | ||
731 | return 0x0000008f; | ||
732 | } | ||
733 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1cs_v(void) | ||
734 | { | ||
735 | return 0x00000090; | ||
736 | } | ||
737 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1cs_v(void) | ||
738 | { | ||
739 | return 0x00000091; | ||
740 | } | ||
741 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v(void) | ||
742 | { | ||
743 | return 0x00000092; | ||
744 | } | ||
745 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v(void) | ||
746 | { | ||
747 | return 0x00000097; | ||
748 | } | ||
749 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1zv_v(void) | ||
750 | { | ||
751 | return 0x00000098; | ||
752 | } | ||
753 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1zv_v(void) | ||
754 | { | ||
755 | return 0x00000099; | ||
756 | } | ||
757 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1zv_v(void) | ||
758 | { | ||
759 | return 0x0000009a; | ||
760 | } | ||
761 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1czv_v(void) | ||
762 | { | ||
763 | return 0x0000009b; | ||
764 | } | ||
765 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1czv_v(void) | ||
766 | { | ||
767 | return 0x0000009c; | ||
768 | } | ||
769 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1czv_v(void) | ||
770 | { | ||
771 | return 0x0000009d; | ||
772 | } | ||
773 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1czv_v(void) | ||
774 | { | ||
775 | return 0x0000009e; | ||
776 | } | ||
777 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cs_v(void) | ||
778 | { | ||
779 | return 0x0000009f; | ||
780 | } | ||
781 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cs_v(void) | ||
782 | { | ||
783 | return 0x000000a0; | ||
784 | } | ||
785 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cs_v(void) | ||
786 | { | ||
787 | return 0x000000a1; | ||
788 | } | ||
789 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cs_v(void) | ||
790 | { | ||
791 | return 0x000000a2; | ||
792 | } | ||
793 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v(void) | ||
794 | { | ||
795 | return 0x000000a3; | ||
796 | } | ||
797 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cszv_v(void) | ||
798 | { | ||
799 | return 0x000000a4; | ||
800 | } | ||
801 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cszv_v(void) | ||
802 | { | ||
803 | return 0x000000a5; | ||
804 | } | ||
805 | static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v(void) | ||
806 | { | ||
807 | return 0x000000a6; | ||
808 | } | ||
809 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v(void) | ||
810 | { | ||
811 | return 0x000000a7; | ||
812 | } | ||
813 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v(void) | ||
814 | { | ||
815 | return 0x000000a8; | ||
816 | } | ||
817 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v(void) | ||
818 | { | ||
819 | return 0x000000a9; | ||
820 | } | ||
821 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v(void) | ||
822 | { | ||
823 | return 0x000000aa; | ||
824 | } | ||
825 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v(void) | ||
826 | { | ||
827 | return 0x000000ab; | ||
828 | } | ||
829 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1cs_v(void) | ||
830 | { | ||
831 | return 0x000000ac; | ||
832 | } | ||
833 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1cs_v(void) | ||
834 | { | ||
835 | return 0x000000ad; | ||
836 | } | ||
837 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v(void) | ||
838 | { | ||
839 | return 0x000000ae; | ||
840 | } | ||
841 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v(void) | ||
842 | { | ||
843 | return 0x000000b3; | ||
844 | } | ||
845 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1zv_v(void) | ||
846 | { | ||
847 | return 0x000000b4; | ||
848 | } | ||
849 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1zv_v(void) | ||
850 | { | ||
851 | return 0x000000b5; | ||
852 | } | ||
853 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1zv_v(void) | ||
854 | { | ||
855 | return 0x000000b6; | ||
856 | } | ||
857 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1czv_v(void) | ||
858 | { | ||
859 | return 0x000000b7; | ||
860 | } | ||
861 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1czv_v(void) | ||
862 | { | ||
863 | return 0x000000b8; | ||
864 | } | ||
865 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1czv_v(void) | ||
866 | { | ||
867 | return 0x000000b9; | ||
868 | } | ||
869 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1czv_v(void) | ||
870 | { | ||
871 | return 0x000000ba; | ||
872 | } | ||
873 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cs_v(void) | ||
874 | { | ||
875 | return 0x000000bb; | ||
876 | } | ||
877 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cs_v(void) | ||
878 | { | ||
879 | return 0x000000bc; | ||
880 | } | ||
881 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cs_v(void) | ||
882 | { | ||
883 | return 0x000000bd; | ||
884 | } | ||
885 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cs_v(void) | ||
886 | { | ||
887 | return 0x000000be; | ||
888 | } | ||
889 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v(void) | ||
890 | { | ||
891 | return 0x000000bf; | ||
892 | } | ||
893 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cszv_v(void) | ||
894 | { | ||
895 | return 0x000000c0; | ||
896 | } | ||
897 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cszv_v(void) | ||
898 | { | ||
899 | return 0x000000c1; | ||
900 | } | ||
901 | static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v(void) | ||
902 | { | ||
903 | return 0x000000c2; | ||
904 | } | ||
905 | static inline u32 gmmu_pte_kind_zf32_x24s8_v(void) | ||
906 | { | ||
907 | return 0x000000c3; | ||
908 | } | ||
909 | static inline u32 gmmu_pte_kind_zf32_x24s8_1cs_v(void) | ||
910 | { | ||
911 | return 0x000000c4; | ||
912 | } | ||
913 | static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_1cs_v(void) | ||
914 | { | ||
915 | return 0x000000c5; | ||
916 | } | ||
917 | static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_1cs_v(void) | ||
918 | { | ||
919 | return 0x000000c6; | ||
920 | } | ||
921 | static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_1cs_v(void) | ||
922 | { | ||
923 | return 0x000000c7; | ||
924 | } | ||
925 | static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_1cs_v(void) | ||
926 | { | ||
927 | return 0x000000c8; | ||
928 | } | ||
929 | static inline u32 gmmu_pte_kind_zf32_x24s8_2cszv_v(void) | ||
930 | { | ||
931 | return 0x000000ce; | ||
932 | } | ||
933 | static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_2cszv_v(void) | ||
934 | { | ||
935 | return 0x000000cf; | ||
936 | } | ||
937 | static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_2cszv_v(void) | ||
938 | { | ||
939 | return 0x000000d0; | ||
940 | } | ||
941 | static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v(void) | ||
942 | { | ||
943 | return 0x000000d1; | ||
944 | } | ||
945 | static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_2cszv_v(void) | ||
946 | { | ||
947 | return 0x000000d2; | ||
948 | } | ||
949 | static inline u32 gmmu_pte_kind_zf32_x24s8_2cs_v(void) | ||
950 | { | ||
951 | return 0x000000d3; | ||
952 | } | ||
953 | static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_2cs_v(void) | ||
954 | { | ||
955 | return 0x000000d4; | ||
956 | } | ||
957 | static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_2cs_v(void) | ||
958 | { | ||
959 | return 0x000000d5; | ||
960 | } | ||
961 | static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_2cs_v(void) | ||
962 | { | ||
963 | return 0x000000d6; | ||
964 | } | ||
965 | static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_2cs_v(void) | ||
966 | { | ||
967 | return 0x000000d7; | ||
968 | } | ||
969 | static inline u32 gmmu_pte_kind_generic_16bx2_v(void) | ||
970 | { | ||
971 | return 0x000000fe; | ||
972 | } | ||
973 | static inline u32 gmmu_pte_kind_c32_2c_v(void) | ||
974 | { | ||
975 | return 0x000000d8; | ||
976 | } | ||
977 | static inline u32 gmmu_pte_kind_c32_2cbr_v(void) | ||
978 | { | ||
979 | return 0x000000d9; | ||
980 | } | ||
981 | static inline u32 gmmu_pte_kind_c32_2cba_v(void) | ||
982 | { | ||
983 | return 0x000000da; | ||
984 | } | ||
985 | static inline u32 gmmu_pte_kind_c32_2cra_v(void) | ||
986 | { | ||
987 | return 0x000000db; | ||
988 | } | ||
989 | static inline u32 gmmu_pte_kind_c32_2bra_v(void) | ||
990 | { | ||
991 | return 0x000000dc; | ||
992 | } | ||
993 | static inline u32 gmmu_pte_kind_c32_ms2_2c_v(void) | ||
994 | { | ||
995 | return 0x000000dd; | ||
996 | } | ||
997 | static inline u32 gmmu_pte_kind_c32_ms2_2cbr_v(void) | ||
998 | { | ||
999 | return 0x000000de; | ||
1000 | } | ||
1001 | static inline u32 gmmu_pte_kind_c32_ms2_2cra_v(void) | ||
1002 | { | ||
1003 | return 0x000000cc; | ||
1004 | } | ||
1005 | static inline u32 gmmu_pte_kind_c32_ms4_2c_v(void) | ||
1006 | { | ||
1007 | return 0x000000df; | ||
1008 | } | ||
1009 | static inline u32 gmmu_pte_kind_c32_ms4_2cbr_v(void) | ||
1010 | { | ||
1011 | return 0x000000e0; | ||
1012 | } | ||
1013 | static inline u32 gmmu_pte_kind_c32_ms4_2cba_v(void) | ||
1014 | { | ||
1015 | return 0x000000e1; | ||
1016 | } | ||
1017 | static inline u32 gmmu_pte_kind_c32_ms4_2cra_v(void) | ||
1018 | { | ||
1019 | return 0x000000e2; | ||
1020 | } | ||
1021 | static inline u32 gmmu_pte_kind_c32_ms4_2bra_v(void) | ||
1022 | { | ||
1023 | return 0x000000e3; | ||
1024 | } | ||
1025 | static inline u32 gmmu_pte_kind_c32_ms8_ms16_2c_v(void) | ||
1026 | { | ||
1027 | return 0x000000e4; | ||
1028 | } | ||
1029 | static inline u32 gmmu_pte_kind_c32_ms8_ms16_2cra_v(void) | ||
1030 | { | ||
1031 | return 0x000000e5; | ||
1032 | } | ||
1033 | static inline u32 gmmu_pte_kind_c64_2c_v(void) | ||
1034 | { | ||
1035 | return 0x000000e6; | ||
1036 | } | ||
1037 | static inline u32 gmmu_pte_kind_c64_2cbr_v(void) | ||
1038 | { | ||
1039 | return 0x000000e7; | ||
1040 | } | ||
1041 | static inline u32 gmmu_pte_kind_c64_2cba_v(void) | ||
1042 | { | ||
1043 | return 0x000000e8; | ||
1044 | } | ||
1045 | static inline u32 gmmu_pte_kind_c64_2cra_v(void) | ||
1046 | { | ||
1047 | return 0x000000e9; | ||
1048 | } | ||
1049 | static inline u32 gmmu_pte_kind_c64_2bra_v(void) | ||
1050 | { | ||
1051 | return 0x000000ea; | ||
1052 | } | ||
1053 | static inline u32 gmmu_pte_kind_c64_ms2_2c_v(void) | ||
1054 | { | ||
1055 | return 0x000000eb; | ||
1056 | } | ||
1057 | static inline u32 gmmu_pte_kind_c64_ms2_2cbr_v(void) | ||
1058 | { | ||
1059 | return 0x000000ec; | ||
1060 | } | ||
1061 | static inline u32 gmmu_pte_kind_c64_ms2_2cra_v(void) | ||
1062 | { | ||
1063 | return 0x000000cd; | ||
1064 | } | ||
1065 | static inline u32 gmmu_pte_kind_c64_ms4_2c_v(void) | ||
1066 | { | ||
1067 | return 0x000000ed; | ||
1068 | } | ||
1069 | static inline u32 gmmu_pte_kind_c64_ms4_2cbr_v(void) | ||
1070 | { | ||
1071 | return 0x000000ee; | ||
1072 | } | ||
1073 | static inline u32 gmmu_pte_kind_c64_ms4_2cba_v(void) | ||
1074 | { | ||
1075 | return 0x000000ef; | ||
1076 | } | ||
1077 | static inline u32 gmmu_pte_kind_c64_ms4_2cra_v(void) | ||
1078 | { | ||
1079 | return 0x000000f0; | ||
1080 | } | ||
1081 | static inline u32 gmmu_pte_kind_c64_ms4_2bra_v(void) | ||
1082 | { | ||
1083 | return 0x000000f1; | ||
1084 | } | ||
1085 | static inline u32 gmmu_pte_kind_c64_ms8_ms16_2c_v(void) | ||
1086 | { | ||
1087 | return 0x000000f2; | ||
1088 | } | ||
1089 | static inline u32 gmmu_pte_kind_c64_ms8_ms16_2cra_v(void) | ||
1090 | { | ||
1091 | return 0x000000f3; | ||
1092 | } | ||
1093 | static inline u32 gmmu_pte_kind_c128_2c_v(void) | ||
1094 | { | ||
1095 | return 0x000000f4; | ||
1096 | } | ||
1097 | static inline u32 gmmu_pte_kind_c128_2cr_v(void) | ||
1098 | { | ||
1099 | return 0x000000f5; | ||
1100 | } | ||
1101 | static inline u32 gmmu_pte_kind_c128_ms2_2c_v(void) | ||
1102 | { | ||
1103 | return 0x000000f6; | ||
1104 | } | ||
1105 | static inline u32 gmmu_pte_kind_c128_ms2_2cr_v(void) | ||
1106 | { | ||
1107 | return 0x000000f7; | ||
1108 | } | ||
1109 | static inline u32 gmmu_pte_kind_c128_ms4_2c_v(void) | ||
1110 | { | ||
1111 | return 0x000000f8; | ||
1112 | } | ||
1113 | static inline u32 gmmu_pte_kind_c128_ms4_2cr_v(void) | ||
1114 | { | ||
1115 | return 0x000000f9; | ||
1116 | } | ||
1117 | static inline u32 gmmu_pte_kind_c128_ms8_ms16_2c_v(void) | ||
1118 | { | ||
1119 | return 0x000000fa; | ||
1120 | } | ||
1121 | static inline u32 gmmu_pte_kind_c128_ms8_ms16_2cr_v(void) | ||
1122 | { | ||
1123 | return 0x000000fb; | ||
1124 | } | ||
1125 | static inline u32 gmmu_pte_kind_x8c24_v(void) | ||
1126 | { | ||
1127 | return 0x000000fc; | ||
1128 | } | ||
1129 | static inline u32 gmmu_pte_kind_pitch_no_swizzle_v(void) | ||
1130 | { | ||
1131 | return 0x000000fd; | ||
1132 | } | ||
1133 | static inline u32 gmmu_pte_kind_smsked_message_v(void) | ||
1134 | { | ||
1135 | return 0x000000ca; | ||
1136 | } | ||
1137 | static inline u32 gmmu_pte_kind_smhost_message_v(void) | ||
1138 | { | ||
1139 | return 0x000000cb; | ||
1140 | } | ||
1141 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h new file mode 100644 index 00000000..ece7602d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h | |||
@@ -0,0 +1,3173 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_gr_gk20a_h_ | ||
51 | #define _hw_gr_gk20a_h_ | ||
52 | |||
/*
 * gr_intr: GR engine interrupt status register (0x400100), the pending
 * and reset bit values for each interrupt source, the nonstall interrupt
 * register, and the interrupt-enable register.
 */
static inline u32 gr_intr_r(void) { return 0x00400100; }
static inline u32 gr_intr_notify_pending_f(void) { return 0x1; }
static inline u32 gr_intr_notify_reset_f(void) { return 0x1; }
static inline u32 gr_intr_semaphore_pending_f(void) { return 0x2; }
static inline u32 gr_intr_semaphore_reset_f(void) { return 0x2; }
static inline u32 gr_intr_semaphore_timeout_not_pending_f(void) { return 0x0; }
static inline u32 gr_intr_semaphore_timeout_pending_f(void) { return 0x4; }
static inline u32 gr_intr_semaphore_timeout_reset_f(void) { return 0x4; }
static inline u32 gr_intr_illegal_method_pending_f(void) { return 0x10; }
static inline u32 gr_intr_illegal_method_reset_f(void) { return 0x10; }
static inline u32 gr_intr_illegal_notify_pending_f(void) { return 0x40; }
static inline u32 gr_intr_illegal_notify_reset_f(void) { return 0x40; }
static inline u32 gr_intr_illegal_class_pending_f(void) { return 0x20; }
static inline u32 gr_intr_illegal_class_reset_f(void) { return 0x20; }
static inline u32 gr_intr_class_error_pending_f(void) { return 0x100000; }
static inline u32 gr_intr_class_error_reset_f(void) { return 0x100000; }
static inline u32 gr_intr_exception_pending_f(void) { return 0x200000; }
static inline u32 gr_intr_exception_reset_f(void) { return 0x200000; }
static inline u32 gr_intr_firmware_method_pending_f(void) { return 0x100; }
static inline u32 gr_intr_firmware_method_reset_f(void) { return 0x100; }
static inline u32 gr_intr_nonstall_r(void) { return 0x00400120; }
static inline u32 gr_intr_nonstall_trap_pending_f(void) { return 0x2; }
static inline u32 gr_intr_en_r(void) { return 0x0040013c; }
/*
 * gr_exception / gr_exception1 / gr_exception2: exception status
 * registers and their enable registers, plus FE/GPC field masks.
 */
static inline u32 gr_exception_r(void) { return 0x00400108; }
static inline u32 gr_exception_fe_m(void) { return 0x1 << 0; }
static inline u32 gr_exception_gpc_m(void) { return 0x1 << 24; }
static inline u32 gr_exception1_r(void) { return 0x00400118; }
static inline u32 gr_exception1_gpc_0_pending_f(void) { return 0x1; }
static inline u32 gr_exception2_r(void) { return 0x0040011c; }
static inline u32 gr_exception_en_r(void) { return 0x00400138; }
static inline u32 gr_exception_en_fe_m(void) { return 0x1 << 0; }
static inline u32 gr_exception1_en_r(void) { return 0x00400130; }
static inline u32 gr_exception2_en_r(void) { return 0x00400134; }
/*
 * gr_gpfifo_ctl: GPFIFO access-control register (0x400500) with the
 * access (bit 0) and semaphore-access (bit 16) field encodings.
 */
static inline u32 gr_gpfifo_ctl_r(void) { return 0x00400500; }
static inline u32 gr_gpfifo_ctl_access_f(u32 v) { return (v & 0x1) << 0; }
static inline u32 gr_gpfifo_ctl_access_disabled_f(void) { return 0x0; }
static inline u32 gr_gpfifo_ctl_access_enabled_f(void) { return 0x1; }
static inline u32 gr_gpfifo_ctl_semaphore_access_f(u32 v) { return (v & 0x1) << 16; }
static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_v(void) { return 0x00000001; }
static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_f(void) { return 0x10000; }
/*
 * gr_trapped_*: trapped-method address/data capture registers; the
 * address register yields the method (bits 2..13) and subchannel
 * (bits 16..18) of the trapped method.
 */
static inline u32 gr_trapped_addr_r(void) { return 0x00400704; }
static inline u32 gr_trapped_addr_mthd_v(u32 r) { return (r >> 2) & 0xfff; }
static inline u32 gr_trapped_addr_subch_v(u32 r) { return (r >> 16) & 0x7; }
static inline u32 gr_trapped_data_lo_r(void) { return 0x00400708; }
static inline u32 gr_trapped_data_hi_r(void) { return 0x0040070c; }
/*
 * gr_status / gr_engine_status: GR engine status registers with the
 * FE method-lower idle field and the engine-busy value.
 */
static inline u32 gr_status_r(void) { return 0x00400700; }
static inline u32 gr_status_fe_method_lower_v(u32 r) { return (r >> 2) & 0x1; }
static inline u32 gr_status_fe_method_lower_idle_v(void) { return 0x00000000; }
static inline u32 gr_status_mask_r(void) { return 0x00400610; }
static inline u32 gr_engine_status_r(void) { return 0x0040060c; }
static inline u32 gr_engine_status_value_busy_f(void) { return 0x1; }
/*
 * gr_pipe_bundle: bundle address/data/config registers used to stream
 * bundle state into the graphics pipe.
 */
static inline u32 gr_pipe_bundle_address_r(void) { return 0x00400200; }
static inline u32 gr_pipe_bundle_address_value_v(u32 r) { return (r >> 0) & 0xffff; }
static inline u32 gr_pipe_bundle_data_r(void) { return 0x00400204; }
static inline u32 gr_pipe_bundle_config_r(void) { return 0x00400208; }
static inline u32 gr_pipe_bundle_config_override_pipe_mode_disabled_f(void) { return 0x0; }
static inline u32 gr_pipe_bundle_config_override_pipe_mode_enabled_f(void) { return 0x80000000; }
/*
 * gr_fe: front-end HWW error-status register, go-idle timeout, and the
 * FE object table (indexed, 4-byte stride).
 */
static inline u32 gr_fe_hww_esr_r(void) { return 0x00404000; }
static inline u32 gr_fe_hww_esr_reset_active_f(void) { return 0x40000000; }
static inline u32 gr_fe_hww_esr_en_enable_f(void) { return 0x80000000; }
static inline u32 gr_fe_go_idle_timeout_r(void) { return 0x00404154; }
static inline u32 gr_fe_go_idle_timeout_count_f(u32 v) { return (v & 0xffffffff) << 0; }
static inline u32 gr_fe_go_idle_timeout_count_disabled_f(void) { return 0x0; }
static inline u32 gr_fe_object_table_r(u32 i) { return 0x00404200 + i*4; }
static inline u32 gr_fe_object_table_nvclass_v(u32 r) { return (r >> 0) & 0xffff; }
/*
 * MME shadow RAM access registers plus the MME and MEMFMT HWW
 * error-status registers (reset/enable bit values).
 */
static inline u32 gr_pri_mme_shadow_raw_index_r(void) { return 0x00404488; }
static inline u32 gr_pri_mme_shadow_raw_index_write_trigger_f(void) { return 0x80000000; }
static inline u32 gr_pri_mme_shadow_raw_data_r(void) { return 0x0040448c; }
static inline u32 gr_mme_hww_esr_r(void) { return 0x00404490; }
static inline u32 gr_mme_hww_esr_reset_active_f(void) { return 0x40000000; }
static inline u32 gr_mme_hww_esr_en_enable_f(void) { return 0x80000000; }
static inline u32 gr_memfmt_hww_esr_r(void) { return 0x00404600; }
static inline u32 gr_memfmt_hww_esr_reset_active_f(void) { return 0x40000000; }
static inline u32 gr_memfmt_hww_esr_en_enable_f(void) { return 0x80000000; }
/*
 * FECS (front-end context switch) microcontroller control registers:
 * CPU control, DMA control (with scrubbing-status masks), OS/idle state,
 * mailboxes, IRQ registers, and current/next context pointers.
 */
static inline u32 gr_fecs_cpuctl_r(void) { return 0x00409100; }
static inline u32 gr_fecs_cpuctl_startcpu_f(u32 v) { return (v & 0x1) << 1; }
static inline u32 gr_fecs_dmactl_r(void) { return 0x0040910c; }
static inline u32 gr_fecs_dmactl_require_ctx_f(u32 v) { return (v & 0x1) << 0; }
static inline u32 gr_fecs_dmactl_dmem_scrubbing_m(void) { return 0x1 << 1; }
static inline u32 gr_fecs_dmactl_imem_scrubbing_m(void) { return 0x1 << 2; }
static inline u32 gr_fecs_os_r(void) { return 0x00409080; }
static inline u32 gr_fecs_idlestate_r(void) { return 0x0040904c; }
static inline u32 gr_fecs_mailbox0_r(void) { return 0x00409040; }
static inline u32 gr_fecs_mailbox1_r(void) { return 0x00409044; }
static inline u32 gr_fecs_irqstat_r(void) { return 0x00409008; }
static inline u32 gr_fecs_irqmode_r(void) { return 0x0040900c; }
static inline u32 gr_fecs_irqmask_r(void) { return 0x00409018; }
static inline u32 gr_fecs_irqdest_r(void) { return 0x0040901c; }
static inline u32 gr_fecs_curctx_r(void) { return 0x00409050; }
static inline u32 gr_fecs_nxtctx_r(void) { return 0x00409054; }
static inline u32 gr_fecs_engctl_r(void) { return 0x004090a4; }
static inline u32 gr_fecs_debug1_r(void) { return 0x00409090; }
static inline u32 gr_fecs_debuginfo_r(void) { return 0x00409094; }
/*
 * FECS ICD (in-circuit debug) command register: opcode field accessors
 * (4 bits at bit 0), the rreg/rstat opcodes, register index field, and
 * the read-data register.
 */
static inline u32 gr_fecs_icd_cmd_r(void) { return 0x00409200; }
static inline u32 gr_fecs_icd_cmd_opc_s(void) { return 4; }
static inline u32 gr_fecs_icd_cmd_opc_f(u32 v) { return (v & 0xf) << 0; }
static inline u32 gr_fecs_icd_cmd_opc_m(void) { return 0xf << 0; }
static inline u32 gr_fecs_icd_cmd_opc_v(u32 r) { return (r >> 0) & 0xf; }
static inline u32 gr_fecs_icd_cmd_opc_rreg_f(void) { return 0x8; }
static inline u32 gr_fecs_icd_cmd_opc_rstat_f(void) { return 0xe; }
static inline u32 gr_fecs_icd_cmd_idx_f(u32 v) { return (v & 0x1f) << 8; }
static inline u32 gr_fecs_icd_rdata_r(void) { return 0x0040920c; }
/*
 * FECS IMEM/DMEM access windows: indexed control/data/tag registers
 * (IMEM stride 16 bytes, DMEM stride 8 bytes) with offset, block and
 * auto-increment-on-write field accessors.
 */
static inline u32 gr_fecs_imemc_r(u32 i) { return 0x00409180 + i*16; }
static inline u32 gr_fecs_imemc_offs_f(u32 v) { return (v & 0x3f) << 2; }
static inline u32 gr_fecs_imemc_blk_f(u32 v) { return (v & 0xff) << 8; }
static inline u32 gr_fecs_imemc_aincw_f(u32 v) { return (v & 0x1) << 24; }
static inline u32 gr_fecs_imemd_r(u32 i) { return 0x00409184 + i*16; }
static inline u32 gr_fecs_imemt_r(u32 i) { return 0x00409188 + i*16; }
static inline u32 gr_fecs_imemt_tag_f(u32 v) { return (v & 0xffff) << 0; }
static inline u32 gr_fecs_dmemc_r(u32 i) { return 0x004091c0 + i*8; }
static inline u32 gr_fecs_dmemc_offs_s(void) { return 6; }
static inline u32 gr_fecs_dmemc_offs_f(u32 v) { return (v & 0x3f) << 2; }
static inline u32 gr_fecs_dmemc_offs_m(void) { return 0x3f << 2; }
static inline u32 gr_fecs_dmemc_offs_v(u32 r) { return (r >> 2) & 0x3f; }
static inline u32 gr_fecs_dmemc_blk_f(u32 v) { return (v & 0xff) << 8; }
static inline u32 gr_fecs_dmemc_aincw_f(u32 v) { return (v & 0x1) << 24; }
static inline u32 gr_fecs_dmemd_r(u32 i) { return 0x004091c4 + i*8; }
/*
 * FECS DMA transfer registers (base, memory/framebuffer offsets,
 * command with imem/write/size/ctxdma fields), boot vector, and the
 * falcon hardware-config / RM registers.
 */
static inline u32 gr_fecs_dmatrfbase_r(void) { return 0x00409110; }
static inline u32 gr_fecs_dmatrfmoffs_r(void) { return 0x00409114; }
static inline u32 gr_fecs_dmatrffboffs_r(void) { return 0x0040911c; }
static inline u32 gr_fecs_dmatrfcmd_r(void) { return 0x00409118; }
static inline u32 gr_fecs_dmatrfcmd_imem_f(u32 v) { return (v & 0x1) << 4; }
static inline u32 gr_fecs_dmatrfcmd_write_f(u32 v) { return (v & 0x1) << 5; }
static inline u32 gr_fecs_dmatrfcmd_size_f(u32 v) { return (v & 0x7) << 8; }
static inline u32 gr_fecs_dmatrfcmd_ctxdma_f(u32 v) { return (v & 0x7) << 12; }
static inline u32 gr_fecs_bootvec_r(void) { return 0x00409104; }
static inline u32 gr_fecs_bootvec_vec_f(u32 v) { return (v & 0xffffffff) << 0; }
static inline u32 gr_fecs_falcon_hwcfg_r(void) { return 0x00409108; }
static inline u32 gr_gpcs_gpccs_falcon_hwcfg_r(void) { return 0x0041a108; }
static inline u32 gr_fecs_falcon_rm_r(void) { return 0x00409084; }
/*
 * gr_fecs_current_ctx: current-context register (0x409b00) holding a
 * 28-bit context pointer, a 2-bit memory-target field at bit 28, and a
 * valid bit at bit 31.
 */
static inline u32 gr_fecs_current_ctx_r(void) { return 0x00409b00; }
static inline u32 gr_fecs_current_ctx_ptr_f(u32 v) { return (v & 0xfffffff) << 0; }
static inline u32 gr_fecs_current_ctx_ptr_v(u32 r) { return (r >> 0) & 0xfffffff; }
static inline u32 gr_fecs_current_ctx_target_s(void) { return 2; }
static inline u32 gr_fecs_current_ctx_target_f(u32 v) { return (v & 0x3) << 28; }
static inline u32 gr_fecs_current_ctx_target_m(void) { return 0x3 << 28; }
static inline u32 gr_fecs_current_ctx_target_v(u32 r) { return (r >> 28) & 0x3; }
static inline u32 gr_fecs_current_ctx_target_vid_mem_f(void) { return 0x0; }
static inline u32 gr_fecs_current_ctx_valid_s(void) { return 1; }
static inline u32 gr_fecs_current_ctx_valid_f(u32 v) { return (v & 0x1) << 31; }
/*
 * Mask for the "valid" field (bit 31) of gr_fecs_current_ctx.
 * Fix: use an unsigned literal — `0x1 << 31` left-shifts a signed int
 * into its sign bit, which is undefined behavior in C (C99 6.5.7) and
 * is flagged by UBSan; `0x1U << 31` yields the same 0x80000000 value
 * with well-defined semantics.
 */
static inline u32 gr_fecs_current_ctx_valid_m(void)
{
	return 0x1U << 31;
}
/* Extract the valid bit (31) of gr_fecs_current_ctx; 0 = not valid. */
static inline u32 gr_fecs_current_ctx_valid_v(u32 r) { return (r >> 31) & 0x1; }
static inline u32 gr_fecs_current_ctx_valid_false_f(void) { return 0x0; }
/*
 * FECS method interface: data/push registers plus the firmware method
 * opcodes submitted through them (bind pointer, image-size discovery,
 * golden-context save/restore, reglist setup, ctxsw start/stop,
 * watchdog timeout).
 */
static inline u32 gr_fecs_method_data_r(void) { return 0x00409500; }
static inline u32 gr_fecs_method_push_r(void) { return 0x00409504; }
static inline u32 gr_fecs_method_push_adr_f(u32 v) { return (v & 0xfff) << 0; }
static inline u32 gr_fecs_method_push_adr_bind_pointer_v(void) { return 0x00000003; }
static inline u32 gr_fecs_method_push_adr_bind_pointer_f(void) { return 0x3; }
static inline u32 gr_fecs_method_push_adr_discover_image_size_v(void) { return 0x00000010; }
static inline u32 gr_fecs_method_push_adr_wfi_golden_save_v(void) { return 0x00000009; }
static inline u32 gr_fecs_method_push_adr_restore_golden_v(void) { return 0x00000015; }
static inline u32 gr_fecs_method_push_adr_discover_zcull_image_size_v(void) { return 0x00000016; }
static inline u32 gr_fecs_method_push_adr_discover_pm_image_size_v(void) { return 0x00000025; }
static inline u32 gr_fecs_method_push_adr_discover_reglist_image_size_v(void) { return 0x00000030; }
static inline u32 gr_fecs_method_push_adr_set_reglist_bind_instance_v(void) { return 0x00000031; }
static inline u32 gr_fecs_method_push_adr_set_reglist_virtual_address_v(void) { return 0x00000032; }
static inline u32 gr_fecs_method_push_adr_stop_ctxsw_v(void) { return 0x00000038; }
static inline u32 gr_fecs_method_push_adr_start_ctxsw_v(void) { return 0x00000039; }
static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void) { return 0x21; }
/*
 * FECS host interrupt enable register (0x409c24) and the enable bits
 * for ctxsw fault, unimplemented firmware/illegal method, and watchdog.
 */
static inline u32 gr_fecs_host_int_enable_r(void) { return 0x00409c24; }
static inline u32 gr_fecs_host_int_enable_fault_during_ctxsw_enable_f(void) { return 0x10000; }
static inline u32 gr_fecs_host_int_enable_umimp_firmware_method_enable_f(void) { return 0x20000; }
static inline u32 gr_fecs_host_int_enable_umimp_illegal_method_enable_f(void) { return 0x40000; }
static inline u32 gr_fecs_host_int_enable_watchdog_enable_f(void) { return 0x80000; }
/*
 * FECS context-switch reset control register (0x409614): halt,
 * engine-reset and context-reset controls for the SYS, GPC and BE
 * partitions, plus the context-state-store major revision register.
 */
static inline u32 gr_fecs_ctxsw_reset_ctl_r(void) { return 0x00409614; }
static inline u32 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f(void) { return 0x0; }
static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f(void) { return 0x0; }
static inline u32 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f(void) { return 0x0; }
static inline u32 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f(void) { return 0x10; }
static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f(void) { return 0x20; }
static inline u32 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f(void) { return 0x40; }
static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f(void) { return 0x0; }
static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f(void) { return 0x100; }
static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f(void) { return 0x0; }
static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f(void) { return 0x200; }
static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_s(void) { return 1; }
static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_f(u32 v) { return (v & 0x1) << 10; }
static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_m(void) { return 0x1 << 10; }
static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_v(u32 r) { return (r >> 10) & 0x1; }
static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f(void) { return 0x0; }
static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f(void) { return 0x400; }
static inline u32 gr_fecs_ctx_state_store_major_rev_id_r(void) { return 0x0040960c; }
/*
 * FECS ctxsw mailboxes: 8 indexed value registers (4-byte stride) with
 * separate set/clear register banks; pass/fail handshake values.
 */
static inline u32 gr_fecs_ctxsw_mailbox_r(u32 i) { return 0x00409800 + i*4; }
static inline u32 gr_fecs_ctxsw_mailbox__size_1_v(void) { return 0x00000008; }
static inline u32 gr_fecs_ctxsw_mailbox_value_f(u32 v) { return (v & 0xffffffff) << 0; }
static inline u32 gr_fecs_ctxsw_mailbox_value_pass_v(void) { return 0x00000001; }
static inline u32 gr_fecs_ctxsw_mailbox_value_fail_v(void) { return 0x00000002; }
static inline u32 gr_fecs_ctxsw_mailbox_set_r(u32 i) { return 0x00409820 + i*4; }
static inline u32 gr_fecs_ctxsw_mailbox_set_value_f(u32 v) { return (v & 0xffffffff) << 0; }
static inline u32 gr_fecs_ctxsw_mailbox_clear_r(u32 i) { return 0x00409840 + i*4; }
static inline u32 gr_fecs_ctxsw_mailbox_clear_value_f(u32 v) { return (v & 0xffffffff) << 0; }
/*
 * gr_fecs_fs: floorsweeping status register (0x409604) reporting the
 * number of available GPCs (bits 0..4) and FBPs (bits 16..20).
 */
static inline u32 gr_fecs_fs_r(void) { return 0x00409604; }
static inline u32 gr_fecs_fs_num_available_gpcs_s(void) { return 5; }
static inline u32 gr_fecs_fs_num_available_gpcs_f(u32 v) { return (v & 0x1f) << 0; }
static inline u32 gr_fecs_fs_num_available_gpcs_m(void) { return 0x1f << 0; }
static inline u32 gr_fecs_fs_num_available_gpcs_v(u32 r) { return (r >> 0) & 0x1f; }
static inline u32 gr_fecs_fs_num_available_fbps_s(void) { return 5; }
static inline u32 gr_fecs_fs_num_available_fbps_f(u32 v) { return (v & 0x1f) << 16; }
static inline u32 gr_fecs_fs_num_available_fbps_m(void) { return 0x1f << 16; }
static inline u32 gr_fecs_fs_num_available_fbps_v(u32 r) { return (r >> 16) & 0x1f; }
/*
 * FECS config register (imem size in bits 0..7) and the RC lanes
 * register (number of chains in bits 0..5).
 */
static inline u32 gr_fecs_cfg_r(void) { return 0x00409620; }
static inline u32 gr_fecs_cfg_imem_sz_v(u32 r) { return (r >> 0) & 0xff; }
static inline u32 gr_fecs_rc_lanes_r(void) { return 0x00409880; }
static inline u32 gr_fecs_rc_lanes_num_chains_s(void) { return 6; }
static inline u32 gr_fecs_rc_lanes_num_chains_f(u32 v) { return (v & 0x3f) << 0; }
static inline u32 gr_fecs_rc_lanes_num_chains_m(void) { return 0x3f << 0; }
static inline u32 gr_fecs_rc_lanes_num_chains_v(u32 r) { return (r >> 0) & 0x3f; }
/*
 * FECS ctxsw status register 1 (0x409400) with the arbiter-busy field
 * at bit 12.
 */
static inline u32 gr_fecs_ctxsw_status_1_r(void) { return 0x00409400; }
static inline u32 gr_fecs_ctxsw_status_1_arb_busy_s(void) { return 1; }
static inline u32 gr_fecs_ctxsw_status_1_arb_busy_f(u32 v) { return (v & 0x1) << 12; }
static inline u32 gr_fecs_ctxsw_status_1_arb_busy_m(void) { return 0x1 << 12; }
static inline u32 gr_fecs_ctxsw_status_1_arb_busy_v(u32 r) { return (r >> 12) & 0x1; }
/*
 * FECS arbiter context address and new-context registers: 28-bit
 * context pointer, 2-bit target field at bit 28, valid bit at bit 31.
 */
static inline u32 gr_fecs_arb_ctx_adr_r(void) { return 0x00409a24; }
static inline u32 gr_fecs_new_ctx_r(void) { return 0x00409b04; }
static inline u32 gr_fecs_new_ctx_ptr_s(void) { return 28; }
static inline u32 gr_fecs_new_ctx_ptr_f(u32 v) { return (v & 0xfffffff) << 0; }
static inline u32 gr_fecs_new_ctx_ptr_m(void) { return 0xfffffff << 0; }
static inline u32 gr_fecs_new_ctx_ptr_v(u32 r) { return (r >> 0) & 0xfffffff; }
static inline u32 gr_fecs_new_ctx_target_s(void) { return 2; }
static inline u32 gr_fecs_new_ctx_target_f(u32 v) { return (v & 0x3) << 28; }
static inline u32 gr_fecs_new_ctx_target_m(void) { return 0x3 << 28; }
static inline u32 gr_fecs_new_ctx_target_v(u32 r) { return (r >> 28) & 0x3; }
static inline u32 gr_fecs_new_ctx_valid_s(void) { return 1; }
static inline u32 gr_fecs_new_ctx_valid_f(u32 v) { return (v & 0x1) << 31; }
/*
 * Mask for the "valid" field (bit 31) of gr_fecs_new_ctx.
 * Fix: use an unsigned literal — `0x1 << 31` left-shifts a signed int
 * into its sign bit, which is undefined behavior in C (C99 6.5.7);
 * `0x1U << 31` yields the same 0x80000000 value with well-defined
 * semantics.
 */
static inline u32 gr_fecs_new_ctx_valid_m(void)
{
	return 0x1U << 31;
}
/* Extract the valid bit (31) of gr_fecs_new_ctx. */
static inline u32 gr_fecs_new_ctx_valid_v(u32 r) { return (r >> 31) & 0x1; }
/*
 * FECS arbiter context pointer register (28-bit pointer + 2-bit target
 * at bit 28) and the arbiter command register (5-bit command field).
 */
static inline u32 gr_fecs_arb_ctx_ptr_r(void) { return 0x00409a0c; }
static inline u32 gr_fecs_arb_ctx_ptr_ptr_s(void) { return 28; }
static inline u32 gr_fecs_arb_ctx_ptr_ptr_f(u32 v) { return (v & 0xfffffff) << 0; }
static inline u32 gr_fecs_arb_ctx_ptr_ptr_m(void) { return 0xfffffff << 0; }
static inline u32 gr_fecs_arb_ctx_ptr_ptr_v(u32 r) { return (r >> 0) & 0xfffffff; }
static inline u32 gr_fecs_arb_ctx_ptr_target_s(void) { return 2; }
static inline u32 gr_fecs_arb_ctx_ptr_target_f(u32 v) { return (v & 0x3) << 28; }
static inline u32 gr_fecs_arb_ctx_ptr_target_m(void) { return 0x3 << 28; }
static inline u32 gr_fecs_arb_ctx_ptr_target_v(u32 r) { return (r >> 28) & 0x3; }
static inline u32 gr_fecs_arb_ctx_cmd_r(void) { return 0x00409a10; }
static inline u32 gr_fecs_arb_ctx_cmd_cmd_s(void) { return 5; }
static inline u32 gr_fecs_arb_ctx_cmd_cmd_f(u32 v) { return (v & 0x1f) << 0; }
static inline u32 gr_fecs_arb_ctx_cmd_cmd_m(void) { return 0x1f << 0; }
static inline u32 gr_fecs_arb_ctx_cmd_cmd_v(u32 r) { return (r >> 0) & 0x1f; }
/*
 * RSTR2D GPC map registers 0..5 and the map-table config register
 * (row offset in bits 0..7, entry count in bits 8..15).
 */
static inline u32 gr_rstr2d_gpc_map0_r(void) { return 0x0040780c; }
static inline u32 gr_rstr2d_gpc_map1_r(void) { return 0x00407810; }
static inline u32 gr_rstr2d_gpc_map2_r(void) { return 0x00407814; }
static inline u32 gr_rstr2d_gpc_map3_r(void) { return 0x00407818; }
static inline u32 gr_rstr2d_gpc_map4_r(void) { return 0x0040781c; }
static inline u32 gr_rstr2d_gpc_map5_r(void) { return 0x00407820; }
static inline u32 gr_rstr2d_map_table_cfg_r(void) { return 0x004078bc; }
static inline u32 gr_rstr2d_map_table_cfg_row_offset_f(u32 v) { return (v & 0xff) << 0; }
static inline u32 gr_rstr2d_map_table_cfg_num_entries_f(u32 v) { return (v & 0xff) << 8; }
/* GR_PD_HWW_ESR: PD hardware-warning error status; reset bit 30, enable bit 31. */
static inline u32 gr_pd_hww_esr_r(void)
{
	return 0x00406018;
}
static inline u32 gr_pd_hww_esr_reset_active_f(void)
{
	return 0x40000000;
}
static inline u32 gr_pd_hww_esr_en_enable_f(void)
{
	return 0x80000000;
}
/*
 * GR_PD_NUM_TPC_PER_GPC: 4-register array, each packing eight 4-bit
 * per-GPC TPC counts (count0 in bits 3:0 ... count7 in bits 31:28).
 */
static inline u32 gr_pd_num_tpc_per_gpc_r(u32 i)
{
	return 0x00406028 + i*4;
}
static inline u32 gr_pd_num_tpc_per_gpc__size_1_v(void)
{
	return 0x00000004;
}
static inline u32 gr_pd_num_tpc_per_gpc_count0_f(u32 v)
{
	return (v & 0xf) << 0;
}
static inline u32 gr_pd_num_tpc_per_gpc_count1_f(u32 v)
{
	return (v & 0xf) << 4;
}
static inline u32 gr_pd_num_tpc_per_gpc_count2_f(u32 v)
{
	return (v & 0xf) << 8;
}
static inline u32 gr_pd_num_tpc_per_gpc_count3_f(u32 v)
{
	return (v & 0xf) << 12;
}
static inline u32 gr_pd_num_tpc_per_gpc_count4_f(u32 v)
{
	return (v & 0xf) << 16;
}
static inline u32 gr_pd_num_tpc_per_gpc_count5_f(u32 v)
{
	return (v & 0xf) << 20;
}
static inline u32 gr_pd_num_tpc_per_gpc_count6_f(u32 v)
{
	return (v & 0xf) << 24;
}
static inline u32 gr_pd_num_tpc_per_gpc_count7_f(u32 v)
{
	return (v & 0xf) << 28;
}
/* GR_PD_AB_DIST_CFG0: timeslice enable in bit 31. */
static inline u32 gr_pd_ab_dist_cfg0_r(void)
{
	return 0x004064c0;
}
static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_en_f(void)
{
	return 0x80000000;
}
static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_dis_f(void)
{
	return 0x0;
}
/* GR_PD_AB_DIST_CFG1: max batches (15:0) and max output (26:16). */
static inline u32 gr_pd_ab_dist_cfg1_r(void)
{
	return 0x004064c4;
}
static inline u32 gr_pd_ab_dist_cfg1_max_batches_init_f(void)
{
	return 0xffff;
}
static inline u32 gr_pd_ab_dist_cfg1_max_output_f(u32 v)
{
	return (v & 0x7ff) << 16;
}
static inline u32 gr_pd_ab_dist_cfg1_max_output_granularity_v(void)
{
	return 0x00000080;
}
/* GR_PD_AB_DIST_CFG2: token limit (11:0) and state limit (27:16). */
static inline u32 gr_pd_ab_dist_cfg2_r(void)
{
	return 0x004064c8;
}
static inline u32 gr_pd_ab_dist_cfg2_token_limit_f(u32 v)
{
	return (v & 0xfff) << 0;
}
static inline u32 gr_pd_ab_dist_cfg2_token_limit_init_v(void)
{
	return 0x00000100;
}
static inline u32 gr_pd_ab_dist_cfg2_state_limit_f(u32 v)
{
	return (v & 0xfff) << 16;
}
static inline u32 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(void)
{
	return 0x00000020;
}
static inline u32 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(void)
{
	return 0x00000062;
}
/* GR_PD_PAGEPOOL: total page count (7:0), valid flag in bit 31. */
static inline u32 gr_pd_pagepool_r(void)
{
	return 0x004064cc;
}
static inline u32 gr_pd_pagepool_total_pages_f(u32 v)
{
	return (v & 0xff) << 0;
}
static inline u32 gr_pd_pagepool_valid_true_f(void)
{
	return 0x80000000;
}
/*
 * GR_PD_DIST_SKIP_TABLE: 8-register array; each register packs four
 * 8-bit GPC masks for indices 4n+0 .. 4n+3.
 */
static inline u32 gr_pd_dist_skip_table_r(u32 i)
{
	return 0x004064d0 + i*4;
}
static inline u32 gr_pd_dist_skip_table__size_1_v(void)
{
	return 0x00000008;
}
static inline u32 gr_pd_dist_skip_table_gpc_4n0_mask_f(u32 v)
{
	return (v & 0xff) << 0;
}
static inline u32 gr_pd_dist_skip_table_gpc_4n1_mask_f(u32 v)
{
	return (v & 0xff) << 8;
}
static inline u32 gr_pd_dist_skip_table_gpc_4n2_mask_f(u32 v)
{
	return (v & 0xff) << 16;
}
static inline u32 gr_pd_dist_skip_table_gpc_4n3_mask_f(u32 v)
{
	return (v & 0xff) << 24;
}
/* GR_PD_ALPHA_RATIO_TABLE: 256-entry array, same 4x8-bit packing as above. */
static inline u32 gr_pd_alpha_ratio_table_r(u32 i)
{
	return 0x00406800 + i*4;
}
static inline u32 gr_pd_alpha_ratio_table__size_1_v(void)
{
	return 0x00000100;
}
static inline u32 gr_pd_alpha_ratio_table_gpc_4n0_mask_f(u32 v)
{
	return (v & 0xff) << 0;
}
static inline u32 gr_pd_alpha_ratio_table_gpc_4n1_mask_f(u32 v)
{
	return (v & 0xff) << 8;
}
static inline u32 gr_pd_alpha_ratio_table_gpc_4n2_mask_f(u32 v)
{
	return (v & 0xff) << 16;
}
static inline u32 gr_pd_alpha_ratio_table_gpc_4n3_mask_f(u32 v)
{
	return (v & 0xff) << 24;
}
/* GR_PD_BETA_RATIO_TABLE: 256-entry array, same 4x8-bit packing as above. */
static inline u32 gr_pd_beta_ratio_table_r(u32 i)
{
	return 0x00406c00 + i*4;
}
static inline u32 gr_pd_beta_ratio_table__size_1_v(void)
{
	return 0x00000100;
}
static inline u32 gr_pd_beta_ratio_table_gpc_4n0_mask_f(u32 v)
{
	return (v & 0xff) << 0;
}
static inline u32 gr_pd_beta_ratio_table_gpc_4n1_mask_f(u32 v)
{
	return (v & 0xff) << 8;
}
static inline u32 gr_pd_beta_ratio_table_gpc_4n2_mask_f(u32 v)
{
	return (v & 0xff) << 16;
}
static inline u32 gr_pd_beta_ratio_table_gpc_4n3_mask_f(u32 v)
{
	return (v & 0xff) << 24;
}
/* GR_DS_DEBUG: timeslice mode toggled by bit 27. */
static inline u32 gr_ds_debug_r(void)
{
	return 0x00405800;
}
static inline u32 gr_ds_debug_timeslice_mode_disable_f(void)
{
	return 0x0;
}
static inline u32 gr_ds_debug_timeslice_mode_enable_f(void)
{
	return 0x8000000;
}
/* GR_DS_ZBC_COLOR_{R,G,B,A}: full 32-bit ZBC clear-color component values. */
static inline u32 gr_ds_zbc_color_r_r(void)
{
	return 0x00405804;
}
static inline u32 gr_ds_zbc_color_r_val_f(u32 v)
{
	return (v & 0xffffffff) << 0;
}
static inline u32 gr_ds_zbc_color_g_r(void)
{
	return 0x00405808;
}
static inline u32 gr_ds_zbc_color_g_val_f(u32 v)
{
	return (v & 0xffffffff) << 0;
}
static inline u32 gr_ds_zbc_color_b_r(void)
{
	return 0x0040580c;
}
static inline u32 gr_ds_zbc_color_b_val_f(u32 v)
{
	return (v & 0xffffffff) << 0;
}
static inline u32 gr_ds_zbc_color_a_r(void)
{
	return 0x00405810;
}
static inline u32 gr_ds_zbc_color_a_val_f(u32 v)
{
	return (v & 0xffffffff) << 0;
}
/* GR_DS_ZBC_COLOR_FMT: 7-bit color format code (0 = invalid entry). */
static inline u32 gr_ds_zbc_color_fmt_r(void)
{
	return 0x00405814;
}
static inline u32 gr_ds_zbc_color_fmt_val_f(u32 v)
{
	return (v & 0x7f) << 0;
}
static inline u32 gr_ds_zbc_color_fmt_val_invalid_f(void)
{
	return 0x0;
}
static inline u32 gr_ds_zbc_color_fmt_val_zero_v(void)
{
	return 0x00000001;
}
static inline u32 gr_ds_zbc_color_fmt_val_unorm_one_v(void)
{
	return 0x00000002;
}
static inline u32 gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v(void)
{
	return 0x00000004;
}
/* GR_DS_ZBC_Z: 32-bit ZBC depth clear value. */
static inline u32 gr_ds_zbc_z_r(void)
{
	return 0x00405818;
}
static inline u32 gr_ds_zbc_z_val_s(void)
{
	return 32;
}
static inline u32 gr_ds_zbc_z_val_f(u32 v)
{
	return (v & 0xffffffff) << 0;
}
static inline u32 gr_ds_zbc_z_val_m(void)
{
	return 0xffffffff << 0;
}
static inline u32 gr_ds_zbc_z_val_v(u32 r)
{
	return (r >> 0) & 0xffffffff;
}
static inline u32 gr_ds_zbc_z_val__init_v(void)
{
	return 0x00000000;
}
static inline u32 gr_ds_zbc_z_val__init_f(void)
{
	return 0x0;
}
/* GR_DS_ZBC_Z_FMT: 1-bit depth format (0 = invalid, 1 = fp32). */
static inline u32 gr_ds_zbc_z_fmt_r(void)
{
	return 0x0040581c;
}
static inline u32 gr_ds_zbc_z_fmt_val_f(u32 v)
{
	return (v & 0x1) << 0;
}
static inline u32 gr_ds_zbc_z_fmt_val_invalid_f(void)
{
	return 0x0;
}
static inline u32 gr_ds_zbc_z_fmt_val_fp32_v(void)
{
	return 0x00000001;
}
/* GR_DS_ZBC_TBL_INDEX: 4-bit ZBC table slot selector. */
static inline u32 gr_ds_zbc_tbl_index_r(void)
{
	return 0x00405820;
}
static inline u32 gr_ds_zbc_tbl_index_val_f(u32 v)
{
	return (v & 0xf) << 0;
}
/* GR_DS_ZBC_TBL_LD: table-load control (color/z select, write action, trigger). */
static inline u32 gr_ds_zbc_tbl_ld_r(void)
{
	return 0x00405824;
}
static inline u32 gr_ds_zbc_tbl_ld_select_c_f(void)
{
	return 0x0;
}
static inline u32 gr_ds_zbc_tbl_ld_select_z_f(void)
{
	return 0x1;
}
static inline u32 gr_ds_zbc_tbl_ld_action_write_f(void)
{
	return 0x0;
}
static inline u32 gr_ds_zbc_tbl_ld_trigger_active_f(void)
{
	return 0x4;
}
/* GR_DS_TGA_CONSTRAINTLOGIC: alpha CB size in 11:0, beta CB size in 27:16. */
static inline u32 gr_ds_tga_constraintlogic_r(void)
{
	return 0x00405830;
}
static inline u32 gr_ds_tga_constraintlogic_beta_cbsize_f(u32 v)
{
	return (v & 0xfff) << 16;
}
static inline u32 gr_ds_tga_constraintlogic_alpha_cbsize_f(u32 v)
{
	return (v & 0xfff) << 0;
}
/* GR_DS_HWW_ESR: DS error status; reset in bit 30, enable in bit 31. */
static inline u32 gr_ds_hww_esr_r(void)
{
	return 0x00405840;
}
static inline u32 gr_ds_hww_esr_reset_s(void)
{
	return 1;
}
static inline u32 gr_ds_hww_esr_reset_f(u32 v)
{
	return (v & 0x1) << 30;
}
static inline u32 gr_ds_hww_esr_reset_m(void)
{
	return 0x1 << 30;
}
static inline u32 gr_ds_hww_esr_reset_v(u32 r)
{
	return (r >> 30) & 0x1;
}
static inline u32 gr_ds_hww_esr_reset_task_v(void)
{
	return 0x00000001;
}
static inline u32 gr_ds_hww_esr_reset_task_f(void)
{
	return 0x40000000;
}
static inline u32 gr_ds_hww_esr_en_enabled_f(void)
{
	return 0x80000000;
}
/*
 * GR_DS_HWW_REPORT_MASK: per-SPH error report-enable bits.
 * Bit n of the register enables reporting for sph<n> errors, so each
 * helper simply returns 1 shifted to its bit position.
 */
static inline u32 gr_ds_hww_report_mask_r(void)
{
	return 0x00405844;
}
static inline u32 gr_ds_hww_report_mask_sph0_err_report_f(void)
{
	return 1U << 0;
}
static inline u32 gr_ds_hww_report_mask_sph1_err_report_f(void)
{
	return 1U << 1;
}
static inline u32 gr_ds_hww_report_mask_sph2_err_report_f(void)
{
	return 1U << 2;
}
static inline u32 gr_ds_hww_report_mask_sph3_err_report_f(void)
{
	return 1U << 3;
}
static inline u32 gr_ds_hww_report_mask_sph4_err_report_f(void)
{
	return 1U << 4;
}
static inline u32 gr_ds_hww_report_mask_sph5_err_report_f(void)
{
	return 1U << 5;
}
static inline u32 gr_ds_hww_report_mask_sph6_err_report_f(void)
{
	return 1U << 6;
}
static inline u32 gr_ds_hww_report_mask_sph7_err_report_f(void)
{
	return 1U << 7;
}
static inline u32 gr_ds_hww_report_mask_sph8_err_report_f(void)
{
	return 1U << 8;
}
static inline u32 gr_ds_hww_report_mask_sph9_err_report_f(void)
{
	return 1U << 9;
}
static inline u32 gr_ds_hww_report_mask_sph10_err_report_f(void)
{
	return 1U << 10;
}
static inline u32 gr_ds_hww_report_mask_sph11_err_report_f(void)
{
	return 1U << 11;
}
static inline u32 gr_ds_hww_report_mask_sph12_err_report_f(void)
{
	return 1U << 12;
}
static inline u32 gr_ds_hww_report_mask_sph13_err_report_f(void)
{
	return 1U << 13;
}
static inline u32 gr_ds_hww_report_mask_sph14_err_report_f(void)
{
	return 1U << 14;
}
static inline u32 gr_ds_hww_report_mask_sph15_err_report_f(void)
{
	return 1U << 15;
}
static inline u32 gr_ds_hww_report_mask_sph16_err_report_f(void)
{
	return 1U << 16;
}
static inline u32 gr_ds_hww_report_mask_sph17_err_report_f(void)
{
	return 1U << 17;
}
static inline u32 gr_ds_hww_report_mask_sph18_err_report_f(void)
{
	return 1U << 18;
}
static inline u32 gr_ds_hww_report_mask_sph19_err_report_f(void)
{
	return 1U << 19;
}
static inline u32 gr_ds_hww_report_mask_sph20_err_report_f(void)
{
	return 1U << 20;
}
static inline u32 gr_ds_hww_report_mask_sph21_err_report_f(void)
{
	return 1U << 21;
}
static inline u32 gr_ds_hww_report_mask_sph22_err_report_f(void)
{
	return 1U << 22;
}
static inline u32 gr_ds_hww_report_mask_sph23_err_report_f(void)
{
	return 1U << 23;
}
/* GR_DS_NUM_TPC_PER_GPC: per-index TPC count registers (stride 4). */
static inline u32 gr_ds_num_tpc_per_gpc_r(u32 i)
{
	return 0x00405870 + i*4;
}
/* GR_SCC_BUNDLE_CB_BASE: bundle CB base, address bits 39:8 (256 B aligned). */
static inline u32 gr_scc_bundle_cb_base_r(void)
{
	return 0x00408004;
}
static inline u32 gr_scc_bundle_cb_base_addr_39_8_f(u32 v)
{
	return (v & 0xffffffff) << 0;
}
static inline u32 gr_scc_bundle_cb_base_addr_39_8_align_bits_v(void)
{
	return 0x00000008;
}
/* GR_SCC_BUNDLE_CB_SIZE: size in 256 B units (10:0), valid flag in bit 31. */
static inline u32 gr_scc_bundle_cb_size_r(void)
{
	return 0x00408008;
}
static inline u32 gr_scc_bundle_cb_size_div_256b_f(u32 v)
{
	return (v & 0x7ff) << 0;
}
static inline u32 gr_scc_bundle_cb_size_div_256b__prod_v(void)
{
	return 0x00000018;
}
static inline u32 gr_scc_bundle_cb_size_div_256b_byte_granularity_v(void)
{
	return 0x00000100;
}
static inline u32 gr_scc_bundle_cb_size_valid_false_v(void)
{
	return 0x00000000;
}
static inline u32 gr_scc_bundle_cb_size_valid_false_f(void)
{
	return 0x0;
}
static inline u32 gr_scc_bundle_cb_size_valid_true_f(void)
{
	return 0x80000000;
}
/* GR_SCC_PAGEPOOL_BASE: page pool base, address bits 39:8 (256 B aligned). */
static inline u32 gr_scc_pagepool_base_r(void)
{
	return 0x0040800c;
}
static inline u32 gr_scc_pagepool_base_addr_39_8_f(u32 v)
{
	return (v & 0xffffffff) << 0;
}
static inline u32 gr_scc_pagepool_base_addr_39_8_align_bits_v(void)
{
	return 0x00000008;
}
/*
 * GR_SCC_PAGEPOOL: total pages in 7:0 (0 means hw max of 0x80 pages),
 * max valid pages in 15:8, valid flag in bit 31.
 */
static inline u32 gr_scc_pagepool_r(void)
{
	return 0x00408010;
}
static inline u32 gr_scc_pagepool_total_pages_f(u32 v)
{
	return (v & 0xff) << 0;
}
static inline u32 gr_scc_pagepool_total_pages_hwmax_v(void)
{
	return 0x00000000;
}
static inline u32 gr_scc_pagepool_total_pages_hwmax_value_v(void)
{
	return 0x00000080;
}
static inline u32 gr_scc_pagepool_total_pages_byte_granularity_v(void)
{
	return 0x00000100;
}
static inline u32 gr_scc_pagepool_max_valid_pages_s(void)
{
	return 8;
}
static inline u32 gr_scc_pagepool_max_valid_pages_f(u32 v)
{
	return (v & 0xff) << 8;
}
static inline u32 gr_scc_pagepool_max_valid_pages_m(void)
{
	return 0xff << 8;
}
static inline u32 gr_scc_pagepool_max_valid_pages_v(u32 r)
{
	return (r >> 8) & 0xff;
}
static inline u32 gr_scc_pagepool_valid_true_f(void)
{
	return 0x80000000;
}
/* GR_SCC_INIT: RAM init trigger in bit 0. */
static inline u32 gr_scc_init_r(void)
{
	return 0x0040802c;
}
static inline u32 gr_scc_init_ram_trigger_f(void)
{
	return 0x1;
}
/* GR_SCC_HWW_ESR: SCC error status; reset bit 30, enable bit 31. */
static inline u32 gr_scc_hww_esr_r(void)
{
	return 0x00408030;
}
static inline u32 gr_scc_hww_esr_reset_active_f(void)
{
	return 0x40000000;
}
static inline u32 gr_scc_hww_esr_en_enable_f(void)
{
	return 0x80000000;
}
/* GR_SKED_HWW_ESR: SKED error status; reset bit 30. */
static inline u32 gr_sked_hww_esr_r(void)
{
	return 0x00407020;
}
static inline u32 gr_sked_hww_esr_reset_active_f(void)
{
	return 0x40000000;
}
/* GR_CWD_FS: floorswept GPC count (7:0) and TPC count (15:8). */
static inline u32 gr_cwd_fs_r(void)
{
	return 0x00405b00;
}
static inline u32 gr_cwd_fs_num_gpcs_f(u32 v)
{
	return (v & 0xff) << 0;
}
static inline u32 gr_cwd_fs_num_tpcs_f(u32 v)
{
	return (v & 0xff) << 8;
}
/* GR_GPC0_FS_GPC: available TPCs (4:0) and ZCULLs (20:16) on GPC0. */
static inline u32 gr_gpc0_fs_gpc_r(void)
{
	return 0x00502608;
}
static inline u32 gr_gpc0_fs_gpc_num_available_tpcs_v(u32 r)
{
	return (r >> 0) & 0x1f;
}
static inline u32 gr_gpc0_fs_gpc_num_available_zculls_v(u32 r)
{
	return (r >> 16) & 0x1f;
}
/* GR_GPC0_CFG: IMEM size field in bits 7:0. */
static inline u32 gr_gpc0_cfg_r(void)
{
	return 0x00502620;
}
static inline u32 gr_gpc0_cfg_imem_sz_v(u32 r)
{
	return (r >> 0) & 0xff;
}
/* GR_GPCCS_RC_LANES: 6-bit chain count. */
static inline u32 gr_gpccs_rc_lanes_r(void)
{
	return 0x00502880;
}
static inline u32 gr_gpccs_rc_lanes_num_chains_s(void)
{
	return 6;
}
static inline u32 gr_gpccs_rc_lanes_num_chains_f(u32 v)
{
	return (v & 0x3f) << 0;
}
static inline u32 gr_gpccs_rc_lanes_num_chains_m(void)
{
	return 0x3f << 0;
}
static inline u32 gr_gpccs_rc_lanes_num_chains_v(u32 r)
{
	return (r >> 0) & 0x3f;
}
/*
 * GR_GPCCS_RC_LANE_SIZE: lane size, 24-bit value.
 * NOTE(review): the stride here is i*0, so every index maps to the same
 * offset even though __size_1_v() reports 0x10 entries.  This matches
 * the generated header as-is — confirm against the register manuals
 * before relying on per-index addressing.
 */
static inline u32 gr_gpccs_rc_lane_size_r(u32 i)
{
	return 0x00502910 + i*0;
}
static inline u32 gr_gpccs_rc_lane_size__size_1_v(void)
{
	return 0x00000010;
}
static inline u32 gr_gpccs_rc_lane_size_v_s(void)
{
	return 24;
}
static inline u32 gr_gpccs_rc_lane_size_v_f(u32 v)
{
	return (v & 0xffffff) << 0;
}
static inline u32 gr_gpccs_rc_lane_size_v_m(void)
{
	return 0xffffff << 0;
}
static inline u32 gr_gpccs_rc_lane_size_v_v(u32 r)
{
	return (r >> 0) & 0xffffff;
}
static inline u32 gr_gpccs_rc_lane_size_v_0_v(void)
{
	return 0x00000000;
}
static inline u32 gr_gpccs_rc_lane_size_v_0_f(void)
{
	return 0x0;
}
/* GR_GPC0_ZCULL_FS: SM count (8:0) and active bank count (19:16). */
static inline u32 gr_gpc0_zcull_fs_r(void)
{
	return 0x00500910;
}
static inline u32 gr_gpc0_zcull_fs_num_sms_f(u32 v)
{
	return (v & 0x1ff) << 0;
}
static inline u32 gr_gpc0_zcull_fs_num_active_banks_f(u32 v)
{
	return (v & 0xf) << 16;
}
/* GR_GPC0_ZCULL_RAM_ADDR: tiles/hypertile-row (3:0), row offset (11:8). */
static inline u32 gr_gpc0_zcull_ram_addr_r(void)
{
	return 0x00500914;
}
static inline u32 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(u32 v)
{
	return (v & 0xf) << 0;
}
static inline u32 gr_gpc0_zcull_ram_addr_row_offset_f(u32 v)
{
	return (v & 0xf) << 8;
}
/* GR_GPC0_ZCULL_SM_NUM_RCP: 24-bit conservative reciprocal of SM count. */
static inline u32 gr_gpc0_zcull_sm_num_rcp_r(void)
{
	return 0x00500918;
}
static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative_f(u32 v)
{
	return (v & 0xffffff) << 0;
}
static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative__max_v(void)
{
	return 0x00800000;
}
/* GR_GPC0_ZCULL_TOTAL_RAM_SIZE: aliquot count in bits 15:0. */
static inline u32 gr_gpc0_zcull_total_ram_size_r(void)
{
	return 0x00500920;
}
static inline u32 gr_gpc0_zcull_total_ram_size_num_aliquots_f(u32 v)
{
	return (v & 0xffff) << 0;
}
/* GR_GPC0_ZCULL_ZCSIZE: per-region size registers (stride 32). */
static inline u32 gr_gpc0_zcull_zcsize_r(u32 i)
{
	return 0x00500a04 + i*32;
}
static inline u32 gr_gpc0_zcull_zcsize_height_subregion__multiple_v(void)
{
	return 0x00000040;
}
static inline u32 gr_gpc0_zcull_zcsize_width_subregion__multiple_v(void)
{
	return 0x00000010;
}
/* GR_GPC0_GPM_PD: active TPC count, per-index SM ids, PES->TPC id masks. */
static inline u32 gr_gpc0_gpm_pd_active_tpcs_r(void)
{
	return 0x00500c08;
}
static inline u32 gr_gpc0_gpm_pd_active_tpcs_num_f(u32 v)
{
	return (v & 0x7) << 0;
}
static inline u32 gr_gpc0_gpm_pd_sm_id_r(u32 i)
{
	return 0x00500c10 + i*4;
}
static inline u32 gr_gpc0_gpm_pd_sm_id_id_f(u32 v)
{
	return (v & 0xff) << 0;
}
static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(u32 i)
{
	return 0x00500c30 + i*4;
}
static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(u32 r)
{
	return (r >> 0) & 0xff;
}
/* GR_GPC0_GPM_SD: active TPC count (2:0). */
static inline u32 gr_gpc0_gpm_sd_active_tpcs_r(void)
{
	return 0x00500c8c;
}
static inline u32 gr_gpc0_gpm_sd_active_tpcs_num_f(u32 v)
{
	return (v & 0x7) << 0;
}
/* GR_GPC0_TPC0 SM-id configuration for PE, L1C and SM units (16-bit id). */
static inline u32 gr_gpc0_tpc0_pe_cfg_smid_r(void)
{
	return 0x00504088;
}
static inline u32 gr_gpc0_tpc0_pe_cfg_smid_value_f(u32 v)
{
	return (v & 0xffff) << 0;
}
static inline u32 gr_gpc0_tpc0_l1c_cfg_smid_r(void)
{
	return 0x005044e8;
}
static inline u32 gr_gpc0_tpc0_l1c_cfg_smid_value_f(u32 v)
{
	return (v & 0xffff) << 0;
}
static inline u32 gr_gpc0_tpc0_sm_cfg_r(void)
{
	return 0x00504698;
}
static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
{
	return (v & 0xffff) << 0;
}
/*
 * GR_GPC0_PPC0_PES_VSC_STREM: master-PE flag in bit 0.
 * NOTE(review): "strem" (sic) is the name produced by the header
 * generator; keep the spelling so callers continue to link.
 */
static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
{
	return 0x00503018;
}
static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(void)
{
	return 0x1 << 0;
}
static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f(void)
{
	return 0x1;
}
/*
 * GR_GPC0_PPC0_CBM_CFG: circular-buffer start offset (15:0), size
 * (27:16, in 0x20-unit granularity), timeslice mode in bit 28.
 */
static inline u32 gr_gpc0_ppc0_cbm_cfg_r(void)
{
	return 0x005030c0;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_f(u32 v)
{
	return (v & 0xffff) << 0;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_m(void)
{
	return 0xffff << 0;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_v(u32 r)
{
	return (r >> 0) & 0xffff;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg_size_f(u32 v)
{
	return (v & 0xfff) << 16;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg_size_m(void)
{
	return 0xfff << 16;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg_size_v(u32 r)
{
	return (r >> 16) & 0xfff;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg_size_default_v(void)
{
	return 0x00000240;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg_size_granularity_v(void)
{
	return 0x00000020;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(u32 v)
{
	return (v & 0x1) << 28;
}
/* GR_GPC0_PPC0_CBM_CFG2: second CB config, same start/size layout. */
static inline u32 gr_gpc0_ppc0_cbm_cfg2_r(void)
{
	return 0x005030e4;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg2_start_offset_f(u32 v)
{
	return (v & 0xffff) << 0;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_f(u32 v)
{
	return (v & 0xfff) << 16;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_m(void)
{
	return 0xfff << 16;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_v(u32 r)
{
	return (r >> 16) & 0xfff;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_default_v(void)
{
	return 0x00000648;
}
static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_granularity_v(void)
{
	return 0x00000020;
}
/*
 * GR_GPCCS_FALCON_ADDR: GPCCS falcon address register.  The address is
 * split into a 6-bit lsb field (bits 5:0) and a 6-bit msb field
 * (bits 11:6); the 12-bit ext view covers both fields at once.
 */
static inline u32 gr_gpccs_falcon_addr_r(void)
{
	return 0x0041a0ac;
}
static inline u32 gr_gpccs_falcon_addr_lsb_s(void)
{
	return 6;
}
static inline u32 gr_gpccs_falcon_addr_lsb_f(u32 v)
{
	return v & 0x3fU;
}
static inline u32 gr_gpccs_falcon_addr_lsb_m(void)
{
	return 0x3fU;
}
static inline u32 gr_gpccs_falcon_addr_lsb_v(u32 r)
{
	return r & 0x3fU;
}
static inline u32 gr_gpccs_falcon_addr_lsb_init_v(void)
{
	return 0U;
}
static inline u32 gr_gpccs_falcon_addr_lsb_init_f(void)
{
	return 0U;
}
static inline u32 gr_gpccs_falcon_addr_msb_s(void)
{
	return 6;
}
static inline u32 gr_gpccs_falcon_addr_msb_f(u32 v)
{
	return (v << 6) & 0xfc0U;
}
static inline u32 gr_gpccs_falcon_addr_msb_m(void)
{
	return 0xfc0U;
}
static inline u32 gr_gpccs_falcon_addr_msb_v(u32 r)
{
	return (r >> 6) & 0x3fU;
}
static inline u32 gr_gpccs_falcon_addr_msb_init_v(void)
{
	return 0U;
}
static inline u32 gr_gpccs_falcon_addr_msb_init_f(void)
{
	return 0U;
}
static inline u32 gr_gpccs_falcon_addr_ext_s(void)
{
	return 12;
}
static inline u32 gr_gpccs_falcon_addr_ext_f(u32 v)
{
	return v & 0xfffU;
}
static inline u32 gr_gpccs_falcon_addr_ext_m(void)
{
	return 0xfffU;
}
static inline u32 gr_gpccs_falcon_addr_ext_v(u32 r)
{
	return r & 0xfffU;
}
/* GR_GPCCS_CPUCTL: falcon CPU start bit. */
static inline u32 gr_gpccs_cpuctl_r(void)
{
	return 0x0041a100;
}
static inline u32 gr_gpccs_cpuctl_startcpu_f(u32 v)
{
	return (v & 0x1) << 1;
}
/* GR_GPCCS_DMACTL: require-ctx bit plus DMEM/IMEM scrubbing status masks. */
static inline u32 gr_gpccs_dmactl_r(void)
{
	return 0x0041a10c;
}
static inline u32 gr_gpccs_dmactl_require_ctx_f(u32 v)
{
	return (v & 0x1) << 0;
}
static inline u32 gr_gpccs_dmactl_dmem_scrubbing_m(void)
{
	return 0x1 << 1;
}
static inline u32 gr_gpccs_dmactl_imem_scrubbing_m(void)
{
	return 0x1 << 2;
}
/* GR_GPCCS_IMEMC/D/T: IMEM access control, data and tag ports (stride 16). */
static inline u32 gr_gpccs_imemc_r(u32 i)
{
	return 0x0041a180 + i*16;
}
static inline u32 gr_gpccs_imemc_offs_f(u32 v)
{
	return (v & 0x3f) << 2;
}
static inline u32 gr_gpccs_imemc_blk_f(u32 v)
{
	return (v & 0xff) << 8;
}
static inline u32 gr_gpccs_imemc_aincw_f(u32 v)
{
	return (v & 0x1) << 24;
}
static inline u32 gr_gpccs_imemd_r(u32 i)
{
	return 0x0041a184 + i*16;
}
static inline u32 gr_gpccs_imemt_r(u32 i)
{
	return 0x0041a188 + i*16;
}
static inline u32 gr_gpccs_imemt__size_1_v(void)
{
	return 0x00000004;
}
static inline u32 gr_gpccs_imemt_tag_f(u32 v)
{
	return (v & 0xffff) << 0;
}
/* GR_GPCCS_DMEMC/D: DMEM access control and data ports (stride 8). */
static inline u32 gr_gpccs_dmemc_r(u32 i)
{
	return 0x0041a1c0 + i*8;
}
static inline u32 gr_gpccs_dmemc_offs_f(u32 v)
{
	return (v & 0x3f) << 2;
}
static inline u32 gr_gpccs_dmemc_blk_f(u32 v)
{
	return (v & 0xff) << 8;
}
static inline u32 gr_gpccs_dmemc_aincw_f(u32 v)
{
	return (v & 0x1) << 24;
}
static inline u32 gr_gpccs_dmemd_r(u32 i)
{
	return 0x0041a1c4 + i*8;
}
/* GR_GPCCS_CTXSW_MAILBOX: per-index 32-bit mailbox registers (stride 4). */
static inline u32 gr_gpccs_ctxsw_mailbox_r(u32 i)
{
	return 0x0041a800 + i*4;
}
static inline u32 gr_gpccs_ctxsw_mailbox_value_f(u32 v)
{
	return (v & 0xffffffff) << 0;
}
2073 | static inline u32 gr_gpcs_setup_bundle_cb_base_r(void) | ||
2074 | { | ||
2075 | return 0x00418808; | ||
2076 | } | ||
2077 | static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_s(void) | ||
2078 | { | ||
2079 | return 32; | ||
2080 | } | ||
2081 | static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_f(u32 v) | ||
2082 | { | ||
2083 | return (v & 0xffffffff) << 0; | ||
2084 | } | ||
2085 | static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_m(void) | ||
2086 | { | ||
2087 | return 0xffffffff << 0; | ||
2088 | } | ||
2089 | static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_v(u32 r) | ||
2090 | { | ||
2091 | return (r >> 0) & 0xffffffff; | ||
2092 | } | ||
2093 | static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_init_v(void) | ||
2094 | { | ||
2095 | return 0x00000000; | ||
2096 | } | ||
2097 | static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_init_f(void) | ||
2098 | { | ||
2099 | return 0x0; | ||
2100 | } | ||
2101 | static inline u32 gr_gpcs_setup_bundle_cb_size_r(void) | ||
2102 | { | ||
2103 | return 0x0041880c; | ||
2104 | } | ||
2105 | static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_s(void) | ||
2106 | { | ||
2107 | return 11; | ||
2108 | } | ||
2109 | static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_f(u32 v) | ||
2110 | { | ||
2111 | return (v & 0x7ff) << 0; | ||
2112 | } | ||
2113 | static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_m(void) | ||
2114 | { | ||
2115 | return 0x7ff << 0; | ||
2116 | } | ||
2117 | static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_v(u32 r) | ||
2118 | { | ||
2119 | return (r >> 0) & 0x7ff; | ||
2120 | } | ||
2121 | static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_init_v(void) | ||
2122 | { | ||
2123 | return 0x00000000; | ||
2124 | } | ||
2125 | static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_init_f(void) | ||
2126 | { | ||
2127 | return 0x0; | ||
2128 | } | ||
2129 | static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b__prod_v(void) | ||
2130 | { | ||
2131 | return 0x00000018; | ||
2132 | } | ||
2133 | static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b__prod_f(void) | ||
2134 | { | ||
2135 | return 0x18; | ||
2136 | } | ||
2137 | static inline u32 gr_gpcs_setup_bundle_cb_size_valid_s(void) | ||
2138 | { | ||
2139 | return 1; | ||
2140 | } | ||
2141 | static inline u32 gr_gpcs_setup_bundle_cb_size_valid_f(u32 v) | ||
2142 | { | ||
2143 | return (v & 0x1) << 31; | ||
2144 | } | ||
2145 | static inline u32 gr_gpcs_setup_bundle_cb_size_valid_m(void) | ||
2146 | { | ||
2147 | return 0x1 << 31; | ||
2148 | } | ||
2149 | static inline u32 gr_gpcs_setup_bundle_cb_size_valid_v(u32 r) | ||
2150 | { | ||
2151 | return (r >> 31) & 0x1; | ||
2152 | } | ||
2153 | static inline u32 gr_gpcs_setup_bundle_cb_size_valid_false_v(void) | ||
2154 | { | ||
2155 | return 0x00000000; | ||
2156 | } | ||
2157 | static inline u32 gr_gpcs_setup_bundle_cb_size_valid_false_f(void) | ||
2158 | { | ||
2159 | return 0x0; | ||
2160 | } | ||
2161 | static inline u32 gr_gpcs_setup_bundle_cb_size_valid_true_v(void) | ||
2162 | { | ||
2163 | return 0x00000001; | ||
2164 | } | ||
2165 | static inline u32 gr_gpcs_setup_bundle_cb_size_valid_true_f(void) | ||
2166 | { | ||
2167 | return 0x80000000; | ||
2168 | } | ||
2169 | static inline u32 gr_gpcs_setup_attrib_cb_base_r(void) | ||
2170 | { | ||
2171 | return 0x00418810; | ||
2172 | } | ||
2173 | static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_f(u32 v) | ||
2174 | { | ||
2175 | return (v & 0xfffffff) << 0; | ||
2176 | } | ||
2177 | static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(void) | ||
2178 | { | ||
2179 | return 0x0000000c; | ||
2180 | } | ||
2181 | static inline u32 gr_gpcs_setup_attrib_cb_base_valid_true_f(void) | ||
2182 | { | ||
2183 | return 0x80000000; | ||
2184 | } | ||
/*
 * CRSTR GPC map registers: six consecutive registers (map0..map5),
 * each packing six 3-bit tile-to-GPC entries at 5-bit strides
 * (shifts 0, 5, 10, 15, 20, 25), covering tiles 0..35 in total.
 */
static inline u32 gr_crstr_gpc_map0_r(void)
{
	return 0x00418b08;
}
static inline u32 gr_crstr_gpc_map0_tile0_f(u32 v)
{
	return (v & 0x7) << 0;
}
static inline u32 gr_crstr_gpc_map0_tile1_f(u32 v)
{
	return (v & 0x7) << 5;
}
static inline u32 gr_crstr_gpc_map0_tile2_f(u32 v)
{
	return (v & 0x7) << 10;
}
static inline u32 gr_crstr_gpc_map0_tile3_f(u32 v)
{
	return (v & 0x7) << 15;
}
static inline u32 gr_crstr_gpc_map0_tile4_f(u32 v)
{
	return (v & 0x7) << 20;
}
static inline u32 gr_crstr_gpc_map0_tile5_f(u32 v)
{
	return (v & 0x7) << 25;
}
static inline u32 gr_crstr_gpc_map1_r(void)
{
	return 0x00418b0c;
}
static inline u32 gr_crstr_gpc_map1_tile6_f(u32 v)
{
	return (v & 0x7) << 0;
}
static inline u32 gr_crstr_gpc_map1_tile7_f(u32 v)
{
	return (v & 0x7) << 5;
}
static inline u32 gr_crstr_gpc_map1_tile8_f(u32 v)
{
	return (v & 0x7) << 10;
}
static inline u32 gr_crstr_gpc_map1_tile9_f(u32 v)
{
	return (v & 0x7) << 15;
}
static inline u32 gr_crstr_gpc_map1_tile10_f(u32 v)
{
	return (v & 0x7) << 20;
}
static inline u32 gr_crstr_gpc_map1_tile11_f(u32 v)
{
	return (v & 0x7) << 25;
}
static inline u32 gr_crstr_gpc_map2_r(void)
{
	return 0x00418b10;
}
static inline u32 gr_crstr_gpc_map2_tile12_f(u32 v)
{
	return (v & 0x7) << 0;
}
static inline u32 gr_crstr_gpc_map2_tile13_f(u32 v)
{
	return (v & 0x7) << 5;
}
static inline u32 gr_crstr_gpc_map2_tile14_f(u32 v)
{
	return (v & 0x7) << 10;
}
static inline u32 gr_crstr_gpc_map2_tile15_f(u32 v)
{
	return (v & 0x7) << 15;
}
static inline u32 gr_crstr_gpc_map2_tile16_f(u32 v)
{
	return (v & 0x7) << 20;
}
static inline u32 gr_crstr_gpc_map2_tile17_f(u32 v)
{
	return (v & 0x7) << 25;
}
static inline u32 gr_crstr_gpc_map3_r(void)
{
	return 0x00418b14;
}
static inline u32 gr_crstr_gpc_map3_tile18_f(u32 v)
{
	return (v & 0x7) << 0;
}
static inline u32 gr_crstr_gpc_map3_tile19_f(u32 v)
{
	return (v & 0x7) << 5;
}
static inline u32 gr_crstr_gpc_map3_tile20_f(u32 v)
{
	return (v & 0x7) << 10;
}
static inline u32 gr_crstr_gpc_map3_tile21_f(u32 v)
{
	return (v & 0x7) << 15;
}
static inline u32 gr_crstr_gpc_map3_tile22_f(u32 v)
{
	return (v & 0x7) << 20;
}
static inline u32 gr_crstr_gpc_map3_tile23_f(u32 v)
{
	return (v & 0x7) << 25;
}
static inline u32 gr_crstr_gpc_map4_r(void)
{
	return 0x00418b18;
}
static inline u32 gr_crstr_gpc_map4_tile24_f(u32 v)
{
	return (v & 0x7) << 0;
}
static inline u32 gr_crstr_gpc_map4_tile25_f(u32 v)
{
	return (v & 0x7) << 5;
}
static inline u32 gr_crstr_gpc_map4_tile26_f(u32 v)
{
	return (v & 0x7) << 10;
}
static inline u32 gr_crstr_gpc_map4_tile27_f(u32 v)
{
	return (v & 0x7) << 15;
}
static inline u32 gr_crstr_gpc_map4_tile28_f(u32 v)
{
	return (v & 0x7) << 20;
}
static inline u32 gr_crstr_gpc_map4_tile29_f(u32 v)
{
	return (v & 0x7) << 25;
}
static inline u32 gr_crstr_gpc_map5_r(void)
{
	return 0x00418b1c;
}
static inline u32 gr_crstr_gpc_map5_tile30_f(u32 v)
{
	return (v & 0x7) << 0;
}
static inline u32 gr_crstr_gpc_map5_tile31_f(u32 v)
{
	return (v & 0x7) << 5;
}
static inline u32 gr_crstr_gpc_map5_tile32_f(u32 v)
{
	return (v & 0x7) << 10;
}
static inline u32 gr_crstr_gpc_map5_tile33_f(u32 v)
{
	return (v & 0x7) << 15;
}
static inline u32 gr_crstr_gpc_map5_tile34_f(u32 v)
{
	return (v & 0x7) << 20;
}
static inline u32 gr_crstr_gpc_map5_tile35_f(u32 v)
{
	return (v & 0x7) << 25;
}
/* CRSTR map table configuration: 8-bit row offset and entry count */
static inline u32 gr_crstr_map_table_cfg_r(void)
{
	return 0x00418bb8;
}
static inline u32 gr_crstr_map_table_cfg_row_offset_f(u32 v)
{
	return (v & 0xff) << 0;
}
static inline u32 gr_crstr_map_table_cfg_num_entries_f(u32 v)
{
	return (v & 0xff) << 8;
}
/*
 * ZCULL SM-in-GPC number map registers: four consecutive registers
 * (map0..map3), each packing eight 3-bit tile entries on 4-bit
 * boundaries (shifts 0, 4, ..., 28), covering tiles 0..31.
 */
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_r(void)
{
	return 0x00418980;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(u32 v)
{
	return (v & 0x7) << 0;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(u32 v)
{
	return (v & 0x7) << 4;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(u32 v)
{
	return (v & 0x7) << 8;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(u32 v)
{
	return (v & 0x7) << 12;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(u32 v)
{
	return (v & 0x7) << 16;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(u32 v)
{
	return (v & 0x7) << 20;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(u32 v)
{
	return (v & 0x7) << 24;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(u32 v)
{
	return (v & 0x7) << 28;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_r(void)
{
	return 0x00418984;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(u32 v)
{
	return (v & 0x7) << 0;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(u32 v)
{
	return (v & 0x7) << 4;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(u32 v)
{
	return (v & 0x7) << 8;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(u32 v)
{
	return (v & 0x7) << 12;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(u32 v)
{
	return (v & 0x7) << 16;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(u32 v)
{
	return (v & 0x7) << 20;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(u32 v)
{
	return (v & 0x7) << 24;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(u32 v)
{
	return (v & 0x7) << 28;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_r(void)
{
	return 0x00418988;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(u32 v)
{
	return (v & 0x7) << 0;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(u32 v)
{
	return (v & 0x7) << 4;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(u32 v)
{
	return (v & 0x7) << 8;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(u32 v)
{
	return (v & 0x7) << 12;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(u32 v)
{
	return (v & 0x7) << 16;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(u32 v)
{
	return (v & 0x7) << 20;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(u32 v)
{
	return (v & 0x7) << 24;
}
/* tile_23 is the only entry the generator emitted _s/_m/_v for */
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_s(void)
{
	return 3;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(u32 v)
{
	return (v & 0x7) << 28;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_m(void)
{
	return 0x7 << 28;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_v(u32 r)
{
	return (r >> 28) & 0x7;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_r(void)
{
	return 0x0041898c;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(u32 v)
{
	return (v & 0x7) << 0;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(u32 v)
{
	return (v & 0x7) << 4;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(u32 v)
{
	return (v & 0x7) << 8;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(u32 v)
{
	return (v & 0x7) << 12;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(u32 v)
{
	return (v & 0x7) << 16;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(u32 v)
{
	return (v & 0x7) << 20;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(u32 v)
{
	return (v & 0x7) << 24;
}
static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(u32 v)
{
	return (v & 0x7) << 28;
}
2521 | static inline u32 gr_gpcs_gpm_pd_cfg_r(void) | ||
2522 | { | ||
2523 | return 0x00418c6c; | ||
2524 | } | ||
2525 | static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f(void) | ||
2526 | { | ||
2527 | return 0x0; | ||
2528 | } | ||
2529 | static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f(void) | ||
2530 | { | ||
2531 | return 0x1; | ||
2532 | } | ||
2533 | static inline u32 gr_gpcs_gcc_pagepool_base_r(void) | ||
2534 | { | ||
2535 | return 0x00419004; | ||
2536 | } | ||
2537 | static inline u32 gr_gpcs_gcc_pagepool_base_addr_39_8_f(u32 v) | ||
2538 | { | ||
2539 | return (v & 0xffffffff) << 0; | ||
2540 | } | ||
2541 | static inline u32 gr_gpcs_gcc_pagepool_r(void) | ||
2542 | { | ||
2543 | return 0x00419008; | ||
2544 | } | ||
2545 | static inline u32 gr_gpcs_gcc_pagepool_total_pages_f(u32 v) | ||
2546 | { | ||
2547 | return (v & 0xff) << 0; | ||
2548 | } | ||
2549 | static inline u32 gr_gpcs_tpcs_pe_vaf_r(void) | ||
2550 | { | ||
2551 | return 0x0041980c; | ||
2552 | } | ||
2553 | static inline u32 gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f(void) | ||
2554 | { | ||
2555 | return 0x10; | ||
2556 | } | ||
2557 | static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(void) | ||
2558 | { | ||
2559 | return 0x00419848; | ||
2560 | } | ||
2561 | static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(u32 v) | ||
2562 | { | ||
2563 | return (v & 0xfffffff) << 0; | ||
2564 | } | ||
2565 | static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_f(u32 v) | ||
2566 | { | ||
2567 | return (v & 0x1) << 28; | ||
2568 | } | ||
2569 | static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(void) | ||
2570 | { | ||
2571 | return 0x10000000; | ||
2572 | } | ||
2573 | static inline u32 gr_gpcs_tpcs_l1c_pm_r(void) | ||
2574 | { | ||
2575 | return 0x00419ca8; | ||
2576 | } | ||
2577 | static inline u32 gr_gpcs_tpcs_l1c_pm_enable_m(void) | ||
2578 | { | ||
2579 | return 0x1 << 31; | ||
2580 | } | ||
2581 | static inline u32 gr_gpcs_tpcs_l1c_pm_enable_enable_f(void) | ||
2582 | { | ||
2583 | return 0x80000000; | ||
2584 | } | ||
2585 | static inline u32 gr_gpcs_tpcs_l1c_cfg_r(void) | ||
2586 | { | ||
2587 | return 0x00419cb8; | ||
2588 | } | ||
2589 | static inline u32 gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_m(void) | ||
2590 | { | ||
2591 | return 0x1 << 31; | ||
2592 | } | ||
2593 | static inline u32 gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_enable_f(void) | ||
2594 | { | ||
2595 | return 0x80000000; | ||
2596 | } | ||
2597 | static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_r(void) | ||
2598 | { | ||
2599 | return 0x00419c00; | ||
2600 | } | ||
2601 | static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f(void) | ||
2602 | { | ||
2603 | return 0x0; | ||
2604 | } | ||
2605 | static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f(void) | ||
2606 | { | ||
2607 | return 0x8; | ||
2608 | } | ||
2609 | static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_r(void) | ||
2610 | { | ||
2611 | return 0x00419e00; | ||
2612 | } | ||
2613 | static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_m(void) | ||
2614 | { | ||
2615 | return 0x1 << 7; | ||
2616 | } | ||
2617 | static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_enable_f(void) | ||
2618 | { | ||
2619 | return 0x80; | ||
2620 | } | ||
2621 | static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_m(void) | ||
2622 | { | ||
2623 | return 0x1 << 15; | ||
2624 | } | ||
2625 | static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_enable_f(void) | ||
2626 | { | ||
2627 | return 0x8000; | ||
2628 | } | ||
/*
 * SM hardware-warning (HWW) error-status report masks.  Each
 * *_report_f() returns a single-bit enable for reporting that error
 * class; masks are OR-ed together before being written to the
 * corresponding report-mask register.
 */
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(void)
{
	return 0x00419e44;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f(void)
{
	return 0x2;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f(void)
{
	return 0x4;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f(void)
{
	return 0x8;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f(void)
{
	return 0x10;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f(void)
{
	return 0x20;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f(void)
{
	return 0x40;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f(void)
{
	return 0x80;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f(void)
{
	return 0x100;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f(void)
{
	return 0x200;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f(void)
{
	return 0x400;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f(void)
{
	return 0x800;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f(void)
{
	return 0x1000;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f(void)
{
	return 0x2000;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f(void)
{
	return 0x4000;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f(void)
{
	return 0x8000;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f(void)
{
	return 0x10000;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f(void)
{
	return 0x20000;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f(void)
{
	return 0x40000;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f(void)
{
	return 0x80000;
}
static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f(void)
{
	return 0x100000;
}
/* global (non-warp) SM error report mask register and bits */
static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(void)
{
	return 0x00419e4c;
}
static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f(void)
{
	return 0x1;
}
static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f(void)
{
	return 0x2;
}
static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f(void)
{
	return 0x4;
}
static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f(void)
{
	return 0x8;
}
static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f(void)
{
	return 0x10;
}
static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f(void)
{
	return 0x20;
}
static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f(void)
{
	return 0x40;
}
/*
 * TPC/GPC exception enable and status registers, SM debugger control
 * and SM HWW error-status registers.  The gpc0_tpc0_* offsets address
 * GPC 0 / TPC 0 directly; the gpcs_tpcs_* offsets are the broadcast
 * aliases that reach all GPCs/TPCs at once.
 */
static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
{
	return 0x0050450c;
}
static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
{
	return 0x2;
}
static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_disabled_f(void)
{
	return 0x0;
}
static inline u32 gr_gpc0_gpccs_gpc_exception_en_r(void)
{
	return 0x00502c94;
}
static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f(void)
{
	return 0x10000;
}
static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_disabled_f(void)
{
	return 0x0;
}
static inline u32 gr_gpcs_gpccs_gpc_exception_r(void)
{
	return 0x0041ac90;
}
/* per-TPC pending bits live in bits 23:16 of the exception register */
static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_v(u32 r)
{
	return (r >> 16) & 0xff;
}
static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v(void)
{
	return 0x00000001;
}
static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_r(void)
{
	return 0x00419d08;
}
static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(u32 r)
{
	return (r >> 1) & 0x1;
}
static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v(void)
{
	return 0x00000001;
}
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_r(void)
{
	return 0x00504610;
}
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(u32 r)
{
	return (r >> 0) & 0x1;
}
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v(void)
{
	return 0x00000001;
}
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void)
{
	return 0x80000000;
}
static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
{
	return 0x0050460c;
}
static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
{
	return (r >> 4) & 0x1;
}
static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v(void)
{
	return 0x00000001;
}
static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_r(void)
{
	return 0x00504650;
}
static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f(void)
{
	return 0x10;
}
static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f(void)
{
	return 0x20;
}
static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(void)
{
	return 0x40;
}
static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void)
{
	return 0x00504648;
}
/* warp error code occupies bits 15:0 of the warp ESR */
static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_v(u32 r)
{
	return (r >> 0) & 0xffff;
}
static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v(void)
{
	return 0x00000000;
}
static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void)
{
	return 0x0;
}
/*
 * SM half-control, debug SFE control, power throttle and PES VSC/VPC
 * registers.  Unicast (gpc0_tpc0_*) and broadcast (gpcs_tpcs_*)
 * offsets are provided for the halfctl and sfe_control registers.
 */
static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
{
	return 0x00504770;
}
static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_r(void)
{
	return 0x00419f70;
}
static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_m(void)
{
	return 0x1 << 1;
}
static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_enable_f(void)
{
	return 0x2;
}
static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(void)
{
	return 0x1 << 4;
}
static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(u32 v)
{
	return (v & 0x1) << 4;
}
static inline u32 gr_gpc0_tpc0_sm_debug_sfe_control_r(void)
{
	return 0x0050477c;
}
static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_r(void)
{
	return 0x00419f7c;
}
static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(void)
{
	return 0x1 << 0;
}
static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(u32 v)
{
	return (v & 0x1) << 0;
}
static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_m(void)
{
	return 0x1 << 16;
}
static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_enable_f(void)
{
	return 0x10000;
}
static inline u32 gr_gpcs_tpcs_sm_power_throttle_r(void)
{
	return 0x00419ed0;
}
static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_r(void)
{
	return 0x0041be08;
}
static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f(void)
{
	return 0x4;
}
/*
 * PPCS WWDX GPC map registers (six consecutive registers, stride 4)
 * and the WWDX map-table configuration registers holding the table
 * geometry and coefficient modulo values.
 */
static inline u32 gr_ppcs_wwdx_map_gpc_map0_r(void)
{
	return 0x0041bf00;
}
static inline u32 gr_ppcs_wwdx_map_gpc_map1_r(void)
{
	return 0x0041bf04;
}
static inline u32 gr_ppcs_wwdx_map_gpc_map2_r(void)
{
	return 0x0041bf08;
}
static inline u32 gr_ppcs_wwdx_map_gpc_map3_r(void)
{
	return 0x0041bf0c;
}
static inline u32 gr_ppcs_wwdx_map_gpc_map4_r(void)
{
	return 0x0041bf10;
}
static inline u32 gr_ppcs_wwdx_map_gpc_map5_r(void)
{
	return 0x0041bf14;
}
static inline u32 gr_ppcs_wwdx_map_table_cfg_r(void)
{
	return 0x0041bfd0;
}
static inline u32 gr_ppcs_wwdx_map_table_cfg_row_offset_f(u32 v)
{
	return (v & 0xff) << 0;
}
static inline u32 gr_ppcs_wwdx_map_table_cfg_num_entries_f(u32 v)
{
	return (v & 0xff) << 8;
}
static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(u32 v)
{
	return (v & 0x1f) << 16;
}
static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(u32 v)
{
	return (v & 0x7) << 21;
}
static inline u32 gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(u32 v)
{
	return (v & 0x1f) << 24;
}
static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_r(void)
{
	return 0x0041bfd4;
}
static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(u32 v)
{
	return (v & 0xffffff) << 0;
}
/* cfg2 packs coefficients 6..11, five bits each at 5-bit strides */
static inline u32 gr_ppcs_wwdx_map_table_cfg2_r(void)
{
	return 0x0041bfe4;
}
static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(u32 v)
{
	return (v & 0x1f) << 0;
}
static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(u32 v)
{
	return (v & 0x1f) << 5;
}
static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(u32 v)
{
	return (v & 0x1f) << 10;
}
static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(u32 v)
{
	return (v & 0x1f) << 15;
}
static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(u32 v)
{
	return (v & 0x1f) << 20;
}
static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(u32 v)
{
	return (v & 0x1f) << 25;
}
static inline u32 gr_gpcs_ppcs_cbm_cfg_r(void)
{
	return 0x0041bec0;
}
static inline u32 gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v(void)
{
	return 0x00000001;
}
3005 | static inline u32 gr_bes_zrop_settings_r(void) | ||
3006 | { | ||
3007 | return 0x00408850; | ||
3008 | } | ||
3009 | static inline u32 gr_bes_zrop_settings_num_active_fbps_f(u32 v) | ||
3010 | { | ||
3011 | return (v & 0xf) << 0; | ||
3012 | } | ||
3013 | static inline u32 gr_bes_crop_settings_r(void) | ||
3014 | { | ||
3015 | return 0x00408958; | ||
3016 | } | ||
3017 | static inline u32 gr_bes_crop_settings_num_active_fbps_f(u32 v) | ||
3018 | { | ||
3019 | return (v & 0xf) << 0; | ||
3020 | } | ||
3021 | static inline u32 gr_zcull_bytes_per_aliquot_per_gpu_v(void) | ||
3022 | { | ||
3023 | return 0x00000020; | ||
3024 | } | ||
3025 | static inline u32 gr_zcull_save_restore_header_bytes_per_gpc_v(void) | ||
3026 | { | ||
3027 | return 0x00000020; | ||
3028 | } | ||
3029 | static inline u32 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(void) | ||
3030 | { | ||
3031 | return 0x000000c0; | ||
3032 | } | ||
3033 | static inline u32 gr_zcull_subregion_qty_v(void) | ||
3034 | { | ||
3035 | return 0x00000010; | ||
3036 | } | ||
3037 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r(void) | ||
3038 | { | ||
3039 | return 0x00504604; | ||
3040 | } | ||
3041 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r(void) | ||
3042 | { | ||
3043 | return 0x00504608; | ||
3044 | } | ||
3045 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r(void) | ||
3046 | { | ||
3047 | return 0x0050465c; | ||
3048 | } | ||
3049 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r(void) | ||
3050 | { | ||
3051 | return 0x00504660; | ||
3052 | } | ||
3053 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r(void) | ||
3054 | { | ||
3055 | return 0x00504664; | ||
3056 | } | ||
3057 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r(void) | ||
3058 | { | ||
3059 | return 0x00504668; | ||
3060 | } | ||
3061 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r(void) | ||
3062 | { | ||
3063 | return 0x0050466c; | ||
3064 | } | ||
3065 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(void) | ||
3066 | { | ||
3067 | return 0x00504658; | ||
3068 | } | ||
3069 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(void) | ||
3070 | { | ||
3071 | return 0x00504670; | ||
3072 | } | ||
3073 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r(void) | ||
3074 | { | ||
3075 | return 0x00504694; | ||
3076 | } | ||
3077 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r(void) | ||
3078 | { | ||
3079 | return 0x00504730; | ||
3080 | } | ||
3081 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r(void) | ||
3082 | { | ||
3083 | return 0x00504734; | ||
3084 | } | ||
3085 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r(void) | ||
3086 | { | ||
3087 | return 0x00504738; | ||
3088 | } | ||
3089 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r(void) | ||
3090 | { | ||
3091 | return 0x0050473c; | ||
3092 | } | ||
3093 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r(void) | ||
3094 | { | ||
3095 | return 0x00504740; | ||
3096 | } | ||
3097 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r(void) | ||
3098 | { | ||
3099 | return 0x00504744; | ||
3100 | } | ||
3101 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r(void) | ||
3102 | { | ||
3103 | return 0x00504748; | ||
3104 | } | ||
3105 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r(void) | ||
3106 | { | ||
3107 | return 0x0050474c; | ||
3108 | } | ||
3109 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(void) | ||
3110 | { | ||
3111 | return 0x00504674; | ||
3112 | } | ||
3113 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_r(void) | ||
3114 | { | ||
3115 | return 0x00504678; | ||
3116 | } | ||
3117 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_r(void) | ||
3118 | { | ||
3119 | return 0x0050467c; | ||
3120 | } | ||
3121 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_r(void) | ||
3122 | { | ||
3123 | return 0x00504680; | ||
3124 | } | ||
3125 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_r(void) | ||
3126 | { | ||
3127 | return 0x00504684; | ||
3128 | } | ||
3129 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(void) | ||
3130 | { | ||
3131 | return 0x00504688; | ||
3132 | } | ||
3133 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(void) | ||
3134 | { | ||
3135 | return 0x0050468c; | ||
3136 | } | ||
3137 | static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(void) | ||
3138 | { | ||
3139 | return 0x00504690; | ||
3140 | } | ||
3141 | static inline u32 gr_fe_pwr_mode_r(void) | ||
3142 | { | ||
3143 | return 0x00404170; | ||
3144 | } | ||
3145 | static inline u32 gr_fe_pwr_mode_mode_auto_f(void) | ||
3146 | { | ||
3147 | return 0x0; | ||
3148 | } | ||
3149 | static inline u32 gr_fe_pwr_mode_mode_force_on_f(void) | ||
3150 | { | ||
3151 | return 0x2; | ||
3152 | } | ||
3153 | static inline u32 gr_fe_pwr_mode_req_v(u32 r) | ||
3154 | { | ||
3155 | return (r >> 4) & 0x1; | ||
3156 | } | ||
3157 | static inline u32 gr_fe_pwr_mode_req_send_f(void) | ||
3158 | { | ||
3159 | return 0x10; | ||
3160 | } | ||
3161 | static inline u32 gr_fe_pwr_mode_req_done_v(void) | ||
3162 | { | ||
3163 | return 0x00000000; | ||
3164 | } | ||
3165 | static inline u32 gr_gpc0_tpc0_l1c_dbg_r(void) | ||
3166 | { | ||
3167 | return 0x005044b0; | ||
3168 | } | ||
3169 | static inline u32 gr_gpc0_tpc0_l1c_dbg_cya15_en_f(void) | ||
3170 | { | ||
3171 | return 0x8000000; | ||
3172 | } | ||
3173 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h new file mode 100644 index 00000000..65221b59 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h | |||
@@ -0,0 +1,221 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_ltc_gk20a_h_ | ||
51 | #define _hw_ltc_gk20a_h_ | ||
52 | |||
53 | static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void) | ||
54 | { | ||
55 | return 0x001410c8; | ||
56 | } | ||
57 | static inline u32 ltc_ltc0_lts0_tstg_cfg1_r(void) | ||
58 | { | ||
59 | return 0x00141104; | ||
60 | } | ||
61 | static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_ways_v(u32 r) | ||
62 | { | ||
63 | return (r >> 0) & 0xffff; | ||
64 | } | ||
65 | static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_v(u32 r) | ||
66 | { | ||
67 | return (r >> 16) & 0x3; | ||
68 | } | ||
69 | static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v(void) | ||
70 | { | ||
71 | return 0x00000000; | ||
72 | } | ||
73 | static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v(void) | ||
74 | { | ||
75 | return 0x00000001; | ||
76 | } | ||
77 | static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v(void) | ||
78 | { | ||
79 | return 0x00000002; | ||
80 | } | ||
81 | static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void) | ||
82 | { | ||
83 | return 0x0017e8c8; | ||
84 | } | ||
85 | static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r) | ||
86 | { | ||
87 | return (r >> 2) & 0x1; | ||
88 | } | ||
89 | static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_v(void) | ||
90 | { | ||
91 | return 0x00000001; | ||
92 | } | ||
93 | static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(void) | ||
94 | { | ||
95 | return 0x4; | ||
96 | } | ||
97 | static inline u32 ltc_ltc0_lts0_cbc_ctrl1_r(void) | ||
98 | { | ||
99 | return 0x0017e8c8; | ||
100 | } | ||
101 | static inline u32 ltc_ltcs_ltss_cbc_ctrl2_r(void) | ||
102 | { | ||
103 | return 0x0017e8cc; | ||
104 | } | ||
105 | static inline u32 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(u32 v) | ||
106 | { | ||
107 | return (v & 0x1ffff) << 0; | ||
108 | } | ||
109 | static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void) | ||
110 | { | ||
111 | return 0x0017e8d0; | ||
112 | } | ||
113 | static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(u32 v) | ||
114 | { | ||
115 | return (v & 0x1ffff) << 0; | ||
116 | } | ||
117 | static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(void) | ||
118 | { | ||
119 | return 0x0001ffff; | ||
120 | } | ||
121 | static inline u32 ltc_ltcs_ltss_cbc_base_r(void) | ||
122 | { | ||
123 | return 0x0017e8d4; | ||
124 | } | ||
125 | static inline u32 ltc_ltcs_ltss_cbc_base_alignment_shift_v(void) | ||
126 | { | ||
127 | return 0x0000000b; | ||
128 | } | ||
129 | static inline u32 ltc_ltcs_ltss_cbc_base_address_v(u32 r) | ||
130 | { | ||
131 | return (r >> 0) & 0x3ffffff; | ||
132 | } | ||
133 | static inline u32 ltc_ltcs_ltss_cbc_param_r(void) | ||
134 | { | ||
135 | return 0x0017e8dc; | ||
136 | } | ||
137 | static inline u32 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(u32 r) | ||
138 | { | ||
139 | return (r >> 0) & 0xffff; | ||
140 | } | ||
141 | static inline u32 ltc_ltcs_ltss_cbc_param_cache_line_size_v(u32 r) | ||
142 | { | ||
143 | return (r >> 24) & 0xf; | ||
144 | } | ||
145 | static inline u32 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(u32 r) | ||
146 | { | ||
147 | return (r >> 28) & 0xf; | ||
148 | } | ||
149 | static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_r(void) | ||
150 | { | ||
151 | return 0x0017e91c; | ||
152 | } | ||
153 | static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(u32 v) | ||
154 | { | ||
155 | return (v & 0x1f) << 16; | ||
156 | } | ||
157 | static inline u32 ltc_ltcs_ltss_dstg_zbc_index_r(void) | ||
158 | { | ||
159 | return 0x0017ea44; | ||
160 | } | ||
161 | static inline u32 ltc_ltcs_ltss_dstg_zbc_index_address_f(u32 v) | ||
162 | { | ||
163 | return (v & 0xf) << 0; | ||
164 | } | ||
165 | static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(u32 i) | ||
166 | { | ||
167 | return 0x0017ea48 + i*4; | ||
168 | } | ||
169 | static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(void) | ||
170 | { | ||
171 | return 0x00000004; | ||
172 | } | ||
173 | static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(void) | ||
174 | { | ||
175 | return 0x0017ea58; | ||
176 | } | ||
177 | static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_s(void) | ||
178 | { | ||
179 | return 32; | ||
180 | } | ||
181 | static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_f(u32 v) | ||
182 | { | ||
183 | return (v & 0xffffffff) << 0; | ||
184 | } | ||
185 | static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_m(void) | ||
186 | { | ||
187 | return 0xffffffff << 0; | ||
188 | } | ||
189 | static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_v(u32 r) | ||
190 | { | ||
191 | return (r >> 0) & 0xffffffff; | ||
192 | } | ||
193 | static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_r(void) | ||
194 | { | ||
195 | return 0x0017e924; | ||
196 | } | ||
197 | static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(void) | ||
198 | { | ||
199 | return 0x10000000; | ||
200 | } | ||
201 | static inline u32 ltc_ltss_g_elpg_r(void) | ||
202 | { | ||
203 | return 0x0017e828; | ||
204 | } | ||
205 | static inline u32 ltc_ltss_g_elpg_flush_v(u32 r) | ||
206 | { | ||
207 | return (r >> 0) & 0x1; | ||
208 | } | ||
209 | static inline u32 ltc_ltss_g_elpg_flush_pending_v(void) | ||
210 | { | ||
211 | return 0x00000001; | ||
212 | } | ||
213 | static inline u32 ltc_ltss_g_elpg_flush_pending_f(void) | ||
214 | { | ||
215 | return 0x1; | ||
216 | } | ||
217 | static inline u32 ltc_ltc0_ltss_intr_r(void) | ||
218 | { | ||
219 | return 0x00140820; | ||
220 | } | ||
221 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h new file mode 100644 index 00000000..1692bb54 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h | |||
@@ -0,0 +1,253 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_mc_gk20a_h_ | ||
51 | #define _hw_mc_gk20a_h_ | ||
52 | |||
53 | static inline u32 mc_boot_0_r(void) | ||
54 | { | ||
55 | return 0x00000000; | ||
56 | } | ||
57 | static inline u32 mc_boot_0_architecture_v(u32 r) | ||
58 | { | ||
59 | return (r >> 24) & 0x1f; | ||
60 | } | ||
61 | static inline u32 mc_boot_0_implementation_v(u32 r) | ||
62 | { | ||
63 | return (r >> 20) & 0xf; | ||
64 | } | ||
65 | static inline u32 mc_boot_0_major_revision_v(u32 r) | ||
66 | { | ||
67 | return (r >> 4) & 0xf; | ||
68 | } | ||
69 | static inline u32 mc_boot_0_minor_revision_v(u32 r) | ||
70 | { | ||
71 | return (r >> 0) & 0xf; | ||
72 | } | ||
73 | static inline u32 mc_intr_0_r(void) | ||
74 | { | ||
75 | return 0x00000100; | ||
76 | } | ||
77 | static inline u32 mc_intr_0_pfifo_pending_f(void) | ||
78 | { | ||
79 | return 0x100; | ||
80 | } | ||
81 | static inline u32 mc_intr_0_pgraph_pending_f(void) | ||
82 | { | ||
83 | return 0x1000; | ||
84 | } | ||
85 | static inline u32 mc_intr_0_pmu_pending_f(void) | ||
86 | { | ||
87 | return 0x1000000; | ||
88 | } | ||
89 | static inline u32 mc_intr_0_ltc_pending_f(void) | ||
90 | { | ||
91 | return 0x2000000; | ||
92 | } | ||
93 | static inline u32 mc_intr_0_priv_ring_pending_f(void) | ||
94 | { | ||
95 | return 0x40000000; | ||
96 | } | ||
97 | static inline u32 mc_intr_0_pbus_pending_f(void) | ||
98 | { | ||
99 | return 0x10000000; | ||
100 | } | ||
101 | static inline u32 mc_intr_1_r(void) | ||
102 | { | ||
103 | return 0x00000104; | ||
104 | } | ||
105 | static inline u32 mc_intr_mask_0_r(void) | ||
106 | { | ||
107 | return 0x00000640; | ||
108 | } | ||
109 | static inline u32 mc_intr_mask_0_pmu_enabled_f(void) | ||
110 | { | ||
111 | return 0x1000000; | ||
112 | } | ||
113 | static inline u32 mc_intr_mask_1_r(void) | ||
114 | { | ||
115 | return 0x00000644; | ||
116 | } | ||
117 | static inline u32 mc_intr_mask_1_pmu_enabled_f(void) | ||
118 | { | ||
119 | return 0x1000000; | ||
120 | } | ||
121 | static inline u32 mc_intr_en_0_r(void) | ||
122 | { | ||
123 | return 0x00000140; | ||
124 | } | ||
125 | static inline u32 mc_intr_en_0_inta_disabled_f(void) | ||
126 | { | ||
127 | return 0x0; | ||
128 | } | ||
129 | static inline u32 mc_intr_en_0_inta_hardware_f(void) | ||
130 | { | ||
131 | return 0x1; | ||
132 | } | ||
133 | static inline u32 mc_intr_en_1_r(void) | ||
134 | { | ||
135 | return 0x00000144; | ||
136 | } | ||
137 | static inline u32 mc_intr_en_1_inta_disabled_f(void) | ||
138 | { | ||
139 | return 0x0; | ||
140 | } | ||
141 | static inline u32 mc_intr_en_1_inta_hardware_f(void) | ||
142 | { | ||
143 | return 0x1; | ||
144 | } | ||
145 | static inline u32 mc_enable_r(void) | ||
146 | { | ||
147 | return 0x00000200; | ||
148 | } | ||
149 | static inline u32 mc_enable_xbar_enabled_f(void) | ||
150 | { | ||
151 | return 0x4; | ||
152 | } | ||
153 | static inline u32 mc_enable_l2_enabled_f(void) | ||
154 | { | ||
155 | return 0x8; | ||
156 | } | ||
157 | static inline u32 mc_enable_pmedia_s(void) | ||
158 | { | ||
159 | return 1; | ||
160 | } | ||
161 | static inline u32 mc_enable_pmedia_f(u32 v) | ||
162 | { | ||
163 | return (v & 0x1) << 4; | ||
164 | } | ||
165 | static inline u32 mc_enable_pmedia_m(void) | ||
166 | { | ||
167 | return 0x1 << 4; | ||
168 | } | ||
169 | static inline u32 mc_enable_pmedia_v(u32 r) | ||
170 | { | ||
171 | return (r >> 4) & 0x1; | ||
172 | } | ||
173 | static inline u32 mc_enable_priv_ring_enabled_f(void) | ||
174 | { | ||
175 | return 0x20; | ||
176 | } | ||
177 | static inline u32 mc_enable_ce0_m(void) | ||
178 | { | ||
179 | return 0x1 << 6; | ||
180 | } | ||
181 | static inline u32 mc_enable_pfifo_enabled_f(void) | ||
182 | { | ||
183 | return 0x100; | ||
184 | } | ||
185 | static inline u32 mc_enable_pgraph_enabled_f(void) | ||
186 | { | ||
187 | return 0x1000; | ||
188 | } | ||
189 | static inline u32 mc_enable_pwr_v(u32 r) | ||
190 | { | ||
191 | return (r >> 13) & 0x1; | ||
192 | } | ||
193 | static inline u32 mc_enable_pwr_disabled_v(void) | ||
194 | { | ||
195 | return 0x00000000; | ||
196 | } | ||
197 | static inline u32 mc_enable_pwr_enabled_f(void) | ||
198 | { | ||
199 | return 0x2000; | ||
200 | } | ||
201 | static inline u32 mc_enable_pfb_enabled_f(void) | ||
202 | { | ||
203 | return 0x100000; | ||
204 | } | ||
205 | static inline u32 mc_enable_ce2_m(void) | ||
206 | { | ||
207 | return 0x1 << 21; | ||
208 | } | ||
209 | static inline u32 mc_enable_ce2_enabled_f(void) | ||
210 | { | ||
211 | return 0x200000; | ||
212 | } | ||
213 | static inline u32 mc_enable_blg_enabled_f(void) | ||
214 | { | ||
215 | return 0x8000000; | ||
216 | } | ||
217 | static inline u32 mc_enable_perfmon_enabled_f(void) | ||
218 | { | ||
219 | return 0x10000000; | ||
220 | } | ||
221 | static inline u32 mc_enable_hub_enabled_f(void) | ||
222 | { | ||
223 | return 0x20000000; | ||
224 | } | ||
225 | static inline u32 mc_enable_pb_r(void) | ||
226 | { | ||
227 | return 0x00000204; | ||
228 | } | ||
229 | static inline u32 mc_enable_pb_0_s(void) | ||
230 | { | ||
231 | return 1; | ||
232 | } | ||
233 | static inline u32 mc_enable_pb_0_f(u32 v) | ||
234 | { | ||
235 | return (v & 0x1) << 0; | ||
236 | } | ||
237 | static inline u32 mc_enable_pb_0_m(void) | ||
238 | { | ||
239 | return 0x1 << 0; | ||
240 | } | ||
241 | static inline u32 mc_enable_pb_0_v(u32 r) | ||
242 | { | ||
243 | return (r >> 0) & 0x1; | ||
244 | } | ||
245 | static inline u32 mc_enable_pb_0_enabled_v(void) | ||
246 | { | ||
247 | return 0x00000001; | ||
248 | } | ||
249 | static inline u32 mc_enable_pb_sel_f(u32 v, u32 i) | ||
250 | { | ||
251 | return (v & 0x1) << (0 + i*1); | ||
252 | } | ||
253 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h new file mode 100644 index 00000000..df1a6d48 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h | |||
@@ -0,0 +1,469 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_pbdma_gk20a_h_ | ||
51 | #define _hw_pbdma_gk20a_h_ | ||
52 | |||
53 | static inline u32 pbdma_gp_entry1_r(void) | ||
54 | { | ||
55 | return 0x10000004; | ||
56 | } | ||
57 | static inline u32 pbdma_gp_entry1_get_hi_v(u32 r) | ||
58 | { | ||
59 | return (r >> 0) & 0xff; | ||
60 | } | ||
61 | static inline u32 pbdma_gp_entry1_length_f(u32 v) | ||
62 | { | ||
63 | return (v & 0x1fffff) << 10; | ||
64 | } | ||
65 | static inline u32 pbdma_gp_entry1_length_v(u32 r) | ||
66 | { | ||
67 | return (r >> 10) & 0x1fffff; | ||
68 | } | ||
69 | static inline u32 pbdma_gp_base_r(u32 i) | ||
70 | { | ||
71 | return 0x00040048 + i*8192; | ||
72 | } | ||
73 | static inline u32 pbdma_gp_base__size_1_v(void) | ||
74 | { | ||
75 | return 0x00000001; | ||
76 | } | ||
77 | static inline u32 pbdma_gp_base_offset_f(u32 v) | ||
78 | { | ||
79 | return (v & 0x1fffffff) << 3; | ||
80 | } | ||
81 | static inline u32 pbdma_gp_base_rsvd_s(void) | ||
82 | { | ||
83 | return 3; | ||
84 | } | ||
85 | static inline u32 pbdma_gp_base_hi_r(u32 i) | ||
86 | { | ||
87 | return 0x0004004c + i*8192; | ||
88 | } | ||
89 | static inline u32 pbdma_gp_base_hi_offset_f(u32 v) | ||
90 | { | ||
91 | return (v & 0xff) << 0; | ||
92 | } | ||
93 | static inline u32 pbdma_gp_base_hi_limit2_f(u32 v) | ||
94 | { | ||
95 | return (v & 0x1f) << 16; | ||
96 | } | ||
97 | static inline u32 pbdma_gp_fetch_r(u32 i) | ||
98 | { | ||
99 | return 0x00040050 + i*8192; | ||
100 | } | ||
101 | static inline u32 pbdma_gp_get_r(u32 i) | ||
102 | { | ||
103 | return 0x00040014 + i*8192; | ||
104 | } | ||
105 | static inline u32 pbdma_gp_put_r(u32 i) | ||
106 | { | ||
107 | return 0x00040000 + i*8192; | ||
108 | } | ||
109 | static inline u32 pbdma_pb_fetch_r(u32 i) | ||
110 | { | ||
111 | return 0x00040054 + i*8192; | ||
112 | } | ||
113 | static inline u32 pbdma_pb_fetch_hi_r(u32 i) | ||
114 | { | ||
115 | return 0x00040058 + i*8192; | ||
116 | } | ||
117 | static inline u32 pbdma_get_r(u32 i) | ||
118 | { | ||
119 | return 0x00040018 + i*8192; | ||
120 | } | ||
121 | static inline u32 pbdma_get_hi_r(u32 i) | ||
122 | { | ||
123 | return 0x0004001c + i*8192; | ||
124 | } | ||
125 | static inline u32 pbdma_put_r(u32 i) | ||
126 | { | ||
127 | return 0x0004005c + i*8192; | ||
128 | } | ||
129 | static inline u32 pbdma_put_hi_r(u32 i) | ||
130 | { | ||
131 | return 0x00040060 + i*8192; | ||
132 | } | ||
133 | static inline u32 pbdma_formats_r(u32 i) | ||
134 | { | ||
135 | return 0x0004009c + i*8192; | ||
136 | } | ||
137 | static inline u32 pbdma_formats_gp_fermi0_f(void) | ||
138 | { | ||
139 | return 0x0; | ||
140 | } | ||
141 | static inline u32 pbdma_formats_pb_fermi1_f(void) | ||
142 | { | ||
143 | return 0x100; | ||
144 | } | ||
145 | static inline u32 pbdma_formats_mp_fermi0_f(void) | ||
146 | { | ||
147 | return 0x0; | ||
148 | } | ||
149 | static inline u32 pbdma_syncpointa_r(u32 i) | ||
150 | { | ||
151 | return 0x000400a4 + i*8192; | ||
152 | } | ||
153 | static inline u32 pbdma_syncpointa_payload_v(u32 r) | ||
154 | { | ||
155 | return (r >> 0) & 0xffffffff; | ||
156 | } | ||
157 | static inline u32 pbdma_syncpointb_r(u32 i) | ||
158 | { | ||
159 | return 0x000400a8 + i*8192; | ||
160 | } | ||
161 | static inline u32 pbdma_syncpointb_op_v(u32 r) | ||
162 | { | ||
163 | return (r >> 0) & 0x3; | ||
164 | } | ||
165 | static inline u32 pbdma_syncpointb_op_wait_v(void) | ||
166 | { | ||
167 | return 0x00000000; | ||
168 | } | ||
169 | static inline u32 pbdma_syncpointb_wait_switch_v(u32 r) | ||
170 | { | ||
171 | return (r >> 4) & 0x1; | ||
172 | } | ||
173 | static inline u32 pbdma_syncpointb_wait_switch_en_v(void) | ||
174 | { | ||
175 | return 0x00000001; | ||
176 | } | ||
177 | static inline u32 pbdma_syncpointb_syncpt_index_v(u32 r) | ||
178 | { | ||
179 | return (r >> 8) & 0xff; | ||
180 | } | ||
181 | static inline u32 pbdma_pb_header_r(u32 i) | ||
182 | { | ||
183 | return 0x00040084 + i*8192; | ||
184 | } | ||
185 | static inline u32 pbdma_pb_header_priv_user_f(void) | ||
186 | { | ||
187 | return 0x0; | ||
188 | } | ||
189 | static inline u32 pbdma_pb_header_method_zero_f(void) | ||
190 | { | ||
191 | return 0x0; | ||
192 | } | ||
193 | static inline u32 pbdma_pb_header_subchannel_zero_f(void) | ||
194 | { | ||
195 | return 0x0; | ||
196 | } | ||
197 | static inline u32 pbdma_pb_header_level_main_f(void) | ||
198 | { | ||
199 | return 0x0; | ||
200 | } | ||
201 | static inline u32 pbdma_pb_header_first_true_f(void) | ||
202 | { | ||
203 | return 0x400000; | ||
204 | } | ||
205 | static inline u32 pbdma_pb_header_type_inc_f(void) | ||
206 | { | ||
207 | return 0x20000000; | ||
208 | } | ||
209 | static inline u32 pbdma_subdevice_r(u32 i) | ||
210 | { | ||
211 | return 0x00040094 + i*8192; | ||
212 | } | ||
213 | static inline u32 pbdma_subdevice_id_f(u32 v) | ||
214 | { | ||
215 | return (v & 0xfff) << 0; | ||
216 | } | ||
217 | static inline u32 pbdma_subdevice_status_active_f(void) | ||
218 | { | ||
219 | return 0x10000000; | ||
220 | } | ||
221 | static inline u32 pbdma_subdevice_channel_dma_enable_f(void) | ||
222 | { | ||
223 | return 0x20000000; | ||
224 | } | ||
225 | static inline u32 pbdma_method0_r(u32 i) | ||
226 | { | ||
227 | return 0x000400c0 + i*8192; | ||
228 | } | ||
229 | static inline u32 pbdma_data0_r(u32 i) | ||
230 | { | ||
231 | return 0x000400c4 + i*8192; | ||
232 | } | ||
233 | static inline u32 pbdma_target_r(u32 i) | ||
234 | { | ||
235 | return 0x000400ac + i*8192; | ||
236 | } | ||
237 | static inline u32 pbdma_target_engine_sw_f(void) | ||
238 | { | ||
239 | return 0x1f; | ||
240 | } | ||
241 | static inline u32 pbdma_acquire_r(u32 i) | ||
242 | { | ||
243 | return 0x00040030 + i*8192; | ||
244 | } | ||
245 | static inline u32 pbdma_acquire_retry_man_2_f(void) | ||
246 | { | ||
247 | return 0x2; | ||
248 | } | ||
249 | static inline u32 pbdma_acquire_retry_exp_2_f(void) | ||
250 | { | ||
251 | return 0x100; | ||
252 | } | ||
253 | static inline u32 pbdma_acquire_timeout_exp_max_f(void) | ||
254 | { | ||
255 | return 0x7800; | ||
256 | } | ||
257 | static inline u32 pbdma_acquire_timeout_man_max_f(void) | ||
258 | { | ||
259 | return 0x7fff8000; | ||
260 | } | ||
261 | static inline u32 pbdma_acquire_timeout_en_disable_f(void) | ||
262 | { | ||
263 | return 0x0; | ||
264 | } | ||
265 | static inline u32 pbdma_status_r(u32 i) | ||
266 | { | ||
267 | return 0x00040100 + i*8192; | ||
268 | } | ||
269 | static inline u32 pbdma_channel_r(u32 i) | ||
270 | { | ||
271 | return 0x00040120 + i*8192; | ||
272 | } | ||
273 | static inline u32 pbdma_signature_r(u32 i) | ||
274 | { | ||
275 | return 0x00040010 + i*8192; | ||
276 | } | ||
277 | static inline u32 pbdma_signature_hw_valid_f(void) | ||
278 | { | ||
279 | return 0xface; | ||
280 | } | ||
281 | static inline u32 pbdma_signature_sw_zero_f(void) | ||
282 | { | ||
283 | return 0x0; | ||
284 | } | ||
285 | static inline u32 pbdma_userd_r(u32 i) | ||
286 | { | ||
287 | return 0x00040008 + i*8192; | ||
288 | } | ||
289 | static inline u32 pbdma_userd_target_vid_mem_f(void) | ||
290 | { | ||
291 | return 0x0; | ||
292 | } | ||
293 | static inline u32 pbdma_userd_addr_f(u32 v) | ||
294 | { | ||
295 | return (v & 0x7fffff) << 9; | ||
296 | } | ||
297 | static inline u32 pbdma_userd_hi_r(u32 i) | ||
298 | { | ||
299 | return 0x0004000c + i*8192; | ||
300 | } | ||
301 | static inline u32 pbdma_userd_hi_addr_f(u32 v) | ||
302 | { | ||
303 | return (v & 0xff) << 0; | ||
304 | } | ||
305 | static inline u32 pbdma_hce_ctrl_r(u32 i) | ||
306 | { | ||
307 | return 0x000400e4 + i*8192; | ||
308 | } | ||
309 | static inline u32 pbdma_hce_ctrl_hce_priv_mode_yes_f(void) | ||
310 | { | ||
311 | return 0x20; | ||
312 | } | ||
313 | static inline u32 pbdma_intr_0_r(u32 i) | ||
314 | { | ||
315 | return 0x00040108 + i*8192; | ||
316 | } | ||
317 | static inline u32 pbdma_intr_0_memreq_v(u32 r) | ||
318 | { | ||
319 | return (r >> 0) & 0x1; | ||
320 | } | ||
321 | static inline u32 pbdma_intr_0_memreq_pending_f(void) | ||
322 | { | ||
323 | return 0x1; | ||
324 | } | ||
325 | static inline u32 pbdma_intr_0_memack_timeout_pending_f(void) | ||
326 | { | ||
327 | return 0x2; | ||
328 | } | ||
329 | static inline u32 pbdma_intr_0_memack_extra_pending_f(void) | ||
330 | { | ||
331 | return 0x4; | ||
332 | } | ||
333 | static inline u32 pbdma_intr_0_memdat_timeout_pending_f(void) | ||
334 | { | ||
335 | return 0x8; | ||
336 | } | ||
337 | static inline u32 pbdma_intr_0_memdat_extra_pending_f(void) | ||
338 | { | ||
339 | return 0x10; | ||
340 | } | ||
341 | static inline u32 pbdma_intr_0_memflush_pending_f(void) | ||
342 | { | ||
343 | return 0x20; | ||
344 | } | ||
345 | static inline u32 pbdma_intr_0_memop_pending_f(void) | ||
346 | { | ||
347 | return 0x40; | ||
348 | } | ||
349 | static inline u32 pbdma_intr_0_lbconnect_pending_f(void) | ||
350 | { | ||
351 | return 0x80; | ||
352 | } | ||
353 | static inline u32 pbdma_intr_0_lbreq_pending_f(void) | ||
354 | { | ||
355 | return 0x100; | ||
356 | } | ||
357 | static inline u32 pbdma_intr_0_lback_timeout_pending_f(void) | ||
358 | { | ||
359 | return 0x200; | ||
360 | } | ||
361 | static inline u32 pbdma_intr_0_lback_extra_pending_f(void) | ||
362 | { | ||
363 | return 0x400; | ||
364 | } | ||
365 | static inline u32 pbdma_intr_0_lbdat_timeout_pending_f(void) | ||
366 | { | ||
367 | return 0x800; | ||
368 | } | ||
369 | static inline u32 pbdma_intr_0_lbdat_extra_pending_f(void) | ||
370 | { | ||
371 | return 0x1000; | ||
372 | } | ||
373 | static inline u32 pbdma_intr_0_gpfifo_pending_f(void) | ||
374 | { | ||
375 | return 0x2000; | ||
376 | } | ||
377 | static inline u32 pbdma_intr_0_gpptr_pending_f(void) | ||
378 | { | ||
379 | return 0x4000; | ||
380 | } | ||
381 | static inline u32 pbdma_intr_0_gpentry_pending_f(void) | ||
382 | { | ||
383 | return 0x8000; | ||
384 | } | ||
385 | static inline u32 pbdma_intr_0_gpcrc_pending_f(void) | ||
386 | { | ||
387 | return 0x10000; | ||
388 | } | ||
389 | static inline u32 pbdma_intr_0_pbptr_pending_f(void) | ||
390 | { | ||
391 | return 0x20000; | ||
392 | } | ||
393 | static inline u32 pbdma_intr_0_pbentry_pending_f(void) | ||
394 | { | ||
395 | return 0x40000; | ||
396 | } | ||
397 | static inline u32 pbdma_intr_0_pbcrc_pending_f(void) | ||
398 | { | ||
399 | return 0x80000; | ||
400 | } | ||
401 | static inline u32 pbdma_intr_0_xbarconnect_pending_f(void) | ||
402 | { | ||
403 | return 0x100000; | ||
404 | } | ||
405 | static inline u32 pbdma_intr_0_method_pending_f(void) | ||
406 | { | ||
407 | return 0x200000; | ||
408 | } | ||
409 | static inline u32 pbdma_intr_0_methodcrc_pending_f(void) | ||
410 | { | ||
411 | return 0x400000; | ||
412 | } | ||
413 | static inline u32 pbdma_intr_0_device_pending_f(void) | ||
414 | { | ||
415 | return 0x800000; | ||
416 | } | ||
417 | static inline u32 pbdma_intr_0_semaphore_pending_f(void) | ||
418 | { | ||
419 | return 0x2000000; | ||
420 | } | ||
421 | static inline u32 pbdma_intr_0_acquire_pending_f(void) | ||
422 | { | ||
423 | return 0x4000000; | ||
424 | } | ||
425 | static inline u32 pbdma_intr_0_pri_pending_f(void) | ||
426 | { | ||
427 | return 0x8000000; | ||
428 | } | ||
429 | static inline u32 pbdma_intr_0_no_ctxsw_seg_pending_f(void) | ||
430 | { | ||
431 | return 0x20000000; | ||
432 | } | ||
433 | static inline u32 pbdma_intr_0_pbseg_pending_f(void) | ||
434 | { | ||
435 | return 0x40000000; | ||
436 | } | ||
437 | static inline u32 pbdma_intr_0_signature_pending_f(void) | ||
438 | { | ||
439 | return 0x80000000; | ||
440 | } | ||
441 | static inline u32 pbdma_intr_1_r(u32 i) | ||
442 | { | ||
443 | return 0x00040148 + i*8192; | ||
444 | } | ||
445 | static inline u32 pbdma_intr_en_0_r(u32 i) | ||
446 | { | ||
447 | return 0x0004010c + i*8192; | ||
448 | } | ||
449 | static inline u32 pbdma_intr_en_0_lbreq_enabled_f(void) | ||
450 | { | ||
451 | return 0x100; | ||
452 | } | ||
453 | static inline u32 pbdma_intr_en_1_r(u32 i) | ||
454 | { | ||
455 | return 0x0004014c + i*8192; | ||
456 | } | ||
457 | static inline u32 pbdma_intr_stall_r(u32 i) | ||
458 | { | ||
459 | return 0x0004013c + i*8192; | ||
460 | } | ||
461 | static inline u32 pbdma_intr_stall_lbreq_enabled_f(void) | ||
462 | { | ||
463 | return 0x100; | ||
464 | } | ||
465 | static inline u32 pbdma_udma_nop_r(void) | ||
466 | { | ||
467 | return 0x00000008; | ||
468 | } | ||
469 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h new file mode 100644 index 00000000..d4007613 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_pri_ringmaster_gk20a_h_ | ||
51 | #define _hw_pri_ringmaster_gk20a_h_ | ||
52 | |||
53 | static inline u32 pri_ringmaster_command_r(void) | ||
54 | { | ||
55 | return 0x0012004c; | ||
56 | } | ||
57 | static inline u32 pri_ringmaster_command_cmd_m(void) | ||
58 | { | ||
59 | return 0x3f << 0; | ||
60 | } | ||
61 | static inline u32 pri_ringmaster_command_cmd_v(u32 r) | ||
62 | { | ||
63 | return (r >> 0) & 0x3f; | ||
64 | } | ||
65 | static inline u32 pri_ringmaster_command_cmd_no_cmd_v(void) | ||
66 | { | ||
67 | return 0x00000000; | ||
68 | } | ||
69 | static inline u32 pri_ringmaster_command_cmd_start_ring_f(void) | ||
70 | { | ||
71 | return 0x1; | ||
72 | } | ||
73 | static inline u32 pri_ringmaster_command_cmd_ack_interrupt_f(void) | ||
74 | { | ||
75 | return 0x2; | ||
76 | } | ||
77 | static inline u32 pri_ringmaster_command_cmd_enumerate_stations_f(void) | ||
78 | { | ||
79 | return 0x3; | ||
80 | } | ||
81 | static inline u32 pri_ringmaster_command_cmd_enumerate_stations_bc_grp_all_f(void) | ||
82 | { | ||
83 | return 0x0; | ||
84 | } | ||
85 | static inline u32 pri_ringmaster_command_data_r(void) | ||
86 | { | ||
87 | return 0x00120048; | ||
88 | } | ||
89 | static inline u32 pri_ringmaster_start_results_r(void) | ||
90 | { | ||
91 | return 0x00120050; | ||
92 | } | ||
93 | static inline u32 pri_ringmaster_start_results_connectivity_v(u32 r) | ||
94 | { | ||
95 | return (r >> 0) & 0x1; | ||
96 | } | ||
97 | static inline u32 pri_ringmaster_start_results_connectivity_pass_v(void) | ||
98 | { | ||
99 | return 0x00000001; | ||
100 | } | ||
101 | static inline u32 pri_ringmaster_intr_status0_r(void) | ||
102 | { | ||
103 | return 0x00120058; | ||
104 | } | ||
105 | static inline u32 pri_ringmaster_intr_status1_r(void) | ||
106 | { | ||
107 | return 0x0012005c; | ||
108 | } | ||
109 | static inline u32 pri_ringmaster_global_ctl_r(void) | ||
110 | { | ||
111 | return 0x00120060; | ||
112 | } | ||
113 | static inline u32 pri_ringmaster_global_ctl_ring_reset_asserted_f(void) | ||
114 | { | ||
115 | return 0x1; | ||
116 | } | ||
117 | static inline u32 pri_ringmaster_global_ctl_ring_reset_deasserted_f(void) | ||
118 | { | ||
119 | return 0x0; | ||
120 | } | ||
121 | static inline u32 pri_ringmaster_enum_fbp_r(void) | ||
122 | { | ||
123 | return 0x00120074; | ||
124 | } | ||
125 | static inline u32 pri_ringmaster_enum_fbp_count_v(u32 r) | ||
126 | { | ||
127 | return (r >> 0) & 0x1f; | ||
128 | } | ||
129 | static inline u32 pri_ringmaster_enum_gpc_r(void) | ||
130 | { | ||
131 | return 0x00120078; | ||
132 | } | ||
133 | static inline u32 pri_ringmaster_enum_gpc_count_v(u32 r) | ||
134 | { | ||
135 | return (r >> 0) & 0x1f; | ||
136 | } | ||
137 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h new file mode 100644 index 00000000..db16a8de --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h | |||
@@ -0,0 +1,226 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/hw_pri_ringstation_fbp_gk20a.h | ||
3 | * | ||
4 | * Copyright (c) 2012-2013, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | * | ||
18 | */ | ||
19 | |||
20 | /* | ||
21 | * Function naming determines intended use: | ||
22 | * | ||
23 | * <x>_r(void) : Returns the offset for register <x>. | ||
24 | * | ||
25 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
26 | * | ||
27 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
28 | * | ||
29 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
30 | * and masked to place it at field <y> of register <x>. This value | ||
31 | * can be |'d with others to produce a full register value for | ||
32 | * register <x>. | ||
33 | * | ||
34 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
35 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
36 | * register <x>. | ||
37 | * | ||
38 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
39 | * to place it at field <y> of register <x>. This value can be |'d | ||
40 | * with others to produce a full register value for <x>. | ||
41 | * | ||
42 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
43 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
44 | * This value is suitable for direct comparison with other unshifted | ||
45 | * values appropriate for use in field <y> of register <x>. | ||
46 | * | ||
47 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
48 | * field <y> of register <x>. This value is suitable for direct | ||
49 | * comparison with unshifted values appropriate for use in field <y> | ||
50 | * of register <x>. | ||
51 | */ | ||
52 | |||
53 | #ifndef __hw_pri_ringstation_fbp_gk20a_h__ | ||
54 | #define __hw_pri_ringstation_fbp_gk20a_h__ | ||
55 | /*This file is autogenerated. Do not edit. */ | ||
56 | |||
/*
 * PRI ringstation FBP MASTER_CONFIG register 'i'.
 * Base offset 0x00124300; consecutive 32-bit registers (4-byte stride).
 */
static inline u32 pri_ringstation_fbp_master_config_r(u32 i)
{
	return 0x00124300U + (i << 2);
}
61 | static inline u32 pri_ringstation_fbp_master_config__size_1_v(void) | ||
62 | { | ||
63 | return 64; | ||
64 | } | ||
65 | static inline u32 pri_ringstation_fbp_master_config_timeout_s(void) | ||
66 | { | ||
67 | return 18; | ||
68 | } | ||
69 | static inline u32 pri_ringstation_fbp_master_config_timeout_f(u32 v) | ||
70 | { | ||
71 | return (v & 0x3ffff) << 0; | ||
72 | } | ||
73 | static inline u32 pri_ringstation_fbp_master_config_timeout_m(void) | ||
74 | { | ||
75 | return 0x3ffff << 0; | ||
76 | } | ||
77 | static inline u32 pri_ringstation_fbp_master_config_timeout_v(u32 r) | ||
78 | { | ||
79 | return (r >> 0) & 0x3ffff; | ||
80 | } | ||
81 | static inline u32 pri_ringstation_fbp_master_config_timeout_i_v(void) | ||
82 | { | ||
83 | return 0x00000064; | ||
84 | } | ||
85 | static inline u32 pri_ringstation_fbp_master_config_timeout_i_f(void) | ||
86 | { | ||
87 | return 0x64; | ||
88 | } | ||
89 | static inline u32 pri_ringstation_fbp_master_config_fs_action_s(void) | ||
90 | { | ||
91 | return 1; | ||
92 | } | ||
93 | static inline u32 pri_ringstation_fbp_master_config_fs_action_f(u32 v) | ||
94 | { | ||
95 | return (v & 0x1) << 30; | ||
96 | } | ||
97 | static inline u32 pri_ringstation_fbp_master_config_fs_action_m(void) | ||
98 | { | ||
99 | return 0x1 << 30; | ||
100 | } | ||
101 | static inline u32 pri_ringstation_fbp_master_config_fs_action_v(u32 r) | ||
102 | { | ||
103 | return (r >> 30) & 0x1; | ||
104 | } | ||
105 | static inline u32 pri_ringstation_fbp_master_config_fs_action_error_v(void) | ||
106 | { | ||
107 | return 0x00000000; | ||
108 | } | ||
109 | static inline u32 pri_ringstation_fbp_master_config_fs_action_error_f(void) | ||
110 | { | ||
111 | return 0x0; | ||
112 | } | ||
113 | static inline u32 pri_ringstation_fbp_master_config_fs_action_soldier_on_v(void) | ||
114 | { | ||
115 | return 0x00000001; | ||
116 | } | ||
117 | static inline u32 pri_ringstation_fbp_master_config_fs_action_soldier_on_f(void) | ||
118 | { | ||
119 | return 0x40000000; | ||
120 | } | ||
121 | static inline u32 pri_ringstation_fbp_master_config_reset_action_s(void) | ||
122 | { | ||
123 | return 1; | ||
124 | } | ||
125 | static inline u32 pri_ringstation_fbp_master_config_reset_action_f(u32 v) | ||
126 | { | ||
127 | return (v & 0x1) << 31; | ||
128 | } | ||
/*
 * Mask for the RESET_ACTION field (bit 31) of the FBP MASTER_CONFIG
 * register.  ~ then & with a register value to clear the field.
 *
 * The literal must be unsigned: (0x1 << 31) shifts a signed int into
 * its sign bit, which is undefined behavior (C11 6.5.7p4).
 */
static inline u32 pri_ringstation_fbp_master_config_reset_action_m(void)
{
	return 0x1U << 31;
}
133 | static inline u32 pri_ringstation_fbp_master_config_reset_action_v(u32 r) | ||
134 | { | ||
135 | return (r >> 31) & 0x1; | ||
136 | } | ||
137 | static inline u32 pri_ringstation_fbp_master_config_reset_action_error_v(void) | ||
138 | { | ||
139 | return 0x00000000; | ||
140 | } | ||
141 | static inline u32 pri_ringstation_fbp_master_config_reset_action_error_f(void) | ||
142 | { | ||
143 | return 0x0; | ||
144 | } | ||
145 | static inline u32 pri_ringstation_fbp_master_config_reset_action_soldier_on_v(void) | ||
146 | { | ||
147 | return 0x00000001; | ||
148 | } | ||
149 | static inline u32 pri_ringstation_fbp_master_config_reset_action_soldier_on_f(void) | ||
150 | { | ||
151 | return 0x80000000; | ||
152 | } | ||
153 | static inline u32 pri_ringstation_fbp_master_config_setup_clocks_s(void) | ||
154 | { | ||
155 | return 3; | ||
156 | } | ||
157 | static inline u32 pri_ringstation_fbp_master_config_setup_clocks_f(u32 v) | ||
158 | { | ||
159 | return (v & 0x7) << 20; | ||
160 | } | ||
161 | static inline u32 pri_ringstation_fbp_master_config_setup_clocks_m(void) | ||
162 | { | ||
163 | return 0x7 << 20; | ||
164 | } | ||
165 | static inline u32 pri_ringstation_fbp_master_config_setup_clocks_v(u32 r) | ||
166 | { | ||
167 | return (r >> 20) & 0x7; | ||
168 | } | ||
169 | static inline u32 pri_ringstation_fbp_master_config_setup_clocks_i_v(void) | ||
170 | { | ||
171 | return 0x00000000; | ||
172 | } | ||
173 | static inline u32 pri_ringstation_fbp_master_config_setup_clocks_i_f(void) | ||
174 | { | ||
175 | return 0x0; | ||
176 | } | ||
177 | static inline u32 pri_ringstation_fbp_master_config_wait_clocks_s(void) | ||
178 | { | ||
179 | return 3; | ||
180 | } | ||
181 | static inline u32 pri_ringstation_fbp_master_config_wait_clocks_f(u32 v) | ||
182 | { | ||
183 | return (v & 0x7) << 24; | ||
184 | } | ||
185 | static inline u32 pri_ringstation_fbp_master_config_wait_clocks_m(void) | ||
186 | { | ||
187 | return 0x7 << 24; | ||
188 | } | ||
189 | static inline u32 pri_ringstation_fbp_master_config_wait_clocks_v(u32 r) | ||
190 | { | ||
191 | return (r >> 24) & 0x7; | ||
192 | } | ||
193 | static inline u32 pri_ringstation_fbp_master_config_wait_clocks_i_v(void) | ||
194 | { | ||
195 | return 0x00000000; | ||
196 | } | ||
197 | static inline u32 pri_ringstation_fbp_master_config_wait_clocks_i_f(void) | ||
198 | { | ||
199 | return 0x0; | ||
200 | } | ||
201 | static inline u32 pri_ringstation_fbp_master_config_hold_clocks_s(void) | ||
202 | { | ||
203 | return 3; | ||
204 | } | ||
205 | static inline u32 pri_ringstation_fbp_master_config_hold_clocks_f(u32 v) | ||
206 | { | ||
207 | return (v & 0x7) << 27; | ||
208 | } | ||
209 | static inline u32 pri_ringstation_fbp_master_config_hold_clocks_m(void) | ||
210 | { | ||
211 | return 0x7 << 27; | ||
212 | } | ||
213 | static inline u32 pri_ringstation_fbp_master_config_hold_clocks_v(u32 r) | ||
214 | { | ||
215 | return (r >> 27) & 0x7; | ||
216 | } | ||
217 | static inline u32 pri_ringstation_fbp_master_config_hold_clocks_i_v(void) | ||
218 | { | ||
219 | return 0x00000000; | ||
220 | } | ||
221 | static inline u32 pri_ringstation_fbp_master_config_hold_clocks_i_f(void) | ||
222 | { | ||
223 | return 0x0; | ||
224 | } | ||
225 | |||
226 | #endif /* __hw_pri_ringstation_fbp_gk20a_h__ */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h new file mode 100644 index 00000000..e8aad933 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h | |||
@@ -0,0 +1,226 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/hw_pri_ringstation_gpc_gk20a.h | ||
3 | * | ||
4 | * Copyright (c) 2012-2013, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | * | ||
18 | */ | ||
19 | |||
20 | /* | ||
21 | * Function naming determines intended use: | ||
22 | * | ||
23 | * <x>_r(void) : Returns the offset for register <x>. | ||
24 | * | ||
25 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
26 | * | ||
27 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
28 | * | ||
29 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
30 | * and masked to place it at field <y> of register <x>. This value | ||
31 | * can be |'d with others to produce a full register value for | ||
32 | * register <x>. | ||
33 | * | ||
34 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
35 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
36 | * register <x>. | ||
37 | * | ||
38 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
39 | * to place it at field <y> of register <x>. This value can be |'d | ||
40 | * with others to produce a full register value for <x>. | ||
41 | * | ||
42 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
43 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
44 | * This value is suitable for direct comparison with other unshifted | ||
45 | * values appropriate for use in field <y> of register <x>. | ||
46 | * | ||
47 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
48 | * field <y> of register <x>. This value is suitable for direct | ||
49 | * comparison with unshifted values appropriate for use in field <y> | ||
50 | * of register <x>. | ||
51 | */ | ||
52 | |||
53 | #ifndef __hw_pri_ringstation_gpc_gk20a_h__ | ||
54 | #define __hw_pri_ringstation_gpc_gk20a_h__ | ||
55 | /*This file is autogenerated. Do not edit. */ | ||
56 | |||
57 | static inline u32 pri_ringstation_gpc_master_config_r(u32 i) | ||
58 | { | ||
59 | return 0x00128300+((i)*4); | ||
60 | } | ||
61 | static inline u32 pri_ringstation_gpc_master_config__size_1_v(void) | ||
62 | { | ||
63 | return 64; | ||
64 | } | ||
65 | static inline u32 pri_ringstation_gpc_master_config_timeout_s(void) | ||
66 | { | ||
67 | return 18; | ||
68 | } | ||
69 | static inline u32 pri_ringstation_gpc_master_config_timeout_f(u32 v) | ||
70 | { | ||
71 | return (v & 0x3ffff) << 0; | ||
72 | } | ||
73 | static inline u32 pri_ringstation_gpc_master_config_timeout_m(void) | ||
74 | { | ||
75 | return 0x3ffff << 0; | ||
76 | } | ||
77 | static inline u32 pri_ringstation_gpc_master_config_timeout_v(u32 r) | ||
78 | { | ||
79 | return (r >> 0) & 0x3ffff; | ||
80 | } | ||
81 | static inline u32 pri_ringstation_gpc_master_config_timeout_i_v(void) | ||
82 | { | ||
83 | return 0x00000064; | ||
84 | } | ||
85 | static inline u32 pri_ringstation_gpc_master_config_timeout_i_f(void) | ||
86 | { | ||
87 | return 0x64; | ||
88 | } | ||
89 | static inline u32 pri_ringstation_gpc_master_config_fs_action_s(void) | ||
90 | { | ||
91 | return 1; | ||
92 | } | ||
93 | static inline u32 pri_ringstation_gpc_master_config_fs_action_f(u32 v) | ||
94 | { | ||
95 | return (v & 0x1) << 30; | ||
96 | } | ||
97 | static inline u32 pri_ringstation_gpc_master_config_fs_action_m(void) | ||
98 | { | ||
99 | return 0x1 << 30; | ||
100 | } | ||
101 | static inline u32 pri_ringstation_gpc_master_config_fs_action_v(u32 r) | ||
102 | { | ||
103 | return (r >> 30) & 0x1; | ||
104 | } | ||
105 | static inline u32 pri_ringstation_gpc_master_config_fs_action_error_v(void) | ||
106 | { | ||
107 | return 0x00000000; | ||
108 | } | ||
109 | static inline u32 pri_ringstation_gpc_master_config_fs_action_error_f(void) | ||
110 | { | ||
111 | return 0x0; | ||
112 | } | ||
113 | static inline u32 pri_ringstation_gpc_master_config_fs_action_soldier_on_v(void) | ||
114 | { | ||
115 | return 0x00000001; | ||
116 | } | ||
117 | static inline u32 pri_ringstation_gpc_master_config_fs_action_soldier_on_f(void) | ||
118 | { | ||
119 | return 0x40000000; | ||
120 | } | ||
121 | static inline u32 pri_ringstation_gpc_master_config_reset_action_s(void) | ||
122 | { | ||
123 | return 1; | ||
124 | } | ||
125 | static inline u32 pri_ringstation_gpc_master_config_reset_action_f(u32 v) | ||
126 | { | ||
127 | return (v & 0x1) << 31; | ||
128 | } | ||
/*
 * Mask for the RESET_ACTION field (bit 31) of the GPC MASTER_CONFIG
 * register.  ~ then & with a register value to clear the field.
 *
 * The literal must be unsigned: (0x1 << 31) shifts a signed int into
 * its sign bit, which is undefined behavior (C11 6.5.7p4).
 */
static inline u32 pri_ringstation_gpc_master_config_reset_action_m(void)
{
	return 0x1U << 31;
}
133 | static inline u32 pri_ringstation_gpc_master_config_reset_action_v(u32 r) | ||
134 | { | ||
135 | return (r >> 31) & 0x1; | ||
136 | } | ||
137 | static inline u32 pri_ringstation_gpc_master_config_reset_action_error_v(void) | ||
138 | { | ||
139 | return 0x00000000; | ||
140 | } | ||
141 | static inline u32 pri_ringstation_gpc_master_config_reset_action_error_f(void) | ||
142 | { | ||
143 | return 0x0; | ||
144 | } | ||
145 | static inline u32 pri_ringstation_gpc_master_config_reset_action_soldier_on_v(void) | ||
146 | { | ||
147 | return 0x00000001; | ||
148 | } | ||
149 | static inline u32 pri_ringstation_gpc_master_config_reset_action_soldier_on_f(void) | ||
150 | { | ||
151 | return 0x80000000; | ||
152 | } | ||
153 | static inline u32 pri_ringstation_gpc_master_config_setup_clocks_s(void) | ||
154 | { | ||
155 | return 3; | ||
156 | } | ||
157 | static inline u32 pri_ringstation_gpc_master_config_setup_clocks_f(u32 v) | ||
158 | { | ||
159 | return (v & 0x7) << 20; | ||
160 | } | ||
161 | static inline u32 pri_ringstation_gpc_master_config_setup_clocks_m(void) | ||
162 | { | ||
163 | return 0x7 << 20; | ||
164 | } | ||
165 | static inline u32 pri_ringstation_gpc_master_config_setup_clocks_v(u32 r) | ||
166 | { | ||
167 | return (r >> 20) & 0x7; | ||
168 | } | ||
169 | static inline u32 pri_ringstation_gpc_master_config_setup_clocks_i_v(void) | ||
170 | { | ||
171 | return 0x00000000; | ||
172 | } | ||
173 | static inline u32 pri_ringstation_gpc_master_config_setup_clocks_i_f(void) | ||
174 | { | ||
175 | return 0x0; | ||
176 | } | ||
177 | static inline u32 pri_ringstation_gpc_master_config_wait_clocks_s(void) | ||
178 | { | ||
179 | return 3; | ||
180 | } | ||
181 | static inline u32 pri_ringstation_gpc_master_config_wait_clocks_f(u32 v) | ||
182 | { | ||
183 | return (v & 0x7) << 24; | ||
184 | } | ||
185 | static inline u32 pri_ringstation_gpc_master_config_wait_clocks_m(void) | ||
186 | { | ||
187 | return 0x7 << 24; | ||
188 | } | ||
189 | static inline u32 pri_ringstation_gpc_master_config_wait_clocks_v(u32 r) | ||
190 | { | ||
191 | return (r >> 24) & 0x7; | ||
192 | } | ||
193 | static inline u32 pri_ringstation_gpc_master_config_wait_clocks_i_v(void) | ||
194 | { | ||
195 | return 0x00000000; | ||
196 | } | ||
197 | static inline u32 pri_ringstation_gpc_master_config_wait_clocks_i_f(void) | ||
198 | { | ||
199 | return 0x0; | ||
200 | } | ||
201 | static inline u32 pri_ringstation_gpc_master_config_hold_clocks_s(void) | ||
202 | { | ||
203 | return 3; | ||
204 | } | ||
205 | static inline u32 pri_ringstation_gpc_master_config_hold_clocks_f(u32 v) | ||
206 | { | ||
207 | return (v & 0x7) << 27; | ||
208 | } | ||
209 | static inline u32 pri_ringstation_gpc_master_config_hold_clocks_m(void) | ||
210 | { | ||
211 | return 0x7 << 27; | ||
212 | } | ||
213 | static inline u32 pri_ringstation_gpc_master_config_hold_clocks_v(u32 r) | ||
214 | { | ||
215 | return (r >> 27) & 0x7; | ||
216 | } | ||
217 | static inline u32 pri_ringstation_gpc_master_config_hold_clocks_i_v(void) | ||
218 | { | ||
219 | return 0x00000000; | ||
220 | } | ||
221 | static inline u32 pri_ringstation_gpc_master_config_hold_clocks_i_f(void) | ||
222 | { | ||
223 | return 0x0; | ||
224 | } | ||
225 | |||
226 | #endif /* __hw_pri_ringstation_gpc_gk20a_h__ */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h new file mode 100644 index 00000000..c281dd54 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h | |||
@@ -0,0 +1,69 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_pri_ringstation_sys_gk20a_h_ | ||
51 | #define _hw_pri_ringstation_sys_gk20a_h_ | ||
52 | |||
/*
 * PRI ringstation SYS MASTER_CONFIG register 'i'.
 * Base offset 0x00122300; consecutive 32-bit registers (4-byte stride).
 */
static inline u32 pri_ringstation_sys_master_config_r(u32 i)
{
	return 0x00122300U + (i << 2);
}
57 | static inline u32 pri_ringstation_sys_decode_config_r(void) | ||
58 | { | ||
59 | return 0x00122204; | ||
60 | } | ||
61 | static inline u32 pri_ringstation_sys_decode_config_ring_m(void) | ||
62 | { | ||
63 | return 0x7 << 0; | ||
64 | } | ||
65 | static inline u32 pri_ringstation_sys_decode_config_ring_drop_on_ring_not_started_f(void) | ||
66 | { | ||
67 | return 0x1; | ||
68 | } | ||
69 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h new file mode 100644 index 00000000..93c55c30 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h | |||
@@ -0,0 +1,141 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_proj_gk20a_h_ | ||
51 | #define _hw_proj_gk20a_h_ | ||
52 | |||
/* GK20A project ("litter") constants: unit base addresses, per-instance
 * strides, and chip unit counts.  All values are compile-time constants
 * exposed through the generated <x>_v() accessor convention.
 * NOTE(review): the base/stride pairing (base + i * stride) is implied by
 * the names; confirm usage at call sites. */
static inline u32 proj_gpc_base_v(void)
{
	return 0x00500000;
}
static inline u32 proj_gpc_shared_base_v(void)
{
	return 0x00418000;
}
static inline u32 proj_gpc_stride_v(void)
{
	return 0x00008000;
}
static inline u32 proj_ltc_stride_v(void)
{
	return 0x00002000;
}
static inline u32 proj_lts_stride_v(void)
{
	return 0x00000400;
}
static inline u32 proj_ppc_in_gpc_base_v(void)
{
	return 0x00003000;
}
static inline u32 proj_ppc_in_gpc_stride_v(void)
{
	return 0x00000200;
}
static inline u32 proj_rop_base_v(void)
{
	return 0x00410000;
}
static inline u32 proj_rop_shared_base_v(void)
{
	return 0x00408800;
}
static inline u32 proj_rop_stride_v(void)
{
	return 0x00000400;
}
static inline u32 proj_tpc_in_gpc_base_v(void)
{
	return 0x00004000;
}
static inline u32 proj_tpc_in_gpc_stride_v(void)
{
	return 0x00000800;
}
static inline u32 proj_tpc_in_gpc_shared_base_v(void)
{
	return 0x00001800;
}
/* Unit counts for this chip: GK20A has a single PBDMA, GPC, FBP, etc. */
static inline u32 proj_host_num_pbdma_v(void)
{
	return 0x00000001;
}
static inline u32 proj_scal_litter_num_tpc_per_gpc_v(void)
{
	return 0x00000001;
}
static inline u32 proj_scal_litter_num_fbps_v(void)
{
	return 0x00000001;
}
static inline u32 proj_scal_litter_num_gpcs_v(void)
{
	return 0x00000001;
}
static inline u32 proj_scal_litter_num_pes_per_gpc_v(void)
{
	return 0x00000001;
}
static inline u32 proj_scal_litter_num_tpcs_per_pes_v(void)
{
	return 0x00000001;
}
static inline u32 proj_scal_litter_num_zcull_banks_v(void)
{
	return 0x00000004;
}
/* Architectural maxima (upper bounds for sizing arrays), not this chip's
 * actual counts. */
static inline u32 proj_scal_max_gpcs_v(void)
{
	return 0x00000020;
}
static inline u32 proj_scal_max_tpc_per_gpc_v(void)
{
	return 0x00000008;
}
141 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h new file mode 100644 index 00000000..d7d26b80 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h | |||
@@ -0,0 +1,737 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_pwr_gk20a_h_ | ||
51 | #define _hw_pwr_gk20a_h_ | ||
52 | |||
/* PWR (PMU) falcon interrupt registers.  The falcon interrupt block uses
 * one bit per source in a fixed layout: gptmr=0, wdtmr=1, mthd=2, ctxsw=3,
 * halt=4, exterr=5, swgen0=6, swgen1=7, with 8 external sources above. */
static inline u32 pwr_falcon_irqsset_r(void)
{
	/* Software interrupt set register. */
	return 0x0010a000;
}
static inline u32 pwr_falcon_irqsset_swgen0_set_f(void)
{
	return 0x40;
}
static inline u32 pwr_falcon_irqsclr_r(void)
{
	/* Interrupt clear register. */
	return 0x0010a004;
}
static inline u32 pwr_falcon_irqstat_r(void)
{
	/* Interrupt status register; *_true_f() below are its bit values. */
	return 0x0010a008;
}
static inline u32 pwr_falcon_irqstat_halt_true_f(void)
{
	return 0x10;
}
static inline u32 pwr_falcon_irqstat_exterr_true_f(void)
{
	return 0x20;
}
static inline u32 pwr_falcon_irqstat_swgen0_true_f(void)
{
	return 0x40;
}
static inline u32 pwr_falcon_irqmode_r(void)
{
	return 0x0010a00c;
}
/* Interrupt mask set register and its per-source field builders. */
static inline u32 pwr_falcon_irqmset_r(void)
{
	return 0x0010a010;
}
static inline u32 pwr_falcon_irqmset_gptmr_f(u32 v)
{
	return (v & 0x1) << 0;
}
static inline u32 pwr_falcon_irqmset_wdtmr_f(u32 v)
{
	return (v & 0x1) << 1;
}
static inline u32 pwr_falcon_irqmset_mthd_f(u32 v)
{
	return (v & 0x1) << 2;
}
static inline u32 pwr_falcon_irqmset_ctxsw_f(u32 v)
{
	return (v & 0x1) << 3;
}
static inline u32 pwr_falcon_irqmset_halt_f(u32 v)
{
	return (v & 0x1) << 4;
}
static inline u32 pwr_falcon_irqmset_exterr_f(u32 v)
{
	return (v & 0x1) << 5;
}
static inline u32 pwr_falcon_irqmset_swgen0_f(u32 v)
{
	return (v & 0x1) << 6;
}
static inline u32 pwr_falcon_irqmset_swgen1_f(u32 v)
{
	return (v & 0x1) << 7;
}
/* Interrupt mask clear register; same bit layout as irqmset. */
static inline u32 pwr_falcon_irqmclr_r(void)
{
	return 0x0010a014;
}
static inline u32 pwr_falcon_irqmclr_gptmr_f(u32 v)
{
	return (v & 0x1) << 0;
}
static inline u32 pwr_falcon_irqmclr_wdtmr_f(u32 v)
{
	return (v & 0x1) << 1;
}
static inline u32 pwr_falcon_irqmclr_mthd_f(u32 v)
{
	return (v & 0x1) << 2;
}
static inline u32 pwr_falcon_irqmclr_ctxsw_f(u32 v)
{
	return (v & 0x1) << 3;
}
static inline u32 pwr_falcon_irqmclr_halt_f(u32 v)
{
	return (v & 0x1) << 4;
}
static inline u32 pwr_falcon_irqmclr_exterr_f(u32 v)
{
	return (v & 0x1) << 5;
}
static inline u32 pwr_falcon_irqmclr_swgen0_f(u32 v)
{
	return (v & 0x1) << 6;
}
static inline u32 pwr_falcon_irqmclr_swgen1_f(u32 v)
{
	return (v & 0x1) << 7;
}
static inline u32 pwr_falcon_irqmclr_ext_f(u32 v)
{
	/* 8-bit external-interrupt group at bits [15:8]. */
	return (v & 0xff) << 8;
}
static inline u32 pwr_falcon_irqmask_r(void)
{
	return 0x0010a018;
}
/* Interrupt destination routing: "host" fields occupy bits [15:0],
 * "target" fields the same layout shifted up to bits [31:16]. */
static inline u32 pwr_falcon_irqdest_r(void)
{
	return 0x0010a01c;
}
static inline u32 pwr_falcon_irqdest_host_gptmr_f(u32 v)
{
	return (v & 0x1) << 0;
}
static inline u32 pwr_falcon_irqdest_host_wdtmr_f(u32 v)
{
	return (v & 0x1) << 1;
}
static inline u32 pwr_falcon_irqdest_host_mthd_f(u32 v)
{
	return (v & 0x1) << 2;
}
static inline u32 pwr_falcon_irqdest_host_ctxsw_f(u32 v)
{
	return (v & 0x1) << 3;
}
static inline u32 pwr_falcon_irqdest_host_halt_f(u32 v)
{
	return (v & 0x1) << 4;
}
static inline u32 pwr_falcon_irqdest_host_exterr_f(u32 v)
{
	return (v & 0x1) << 5;
}
static inline u32 pwr_falcon_irqdest_host_swgen0_f(u32 v)
{
	return (v & 0x1) << 6;
}
static inline u32 pwr_falcon_irqdest_host_swgen1_f(u32 v)
{
	return (v & 0x1) << 7;
}
static inline u32 pwr_falcon_irqdest_host_ext_f(u32 v)
{
	return (v & 0xff) << 8;
}
static inline u32 pwr_falcon_irqdest_target_gptmr_f(u32 v)
{
	return (v & 0x1) << 16;
}
static inline u32 pwr_falcon_irqdest_target_wdtmr_f(u32 v)
{
	return (v & 0x1) << 17;
}
static inline u32 pwr_falcon_irqdest_target_mthd_f(u32 v)
{
	return (v & 0x1) << 18;
}
static inline u32 pwr_falcon_irqdest_target_ctxsw_f(u32 v)
{
	return (v & 0x1) << 19;
}
static inline u32 pwr_falcon_irqdest_target_halt_f(u32 v)
{
	return (v & 0x1) << 20;
}
static inline u32 pwr_falcon_irqdest_target_exterr_f(u32 v)
{
	return (v & 0x1) << 21;
}
static inline u32 pwr_falcon_irqdest_target_swgen0_f(u32 v)
{
	return (v & 0x1) << 22;
}
static inline u32 pwr_falcon_irqdest_target_swgen1_f(u32 v)
{
	return (v & 0x1) << 23;
}
static inline u32 pwr_falcon_irqdest_target_ext_f(u32 v)
{
	return (v & 0xff) << 24;
}
/* PWR falcon context, CPU-control and DMA-transfer register accessors. */
static inline u32 pwr_falcon_curctx_r(void)
{
	/* Current context register. */
	return 0x0010a050;
}
static inline u32 pwr_falcon_nxtctx_r(void)
{
	/* Next context register. */
	return 0x0010a054;
}
static inline u32 pwr_falcon_mailbox0_r(void)
{
	return 0x0010a040;
}
static inline u32 pwr_falcon_mailbox1_r(void)
{
	return 0x0010a044;
}
static inline u32 pwr_falcon_itfen_r(void)
{
	/* Interface-enable register; ctxen is bit 0. */
	return 0x0010a048;
}
static inline u32 pwr_falcon_itfen_ctxen_enable_f(void)
{
	return 0x1;
}
static inline u32 pwr_falcon_idlestate_r(void)
{
	return 0x0010a04c;
}
static inline u32 pwr_falcon_idlestate_falcon_busy_v(u32 r)
{
	/* Bit 0: falcon core busy. */
	return (r >> 0) & 0x1;
}
static inline u32 pwr_falcon_idlestate_ext_busy_v(u32 r)
{
	/* Bits [15:1]: external-unit busy flags. */
	return (r >> 1) & 0x7fff;
}
static inline u32 pwr_falcon_os_r(void)
{
	return 0x0010a080;
}
static inline u32 pwr_falcon_engctl_r(void)
{
	return 0x0010a0a4;
}
static inline u32 pwr_falcon_cpuctl_r(void)
{
	return 0x0010a100;
}
static inline u32 pwr_falcon_cpuctl_startcpu_f(u32 v)
{
	/* Bit 1: start the falcon CPU. */
	return (v & 0x1) << 1;
}
static inline u32 pwr_falcon_bootvec_r(void)
{
	return 0x0010a104;
}
static inline u32 pwr_falcon_bootvec_vec_f(u32 v)
{
	/* Full 32-bit boot vector. */
	return (v & 0xffffffff) << 0;
}
static inline u32 pwr_falcon_dmactl_r(void)
{
	return 0x0010a10c;
}
static inline u32 pwr_falcon_dmactl_dmem_scrubbing_m(void)
{
	/* Bit 1: DMEM scrub still in progress. */
	return 0x1 << 1;
}
static inline u32 pwr_falcon_dmactl_imem_scrubbing_m(void)
{
	/* Bit 2: IMEM scrub still in progress. */
	return 0x1 << 2;
}
static inline u32 pwr_falcon_hwcfg_r(void)
{
	return 0x0010a108;
}
static inline u32 pwr_falcon_hwcfg_imem_size_v(u32 r)
{
	/* IMEM size field, bits [8:0].  Units not visible here —
	 * presumably 256-byte blocks; confirm at call sites. */
	return (r >> 0) & 0x1ff;
}
static inline u32 pwr_falcon_hwcfg_dmem_size_v(u32 r)
{
	/* DMEM size field, bits [17:9]. */
	return (r >> 9) & 0x1ff;
}
static inline u32 pwr_falcon_dmatrfbase_r(void)
{
	return 0x0010a110;
}
static inline u32 pwr_falcon_dmatrfmoffs_r(void)
{
	return 0x0010a114;
}
static inline u32 pwr_falcon_dmatrfcmd_r(void)
{
	return 0x0010a118;
}
static inline u32 pwr_falcon_dmatrfcmd_imem_f(u32 v)
{
	/* Bit 4: transfer targets IMEM (vs DMEM). */
	return (v & 0x1) << 4;
}
static inline u32 pwr_falcon_dmatrfcmd_write_f(u32 v)
{
	return (v & 0x1) << 5;
}
static inline u32 pwr_falcon_dmatrfcmd_size_f(u32 v)
{
	return (v & 0x7) << 8;
}
static inline u32 pwr_falcon_dmatrfcmd_ctxdma_f(u32 v)
{
	return (v & 0x7) << 12;
}
static inline u32 pwr_falcon_dmatrffboffs_r(void)
{
	return 0x0010a11c;
}
static inline u32 pwr_falcon_exterraddr_r(void)
{
	/* Address captured on an external-error event. */
	return 0x0010a168;
}
static inline u32 pwr_falcon_exterrstat_r(void)
{
	/* External-error status; valid flag is bit 31. */
	return 0x0010a16c;
}
365 | static inline u32 pwr_falcon_exterrstat_valid_m(void) | ||
366 | { | ||
367 | return 0x1 << 31; | ||
368 | } | ||
static inline u32 pwr_falcon_exterrstat_valid_v(u32 r)
{
	/* Extract the EXTERRSTAT "valid" flag (bit 31). */
	return (r >> 31) & 0x1;
}
static inline u32 pwr_falcon_exterrstat_valid_true_v(void)
{
	return 0x00000001;
}
/* PMU falcon ICD (debug command) interface. */
static inline u32 pwr_pmu_falcon_icd_cmd_r(void)
{
	return 0x0010a200;
}
static inline u32 pwr_pmu_falcon_icd_cmd_opc_s(void)
{
	/* Opcode field width in bits. */
	return 4;
}
static inline u32 pwr_pmu_falcon_icd_cmd_opc_f(u32 v)
{
	return (v & 0xf) << 0;
}
static inline u32 pwr_pmu_falcon_icd_cmd_opc_m(void)
{
	return 0xf << 0;
}
static inline u32 pwr_pmu_falcon_icd_cmd_opc_v(u32 r)
{
	return (r >> 0) & 0xf;
}
static inline u32 pwr_pmu_falcon_icd_cmd_opc_rreg_f(void)
{
	/* Opcode: read register. */
	return 0x8;
}
static inline u32 pwr_pmu_falcon_icd_cmd_opc_rstat_f(void)
{
	/* Opcode: read status. */
	return 0xe;
}
static inline u32 pwr_pmu_falcon_icd_cmd_idx_f(u32 v)
{
	return (v & 0x1f) << 8;
}
static inline u32 pwr_pmu_falcon_icd_rdata_r(void)
{
	/* ICD read-data result register. */
	return 0x0010a20c;
}
/* DMEM access port registers: control/data pairs, stride 8 per port. */
static inline u32 pwr_falcon_dmemc_r(u32 i)
{
	return 0x0010a1c0 + i*8;
}
static inline u32 pwr_falcon_dmemc_offs_f(u32 v)
{
	/* Word offset within a 256-byte block, bits [7:2]. */
	return (v & 0x3f) << 2;
}
static inline u32 pwr_falcon_dmemc_offs_m(void)
{
	return 0x3f << 2;
}
static inline u32 pwr_falcon_dmemc_blk_f(u32 v)
{
	/* Block index, bits [15:8]. */
	return (v & 0xff) << 8;
}
static inline u32 pwr_falcon_dmemc_blk_m(void)
{
	return 0xff << 8;
}
static inline u32 pwr_falcon_dmemc_aincw_f(u32 v)
{
	/* Bit 24: auto-increment on write. */
	return (v & 0x1) << 24;
}
static inline u32 pwr_falcon_dmemc_aincr_f(u32 v)
{
	/* Bit 25: auto-increment on read. */
	return (v & 0x1) << 25;
}
static inline u32 pwr_falcon_dmemd_r(u32 i)
{
	return 0x0010a1c4 + i*8;
}
/* PMU instance-block binding register. */
static inline u32 pwr_pmu_new_instblk_r(void)
{
	return 0x0010a480;
}
static inline u32 pwr_pmu_new_instblk_ptr_f(u32 v)
{
	/* 28-bit instance block pointer (page-shifted address). */
	return (v & 0xfffffff) << 0;
}
static inline u32 pwr_pmu_new_instblk_target_fb_f(void)
{
	return 0x0;
}
static inline u32 pwr_pmu_new_instblk_target_sys_coh_f(void)
{
	/* Target aperture: coherent system memory. */
	return 0x20000000;
}
static inline u32 pwr_pmu_new_instblk_valid_f(u32 v)
{
	return (v & 0x1) << 30;
}
/* PMU HW mutex-ID acquire/release registers. */
static inline u32 pwr_pmu_mutex_id_r(void)
{
	return 0x0010a488;
}
static inline u32 pwr_pmu_mutex_id_value_v(u32 r)
{
	return (r >> 0) & 0xff;
}
static inline u32 pwr_pmu_mutex_id_value_init_v(void)
{
	return 0x00000000;
}
static inline u32 pwr_pmu_mutex_id_value_not_avail_v(void)
{
	/* 0xff signals no mutex ID currently available. */
	return 0x000000ff;
}
static inline u32 pwr_pmu_mutex_id_release_r(void)
{
	return 0x0010a48c;
}
static inline u32 pwr_pmu_mutex_id_release_value_f(u32 v)
{
	return (v & 0xff) << 0;
}
static inline u32 pwr_pmu_mutex_id_release_value_m(void)
{
	return 0xff << 0;
}
static inline u32 pwr_pmu_mutex_id_release_value_init_v(void)
{
	return 0x00000000;
}
static inline u32 pwr_pmu_mutex_id_release_value_init_f(void)
{
	return 0x0;
}
/* PMU mutex array, command/message queue pointers and idle counters. */
static inline u32 pwr_pmu_mutex_r(u32 i)
{
	return 0x0010a580 + i*4;
}
static inline u32 pwr_pmu_mutex__size_1_v(void)
{
	/* Number of HW mutex registers (16). */
	return 0x00000010;
}
static inline u32 pwr_pmu_mutex_value_f(u32 v)
{
	return (v & 0xff) << 0;
}
static inline u32 pwr_pmu_mutex_value_v(u32 r)
{
	return (r >> 0) & 0xff;
}
static inline u32 pwr_pmu_mutex_value_initial_lock_f(void)
{
	return 0x0;
}
/* Command queue head/tail pointer arrays (4 queues each). */
static inline u32 pwr_pmu_queue_head_r(u32 i)
{
	return 0x0010a4a0 + i*4;
}
static inline u32 pwr_pmu_queue_head__size_1_v(void)
{
	return 0x00000004;
}
static inline u32 pwr_pmu_queue_head_address_f(u32 v)
{
	return (v & 0xffffffff) << 0;
}
static inline u32 pwr_pmu_queue_head_address_v(u32 r)
{
	return (r >> 0) & 0xffffffff;
}
static inline u32 pwr_pmu_queue_tail_r(u32 i)
{
	return 0x0010a4b0 + i*4;
}
static inline u32 pwr_pmu_queue_tail__size_1_v(void)
{
	return 0x00000004;
}
static inline u32 pwr_pmu_queue_tail_address_f(u32 v)
{
	return (v & 0xffffffff) << 0;
}
static inline u32 pwr_pmu_queue_tail_address_v(u32 r)
{
	return (r >> 0) & 0xffffffff;
}
/* Message queue (PMU->host) head/tail. */
static inline u32 pwr_pmu_msgq_head_r(void)
{
	return 0x0010a4c8;
}
static inline u32 pwr_pmu_msgq_head_val_f(u32 v)
{
	return (v & 0xffffffff) << 0;
}
static inline u32 pwr_pmu_msgq_head_val_v(u32 r)
{
	return (r >> 0) & 0xffffffff;
}
static inline u32 pwr_pmu_msgq_tail_r(void)
{
	return 0x0010a4cc;
}
static inline u32 pwr_pmu_msgq_tail_val_f(u32 v)
{
	return (v & 0xffffffff) << 0;
}
static inline u32 pwr_pmu_msgq_tail_val_v(u32 r)
{
	return (r >> 0) & 0xffffffff;
}
/* Idle-counter register triplets: mask/count/ctrl per counter, stride 16. */
static inline u32 pwr_pmu_idle_mask_r(u32 i)
{
	return 0x0010a504 + i*16;
}
static inline u32 pwr_pmu_idle_mask_gr_enabled_f(void)
{
	/* Bit 0: monitor the graphics engine. */
	return 0x1;
}
static inline u32 pwr_pmu_idle_mask_ce_2_enabled_f(void)
{
	/* Bit 21: monitor copy engine 2. */
	return 0x200000;
}
static inline u32 pwr_pmu_idle_count_r(u32 i)
{
	return 0x0010a508 + i*16;
}
static inline u32 pwr_pmu_idle_count_value_f(u32 v)
{
	/* 31-bit counter value; bit 31 is the reset control. */
	return (v & 0x7fffffff) << 0;
}
static inline u32 pwr_pmu_idle_count_value_v(u32 r)
{
	return (r >> 0) & 0x7fffffff;
}
static inline u32 pwr_pmu_idle_count_reset_f(u32 v)
{
	/* v is u32, so this shift into bit 31 is well-defined (unsigned). */
	return (v & 0x1) << 31;
}
static inline u32 pwr_pmu_idle_ctrl_r(u32 i)
{
	return 0x0010a50c + i*16;
}
static inline u32 pwr_pmu_idle_ctrl_value_m(void)
{
	return 0x3 << 0;
}
static inline u32 pwr_pmu_idle_ctrl_value_busy_f(void)
{
	/* Count cycles while busy. */
	return 0x2;
}
static inline u32 pwr_pmu_idle_ctrl_value_always_f(void)
{
	/* Count every cycle (free-running). */
	return 0x3;
}
static inline u32 pwr_pmu_idle_ctrl_filter_m(void)
{
	return 0x1 << 2;
}
static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
{
	return 0x0;
}
/* Supplementary idle masks, debug/mailbox scratch, BAR0 master,
 * power-gating counters and the FBIF transfer-configuration registers. */
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
{
	return 0x0010a9f0 + i*8;
}
static inline u32 pwr_pmu_idle_mask_1_supp_r(u32 i)
{
	return 0x0010a9f4 + i*8;
}
static inline u32 pwr_pmu_idle_ctrl_supp_r(u32 i)
{
	return 0x0010aa30 + i*8;
}
static inline u32 pwr_pmu_debug_r(u32 i)
{
	/* Debug scratch registers (4 entries). */
	return 0x0010a5c0 + i*4;
}
static inline u32 pwr_pmu_debug__size_1_v(void)
{
	return 0x00000004;
}
static inline u32 pwr_pmu_mailbox_r(u32 i)
{
	/* Mailbox scratch registers (12 entries). */
	return 0x0010a450 + i*4;
}
static inline u32 pwr_pmu_mailbox__size_1_v(void)
{
	return 0x0000000c;
}
/* BAR0 master interface (PMU-initiated register accesses). */
static inline u32 pwr_pmu_bar0_addr_r(void)
{
	return 0x0010a7a0;
}
static inline u32 pwr_pmu_bar0_data_r(void)
{
	return 0x0010a7a4;
}
static inline u32 pwr_pmu_bar0_ctl_r(void)
{
	return 0x0010a7ac;
}
static inline u32 pwr_pmu_bar0_timeout_r(void)
{
	return 0x0010a7a8;
}
static inline u32 pwr_pmu_bar0_fecs_error_r(void)
{
	return 0x0010a988;
}
static inline u32 pwr_pmu_bar0_error_status_r(void)
{
	return 0x0010a7b0;
}
/* Power-gating engine counter arrays. */
static inline u32 pwr_pmu_pg_idlefilth_r(u32 i)
{
	return 0x0010a6c0 + i*4;
}
static inline u32 pwr_pmu_pg_ppuidlefilth_r(u32 i)
{
	return 0x0010a6e8 + i*4;
}
static inline u32 pwr_pmu_pg_idle_cnt_r(u32 i)
{
	return 0x0010a710 + i*4;
}
static inline u32 pwr_pmu_pg_intren_r(u32 i)
{
	return 0x0010a760 + i*4;
}
/* FBIF transfer configuration: per-ctxdma target aperture and
 * memory-addressing type. */
static inline u32 pwr_fbif_transcfg_r(u32 i)
{
	return 0x0010a600 + i*4;
}
static inline u32 pwr_fbif_transcfg_target_local_fb_f(void)
{
	return 0x0;
}
static inline u32 pwr_fbif_transcfg_target_coherent_sysmem_f(void)
{
	return 0x1;
}
static inline u32 pwr_fbif_transcfg_target_noncoherent_sysmem_f(void)
{
	return 0x2;
}
static inline u32 pwr_fbif_transcfg_mem_type_s(void)
{
	/* mem_type field width in bits. */
	return 1;
}
static inline u32 pwr_fbif_transcfg_mem_type_f(u32 v)
{
	return (v & 0x1) << 2;
}
static inline u32 pwr_fbif_transcfg_mem_type_m(void)
{
	return 0x1 << 2;
}
static inline u32 pwr_fbif_transcfg_mem_type_v(u32 r)
{
	return (r >> 2) & 0x1;
}
static inline u32 pwr_fbif_transcfg_mem_type_virtual_f(void)
{
	return 0x0;
}
static inline u32 pwr_fbif_transcfg_mem_type_physical_f(void)
{
	/* Pre-shifted field value: mem_type=1 at bit 2. */
	return 0x4;
}
737 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h new file mode 100644 index 00000000..7eff3881 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h | |||
@@ -0,0 +1,389 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_ram_gk20a_h_ | ||
51 | #define _hw_ram_gk20a_h_ | ||
52 | |||
/* Instance block (RAMIN): RAMFC sub-block size (bits) and word offset. */
static inline u32 ram_in_ramfc_s(void)
{
	return 4096;
}
static inline u32 ram_in_ramfc_w(void)
{
	return 0;
}
/* page_dir_base: target (2-bit field at bit 0 of word 128) */
static inline u32 ram_in_page_dir_base_target_f(u32 v)
{
	return v & 0x3;
}
static inline u32 ram_in_page_dir_base_target_w(void)
{
	return 128;
}
static inline u32 ram_in_page_dir_base_target_vid_mem_f(void)
{
	return 0x0;
}
/* page_dir_base: volatile flag (word 128) */
static inline u32 ram_in_page_dir_base_vol_w(void)
{
	return 128;
}
static inline u32 ram_in_page_dir_base_vol_true_f(void)
{
	return 0x4;
}
/* page_dir_base address: low 20 bits placed at bit 12 (word 128),
 * high 8 bits at bit 0 of word 129. */
static inline u32 ram_in_page_dir_base_lo_f(u32 v)
{
	return (v & 0xfffff) << 12;
}
static inline u32 ram_in_page_dir_base_lo_w(void)
{
	return 128;
}
static inline u32 ram_in_page_dir_base_hi_f(u32 v)
{
	return v & 0xff;
}
static inline u32 ram_in_page_dir_base_hi_w(void)
{
	return 129;
}
/* adr_limit: same lo/hi split, in words 130 and 131 */
static inline u32 ram_in_adr_limit_lo_f(u32 v)
{
	return (v & 0xfffff) << 12;
}
static inline u32 ram_in_adr_limit_lo_w(void)
{
	return 130;
}
static inline u32 ram_in_adr_limit_hi_f(u32 v)
{
	return v & 0xff;
}
static inline u32 ram_in_adr_limit_hi_w(void)
{
	return 131;
}
/* engine context-switch mode (word 132): wfi vs fg */
static inline u32 ram_in_engine_cs_w(void)
{
	return 132;
}
static inline u32 ram_in_engine_cs_wfi_v(void)
{
	return 0x00000000;
}
static inline u32 ram_in_engine_cs_wfi_f(void)
{
	return 0x0;
}
static inline u32 ram_in_engine_cs_fg_v(void)
{
	return 0x00000001;
}
static inline u32 ram_in_engine_cs_fg_f(void)
{
	return 0x8;
}
/* GR context-switch mode (also word 132) */
static inline u32 ram_in_gr_cs_w(void)
{
	return 132;
}
static inline u32 ram_in_gr_cs_wfi_f(void)
{
	return 0x0;
}
/* GR wait-for-idle: target/mode fields share word 132 */
static inline u32 ram_in_gr_wfi_target_w(void)
{
	return 132;
}
static inline u32 ram_in_gr_wfi_mode_w(void)
{
	return 132;
}
static inline u32 ram_in_gr_wfi_mode_physical_v(void)
{
	return 0x00000000;
}
static inline u32 ram_in_gr_wfi_mode_physical_f(void)
{
	return 0x0;
}
static inline u32 ram_in_gr_wfi_mode_virtual_v(void)
{
	return 0x00000001;
}
static inline u32 ram_in_gr_wfi_mode_virtual_f(void)
{
	return 0x4;
}
/* GR wfi pointer: lo 20 bits at bit 12 (word 132), hi 8 bits (word 133) */
static inline u32 ram_in_gr_wfi_ptr_lo_f(u32 v)
{
	return (v & 0xfffff) << 12;
}
static inline u32 ram_in_gr_wfi_ptr_lo_w(void)
{
	return 132;
}
static inline u32 ram_in_gr_wfi_ptr_hi_f(u32 v)
{
	return v & 0xff;
}
static inline u32 ram_in_gr_wfi_ptr_hi_w(void)
{
	return 133;
}
/* instance block alignment shift (4 KiB) and allocation size */
static inline u32 ram_in_base_shift_v(void)
{
	return 0x0000000c;
}
static inline u32 ram_in_alloc_size_v(void)
{
	return 0x00001000;
}
/* RAMFC (channel FIFO context) image size in bytes. */
static inline u32 ram_fc_size_val_v(void)
{
	return 0x00000200;
}
/* Word offsets of the fields inside the RAMFC image. */
static inline u32 ram_fc_gp_put_w(void)
{
	return 0;
}
static inline u32 ram_fc_userd_w(void)
{
	return 2;
}
static inline u32 ram_fc_userd_hi_w(void)
{
	return 3;
}
static inline u32 ram_fc_signature_w(void)
{
	return 4;
}
static inline u32 ram_fc_gp_get_w(void)
{
	return 5;
}
static inline u32 ram_fc_pb_get_w(void)
{
	return 6;
}
static inline u32 ram_fc_pb_get_hi_w(void)
{
	return 7;
}
static inline u32 ram_fc_pb_top_level_get_w(void)
{
	return 8;
}
static inline u32 ram_fc_pb_top_level_get_hi_w(void)
{
	return 9;
}
static inline u32 ram_fc_acquire_w(void)
{
	return 12;
}
static inline u32 ram_fc_semaphorea_w(void)
{
	return 14;
}
static inline u32 ram_fc_semaphoreb_w(void)
{
	return 15;
}
static inline u32 ram_fc_semaphorec_w(void)
{
	return 16;
}
static inline u32 ram_fc_semaphored_w(void)
{
	return 17;
}
static inline u32 ram_fc_gp_base_w(void)
{
	return 18;
}
static inline u32 ram_fc_gp_base_hi_w(void)
{
	return 19;
}
static inline u32 ram_fc_gp_fetch_w(void)
{
	return 20;
}
static inline u32 ram_fc_pb_fetch_w(void)
{
	return 21;
}
static inline u32 ram_fc_pb_fetch_hi_w(void)
{
	return 22;
}
static inline u32 ram_fc_pb_put_w(void)
{
	return 23;
}
static inline u32 ram_fc_pb_put_hi_w(void)
{
	return 24;
}
static inline u32 ram_fc_pb_header_w(void)
{
	return 33;
}
static inline u32 ram_fc_pb_count_w(void)
{
	return 34;
}
static inline u32 ram_fc_subdevice_w(void)
{
	return 37;
}
static inline u32 ram_fc_formats_w(void)
{
	return 39;
}
static inline u32 ram_fc_syncpointa_w(void)
{
	return 41;
}
static inline u32 ram_fc_syncpointb_w(void)
{
	return 42;
}
static inline u32 ram_fc_target_w(void)
{
	return 43;
}
static inline u32 ram_fc_hce_ctrl_w(void)
{
	return 57;
}
static inline u32 ram_fc_chid_w(void)
{
	return 58;
}
/* chid: 12-bit channel id field at bit 0 */
static inline u32 ram_fc_chid_id_f(u32 v)
{
	return v & 0xfff;
}
static inline u32 ram_fc_chid_id_w(void)
{
	return 0;
}
static inline u32 ram_fc_eng_timeslice_w(void)
{
	return 62;
}
static inline u32 ram_fc_pb_timeslice_w(void)
{
	return 63;
}
/* USERD: per-channel alignment shift (512 B) and per-channel size. */
static inline u32 ram_userd_base_shift_v(void)
{
	return 0x00000009;
}
static inline u32 ram_userd_chan_size_v(void)
{
	return 0x00000200;
}
/* Word offsets of the fields inside a USERD entry. */
static inline u32 ram_userd_put_w(void)
{
	return 16;
}
static inline u32 ram_userd_get_w(void)
{
	return 17;
}
static inline u32 ram_userd_ref_w(void)
{
	return 18;
}
static inline u32 ram_userd_put_hi_w(void)
{
	return 19;
}
static inline u32 ram_userd_ref_threshold_w(void)
{
	return 20;
}
static inline u32 ram_userd_top_level_get_w(void)
{
	return 22;
}
static inline u32 ram_userd_top_level_get_hi_w(void)
{
	return 23;
}
static inline u32 ram_userd_get_hi_w(void)
{
	return 24;
}
static inline u32 ram_userd_gp_get_w(void)
{
	return 34;
}
static inline u32 ram_userd_gp_put_w(void)
{
	return 35;
}
/* gp_top_level_get aliases the same words as top_level_get (22/23). */
static inline u32 ram_userd_gp_top_level_get_w(void)
{
	return 22;
}
static inline u32 ram_userd_gp_top_level_get_hi_w(void)
{
	return 23;
}
/* Runlist entry size in bytes. */
static inline u32 ram_rl_entry_size_v(void)
{
	return 0x00000008;
}
389 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h new file mode 100644 index 00000000..b1e6658d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h | |||
@@ -0,0 +1,2150 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/hw_sim_gk20a.h | ||
3 | * | ||
4 | * Copyright (c) 2012, NVIDIA Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | * | ||
18 | */ | ||
19 | |||
20 | /* | ||
21 | * Function naming determines intended use: | ||
22 | * | ||
23 | * <x>_r(void) : Returns the offset for register <x>. | ||
24 | * | ||
25 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
26 | * | ||
27 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
28 | * | ||
29 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
30 | * and masked to place it at field <y> of register <x>. This value | ||
31 | * can be |'d with others to produce a full register value for | ||
32 | * register <x>. | ||
33 | * | ||
34 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
35 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
36 | * register <x>. | ||
37 | * | ||
38 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
39 | * to place it at field <y> of register <x>. This value can be |'d | ||
40 | * with others to produce a full register value for <x>. | ||
41 | * | ||
42 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
43 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
44 | * This value is suitable for direct comparison with other unshifted | ||
45 | * values appropriate for use in field <y> of register <x>. | ||
46 | * | ||
47 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
48 | * field <y> of register <x>. This value is suitable for direct | ||
49 | * comparison with unshifted values appropriate for use in field <y> | ||
50 | * of register <x>. | ||
51 | */ | ||
52 | |||
53 | #ifndef __hw_sim_gk20a_h__ | ||
54 | #define __hw_sim_gk20a_h__ | ||
55 | /*This file is autogenerated. Do not edit. */ | ||
56 | |||
/* Simulator send-ring control register offset. */
static inline u32 sim_send_ring_r(void)
{
	return 0x00000000;
}
/* target: 2-bit field at bit 0 selecting the ring's memory aperture */
static inline u32 sim_send_ring_target_s(void)
{
	return 2;
}
static inline u32 sim_send_ring_target_f(u32 v)
{
	return v & 0x3;
}
static inline u32 sim_send_ring_target_m(void)
{
	return 0x3;
}
static inline u32 sim_send_ring_target_v(u32 r)
{
	return r & 0x3;
}
/* target field constants (init/prod defaults and aperture values) */
static inline u32 sim_send_ring_target_phys_init_v(void)
{
	return 0x00000001;
}
static inline u32 sim_send_ring_target_phys_init_f(void)
{
	return 0x1;
}
static inline u32 sim_send_ring_target_phys__init_v(void)
{
	return 0x00000001;
}
static inline u32 sim_send_ring_target_phys__init_f(void)
{
	return 0x1;
}
static inline u32 sim_send_ring_target_phys__prod_v(void)
{
	return 0x00000001;
}
static inline u32 sim_send_ring_target_phys__prod_f(void)
{
	return 0x1;
}
static inline u32 sim_send_ring_target_phys_nvm_v(void)
{
	return 0x00000001;
}
static inline u32 sim_send_ring_target_phys_nvm_f(void)
{
	return 0x1;
}
static inline u32 sim_send_ring_target_phys_pci_v(void)
{
	return 0x00000002;
}
static inline u32 sim_send_ring_target_phys_pci_f(void)
{
	return 0x2;
}
static inline u32 sim_send_ring_target_phys_pci_coherent_v(void)
{
	return 0x00000003;
}
static inline u32 sim_send_ring_target_phys_pci_coherent_f(void)
{
	return 0x3;
}
/* send-ring status: 1-bit field at bit 3 (valid/invalid) */
static inline u32 sim_send_ring_status_s(void)
{
	return 1;
}
static inline u32 sim_send_ring_status_f(u32 v)
{
	return (v & 0x1) << 3;
}
static inline u32 sim_send_ring_status_m(void)
{
	return 0x1 << 3;
}
static inline u32 sim_send_ring_status_v(u32 r)
{
	return (r >> 3) & 0x1;
}
/* status constants: everything defaults to invalid (0) */
static inline u32 sim_send_ring_status_init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_status_init_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_status__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_status__init_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_status__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_status__prod_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_status_invalid_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_status_invalid_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_status_valid_v(void)
{
	return 0x00000001;
}
static inline u32 sim_send_ring_status_valid_f(void)
{
	return 0x8;
}
/* send-ring size: 2-bit field at bit 4, encoding 4/8/12/16 KiB */
static inline u32 sim_send_ring_size_s(void)
{
	return 2;
}
static inline u32 sim_send_ring_size_f(u32 v)
{
	return (v & 0x3) << 4;
}
static inline u32 sim_send_ring_size_m(void)
{
	return 0x3 << 4;
}
static inline u32 sim_send_ring_size_v(u32 r)
{
	return (r >> 4) & 0x3;
}
/* size constants (default is 4 KiB, encoding 0) */
static inline u32 sim_send_ring_size_init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_size_init_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_size__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_size__init_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_size__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_size__prod_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_size_4kb_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_size_4kb_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_size_8kb_v(void)
{
	return 0x00000001;
}
static inline u32 sim_send_ring_size_8kb_f(void)
{
	return 0x10;
}
static inline u32 sim_send_ring_size_12kb_v(void)
{
	return 0x00000002;
}
static inline u32 sim_send_ring_size_12kb_f(void)
{
	return 0x20;
}
static inline u32 sim_send_ring_size_16kb_v(void)
{
	return 0x00000003;
}
static inline u32 sim_send_ring_size_16kb_f(void)
{
	return 0x30;
}
/* send-ring gp_in_ring: 1-bit yes/no flag at bit 11 */
static inline u32 sim_send_ring_gp_in_ring_s(void)
{
	return 1;
}
static inline u32 sim_send_ring_gp_in_ring_f(u32 v)
{
	return (v & 0x1) << 11;
}
static inline u32 sim_send_ring_gp_in_ring_m(void)
{
	return 0x1 << 11;
}
static inline u32 sim_send_ring_gp_in_ring_v(u32 r)
{
	return (r >> 11) & 0x1;
}
/* flag constants: defaults to no (0) */
static inline u32 sim_send_ring_gp_in_ring__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_gp_in_ring__init_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_gp_in_ring__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_gp_in_ring__prod_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_gp_in_ring_no_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_gp_in_ring_no_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_gp_in_ring_yes_v(void)
{
	return 0x00000001;
}
static inline u32 sim_send_ring_gp_in_ring_yes_f(void)
{
	return 0x800;
}
/* send-ring base address, low part: 20-bit field at bits 31:12 */
static inline u32 sim_send_ring_addr_lo_s(void)
{
	return 20;
}
static inline u32 sim_send_ring_addr_lo_f(u32 v)
{
	return (v & 0xfffff) << 12;
}
static inline u32 sim_send_ring_addr_lo_m(void)
{
	/* Unsigned literal required: 0xfffff << 12 (0xfffff000) exceeds
	 * INT_MAX, and left-shift overflow of a signed int is undefined
	 * behavior (C99 6.5.7). Resulting value is unchanged. */
	return 0xfffffU << 12;
}
static inline u32 sim_send_ring_addr_lo_v(u32 r)
{
	return (r >> 12) & 0xfffff;
}
/* field constants: init/prod default to 0 */
static inline u32 sim_send_ring_addr_lo__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_addr_lo__init_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_addr_lo__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_addr_lo__prod_f(void)
{
	return 0x0;
}
/* Simulator send-ring high-address register offset. */
static inline u32 sim_send_ring_hi_r(void)
{
	return 0x00000004;
}
/* high part of the send-ring base address: 20-bit field at bit 0 */
static inline u32 sim_send_ring_hi_addr_s(void)
{
	return 20;
}
static inline u32 sim_send_ring_hi_addr_f(u32 v)
{
	return (v & 0xfffff) << 0;
}
static inline u32 sim_send_ring_hi_addr_m(void)
{
	return 0xfffff << 0;
}
static inline u32 sim_send_ring_hi_addr_v(u32 r)
{
	return (r >> 0) & 0xfffff;
}
static inline u32 sim_send_ring_hi_addr__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_hi_addr__init_f(void)
{
	return 0x0;
}
static inline u32 sim_send_ring_hi_addr__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_send_ring_hi_addr__prod_f(void)
{
	return 0x0;
}
/* Send-ring put register: 29-bit pointer at bits 31:3. */
static inline u32 sim_send_put_r(void)
{
	return 0x00000008;
}
static inline u32 sim_send_put_pointer_s(void)
{
	return 29;
}
static inline u32 sim_send_put_pointer_f(u32 v)
{
	return (v & 0x1fffffff) << 3;
}
static inline u32 sim_send_put_pointer_m(void)
{
	/* Unsigned literal required: 0x1fffffff << 3 (0xfffffff8) exceeds
	 * INT_MAX, and left-shift overflow of a signed int is undefined
	 * behavior (C99 6.5.7). Resulting value is unchanged. */
	return 0x1fffffffU << 3;
}
static inline u32 sim_send_put_pointer_v(u32 r)
{
	return (r >> 3) & 0x1fffffff;
}
/* Send-ring get register: same 29-bit pointer layout. */
static inline u32 sim_send_get_r(void)
{
	return 0x0000000c;
}
static inline u32 sim_send_get_pointer_s(void)
{
	return 29;
}
static inline u32 sim_send_get_pointer_f(u32 v)
{
	return (v & 0x1fffffff) << 3;
}
static inline u32 sim_send_get_pointer_m(void)
{
	/* Unsigned literal: same signed-shift-overflow fix as above. */
	return 0x1fffffffU << 3;
}
static inline u32 sim_send_get_pointer_v(u32 r)
{
	return (r >> 3) & 0x1fffffff;
}
/* Simulator receive-ring control register offset. */
static inline u32 sim_recv_ring_r(void)
{
	return 0x00000010;
}
/* target: 2-bit field at bit 0 selecting the ring's memory aperture */
static inline u32 sim_recv_ring_target_s(void)
{
	return 2;
}
static inline u32 sim_recv_ring_target_f(u32 v)
{
	return v & 0x3;
}
static inline u32 sim_recv_ring_target_m(void)
{
	return 0x3;
}
static inline u32 sim_recv_ring_target_v(u32 r)
{
	return r & 0x3;
}
/* target field constants (init/prod defaults and aperture values) */
static inline u32 sim_recv_ring_target_phys_init_v(void)
{
	return 0x00000001;
}
static inline u32 sim_recv_ring_target_phys_init_f(void)
{
	return 0x1;
}
static inline u32 sim_recv_ring_target_phys__init_v(void)
{
	return 0x00000001;
}
static inline u32 sim_recv_ring_target_phys__init_f(void)
{
	return 0x1;
}
static inline u32 sim_recv_ring_target_phys__prod_v(void)
{
	return 0x00000001;
}
static inline u32 sim_recv_ring_target_phys__prod_f(void)
{
	return 0x1;
}
static inline u32 sim_recv_ring_target_phys_nvm_v(void)
{
	return 0x00000001;
}
static inline u32 sim_recv_ring_target_phys_nvm_f(void)
{
	return 0x1;
}
static inline u32 sim_recv_ring_target_phys_pci_v(void)
{
	return 0x00000002;
}
static inline u32 sim_recv_ring_target_phys_pci_f(void)
{
	return 0x2;
}
static inline u32 sim_recv_ring_target_phys_pci_coherent_v(void)
{
	return 0x00000003;
}
static inline u32 sim_recv_ring_target_phys_pci_coherent_f(void)
{
	return 0x3;
}
/* recv-ring status: 1-bit field at bit 3 (valid/invalid) */
static inline u32 sim_recv_ring_status_s(void)
{
	return 1;
}
static inline u32 sim_recv_ring_status_f(u32 v)
{
	return (v & 0x1) << 3;
}
static inline u32 sim_recv_ring_status_m(void)
{
	return 0x1 << 3;
}
static inline u32 sim_recv_ring_status_v(u32 r)
{
	return (r >> 3) & 0x1;
}
/* status constants: everything defaults to invalid (0) */
static inline u32 sim_recv_ring_status_init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_status_init_f(void)
{
	return 0x0;
}
static inline u32 sim_recv_ring_status__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_status__init_f(void)
{
	return 0x0;
}
static inline u32 sim_recv_ring_status__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_status__prod_f(void)
{
	return 0x0;
}
static inline u32 sim_recv_ring_status_invalid_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_status_invalid_f(void)
{
	return 0x0;
}
static inline u32 sim_recv_ring_status_valid_v(void)
{
	return 0x00000001;
}
static inline u32 sim_recv_ring_status_valid_f(void)
{
	return 0x8;
}
/* recv-ring size: 2-bit field at bit 4, encoding 4/8/12/16 KiB */
static inline u32 sim_recv_ring_size_s(void)
{
	return 2;
}
static inline u32 sim_recv_ring_size_f(u32 v)
{
	return (v & 0x3) << 4;
}
static inline u32 sim_recv_ring_size_m(void)
{
	return 0x3 << 4;
}
static inline u32 sim_recv_ring_size_v(u32 r)
{
	return (r >> 4) & 0x3;
}
/* size constants (default is 4 KiB, encoding 0) */
static inline u32 sim_recv_ring_size_init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_size_init_f(void)
{
	return 0x0;
}
static inline u32 sim_recv_ring_size__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_size__init_f(void)
{
	return 0x0;
}
static inline u32 sim_recv_ring_size__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_size__prod_f(void)
{
	return 0x0;
}
static inline u32 sim_recv_ring_size_4kb_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_size_4kb_f(void)
{
	return 0x0;
}
static inline u32 sim_recv_ring_size_8kb_v(void)
{
	return 0x00000001;
}
static inline u32 sim_recv_ring_size_8kb_f(void)
{
	return 0x10;
}
static inline u32 sim_recv_ring_size_12kb_v(void)
{
	return 0x00000002;
}
static inline u32 sim_recv_ring_size_12kb_f(void)
{
	return 0x20;
}
static inline u32 sim_recv_ring_size_16kb_v(void)
{
	return 0x00000003;
}
static inline u32 sim_recv_ring_size_16kb_f(void)
{
	return 0x30;
}
/* recv-ring gp_in_ring: 1-bit yes/no flag at bit 11 */
static inline u32 sim_recv_ring_gp_in_ring_s(void)
{
	return 1;
}
static inline u32 sim_recv_ring_gp_in_ring_f(u32 v)
{
	return (v & 0x1) << 11;
}
static inline u32 sim_recv_ring_gp_in_ring_m(void)
{
	return 0x1 << 11;
}
static inline u32 sim_recv_ring_gp_in_ring_v(u32 r)
{
	return (r >> 11) & 0x1;
}
/* flag constants: defaults to no (0) */
static inline u32 sim_recv_ring_gp_in_ring__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_gp_in_ring__init_f(void)
{
	return 0x0;
}
static inline u32 sim_recv_ring_gp_in_ring__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_gp_in_ring__prod_f(void)
{
	return 0x0;
}
static inline u32 sim_recv_ring_gp_in_ring_no_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_gp_in_ring_no_f(void)
{
	return 0x0;
}
static inline u32 sim_recv_ring_gp_in_ring_yes_v(void)
{
	return 0x00000001;
}
static inline u32 sim_recv_ring_gp_in_ring_yes_f(void)
{
	return 0x800;
}
/* SIM_RECV_RING ADDR_LO: 20-bit field at bits 31:12 holding the low part
 * of the ring buffer address (page-aligned, hence the 12-bit shift).
 */
static inline u32 sim_recv_ring_addr_lo_s(void)
{
	return 20;
}
static inline u32 sim_recv_ring_addr_lo_f(u32 v)
{
	return (v & 0xfffff) << 12;
}
661 | static inline u32 sim_recv_ring_addr_lo_m(void) | ||
662 | { | ||
663 | return 0xfffff << 12; | ||
664 | } | ||
665 | static inline u32 sim_recv_ring_addr_lo_v(u32 r) | ||
666 | { | ||
667 | return (r >> 12) & 0xfffff; | ||
668 | } | ||
669 | static inline u32 sim_recv_ring_addr_lo__init_v(void) | ||
670 | { | ||
671 | return 0x00000000; | ||
672 | } | ||
673 | static inline u32 sim_recv_ring_addr_lo__init_f(void) | ||
674 | { | ||
675 | return 0x0; | ||
676 | } | ||
677 | static inline u32 sim_recv_ring_addr_lo__prod_v(void) | ||
678 | { | ||
679 | return 0x00000000; | ||
680 | } | ||
681 | static inline u32 sim_recv_ring_addr_lo__prod_f(void) | ||
682 | { | ||
683 | return 0x0; | ||
684 | } | ||
/* SIM_RECV_RING_HI register (offset 0x14): high bits of the receive-ring
 * address in a 20-bit ADDR field at bits 19:0.
 */
static inline u32 sim_recv_ring_hi_r(void)
{
	return 0x00000014;
}
static inline u32 sim_recv_ring_hi_addr_s(void)
{
	return 20;
}
static inline u32 sim_recv_ring_hi_addr_f(u32 v)
{
	return (v & 0xfffff) << 0;
}
static inline u32 sim_recv_ring_hi_addr_m(void)
{
	return 0xfffff << 0;
}
static inline u32 sim_recv_ring_hi_addr_v(u32 r)
{
	return (r >> 0) & 0xfffff;
}
/* Reset (__init) and production (__prod) defaults for ADDR: 0. */
static inline u32 sim_recv_ring_hi_addr__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_hi_addr__init_f(void)
{
	return 0x0;
}
static inline u32 sim_recv_ring_hi_addr__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_recv_ring_hi_addr__prod_f(void)
{
	return 0x0;
}
/* SIM_RECV_PUT (offset 0x18) and SIM_RECV_GET (offset 0x1c): producer and
 * consumer indices of the receive ring. POINTER is an 11-bit entry index
 * at bits 13:3 (entries are 8-byte aligned, hence the 3-bit shift).
 */
static inline u32 sim_recv_put_r(void)
{
	return 0x00000018;
}
static inline u32 sim_recv_put_pointer_s(void)
{
	return 11;
}
static inline u32 sim_recv_put_pointer_f(u32 v)
{
	return (v & 0x7ff) << 3;
}
static inline u32 sim_recv_put_pointer_m(void)
{
	return 0x7ff << 3;
}
static inline u32 sim_recv_put_pointer_v(u32 r)
{
	return (r >> 3) & 0x7ff;
}
static inline u32 sim_recv_get_r(void)
{
	return 0x0000001c;
}
static inline u32 sim_recv_get_pointer_s(void)
{
	return 11;
}
static inline u32 sim_recv_get_pointer_f(u32 v)
{
	return (v & 0x7ff) << 3;
}
static inline u32 sim_recv_get_pointer_m(void)
{
	return 0x7ff << 3;
}
static inline u32 sim_recv_get_pointer_v(u32 r)
{
	return (r >> 3) & 0x7ff;
}
/* SIM_CONFIG register (offset 0x20): global simulator configuration.
 * Fields: MODE (bit 0), CHANNELS (bits 7:1), CACHED_ONLY (bit 8),
 * VALIDITY (bits 10:9), SIMULATION (bits 13:12), SECONDARY_DISPLAY
 * (bit 14), NUM_HEADS (bits 24:17).
 */
static inline u32 sim_config_r(void)
{
	return 0x00000020;
}
/* MODE: enable/disable bit at bit 0. */
static inline u32 sim_config_mode_s(void)
{
	return 1;
}
static inline u32 sim_config_mode_f(u32 v)
{
	return (v & 0x1) << 0;
}
static inline u32 sim_config_mode_m(void)
{
	return 0x1 << 0;
}
static inline u32 sim_config_mode_v(u32 r)
{
	return (r >> 0) & 0x1;
}
static inline u32 sim_config_mode_disabled_v(void)
{
	return 0x00000000;
}
static inline u32 sim_config_mode_disabled_f(void)
{
	return 0x0;
}
static inline u32 sim_config_mode_enabled_v(void)
{
	return 0x00000001;
}
static inline u32 sim_config_mode_enabled_f(void)
{
	return 0x1;
}
/* CHANNELS: 7-bit count at bits 7:1. */
static inline u32 sim_config_channels_s(void)
{
	return 7;
}
static inline u32 sim_config_channels_f(u32 v)
{
	return (v & 0x7f) << 1;
}
static inline u32 sim_config_channels_m(void)
{
	return 0x7f << 1;
}
static inline u32 sim_config_channels_v(u32 r)
{
	return (r >> 1) & 0x7f;
}
static inline u32 sim_config_channels_none_v(void)
{
	return 0x00000000;
}
static inline u32 sim_config_channels_none_f(void)
{
	return 0x0;
}
/* CACHED_ONLY: single-bit flag at bit 8. */
static inline u32 sim_config_cached_only_s(void)
{
	return 1;
}
static inline u32 sim_config_cached_only_f(u32 v)
{
	return (v & 0x1) << 8;
}
static inline u32 sim_config_cached_only_m(void)
{
	return 0x1 << 8;
}
static inline u32 sim_config_cached_only_v(u32 r)
{
	return (r >> 8) & 0x1;
}
static inline u32 sim_config_cached_only_disabled_v(void)
{
	return 0x00000000;
}
static inline u32 sim_config_cached_only_disabled_f(void)
{
	return 0x0;
}
static inline u32 sim_config_cached_only_enabled_v(void)
{
	return 0x00000001;
}
static inline u32 sim_config_cached_only_enabled_f(void)
{
	return 0x100;
}
/* VALIDITY: 2-bit field at bits 10:9; reset default is 1 (valid). */
static inline u32 sim_config_validity_s(void)
{
	return 2;
}
static inline u32 sim_config_validity_f(u32 v)
{
	return (v & 0x3) << 9;
}
static inline u32 sim_config_validity_m(void)
{
	return 0x3 << 9;
}
static inline u32 sim_config_validity_v(u32 r)
{
	return (r >> 9) & 0x3;
}
static inline u32 sim_config_validity__init_v(void)
{
	return 0x00000001;
}
static inline u32 sim_config_validity__init_f(void)
{
	return 0x200;
}
static inline u32 sim_config_validity_valid_v(void)
{
	return 0x00000001;
}
static inline u32 sim_config_validity_valid_f(void)
{
	return 0x200;
}
/* SIMULATION: 2-bit selector at bits 13:12
 * (0 = disabled, 1 = fmodel, 2 = rtlsim).
 */
static inline u32 sim_config_simulation_s(void)
{
	return 2;
}
static inline u32 sim_config_simulation_f(u32 v)
{
	return (v & 0x3) << 12;
}
static inline u32 sim_config_simulation_m(void)
{
	return 0x3 << 12;
}
static inline u32 sim_config_simulation_v(u32 r)
{
	return (r >> 12) & 0x3;
}
static inline u32 sim_config_simulation_disabled_v(void)
{
	return 0x00000000;
}
static inline u32 sim_config_simulation_disabled_f(void)
{
	return 0x0;
}
static inline u32 sim_config_simulation_fmodel_v(void)
{
	return 0x00000001;
}
static inline u32 sim_config_simulation_fmodel_f(void)
{
	return 0x1000;
}
static inline u32 sim_config_simulation_rtlsim_v(void)
{
	return 0x00000002;
}
static inline u32 sim_config_simulation_rtlsim_f(void)
{
	return 0x2000;
}
/* SECONDARY_DISPLAY: single-bit flag at bit 14. */
static inline u32 sim_config_secondary_display_s(void)
{
	return 1;
}
static inline u32 sim_config_secondary_display_f(u32 v)
{
	return (v & 0x1) << 14;
}
static inline u32 sim_config_secondary_display_m(void)
{
	return 0x1 << 14;
}
static inline u32 sim_config_secondary_display_v(u32 r)
{
	return (r >> 14) & 0x1;
}
static inline u32 sim_config_secondary_display_disabled_v(void)
{
	return 0x00000000;
}
static inline u32 sim_config_secondary_display_disabled_f(void)
{
	return 0x0;
}
static inline u32 sim_config_secondary_display_enabled_v(void)
{
	return 0x00000001;
}
static inline u32 sim_config_secondary_display_enabled_f(void)
{
	return 0x4000;
}
/* NUM_HEADS: 8-bit count at bits 24:17. */
static inline u32 sim_config_num_heads_s(void)
{
	return 8;
}
static inline u32 sim_config_num_heads_f(u32 v)
{
	return (v & 0xff) << 17;
}
static inline u32 sim_config_num_heads_m(void)
{
	return 0xff << 17;
}
static inline u32 sim_config_num_heads_v(u32 r)
{
	return (r >> 17) & 0xff;
}
/* SIM_EVENT_RING register (offset 0x30): event-ring setup.
 * TARGET (bits 1:0) selects the memory aperture; STATUS (bit 3) is the
 * valid flag.
 */
static inline u32 sim_event_ring_r(void)
{
	return 0x00000030;
}
static inline u32 sim_event_ring_target_s(void)
{
	return 2;
}
static inline u32 sim_event_ring_target_f(u32 v)
{
	return (v & 0x3) << 0;
}
static inline u32 sim_event_ring_target_m(void)
{
	return 0x3 << 0;
}
static inline u32 sim_event_ring_target_v(u32 r)
{
	return (r >> 0) & 0x3;
}
/* TARGET encodings: 1 = phys_nvm (also the reset/production default),
 * 2 = phys_pci, 3 = phys_pci_coherent.
 */
static inline u32 sim_event_ring_target_phys_init_v(void)
{
	return 0x00000001;
}
static inline u32 sim_event_ring_target_phys_init_f(void)
{
	return 0x1;
}
static inline u32 sim_event_ring_target_phys__init_v(void)
{
	return 0x00000001;
}
static inline u32 sim_event_ring_target_phys__init_f(void)
{
	return 0x1;
}
static inline u32 sim_event_ring_target_phys__prod_v(void)
{
	return 0x00000001;
}
static inline u32 sim_event_ring_target_phys__prod_f(void)
{
	return 0x1;
}
static inline u32 sim_event_ring_target_phys_nvm_v(void)
{
	return 0x00000001;
}
static inline u32 sim_event_ring_target_phys_nvm_f(void)
{
	return 0x1;
}
static inline u32 sim_event_ring_target_phys_pci_v(void)
{
	return 0x00000002;
}
static inline u32 sim_event_ring_target_phys_pci_f(void)
{
	return 0x2;
}
static inline u32 sim_event_ring_target_phys_pci_coherent_v(void)
{
	return 0x00000003;
}
static inline u32 sim_event_ring_target_phys_pci_coherent_f(void)
{
	return 0x3;
}
/* STATUS: valid flag at bit 3 (0 = invalid, 1 = valid; resets to 0). */
static inline u32 sim_event_ring_status_s(void)
{
	return 1;
}
static inline u32 sim_event_ring_status_f(u32 v)
{
	return (v & 0x1) << 3;
}
static inline u32 sim_event_ring_status_m(void)
{
	return 0x1 << 3;
}
static inline u32 sim_event_ring_status_v(u32 r)
{
	return (r >> 3) & 0x1;
}
static inline u32 sim_event_ring_status_init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_status_init_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_status__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_status__init_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_status__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_status__prod_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_status_invalid_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_status_invalid_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_status_valid_v(void)
{
	return 0x00000001;
}
static inline u32 sim_event_ring_status_valid_f(void)
{
	return 0x8;
}
/* SIM_EVENT_RING SIZE (bits 5:4): ring size selector
 * (0 = 4kb, 1 = 8kb, 2 = 12kb, 3 = 16kb; resets to 0).
 */
static inline u32 sim_event_ring_size_s(void)
{
	return 2;
}
static inline u32 sim_event_ring_size_f(u32 v)
{
	return (v & 0x3) << 4;
}
static inline u32 sim_event_ring_size_m(void)
{
	return 0x3 << 4;
}
static inline u32 sim_event_ring_size_v(u32 r)
{
	return (r >> 4) & 0x3;
}
static inline u32 sim_event_ring_size_init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_size_init_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_size__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_size__init_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_size__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_size__prod_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_size_4kb_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_size_4kb_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_size_8kb_v(void)
{
	return 0x00000001;
}
static inline u32 sim_event_ring_size_8kb_f(void)
{
	return 0x10;
}
static inline u32 sim_event_ring_size_12kb_v(void)
{
	return 0x00000002;
}
static inline u32 sim_event_ring_size_12kb_f(void)
{
	return 0x20;
}
static inline u32 sim_event_ring_size_16kb_v(void)
{
	return 0x00000003;
}
static inline u32 sim_event_ring_size_16kb_f(void)
{
	return 0x30;
}
/* GP_IN_RING: single-bit flag at bit 11 (0 = no, 1 = yes; resets to 0). */
static inline u32 sim_event_ring_gp_in_ring_s(void)
{
	return 1;
}
static inline u32 sim_event_ring_gp_in_ring_f(u32 v)
{
	return (v & 0x1) << 11;
}
static inline u32 sim_event_ring_gp_in_ring_m(void)
{
	return 0x1 << 11;
}
static inline u32 sim_event_ring_gp_in_ring_v(u32 r)
{
	return (r >> 11) & 0x1;
}
static inline u32 sim_event_ring_gp_in_ring__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_gp_in_ring__init_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_gp_in_ring__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_gp_in_ring__prod_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_gp_in_ring_no_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_gp_in_ring_no_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_gp_in_ring_yes_v(void)
{
	return 0x00000001;
}
static inline u32 sim_event_ring_gp_in_ring_yes_f(void)
{
	return 0x800;
}
/* SIM_EVENT_RING ADDR_LO: 20-bit field at bits 31:12 holding the low part
 * of the event-ring address (page-aligned, hence the 12-bit shift).
 */
static inline u32 sim_event_ring_addr_lo_s(void)
{
	return 20;
}
static inline u32 sim_event_ring_addr_lo_f(u32 v)
{
	return (v & 0xfffff) << 12;
}
1225 | static inline u32 sim_event_ring_addr_lo_m(void) | ||
1226 | { | ||
1227 | return 0xfffff << 12; | ||
1228 | } | ||
/* Extract ADDR_LO (bits 31:12) from a SIM_EVENT_RING register value. */
static inline u32 sim_event_ring_addr_lo_v(u32 r)
{
	return (r >> 12) & 0xfffff;
}
/* Reset (__init) and production (__prod) defaults for ADDR_LO: 0. */
static inline u32 sim_event_ring_addr_lo__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_addr_lo__init_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_addr_lo__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_addr_lo__prod_f(void)
{
	return 0x0;
}
/* SIM_EVENT_RING_HI register offset (0x34).
 * NOTE(review): this returns a register offset but carries the _v suffix
 * instead of the _r suffix used by every other register accessor here
 * (cf. sim_recv_ring_hi_r). Likely a generator quirk; renaming would
 * break existing callers, so it is only flagged — TODO confirm upstream.
 */
static inline u32 sim_event_ring_hi_v(void)
{
	return 0x00000034;
}
/* SIM_EVENT_RING_HI ADDR: 20-bit field at bits 19:0 holding the high part
 * of the event-ring address.
 */
static inline u32 sim_event_ring_hi_addr_s(void)
{
	return 20;
}
static inline u32 sim_event_ring_hi_addr_f(u32 v)
{
	return (v & 0xfffff) << 0;
}
static inline u32 sim_event_ring_hi_addr_m(void)
{
	return 0xfffff << 0;
}
static inline u32 sim_event_ring_hi_addr_v(u32 r)
{
	return (r >> 0) & 0xfffff;
}
/* Reset (__init) and production (__prod) defaults for ADDR: 0. */
static inline u32 sim_event_ring_hi_addr__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_hi_addr__init_f(void)
{
	return 0x0;
}
static inline u32 sim_event_ring_hi_addr__prod_v(void)
{
	return 0x00000000;
}
static inline u32 sim_event_ring_hi_addr__prod_f(void)
{
	return 0x0;
}
/* SIM_EVENT_PUT register (offset 0x38): event-ring producer index.
 * POINTER is a 30-bit byte offset at bits 31:2 (4-byte granularity) —
 * wider than the 11-bit recv/send ring pointers.
 */
static inline u32 sim_event_put_r(void)
{
	return 0x00000038;
}
static inline u32 sim_event_put_pointer_s(void)
{
	return 30;
}
static inline u32 sim_event_put_pointer_f(u32 v)
{
	return (v & 0x3fffffff) << 2;
}
1297 | static inline u32 sim_event_put_pointer_m(void) | ||
1298 | { | ||
1299 | return 0x3fffffff << 2; | ||
1300 | } | ||
/* Extract POINTER (bits 31:2) from a SIM_EVENT_PUT register value. */
static inline u32 sim_event_put_pointer_v(u32 r)
{
	return (r >> 2) & 0x3fffffff;
}
/* SIM_EVENT_GET register (offset 0x3c): event-ring consumer index, with
 * the same 30-bit POINTER field at bits 31:2.
 */
static inline u32 sim_event_get_r(void)
{
	return 0x0000003c;
}
static inline u32 sim_event_get_pointer_s(void)
{
	return 30;
}
static inline u32 sim_event_get_pointer_f(u32 v)
{
	return (v & 0x3fffffff) << 2;
}
1317 | static inline u32 sim_event_get_pointer_m(void) | ||
1318 | { | ||
1319 | return 0x3fffffff << 2; | ||
1320 | } | ||
/* Extract POINTER (bits 31:2) from a SIM_EVENT_GET register value. */
static inline u32 sim_event_get_pointer_v(u32 r)
{
	return (r >> 2) & 0x3fffffff;
}
/* SIM_STATUS register (offset 0x28): per-ring pending flags, one bit each:
 * SEND_PUT (bit 0), SEND_GET (bit 1), RECV_PUT (bit 2), RECV_GET (bit 3),
 * EVENT_PUT (bit 4), EVENT_GET (bit 5). For each flag, _pending_f() is
 * also the write value used to clear it where a _clear_f() is generated.
 */
static inline u32 sim_status_r(void)
{
	return 0x00000028;
}
/* SEND_PUT: bit 0. */
static inline u32 sim_status_send_put_s(void)
{
	return 1;
}
static inline u32 sim_status_send_put_f(u32 v)
{
	return (v & 0x1) << 0;
}
static inline u32 sim_status_send_put_m(void)
{
	return 0x1 << 0;
}
static inline u32 sim_status_send_put_v(u32 r)
{
	return (r >> 0) & 0x1;
}
static inline u32 sim_status_send_put__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_status_send_put__init_f(void)
{
	return 0x0;
}
static inline u32 sim_status_send_put_idle_v(void)
{
	return 0x00000000;
}
static inline u32 sim_status_send_put_idle_f(void)
{
	return 0x0;
}
static inline u32 sim_status_send_put_pending_v(void)
{
	return 0x00000001;
}
static inline u32 sim_status_send_put_pending_f(void)
{
	return 0x1;
}
/* SEND_GET: bit 1. */
static inline u32 sim_status_send_get_s(void)
{
	return 1;
}
static inline u32 sim_status_send_get_f(u32 v)
{
	return (v & 0x1) << 1;
}
static inline u32 sim_status_send_get_m(void)
{
	return 0x1 << 1;
}
static inline u32 sim_status_send_get_v(u32 r)
{
	return (r >> 1) & 0x1;
}
static inline u32 sim_status_send_get__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_status_send_get__init_f(void)
{
	return 0x0;
}
static inline u32 sim_status_send_get_idle_v(void)
{
	return 0x00000000;
}
static inline u32 sim_status_send_get_idle_f(void)
{
	return 0x0;
}
static inline u32 sim_status_send_get_pending_v(void)
{
	return 0x00000001;
}
static inline u32 sim_status_send_get_pending_f(void)
{
	return 0x2;
}
static inline u32 sim_status_send_get_clear_v(void)
{
	return 0x00000001;
}
static inline u32 sim_status_send_get_clear_f(void)
{
	return 0x2;
}
/* RECV_PUT: bit 2. */
static inline u32 sim_status_recv_put_s(void)
{
	return 1;
}
static inline u32 sim_status_recv_put_f(u32 v)
{
	return (v & 0x1) << 2;
}
static inline u32 sim_status_recv_put_m(void)
{
	return 0x1 << 2;
}
static inline u32 sim_status_recv_put_v(u32 r)
{
	return (r >> 2) & 0x1;
}
static inline u32 sim_status_recv_put__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_status_recv_put__init_f(void)
{
	return 0x0;
}
static inline u32 sim_status_recv_put_idle_v(void)
{
	return 0x00000000;
}
static inline u32 sim_status_recv_put_idle_f(void)
{
	return 0x0;
}
static inline u32 sim_status_recv_put_pending_v(void)
{
	return 0x00000001;
}
static inline u32 sim_status_recv_put_pending_f(void)
{
	return 0x4;
}
static inline u32 sim_status_recv_put_clear_v(void)
{
	return 0x00000001;
}
static inline u32 sim_status_recv_put_clear_f(void)
{
	return 0x4;
}
/* RECV_GET: bit 3. */
static inline u32 sim_status_recv_get_s(void)
{
	return 1;
}
static inline u32 sim_status_recv_get_f(u32 v)
{
	return (v & 0x1) << 3;
}
static inline u32 sim_status_recv_get_m(void)
{
	return 0x1 << 3;
}
static inline u32 sim_status_recv_get_v(u32 r)
{
	return (r >> 3) & 0x1;
}
static inline u32 sim_status_recv_get__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_status_recv_get__init_f(void)
{
	return 0x0;
}
static inline u32 sim_status_recv_get_idle_v(void)
{
	return 0x00000000;
}
static inline u32 sim_status_recv_get_idle_f(void)
{
	return 0x0;
}
static inline u32 sim_status_recv_get_pending_v(void)
{
	return 0x00000001;
}
static inline u32 sim_status_recv_get_pending_f(void)
{
	return 0x8;
}
/* EVENT_PUT: bit 4. */
static inline u32 sim_status_event_put_s(void)
{
	return 1;
}
static inline u32 sim_status_event_put_f(u32 v)
{
	return (v & 0x1) << 4;
}
static inline u32 sim_status_event_put_m(void)
{
	return 0x1 << 4;
}
static inline u32 sim_status_event_put_v(u32 r)
{
	return (r >> 4) & 0x1;
}
static inline u32 sim_status_event_put__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_status_event_put__init_f(void)
{
	return 0x0;
}
static inline u32 sim_status_event_put_idle_v(void)
{
	return 0x00000000;
}
static inline u32 sim_status_event_put_idle_f(void)
{
	return 0x0;
}
static inline u32 sim_status_event_put_pending_v(void)
{
	return 0x00000001;
}
static inline u32 sim_status_event_put_pending_f(void)
{
	return 0x10;
}
static inline u32 sim_status_event_put_clear_v(void)
{
	return 0x00000001;
}
static inline u32 sim_status_event_put_clear_f(void)
{
	return 0x10;
}
/* EVENT_GET: bit 5. */
static inline u32 sim_status_event_get_s(void)
{
	return 1;
}
static inline u32 sim_status_event_get_f(u32 v)
{
	return (v & 0x1) << 5;
}
static inline u32 sim_status_event_get_m(void)
{
	return 0x1 << 5;
}
static inline u32 sim_status_event_get_v(u32 r)
{
	return (r >> 5) & 0x1;
}
static inline u32 sim_status_event_get__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_status_event_get__init_f(void)
{
	return 0x0;
}
static inline u32 sim_status_event_get_idle_v(void)
{
	return 0x00000000;
}
static inline u32 sim_status_event_get_idle_f(void)
{
	return 0x0;
}
static inline u32 sim_status_event_get_pending_v(void)
{
	return 0x00000001;
}
static inline u32 sim_status_event_get_pending_f(void)
{
	return 0x20;
}
/* SIM_CONTROL register (offset 0x2c): per-ring enable flags, laid out in
 * the same bit positions as the SIM_STATUS flags: SEND_PUT (bit 0),
 * SEND_GET (bit 1), RECV_PUT (bit 2), RECV_GET (bit 3), EVENT_PUT
 * (bit 4), EVENT_GET (bit 5). All reset to 0 (disabled).
 */
static inline u32 sim_control_r(void)
{
	return 0x0000002c;
}
/* SEND_PUT: bit 0. */
static inline u32 sim_control_send_put_s(void)
{
	return 1;
}
static inline u32 sim_control_send_put_f(u32 v)
{
	return (v & 0x1) << 0;
}
static inline u32 sim_control_send_put_m(void)
{
	return 0x1 << 0;
}
static inline u32 sim_control_send_put_v(u32 r)
{
	return (r >> 0) & 0x1;
}
static inline u32 sim_control_send_put__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_control_send_put__init_f(void)
{
	return 0x0;
}
static inline u32 sim_control_send_put_disabled_v(void)
{
	return 0x00000000;
}
static inline u32 sim_control_send_put_disabled_f(void)
{
	return 0x0;
}
static inline u32 sim_control_send_put_enabled_v(void)
{
	return 0x00000001;
}
static inline u32 sim_control_send_put_enabled_f(void)
{
	return 0x1;
}
/* SEND_GET: bit 1. */
static inline u32 sim_control_send_get_s(void)
{
	return 1;
}
static inline u32 sim_control_send_get_f(u32 v)
{
	return (v & 0x1) << 1;
}
static inline u32 sim_control_send_get_m(void)
{
	return 0x1 << 1;
}
static inline u32 sim_control_send_get_v(u32 r)
{
	return (r >> 1) & 0x1;
}
static inline u32 sim_control_send_get__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_control_send_get__init_f(void)
{
	return 0x0;
}
static inline u32 sim_control_send_get_disabled_v(void)
{
	return 0x00000000;
}
static inline u32 sim_control_send_get_disabled_f(void)
{
	return 0x0;
}
static inline u32 sim_control_send_get_enabled_v(void)
{
	return 0x00000001;
}
static inline u32 sim_control_send_get_enabled_f(void)
{
	return 0x2;
}
/* RECV_PUT: bit 2. */
static inline u32 sim_control_recv_put_s(void)
{
	return 1;
}
static inline u32 sim_control_recv_put_f(u32 v)
{
	return (v & 0x1) << 2;
}
static inline u32 sim_control_recv_put_m(void)
{
	return 0x1 << 2;
}
static inline u32 sim_control_recv_put_v(u32 r)
{
	return (r >> 2) & 0x1;
}
static inline u32 sim_control_recv_put__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_control_recv_put__init_f(void)
{
	return 0x0;
}
static inline u32 sim_control_recv_put_disabled_v(void)
{
	return 0x00000000;
}
static inline u32 sim_control_recv_put_disabled_f(void)
{
	return 0x0;
}
static inline u32 sim_control_recv_put_enabled_v(void)
{
	return 0x00000001;
}
static inline u32 sim_control_recv_put_enabled_f(void)
{
	return 0x4;
}
/* RECV_GET: bit 3. */
static inline u32 sim_control_recv_get_s(void)
{
	return 1;
}
static inline u32 sim_control_recv_get_f(u32 v)
{
	return (v & 0x1) << 3;
}
static inline u32 sim_control_recv_get_m(void)
{
	return 0x1 << 3;
}
static inline u32 sim_control_recv_get_v(u32 r)
{
	return (r >> 3) & 0x1;
}
static inline u32 sim_control_recv_get__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_control_recv_get__init_f(void)
{
	return 0x0;
}
static inline u32 sim_control_recv_get_disabled_v(void)
{
	return 0x00000000;
}
static inline u32 sim_control_recv_get_disabled_f(void)
{
	return 0x0;
}
static inline u32 sim_control_recv_get_enabled_v(void)
{
	return 0x00000001;
}
static inline u32 sim_control_recv_get_enabled_f(void)
{
	return 0x8;
}
/* EVENT_PUT: bit 4. */
static inline u32 sim_control_event_put_s(void)
{
	return 1;
}
static inline u32 sim_control_event_put_f(u32 v)
{
	return (v & 0x1) << 4;
}
static inline u32 sim_control_event_put_m(void)
{
	return 0x1 << 4;
}
static inline u32 sim_control_event_put_v(u32 r)
{
	return (r >> 4) & 0x1;
}
static inline u32 sim_control_event_put__init_v(void)
{
	return 0x00000000;
}
static inline u32 sim_control_event_put__init_f(void)
{
	return 0x0;
}
static inline u32 sim_control_event_put_disabled_v(void)
{
	return 0x00000000;
}
static inline u32 sim_control_event_put_disabled_f(void)
{
	return 0x0;
}
static inline u32 sim_control_event_put_enabled_v(void)
{
	return 0x00000001;
}
static inline u32 sim_control_event_put_enabled_f(void)
{
	return 0x10;
}
/* EVENT_GET: bit 5. */
static inline u32 sim_control_event_get_s(void)
{
	return 1;
}
1801 | static inline u32 sim_control_event_get_f(u32 v) | ||
1802 | { | ||
1803 | return (v & 0x1) << 5; | ||
1804 | } | ||
1805 | static inline u32 sim_control_event_get_m(void) | ||
1806 | { | ||
1807 | return 0x1 << 5; | ||
1808 | } | ||
1809 | static inline u32 sim_control_event_get_v(u32 r) | ||
1810 | { | ||
1811 | return (r >> 5) & 0x1; | ||
1812 | } | ||
1813 | static inline u32 sim_control_event_get__init_v(void) | ||
1814 | { | ||
1815 | return 0x00000000; | ||
1816 | } | ||
1817 | static inline u32 sim_control_event_get__init_f(void) | ||
1818 | { | ||
1819 | return 0x0; | ||
1820 | } | ||
1821 | static inline u32 sim_control_event_get_disabled_v(void) | ||
1822 | { | ||
1823 | return 0x00000000; | ||
1824 | } | ||
1825 | static inline u32 sim_control_event_get_disabled_f(void) | ||
1826 | { | ||
1827 | return 0x0; | ||
1828 | } | ||
1829 | static inline u32 sim_control_event_get_enabled_v(void) | ||
1830 | { | ||
1831 | return 0x00000001; | ||
1832 | } | ||
1833 | static inline u32 sim_control_event_get_enabled_f(void) | ||
1834 | { | ||
1835 | return 0x20; | ||
1836 | } | ||
1837 | static inline u32 sim_dma_r(void) | ||
1838 | { | ||
1839 | return 0x00000000; | ||
1840 | } | ||
1841 | static inline u32 sim_dma_target_s(void) | ||
1842 | { | ||
1843 | return 2; | ||
1844 | } | ||
1845 | static inline u32 sim_dma_target_f(u32 v) | ||
1846 | { | ||
1847 | return (v & 0x3) << 0; | ||
1848 | } | ||
1849 | static inline u32 sim_dma_target_m(void) | ||
1850 | { | ||
1851 | return 0x3 << 0; | ||
1852 | } | ||
1853 | static inline u32 sim_dma_target_v(u32 r) | ||
1854 | { | ||
1855 | return (r >> 0) & 0x3; | ||
1856 | } | ||
1857 | static inline u32 sim_dma_target_phys_init_v(void) | ||
1858 | { | ||
1859 | return 0x00000001; | ||
1860 | } | ||
1861 | static inline u32 sim_dma_target_phys_init_f(void) | ||
1862 | { | ||
1863 | return 0x1; | ||
1864 | } | ||
1865 | static inline u32 sim_dma_target_phys__init_v(void) | ||
1866 | { | ||
1867 | return 0x00000001; | ||
1868 | } | ||
1869 | static inline u32 sim_dma_target_phys__init_f(void) | ||
1870 | { | ||
1871 | return 0x1; | ||
1872 | } | ||
1873 | static inline u32 sim_dma_target_phys__prod_v(void) | ||
1874 | { | ||
1875 | return 0x00000001; | ||
1876 | } | ||
1877 | static inline u32 sim_dma_target_phys__prod_f(void) | ||
1878 | { | ||
1879 | return 0x1; | ||
1880 | } | ||
1881 | static inline u32 sim_dma_target_phys_nvm_v(void) | ||
1882 | { | ||
1883 | return 0x00000001; | ||
1884 | } | ||
1885 | static inline u32 sim_dma_target_phys_nvm_f(void) | ||
1886 | { | ||
1887 | return 0x1; | ||
1888 | } | ||
1889 | static inline u32 sim_dma_target_phys_pci_v(void) | ||
1890 | { | ||
1891 | return 0x00000002; | ||
1892 | } | ||
1893 | static inline u32 sim_dma_target_phys_pci_f(void) | ||
1894 | { | ||
1895 | return 0x2; | ||
1896 | } | ||
1897 | static inline u32 sim_dma_target_phys_pci_coherent_v(void) | ||
1898 | { | ||
1899 | return 0x00000003; | ||
1900 | } | ||
1901 | static inline u32 sim_dma_target_phys_pci_coherent_f(void) | ||
1902 | { | ||
1903 | return 0x3; | ||
1904 | } | ||
1905 | static inline u32 sim_dma_status_s(void) | ||
1906 | { | ||
1907 | return 1; | ||
1908 | } | ||
1909 | static inline u32 sim_dma_status_f(u32 v) | ||
1910 | { | ||
1911 | return (v & 0x1) << 3; | ||
1912 | } | ||
1913 | static inline u32 sim_dma_status_m(void) | ||
1914 | { | ||
1915 | return 0x1 << 3; | ||
1916 | } | ||
1917 | static inline u32 sim_dma_status_v(u32 r) | ||
1918 | { | ||
1919 | return (r >> 3) & 0x1; | ||
1920 | } | ||
1921 | static inline u32 sim_dma_status_init_v(void) | ||
1922 | { | ||
1923 | return 0x00000000; | ||
1924 | } | ||
1925 | static inline u32 sim_dma_status_init_f(void) | ||
1926 | { | ||
1927 | return 0x0; | ||
1928 | } | ||
1929 | static inline u32 sim_dma_status__init_v(void) | ||
1930 | { | ||
1931 | return 0x00000000; | ||
1932 | } | ||
1933 | static inline u32 sim_dma_status__init_f(void) | ||
1934 | { | ||
1935 | return 0x0; | ||
1936 | } | ||
1937 | static inline u32 sim_dma_status__prod_v(void) | ||
1938 | { | ||
1939 | return 0x00000000; | ||
1940 | } | ||
1941 | static inline u32 sim_dma_status__prod_f(void) | ||
1942 | { | ||
1943 | return 0x0; | ||
1944 | } | ||
1945 | static inline u32 sim_dma_status_invalid_v(void) | ||
1946 | { | ||
1947 | return 0x00000000; | ||
1948 | } | ||
1949 | static inline u32 sim_dma_status_invalid_f(void) | ||
1950 | { | ||
1951 | return 0x0; | ||
1952 | } | ||
1953 | static inline u32 sim_dma_status_valid_v(void) | ||
1954 | { | ||
1955 | return 0x00000001; | ||
1956 | } | ||
1957 | static inline u32 sim_dma_status_valid_f(void) | ||
1958 | { | ||
1959 | return 0x8; | ||
1960 | } | ||
1961 | static inline u32 sim_dma_size_s(void) | ||
1962 | { | ||
1963 | return 2; | ||
1964 | } | ||
1965 | static inline u32 sim_dma_size_f(u32 v) | ||
1966 | { | ||
1967 | return (v & 0x3) << 4; | ||
1968 | } | ||
1969 | static inline u32 sim_dma_size_m(void) | ||
1970 | { | ||
1971 | return 0x3 << 4; | ||
1972 | } | ||
1973 | static inline u32 sim_dma_size_v(u32 r) | ||
1974 | { | ||
1975 | return (r >> 4) & 0x3; | ||
1976 | } | ||
1977 | static inline u32 sim_dma_size_init_v(void) | ||
1978 | { | ||
1979 | return 0x00000000; | ||
1980 | } | ||
1981 | static inline u32 sim_dma_size_init_f(void) | ||
1982 | { | ||
1983 | return 0x0; | ||
1984 | } | ||
1985 | static inline u32 sim_dma_size__init_v(void) | ||
1986 | { | ||
1987 | return 0x00000000; | ||
1988 | } | ||
1989 | static inline u32 sim_dma_size__init_f(void) | ||
1990 | { | ||
1991 | return 0x0; | ||
1992 | } | ||
1993 | static inline u32 sim_dma_size__prod_v(void) | ||
1994 | { | ||
1995 | return 0x00000000; | ||
1996 | } | ||
1997 | static inline u32 sim_dma_size__prod_f(void) | ||
1998 | { | ||
1999 | return 0x0; | ||
2000 | } | ||
2001 | static inline u32 sim_dma_size_4kb_v(void) | ||
2002 | { | ||
2003 | return 0x00000000; | ||
2004 | } | ||
2005 | static inline u32 sim_dma_size_4kb_f(void) | ||
2006 | { | ||
2007 | return 0x0; | ||
2008 | } | ||
2009 | static inline u32 sim_dma_size_8kb_v(void) | ||
2010 | { | ||
2011 | return 0x00000001; | ||
2012 | } | ||
2013 | static inline u32 sim_dma_size_8kb_f(void) | ||
2014 | { | ||
2015 | return 0x10; | ||
2016 | } | ||
2017 | static inline u32 sim_dma_size_12kb_v(void) | ||
2018 | { | ||
2019 | return 0x00000002; | ||
2020 | } | ||
2021 | static inline u32 sim_dma_size_12kb_f(void) | ||
2022 | { | ||
2023 | return 0x20; | ||
2024 | } | ||
2025 | static inline u32 sim_dma_size_16kb_v(void) | ||
2026 | { | ||
2027 | return 0x00000003; | ||
2028 | } | ||
2029 | static inline u32 sim_dma_size_16kb_f(void) | ||
2030 | { | ||
2031 | return 0x30; | ||
2032 | } | ||
2033 | static inline u32 sim_dma_addr_lo_s(void) | ||
2034 | { | ||
2035 | return 20; | ||
2036 | } | ||
2037 | static inline u32 sim_dma_addr_lo_f(u32 v) | ||
2038 | { | ||
2039 | return (v & 0xfffff) << 12; | ||
2040 | } | ||
2041 | static inline u32 sim_dma_addr_lo_m(void) | ||
2042 | { | ||
2043 | return 0xfffff << 12; | ||
2044 | } | ||
2045 | static inline u32 sim_dma_addr_lo_v(u32 r) | ||
2046 | { | ||
2047 | return (r >> 12) & 0xfffff; | ||
2048 | } | ||
2049 | static inline u32 sim_dma_addr_lo__init_v(void) | ||
2050 | { | ||
2051 | return 0x00000000; | ||
2052 | } | ||
2053 | static inline u32 sim_dma_addr_lo__init_f(void) | ||
2054 | { | ||
2055 | return 0x0; | ||
2056 | } | ||
2057 | static inline u32 sim_dma_addr_lo__prod_v(void) | ||
2058 | { | ||
2059 | return 0x00000000; | ||
2060 | } | ||
2061 | static inline u32 sim_dma_addr_lo__prod_f(void) | ||
2062 | { | ||
2063 | return 0x0; | ||
2064 | } | ||
2065 | static inline u32 sim_dma_hi_r(void) | ||
2066 | { | ||
2067 | return 0x00000004; | ||
2068 | } | ||
2069 | static inline u32 sim_dma_hi_addr_s(void) | ||
2070 | { | ||
2071 | return 20; | ||
2072 | } | ||
2073 | static inline u32 sim_dma_hi_addr_f(u32 v) | ||
2074 | { | ||
2075 | return (v & 0xfffff) << 0; | ||
2076 | } | ||
2077 | static inline u32 sim_dma_hi_addr_m(void) | ||
2078 | { | ||
2079 | return 0xfffff << 0; | ||
2080 | } | ||
2081 | static inline u32 sim_dma_hi_addr_v(u32 r) | ||
2082 | { | ||
2083 | return (r >> 0) & 0xfffff; | ||
2084 | } | ||
2085 | static inline u32 sim_dma_hi_addr__init_v(void) | ||
2086 | { | ||
2087 | return 0x00000000; | ||
2088 | } | ||
2089 | static inline u32 sim_dma_hi_addr__init_f(void) | ||
2090 | { | ||
2091 | return 0x0; | ||
2092 | } | ||
2093 | static inline u32 sim_dma_hi_addr__prod_v(void) | ||
2094 | { | ||
2095 | return 0x00000000; | ||
2096 | } | ||
2097 | static inline u32 sim_dma_hi_addr__prod_f(void) | ||
2098 | { | ||
2099 | return 0x0; | ||
2100 | } | ||
2101 | static inline u32 sim_msg_signature_r(void) | ||
2102 | { | ||
2103 | return 0x00000000; | ||
2104 | } | ||
2105 | static inline u32 sim_msg_signature_valid_v(void) | ||
2106 | { | ||
2107 | return 0x43505256; | ||
2108 | } | ||
2109 | static inline u32 sim_msg_length_r(void) | ||
2110 | { | ||
2111 | return 0x00000004; | ||
2112 | } | ||
2113 | static inline u32 sim_msg_function_r(void) | ||
2114 | { | ||
2115 | return 0x00000008; | ||
2116 | } | ||
2117 | static inline u32 sim_msg_function_sim_escape_read_v(void) | ||
2118 | { | ||
2119 | return 0x00000023; | ||
2120 | } | ||
2121 | static inline u32 sim_msg_function_sim_escape_write_v(void) | ||
2122 | { | ||
2123 | return 0x00000024; | ||
2124 | } | ||
2125 | static inline u32 sim_msg_result_r(void) | ||
2126 | { | ||
2127 | return 0x0000000c; | ||
2128 | } | ||
2129 | static inline u32 sim_msg_result_success_v(void) | ||
2130 | { | ||
2131 | return 0x00000000; | ||
2132 | } | ||
2133 | static inline u32 sim_msg_result_rpc_pending_v(void) | ||
2134 | { | ||
2135 | return 0xFFFFFFFF; | ||
2136 | } | ||
2137 | static inline u32 sim_msg_sequence_r(void) | ||
2138 | { | ||
2139 | return 0x00000010; | ||
2140 | } | ||
2141 | static inline u32 sim_msg_spare_r(void) | ||
2142 | { | ||
2143 | return 0x00000014; | ||
2144 | } | ||
2145 | static inline u32 sim_msg_spare__init_v(void) | ||
2146 | { | ||
2147 | return 0x00000000; | ||
2148 | } | ||
2149 | |||
2150 | #endif /* __hw_sim_gk20a_h__ */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h new file mode 100644 index 00000000..5d6397b4 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h | |||
@@ -0,0 +1,225 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_therm_gk20a_h_ | ||
51 | #define _hw_therm_gk20a_h_ | ||
52 | |||
53 | static inline u32 therm_use_a_r(void) | ||
54 | { | ||
55 | return 0x00020798; | ||
56 | } | ||
57 | static inline u32 therm_evt_ext_therm_0_r(void) | ||
58 | { | ||
59 | return 0x00020700; | ||
60 | } | ||
61 | static inline u32 therm_evt_ext_therm_1_r(void) | ||
62 | { | ||
63 | return 0x00020704; | ||
64 | } | ||
65 | static inline u32 therm_evt_ext_therm_2_r(void) | ||
66 | { | ||
67 | return 0x00020708; | ||
68 | } | ||
69 | static inline u32 therm_evt_ba_w0_t1h_r(void) | ||
70 | { | ||
71 | return 0x00020750; | ||
72 | } | ||
73 | static inline u32 therm_weight_1_r(void) | ||
74 | { | ||
75 | return 0x00020024; | ||
76 | } | ||
77 | static inline u32 therm_peakpower_config1_r(u32 i) | ||
78 | { | ||
79 | return 0x00020154 + i*4; | ||
80 | } | ||
81 | static inline u32 therm_peakpower_config1_window_period_2m_v(void) | ||
82 | { | ||
83 | return 0x0000000f; | ||
84 | } | ||
85 | static inline u32 therm_peakpower_config1_window_period_2m_f(void) | ||
86 | { | ||
87 | return 0xf; | ||
88 | } | ||
89 | static inline u32 therm_peakpower_config1_ba_sum_shift_s(void) | ||
90 | { | ||
91 | return 6; | ||
92 | } | ||
93 | static inline u32 therm_peakpower_config1_ba_sum_shift_f(u32 v) | ||
94 | { | ||
95 | return (v & 0x3f) << 8; | ||
96 | } | ||
97 | static inline u32 therm_peakpower_config1_ba_sum_shift_m(void) | ||
98 | { | ||
99 | return 0x3f << 8; | ||
100 | } | ||
101 | static inline u32 therm_peakpower_config1_ba_sum_shift_v(u32 r) | ||
102 | { | ||
103 | return (r >> 8) & 0x3f; | ||
104 | } | ||
105 | static inline u32 therm_peakpower_config1_ba_sum_shift_20_f(void) | ||
106 | { | ||
107 | return 0x1400; | ||
108 | } | ||
109 | static inline u32 therm_peakpower_config1_window_en_enabled_f(void) | ||
110 | { | ||
111 | return 0x80000000; | ||
112 | } | ||
113 | static inline u32 therm_peakpower_config2_r(u32 i) | ||
114 | { | ||
115 | return 0x00020170 + i*4; | ||
116 | } | ||
117 | static inline u32 therm_peakpower_config4_r(u32 i) | ||
118 | { | ||
119 | return 0x000201c0 + i*4; | ||
120 | } | ||
121 | static inline u32 therm_peakpower_config6_r(u32 i) | ||
122 | { | ||
123 | return 0x00020270 + i*4; | ||
124 | } | ||
125 | static inline u32 therm_peakpower_config8_r(u32 i) | ||
126 | { | ||
127 | return 0x000202e8 + i*4; | ||
128 | } | ||
129 | static inline u32 therm_peakpower_config9_r(u32 i) | ||
130 | { | ||
131 | return 0x000202f4 + i*4; | ||
132 | } | ||
133 | static inline u32 therm_config1_r(void) | ||
134 | { | ||
135 | return 0x00020050; | ||
136 | } | ||
137 | static inline u32 therm_gate_ctrl_r(u32 i) | ||
138 | { | ||
139 | return 0x00020200 + i*4; | ||
140 | } | ||
141 | static inline u32 therm_gate_ctrl_eng_clk_m(void) | ||
142 | { | ||
143 | return 0x3 << 0; | ||
144 | } | ||
145 | static inline u32 therm_gate_ctrl_eng_clk_run_f(void) | ||
146 | { | ||
147 | return 0x0; | ||
148 | } | ||
149 | static inline u32 therm_gate_ctrl_eng_clk_auto_f(void) | ||
150 | { | ||
151 | return 0x1; | ||
152 | } | ||
153 | static inline u32 therm_gate_ctrl_eng_clk_stop_f(void) | ||
154 | { | ||
155 | return 0x2; | ||
156 | } | ||
157 | static inline u32 therm_gate_ctrl_blk_clk_m(void) | ||
158 | { | ||
159 | return 0x3 << 2; | ||
160 | } | ||
161 | static inline u32 therm_gate_ctrl_blk_clk_run_f(void) | ||
162 | { | ||
163 | return 0x0; | ||
164 | } | ||
165 | static inline u32 therm_gate_ctrl_blk_clk_auto_f(void) | ||
166 | { | ||
167 | return 0x4; | ||
168 | } | ||
169 | static inline u32 therm_gate_ctrl_eng_pwr_m(void) | ||
170 | { | ||
171 | return 0x3 << 4; | ||
172 | } | ||
173 | static inline u32 therm_gate_ctrl_eng_pwr_auto_f(void) | ||
174 | { | ||
175 | return 0x10; | ||
176 | } | ||
177 | static inline u32 therm_gate_ctrl_eng_pwr_off_v(void) | ||
178 | { | ||
179 | return 0x00000002; | ||
180 | } | ||
181 | static inline u32 therm_gate_ctrl_eng_pwr_off_f(void) | ||
182 | { | ||
183 | return 0x20; | ||
184 | } | ||
185 | static inline u32 therm_gate_ctrl_eng_idle_filt_exp_f(u32 v) | ||
186 | { | ||
187 | return (v & 0x1f) << 8; | ||
188 | } | ||
189 | static inline u32 therm_gate_ctrl_eng_idle_filt_exp_m(void) | ||
190 | { | ||
191 | return 0x1f << 8; | ||
192 | } | ||
193 | static inline u32 therm_gate_ctrl_eng_idle_filt_mant_f(u32 v) | ||
194 | { | ||
195 | return (v & 0x7) << 13; | ||
196 | } | ||
197 | static inline u32 therm_gate_ctrl_eng_idle_filt_mant_m(void) | ||
198 | { | ||
199 | return 0x7 << 13; | ||
200 | } | ||
201 | static inline u32 therm_gate_ctrl_eng_delay_after_f(u32 v) | ||
202 | { | ||
203 | return (v & 0xf) << 20; | ||
204 | } | ||
205 | static inline u32 therm_gate_ctrl_eng_delay_after_m(void) | ||
206 | { | ||
207 | return 0xf << 20; | ||
208 | } | ||
209 | static inline u32 therm_fecs_idle_filter_r(void) | ||
210 | { | ||
211 | return 0x00020288; | ||
212 | } | ||
213 | static inline u32 therm_fecs_idle_filter_value_m(void) | ||
214 | { | ||
215 | return 0xffffffff << 0; | ||
216 | } | ||
217 | static inline u32 therm_hubmmu_idle_filter_r(void) | ||
218 | { | ||
219 | return 0x0002028c; | ||
220 | } | ||
221 | static inline u32 therm_hubmmu_idle_filter_value_m(void) | ||
222 | { | ||
223 | return 0xffffffff << 0; | ||
224 | } | ||
225 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h new file mode 100644 index 00000000..22bc50ac --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h | |||
@@ -0,0 +1,101 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_timer_gk20a_h_ | ||
51 | #define _hw_timer_gk20a_h_ | ||
52 | |||
53 | static inline u32 timer_pri_timeout_r(void) | ||
54 | { | ||
55 | return 0x00009080; | ||
56 | } | ||
57 | static inline u32 timer_pri_timeout_period_f(u32 v) | ||
58 | { | ||
59 | return (v & 0xffffff) << 0; | ||
60 | } | ||
61 | static inline u32 timer_pri_timeout_period_m(void) | ||
62 | { | ||
63 | return 0xffffff << 0; | ||
64 | } | ||
65 | static inline u32 timer_pri_timeout_period_v(u32 r) | ||
66 | { | ||
67 | return (r >> 0) & 0xffffff; | ||
68 | } | ||
69 | static inline u32 timer_pri_timeout_en_f(u32 v) | ||
70 | { | ||
71 | return (v & 0x1) << 31; | ||
72 | } | ||
73 | static inline u32 timer_pri_timeout_en_m(void) | ||
74 | { | ||
75 | return 0x1 << 31; | ||
76 | } | ||
77 | static inline u32 timer_pri_timeout_en_v(u32 r) | ||
78 | { | ||
79 | return (r >> 31) & 0x1; | ||
80 | } | ||
81 | static inline u32 timer_pri_timeout_en_en_enabled_f(void) | ||
82 | { | ||
83 | return 0x80000000; | ||
84 | } | ||
85 | static inline u32 timer_pri_timeout_en_en_disabled_f(void) | ||
86 | { | ||
87 | return 0x0; | ||
88 | } | ||
89 | static inline u32 timer_pri_timeout_save_0_r(void) | ||
90 | { | ||
91 | return 0x00009084; | ||
92 | } | ||
93 | static inline u32 timer_pri_timeout_save_1_r(void) | ||
94 | { | ||
95 | return 0x00009088; | ||
96 | } | ||
97 | static inline u32 timer_pri_timeout_fecs_errcode_r(void) | ||
98 | { | ||
99 | return 0x0000908c; | ||
100 | } | ||
101 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h new file mode 100644 index 00000000..c2922814 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_top_gk20a_h_ | ||
51 | #define _hw_top_gk20a_h_ | ||
52 | |||
53 | static inline u32 top_num_gpcs_r(void) | ||
54 | { | ||
55 | return 0x00022430; | ||
56 | } | ||
57 | static inline u32 top_num_gpcs_value_v(u32 r) | ||
58 | { | ||
59 | return (r >> 0) & 0x1f; | ||
60 | } | ||
61 | static inline u32 top_tpc_per_gpc_r(void) | ||
62 | { | ||
63 | return 0x00022434; | ||
64 | } | ||
65 | static inline u32 top_tpc_per_gpc_value_v(u32 r) | ||
66 | { | ||
67 | return (r >> 0) & 0x1f; | ||
68 | } | ||
69 | static inline u32 top_num_fbps_r(void) | ||
70 | { | ||
71 | return 0x00022438; | ||
72 | } | ||
73 | static inline u32 top_num_fbps_value_v(u32 r) | ||
74 | { | ||
75 | return (r >> 0) & 0x1f; | ||
76 | } | ||
77 | static inline u32 top_fs_status_r(void) | ||
78 | { | ||
79 | return 0x00022500; | ||
80 | } | ||
81 | static inline u32 top_device_info_r(u32 i) | ||
82 | { | ||
83 | return 0x00022700 + i*4; | ||
84 | } | ||
85 | static inline u32 top_device_info__size_1_v(void) | ||
86 | { | ||
87 | return 0x00000040; | ||
88 | } | ||
89 | static inline u32 top_device_info_chain_v(u32 r) | ||
90 | { | ||
91 | return (r >> 31) & 0x1; | ||
92 | } | ||
93 | static inline u32 top_device_info_chain_enable_v(void) | ||
94 | { | ||
95 | return 0x00000001; | ||
96 | } | ||
97 | static inline u32 top_device_info_engine_enum_v(u32 r) | ||
98 | { | ||
99 | return (r >> 26) & 0xf; | ||
100 | } | ||
101 | static inline u32 top_device_info_runlist_enum_v(u32 r) | ||
102 | { | ||
103 | return (r >> 21) & 0xf; | ||
104 | } | ||
105 | static inline u32 top_device_info_type_enum_v(u32 r) | ||
106 | { | ||
107 | return (r >> 2) & 0x1fffffff; | ||
108 | } | ||
109 | static inline u32 top_device_info_type_enum_graphics_v(void) | ||
110 | { | ||
111 | return 0x00000000; | ||
112 | } | ||
113 | static inline u32 top_device_info_type_enum_graphics_f(void) | ||
114 | { | ||
115 | return 0x0; | ||
116 | } | ||
117 | static inline u32 top_device_info_type_enum_copy0_v(void) | ||
118 | { | ||
119 | return 0x00000001; | ||
120 | } | ||
121 | static inline u32 top_device_info_type_enum_copy0_f(void) | ||
122 | { | ||
123 | return 0x4; | ||
124 | } | ||
125 | static inline u32 top_device_info_entry_v(u32 r) | ||
126 | { | ||
127 | return (r >> 0) & 0x3; | ||
128 | } | ||
129 | static inline u32 top_device_info_entry_not_valid_v(void) | ||
130 | { | ||
131 | return 0x00000000; | ||
132 | } | ||
133 | static inline u32 top_device_info_entry_enum_v(void) | ||
134 | { | ||
135 | return 0x00000002; | ||
136 | } | ||
137 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h new file mode 100644 index 00000000..826e9bd1 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h | |||
@@ -0,0 +1,301 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_trim_gk20a_h_ | ||
51 | #define _hw_trim_gk20a_h_ | ||
52 | |||
/* GPCPLL configuration register offset. */
static inline u32 trim_sys_gpcpll_cfg_r(void)
{
	return 0x00137000;
}
/* Mask for the PLL enable field (bit 0). */
static inline u32 trim_sys_gpcpll_cfg_enable_m(void)
{
	return 0x1 << 0;
}
/* Extract the enable field (bit 0) from a cfg register value. */
static inline u32 trim_sys_gpcpll_cfg_enable_v(u32 r)
{
	return (r >> 0) & 0x1;
}
/* enable field: disabled, shifted into position. */
static inline u32 trim_sys_gpcpll_cfg_enable_no_f(void)
{
	return 0x0;
}
/* enable field: enabled, shifted into position. */
static inline u32 trim_sys_gpcpll_cfg_enable_yes_f(void)
{
	return 0x1;
}
/* Mask for the IDDQ field (bit 1). */
static inline u32 trim_sys_gpcpll_cfg_iddq_m(void)
{
	return 0x1 << 1;
}
/* Extract the IDDQ field (bit 1) from a cfg register value. */
static inline u32 trim_sys_gpcpll_cfg_iddq_v(u32 r)
{
	return (r >> 1) & 0x1;
}
/* IDDQ field value meaning the PLL is powered on. */
static inline u32 trim_sys_gpcpll_cfg_iddq_power_on_v(void)
{
	return 0x00000000;
}
/* Mask for the lock-detect power enable field (bit 4, active low). */
static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_m(void)
{
	return 0x1 << 4;
}
/* enb_lckdet field: lock detect powered on (bit clear). */
static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_on_f(void)
{
	return 0x0;
}
/* enb_lckdet field: lock detect powered off (bit 4 set). */
static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_off_f(void)
{
	return 0x10;
}
/* Extract the PLL lock status field (bit 17) from a cfg register value. */
static inline u32 trim_sys_gpcpll_cfg_pll_lock_v(u32 r)
{
	return (r >> 17) & 0x1;
}
/* pll_lock field asserted, shifted into position (1 << 17). */
static inline u32 trim_sys_gpcpll_cfg_pll_lock_true_f(void)
{
	return 0x20000;
}
/* GPCPLL coefficient register offset. */
static inline u32 trim_sys_gpcpll_coeff_r(void)
{
	return 0x00137004;
}
/* Place an M divider value into its field (bits 7:0). */
static inline u32 trim_sys_gpcpll_coeff_mdiv_f(u32 v)
{
	return (v & 0xff) << 0;
}
/* Extract the M divider field (bits 7:0) from a coeff register value. */
static inline u32 trim_sys_gpcpll_coeff_mdiv_v(u32 r)
{
	return (r >> 0) & 0xff;
}
/* Place an N divider value into its field (bits 15:8). */
static inline u32 trim_sys_gpcpll_coeff_ndiv_f(u32 v)
{
	return (v & 0xff) << 8;
}
/* Mask for the N divider field (bits 15:8). */
static inline u32 trim_sys_gpcpll_coeff_ndiv_m(void)
{
	return 0xff << 8;
}
/* Extract the N divider field (bits 15:8) from a coeff register value. */
static inline u32 trim_sys_gpcpll_coeff_ndiv_v(u32 r)
{
	return (r >> 8) & 0xff;
}
/* Place a PL divider value into its field (bits 21:16). */
static inline u32 trim_sys_gpcpll_coeff_pldiv_f(u32 v)
{
	return (v & 0x3f) << 16;
}
/* Extract the PL divider field (bits 21:16) from a coeff register value. */
static inline u32 trim_sys_gpcpll_coeff_pldiv_v(u32 r)
{
	return (r >> 16) & 0x3f;
}
/* VCO select register offset. */
static inline u32 trim_sys_sel_vco_r(void)
{
	return 0x00137100;
}
/* Mask for the gpc2clk output select field (bit 0). */
static inline u32 trim_sys_sel_vco_gpc2clk_out_m(void)
{
	return 0x1 << 0;
}
/* Reset/init value of the gpc2clk output select field. */
static inline u32 trim_sys_sel_vco_gpc2clk_out_init_v(void)
{
	return 0x00000000;
}
/* Init value shifted into field position. */
static inline u32 trim_sys_sel_vco_gpc2clk_out_init_f(void)
{
	return 0x0;
}
/* gpc2clk output select: bypass path (same encoding as init). */
static inline u32 trim_sys_sel_vco_gpc2clk_out_bypass_f(void)
{
	return 0x0;
}
/* gpc2clk output select: VCO path. */
static inline u32 trim_sys_sel_vco_gpc2clk_out_vco_f(void)
{
	return 0x1;
}
/* GPC2CLK output control register offset. */
static inline u32 trim_sys_gpc2clk_out_r(void)
{
	return 0x00137250;
}
/* Width of the bypass divider field, in bits. */
static inline u32 trim_sys_gpc2clk_out_bypdiv_s(void)
{
	return 6;
}
/* Place a bypass divider value into its field (bits 5:0). */
static inline u32 trim_sys_gpc2clk_out_bypdiv_f(u32 v)
{
	return (v & 0x3f) << 0;
}
/* Mask for the bypass divider field (bits 5:0). */
static inline u32 trim_sys_gpc2clk_out_bypdiv_m(void)
{
	return 0x3f << 0;
}
/* Extract the bypass divider field (bits 5:0) from a register value. */
static inline u32 trim_sys_gpc2clk_out_bypdiv_v(u32 r)
{
	return (r >> 0) & 0x3f;
}
/* bypdiv encoding named "by31" (raw field value 0x3c). */
static inline u32 trim_sys_gpc2clk_out_bypdiv_by31_f(void)
{
	return 0x3c;
}
/* Width of the VCO divider field, in bits. */
static inline u32 trim_sys_gpc2clk_out_vcodiv_s(void)
{
	return 6;
}
/* Place a VCO divider value into its field (bits 13:8). */
static inline u32 trim_sys_gpc2clk_out_vcodiv_f(u32 v)
{
	return (v & 0x3f) << 8;
}
/* Mask for the VCO divider field (bits 13:8). */
static inline u32 trim_sys_gpc2clk_out_vcodiv_m(void)
{
	return 0x3f << 8;
}
/* Extract the VCO divider field (bits 13:8) from a register value. */
static inline u32 trim_sys_gpc2clk_out_vcodiv_v(u32 r)
{
	return (r >> 8) & 0x3f;
}
/* vcodiv encoding: divide by 1 (raw field value 0). */
static inline u32 trim_sys_gpc2clk_out_vcodiv_by1_f(void)
{
	return 0x0;
}
205 | static inline u32 trim_sys_gpc2clk_out_sdiv14_m(void) | ||
206 | { | ||
207 | return 0x1 << 31; | ||
208 | } | ||
/* sdiv14 field set to indiv4 mode, shifted into position (bit 31). */
static inline u32 trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f(void)
{
	return 0x80000000;
}
/* Per-GPC clock counter config register offset (GPCs 512 bytes apart). */
static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_r(u32 i)
{
	return 0x00134124 + i*512;
}
/* Place a noofipclks count into its field (bits 13:0). */
static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(u32 v)
{
	return (v & 0x3fff) << 0;
}
/* write_en field asserted (bit 16). */
static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f(void)
{
	return 0x10000;
}
/* enable field asserted (bit 20). */
static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f(void)
{
	return 0x100000;
}
/* reset field asserted (bit 24). */
static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f(void)
{
	return 0x1000000;
}
/* Per-GPC clock counter value register offset (GPCs 512 bytes apart). */
static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_r(u32 i)
{
	return 0x00134128 + i*512;
}
/* Extract the counter value field (bits 19:0) from a cnt register value. */
static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(u32 r)
{
	return (r >> 0) & 0xfffff;
}
/* GPCPLL CFG2 register offset. */
static inline u32 trim_sys_gpcpll_cfg2_r(void)
{
	return 0x0013700c;
}
/* Place a pll_stepa value into its field (bits 31:24). */
static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_f(u32 v)
{
	return (v & 0xff) << 24;
}
249 | static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_m(void) | ||
250 | { | ||
251 | return 0xff << 24; | ||
252 | } | ||
/* GPCPLL CFG3 register offset. */
static inline u32 trim_sys_gpcpll_cfg3_r(void)
{
	return 0x00137018;
}
/* Place a pll_stepb value into its field (bits 23:16). */
static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_f(u32 v)
{
	return (v & 0xff) << 16;
}
/* Mask for the pll_stepb field (bits 23:16). */
static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_m(void)
{
	return 0xff << 16;
}
/* GPCPLL NDIV slowdown register offset. */
static inline u32 trim_sys_gpcpll_ndiv_slowdown_r(void)
{
	return 0x0013701c;
}
/* Mask for the slowdown_using_pll field (bit 22). */
static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(void)
{
	return 0x1 << 22;
}
/* slowdown_using_pll asserted, shifted into position (1 << 22). */
static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f(void)
{
	return 0x400000;
}
/* slowdown_using_pll deasserted. */
static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f(void)
{
	return 0x0;
}
281 | static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(void) | ||
282 | { | ||
283 | return 0x1 << 31; | ||
284 | } | ||
/* en_dynramp asserted, shifted into position (bit 31). */
static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f(void)
{
	return 0x80000000;
}
/* en_dynramp deasserted. */
static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f(void)
{
	return 0x0;
}
/* GPC broadcast NDIV slowdown debug register offset. */
static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r(void)
{
	return 0x001328a0;
}
/* Extract the pll_dynramp_done_synced flag (bit 24) from the debug reg. */
static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(u32 r)
{
	return (r >> 24) & 0x1;
}
301 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/kind_gk20a.c b/drivers/gpu/nvgpu/gk20a/kind_gk20a.c new file mode 100644 index 00000000..b0a74056 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/kind_gk20a.c | |||
@@ -0,0 +1,424 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/kind_gk20a.c | ||
3 | * | ||
4 | * GK20A memory kind management | ||
5 | * | ||
6 | * Copyright (c) 2011, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #include <linux/bitops.h> | ||
22 | |||
23 | #include "hw_gmmu_gk20a.h" | ||
24 | #include "kind_gk20a.h" | ||
25 | |||
26 | /* TBD: generate these from kind_macros.h */ | ||
27 | |||
28 | /* TBD: not sure on the work creation for gk20a, doubtful */ | ||
/*
 * Placeholder: no kind is treated as a sked work-creation kind on gk20a
 * (see the "not sure on the work creation" note above).
 */
static inline bool gk20a_kind_work_creation_sked(u8 k)
{
	return false;
}
/* Placeholder: no kind is treated as a host work-creation kind on gk20a. */
static inline bool gk20a_kind_work_creation_host(u8 k)
{
	return false;
}
37 | |||
38 | static inline bool gk20a_kind_work_creation(u8 k) | ||
39 | { | ||
40 | return gk20a_kind_work_creation_sked(k) || | ||
41 | gk20a_kind_work_creation_host(k); | ||
42 | } | ||
43 | |||
44 | /* note: taken from the !2cs_compression case */ | ||
/*
 * True when kind value 'k' is accepted by this driver: work-creation
 * kinds, invalid/pitch, plus the z16 / s8z24 / v8z24 / z24s8 / zf32 /
 * x8z24 / c32 / c64 / c128 singletons and ranges listed below.  The
 * range comparisons rely on the numeric ordering of the generated
 * gmmu_pte_kind_* constants in hw_gmmu_gk20a.h.
 */
static inline bool gk20a_kind_supported(u8 k)
{
	return gk20a_kind_work_creation(k) ||
		(k == gmmu_pte_kind_invalid_v()) ||
		(k == gmmu_pte_kind_pitch_v()) ||
		(k >= gmmu_pte_kind_z16_v() &&
		 k <= gmmu_pte_kind_z16_ms8_2c_v()) ||
		(k >= gmmu_pte_kind_z16_2z_v() &&
		 k <= gmmu_pte_kind_z16_ms8_2z_v()) ||
		(k == gmmu_pte_kind_s8z24_v()) ||
		(k >= gmmu_pte_kind_s8z24_2cz_v() &&
		 k <= gmmu_pte_kind_s8z24_ms8_2cz_v()) ||
		(k >= gmmu_pte_kind_v8z24_ms4_vc12_v() &&
		 k <= gmmu_pte_kind_v8z24_ms8_vc24_v()) ||
		(k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() &&
		 k <= gmmu_pte_kind_v8z24_ms8_vc24_2zv_v()) ||
		(k == gmmu_pte_kind_z24s8_v()) ||
		(k >= gmmu_pte_kind_z24s8_2cz_v() &&
		 k <= gmmu_pte_kind_z24s8_ms8_2cz_v()) ||
		(k == gmmu_pte_kind_zf32_v()) ||
		(k >= gmmu_pte_kind_zf32_2cz_v() &&
		 k <= gmmu_pte_kind_zf32_ms8_2cz_v()) ||
		(k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v() &&
		 k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v()) ||
		(k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v() &&
		 k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v()) ||
		(k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v() &&
		 k <= gmmu_pte_kind_zf32_x24s8_v()) ||
		(k >= gmmu_pte_kind_zf32_x24s8_2cszv_v() &&
		 k <= gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v()) ||
		(k == gmmu_pte_kind_generic_16bx2_v()) ||
		(k == gmmu_pte_kind_c32_2c_v()) ||
		(k == gmmu_pte_kind_c32_2cra_v()) ||
		(k == gmmu_pte_kind_c32_ms2_2c_v()) ||
		(k == gmmu_pte_kind_c32_ms2_2cra_v()) ||
		(k >= gmmu_pte_kind_c32_ms4_2c_v() &&
		 k <= gmmu_pte_kind_c32_ms4_2cbr_v()) ||
		(k >= gmmu_pte_kind_c32_ms4_2cra_v() &&
		 k <= gmmu_pte_kind_c64_2c_v()) ||
		(k == gmmu_pte_kind_c64_2cra_v()) ||
		(k == gmmu_pte_kind_c64_ms2_2c_v()) ||
		(k == gmmu_pte_kind_c64_ms2_2cra_v()) ||
		(k >= gmmu_pte_kind_c64_ms4_2c_v() &&
		 k <= gmmu_pte_kind_c64_ms4_2cbr_v()) ||
		(k >= gmmu_pte_kind_c64_ms4_2cra_v() &&
		 k <= gmmu_pte_kind_c128_ms8_ms16_2cr_v()) ||
		(k == gmmu_pte_kind_pitch_no_swizzle_v());
}
93 | |||
/*
 * True when kind 'k' is a depth (Z) kind.  Implemented as range checks
 * over the generated gmmu_pte_kind_* constants; the trailing zv32/xf32
 * range is intentionally commented out (presumably those kinds are not
 * generated for gk20a -- kept as-is from the original).
 */
static inline bool gk20a_kind_z(u8 k)
{
	return (k >= gmmu_pte_kind_z16_v() &&
		k <= gmmu_pte_kind_v8z24_ms8_vc24_v()) ||
	       (k >= gmmu_pte_kind_v8z24_ms4_vc12_1zv_v() &&
		k <= gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()) ||
	       (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() &&
		k <= gmmu_pte_kind_z24v8_ms8_vc24_v()) ||
	       (k >= gmmu_pte_kind_z24v8_ms4_vc12_1zv_v() &&
		k <= gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()) ||
	       (k >= gmmu_pte_kind_z24v8_ms4_vc12_2czv_v() &&
		k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v()) ||
	       (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v() &&
		k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v()) ||
	       (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v() &&
		k <= gmmu_pte_kind_zf32_x24s8_ms16_1cs_v())
	       /* ||
	       (k >= gmmu_pte_kind_zv32_x24s8_2cszv_v() &&
		k <= gmmu_pte_kind_xf32_x24s8_ms16_2cs_v())*/;
}
114 | |||
/*
 * True when kind 'k' is a color (C) kind: work-creation kinds, pitch,
 * generic 16bx2, and the c32/c64/c128 compressed-color ranges.  The
 * range comparisons rely on the numeric ordering of the generated
 * gmmu_pte_kind_* constants.
 */
static inline bool gk20a_kind_c(u8 k)
{
	return gk20a_kind_work_creation(k) ||
	       (k == gmmu_pte_kind_pitch_v()) ||
	       (k == gmmu_pte_kind_generic_16bx2_v()) ||
	       (k >= gmmu_pte_kind_c32_2c_v() &&
		k <= gmmu_pte_kind_c32_ms2_2cbr_v()) ||
	       (k == gmmu_pte_kind_c32_ms2_2cra_v()) ||
	       (k >= gmmu_pte_kind_c32_ms4_2c_v() &&
		k <= gmmu_pte_kind_c64_ms2_2cbr_v()) ||
	       (k == gmmu_pte_kind_c64_ms2_2cra_v()) ||
	       (k >= gmmu_pte_kind_c64_ms4_2c_v() &&
		k <= gmmu_pte_kind_pitch_no_swizzle_v());
}
129 | |||
/*
 * True when kind 'k' supports compression: the compressed variants
 * (1z/2c/2cs/2cz/2czv/2cbr/2cra/4cz/4cszv/...) of the z16, s8z24,
 * v8z24, z24s8, z24v8, zf32, x8z24_x16v8s8, zf32_x16v8s8, zf32_x24s8
 * and c32/c64/c128 families.  Range comparisons rely on the numeric
 * ordering of the generated gmmu_pte_kind_* constants.
 */
static inline bool gk20a_kind_compressible(u8 k)
{
	return (k >= gmmu_pte_kind_z16_2c_v() &&
		k <= gmmu_pte_kind_z16_ms16_4cz_v()) ||
	       (k >= gmmu_pte_kind_s8z24_1z_v() &&
		k <= gmmu_pte_kind_s8z24_ms16_4cszv_v()) ||
	       (k >= gmmu_pte_kind_v8z24_ms4_vc12_1zv_v() &&
		k <= gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()) ||
	       (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() &&
		k <= gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v()) ||
	       (k >= gmmu_pte_kind_z24s8_1z_v() &&
		k <= gmmu_pte_kind_z24s8_ms16_4cszv_v()) ||
	       (k >= gmmu_pte_kind_z24v8_ms4_vc12_1zv_v() &&
		k <= gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()) ||
	       (k >= gmmu_pte_kind_z24v8_ms4_vc12_2czv_v() &&
		k <= gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v()) ||
	       (k >= gmmu_pte_kind_zf32_1z_v() &&
		k <= gmmu_pte_kind_zf32_ms16_2cz_v()) ||
	       (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v() &&
		k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v()) ||
	       (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v() &&
		k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v()) ||
	       (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v() &&
		k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v()) ||
	       (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v() &&
		k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v()) ||
	       (k >= gmmu_pte_kind_zf32_x24s8_1cs_v() &&
		k <= gmmu_pte_kind_zf32_x24s8_ms16_1cs_v()) ||
	       (k >= gmmu_pte_kind_zf32_x24s8_2cszv_v() &&
		k <= gmmu_pte_kind_c32_ms2_2cbr_v()) ||
	       (k == gmmu_pte_kind_c32_ms2_2cra_v()) ||
	       (k >= gmmu_pte_kind_c32_ms4_2c_v() &&
		k <= gmmu_pte_kind_c64_ms2_2cbr_v()) ||
	       (k == gmmu_pte_kind_c64_ms2_2cra_v()) ||
	       (k >= gmmu_pte_kind_c64_ms4_2c_v() &&
		k <= gmmu_pte_kind_c128_ms8_ms16_2cr_v());
}
167 | |||
/*
 * True when kind 'k' can use ZBC (zero-bandwidth clear).  A subset of
 * the compressible kinds, expressed as ranges over the generated
 * gmmu_pte_kind_* constants (whose numeric ordering these comparisons
 * rely on).
 */
static inline bool gk20a_kind_zbc(u8 k)
{
	return (k >= gmmu_pte_kind_z16_2c_v() &&
		k <= gmmu_pte_kind_z16_ms16_2c_v()) ||
	       (k >= gmmu_pte_kind_z16_4cz_v() &&
		k <= gmmu_pte_kind_z16_ms16_4cz_v()) ||
	       (k >= gmmu_pte_kind_s8z24_2cz_v() &&
		k <= gmmu_pte_kind_s8z24_ms16_4cszv_v()) ||
	       (k >= gmmu_pte_kind_v8z24_ms4_vc12_2cs_v() &&
		k <= gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()) ||
	       (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() &&
		k <= gmmu_pte_kind_v8z24_ms8_vc24_2czv_v()) ||
	       (k >= gmmu_pte_kind_v8z24_ms4_vc12_4cszv_v() &&
		k <= gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v()) ||
	       (k >= gmmu_pte_kind_z24s8_2cs_v() &&
		k <= gmmu_pte_kind_z24s8_ms16_4cszv_v()) ||
	       (k >= gmmu_pte_kind_z24v8_ms4_vc12_2cs_v() &&
		k <= gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()) ||
	       (k >= gmmu_pte_kind_z24v8_ms4_vc12_2czv_v() &&
		k <= gmmu_pte_kind_z24v8_ms8_vc24_2czv_v()) ||
	       (k >= gmmu_pte_kind_z24v8_ms4_vc12_4cszv_v() &&
		k <= gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v()) ||
	       (k >= gmmu_pte_kind_zf32_2cs_v() &&
		k <= gmmu_pte_kind_zf32_ms16_2cz_v()) ||
	       (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v() &&
		k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v()) ||
	       (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1czv_v() &&
		k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v()) ||
	       (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v() &&
		k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v()) ||
	       (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1czv_v() &&
		k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v()) ||
	       (k >= gmmu_pte_kind_zf32_x24s8_1cs_v() &&
		k <= gmmu_pte_kind_zf32_x24s8_ms16_1cs_v()) ||
	       (k >= gmmu_pte_kind_zf32_x24s8_2cszv_v() &&
		k <= gmmu_pte_kind_c32_2cra_v()) ||
	       (k >= gmmu_pte_kind_c32_ms2_2c_v() &&
		k <= gmmu_pte_kind_c32_ms2_2cbr_v()) ||
	       (k == gmmu_pte_kind_c32_ms2_2cra_v()) ||
	       (k >= gmmu_pte_kind_c32_ms4_2c_v() &&
		k <= gmmu_pte_kind_c32_ms4_2cra_v()) ||
	       (k >= gmmu_pte_kind_c32_ms8_ms16_2c_v() &&
		k <= gmmu_pte_kind_c64_2cra_v()) ||
	       (k >= gmmu_pte_kind_c64_ms2_2c_v() &&
		k <= gmmu_pte_kind_c64_ms2_2cbr_v()) ||
	       (k == gmmu_pte_kind_c64_ms2_2cra_v()) ||
	       (k >= gmmu_pte_kind_c64_ms4_2c_v() &&
		k <= gmmu_pte_kind_c64_ms4_2cra_v()) ||
	       (k >= gmmu_pte_kind_c64_ms8_ms16_2c_v() &&
		k <= gmmu_pte_kind_c128_ms8_ms16_2cr_v());
}
219 | |||
/* Maps each kind value to its uncompressed equivalent (index = kind). */
u8 gk20a_uc_kind_map[256];
/*
 * Build gk20a_uc_kind_map: every kind defaults to invalid, then each
 * family of compressed variants is pointed at that family's
 * uncompressed base kind via chained assignments.
 */
void gk20a_init_uncompressed_kind_map(void)
{
	int i;
	for (i = 0; i < 256; i++)
		gk20a_uc_kind_map[i] = gmmu_pte_kind_invalid_v();

	/* z16 variants -> z16 */
	gk20a_uc_kind_map[gmmu_pte_kind_z16_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z16_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z16_ms2_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z16_ms4_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z16_ms8_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z16_2z_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z16_ms2_2z_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z16_ms4_2z_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z16_ms8_2z_v()] =
		gmmu_pte_kind_z16_v();

	/* s8z24 variants -> s8z24 */
	gk20a_uc_kind_map[gmmu_pte_kind_s8z24_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_s8z24_2cz_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms2_2cz_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms4_2cz_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms8_2cz_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_s8z24_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms2_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms4_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms8_2cs_v()] =
		gmmu_pte_kind_s8z24_v();

	/* v8z24 variants -> matching v8z24 base (per ms/vc combination) */
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_2czv_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_2zv_v()] =
		gmmu_pte_kind_v8z24_ms4_vc4_v();

	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_2czv_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_2zv_v()] =
		gmmu_pte_kind_v8z24_ms8_vc8_v();

	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_2czv_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_2zv_v()] =
		gmmu_pte_kind_v8z24_ms4_vc12_v();

	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_2czv_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_2zv_v()] =
		gmmu_pte_kind_v8z24_ms8_vc24_v();

	/* z24s8 variants -> z24s8 */
	gk20a_uc_kind_map[gmmu_pte_kind_z24s8_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24s8_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms2_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms4_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms8_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24s8_2cz_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms2_2cz_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms4_2cz_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms8_2cz_v()] =
		gmmu_pte_kind_z24s8_v();

	/* zf32 variants -> zf32 */
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms2_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms4_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms8_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_2cz_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms2_2cz_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms4_2cz_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms8_2cz_v()] =
		gmmu_pte_kind_zf32_v();

	/* x8z24_x16v8s8 variants -> matching base (per ms/vc combination) */
	gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v()] =
		gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v();

	gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cszv_v()] =
		gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v();

	gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cszv_v()] =
		gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v();

	gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v()] =
		gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v();

	/* zf32_x16v8s8 variants -> matching base (per ms/vc combination) */
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v()] =
		gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v();

	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cszv_v()] =
		gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v();

	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cszv_v()] =
		gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v();

	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v()] =
		gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v();

	/* zf32_x24s8 variants -> zf32_x24s8 */
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_2cszv_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms2_2cszv_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms4_2cszv_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms2_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms4_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms8_2cs_v()] =
		gmmu_pte_kind_zf32_x24s8_v();

	/* all c32/c64/c128 color variants -> generic_16bx2 */
	gk20a_uc_kind_map[gmmu_pte_kind_c32_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c32_2cba_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c32_2cra_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c32_2bra_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c32_ms2_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c32_ms2_2cra_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2cbr_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2cba_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2cra_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2bra_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c32_ms8_ms16_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c32_ms8_ms16_2cra_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_2cbr_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_2cba_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_2cra_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_2bra_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_ms2_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_ms2_2cra_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2cbr_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2cba_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2cra_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2bra_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_ms8_ms16_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c64_ms8_ms16_2cra_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c128_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c128_2cr_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c128_ms2_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c128_ms2_2cr_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c128_ms4_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c128_ms4_2cr_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c128_ms8_ms16_2c_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_c128_ms8_ms16_2cr_v()] =
		gmmu_pte_kind_generic_16bx2_v();

	/* z24v8 variants -> matching base (per ms/vc combination) */
	gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc4_2czv_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc4_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc4_2zv_v()] =
		gmmu_pte_kind_z24v8_ms4_vc4_v();

	gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc12_2czv_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc12_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc12_2zv_v()] =
		gmmu_pte_kind_z24v8_ms4_vc12_v();

	gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc8_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc8_2czv_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc8_2zv_v()] =
		gmmu_pte_kind_z24v8_ms8_vc8_v();

	gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc24_2czv_v()] =
	gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc24_2zv_v()] =
		gmmu_pte_kind_z24v8_ms8_vc24_v();

	/* x8c24 is its own uncompressed form */
	gk20a_uc_kind_map[gmmu_pte_kind_x8c24_v()] =
		gmmu_pte_kind_x8c24_v();
}
406 | |||
407 | u16 gk20a_kind_attr[256]; | ||
408 | void gk20a_init_kind_attr(void) | ||
409 | { | ||
410 | u16 k; | ||
411 | for (k = 0; k < 256; k++) { | ||
412 | gk20a_kind_attr[k] = 0; | ||
413 | if (gk20a_kind_supported((u8)k)) | ||
414 | gk20a_kind_attr[k] |= GK20A_KIND_ATTR_SUPPORTED; | ||
415 | if (gk20a_kind_compressible((u8)k)) | ||
416 | gk20a_kind_attr[k] |= GK20A_KIND_ATTR_COMPRESSIBLE; | ||
417 | if (gk20a_kind_z((u8)k)) | ||
418 | gk20a_kind_attr[k] |= GK20A_KIND_ATTR_Z; | ||
419 | if (gk20a_kind_c((u8)k)) | ||
420 | gk20a_kind_attr[k] |= GK20A_KIND_ATTR_C; | ||
421 | if (gk20a_kind_zbc((u8)k)) | ||
422 | gk20a_kind_attr[k] |= GK20A_KIND_ATTR_ZBC; | ||
423 | } | ||
424 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/kind_gk20a.h b/drivers/gpu/nvgpu/gk20a/kind_gk20a.h new file mode 100644 index 00000000..93f011d4 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/kind_gk20a.h | |||
@@ -0,0 +1,67 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/kind_gk20a.h | ||
3 | * | ||
4 | * GK20A memory kind management | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #ifndef __KIND_GK20A_H__ | ||
22 | #define __KIND_GK20A_H__ | ||
23 | |||
24 | |||
25 | void gk20a_init_uncompressed_kind_map(void); | ||
26 | void gk20a_init_kind_attr(void); | ||
27 | |||
28 | extern u16 gk20a_kind_attr[]; | ||
29 | #define NV_KIND_DEFAULT -1 | ||
30 | |||
31 | #define GK20A_KIND_ATTR_SUPPORTED BIT(0) | ||
32 | #define GK20A_KIND_ATTR_COMPRESSIBLE BIT(1) | ||
33 | #define GK20A_KIND_ATTR_Z BIT(2) | ||
34 | #define GK20A_KIND_ATTR_C BIT(3) | ||
35 | #define GK20A_KIND_ATTR_ZBC BIT(4) | ||
36 | |||
37 | static inline bool gk20a_kind_is_supported(u8 k) | ||
38 | { | ||
39 | return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_SUPPORTED); | ||
40 | } | ||
41 | static inline bool gk20a_kind_is_compressible(u8 k) | ||
42 | { | ||
43 | return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_COMPRESSIBLE); | ||
44 | } | ||
45 | |||
46 | static inline bool gk20a_kind_is_z(u8 k) | ||
47 | { | ||
48 | return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_Z); | ||
49 | } | ||
50 | |||
51 | static inline bool gk20a_kind_is_c(u8 k) | ||
52 | { | ||
53 | return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_C); | ||
54 | } | ||
55 | static inline bool gk20a_kind_is_zbc(u8 k) | ||
56 | { | ||
57 | return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_ZBC); | ||
58 | } | ||
59 | |||
60 | /* maps kind to its uncompressed version */ | ||
61 | extern u8 gk20a_uc_kind_map[]; | ||
62 | static inline u8 gk20a_get_uncompressed_kind(u8 k) | ||
63 | { | ||
64 | return gk20a_uc_kind_map[k]; | ||
65 | } | ||
66 | |||
67 | #endif /* __KIND_GK20A_H__ */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c new file mode 100644 index 00000000..cbb27cc7 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c | |||
@@ -0,0 +1,243 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/ltc_common.c | ||
3 | * | ||
4 | * GK20A Graphics | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include <linux/dma-mapping.h> | ||
22 | #include <linux/delay.h> | ||
23 | |||
24 | #include "gk20a.h" | ||
25 | #include "gr_gk20a.h" | ||
26 | |||
27 | static int gk20a_determine_L2_size_bytes(struct gk20a *g) | ||
28 | { | ||
29 | const u32 gpuid = GK20A_GPUID(g->gpu_characteristics.arch, | ||
30 | g->gpu_characteristics.impl); | ||
31 | u32 lts_per_ltc; | ||
32 | u32 ways; | ||
33 | u32 sets; | ||
34 | u32 bytes_per_line; | ||
35 | u32 active_ltcs; | ||
36 | u32 cache_size; | ||
37 | |||
38 | u32 tmp; | ||
39 | u32 active_sets_value; | ||
40 | |||
41 | tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r()); | ||
42 | ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp)); | ||
43 | |||
44 | active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp); | ||
45 | if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) { | ||
46 | sets = 64; | ||
47 | } else if (active_sets_value == | ||
48 | ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) { | ||
49 | sets = 32; | ||
50 | } else if (active_sets_value == | ||
51 | ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) { | ||
52 | sets = 16; | ||
53 | } else { | ||
54 | dev_err(dev_from_gk20a(g), | ||
55 | "Unknown constant %u for active sets", | ||
56 | (unsigned)active_sets_value); | ||
57 | sets = 0; | ||
58 | } | ||
59 | |||
60 | active_ltcs = g->gr.num_fbps; | ||
61 | |||
62 | /* chip-specific values */ | ||
63 | switch (gpuid) { | ||
64 | case GK20A_GPUID_GK20A: | ||
65 | lts_per_ltc = 1; | ||
66 | bytes_per_line = 128; | ||
67 | break; | ||
68 | |||
69 | default: | ||
70 | dev_err(dev_from_gk20a(g), "Unknown GPU id 0x%02x\n", | ||
71 | (unsigned)gpuid); | ||
72 | lts_per_ltc = 0; | ||
73 | bytes_per_line = 0; | ||
74 | } | ||
75 | |||
76 | cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line; | ||
77 | |||
78 | return cache_size; | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * Set the maximum number of ways that can have the "EVIST_LAST" class. | ||
83 | */ | ||
84 | static void gk20a_ltc_set_max_ways_evict_last(struct gk20a *g, u32 max_ways) | ||
85 | { | ||
86 | u32 mgmt_reg; | ||
87 | |||
88 | mgmt_reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_r()) & | ||
89 | ~ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(~0); | ||
90 | mgmt_reg |= ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(max_ways); | ||
91 | |||
92 | gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_r(), mgmt_reg); | ||
93 | } | ||
94 | |||
95 | /* | ||
96 | * Sets the ZBC color for the passed index. | ||
97 | */ | ||
98 | static void gk20a_ltc_set_zbc_color_entry(struct gk20a *g, | ||
99 | struct zbc_entry *color_val, | ||
100 | u32 index) | ||
101 | { | ||
102 | u32 i; | ||
103 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
104 | |||
105 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
106 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
107 | |||
108 | for (i = 0; | ||
109 | i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) | ||
110 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), | ||
111 | color_val->color_l2[i]); | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * Sets the ZBC depth for the passed index. | ||
116 | */ | ||
117 | static void gk20a_ltc_set_zbc_depth_entry(struct gk20a *g, | ||
118 | struct zbc_entry *depth_val, | ||
119 | u32 index) | ||
120 | { | ||
121 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
122 | |||
123 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
124 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
125 | |||
126 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), | ||
127 | depth_val->depth); | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Clear the L2 ZBC color table for the passed index. | ||
132 | */ | ||
133 | static void gk20a_ltc_clear_zbc_color_entry(struct gk20a *g, u32 index) | ||
134 | { | ||
135 | u32 i; | ||
136 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
137 | |||
138 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
139 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
140 | |||
141 | for (i = 0; | ||
142 | i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) | ||
143 | gk20a_writel(g, | ||
144 | ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), 0); | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * Clear the L2 ZBC depth entry for the passed index. | ||
149 | */ | ||
150 | static void gk20a_ltc_clear_zbc_depth_entry(struct gk20a *g, u32 index) | ||
151 | { | ||
152 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
153 | |||
154 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
155 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
156 | |||
157 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0); | ||
158 | } | ||
159 | |||
160 | static int gk20a_ltc_init_zbc(struct gk20a *g, struct gr_gk20a *gr) | ||
161 | { | ||
162 | u32 i, j; | ||
163 | |||
164 | /* reset zbc clear */ | ||
165 | for (i = 0; i < GK20A_SIZEOF_ZBC_TABLE - | ||
166 | GK20A_STARTOF_ZBC_TABLE; i++) { | ||
167 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
168 | (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) & | ||
169 | ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) | | ||
170 | ltc_ltcs_ltss_dstg_zbc_index_address_f( | ||
171 | i + GK20A_STARTOF_ZBC_TABLE)); | ||
172 | for (j = 0; j < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); j++) | ||
173 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(j), 0); | ||
174 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0); | ||
175 | } | ||
176 | |||
177 | gr_gk20a_clear_zbc_table(g, gr); | ||
178 | gr_gk20a_load_zbc_default_table(g, gr); | ||
179 | |||
180 | return 0; | ||
181 | } | ||
182 | |||
/*
 * Program the compression-bit-cache (CBC) base register from the
 * backing store IOVA.  The hw register holds the address divided by
 * num_fbps and shifted down by the cbc base alignment; the divide can
 * truncate, so the result is rounded up until multiplying back does
 * not fall below the real base address.
 */
static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
{
	u32 compbit_base_post_divide;
	u64 compbit_base_post_multiply64;
	/* backing store address as seen through the SMMU */
	u64 compbit_store_base_iova =
		NV_MC_SMMU_VADDR_TRANSLATE(gr->compbit_store.base_iova);
	u64 compbit_base_post_divide64 = (compbit_store_base_iova >>
		ltc_ltcs_ltss_cbc_base_alignment_shift_v());

	/* do_div() divides compbit_base_post_divide64 in place */
	do_div(compbit_base_post_divide64, gr->num_fbps);
	compbit_base_post_divide = u64_lo32(compbit_base_post_divide64);

	/* multiply back to detect truncation from the divide above */
	compbit_base_post_multiply64 = ((u64)compbit_base_post_divide *
		gr->num_fbps) << ltc_ltcs_ltss_cbc_base_alignment_shift_v();

	if (compbit_base_post_multiply64 < compbit_store_base_iova)
		compbit_base_post_divide++;

	gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(),
		compbit_base_post_divide);

	gk20a_dbg(gpu_dbg_info | gpu_dbg_map | gpu_dbg_pte,
		"compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n",
		(u32)(compbit_store_base_iova >> 32),
		(u32)(compbit_store_base_iova & 0xffffffff),
		compbit_base_post_divide);
}
210 | |||
211 | /* Flushes the compression bit cache as well as "data". | ||
212 | * Note: the name here is a bit of a misnomer. ELPG uses this | ||
213 | * internally... but ELPG doesn't have to be on to do it manually. | ||
214 | */ | ||
215 | static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g) | ||
216 | { | ||
217 | u32 data; | ||
218 | s32 retry = 100; | ||
219 | |||
220 | gk20a_dbg_fn(""); | ||
221 | |||
222 | /* Make sure all previous writes are committed to the L2. There's no | ||
223 | guarantee that writes are to DRAM. This will be a sysmembar internal | ||
224 | to the L2. */ | ||
225 | gk20a_writel(g, ltc_ltss_g_elpg_r(), | ||
226 | ltc_ltss_g_elpg_flush_pending_f()); | ||
227 | do { | ||
228 | data = gk20a_readl(g, ltc_ltss_g_elpg_r()); | ||
229 | |||
230 | if (ltc_ltss_g_elpg_flush_v(data) == | ||
231 | ltc_ltss_g_elpg_flush_pending_v()) { | ||
232 | gk20a_dbg_info("g_elpg_flush 0x%x", data); | ||
233 | retry--; | ||
234 | usleep_range(20, 40); | ||
235 | } else | ||
236 | break; | ||
237 | } while (retry >= 0 || !tegra_platform_is_silicon()); | ||
238 | |||
239 | if (retry < 0) | ||
240 | gk20a_warn(dev_from_gk20a(g), | ||
241 | "g_elpg_flush too many retries"); | ||
242 | |||
243 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c new file mode 100644 index 00000000..08aedecd --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | |||
@@ -0,0 +1,203 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/ltc_gk20a.c | ||
3 | * | ||
4 | * GK20A Graphics | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include <linux/kernel.h> | ||
22 | |||
23 | #include "hw_ltc_gk20a.h" | ||
24 | #include "hw_proj_gk20a.h" | ||
25 | |||
26 | #include "ltc_common.c" | ||
27 | |||
28 | static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | ||
29 | { | ||
30 | struct device *d = dev_from_gk20a(g); | ||
31 | DEFINE_DMA_ATTRS(attrs); | ||
32 | dma_addr_t iova; | ||
33 | |||
34 | /* max memory size (MB) to cover */ | ||
35 | u32 max_size = gr->max_comptag_mem; | ||
36 | /* one tag line covers 128KB */ | ||
37 | u32 max_comptag_lines = max_size << 3; | ||
38 | |||
39 | u32 hw_max_comptag_lines = | ||
40 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); | ||
41 | |||
42 | u32 cbc_param = | ||
43 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); | ||
44 | u32 comptags_per_cacheline = | ||
45 | ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); | ||
46 | u32 slices_per_fbp = | ||
47 | ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(cbc_param); | ||
48 | u32 cacheline_size = | ||
49 | 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param); | ||
50 | |||
51 | u32 compbit_backing_size; | ||
52 | |||
53 | gk20a_dbg_fn(""); | ||
54 | |||
55 | if (max_comptag_lines == 0) { | ||
56 | gr->compbit_store.size = 0; | ||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | if (max_comptag_lines > hw_max_comptag_lines) | ||
61 | max_comptag_lines = hw_max_comptag_lines; | ||
62 | |||
63 | /* no hybird fb */ | ||
64 | compbit_backing_size = | ||
65 | DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * | ||
66 | cacheline_size * slices_per_fbp * gr->num_fbps; | ||
67 | |||
68 | /* aligned to 2KB * num_fbps */ | ||
69 | compbit_backing_size += | ||
70 | gr->num_fbps << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
71 | |||
72 | /* must be a multiple of 64KB */ | ||
73 | compbit_backing_size = roundup(compbit_backing_size, 64*1024); | ||
74 | |||
75 | max_comptag_lines = | ||
76 | (compbit_backing_size * comptags_per_cacheline) / | ||
77 | cacheline_size * slices_per_fbp * gr->num_fbps; | ||
78 | |||
79 | if (max_comptag_lines > hw_max_comptag_lines) | ||
80 | max_comptag_lines = hw_max_comptag_lines; | ||
81 | |||
82 | gk20a_dbg_info("compbit backing store size : %d", | ||
83 | compbit_backing_size); | ||
84 | gk20a_dbg_info("max comptag lines : %d", | ||
85 | max_comptag_lines); | ||
86 | |||
87 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
88 | gr->compbit_store.size = compbit_backing_size; | ||
89 | gr->compbit_store.pages = dma_alloc_attrs(d, gr->compbit_store.size, | ||
90 | &iova, GFP_KERNEL, &attrs); | ||
91 | if (!gr->compbit_store.pages) { | ||
92 | gk20a_err(dev_from_gk20a(g), "failed to allocate" | ||
93 | "backing store for compbit : size %d", | ||
94 | compbit_backing_size); | ||
95 | return -ENOMEM; | ||
96 | } | ||
97 | gr->compbit_store.base_iova = iova; | ||
98 | |||
99 | gk20a_allocator_init(&gr->comp_tags, "comptag", | ||
100 | 1, /* start */ | ||
101 | max_comptag_lines - 1, /* length*/ | ||
102 | 1); /* align */ | ||
103 | |||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | static int gk20a_ltc_clear_comptags(struct gk20a *g, u32 min, u32 max) | ||
108 | { | ||
109 | struct gr_gk20a *gr = &g->gr; | ||
110 | u32 fbp, slice, ctrl1, val; | ||
111 | unsigned long end_jiffies = jiffies + | ||
112 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | ||
113 | u32 delay = GR_IDLE_CHECK_DEFAULT; | ||
114 | u32 slices_per_fbp = | ||
115 | ltc_ltcs_ltss_cbc_param_slices_per_fbp_v( | ||
116 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); | ||
117 | |||
118 | gk20a_dbg_fn(""); | ||
119 | |||
120 | if (gr->compbit_store.size == 0) | ||
121 | return 0; | ||
122 | |||
123 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(), | ||
124 | ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min)); | ||
125 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(), | ||
126 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max)); | ||
127 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), | ||
128 | gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | | ||
129 | ltc_ltcs_ltss_cbc_ctrl1_clear_active_f()); | ||
130 | |||
131 | for (fbp = 0; fbp < gr->num_fbps; fbp++) { | ||
132 | for (slice = 0; slice < slices_per_fbp; slice++) { | ||
133 | |||
134 | delay = GR_IDLE_CHECK_DEFAULT; | ||
135 | |||
136 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | ||
137 | fbp * proj_ltc_stride_v() + | ||
138 | slice * proj_lts_stride_v(); | ||
139 | |||
140 | do { | ||
141 | val = gk20a_readl(g, ctrl1); | ||
142 | if (ltc_ltcs_ltss_cbc_ctrl1_clear_v(val) != | ||
143 | ltc_ltcs_ltss_cbc_ctrl1_clear_active_v()) | ||
144 | break; | ||
145 | |||
146 | usleep_range(delay, delay * 2); | ||
147 | delay = min_t(u32, delay << 1, | ||
148 | GR_IDLE_CHECK_MAX); | ||
149 | |||
150 | } while (time_before(jiffies, end_jiffies) || | ||
151 | !tegra_platform_is_silicon()); | ||
152 | |||
153 | if (!time_before(jiffies, end_jiffies)) { | ||
154 | gk20a_err(dev_from_gk20a(g), | ||
155 | "comp tag clear timeout\n"); | ||
156 | return -EBUSY; | ||
157 | } | ||
158 | } | ||
159 | } | ||
160 | |||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | |||
#ifdef CONFIG_DEBUG_FS
/*
 * Push the debugfs-requested L2 enable state to hw: toggles the L2
 * bypass-mode bit when the requested state differs from the current
 * one.  Serialized by g->debugfs_lock.
 */
static void gk20a_ltc_sync_debugfs(struct gk20a *g)
{
	u32 bypass_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();

	spin_lock(&g->debugfs_lock);
	if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) {
		u32 mgmt2 = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());

		if (g->mm.ltc_enabled_debug)
			/* bypass off: normal caching operation */
			mgmt2 &= ~bypass_f;
		else
			/* bypass on: no caching */
			mgmt2 |= bypass_f;

		gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), mgmt2);
		g->mm.ltc_enabled = g->mm.ltc_enabled_debug;
	}
	spin_unlock(&g->debugfs_lock);
}
#endif
186 | |||
187 | void gk20a_init_ltc(struct gpu_ops *gops) | ||
188 | { | ||
189 | gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes; | ||
190 | gops->ltc.set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last; | ||
191 | gops->ltc.init_comptags = gk20a_ltc_init_comptags; | ||
192 | gops->ltc.clear_comptags = gk20a_ltc_clear_comptags; | ||
193 | gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry; | ||
194 | gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry; | ||
195 | gops->ltc.clear_zbc_color_entry = gk20a_ltc_clear_zbc_color_entry; | ||
196 | gops->ltc.clear_zbc_depth_entry = gk20a_ltc_clear_zbc_depth_entry; | ||
197 | gops->ltc.init_zbc = gk20a_ltc_init_zbc; | ||
198 | gops->ltc.init_cbc = gk20a_ltc_init_cbc; | ||
199 | #ifdef CONFIG_DEBUG_FS | ||
200 | gops->ltc.sync_debugfs = gk20a_ltc_sync_debugfs; | ||
201 | #endif | ||
202 | gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked; | ||
203 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h new file mode 100644 index 00000000..208811b2 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * GK20A L2 | ||
3 | * | ||
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
#ifndef _NVHOST_GK20A_LTC
#define _NVHOST_GK20A_LTC
struct gk20a;

/* Fill in the gk20a implementations of the gpu_ops.ltc hooks. */
void gk20a_init_ltc(struct gpu_ops *gops);
#endif
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c new file mode 100644 index 00000000..b22df5e8 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -0,0 +1,2984 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/mm_gk20a.c | ||
3 | * | ||
4 | * GK20A memory management | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | |||
22 | #include <linux/delay.h> | ||
23 | #include <linux/highmem.h> | ||
24 | #include <linux/log2.h> | ||
25 | #include <linux/nvhost.h> | ||
26 | #include <linux/pm_runtime.h> | ||
27 | #include <linux/scatterlist.h> | ||
28 | #include <linux/nvmap.h> | ||
29 | #include <linux/tegra-soc.h> | ||
30 | #include <linux/vmalloc.h> | ||
31 | #include <linux/dma-buf.h> | ||
32 | #include <asm/cacheflush.h> | ||
33 | |||
34 | #include "gk20a.h" | ||
35 | #include "mm_gk20a.h" | ||
36 | #include "hw_gmmu_gk20a.h" | ||
37 | #include "hw_fb_gk20a.h" | ||
38 | #include "hw_bus_gk20a.h" | ||
39 | #include "hw_ram_gk20a.h" | ||
40 | #include "hw_mc_gk20a.h" | ||
41 | #include "hw_flush_gk20a.h" | ||
42 | #include "hw_ltc_gk20a.h" | ||
43 | |||
44 | #include "kind_gk20a.h" | ||
45 | |||
46 | #ifdef CONFIG_ARM64 | ||
47 | #define outer_flush_range(a, b) | ||
48 | #define __cpuc_flush_dcache_area __flush_dcache_area | ||
49 | #endif | ||
50 | |||
51 | /* | ||
52 | * GPU mapping life cycle | ||
53 | * ====================== | ||
54 | * | ||
55 | * Kernel mappings | ||
56 | * --------------- | ||
57 | * | ||
58 | * Kernel mappings are created through vm.map(..., false): | ||
59 | * | ||
60 | * - Mappings to the same allocations are reused and refcounted. | ||
61 | * - This path does not support deferred unmapping (i.e. kernel must wait for | ||
62 | * all hw operations on the buffer to complete before unmapping). | ||
63 | * - References to dmabuf are owned and managed by the (kernel) clients of | ||
64 | * the gk20a_vm layer. | ||
65 | * | ||
66 | * | ||
67 | * User space mappings | ||
68 | * ------------------- | ||
69 | * | ||
70 | * User space mappings are created through as.map_buffer -> vm.map(..., true): | ||
71 | * | ||
72 | * - Mappings to the same allocations are reused and refcounted. | ||
73 | * - This path supports deferred unmapping (i.e. we delay the actual unmapping | ||
74 | * until all hw operations have completed). | ||
75 | * - References to dmabuf are owned and managed by the vm_gk20a | ||
76 | * layer itself. vm.map acquires these refs, and sets | ||
77 | * mapped_buffer->own_mem_ref to record that we must release the refs when we | ||
78 | * actually unmap. | ||
79 | * | ||
80 | */ | ||
81 | |||
82 | static inline int vm_aspace_id(struct vm_gk20a *vm) | ||
83 | { | ||
84 | /* -1 is bar1 or pmu, etc. */ | ||
85 | return vm->as_share ? vm->as_share->id : -1; | ||
86 | } | ||
87 | static inline u32 hi32(u64 f) | ||
88 | { | ||
89 | return (u32)(f >> 32); | ||
90 | } | ||
91 | static inline u32 lo32(u64 f) | ||
92 | { | ||
93 | return (u32)(f & 0xffffffff); | ||
94 | } | ||
95 | |||
96 | #define FLUSH_CPU_DCACHE(va, pa, size) \ | ||
97 | do { \ | ||
98 | __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \ | ||
99 | outer_flush_range(pa, pa + (size_t)(size)); \ | ||
100 | } while (0) | ||
101 | |||
102 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer); | ||
103 | static struct mapped_buffer_node *find_mapped_buffer_locked( | ||
104 | struct rb_root *root, u64 addr); | ||
105 | static struct mapped_buffer_node *find_mapped_buffer_reverse_locked( | ||
106 | struct rb_root *root, struct dma_buf *dmabuf, | ||
107 | u32 kind); | ||
108 | static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | ||
109 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
110 | struct sg_table *sgt, | ||
111 | u64 first_vaddr, u64 last_vaddr, | ||
112 | u8 kind_v, u32 ctag_offset, bool cacheable, | ||
113 | int rw_flag); | ||
114 | static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i); | ||
115 | static void gk20a_vm_remove_support(struct vm_gk20a *vm); | ||
116 | |||
117 | |||
/* note: keep the page sizes sorted lowest to highest here */
static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
/* log2 of the sizes above */
static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
/* masks selecting the in-page offset bits for each page size */
static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
								0x1ffffLL };
/* masks selecting the page-aligned address bits for each page size */
static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };

/* Range of compression tag lines assigned to one buffer. */
struct gk20a_comptags {
	u32 offset;	/* first comptag line */
	u32 lines;	/* number of lines; 0 means no comptags */
};

/* Per-dmabuf driver private data, retrieved via dma_buf_get_drvdata(). */
struct gk20a_dmabuf_priv {
	struct mutex lock;	/* guards pin_count, attach, sgt */

	/* allocator the comptags came from; used to free them again */
	struct gk20a_allocator *comptag_allocator;
	struct gk20a_comptags comptags;

	/* attachment/sg table cached while the buffer is pinned */
	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	/* nesting depth of gk20a_mm_pin() calls */
	int pin_count;
};
141 | |||
142 | static void gk20a_mm_delete_priv(void *_priv) | ||
143 | { | ||
144 | struct gk20a_dmabuf_priv *priv = _priv; | ||
145 | if (!priv) | ||
146 | return; | ||
147 | |||
148 | if (priv->comptags.lines) { | ||
149 | BUG_ON(!priv->comptag_allocator); | ||
150 | priv->comptag_allocator->free(priv->comptag_allocator, | ||
151 | priv->comptags.offset, | ||
152 | priv->comptags.lines); | ||
153 | } | ||
154 | |||
155 | kfree(priv); | ||
156 | } | ||
157 | |||
/*
 * Pin a dmabuf for device dma.  The first pin attaches the buffer and
 * maps it; nested pins just bump pin_count and return the cached sg
 * table.  Returns an ERR_PTR-encoded pointer on failure.  Pair each
 * call with gk20a_mm_unpin().
 */
struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf)
{
	struct gk20a_dmabuf_priv *priv;

	priv = dma_buf_get_drvdata(dmabuf, dev);
	if (WARN_ON(!priv))
		return ERR_PTR(-EINVAL);

	mutex_lock(&priv->lock);

	if (priv->pin_count == 0) {
		priv->attach = dma_buf_attach(dmabuf, dev);
		if (IS_ERR(priv->attach)) {
			mutex_unlock(&priv->lock);
			/* cast preserves the embedded ERR_PTR value */
			return (struct sg_table *)priv->attach;
		}

		priv->sgt = dma_buf_map_attachment(priv->attach,
						   DMA_BIDIRECTIONAL);
		if (IS_ERR(priv->sgt)) {
			/* undo the attach done above */
			dma_buf_detach(dmabuf, priv->attach);
			mutex_unlock(&priv->lock);
			return priv->sgt;
		}
	}

	priv->pin_count++;
	mutex_unlock(&priv->lock);
	return priv->sgt;
}
188 | |||
189 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | ||
190 | struct sg_table *sgt) | ||
191 | { | ||
192 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
193 | dma_addr_t dma_addr; | ||
194 | |||
195 | if (IS_ERR(priv) || !priv) | ||
196 | return; | ||
197 | |||
198 | mutex_lock(&priv->lock); | ||
199 | WARN_ON(priv->sgt != sgt); | ||
200 | priv->pin_count--; | ||
201 | WARN_ON(priv->pin_count < 0); | ||
202 | dma_addr = sg_dma_address(priv->sgt->sgl); | ||
203 | if (priv->pin_count == 0) { | ||
204 | dma_buf_unmap_attachment(priv->attach, priv->sgt, | ||
205 | DMA_BIDIRECTIONAL); | ||
206 | dma_buf_detach(dmabuf, priv->attach); | ||
207 | } | ||
208 | mutex_unlock(&priv->lock); | ||
209 | } | ||
210 | |||
211 | |||
212 | static void gk20a_get_comptags(struct device *dev, | ||
213 | struct dma_buf *dmabuf, | ||
214 | struct gk20a_comptags *comptags) | ||
215 | { | ||
216 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
217 | |||
218 | if (!comptags) | ||
219 | return; | ||
220 | |||
221 | if (!priv) { | ||
222 | comptags->lines = 0; | ||
223 | comptags->offset = 0; | ||
224 | return; | ||
225 | } | ||
226 | |||
227 | *comptags = priv->comptags; | ||
228 | } | ||
229 | |||
230 | static int gk20a_alloc_comptags(struct device *dev, | ||
231 | struct dma_buf *dmabuf, | ||
232 | struct gk20a_allocator *allocator, | ||
233 | int lines) | ||
234 | { | ||
235 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
236 | u32 offset = 0; | ||
237 | int err; | ||
238 | |||
239 | if (!priv) | ||
240 | return -ENOSYS; | ||
241 | |||
242 | if (!lines) | ||
243 | return -EINVAL; | ||
244 | |||
245 | /* store the allocator so we can use it when we free the ctags */ | ||
246 | priv->comptag_allocator = allocator; | ||
247 | err = allocator->alloc(allocator, &offset, lines); | ||
248 | if (!err) { | ||
249 | priv->comptags.lines = lines; | ||
250 | priv->comptags.offset = offset; | ||
251 | } | ||
252 | return err; | ||
253 | } | ||
254 | |||
255 | |||
256 | |||
257 | |||
258 | static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) | ||
259 | { | ||
260 | gk20a_dbg_fn(""); | ||
261 | if (g->ops.fb.reset) | ||
262 | g->ops.fb.reset(g); | ||
263 | |||
264 | if (g->ops.fb.init_fs_state) | ||
265 | g->ops.fb.init_fs_state(g); | ||
266 | |||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | void gk20a_remove_mm_support(struct mm_gk20a *mm) | ||
271 | { | ||
272 | struct gk20a *g = mm->g; | ||
273 | struct device *d = dev_from_gk20a(g); | ||
274 | struct vm_gk20a *vm = &mm->bar1.vm; | ||
275 | struct inst_desc *inst_block = &mm->bar1.inst_block; | ||
276 | |||
277 | gk20a_dbg_fn(""); | ||
278 | |||
279 | if (inst_block->cpuva) | ||
280 | dma_free_coherent(d, inst_block->size, | ||
281 | inst_block->cpuva, inst_block->iova); | ||
282 | inst_block->cpuva = NULL; | ||
283 | inst_block->iova = 0; | ||
284 | |||
285 | gk20a_vm_remove_support(vm); | ||
286 | } | ||
287 | |||
/*
 * One-time sw-side mm initialization: record page sizes, size the
 * page tables for each gmmu page size, set up the bar1 vm and install
 * the teardown hook.  Idempotent (no-op once sw_ready).  Returns 0.
 */
int gk20a_init_mm_setup_sw(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	int i;

	gk20a_dbg_fn("");

	if (mm->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	mm->g = g;
	mutex_init(&mm->tlb_lock);
	mutex_init(&mm->l2_op_lock);
	mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
	mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
	/* one PDE covers big_page_size << 10 bytes of va */
	mm->pde_stride = mm->big_page_size << 10;
	mm->pde_stride_shift = ilog2(mm->pde_stride);
	BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */

	/* size the page tables for each supported gmmu page size */
	for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) {

		u32 num_ptes, pte_space, num_pages;

		/* assuming "full" page tables */
		num_ptes = mm->pde_stride / gmmu_page_sizes[i];

		pte_space = num_ptes * gmmu_pte__size_v();
		/* allocate whole pages */
		pte_space = roundup(pte_space, PAGE_SIZE);

		num_pages = pte_space / PAGE_SIZE;
		/* make sure "order" is viable */
		BUG_ON(!is_power_of_2(num_pages));

		mm->page_table_sizing[i].num_ptes = num_ptes;
		mm->page_table_sizing[i].order = ilog2(num_pages);
	}

	/*TBD: make channel vm size configurable */
	mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;

	gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));

	gk20a_dbg_info("small page-size (%dKB) pte array: %dKB",
			gmmu_page_sizes[gmmu_page_size_small] >> 10,
			(mm->page_table_sizing[gmmu_page_size_small].num_ptes *
			 gmmu_pte__size_v()) >> 10);

	gk20a_dbg_info("big page-size (%dKB) pte array: %dKB",
			gmmu_page_sizes[gmmu_page_size_big] >> 10,
			(mm->page_table_sizing[gmmu_page_size_big].num_ptes *
			 gmmu_pte__size_v()) >> 10);


	gk20a_init_bar1_vm(mm);

	/* teardown hook called from gk20a_remove_support */
	mm->remove_support = gk20a_remove_mm_support;
	mm->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;
}
352 | |||
/*
 * Program the MM-related hardware registers: the fb large-page size and
 * the BAR1 instance block pointer.  Make sure gk20a_init_mm_support
 * (i.e. the sw setup) is called before this.  Returns 0.
 */
static int gk20a_init_mm_setup_hw(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	struct inst_desc *inst_block = &mm->bar1.inst_block;
	phys_addr_t inst_pa = inst_block->cpu_pa;

	gk20a_dbg_fn("");

	/* set large page size in fb
	 * note this is very early on, can we defer it ? */
	{
		u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());

		/* only the 128KB big-page configuration is supported */
		if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K)
			fb_mmu_ctrl = (fb_mmu_ctrl &
				~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
				fb_mmu_ctrl_vm_pg_size_128kb_f();
		else
			BUG_ON(1); /* no support/testing for larger ones yet */

		gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
	}

	/* the bar1 block register takes the address in shifted
	 * instance-block units -- see bar1_instance_block_shift_gk20a() */
	inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
	gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa);

	/* this is very early in init... can we defer this? */
	{
		gk20a_writel(g, bus_bar1_block_r(),
			     bus_bar1_block_target_vid_mem_f() |
			     bus_bar1_block_mode_virtual_f() |
			     bus_bar1_block_ptr_f(inst_pa));
	}

	gk20a_dbg_fn("done");
	return 0;
}
391 | |||
/*
 * Bring up the MM unit in three stages: hw reset, one-time sw setup,
 * then hw register programming.  Returns 0 on success or the negative
 * errno of the first stage that fails.
 */
int gk20a_init_mm_support(struct gk20a *g)
{
	/* was u32; error codes are negative, keep them in a signed type */
	int err;

	err = gk20a_init_mm_reset_enable_hw(g);
	if (err)
		return err;

	err = gk20a_init_mm_setup_sw(g);
	if (err)
		return err;

	return gk20a_init_mm_setup_hw(g);
}
410 | |||
411 | #ifdef CONFIG_GK20A_PHYS_PAGE_TABLES | ||
412 | static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, | ||
413 | void **handle, | ||
414 | struct sg_table **sgt, | ||
415 | size_t *size) | ||
416 | { | ||
417 | u32 num_pages = 1 << order; | ||
418 | u32 len = num_pages * PAGE_SIZE; | ||
419 | int err; | ||
420 | struct page *pages; | ||
421 | |||
422 | gk20a_dbg_fn(""); | ||
423 | |||
424 | pages = alloc_pages(GFP_KERNEL, order); | ||
425 | if (!pages) { | ||
426 | gk20a_dbg(gpu_dbg_pte, "alloc_pages failed\n"); | ||
427 | goto err_out; | ||
428 | } | ||
429 | *sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); | ||
430 | if (!sgt) { | ||
431 | gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table"); | ||
432 | goto err_alloced; | ||
433 | } | ||
434 | err = sg_alloc_table(*sgt, 1, GFP_KERNEL); | ||
435 | if (err) { | ||
436 | gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed\n"); | ||
437 | goto err_sg_table; | ||
438 | } | ||
439 | sg_set_page((*sgt)->sgl, pages, len, 0); | ||
440 | *handle = page_address(pages); | ||
441 | memset(*handle, 0, len); | ||
442 | *size = len; | ||
443 | FLUSH_CPU_DCACHE(*handle, sg_phys((*sgt)->sgl), len); | ||
444 | |||
445 | return 0; | ||
446 | |||
447 | err_sg_table: | ||
448 | kfree(*sgt); | ||
449 | err_alloced: | ||
450 | __free_pages(pages, order); | ||
451 | err_out: | ||
452 | return -ENOMEM; | ||
453 | } | ||
454 | |||
455 | static void free_gmmu_pages(struct vm_gk20a *vm, void *handle, | ||
456 | struct sg_table *sgt, u32 order, | ||
457 | size_t size) | ||
458 | { | ||
459 | gk20a_dbg_fn(""); | ||
460 | BUG_ON(sgt == NULL); | ||
461 | free_pages((unsigned long)handle, order); | ||
462 | sg_free_table(sgt); | ||
463 | kfree(sgt); | ||
464 | } | ||
465 | |||
466 | static int map_gmmu_pages(void *handle, struct sg_table *sgt, | ||
467 | void **va, size_t size) | ||
468 | { | ||
469 | FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); | ||
470 | *va = handle; | ||
471 | return 0; | ||
472 | } | ||
473 | |||
474 | static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) | ||
475 | { | ||
476 | FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); | ||
477 | } | ||
478 | #else | ||
/*
 * Allocate page-table backing pages through the DMA API.  The pages get
 * no kernel mapping here (DMA_ATTR_NO_KERNEL_MAPPING); map_gmmu_pages()
 * vmap()s them on demand.  On success *handle holds the struct page **
 * cookie from dma_alloc_attrs, *sgt the matching sg table and *size the
 * byte length.  Returns 0 or -ENOMEM.
 */
static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
			    void **handle,
			    struct sg_table **sgt,
			    size_t *size)
{
	struct device *d = dev_from_vm(vm);
	u32 num_pages = 1 << order;
	u32 len = num_pages * PAGE_SIZE;
	dma_addr_t iova;
	DEFINE_DMA_ATTRS(attrs);
	struct page **pages;
	int err = 0;

	gk20a_dbg_fn("");

	*size = len;
	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
	/* with NO_KERNEL_MAPPING the returned cookie is a page array,
	 * which is how free/map below treat it */
	pages = dma_alloc_attrs(d, len, &iova, GFP_KERNEL, &attrs);
	if (!pages) {
		gk20a_err(d, "memory allocation failed\n");
		goto err_out;
	}

	err = gk20a_get_sgtable_from_pages(d, sgt, pages,
				iova, len);
	if (err) {
		gk20a_err(d, "sgt allocation failed\n");
		goto err_free;
	}

	*handle = (void *)pages;

	return 0;

err_free:
	/* NOTE(review): the specific err from sgt creation is discarded;
	 * both failure paths report -ENOMEM */
	dma_free_attrs(d, len, pages, iova, &attrs);
	pages = NULL;
	iova = 0;
err_out:
	return -ENOMEM;
}
520 | |||
/*
 * Release page-table pages allocated by the DMA-API variant of
 * alloc_gmmu_pages(), plus the sg table describing them.  The DMA
 * attrs must match the allocation (NO_KERNEL_MAPPING).
 */
static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
			    struct sg_table *sgt, u32 order,
			    size_t size)
{
	struct device *d = dev_from_vm(vm);
	u64 iova;
	DEFINE_DMA_ATTRS(attrs);
	struct page **pages = (struct page **)handle;

	gk20a_dbg_fn("");
	BUG_ON(sgt == NULL);

	/* grab the dma address before the sg table is torn down */
	iova = sg_dma_address(sgt->sgl);

	gk20a_free_sgtable(&sgt);

	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
	dma_free_attrs(d, size, pages, iova, &attrs);
	pages = NULL;
	iova = 0;
}
542 | |||
543 | static int map_gmmu_pages(void *handle, struct sg_table *sgt, | ||
544 | void **kva, size_t size) | ||
545 | { | ||
546 | int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
547 | struct page **pages = (struct page **)handle; | ||
548 | gk20a_dbg_fn(""); | ||
549 | |||
550 | *kva = vmap(pages, count, 0, pgprot_dmacoherent(PAGE_KERNEL)); | ||
551 | if (!(*kva)) | ||
552 | return -ENOMEM; | ||
553 | |||
554 | return 0; | ||
555 | } | ||
556 | |||
/* Drop the vmap()ed alias created by map_gmmu_pages(). */
static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
{
	gk20a_dbg_fn("");

	vunmap(va);
}
562 | #endif | ||
563 | |||
564 | /* allocate a phys contig region big enough for a full | ||
565 | * sized gmmu page table for the given gmmu_page_size. | ||
566 | * the whole range is zeroed so it's "invalid"/will fault | ||
567 | */ | ||
568 | |||
569 | static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, | ||
570 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx, | ||
571 | struct page_table_gk20a *pte) | ||
572 | { | ||
573 | int err; | ||
574 | u32 pte_order; | ||
575 | void *handle = NULL; | ||
576 | struct sg_table *sgt; | ||
577 | size_t size; | ||
578 | |||
579 | gk20a_dbg_fn(""); | ||
580 | |||
581 | /* allocate enough pages for the table */ | ||
582 | pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order; | ||
583 | |||
584 | err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size); | ||
585 | if (err) | ||
586 | return err; | ||
587 | |||
588 | gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d", | ||
589 | pte, gk20a_mm_iova_addr(sgt->sgl), pte_order); | ||
590 | |||
591 | pte->ref = handle; | ||
592 | pte->sgt = sgt; | ||
593 | pte->size = size; | ||
594 | |||
595 | return 0; | ||
596 | } | ||
597 | |||
598 | /* given address range (inclusive) determine the pdes crossed */ | ||
599 | static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm, | ||
600 | u64 addr_lo, u64 addr_hi, | ||
601 | u32 *pde_lo, u32 *pde_hi) | ||
602 | { | ||
603 | *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift); | ||
604 | *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift); | ||
605 | gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d", | ||
606 | addr_lo, addr_hi, vm->mm->pde_stride_shift); | ||
607 | gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d", | ||
608 | *pde_lo, *pde_hi); | ||
609 | } | ||
610 | |||
611 | static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i) | ||
612 | { | ||
613 | return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v()); | ||
614 | } | ||
615 | |||
616 | static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm, | ||
617 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx) | ||
618 | { | ||
619 | u32 ret; | ||
620 | /* mask off pde part */ | ||
621 | addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1)); | ||
622 | /* shift over to get pte index. note assumption that pte index | ||
623 | * doesn't leak over into the high 32b */ | ||
624 | ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]); | ||
625 | |||
626 | gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret); | ||
627 | return ret; | ||
628 | } | ||
629 | |||
630 | static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page, | ||
631 | u32 *pte_offset) | ||
632 | { | ||
633 | /* ptes are 8B regardless of pagesize */ | ||
634 | /* pte space pages are 4KB. so 512 ptes per 4KB page*/ | ||
635 | *pte_page = i >> 9; | ||
636 | |||
637 | /* this offset is a pte offset, not a byte offset */ | ||
638 | *pte_offset = i & ((1<<9)-1); | ||
639 | |||
640 | gk20a_dbg(gpu_dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x", | ||
641 | i, *pte_page, *pte_offset); | ||
642 | } | ||
643 | |||
644 | |||
645 | /* | ||
646 | * given a pde index/page table number make sure it has | ||
647 | * backing store and if not go ahead allocate it and | ||
648 | * record it in the appropriate pde | ||
649 | */ | ||
650 | static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, | ||
651 | u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx) | ||
652 | { | ||
653 | int err; | ||
654 | struct page_table_gk20a *pte = | ||
655 | vm->pdes.ptes[gmmu_pgsz_idx] + i; | ||
656 | |||
657 | gk20a_dbg_fn(""); | ||
658 | |||
659 | /* if it's already in place it's valid */ | ||
660 | if (pte->ref) | ||
661 | return 0; | ||
662 | |||
663 | gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d", | ||
664 | gmmu_page_sizes[gmmu_pgsz_idx]/1024, i); | ||
665 | |||
666 | err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte); | ||
667 | if (err) | ||
668 | return err; | ||
669 | |||
670 | /* rewrite pde */ | ||
671 | update_gmmu_pde_locked(vm, i); | ||
672 | |||
673 | return 0; | ||
674 | } | ||
675 | |||
676 | static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm, | ||
677 | u64 addr) | ||
678 | { | ||
679 | struct vm_reserved_va_node *va_node; | ||
680 | list_for_each_entry(va_node, &vm->reserved_va_list, reserved_va_list) | ||
681 | if (addr >= va_node->vaddr_start && | ||
682 | addr < (u64)va_node->vaddr_start + (u64)va_node->size) | ||
683 | return va_node; | ||
684 | |||
685 | return NULL; | ||
686 | } | ||
687 | |||
688 | int gk20a_vm_get_buffers(struct vm_gk20a *vm, | ||
689 | struct mapped_buffer_node ***mapped_buffers, | ||
690 | int *num_buffers) | ||
691 | { | ||
692 | struct mapped_buffer_node *mapped_buffer; | ||
693 | struct mapped_buffer_node **buffer_list; | ||
694 | struct rb_node *node; | ||
695 | int i = 0; | ||
696 | |||
697 | mutex_lock(&vm->update_gmmu_lock); | ||
698 | |||
699 | buffer_list = kzalloc(sizeof(*buffer_list) * | ||
700 | vm->num_user_mapped_buffers, GFP_KERNEL); | ||
701 | if (!buffer_list) { | ||
702 | mutex_unlock(&vm->update_gmmu_lock); | ||
703 | return -ENOMEM; | ||
704 | } | ||
705 | |||
706 | node = rb_first(&vm->mapped_buffers); | ||
707 | while (node) { | ||
708 | mapped_buffer = | ||
709 | container_of(node, struct mapped_buffer_node, node); | ||
710 | if (mapped_buffer->user_mapped) { | ||
711 | buffer_list[i] = mapped_buffer; | ||
712 | kref_get(&mapped_buffer->ref); | ||
713 | i++; | ||
714 | } | ||
715 | node = rb_next(&mapped_buffer->node); | ||
716 | } | ||
717 | |||
718 | BUG_ON(i != vm->num_user_mapped_buffers); | ||
719 | |||
720 | *num_buffers = vm->num_user_mapped_buffers; | ||
721 | *mapped_buffers = buffer_list; | ||
722 | |||
723 | mutex_unlock(&vm->update_gmmu_lock); | ||
724 | |||
725 | return 0; | ||
726 | } | ||
727 | |||
/*
 * kref release callback: performs the actual unmap once the last
 * reference is dropped.  Call sites in this file invoke kref_put while
 * holding vm->update_gmmu_lock, matching the "_locked" target.
 */
static void gk20a_vm_unmap_locked_kref(struct kref *ref)
{
	struct mapped_buffer_node *mapped_buffer =
		container_of(ref, struct mapped_buffer_node, ref);
	gk20a_vm_unmap_locked(mapped_buffer);
}
734 | |||
735 | void gk20a_vm_put_buffers(struct vm_gk20a *vm, | ||
736 | struct mapped_buffer_node **mapped_buffers, | ||
737 | int num_buffers) | ||
738 | { | ||
739 | int i; | ||
740 | |||
741 | mutex_lock(&vm->update_gmmu_lock); | ||
742 | |||
743 | for (i = 0; i < num_buffers; ++i) | ||
744 | kref_put(&mapped_buffers[i]->ref, | ||
745 | gk20a_vm_unmap_locked_kref); | ||
746 | |||
747 | mutex_unlock(&vm->update_gmmu_lock); | ||
748 | |||
749 | kfree(mapped_buffers); | ||
750 | } | ||
751 | |||
/*
 * Unmap a user-space mapping at 'offset'.  For fixed-offset mappings the
 * unmap is made synchronous: the lock is dropped and we poll (up to
 * 1000 * 50us = 50ms) until ours is the only remaining reference, so
 * in-flight users finish before the final kref_put tears it down.
 */
static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
{
	struct device *d = dev_from_vm(vm);
	int retries;
	struct mapped_buffer_node *mapped_buffer;

	mutex_lock(&vm->update_gmmu_lock);

	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
	if (!mapped_buffer) {
		mutex_unlock(&vm->update_gmmu_lock);
		gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
		return;
	}

	if (mapped_buffer->flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
		/* drop the lock while waiting so ref holders can release */
		mutex_unlock(&vm->update_gmmu_lock);

		retries = 1000;
		while (retries) {
			/* refcount 1 == only our reference remains */
			if (atomic_read(&mapped_buffer->ref.refcount) == 1)
				break;
			retries--;
			udelay(50);
		}
		if (!retries)
			gk20a_err(d, "sync-unmap failed on 0x%llx",
							offset);
		mutex_lock(&vm->update_gmmu_lock);
	}

	/* drop the user-mapping accounting, then the reference itself */
	mapped_buffer->user_mapped--;
	if (mapped_buffer->user_mapped == 0)
		vm->num_user_mapped_buffers--;
	kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);

	mutex_unlock(&vm->update_gmmu_lock);
}
790 | |||
791 | static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | ||
792 | u64 size, | ||
793 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx) | ||
794 | |||
795 | { | ||
796 | struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; | ||
797 | int err; | ||
798 | u64 offset; | ||
799 | u32 start_page_nr = 0, num_pages; | ||
800 | u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx]; | ||
801 | |||
802 | if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) { | ||
803 | dev_warn(dev_from_vm(vm), | ||
804 | "invalid page size requested in gk20a vm alloc"); | ||
805 | return -EINVAL; | ||
806 | } | ||
807 | |||
808 | if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) { | ||
809 | dev_warn(dev_from_vm(vm), | ||
810 | "unsupportd page size requested"); | ||
811 | return -EINVAL; | ||
812 | |||
813 | } | ||
814 | |||
815 | /* be certain we round up to gmmu_page_size if needed */ | ||
816 | /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */ | ||
817 | size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1); | ||
818 | |||
819 | gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, | ||
820 | gmmu_page_sizes[gmmu_pgsz_idx]>>10); | ||
821 | |||
822 | /* The vma allocator represents page accounting. */ | ||
823 | num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx]; | ||
824 | |||
825 | err = vma->alloc(vma, &start_page_nr, num_pages); | ||
826 | |||
827 | if (err) { | ||
828 | gk20a_err(dev_from_vm(vm), | ||
829 | "%s oom: sz=0x%llx", vma->name, size); | ||
830 | return 0; | ||
831 | } | ||
832 | |||
833 | offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx]; | ||
834 | gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset); | ||
835 | |||
836 | return offset; | ||
837 | } | ||
838 | |||
839 | static int gk20a_vm_free_va(struct vm_gk20a *vm, | ||
840 | u64 offset, u64 size, | ||
841 | enum gmmu_pgsz_gk20a pgsz_idx) | ||
842 | { | ||
843 | struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; | ||
844 | u32 page_size = gmmu_page_sizes[pgsz_idx]; | ||
845 | u32 page_shift = gmmu_page_shifts[pgsz_idx]; | ||
846 | u32 start_page_nr, num_pages; | ||
847 | int err; | ||
848 | |||
849 | gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", | ||
850 | vma->name, offset, size); | ||
851 | |||
852 | start_page_nr = (u32)(offset >> page_shift); | ||
853 | num_pages = (u32)((size + page_size - 1) >> page_shift); | ||
854 | |||
855 | err = vma->free(vma, start_page_nr, num_pages); | ||
856 | if (err) { | ||
857 | gk20a_err(dev_from_vm(vm), | ||
858 | "not found: offset=0x%llx, sz=0x%llx", | ||
859 | offset, size); | ||
860 | } | ||
861 | |||
862 | return err; | ||
863 | } | ||
864 | |||
865 | static int insert_mapped_buffer(struct rb_root *root, | ||
866 | struct mapped_buffer_node *mapped_buffer) | ||
867 | { | ||
868 | struct rb_node **new_node = &(root->rb_node), *parent = NULL; | ||
869 | |||
870 | /* Figure out where to put new node */ | ||
871 | while (*new_node) { | ||
872 | struct mapped_buffer_node *cmp_with = | ||
873 | container_of(*new_node, struct mapped_buffer_node, | ||
874 | node); | ||
875 | |||
876 | parent = *new_node; | ||
877 | |||
878 | if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */ | ||
879 | new_node = &((*new_node)->rb_left); | ||
880 | else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */ | ||
881 | new_node = &((*new_node)->rb_right); | ||
882 | else | ||
883 | return -EINVAL; /* no fair dup'ing */ | ||
884 | } | ||
885 | |||
886 | /* Add new node and rebalance tree. */ | ||
887 | rb_link_node(&mapped_buffer->node, parent, new_node); | ||
888 | rb_insert_color(&mapped_buffer->node, root); | ||
889 | |||
890 | return 0; | ||
891 | } | ||
892 | |||
893 | static struct mapped_buffer_node *find_mapped_buffer_reverse_locked( | ||
894 | struct rb_root *root, struct dma_buf *dmabuf, | ||
895 | u32 kind) | ||
896 | { | ||
897 | struct rb_node *node = rb_first(root); | ||
898 | while (node) { | ||
899 | struct mapped_buffer_node *mapped_buffer = | ||
900 | container_of(node, struct mapped_buffer_node, node); | ||
901 | if (mapped_buffer->dmabuf == dmabuf && | ||
902 | kind == mapped_buffer->kind) | ||
903 | return mapped_buffer; | ||
904 | node = rb_next(&mapped_buffer->node); | ||
905 | } | ||
906 | return 0; | ||
907 | } | ||
908 | |||
909 | static struct mapped_buffer_node *find_mapped_buffer_locked( | ||
910 | struct rb_root *root, u64 addr) | ||
911 | { | ||
912 | |||
913 | struct rb_node *node = root->rb_node; | ||
914 | while (node) { | ||
915 | struct mapped_buffer_node *mapped_buffer = | ||
916 | container_of(node, struct mapped_buffer_node, node); | ||
917 | if (mapped_buffer->addr > addr) /* u64 cmp */ | ||
918 | node = node->rb_left; | ||
919 | else if (mapped_buffer->addr != addr) /* u64 cmp */ | ||
920 | node = node->rb_right; | ||
921 | else | ||
922 | return mapped_buffer; | ||
923 | } | ||
924 | return 0; | ||
925 | } | ||
926 | |||
927 | static struct mapped_buffer_node *find_mapped_buffer_range_locked( | ||
928 | struct rb_root *root, u64 addr) | ||
929 | { | ||
930 | struct rb_node *node = root->rb_node; | ||
931 | while (node) { | ||
932 | struct mapped_buffer_node *m = | ||
933 | container_of(node, struct mapped_buffer_node, node); | ||
934 | if (m->addr <= addr && m->addr + m->size > addr) | ||
935 | return m; | ||
936 | else if (m->addr > addr) /* u64 cmp */ | ||
937 | node = node->rb_left; | ||
938 | else | ||
939 | node = node->rb_right; | ||
940 | } | ||
941 | return 0; | ||
942 | } | ||
943 | |||
944 | #define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0])) | ||
945 | |||
/*
 * Per-map-call scratch describing the buffer being mapped (filled in by
 * gk20a_vm_map and helpers).
 */
struct buffer_attrs {
	struct sg_table *sgt;	/* pinned backing store of the dma_buf */
	u64 size;		/* buffer size in bytes (dmabuf->size) */
	u64 align;		/* alignment derived from the dma address */
	u32 ctag_offset;	/* comptag offset (set outside this chunk) */
	u32 ctag_lines;		/* comptag lines needed; 0 if uncompressed */
	int pgsz_idx;		/* gmmu page size index; -1 = undecided */
	u8 kind_v;		/* effective kind */
	u8 uc_kind_v;		/* uncompressed fallback kind */
};
956 | |||
957 | static void gmmu_select_page_size(struct buffer_attrs *bfr) | ||
958 | { | ||
959 | int i; | ||
960 | /* choose the biggest first (top->bottom) */ | ||
961 | for (i = (gmmu_nr_page_sizes-1); i >= 0; i--) | ||
962 | if (!(gmmu_page_offset_masks[i] & bfr->align)) { | ||
963 | /* would like to add this too but nvmap returns the | ||
964 | * original requested size not the allocated size. | ||
965 | * (!(gmmu_page_offset_masks[i] & bfr->size)) */ | ||
966 | bfr->pgsz_idx = i; | ||
967 | break; | ||
968 | } | ||
969 | } | ||
970 | |||
/*
 * Resolve the buffer kind: default an invalid kind to pitch, verify it
 * is supported, and decide whether compression tags can be used.
 * Compressible kinds require a 128KB page size; otherwise the kind
 * silently degrades to its uncompressed twin.  Fills bfr->uc_kind_v and
 * bfr->ctag_lines.  Returns 0 or -EINVAL.
 */
static int setup_buffer_kind_and_compression(struct device *d,
					     u32 flags,
					     struct buffer_attrs *bfr,
					     enum gmmu_pgsz_gk20a pgsz_idx)
{
	bool kind_compressible;

	if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
		bfr->kind_v = gmmu_pte_kind_pitch_v();

	if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
		gk20a_err(d, "kind 0x%x not supported", bfr->kind_v);
		return -EINVAL;
	}

	bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
	/* find a suitable uncompressed kind if it becomes necessary later */
	kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
	if (kind_compressible) {
		bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
		if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) {
			/* shouldn't happen, but it is worth cross-checking */
			gk20a_err(d, "comptag kind 0x%x can't be"
				   " downgraded to uncompressed kind",
				   bfr->kind_v);
			return -EINVAL;
		}
	}
	/* comptags only supported for suitable kinds, 128KB pagesize */
	if (unlikely(kind_compressible &&
		     (gmmu_page_sizes[pgsz_idx] != 128*1024))) {
		/*
		gk20a_warn(d, "comptags specified"
		" but pagesize being used doesn't support it");*/
		/* it is safe to fall back to uncompressed as
		   functionality is not harmed */
		bfr->kind_v = bfr->uc_kind_v;
		kind_compressible = false;
	}
	if (kind_compressible)
		/* one comptag line per COMP_TAG_LINE_SIZE bytes, rounded up */
		bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >>
			COMP_TAG_LINE_SIZE_SHIFT;
	else
		bfr->ctag_lines = 0;

	return 0;
}
1018 | |||
/*
 * Validate a fixed-offset mapping request: the offset must be aligned
 * to the chosen page size, lie inside a prior va-space reservation, and
 * not overlap any buffer already mapped into that reservation.
 * Returns 0 when the window is usable, -EINVAL otherwise.
 */
static int validate_fixed_buffer(struct vm_gk20a *vm,
				 struct buffer_attrs *bfr,
				 u64 map_offset)
{
	struct device *dev = dev_from_vm(vm);
	struct vm_reserved_va_node *va_node;
	struct mapped_buffer_node *buffer;

	if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) {
		gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
			  map_offset);
		return -EINVAL;
	}

	/* find the space reservation */
	va_node = addr_to_reservation(vm, map_offset);
	if (!va_node) {
		gk20a_warn(dev, "fixed offset mapping without space allocation");
		return -EINVAL;
	}

	/* check that this mappings does not collide with existing
	 * mappings by checking the overlapping area between the current
	 * buffer and all other mapped buffers */

	list_for_each_entry(buffer,
		&va_node->va_buffers_list, va_buffers_list) {
		/* interval-intersection test: a positive overlap length
		 * between the two [addr, addr+size) windows is a collision */
		s64 begin = max(buffer->addr, map_offset);
		s64 end = min(buffer->addr +
			buffer->size, map_offset + bfr->size);
		if (end - begin > 0) {
			gk20a_warn(dev, "overlapping buffer map requested");
			return -EINVAL;
		}
	}

	return 0;
}
1057 | |||
/*
 * Core mapping primitive; caller holds vm->update_gmmu_lock.  When
 * map_offset is 0 a va region is allocated, otherwise the caller-fixed
 * offset is used.  Every pde the range touches gets backing page
 * tables, then the ptes are written.  Returns the gpu va on success,
 * 0 on failure.
 *
 * NOTE(review): a va allocated here is not returned to the allocator on
 * the later failure paths -- confirm callers tolerate that leak.
 */
static u64 __locked_gmmu_map(struct vm_gk20a *vm,
			     u64 map_offset,
			     struct sg_table *sgt,
			     u64 size,
			     int pgsz_idx,
			     u8 kind_v,
			     u32 ctag_offset,
			     u32 flags,
			     int rw_flag)
{
	int err = 0, i = 0;
	u32 pde_lo, pde_hi;
	struct device *d = dev_from_vm(vm);

	/* Allocate (or validate when map_offset != 0) the virtual address. */
	if (!map_offset) {
		map_offset = gk20a_vm_alloc_va(vm, size,
					  pgsz_idx);
		if (!map_offset) {
			gk20a_err(d, "failed to allocate va space");
			err = -ENOMEM;
			goto fail;
		}
	}

	pde_range_from_vaddr_range(vm,
				   map_offset,
				   map_offset + size - 1,
				   &pde_lo, &pde_hi);

	/* mark the addr range valid (but with 0 phys addr, which will fault) */
	for (i = pde_lo; i <= pde_hi; i++) {
		err = validate_gmmu_page_table_gk20a_locked(vm, i,
							    pgsz_idx);
		if (err) {
			gk20a_err(d, "failed to validate page table %d: %d",
							   i, err);
			goto fail;
		}
	}

	/* write the ptes for the whole inclusive va range */
	err = update_gmmu_ptes_locked(vm, pgsz_idx,
				      sgt,
				      map_offset, map_offset + size - 1,
				      kind_v,
				      ctag_offset,
				      flags &
				      NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
				      rw_flag);
	if (err) {
		gk20a_err(d, "failed to update ptes on map");
		goto fail;
	}

	return map_offset;
fail:
	gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
	return 0;
}
1117 | |||
/*
 * Core unmap primitive; caller holds vm->update_gmmu_lock.  Optionally
 * returns the va region to the allocator, then clears the ptes over
 * the range (pgsz_idx must be the size used at map time) and flushes
 * L2 so dirty lines cannot land in reused memory later.
 */
static void __locked_gmmu_unmap(struct vm_gk20a *vm,
				u64 vaddr,
				u64 size,
				int pgsz_idx,
				bool va_allocated,
				int rw_flag)
{
	int err = 0;
	struct gk20a *g = gk20a_from_vm(vm);

	/* only free the va if this mapping allocated it (not fixed) */
	if (va_allocated) {
		err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
		if (err) {
			dev_err(dev_from_vm(vm),
				"failed to free va");
			return;
		}
	}

	/* unmap here needs to know the page size we assigned at mapping */
	err = update_gmmu_ptes_locked(vm,
				      pgsz_idx,
				      0, /* n/a for unmap */
				      vaddr,
				      vaddr + size - 1,
				      0, 0, false /* n/a for unmap */,
				      rw_flag);
	if (err)
		dev_err(dev_from_vm(vm),
			"failed to update gmmu ptes on unmap");

	/* detect which if any pdes/ptes can now be released */

	/* flush l2 so any dirty lines are written out *now*.
	 * also as we could potentially be switching this buffer
	 * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
	 * some point in the future we need to invalidate l2. e.g. switching
	 * from a render buffer unmap (here) to later using the same memory
	 * for gmmu ptes. note the positioning of this relative to any smmu
	 * unmapping (below). */

	gk20a_mm_l2_flush(g, true);
}
1161 | |||
/*
 * Reuse an existing mapping of (dmabuf, kind) when its flags (and, for
 * fixed-offset requests, its address) match.  Takes an extra kref on
 * the mapping and updates the user-mapped accounting.  Returns the
 * existing gpu va, or 0 when no compatible mapping exists.  Caller
 * holds vm->update_gmmu_lock.
 */
static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
					 struct dma_buf *dmabuf,
					 u64 offset_align,
					 u32 flags,
					 int kind,
					 struct sg_table **sgt,
					 bool user_mapped,
					 int rw_flag)
{
	struct mapped_buffer_node *mapped_buffer = 0;

	mapped_buffer =
		find_mapped_buffer_reverse_locked(&vm->mapped_buffers,
						  dmabuf, kind);
	if (!mapped_buffer)
		return 0;

	/* only an exact flags match may be reused */
	if (mapped_buffer->flags != flags)
		return 0;

	/* a fixed-offset request must land on the existing va */
	if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET &&
	    mapped_buffer->addr != offset_align)
		return 0;

	BUG_ON(mapped_buffer->vm != vm);

	/* mark the buffer as used */
	if (user_mapped) {
		if (mapped_buffer->user_mapped == 0)
			vm->num_user_mapped_buffers++;
		mapped_buffer->user_mapped++;

		/* If the mapping comes from user space, we own
		 * the handle ref. Since we reuse an
		 * existing mapping here, we need to give back those
		 * refs once in order not to leak.
		 */
		if (mapped_buffer->own_mem_ref)
			dma_buf_put(mapped_buffer->dmabuf);
		else
			mapped_buffer->own_mem_ref = true;
	}
	kref_get(&mapped_buffer->ref);

	gk20a_dbg(gpu_dbg_map,
		  "reusing as=%d pgsz=%d flags=0x%x ctags=%d "
		  "start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x "
		  "own_mem_ref=%d user_mapped=%d",
		  vm_aspace_id(vm), mapped_buffer->pgsz_idx,
		  mapped_buffer->flags,
		  mapped_buffer->ctag_lines,
		  mapped_buffer->ctag_offset,
		  hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
		  hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
		  lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
		  hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
		  lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
		  mapped_buffer->own_mem_ref, user_mapped);

	if (sgt)
		*sgt = mapped_buffer->sgt;
	return mapped_buffer->addr;
}
1225 | |||
/*
 * Map @dmabuf into @vm's GPU virtual address space and return the GPU
 * virtual address, or 0 on failure.
 *
 * If a compatible mapping already exists it is reused (duplicate check
 * below).  Otherwise the buffer is pinned, a GMMU page size is chosen
 * (or dictated by a FIXED_OFFSET request), compression tags are set up
 * when applicable, the PTEs are written, and a tracking node is inserted
 * into vm->mapped_buffers for later unmap.
 *
 * @offset_align: fixed GPU VA when FIXED_OFFSET is set in @flags;
 *                otherwise only used as an alignment input downstream.
 * @sgt:          optional out-parameter receiving the pinned sg_table.
 * @user_mapped:  true when the request comes from user space; the new
 *                mapping then owns a reference on @dmabuf (own_mem_ref).
 * @rw_flag:      gk20a_mem_flag_* access restriction for the PTEs.
 */
u64 gk20a_vm_map(struct vm_gk20a *vm,
			struct dma_buf *dmabuf,
			u64 offset_align,
			u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/,
			int kind,
			struct sg_table **sgt,
			bool user_mapped,
			int rw_flag)
{
	struct gk20a *g = gk20a_from_vm(vm);
	struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
	struct device *d = dev_from_vm(vm);
	struct mapped_buffer_node *mapped_buffer = 0;
	bool inserted = false, va_allocated = false;
	u32 gmmu_page_size = 0;
	u64 map_offset = 0;
	int err = 0;
	struct buffer_attrs bfr = {0};
	struct gk20a_comptags comptags;

	mutex_lock(&vm->update_gmmu_lock);

	/* check if this buffer is already mapped */
	map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,
						   flags, kind, sgt,
						   user_mapped, rw_flag);
	if (map_offset) {
		/* reused an existing mapping; nothing else to do */
		mutex_unlock(&vm->update_gmmu_lock);
		return map_offset;
	}

	/* pin buffer to get phys/iovmm addr */
	bfr.sgt = gk20a_mm_pin(d, dmabuf);
	if (IS_ERR(bfr.sgt)) {
		/* Falling back to physical is actually possible
		 * here in many cases if we use 4K phys pages in the
		 * gmmu. However we have some regions which require
		 * contig regions to work properly (either phys-contig
		 * or contig through smmu io_vaspace). Until we can
		 * track the difference between those two cases we have
		 * to fail the mapping when we run out of SMMU space.
		 */
		gk20a_warn(d, "oom allocating tracking buffer");
		goto clean_up;
	}

	if (sgt)
		*sgt = bfr.sgt;

	bfr.kind_v = kind;
	bfr.size = dmabuf->size;
	/* alignment is derived from the lowest set bit of the DMA address */
	bfr.align = 1 << __ffs((u64)sg_dma_address(bfr.sgt->sgl));
	bfr.pgsz_idx = -1;

	/* If FIX_OFFSET is set, pgsz is determined. Otherwise, select
	 * page size according to memory alignment */
	if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
		bfr.pgsz_idx = NV_GMMU_VA_IS_UPPER(offset_align) ?
				gmmu_page_size_big : gmmu_page_size_small;
	} else {
		gmmu_select_page_size(&bfr);
	}

	/* validate/adjust bfr attributes */
	if (unlikely(bfr.pgsz_idx == -1)) {
		gk20a_err(d, "unsupported page size detected");
		goto clean_up;
	}

	if (unlikely(bfr.pgsz_idx < gmmu_page_size_small ||
		     bfr.pgsz_idx > gmmu_page_size_big)) {
		BUG_ON(1);
		err = -EINVAL;
		goto clean_up;
	}
	gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];

	/* Check if we should use a fixed offset for mapping this buffer */
	if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)  {
		/* caller supplies the VA; no allocator bookkeeping needed */
		err = validate_fixed_buffer(vm, &bfr, offset_align);
		if (err)
			goto clean_up;

		map_offset = offset_align;
		va_allocated = false;
	} else
		va_allocated = true;

	if (sgt)
		*sgt = bfr.sgt;

	err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx);
	if (unlikely(err)) {
		gk20a_err(d, "failure setting up kind and compression");
		goto clean_up;
	}

	/* bar1 and pmu vm don't need ctag */
	if (!vm->enable_ctag)
		bfr.ctag_lines = 0;

	gk20a_get_comptags(d, dmabuf, &comptags);

	if (bfr.ctag_lines && !comptags.lines) {
		/* allocate compression resources if needed */
		err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator,
					   bfr.ctag_lines);
		if (err) {
			/* ok to fall back here if we ran out */
			/* TBD: we can partially alloc ctags as well... */
			bfr.ctag_lines = bfr.ctag_offset = 0;
			bfr.kind_v = bfr.uc_kind_v;
		} else {
			gk20a_get_comptags(d, dmabuf, &comptags);

			/* init/clear the ctag buffer */
			g->ops.ltc.clear_comptags(g,
					comptags.offset,
					comptags.offset + comptags.lines - 1);
		}
	}

	/* store the comptag info */
	bfr.ctag_offset = comptags.offset;

	/* update gmmu ptes */
	map_offset = __locked_gmmu_map(vm, map_offset,
					bfr.sgt,
					bfr.size,
					bfr.pgsz_idx,
					bfr.kind_v,
					bfr.ctag_offset,
					flags, rw_flag);
	if (!map_offset)
		goto clean_up;

	gk20a_dbg(gpu_dbg_map,
	   "as=%d pgsz=%d "
	   "kind=0x%x kind_uc=0x%x flags=0x%x "
	   "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
	   vm_aspace_id(vm), gmmu_page_size,
	   bfr.kind_v, bfr.uc_kind_v, flags,
	   bfr.ctag_lines, bfr.ctag_offset,
	   hi32(map_offset), lo32(map_offset),
	   hi32((u64)sg_dma_address(bfr.sgt->sgl)),
	   lo32((u64)sg_dma_address(bfr.sgt->sgl)),
	   hi32((u64)sg_phys(bfr.sgt->sgl)),
	   lo32((u64)sg_phys(bfr.sgt->sgl)));

#if defined(NVHOST_DEBUG)
	{
		int i;
		struct scatterlist *sg = NULL;
		gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)");
		for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i ) {
			u64 da = sg_dma_address(sg);
			u64 pa = sg_phys(sg);
			u64 len = sg->length;
			gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x",
				  i, hi32(pa), lo32(pa), hi32(da), lo32(da),
				  hi32(len), lo32(len));
		}
	}
#endif

	/* keep track of the buffer for unmapping */
	/* TBD: check for multiple mapping of same buffer */
	mapped_buffer = kzalloc(sizeof(*mapped_buffer), GFP_KERNEL);
	if (!mapped_buffer) {
		gk20a_warn(d, "oom allocating tracking buffer");
		goto clean_up;
	}
	mapped_buffer->dmabuf      = dmabuf;
	mapped_buffer->sgt         = bfr.sgt;
	mapped_buffer->addr        = map_offset;
	mapped_buffer->size        = bfr.size;
	mapped_buffer->pgsz_idx    = bfr.pgsz_idx;
	mapped_buffer->ctag_offset = bfr.ctag_offset;
	mapped_buffer->ctag_lines  = bfr.ctag_lines;
	mapped_buffer->vm          = vm;
	mapped_buffer->flags       = flags;
	mapped_buffer->kind        = kind;
	mapped_buffer->va_allocated = va_allocated;
	mapped_buffer->user_mapped = user_mapped ? 1 : 0;
	mapped_buffer->own_mem_ref = user_mapped;
	INIT_LIST_HEAD(&mapped_buffer->unmap_list);
	INIT_LIST_HEAD(&mapped_buffer->va_buffers_list);
	kref_init(&mapped_buffer->ref);

	err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
	if (err) {
		gk20a_err(d, "failed to insert into mapped buffer tree");
		goto clean_up;
	}
	inserted = true;
	if (user_mapped)
		vm->num_user_mapped_buffers++;

	gk20a_dbg_info("allocated va @ 0x%llx", map_offset);

	if (!va_allocated) {
		struct vm_reserved_va_node *va_node;

		/* find the space reservation */
		va_node = addr_to_reservation(vm, map_offset);
		list_add_tail(&mapped_buffer->va_buffers_list,
			      &va_node->va_buffers_list);
		mapped_buffer->va_node = va_node;
	}

	mutex_unlock(&vm->update_gmmu_lock);

	/* Invalidate kernel mappings immediately */
	if (vm_aspace_id(vm) == -1)
		gk20a_mm_tlb_invalidate(vm);

	return map_offset;

clean_up:
	/* unwind in reverse order of the steps above; bfr.sgt may still be
	 * NULL or an ERR_PTR if we failed before/at the pin step */
	if (inserted) {
		rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
		if (user_mapped)
			vm->num_user_mapped_buffers--;
	}
	kfree(mapped_buffer);
	if (va_allocated)
		gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
	if (!IS_ERR(bfr.sgt))
		gk20a_mm_unpin(d, dmabuf, bfr.sgt);

	mutex_unlock(&vm->update_gmmu_lock);
	gk20a_dbg_info("err=%d\n", err);
	return 0;
}
1460 | |||
1461 | u64 gk20a_gmmu_map(struct vm_gk20a *vm, | ||
1462 | struct sg_table **sgt, | ||
1463 | u64 size, | ||
1464 | u32 flags, | ||
1465 | int rw_flag) | ||
1466 | { | ||
1467 | u64 vaddr; | ||
1468 | |||
1469 | mutex_lock(&vm->update_gmmu_lock); | ||
1470 | vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */ | ||
1471 | *sgt, /* sg table */ | ||
1472 | size, | ||
1473 | 0, /* page size index = 0 i.e. SZ_4K */ | ||
1474 | 0, /* kind */ | ||
1475 | 0, /* ctag_offset */ | ||
1476 | flags, rw_flag); | ||
1477 | mutex_unlock(&vm->update_gmmu_lock); | ||
1478 | if (!vaddr) { | ||
1479 | gk20a_err(dev_from_vm(vm), "failed to allocate va space"); | ||
1480 | return 0; | ||
1481 | } | ||
1482 | |||
1483 | /* Invalidate kernel mappings immediately */ | ||
1484 | gk20a_mm_tlb_invalidate(vm); | ||
1485 | |||
1486 | return vaddr; | ||
1487 | } | ||
1488 | |||
1489 | void gk20a_gmmu_unmap(struct vm_gk20a *vm, | ||
1490 | u64 vaddr, | ||
1491 | u64 size, | ||
1492 | int rw_flag) | ||
1493 | { | ||
1494 | mutex_lock(&vm->update_gmmu_lock); | ||
1495 | __locked_gmmu_unmap(vm, | ||
1496 | vaddr, | ||
1497 | size, | ||
1498 | 0, /* page size 4K */ | ||
1499 | true, /*va_allocated */ | ||
1500 | rw_flag); | ||
1501 | mutex_unlock(&vm->update_gmmu_lock); | ||
1502 | } | ||
1503 | |||
1504 | phys_addr_t gk20a_get_phys_from_iova(struct device *d, | ||
1505 | u64 dma_addr) | ||
1506 | { | ||
1507 | phys_addr_t phys; | ||
1508 | u64 iova; | ||
1509 | |||
1510 | struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); | ||
1511 | if (!mapping) | ||
1512 | return dma_addr; | ||
1513 | |||
1514 | iova = dma_addr & PAGE_MASK; | ||
1515 | phys = iommu_iova_to_phys(mapping->domain, iova); | ||
1516 | return phys; | ||
1517 | } | ||
1518 | |||
1519 | /* get sg_table from already allocated buffer */ | ||
1520 | int gk20a_get_sgtable(struct device *d, struct sg_table **sgt, | ||
1521 | void *cpuva, u64 iova, | ||
1522 | size_t size) | ||
1523 | { | ||
1524 | int err = 0; | ||
1525 | *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL); | ||
1526 | if (!(*sgt)) { | ||
1527 | dev_err(d, "failed to allocate memory\n"); | ||
1528 | err = -ENOMEM; | ||
1529 | goto fail; | ||
1530 | } | ||
1531 | err = dma_get_sgtable(d, *sgt, | ||
1532 | cpuva, iova, | ||
1533 | size); | ||
1534 | if (err) { | ||
1535 | dev_err(d, "failed to create sg table\n"); | ||
1536 | goto fail; | ||
1537 | } | ||
1538 | sg_dma_address((*sgt)->sgl) = iova; | ||
1539 | |||
1540 | return 0; | ||
1541 | fail: | ||
1542 | if (*sgt) { | ||
1543 | kfree(*sgt); | ||
1544 | *sgt = NULL; | ||
1545 | } | ||
1546 | return err; | ||
1547 | } | ||
1548 | |||
1549 | int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt, | ||
1550 | struct page **pages, u64 iova, | ||
1551 | size_t size) | ||
1552 | { | ||
1553 | int err = 0; | ||
1554 | *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL); | ||
1555 | if (!(*sgt)) { | ||
1556 | dev_err(d, "failed to allocate memory\n"); | ||
1557 | err = -ENOMEM; | ||
1558 | goto fail; | ||
1559 | } | ||
1560 | err = sg_alloc_table(*sgt, 1, GFP_KERNEL); | ||
1561 | if (err) { | ||
1562 | dev_err(d, "failed to allocate sg_table\n"); | ||
1563 | goto fail; | ||
1564 | } | ||
1565 | sg_set_page((*sgt)->sgl, *pages, size, 0); | ||
1566 | sg_dma_address((*sgt)->sgl) = iova; | ||
1567 | |||
1568 | return 0; | ||
1569 | fail: | ||
1570 | if (*sgt) { | ||
1571 | kfree(*sgt); | ||
1572 | *sgt = NULL; | ||
1573 | } | ||
1574 | return err; | ||
1575 | } | ||
1576 | |||
1577 | void gk20a_free_sgtable(struct sg_table **sgt) | ||
1578 | { | ||
1579 | sg_free_table(*sgt); | ||
1580 | kfree(*sgt); | ||
1581 | *sgt = NULL; | ||
1582 | } | ||
1583 | |||
1584 | u64 gk20a_mm_iova_addr(struct scatterlist *sgl) | ||
1585 | { | ||
1586 | u64 result = sg_phys(sgl); | ||
1587 | #ifdef CONFIG_TEGRA_IOMMU_SMMU | ||
1588 | if (sg_dma_address(sgl) == DMA_ERROR_CODE) | ||
1589 | result = 0; | ||
1590 | else if (sg_dma_address(sgl)) { | ||
1591 | result = sg_dma_address(sgl) | | ||
1592 | 1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT; | ||
1593 | } | ||
1594 | #endif | ||
1595 | return result; | ||
1596 | } | ||
1597 | |||
/*
 * Write (or clear) the GMMU PTEs for the VA range [first_vaddr, last_vaddr]
 * of @vm.  Must be called with vm->update_gmmu_lock held.
 *
 * With @sgt non-NULL, valid PTEs are written walking the scatterlist;
 * with @sgt NULL, the covered PTEs are written as invalid (unmap) and the
 * per-page-table refcounts are dropped, freeing page tables that reach
 * zero and rewriting their PDEs.
 *
 * @pgsz_idx:    GMMU page size index for this range.
 * @kind_v:      kind bits written into each PTE.
 * @ctag_offset: first comptag line; 0 means no compression.
 * @cacheable:   when false, PTEs are marked volatile.
 * @rw_flag:     gk20a_mem_flag_* read/write restriction.
 *
 * Returns 0 on success, or a negative error if a page table could not
 * be mapped for CPU access.
 */
static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
				   enum gmmu_pgsz_gk20a pgsz_idx,
				   struct sg_table *sgt,
				   u64 first_vaddr, u64 last_vaddr,
				   u8 kind_v, u32 ctag_offset,
				   bool cacheable,
				   int rw_flag)
{
	int err;
	u32 pde_lo, pde_hi, pde_i;
	struct scatterlist *cur_chunk;
	unsigned int cur_offset;
	u32 pte_w[2] = {0, 0}; /* invalid pte */
	u32 ctag = ctag_offset;
	u32 ctag_incr;
	u32 page_size  = gmmu_page_sizes[pgsz_idx];
	u64 addr = 0;

	/* which PDEs cover the requested VA range */
	pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
				   &pde_lo, &pde_hi);

	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
		   pgsz_idx, pde_lo, pde_hi);

	/* If ctag_offset !=0 add 1 else add 0.  The idea is to avoid a branch
	 * below (per-pte). Note: this doesn't work unless page size (when
	 * comptags are active) is 128KB. We have checks elsewhere for that. */
	ctag_incr = !!ctag_offset;

	if (sgt)
		cur_chunk = sgt->sgl;
	else
		cur_chunk = NULL;

	cur_offset = 0;

	for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
		u32 pte_lo, pte_hi;
		u32 pte_cur;
		void *pte_kv_cur;

		struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;

		/* first/last PDE may be partially covered; interior PDEs
		 * span their full PTE range */
		if (pde_i == pde_lo)
			pte_lo = pte_index_from_vaddr(vm, first_vaddr,
						      pgsz_idx);
		else
			pte_lo = 0;

		if ((pde_i != pde_hi) && (pde_hi != pde_lo))
			pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1;
		else
			pte_hi = pte_index_from_vaddr(vm, last_vaddr,
						      pgsz_idx);

		/* get cpu access to the ptes */
		err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur,
				     pte->size);
		if (err) {
			gk20a_err(dev_from_vm(vm),
				   "couldn't map ptes for update as=%d pte_ref_cnt=%d",
				   vm_aspace_id(vm), pte->ref_cnt);
			goto clean_up;
		}

		gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
		for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {

			if (likely(sgt)) {
				/* a zero chunk address keeps the previous
				 * base; otherwise rebase at the new chunk */
				u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
				if (new_addr) {
					addr = new_addr;
					addr += cur_offset;
				}

				pte_w[0] = gmmu_pte_valid_true_f() |
					gmmu_pte_address_sys_f(addr
						>> gmmu_pte_address_shift_v());
				pte_w[1] = gmmu_pte_aperture_video_memory_f() |
					gmmu_pte_kind_f(kind_v) |
					gmmu_pte_comptagline_f(ctag);

				if (rw_flag == gk20a_mem_flag_read_only) {
					pte_w[0] |= gmmu_pte_read_only_true_f();
					pte_w[1] |=
						gmmu_pte_write_disable_true_f();
				} else if (rw_flag ==
					   gk20a_mem_flag_write_only) {
					pte_w[1] |=
						gmmu_pte_read_disable_true_f();
				}

				if (!cacheable)
					pte_w[1] |= gmmu_pte_vol_true_f();

				pte->ref_cnt++;

				gk20a_dbg(gpu_dbg_pte,
					   "pte_cur=%d addr=0x%x,%08x kind=%d"
					   " ctag=%d vol=%d refs=%d"
					   " [0x%08x,0x%08x]",
					   pte_cur, hi32(addr), lo32(addr),
					   kind_v, ctag, !cacheable,
					   pte->ref_cnt, pte_w[1], pte_w[0]);

				ctag += ctag_incr;
				cur_offset += page_size;
				addr += page_size;
				/* advance to the scatterlist chunk that
				 * contains the next page */
				while (cur_chunk &&
					cur_offset >= cur_chunk->length) {
					cur_offset -= cur_chunk->length;
					cur_chunk = sg_next(cur_chunk);
				}

			} else {
				/* unmap path: write the invalid PTE and drop
				 * the table's refcount */
				pte->ref_cnt--;
				gk20a_dbg(gpu_dbg_pte,
					   "pte_cur=%d ref=%d [0x0,0x0]",
					   pte_cur, pte->ref_cnt);
			}

			gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
			gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
		}

		unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);

		if (pte->ref_cnt == 0) {
			/* It can make sense to keep around one page table for
			 * each flavor (empty)... in case a new map is coming
			 * right back to alloc (and fill it in) again.
			 * But: deferring unmapping should help with pathologic
			 * unmap/map/unmap/map cases where we'd trigger pte
			 * free/alloc/free/alloc.
			 */
			free_gmmu_pages(vm, pte->ref, pte->sgt,
				vm->mm->page_table_sizing[pgsz_idx].order,
				pte->size);
			pte->ref = NULL;

			/* rewrite pde */
			update_gmmu_pde_locked(vm, pde_i);
		}

	}

	smp_mb();
	vm->tlb_dirty = true;
	gk20a_dbg_fn("set tlb dirty");

	return 0;

clean_up:
	/*TBD: potentially rewrite above to pre-map everything it needs to
	 * as that's the only way it can fail */
	return err;

}
1756 | |||
1757 | |||
1758 | /* for gk20a the "video memory" apertures here are misnomers. */ | ||
1759 | static inline u32 big_valid_pde0_bits(u64 pte_addr) | ||
1760 | { | ||
1761 | u32 pde0_bits = | ||
1762 | gmmu_pde_aperture_big_video_memory_f() | | ||
1763 | gmmu_pde_address_big_sys_f( | ||
1764 | (u32)(pte_addr >> gmmu_pde_address_shift_v())); | ||
1765 | return pde0_bits; | ||
1766 | } | ||
1767 | static inline u32 small_valid_pde1_bits(u64 pte_addr) | ||
1768 | { | ||
1769 | u32 pde1_bits = | ||
1770 | gmmu_pde_aperture_small_video_memory_f() | | ||
1771 | gmmu_pde_vol_small_true_f() | /* tbd: why? */ | ||
1772 | gmmu_pde_address_small_sys_f( | ||
1773 | (u32)(pte_addr >> gmmu_pde_address_shift_v())); | ||
1774 | return pde1_bits; | ||
1775 | } | ||
1776 | |||
/* Given the current state of the ptes associated with a pde,
   determine value and write it out.  There's no checking
   here to determine whether or not a change was actually
   made.  So, superfluous updates will cause unnecessary
   pde invalidations.

   A PDE is two 32-bit words; word 0 carries the big-page table
   pointer/aperture, word 1 the small-page table pointer/aperture and
   the volatile bits.  A missing table is written as "invalid" so the
   GMMU faults rather than walks stale pages.  Caller holds
   vm->update_gmmu_lock. */
static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
{
	bool small_valid, big_valid;
	u64 pte_addr[2] = {0, 0};
	struct page_table_gk20a *small_pte =
		vm->pdes.ptes[gmmu_page_size_small] + i;
	struct page_table_gk20a *big_pte =
		vm->pdes.ptes[gmmu_page_size_big] + i;
	u32 pde_v[2] = {0, 0};
	u32 *pde;

	/* a table is "valid" only while it holds a live ref */
	small_valid = small_pte && small_pte->ref;
	big_valid   = big_pte && big_pte->ref;

	if (small_valid)
		pte_addr[gmmu_page_size_small] =
			gk20a_mm_iova_addr(small_pte->sgt->sgl);
	if (big_valid)
		pte_addr[gmmu_page_size_big] =
			gk20a_mm_iova_addr(big_pte->sgt->sgl);

	pde_v[0] = gmmu_pde_size_full_f();
	pde_v[0] |= big_valid ?
		big_valid_pde0_bits(pte_addr[gmmu_page_size_big])
		:
		(gmmu_pde_aperture_big_invalid_f());

	pde_v[1] |= (small_valid ?
		     small_valid_pde1_bits(pte_addr[gmmu_page_size_small])
		     :
		     (gmmu_pde_aperture_small_invalid_f() |
		      gmmu_pde_vol_small_false_f())
		     )
		    |
		(big_valid ? (gmmu_pde_vol_big_true_f()) :
		 gmmu_pde_vol_big_false_f());

	pde = pde_from_index(vm, i);

	gk20a_mem_wr32(pde, 0, pde_v[0]);
	gk20a_mem_wr32(pde, 1, pde_v[1]);

	/* order the PDE writes before the flush below */
	smp_mb();

	FLUSH_CPU_DCACHE(pde,
			 sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()),
			 sizeof(u32)*2);

	gk20a_mm_l2_invalidate(vm->mm->g);

	gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);

	vm->tlb_dirty  = true;
}
1837 | |||
1838 | |||
/*
 * Back @num_pages pages of VA starting at @vaddr with the vm's shared
 * zero page (used for sparse reservations instead of leaving the range
 * unmapped).  The zero page and its sg_table are allocated lazily on
 * first use and live until gk20a_vm_remove_support().
 *
 * Returns 0 on success; -ENOMEM if the zero page could not be set up,
 * -EINVAL if any page failed to map (already-mapped pages are unwound).
 */
static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
			       u32 num_pages, u32 pgsz_idx)
{
	struct mm_gk20a *mm = vm->mm;
	struct gk20a *g = mm->g;
	u32 pgsz = gmmu_page_sizes[pgsz_idx];
	u32 i;
	dma_addr_t iova;

	/* allocate the zero page if the va does not already have one */
	if (!vm->zero_page_cpuva) {
		int err = 0;
		vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
							 mm->big_page_size,
							 &iova,
							 GFP_KERNEL);
		if (!vm->zero_page_cpuva) {
			dev_err(&g->dev->dev, "failed to allocate zero page\n");
			return -ENOMEM;
		}

		vm->zero_page_iova = iova;
		err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
					vm->zero_page_cpuva, vm->zero_page_iova,
					mm->big_page_size);
		if (err) {
			/* undo the coherent allocation; leave the vm fields
			 * cleared so a later call can retry */
			dma_free_coherent(&g->dev->dev, mm->big_page_size,
					  vm->zero_page_cpuva,
					  vm->zero_page_iova);
			vm->zero_page_iova = 0;
			vm->zero_page_cpuva = NULL;

			dev_err(&g->dev->dev, "failed to create sg table for zero page\n");
			return -ENOMEM;
		}
	}

	/* map the zero page at a fixed offset over every page in the range */
	for (i = 0; i < num_pages; i++) {
		u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
			vm->zero_page_sgt, pgsz, pgsz_idx, 0, 0,
			NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
			gk20a_mem_flag_none);

		if (!page_vaddr) {
			gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
			goto err_unmap;
		}
		vaddr += pgsz;
	}

	gk20a_mm_l2_flush(mm->g, true);

	return 0;

err_unmap:

	WARN_ON(1);
	/* something went wrong. unmap pages */
	while (i--) {
		vaddr -= pgsz;
		__locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
				    gk20a_mem_flag_none);
	}

	return -EINVAL;
}
1905 | |||
/* NOTE! mapped_buffers lock must be held */
/*
 * Tear down one tracked mapping: clear or re-zero its PTEs, unpin the
 * dma_buf, remove the node from the vm's tracking structures, drop the
 * dma_buf reference if this mapping owned one, and free the node.
 * Called as the mapped_buffer kref release and from vm teardown.
 */
static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
{
	struct vm_gk20a *vm = mapped_buffer->vm;

	if (mapped_buffer->va_node &&
	    mapped_buffer->va_node->sparse) {
		/* sparse reservation: keep the VA range valid by remapping
		 * it to the zero page instead of leaving it unmapped */
		u64 vaddr = mapped_buffer->addr;
		u32 pgsz_idx = mapped_buffer->pgsz_idx;
		u32 num_pages = mapped_buffer->size >>
			gmmu_page_shifts[pgsz_idx];

		/* there is little we can do if this fails... */
		gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);

	} else
		__locked_gmmu_unmap(vm,
				mapped_buffer->addr,
				mapped_buffer->size,
				mapped_buffer->pgsz_idx,
				mapped_buffer->va_allocated,
				gk20a_mem_flag_none);

	gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
		   vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx],
		   hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
		   mapped_buffer->own_mem_ref);

	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
		       mapped_buffer->sgt);

	/* remove from mapped buffer tree and remove list, free */
	rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
	if (!list_empty(&mapped_buffer->va_buffers_list))
		list_del(&mapped_buffer->va_buffers_list);

	/* keep track of mapped buffers */
	if (mapped_buffer->user_mapped)
		vm->num_user_mapped_buffers--;

	if (mapped_buffer->own_mem_ref)
		dma_buf_put(mapped_buffer->dmabuf);

	kfree(mapped_buffer);

	return;
}
1953 | |||
1954 | void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset) | ||
1955 | { | ||
1956 | struct device *d = dev_from_vm(vm); | ||
1957 | struct mapped_buffer_node *mapped_buffer; | ||
1958 | |||
1959 | mutex_lock(&vm->update_gmmu_lock); | ||
1960 | mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset); | ||
1961 | if (!mapped_buffer) { | ||
1962 | mutex_unlock(&vm->update_gmmu_lock); | ||
1963 | gk20a_err(d, "invalid addr to unmap 0x%llx", offset); | ||
1964 | return; | ||
1965 | } | ||
1966 | kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); | ||
1967 | mutex_unlock(&vm->update_gmmu_lock); | ||
1968 | } | ||
1969 | |||
/*
 * Final teardown of a vm once its last reference is dropped: unmap every
 * remaining buffer, free reserved-VA bookkeeping, release the page
 * directory and PTE tracking arrays, destroy the VA allocators, free the
 * lazily-allocated zero page, and finally free the vm itself.
 */
static void gk20a_vm_remove_support(struct vm_gk20a *vm)
{
	struct gk20a *g = vm->mm->g;
	struct mapped_buffer_node *mapped_buffer;
	struct vm_reserved_va_node *va_node, *va_node_tmp;
	struct rb_node *node;

	gk20a_dbg_fn("");
	mutex_lock(&vm->update_gmmu_lock);

	/* TBD: add a flag here for the unmap code to recognize teardown
	 * and short-circuit any otherwise expensive operations. */

	/* re-read the root each pass since unmap erases the node */
	node = rb_first(&vm->mapped_buffers);
	while (node) {
		mapped_buffer =
			container_of(node, struct mapped_buffer_node, node);
		gk20a_vm_unmap_locked(mapped_buffer);
		node = rb_first(&vm->mapped_buffers);
	}

	/* destroy remaining reserved memory areas */
	list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
		reserved_va_list) {
		list_del(&va_node->reserved_va_list);
		kfree(va_node);
	}

	/* TBD: unmapping all buffers above may not actually free
	 * all vm ptes.  jettison them here for certain... */

	unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, vm->pdes.size);

	kfree(vm->pdes.ptes[gmmu_page_size_small]);
	kfree(vm->pdes.ptes[gmmu_page_size_big]);
	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);

	mutex_unlock(&vm->update_gmmu_lock);

	/* release zero page if used */
	if (vm->zero_page_cpuva)
		dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
				  vm->zero_page_cpuva, vm->zero_page_iova);

	/* vm is not used anymore. release it. */
	kfree(vm);
}
2019 | |||
2020 | static void gk20a_vm_remove_support_kref(struct kref *ref) | ||
2021 | { | ||
2022 | struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref); | ||
2023 | gk20a_vm_remove_support(vm); | ||
2024 | } | ||
2025 | |||
/* Take a reference on @vm (paired with gk20a_vm_put()). */
void gk20a_vm_get(struct vm_gk20a *vm)
{
	kref_get(&vm->ref);
}
2030 | |||
/* Drop a reference on @vm; the last put triggers full vm teardown. */
void gk20a_vm_put(struct vm_gk20a *vm)
{
	kref_put(&vm->ref, gk20a_vm_remove_support_kref);
}
2035 | |||
2036 | /* address space interfaces for the gk20a module */ | ||
2037 | int gk20a_vm_alloc_share(struct gk20a_as_share *as_share) | ||
2038 | { | ||
2039 | struct gk20a_as *as = as_share->as; | ||
2040 | struct gk20a *g = gk20a_from_as(as); | ||
2041 | struct mm_gk20a *mm = &g->mm; | ||
2042 | struct vm_gk20a *vm; | ||
2043 | u64 vma_size; | ||
2044 | u32 num_pages, low_hole_pages; | ||
2045 | char name[32]; | ||
2046 | int err; | ||
2047 | |||
2048 | gk20a_dbg_fn(""); | ||
2049 | |||
2050 | vm = kzalloc(sizeof(*vm), GFP_KERNEL); | ||
2051 | if (!vm) | ||
2052 | return -ENOMEM; | ||
2053 | |||
2054 | as_share->vm = vm; | ||
2055 | |||
2056 | vm->mm = mm; | ||
2057 | vm->as_share = as_share; | ||
2058 | |||
2059 | vm->big_pages = true; | ||
2060 | |||
2061 | vm->va_start = mm->pde_stride; /* create a one pde hole */ | ||
2062 | vm->va_limit = mm->channel.size; /* note this means channel.size is | ||
2063 | really just the max */ | ||
2064 | { | ||
2065 | u32 pde_lo, pde_hi; | ||
2066 | pde_range_from_vaddr_range(vm, | ||
2067 | 0, vm->va_limit-1, | ||
2068 | &pde_lo, &pde_hi); | ||
2069 | vm->pdes.num_pdes = pde_hi + 1; | ||
2070 | } | ||
2071 | |||
2072 | vm->pdes.ptes[gmmu_page_size_small] = | ||
2073 | kzalloc(sizeof(struct page_table_gk20a) * | ||
2074 | vm->pdes.num_pdes, GFP_KERNEL); | ||
2075 | |||
2076 | vm->pdes.ptes[gmmu_page_size_big] = | ||
2077 | kzalloc(sizeof(struct page_table_gk20a) * | ||
2078 | vm->pdes.num_pdes, GFP_KERNEL); | ||
2079 | |||
2080 | if (!(vm->pdes.ptes[gmmu_page_size_small] && | ||
2081 | vm->pdes.ptes[gmmu_page_size_big])) | ||
2082 | return -ENOMEM; | ||
2083 | |||
2084 | gk20a_dbg_info("init space for va_limit=0x%llx num_pdes=%d", | ||
2085 | vm->va_limit, vm->pdes.num_pdes); | ||
2086 | |||
2087 | /* allocate the page table directory */ | ||
2088 | err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref, | ||
2089 | &vm->pdes.sgt, &vm->pdes.size); | ||
2090 | if (err) | ||
2091 | return -ENOMEM; | ||
2092 | |||
2093 | err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv, | ||
2094 | vm->pdes.size); | ||
2095 | if (err) { | ||
2096 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, | ||
2097 | vm->pdes.size); | ||
2098 | return -ENOMEM; | ||
2099 | } | ||
2100 | gk20a_dbg(gpu_dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx", | ||
2101 | vm->pdes.kv, | ||
2102 | gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); | ||
2103 | /* we could release vm->pdes.kv but it's only one page... */ | ||
2104 | |||
2105 | |||
2106 | /* low-half: alloc small pages */ | ||
2107 | /* high-half: alloc big pages */ | ||
2108 | vma_size = mm->channel.size >> 1; | ||
2109 | |||
2110 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | ||
2111 | gmmu_page_sizes[gmmu_page_size_small]>>10); | ||
2112 | num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]); | ||
2113 | |||
2114 | /* num_pages above is without regard to the low-side hole. */ | ||
2115 | low_hole_pages = (vm->va_start >> | ||
2116 | gmmu_page_shifts[gmmu_page_size_small]); | ||
2117 | |||
2118 | gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name, | ||
2119 | low_hole_pages, /* start */ | ||
2120 | num_pages - low_hole_pages, /* length */ | ||
2121 | 1); /* align */ | ||
2122 | |||
2123 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | ||
2124 | gmmu_page_sizes[gmmu_page_size_big]>>10); | ||
2125 | |||
2126 | num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]); | ||
2127 | gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name, | ||
2128 | num_pages, /* start */ | ||
2129 | num_pages, /* length */ | ||
2130 | 1); /* align */ | ||
2131 | |||
2132 | vm->mapped_buffers = RB_ROOT; | ||
2133 | |||
2134 | mutex_init(&vm->update_gmmu_lock); | ||
2135 | kref_init(&vm->ref); | ||
2136 | INIT_LIST_HEAD(&vm->reserved_va_list); | ||
2137 | |||
2138 | vm->enable_ctag = true; | ||
2139 | |||
2140 | return 0; | ||
2141 | } | ||
2142 | |||
2143 | |||
2144 | int gk20a_vm_release_share(struct gk20a_as_share *as_share) | ||
2145 | { | ||
2146 | struct vm_gk20a *vm = as_share->vm; | ||
2147 | |||
2148 | gk20a_dbg_fn(""); | ||
2149 | |||
2150 | vm->as_share = NULL; | ||
2151 | |||
2152 | /* put as reference to vm */ | ||
2153 | gk20a_vm_put(vm); | ||
2154 | |||
2155 | as_share->vm = NULL; | ||
2156 | |||
2157 | return 0; | ||
2158 | } | ||
2159 | |||
2160 | |||
2161 | int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | ||
2162 | struct nvhost_as_alloc_space_args *args) | ||
2163 | |||
2164 | { int err = -ENOMEM; | ||
2165 | int pgsz_idx; | ||
2166 | u32 start_page_nr; | ||
2167 | struct gk20a_allocator *vma; | ||
2168 | struct vm_gk20a *vm = as_share->vm; | ||
2169 | struct vm_reserved_va_node *va_node; | ||
2170 | u64 vaddr_start = 0; | ||
2171 | |||
2172 | gk20a_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx", | ||
2173 | args->flags, args->page_size, args->pages, | ||
2174 | args->o_a.offset); | ||
2175 | |||
2176 | /* determine pagesz idx */ | ||
2177 | for (pgsz_idx = gmmu_page_size_small; | ||
2178 | pgsz_idx < gmmu_nr_page_sizes; | ||
2179 | pgsz_idx++) { | ||
2180 | if (gmmu_page_sizes[pgsz_idx] == args->page_size) | ||
2181 | break; | ||
2182 | } | ||
2183 | |||
2184 | if (pgsz_idx >= gmmu_nr_page_sizes) { | ||
2185 | err = -EINVAL; | ||
2186 | goto clean_up; | ||
2187 | } | ||
2188 | |||
2189 | va_node = kzalloc(sizeof(*va_node), GFP_KERNEL); | ||
2190 | if (!va_node) { | ||
2191 | err = -ENOMEM; | ||
2192 | goto clean_up; | ||
2193 | } | ||
2194 | |||
2195 | if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE && | ||
2196 | pgsz_idx != gmmu_page_size_big) { | ||
2197 | err = -ENOSYS; | ||
2198 | kfree(va_node); | ||
2199 | goto clean_up; | ||
2200 | } | ||
2201 | |||
2202 | start_page_nr = 0; | ||
2203 | if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) | ||
2204 | start_page_nr = (u32)(args->o_a.offset >> | ||
2205 | gmmu_page_shifts[pgsz_idx]); | ||
2206 | |||
2207 | vma = &vm->vma[pgsz_idx]; | ||
2208 | err = vma->alloc(vma, &start_page_nr, args->pages); | ||
2209 | if (err) { | ||
2210 | kfree(va_node); | ||
2211 | goto clean_up; | ||
2212 | } | ||
2213 | |||
2214 | vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx]; | ||
2215 | |||
2216 | va_node->vaddr_start = vaddr_start; | ||
2217 | va_node->size = (u64)args->page_size * (u64)args->pages; | ||
2218 | va_node->pgsz_idx = args->page_size; | ||
2219 | INIT_LIST_HEAD(&va_node->va_buffers_list); | ||
2220 | INIT_LIST_HEAD(&va_node->reserved_va_list); | ||
2221 | |||
2222 | mutex_lock(&vm->update_gmmu_lock); | ||
2223 | |||
2224 | /* mark that we need to use sparse mappings here */ | ||
2225 | if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) { | ||
2226 | err = gk20a_vm_put_empty(vm, vaddr_start, args->pages, | ||
2227 | pgsz_idx); | ||
2228 | if (err) { | ||
2229 | mutex_unlock(&vm->update_gmmu_lock); | ||
2230 | vma->free(vma, start_page_nr, args->pages); | ||
2231 | kfree(va_node); | ||
2232 | goto clean_up; | ||
2233 | } | ||
2234 | |||
2235 | va_node->sparse = true; | ||
2236 | } | ||
2237 | |||
2238 | list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list); | ||
2239 | |||
2240 | mutex_unlock(&vm->update_gmmu_lock); | ||
2241 | |||
2242 | args->o_a.offset = vaddr_start; | ||
2243 | |||
2244 | clean_up: | ||
2245 | return err; | ||
2246 | } | ||
2247 | |||
2248 | int gk20a_vm_free_space(struct gk20a_as_share *as_share, | ||
2249 | struct nvhost_as_free_space_args *args) | ||
2250 | { | ||
2251 | int err = -ENOMEM; | ||
2252 | int pgsz_idx; | ||
2253 | u32 start_page_nr; | ||
2254 | struct gk20a_allocator *vma; | ||
2255 | struct vm_gk20a *vm = as_share->vm; | ||
2256 | struct vm_reserved_va_node *va_node; | ||
2257 | |||
2258 | gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size, | ||
2259 | args->pages, args->offset); | ||
2260 | |||
2261 | /* determine pagesz idx */ | ||
2262 | for (pgsz_idx = gmmu_page_size_small; | ||
2263 | pgsz_idx < gmmu_nr_page_sizes; | ||
2264 | pgsz_idx++) { | ||
2265 | if (gmmu_page_sizes[pgsz_idx] == args->page_size) | ||
2266 | break; | ||
2267 | } | ||
2268 | |||
2269 | if (pgsz_idx >= gmmu_nr_page_sizes) { | ||
2270 | err = -EINVAL; | ||
2271 | goto clean_up; | ||
2272 | } | ||
2273 | |||
2274 | start_page_nr = (u32)(args->offset >> | ||
2275 | gmmu_page_shifts[pgsz_idx]); | ||
2276 | |||
2277 | vma = &vm->vma[pgsz_idx]; | ||
2278 | err = vma->free(vma, start_page_nr, args->pages); | ||
2279 | |||
2280 | if (err) | ||
2281 | goto clean_up; | ||
2282 | |||
2283 | mutex_lock(&vm->update_gmmu_lock); | ||
2284 | va_node = addr_to_reservation(vm, args->offset); | ||
2285 | if (va_node) { | ||
2286 | struct mapped_buffer_node *buffer; | ||
2287 | |||
2288 | /* there is no need to unallocate the buffers in va. Just | ||
2289 | * convert them into normal buffers */ | ||
2290 | |||
2291 | list_for_each_entry(buffer, | ||
2292 | &va_node->va_buffers_list, va_buffers_list) | ||
2293 | list_del_init(&buffer->va_buffers_list); | ||
2294 | |||
2295 | list_del(&va_node->reserved_va_list); | ||
2296 | |||
2297 | /* if this was a sparse mapping, free the va */ | ||
2298 | if (va_node->sparse) | ||
2299 | __locked_gmmu_unmap(vm, | ||
2300 | va_node->vaddr_start, | ||
2301 | va_node->size, | ||
2302 | va_node->pgsz_idx, | ||
2303 | false, | ||
2304 | gk20a_mem_flag_none); | ||
2305 | kfree(va_node); | ||
2306 | } | ||
2307 | mutex_unlock(&vm->update_gmmu_lock); | ||
2308 | |||
2309 | clean_up: | ||
2310 | return err; | ||
2311 | } | ||
2312 | |||
2313 | int gk20a_vm_bind_channel(struct gk20a_as_share *as_share, | ||
2314 | struct channel_gk20a *ch) | ||
2315 | { | ||
2316 | int err = 0; | ||
2317 | struct vm_gk20a *vm = as_share->vm; | ||
2318 | |||
2319 | gk20a_dbg_fn(""); | ||
2320 | |||
2321 | ch->vm = vm; | ||
2322 | err = channel_gk20a_commit_va(ch); | ||
2323 | if (err) | ||
2324 | ch->vm = 0; | ||
2325 | |||
2326 | return err; | ||
2327 | } | ||
2328 | |||
/*
 * Ensure @dmabuf has gk20a private driver data attached for @dev.
 *
 * Uses double-checked locking: the first lockless lookup handles the
 * common already-attached case; the second lookup under priv_lock closes
 * the race with a concurrent attacher.  Returns 0 on success (or if the
 * data already existed), -ENOMEM on allocation failure.
 */
int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
{
	struct gk20a_dmabuf_priv *priv;
	static DEFINE_MUTEX(priv_lock);

	/* fast path: drvdata already attached */
	priv = dma_buf_get_drvdata(dmabuf, dev);
	if (likely(priv))
		return 0;

	mutex_lock(&priv_lock);
	/* re-check under the lock: someone may have attached meanwhile */
	priv = dma_buf_get_drvdata(dmabuf, dev);
	if (priv)
		goto priv_exist_or_err;
	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv) {
		priv = ERR_PTR(-ENOMEM);
		goto priv_exist_or_err;
	}
	mutex_init(&priv->lock);
	/* gk20a_mm_delete_priv is invoked when the dmabuf drops the data */
	dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);
priv_exist_or_err:
	mutex_unlock(&priv_lock);
	if (IS_ERR(priv))
		return -ENOMEM;

	return 0;
}
2356 | |||
2357 | |||
/*
 * Query the memory "kind" recorded for @dmabuf.  With nvmap support the
 * kind is read from the buffer's nvmap parameters; without it, or if the
 * query fails, 0 is returned.
 */
static int gk20a_dmabuf_get_kind(struct dma_buf *dmabuf)
{
	int kind = 0;
#ifdef CONFIG_TEGRA_NVMAP
	u64 nvmap_param;
	int err;

	err = nvmap_get_dmabuf_param(dmabuf, NVMAP_HANDLE_PARAM_KIND,
				     &nvmap_param);
	/* keep the default of 0 if the query failed */
	if (!err)
		kind = nvmap_param;
#endif
	return kind;
}
2371 | |||
2372 | int gk20a_vm_map_buffer(struct gk20a_as_share *as_share, | ||
2373 | int dmabuf_fd, | ||
2374 | u64 *offset_align, | ||
2375 | u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/ | ||
2376 | int kind) | ||
2377 | { | ||
2378 | int err = 0; | ||
2379 | struct vm_gk20a *vm = as_share->vm; | ||
2380 | struct dma_buf *dmabuf; | ||
2381 | u64 ret_va; | ||
2382 | |||
2383 | gk20a_dbg_fn(""); | ||
2384 | |||
2385 | /* get ref to the mem handle (released on unmap_locked) */ | ||
2386 | dmabuf = dma_buf_get(dmabuf_fd); | ||
2387 | if (!dmabuf) | ||
2388 | return 0; | ||
2389 | |||
2390 | err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm)); | ||
2391 | if (err) { | ||
2392 | dma_buf_put(dmabuf); | ||
2393 | return err; | ||
2394 | } | ||
2395 | |||
2396 | if (kind == -1) | ||
2397 | kind = gk20a_dmabuf_get_kind(dmabuf); | ||
2398 | |||
2399 | ret_va = gk20a_vm_map(vm, dmabuf, *offset_align, | ||
2400 | flags, kind, NULL, true, | ||
2401 | gk20a_mem_flag_none); | ||
2402 | *offset_align = ret_va; | ||
2403 | if (!ret_va) { | ||
2404 | dma_buf_put(dmabuf); | ||
2405 | err = -EINVAL; | ||
2406 | } | ||
2407 | |||
2408 | return err; | ||
2409 | } | ||
2410 | |||
2411 | int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset) | ||
2412 | { | ||
2413 | struct vm_gk20a *vm = as_share->vm; | ||
2414 | |||
2415 | gk20a_dbg_fn(""); | ||
2416 | |||
2417 | gk20a_vm_unmap_user(vm, offset); | ||
2418 | return 0; | ||
2419 | } | ||
2420 | |||
2421 | int gk20a_init_bar1_vm(struct mm_gk20a *mm) | ||
2422 | { | ||
2423 | int err; | ||
2424 | phys_addr_t inst_pa; | ||
2425 | void *inst_ptr; | ||
2426 | struct vm_gk20a *vm = &mm->bar1.vm; | ||
2427 | struct gk20a *g = gk20a_from_mm(mm); | ||
2428 | struct device *d = dev_from_gk20a(g); | ||
2429 | struct inst_desc *inst_block = &mm->bar1.inst_block; | ||
2430 | u64 pde_addr; | ||
2431 | u32 pde_addr_lo; | ||
2432 | u32 pde_addr_hi; | ||
2433 | dma_addr_t iova; | ||
2434 | |||
2435 | vm->mm = mm; | ||
2436 | |||
2437 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; | ||
2438 | |||
2439 | gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); | ||
2440 | |||
2441 | vm->va_start = mm->pde_stride * 1; | ||
2442 | vm->va_limit = mm->bar1.aperture_size; | ||
2443 | |||
2444 | { | ||
2445 | u32 pde_lo, pde_hi; | ||
2446 | pde_range_from_vaddr_range(vm, | ||
2447 | 0, vm->va_limit-1, | ||
2448 | &pde_lo, &pde_hi); | ||
2449 | vm->pdes.num_pdes = pde_hi + 1; | ||
2450 | } | ||
2451 | |||
2452 | /* bar1 is likely only to ever use/need small page sizes. */ | ||
2453 | /* But just in case, for now... arrange for both.*/ | ||
2454 | vm->pdes.ptes[gmmu_page_size_small] = | ||
2455 | kzalloc(sizeof(struct page_table_gk20a) * | ||
2456 | vm->pdes.num_pdes, GFP_KERNEL); | ||
2457 | |||
2458 | vm->pdes.ptes[gmmu_page_size_big] = | ||
2459 | kzalloc(sizeof(struct page_table_gk20a) * | ||
2460 | vm->pdes.num_pdes, GFP_KERNEL); | ||
2461 | |||
2462 | if (!(vm->pdes.ptes[gmmu_page_size_small] && | ||
2463 | vm->pdes.ptes[gmmu_page_size_big])) | ||
2464 | return -ENOMEM; | ||
2465 | |||
2466 | gk20a_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d", | ||
2467 | vm->va_limit, vm->pdes.num_pdes); | ||
2468 | |||
2469 | |||
2470 | /* allocate the page table directory */ | ||
2471 | err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref, | ||
2472 | &vm->pdes.sgt, &vm->pdes.size); | ||
2473 | if (err) | ||
2474 | goto clean_up; | ||
2475 | |||
2476 | err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv, | ||
2477 | vm->pdes.size); | ||
2478 | if (err) { | ||
2479 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, | ||
2480 | vm->pdes.size); | ||
2481 | goto clean_up; | ||
2482 | } | ||
2483 | gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx", | ||
2484 | vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); | ||
2485 | /* we could release vm->pdes.kv but it's only one page... */ | ||
2486 | |||
2487 | pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); | ||
2488 | pde_addr_lo = u64_lo32(pde_addr >> 12); | ||
2489 | pde_addr_hi = u64_hi32(pde_addr); | ||
2490 | |||
2491 | gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x", | ||
2492 | (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl), | ||
2493 | pde_addr_lo, pde_addr_hi); | ||
2494 | |||
2495 | /* allocate instance mem for bar1 */ | ||
2496 | inst_block->size = ram_in_alloc_size_v(); | ||
2497 | inst_block->cpuva = dma_alloc_coherent(d, inst_block->size, | ||
2498 | &iova, GFP_KERNEL); | ||
2499 | if (!inst_block->cpuva) { | ||
2500 | gk20a_err(d, "%s: memory allocation failed\n", __func__); | ||
2501 | err = -ENOMEM; | ||
2502 | goto clean_up; | ||
2503 | } | ||
2504 | |||
2505 | inst_block->iova = iova; | ||
2506 | inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova); | ||
2507 | if (!inst_block->cpu_pa) { | ||
2508 | gk20a_err(d, "%s: failed to get phys address\n", __func__); | ||
2509 | err = -ENOMEM; | ||
2510 | goto clean_up; | ||
2511 | } | ||
2512 | |||
2513 | inst_pa = inst_block->cpu_pa; | ||
2514 | inst_ptr = inst_block->cpuva; | ||
2515 | |||
2516 | gk20a_dbg_info("bar1 inst block physical phys = 0x%llx, kv = 0x%p", | ||
2517 | (u64)inst_pa, inst_ptr); | ||
2518 | |||
2519 | memset(inst_ptr, 0, ram_fc_size_val_v()); | ||
2520 | |||
2521 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | ||
2522 | ram_in_page_dir_base_target_vid_mem_f() | | ||
2523 | ram_in_page_dir_base_vol_true_f() | | ||
2524 | ram_in_page_dir_base_lo_f(pde_addr_lo)); | ||
2525 | |||
2526 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), | ||
2527 | ram_in_page_dir_base_hi_f(pde_addr_hi)); | ||
2528 | |||
2529 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), | ||
2530 | u64_lo32(vm->va_limit) | 0xFFF); | ||
2531 | |||
2532 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), | ||
2533 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); | ||
2534 | |||
2535 | gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa); | ||
2536 | gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1", | ||
2537 | 1,/*start*/ | ||
2538 | (vm->va_limit >> 12) - 1 /* length*/, | ||
2539 | 1); /* align */ | ||
2540 | /* initialize just in case we try to use it anyway */ | ||
2541 | gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused", | ||
2542 | 0x0badc0de, /* start */ | ||
2543 | 1, /* length */ | ||
2544 | 1); /* align */ | ||
2545 | |||
2546 | vm->mapped_buffers = RB_ROOT; | ||
2547 | |||
2548 | mutex_init(&vm->update_gmmu_lock); | ||
2549 | kref_init(&vm->ref); | ||
2550 | INIT_LIST_HEAD(&vm->reserved_va_list); | ||
2551 | |||
2552 | return 0; | ||
2553 | |||
2554 | clean_up: | ||
2555 | /* free, etc */ | ||
2556 | if (inst_block->cpuva) | ||
2557 | dma_free_coherent(d, inst_block->size, | ||
2558 | inst_block->cpuva, inst_block->iova); | ||
2559 | inst_block->cpuva = NULL; | ||
2560 | inst_block->iova = 0; | ||
2561 | return err; | ||
2562 | } | ||
2563 | |||
2564 | /* pmu vm, share channel_vm interfaces */ | ||
2565 | int gk20a_init_pmu_vm(struct mm_gk20a *mm) | ||
2566 | { | ||
2567 | int err; | ||
2568 | phys_addr_t inst_pa; | ||
2569 | void *inst_ptr; | ||
2570 | struct vm_gk20a *vm = &mm->pmu.vm; | ||
2571 | struct gk20a *g = gk20a_from_mm(mm); | ||
2572 | struct device *d = dev_from_gk20a(g); | ||
2573 | struct inst_desc *inst_block = &mm->pmu.inst_block; | ||
2574 | u64 pde_addr; | ||
2575 | u32 pde_addr_lo; | ||
2576 | u32 pde_addr_hi; | ||
2577 | dma_addr_t iova; | ||
2578 | |||
2579 | vm->mm = mm; | ||
2580 | |||
2581 | mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; | ||
2582 | |||
2583 | gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); | ||
2584 | |||
2585 | vm->va_start = GK20A_PMU_VA_START; | ||
2586 | vm->va_limit = vm->va_start + mm->pmu.aperture_size; | ||
2587 | |||
2588 | { | ||
2589 | u32 pde_lo, pde_hi; | ||
2590 | pde_range_from_vaddr_range(vm, | ||
2591 | 0, vm->va_limit-1, | ||
2592 | &pde_lo, &pde_hi); | ||
2593 | vm->pdes.num_pdes = pde_hi + 1; | ||
2594 | } | ||
2595 | |||
2596 | /* The pmu is likely only to ever use/need small page sizes. */ | ||
2597 | /* But just in case, for now... arrange for both.*/ | ||
2598 | vm->pdes.ptes[gmmu_page_size_small] = | ||
2599 | kzalloc(sizeof(struct page_table_gk20a) * | ||
2600 | vm->pdes.num_pdes, GFP_KERNEL); | ||
2601 | |||
2602 | vm->pdes.ptes[gmmu_page_size_big] = | ||
2603 | kzalloc(sizeof(struct page_table_gk20a) * | ||
2604 | vm->pdes.num_pdes, GFP_KERNEL); | ||
2605 | |||
2606 | if (!(vm->pdes.ptes[gmmu_page_size_small] && | ||
2607 | vm->pdes.ptes[gmmu_page_size_big])) | ||
2608 | return -ENOMEM; | ||
2609 | |||
2610 | gk20a_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d", | ||
2611 | vm->va_limit, vm->pdes.num_pdes); | ||
2612 | |||
2613 | /* allocate the page table directory */ | ||
2614 | err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref, | ||
2615 | &vm->pdes.sgt, &vm->pdes.size); | ||
2616 | if (err) | ||
2617 | goto clean_up; | ||
2618 | |||
2619 | err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv, | ||
2620 | vm->pdes.size); | ||
2621 | if (err) { | ||
2622 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, | ||
2623 | vm->pdes.size); | ||
2624 | goto clean_up; | ||
2625 | } | ||
2626 | gk20a_dbg_info("pmu pdes phys @ 0x%llx", | ||
2627 | (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); | ||
2628 | /* we could release vm->pdes.kv but it's only one page... */ | ||
2629 | |||
2630 | pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); | ||
2631 | pde_addr_lo = u64_lo32(pde_addr >> 12); | ||
2632 | pde_addr_hi = u64_hi32(pde_addr); | ||
2633 | |||
2634 | gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x", | ||
2635 | (u64)pde_addr, pde_addr_lo, pde_addr_hi); | ||
2636 | |||
2637 | /* allocate instance mem for pmu */ | ||
2638 | inst_block->size = GK20A_PMU_INST_SIZE; | ||
2639 | inst_block->cpuva = dma_alloc_coherent(d, inst_block->size, | ||
2640 | &iova, GFP_KERNEL); | ||
2641 | if (!inst_block->cpuva) { | ||
2642 | gk20a_err(d, "%s: memory allocation failed\n", __func__); | ||
2643 | err = -ENOMEM; | ||
2644 | goto clean_up; | ||
2645 | } | ||
2646 | |||
2647 | inst_block->iova = iova; | ||
2648 | inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova); | ||
2649 | if (!inst_block->cpu_pa) { | ||
2650 | gk20a_err(d, "%s: failed to get phys address\n", __func__); | ||
2651 | err = -ENOMEM; | ||
2652 | goto clean_up; | ||
2653 | } | ||
2654 | |||
2655 | inst_pa = inst_block->cpu_pa; | ||
2656 | inst_ptr = inst_block->cpuva; | ||
2657 | |||
2658 | gk20a_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa); | ||
2659 | |||
2660 | memset(inst_ptr, 0, GK20A_PMU_INST_SIZE); | ||
2661 | |||
2662 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | ||
2663 | ram_in_page_dir_base_target_vid_mem_f() | | ||
2664 | ram_in_page_dir_base_vol_true_f() | | ||
2665 | ram_in_page_dir_base_lo_f(pde_addr_lo)); | ||
2666 | |||
2667 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), | ||
2668 | ram_in_page_dir_base_hi_f(pde_addr_hi)); | ||
2669 | |||
2670 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), | ||
2671 | u64_lo32(vm->va_limit) | 0xFFF); | ||
2672 | |||
2673 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), | ||
2674 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); | ||
2675 | |||
2676 | gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu", | ||
2677 | (vm->va_start >> 12), /* start */ | ||
2678 | (vm->va_limit - vm->va_start) >> 12, /*length*/ | ||
2679 | 1); /* align */ | ||
2680 | /* initialize just in case we try to use it anyway */ | ||
2681 | gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused", | ||
2682 | 0x0badc0de, /* start */ | ||
2683 | 1, /* length */ | ||
2684 | 1); /* align */ | ||
2685 | |||
2686 | |||
2687 | vm->mapped_buffers = RB_ROOT; | ||
2688 | |||
2689 | mutex_init(&vm->update_gmmu_lock); | ||
2690 | kref_init(&vm->ref); | ||
2691 | INIT_LIST_HEAD(&vm->reserved_va_list); | ||
2692 | |||
2693 | return 0; | ||
2694 | |||
2695 | clean_up: | ||
2696 | /* free, etc */ | ||
2697 | if (inst_block->cpuva) | ||
2698 | dma_free_coherent(d, inst_block->size, | ||
2699 | inst_block->cpuva, inst_block->iova); | ||
2700 | inst_block->cpuva = NULL; | ||
2701 | inst_block->iova = 0; | ||
2702 | return err; | ||
2703 | } | ||
2704 | |||
/*
 * gk20a_mm_fb_flush - flush pending GPU writes toward the framebuffer.
 *
 * Performs an ELPG flush, then triggers FB_FLUSH and polls the register
 * until the flush is no longer outstanding or pending.  Serialized
 * against other L2 operations via mm->l2_op_lock.
 */
void gk20a_mm_fb_flush(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	u32 data;
	s32 retry = 100;

	gk20a_dbg_fn("");

	mutex_lock(&mm->l2_op_lock);

	g->ops.ltc.elpg_flush(g);

	/* Make sure all previous writes are committed to the L2. There's no
	   guarantee that writes are to DRAM. This will be a sysmembar internal
	   to the L2. */
	gk20a_writel(g, flush_fb_flush_r(),
		flush_fb_flush_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_fb_flush_r());

		if (flush_fb_flush_outstanding_v(data) ==
			flush_fb_flush_outstanding_true_v() ||
		    flush_fb_flush_pending_v(data) ==
			flush_fb_flush_pending_busy_v()) {
				gk20a_dbg_info("fb_flush 0x%x", data);
				retry--;
				usleep_range(20, 40);
		} else
			break;
	/* pre-silicon platforms poll without a retry limit */
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0)
		gk20a_warn(dev_from_gk20a(g),
			"fb_flush too many retries");

	mutex_unlock(&mm->l2_op_lock);
}
2743 | |||
/*
 * Drop clean lines from the L2 so subsequent GPU reads fetch fresh data
 * from DRAM.  Dirty lines are untouched.  Caller must hold
 * mm->l2_op_lock.
 */
static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
{
	u32 data;
	s32 retry = 200;

	/* Invalidate any clean lines from the L2 so subsequent reads go to
	   DRAM. Dirty lines are not affected by this operation. */
	gk20a_writel(g, flush_l2_system_invalidate_r(),
		flush_l2_system_invalidate_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_l2_system_invalidate_r());

		if (flush_l2_system_invalidate_outstanding_v(data) ==
			flush_l2_system_invalidate_outstanding_true_v() ||
		    flush_l2_system_invalidate_pending_v(data) ==
			flush_l2_system_invalidate_pending_busy_v()) {
				gk20a_dbg_info("l2_system_invalidate 0x%x",
						data);
				retry--;
				usleep_range(20, 40);
		} else
			break;
	/* pre-silicon platforms poll without a retry limit */
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0)
		gk20a_warn(dev_from_gk20a(g),
			"l2_system_invalidate too many retries");
}
2773 | |||
2774 | void gk20a_mm_l2_invalidate(struct gk20a *g) | ||
2775 | { | ||
2776 | struct mm_gk20a *mm = &g->mm; | ||
2777 | mutex_lock(&mm->l2_op_lock); | ||
2778 | gk20a_mm_l2_invalidate_locked(g); | ||
2779 | mutex_unlock(&mm->l2_op_lock); | ||
2780 | } | ||
2781 | |||
/*
 * Flush dirty L2 lines to DRAM (lines stay resident as clean) and, if
 * @invalidate is set, also drop clean lines afterwards.  Serialized
 * against other L2 operations via mm->l2_op_lock.
 */
void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
{
	struct mm_gk20a *mm = &g->mm;
	u32 data;
	s32 retry = 200;

	gk20a_dbg_fn("");

	mutex_lock(&mm->l2_op_lock);

	/* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
	   as clean, so subsequent reads might hit in the L2. */
	gk20a_writel(g, flush_l2_flush_dirty_r(),
		flush_l2_flush_dirty_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_l2_flush_dirty_r());

		if (flush_l2_flush_dirty_outstanding_v(data) ==
			flush_l2_flush_dirty_outstanding_true_v() ||
		    flush_l2_flush_dirty_pending_v(data) ==
			flush_l2_flush_dirty_pending_busy_v()) {
				gk20a_dbg_info("l2_flush_dirty 0x%x", data);
				retry--;
				usleep_range(20, 40);
		} else
			break;
	/* pre-silicon platforms poll without a retry limit */
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0)
		gk20a_warn(dev_from_gk20a(g),
			"l2_flush_dirty too many retries");

	/* invalidate while still holding the lock so flush+invalidate is
	 * one atomic L2 operation */
	if (invalidate)
		gk20a_mm_l2_invalidate_locked(g);

	mutex_unlock(&mm->l2_op_lock);
}
2820 | |||
2821 | |||
2822 | int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, | ||
2823 | struct dma_buf **dmabuf, | ||
2824 | u64 *offset) | ||
2825 | { | ||
2826 | struct mapped_buffer_node *mapped_buffer; | ||
2827 | |||
2828 | gk20a_dbg_fn("gpu_va=0x%llx", gpu_va); | ||
2829 | |||
2830 | mutex_lock(&vm->update_gmmu_lock); | ||
2831 | |||
2832 | mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers, | ||
2833 | gpu_va); | ||
2834 | if (!mapped_buffer) { | ||
2835 | mutex_unlock(&vm->update_gmmu_lock); | ||
2836 | return -EINVAL; | ||
2837 | } | ||
2838 | |||
2839 | *dmabuf = mapped_buffer->dmabuf; | ||
2840 | *offset = gpu_va - mapped_buffer->addr; | ||
2841 | |||
2842 | mutex_unlock(&vm->update_gmmu_lock); | ||
2843 | |||
2844 | return 0; | ||
2845 | } | ||
2846 | |||
/*
 * Invalidate the GPU MMU TLB for @vm's page directory.  No-ops when the
 * GPU is powered off or when the VM's TLB is already clean.  Waits for
 * MMU pri-fifo space, programs the PDB address, triggers a full
 * PDB+VA invalidate, and waits for the fifo to drain.
 */
void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
{
	struct mm_gk20a *mm = vm->mm;
	struct gk20a *g = gk20a_from_vm(vm);
	u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12);
	u32 data;
	s32 retry = 200;

	gk20a_dbg_fn("");

	/* pagetables are considered sw states which are preserved after
	   prepare_poweroff. When gk20a deinit releases those pagetables,
	   common code in vm unmap path calls tlb invalidate that touches
	   hw. Use the power_on flag to skip tlb invalidation when gpu
	   power is turned off */

	if (!g->power_on)
		return;

	/* No need to invalidate if tlb is clean */
	mutex_lock(&vm->update_gmmu_lock);
	if (!vm->tlb_dirty) {
		mutex_unlock(&vm->update_gmmu_lock);
		return;
	}
	/* clear the dirty flag before issuing the invalidate */
	vm->tlb_dirty = false;
	mutex_unlock(&vm->update_gmmu_lock);

	/* tlb_lock serializes access to the MMU invalidate registers */
	mutex_lock(&mm->tlb_lock);
	/* wait for free space in the MMU pri fifo */
	do {
		data = gk20a_readl(g, fb_mmu_ctrl_r());
		if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
			break;
		usleep_range(20, 40);
		retry--;
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0)
		gk20a_warn(dev_from_gk20a(g),
			"wait mmu fifo space too many retries");

	gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
		fb_mmu_invalidate_pdb_addr_f(addr_lo) |
		fb_mmu_invalidate_pdb_aperture_vid_mem_f());

	/* this is a sledgehammer, it would seem */
	gk20a_writel(g, fb_mmu_invalidate_r(),
		fb_mmu_invalidate_all_pdb_true_f() |
		fb_mmu_invalidate_all_va_true_f() |
		fb_mmu_invalidate_trigger_true_f());

	/* wait for the invalidate to drain from the pri fifo */
	do {
		data = gk20a_readl(g, fb_mmu_ctrl_r());
		if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
			fb_mmu_ctrl_pri_fifo_empty_false_f())
			break;
		retry--;
		usleep_range(20, 40);
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0)
		gk20a_warn(dev_from_gk20a(g),
			"mmu invalidate too many retries");

	mutex_unlock(&mm->tlb_lock);
}
2913 | |||
2914 | int gk20a_mm_suspend(struct gk20a *g) | ||
2915 | { | ||
2916 | gk20a_dbg_fn(""); | ||
2917 | |||
2918 | gk20a_mm_fb_flush(g); | ||
2919 | gk20a_mm_l2_flush(g, true); | ||
2920 | |||
2921 | gk20a_dbg_fn("done"); | ||
2922 | return 0; | ||
2923 | } | ||
2924 | |||
2925 | void gk20a_mm_ltc_isr(struct gk20a *g) | ||
2926 | { | ||
2927 | u32 intr; | ||
2928 | |||
2929 | intr = gk20a_readl(g, ltc_ltc0_ltss_intr_r()); | ||
2930 | gk20a_err(dev_from_gk20a(g), "ltc: %08x\n", intr); | ||
2931 | gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr); | ||
2932 | } | ||
2933 | |||
2934 | bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g) | ||
2935 | { | ||
2936 | u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r()); | ||
2937 | return fb_mmu_debug_ctrl_debug_v(debug_ctrl) == | ||
2938 | fb_mmu_debug_ctrl_debug_enabled_v(); | ||
2939 | } | ||
2940 | |||
2941 | static int gk20a_mm_mmu_vpr_info_fetch_wait(struct gk20a *g, | ||
2942 | const unsigned int msec) | ||
2943 | { | ||
2944 | unsigned long timeout; | ||
2945 | |||
2946 | timeout = jiffies + msecs_to_jiffies(msec); | ||
2947 | while (1) { | ||
2948 | u32 val; | ||
2949 | |||
2950 | val = gk20a_readl(g, fb_mmu_vpr_info_r()); | ||
2951 | if (fb_mmu_vpr_info_fetch_v(val) == | ||
2952 | fb_mmu_vpr_info_fetch_false_v()) | ||
2953 | break; | ||
2954 | |||
2955 | if (tegra_platform_is_silicon() && | ||
2956 | WARN_ON(time_after(jiffies, timeout))) | ||
2957 | return -ETIME; | ||
2958 | } | ||
2959 | |||
2960 | return 0; | ||
2961 | } | ||
2962 | |||
/*
 * Trigger an MMU VPR info fetch and wait for it to complete.  Skips the
 * hardware access entirely when the device is not runtime-active.
 * Returns 0 on success, -ETIME if either wait times out.
 */
int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g)
{
	int ret = 0;

	/* take a busy ref without resuming; bail if the device is idle */
	gk20a_busy_noresume(g->dev);
	if (!pm_runtime_active(&g->dev->dev))
		goto fail;

	/* wait for any in-flight fetch to drain before triggering ours */
	if (gk20a_mm_mmu_vpr_info_fetch_wait(g, 5)) {
		ret = -ETIME;
		goto fail;
	}

	gk20a_writel(g, fb_mmu_vpr_info_r(),
		fb_mmu_vpr_info_fetch_true_v());

	/* wait for our fetch to complete */
	ret = gk20a_mm_mmu_vpr_info_fetch_wait(g, 5);

 fail:
	/* pairs with gk20a_busy_noresume() above */
	gk20a_idle(g->dev);
	return ret;
}
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h new file mode 100644 index 00000000..23d15c23 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -0,0 +1,464 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/mm_gk20a.h | ||
3 | * | ||
4 | * GK20A memory management | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #ifndef __MM_GK20A_H__ | ||
22 | #define __MM_GK20A_H__ | ||
23 | |||
24 | #include <linux/scatterlist.h> | ||
25 | #include <linux/dma-attrs.h> | ||
26 | #include <linux/iommu.h> | ||
27 | #include <asm/dma-iommu.h> | ||
28 | #include "gk20a_allocator.h" | ||
29 | |||
/* This "address bit" in the gmmu ptes (and other gk20a accesses)
 * signals the address as presented should be translated by the SMMU.
 * Without this bit present gk20a accesses are *not* translated.
 */
/* Hack, get this from manuals somehow... */
#define NV_MC_SMMU_VADDR_TRANSLATION_BIT 34
/* Fix: parenthesize the macro argument so expressions built from
 * operators of lower precedence than '|' (e.g. ?:) expand correctly. */
#define NV_MC_SMMU_VADDR_TRANSLATE(x) ((x) | \
		(1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT))

/* For now keep the size relatively small-ish compared to the full
 * 40b va. 32GB for now. It consists of two 16GB spaces. */
#define NV_GMMU_VA_RANGE 35ULL
#define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1)))
43 | |||
/* A dma-buf backed allocation: the buffer reference, its scatter/gather
 * mapping and the allocation size. */
struct mem_desc {
	struct dma_buf *ref;	/* dma-buf backing this memory */
	struct sg_table *sgt;	/* pages of the buffer */
	u32 size;		/* size in bytes */
};

/* A sub-range within a mem_desc. */
struct mem_desc_sub {
	u32 offset;		/* byte offset into the parent buffer */
	u32 size;		/* byte length of the sub-range */
};
54 | |||
/* Backing storage and sw state of a channel's GPFIFO ring buffer. */
struct gpfifo_desc {
	size_t size;		/* allocation size in bytes */
	u32 entry_num;		/* number of gpfifo entries */

	u32 get;		/* sw copy of consumer index */
	u32 put;		/* sw copy of producer index */

	bool wrap;		/* toggled each time 'put' wraps around */

	u64 iova;		/* DMA address of the ring */
	struct gpfifo *cpu_va;	/* kernel mapping of the ring */
	u64 gpu_va;		/* GPU virtual address of the ring */
};
68 | |||
/* The descriptors below all follow the same pattern: a kernel mapping
 * (cpuva), a DMA address (iova) and a size for one allocation, plus
 * whatever extra addresses that particular consumer needs. */

struct mmu_desc {
	void *cpuva;
	u64 iova;
	size_t size;
};

/* An instance block: also records the CPU physical address, which is
 * what gets programmed into the hardware. */
struct inst_desc {
	u64 iova;
	void *cpuva;
	phys_addr_t cpu_pa;
	size_t size;
};

struct surface_mem_desc {
	u64 iova;
	void *cpuva;
	struct sg_table *sgt;
	size_t size;
};

/* USERD (user doorbell) area; mapped into the GPU at gpu_va. */
struct userd_desc {
	struct sg_table *sgt;
	u64 iova;
	void *cpuva;
	size_t size;
	u64 gpu_va;
};

/* Backing store for a fifo runlist. */
struct runlist_mem_desc {
	u64 iova;
	void *cpuva;
	size_t size;
};
102 | |||
/* Graphics context patch buffer; data_count tracks how many patch
 * entries have been written. */
struct patch_desc {
	struct page **pages;
	u64 iova;
	size_t size;
	void *cpu_va;
	u64 gpu_va;
	u32 data_count;
};

/* PMU-accessible memory; pmu_va is the address in the PMU's own
 * virtual address space. */
struct pmu_mem_desc {
	void *cpuva;
	u64 iova;
	u64 pmu_va;
	size_t size;
};

/* Backing storage for a channel's private command queue. */
struct priv_cmd_queue_mem_desc {
	u64 base_iova;
	u32 *base_cpuva;
	size_t size;
};
124 | |||
/* Per-channel zcull context buffer and its hw configuration mode. */
struct zcull_ctx_desc {
	struct mem_desc mem;
	u64 gpu_va;
	u32 ctx_attr;
	u32 ctx_sw_mode;
};

/* Per-channel performance-monitor context buffer; same layout as the
 * zcull context descriptor. */
struct pm_ctx_desc {
	struct mem_desc mem;
	u64 gpu_va;
	u32 ctx_attr;
	u32 ctx_sw_mode;
};
138 | |||
struct gr_ctx_buffer_desc;
struct platform_device;
/* A global graphics context buffer.  'destroy' frees the buffer and is
 * set by whoever allocated it (e.g. the platform's secure allocator),
 * so secure and normal buffers can be released uniformly. */
struct gr_ctx_buffer_desc {
	void (*destroy)(struct platform_device *, struct gr_ctx_buffer_desc *);
	struct sg_table *sgt;
	struct page **pages;
	size_t size;
	u64 iova;
	struct dma_attrs attrs;	/* dma attrs used at allocation time */
	void *priv;		/* allocator-private data */
};

/* A per-channel graphics context allocation. */
struct gr_ctx_desc {
	struct page **pages;
	u64 iova;
	size_t size;
	u64 gpu_va;
};
157 | |||
158 | struct compbit_store_desc { | ||
159 | struct pages **pages; | ||
160 | size_t size; | ||
161 | u64 base_iova; | ||
162 | }; | ||
163 | |||
/* One GMMU page table (a single PDE's worth of PTEs). */
struct page_table_gk20a {
	/* backing for */
	/* Either a *page or a *mem_handle */
	void *ref;
	/* track mapping cnt on this page table */
	u32 ref_cnt;
	struct sg_table *sgt;
	size_t size;		/* size of the backing allocation */
};
173 | |||
#ifndef _NVHOST_MEM_MGR_H
/* GPU mapping access flags; guarded so this does not clash with the
 * matching definition in the nvhost memory manager header. */
enum gk20a_mem_rw_flag {
	gk20a_mem_flag_none = 0,
	gk20a_mem_flag_read_only = 1,
	gk20a_mem_flag_write_only = 2,
};
#endif

/* Indices of the supported GMMU page sizes (small/big); also sizes
 * the per-page-size arrays below. */
enum gmmu_pgsz_gk20a {
	gmmu_page_size_small = 0,
	gmmu_page_size_big = 1,
	gmmu_nr_page_sizes = 2
};
187 | |||
188 | |||
/* The page directory of an address space, plus the page tables hanging
 * off it (one array per supported page size). */
struct page_directory_gk20a {
	/* backing for */
	u32 num_pdes;
	void *kv;		/* kernel mapping of the PD */
	/* Either a *page or a *mem_handle */
	void *ref;
	struct sg_table *sgt;
	size_t size;
	struct page_table_gk20a *ptes[gmmu_nr_page_sizes];
};
199 | |||
/* Per-channel queue of kernel-generated ("private") commands such as
 * syncpoint waits/increments inserted around user submits. */
struct priv_cmd_queue {
	struct priv_cmd_queue_mem_desc mem;
	u64 base_gpuva;	/* gpu_va base */
	u16 size;	/* num of entries in words */
	u16 put;	/* put for priv cmd queue */
	u16 get;	/* get for priv cmd queue */
	struct list_head free;	/* list of pre-allocated free entries */
	struct list_head head;	/* list of used entries */
};

/* One slice of the private command queue handed to a submit. */
struct priv_cmd_entry {
	u32 *ptr;	/* CPU pointer to the entry's first word */
	u64 gva;	/* GPU address of the entry */
	u16 get;	/* start of entry in queue */
	u16 size;	/* in words */
	u32 gp_get;	/* gp_get when submitting last priv cmd */
	u32 gp_put;	/* gp_put when submitting last priv cmd */
	u32 gp_wrap;	/* wrap when submitting last priv cmd */
	bool pre_alloc;	/* prealloc entry, free to free list */
	struct list_head list;	/* node for lists */
};
221 | |||
/* A buffer mapped into a VM's GPU address space; lives in the VM's
 * mapped_buffers rb-tree. */
struct mapped_buffer_node {
	struct vm_gk20a *vm;		/* owning address space */
	struct rb_node node;		/* node in vm->mapped_buffers */
	struct list_head unmap_list;
	struct list_head va_buffers_list; /* entry in va_node's buffer list */
	struct vm_reserved_va_node *va_node; /* reservation containing us, if any */
	u64 addr;			/* GPU virtual address */
	u64 size;
	struct dma_buf *dmabuf;
	struct sg_table *sgt;
	struct kref ref;
	u32 user_mapped;		/* nonzero if mapped on behalf of userspace */
	bool own_mem_ref;		/* this node holds its own dmabuf reference */
	u32 pgsz_idx;			/* gmmu_pgsz_gk20a index used for the mapping */
	u32 ctag_offset;		/* comptag allocation, if compressed */
	u32 ctag_lines;
	u32 flags;			/* NVHOST_AS_MAP_BUFFER_FLAGS_* */
	u32 kind;			/* kind -- presumably hw memory kind; confirm */
	bool va_allocated;		/* VA came from the vm allocator (must be freed) */
};

/* A user-reserved region of GPU VA; fixed-offset mappings inside it
 * are linked on va_buffers_list. */
struct vm_reserved_va_node {
	struct list_head reserved_va_list; /* entry in vm->reserved_va_list */
	struct list_head va_buffers_list;  /* buffers mapped inside this region */
	u32 pgsz_idx;
	u64 vaddr_start;
	u64 size;
	bool sparse;	/* region is backed by zero/sparse pages */
};
251 | |||
/* One GPU virtual address space instance (per as_share / channel). */
struct vm_gk20a {
	struct mm_gk20a *mm;
	struct gk20a_as_share *as_share; /* as_share this represents */

	u64 va_start;
	u64 va_limit;

	int num_user_mapped_buffers;

	bool big_pages;   /* enable large page support */
	bool enable_ctag;
	bool tlb_dirty;   /* presumably: TLB invalidate pending -- confirm */
	bool mapped;

	struct kref ref;

	/* serializes page table / mapping updates */
	struct mutex update_gmmu_lock;

	struct page_directory_gk20a pdes;

	/* one VA allocator per page size */
	struct gk20a_allocator vma[gmmu_nr_page_sizes];
	struct rb_root mapped_buffers;

	struct list_head reserved_va_list;

	/* shared zero page, e.g. for sparse/unbacked areas */
	dma_addr_t zero_page_iova;
	void *zero_page_cpuva;
	struct sg_table *zero_page_sgt;
};
281 | |||
282 | struct gk20a; | ||
283 | struct channel_gk20a; | ||
284 | |||
285 | int gk20a_init_mm_support(struct gk20a *g); | ||
286 | int gk20a_init_mm_setup_sw(struct gk20a *g); | ||
287 | int gk20a_init_bar1_vm(struct mm_gk20a *mm); | ||
288 | int gk20a_init_pmu_vm(struct mm_gk20a *mm); | ||
289 | |||
290 | void gk20a_mm_fb_flush(struct gk20a *g); | ||
291 | void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); | ||
292 | void gk20a_mm_l2_invalidate(struct gk20a *g); | ||
293 | |||
/* Top-level memory management state for one GPU. */
struct mm_gk20a {
	struct gk20a *g;

	u32 compression_page_size;
	u32 big_page_size;
	u32 pde_stride;
	u32 pde_stride_shift;

	/* per page size: allocation order and PTE count of one page table */
	struct {
		u32 order;
		u32 num_ptes;
	} page_table_sizing[gmmu_nr_page_sizes];


	/* VA size used for channel address spaces */
	struct {
		u64 size;
	} channel;

	/* kernel-owned BAR1 address space */
	struct {
		u32 aperture_size;
		struct vm_gk20a vm;
		struct inst_desc inst_block;
	} bar1;

	/* kernel-owned PMU address space */
	struct {
		u32 aperture_size;
		struct vm_gk20a vm;
		struct inst_desc inst_block;
	} pmu;

	struct mutex tlb_lock;
	struct mutex l2_op_lock;	/* serializes L2 flush/invalidate ops */

	void (*remove_support)(struct mm_gk20a *mm);
	bool sw_ready;			/* set once sw setup has completed */
#ifdef CONFIG_DEBUG_FS
	u32 ltc_enabled;
	u32 ltc_enabled_debug;
#endif
};
334 | |||
335 | int gk20a_mm_init(struct mm_gk20a *mm); | ||
336 | |||
#define gk20a_from_mm(mm) ((mm)->g)
#define gk20a_from_vm(vm) ((vm)->mm->g)

/* Fix: parenthesize the argument for safe expansion, consistent with
 * the accessors above. */
#define dev_from_vm(vm) dev_from_gk20a((vm)->mm->g)

#define DEFAULT_ALLOC_ALIGNMENT (4*1024)
343 | |||
/* BAR1 aperture size in MiB. TBD: read this from fuses instead of
 * hard-coding. */
static inline int bar1_aperture_size_mb_gk20a(void)
{
	return 128;
}

/* max address bits */

/* Width of the "old" system physical address; original author left an
 * open question whether this is still meaningful. */
static inline int max_physaddr_bits_gk20a(void)
{
	return 40;
}

/* Width of a "vid phys" (asid/smmu) address -- possibly the real
 * system physical address width. */
static inline int max_vid_physaddr_bits_gk20a(void)
{
	return 37;
}

/* Width of a GPU virtual address ("chopped for area?"). */
static inline int max_vaddr_bits_gk20a(void)
{
	return 40;
}
363 | |||
364 | #if 0 /*related to addr bits above, concern below TBD on which is accurate */ | ||
365 | #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ | ||
366 | bus_bar1_block_ptr_s()) | ||
367 | #else | ||
368 | #define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v() | ||
369 | #endif | ||
370 | |||
371 | void gk20a_mm_dump_vm(struct vm_gk20a *vm, | ||
372 | u64 va_begin, u64 va_end, char *label); | ||
373 | |||
374 | int gk20a_mm_suspend(struct gk20a *g); | ||
375 | |||
376 | phys_addr_t gk20a_get_phys_from_iova(struct device *d, | ||
377 | u64 dma_addr); | ||
378 | |||
379 | int gk20a_get_sgtable(struct device *d, struct sg_table **sgt, | ||
380 | void *cpuva, u64 iova, | ||
381 | size_t size); | ||
382 | |||
383 | int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt, | ||
384 | struct page **pages, u64 iova, | ||
385 | size_t size); | ||
386 | |||
387 | void gk20a_free_sgtable(struct sg_table **sgt); | ||
388 | |||
389 | u64 gk20a_mm_iova_addr(struct scatterlist *sgl); | ||
390 | |||
391 | void gk20a_mm_ltc_isr(struct gk20a *g); | ||
392 | |||
393 | bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g); | ||
394 | |||
395 | int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g); | ||
396 | |||
397 | u64 gk20a_gmmu_map(struct vm_gk20a *vm, | ||
398 | struct sg_table **sgt, | ||
399 | u64 size, | ||
400 | u32 flags, | ||
401 | int rw_flag); | ||
402 | |||
403 | void gk20a_gmmu_unmap(struct vm_gk20a *vm, | ||
404 | u64 vaddr, | ||
405 | u64 size, | ||
406 | int rw_flag); | ||
407 | |||
408 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); | ||
409 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | ||
410 | struct sg_table *sgt); | ||
411 | |||
412 | u64 gk20a_vm_map(struct vm_gk20a *vm, | ||
413 | struct dma_buf *dmabuf, | ||
414 | u64 offset_align, | ||
415 | u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/, | ||
416 | int kind, | ||
417 | struct sg_table **sgt, | ||
418 | bool user_mapped, | ||
419 | int rw_flag); | ||
420 | |||
421 | /* unmap handle from kernel */ | ||
422 | void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset); | ||
423 | |||
424 | /* get reference to all currently mapped buffers */ | ||
425 | int gk20a_vm_get_buffers(struct vm_gk20a *vm, | ||
426 | struct mapped_buffer_node ***mapped_buffers, | ||
427 | int *num_buffers); | ||
428 | |||
429 | /* put references on the given buffers */ | ||
430 | void gk20a_vm_put_buffers(struct vm_gk20a *vm, | ||
431 | struct mapped_buffer_node **mapped_buffers, | ||
432 | int num_buffers); | ||
433 | |||
434 | /* invalidate tlbs for the vm area */ | ||
435 | void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm); | ||
436 | |||
437 | /* find buffer corresponding to va */ | ||
438 | int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, | ||
439 | struct dma_buf **dmabuf, | ||
440 | u64 *offset); | ||
441 | |||
442 | void gk20a_vm_get(struct vm_gk20a *vm); | ||
443 | void gk20a_vm_put(struct vm_gk20a *vm); | ||
444 | |||
445 | /* vm-as interface */ | ||
446 | struct nvhost_as_alloc_space_args; | ||
447 | struct nvhost_as_free_space_args; | ||
448 | int gk20a_vm_alloc_share(struct gk20a_as_share *as_share); | ||
449 | int gk20a_vm_release_share(struct gk20a_as_share *as_share); | ||
450 | int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | ||
451 | struct nvhost_as_alloc_space_args *args); | ||
452 | int gk20a_vm_free_space(struct gk20a_as_share *as_share, | ||
453 | struct nvhost_as_free_space_args *args); | ||
454 | int gk20a_vm_bind_channel(struct gk20a_as_share *as_share, | ||
455 | struct channel_gk20a *ch); | ||
456 | int gk20a_vm_map_buffer(struct gk20a_as_share *as_share, | ||
457 | int dmabuf_fd, | ||
458 | u64 *offset_align, | ||
459 | u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/ | ||
460 | int kind); | ||
461 | int gk20a_vm_unmap_buffer(struct gk20a_as_share *, u64 offset); | ||
462 | |||
463 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); | ||
#endif /* __MM_GK20A_H__ */
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h new file mode 100644 index 00000000..09f348cb --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h | |||
@@ -0,0 +1,160 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/soc/platform_gk20a.h | ||
3 | * | ||
4 | * GK20A Platform (SoC) Interface | ||
5 | * | ||
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #ifndef _GK20A_PLATFORM_H_ | ||
19 | #define _GK20A_PLATFORM_H_ | ||
20 | |||
21 | #include <linux/platform_device.h> | ||
22 | #include <linux/pm_domain.h> | ||
23 | |||
24 | struct gk20a; | ||
25 | struct channel_gk20a; | ||
26 | struct gr_ctx_buffer_desc; | ||
27 | struct gk20a_scale_profile; | ||
28 | |||
/* Per-SoC integration data and callbacks for the gk20a driver.  One
 * instance is attached to the platform device as drvdata. */
struct gk20a_platform {
#ifdef CONFIG_TEGRA_GK20A
	/* presumably the base host1x syncpoint id for this GPU -- confirm */
	u32 syncpt_base;
#endif
	/* Populated by the gk20a driver before probing the platform. */
	struct gk20a *g;

	/* Should be populated at probe. */
	bool can_railgate;

	/* Should be populated at probe. */
	bool has_syncpoints;

	/* Should be populated by probe. */
	struct dentry *debugfs;

	/* Clock configuration is stored here. Platform probe is responsible
	 * for filling this data.  (clk[2] is used as the EMC clock by the
	 * tegra postscale hook.) */
	struct clk *clk[3];
	int num_clks;

	/* Delay before rail gated */
	int railgate_delay;

	/* Delay before clock gated */
	int clockgate_delay;

	/* Initialize the platform interface of the gk20a driver.
	 *
	 * The platform implementation of this function must
	 * - set the power and clocks of the gk20a device to a known
	 *   state, and
	 * - populate the gk20a_platform structure (a pointer to the
	 *   structure can be obtained by calling gk20a_get_platform).
	 *
	 * After this function is finished, the driver will initialise
	 * pm runtime and genpd based on the platform configuration.
	 */
	int (*probe)(struct platform_device *dev);

	/* Second stage initialisation - called once all power management
	 * initialisations are done.
	 */
	int (*late_probe)(struct platform_device *dev);

	/* Called before submitting work to the gpu. The platform may use this
	 * hook to ensure that any other hw modules that the gpu depends on are
	 * powered. The platform implementation must count refs to this call. */
	int (*channel_busy)(struct platform_device *dev);

	/* Called after the work on the gpu is completed. The platform may use
	 * this hook to release power refs to any other hw modules that the gpu
	 * depends on. The platform implementation must count refs to this
	 * call. */
	void (*channel_idle)(struct platform_device *dev);

	/* This function is called to allocate secure memory (memory that the
	 * CPU cannot see). The function should fill the context buffer
	 * descriptor (especially fields destroy, sgt, size).
	 */
	int (*secure_alloc)(struct platform_device *dev,
			    struct gr_ctx_buffer_desc *desc,
			    size_t size);

	/* Device is going to be suspended */
	int (*suspend)(struct device *);

	/* Called to turn off the device */
	int (*railgate)(struct platform_device *dev);

	/* Called to turn on the device */
	int (*unrailgate)(struct platform_device *dev);

	/* Postscale callback is called after frequency change */
	void (*postscale)(struct platform_device *pdev,
			  unsigned long freq);

	/* Pre callback is called before frequency change */
	void (*prescale)(struct platform_device *pdev);

	/* Devfreq governor name. If scaling is enabled, we request
	 * this governor to be used in scaling */
	const char *devfreq_governor;

	/* Quality of service id. If this is set, the scaling routines
	 * will register a callback to id. Each time we receive a new value,
	 * the postscale callback gets called. */
	int qos_id;

	/* Called as part of debug dump. If the gpu gets hung, this function
	 * is responsible for delivering all necessary debug data of other
	 * hw units which may interact with the gpu without direct supervision
	 * of the CPU.
	 */
	void (*dump_platform_dependencies)(struct platform_device *dev);
};
125 | |||
/* Return the gk20a_platform attached to the platform device as
 * drvdata. */
static inline struct gk20a_platform *gk20a_get_platform(
		struct platform_device *dev)
{
	/* platform_get_drvdata() returns void *; no cast is needed in C */
	return platform_get_drvdata(dev);
}
131 | |||
132 | extern struct gk20a_platform gk20a_generic_platform; | ||
133 | #ifdef CONFIG_TEGRA_GK20A | ||
134 | extern struct gk20a_platform gk20a_tegra_platform; | ||
135 | #endif | ||
136 | |||
137 | static inline int gk20a_platform_channel_busy(struct platform_device *dev) | ||
138 | { | ||
139 | struct gk20a_platform *p = gk20a_get_platform(dev); | ||
140 | int ret = 0; | ||
141 | if (p->channel_busy) | ||
142 | ret = p->channel_busy(dev); | ||
143 | |||
144 | return ret; | ||
145 | } | ||
146 | |||
147 | static inline void gk20a_platform_channel_idle(struct platform_device *dev) | ||
148 | { | ||
149 | struct gk20a_platform *p = gk20a_get_platform(dev); | ||
150 | if (p->channel_idle) | ||
151 | p->channel_idle(dev); | ||
152 | } | ||
153 | |||
154 | static inline bool gk20a_platform_has_syncpoints(struct platform_device *dev) | ||
155 | { | ||
156 | struct gk20a_platform *p = gk20a_get_platform(dev); | ||
157 | return p->has_syncpoints; | ||
158 | } | ||
159 | |||
160 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c new file mode 100644 index 00000000..7b750df6 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c | |||
@@ -0,0 +1,35 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/platform_gk20a_generic.c | ||
3 | * | ||
4 | * GK20A Generic Platform Interface | ||
5 | * | ||
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include "platform_gk20a.h" | ||
22 | |||
/*
 * Generic platform probe.
 *
 * Clock and power initialization is not implemented for the generic
 * platform yet, so probing always succeeds.  (The original fetched the
 * platform data into an unused local just to cast it to void; drop
 * that dead code.)
 */
static int gk20a_generic_probe(struct platform_device *dev)
{
	/* TODO: Initialize clocks and power */

	return 0;
}
32 | |||
/* Platform descriptor for SoCs without dedicated gk20a support; only
 * the probe hook is provided. */
struct gk20a_platform gk20a_generic_platform = {
	.probe = gk20a_generic_probe,
};
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c new file mode 100644 index 00000000..35658f31 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | |||
@@ -0,0 +1,561 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/platform_gk20a_tegra.c | ||
3 | * | ||
4 | * GK20A Tegra Platform Interface | ||
5 | * | ||
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | #include <linux/tegra-powergate.h> | ||
20 | #include <linux/platform_data/tegra_edp.h> | ||
21 | #include <linux/nvhost_ioctl.h> | ||
22 | #include <linux/dma-buf.h> | ||
23 | #include <linux/nvmap.h> | ||
24 | #include <mach/irqs.h> | ||
25 | #include <mach/pm_domains.h> | ||
26 | |||
27 | #include "../../../arch/arm/mach-tegra/iomap.h" | ||
28 | |||
29 | #include "gk20a.h" | ||
30 | #include "hal_gk20a.h" | ||
31 | #include "platform_gk20a.h" | ||
32 | #include "gk20a_scale.h" | ||
33 | |||
34 | #define TEGRA_GK20A_INTR INT_GPU | ||
35 | #define TEGRA_GK20A_INTR_NONSTALL INT_GPU_NONSTALL | ||
36 | |||
37 | #define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */ | ||
38 | #define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */ | ||
39 | |||
40 | extern struct device tegra_vpr_dev; | ||
41 | struct gk20a_platform t132_gk20a_tegra_platform; | ||
42 | |||
/* Coefficients of the GPU-frequency -> EMC-frequency curve, in 20.12
 * fixed point (computed by gk20a_tegra_calibrate_emc(), consumed by
 * gk20a_tegra_get_emc_rate()). */
struct gk20a_emc_params {
	long emc_slope;		/* S: linear slope */
	long emc_offset;	/* O: linear offset */
	long emc_dip_slope;	/* Sd: quadratic dip slope */
	long emc_dip_offset;	/* Od: quadratic dip offset */
	long emc_xmid;		/* Rm: middle GPU rate (dip center) */
	bool linear;
};
51 | |||
52 | /* | ||
53 | * 20.12 fixed point arithmetic | ||
54 | */ | ||
55 | |||
56 | static const int FXFRAC = 12; | ||
57 | static const int FX_HALF = (1 << 12) / 2; | ||
58 | |||
59 | #define INT_TO_FX(x) ((x) << FXFRAC) | ||
60 | #define FX_TO_INT(x) ((x) >> FXFRAC) | ||
61 | |||
62 | #define MHZ_TO_HZ(x) ((x) * 1000000) | ||
63 | #define HZ_TO_MHZ(x) ((x) / 1000000) | ||
64 | |||
65 | int FXMUL(int x, int y) | ||
66 | { | ||
67 | return ((long long) x * (long long) y) >> FXFRAC; | ||
68 | } | ||
69 | |||
/*
 * Divide two 20.12 fixed point values: returns x / y in 20.12.
 *
 * 64-bit division is unavailable here, so instead of
 * (((long long)x) << FXFRAC) / y the numerator is left-shifted as far
 * as its magnitude allows and the divisor is right-shifted by the
 * remaining amount -- trading precision for a 32-bit division.
 */
int FXDIV(int x, int y)
{
	/* long long div operation not supported, must shift manually. This
	 * would have been
	 *
	 * return (((long long) x) << FXFRAC) / (long long) y;
	 */
	int pos, t;
	if (x == 0)
		return 0;

	/* find largest allowable right shift to numerator, limit to FXFRAC */
	t = x < 0 ? -x : x;
	pos = 31 - fls(t); /* fls can't be 32 if x != 0 */
	if (pos > FXFRAC)
		pos = FXFRAC;

	/* give the divisor whatever shift the numerator could not absorb */
	y >>= FXFRAC - pos;
	if (y == 0)
		return 0x7FFFFFFF; /* overflow, return MAX_FIXED */

	/* NOTE(review): x << pos is undefined for negative x in standard C;
	 * this relies on the compiler's arithmetic-shift behavior -- confirm. */
	return (x << pos) / y;
}
93 | |||
94 | static int gk20a_tegra_channel_busy(struct platform_device *dev) | ||
95 | { | ||
96 | int ret = 0; | ||
97 | |||
98 | /* Explicitly turn on the host1x clocks | ||
99 | * - This is needed as host1x driver sets ignore_children = true | ||
100 | * to cater the use case of display clock ON but host1x clock OFF | ||
101 | * in OS-Idle-Display-ON case | ||
102 | * - This was easily done in ACM as it only checked the ref count | ||
103 | * of host1x (or any device for that matter) to be zero before | ||
104 | * turning off its clock | ||
105 | * - However, runtime PM checks to see if *ANY* child of device is | ||
106 | * in ACTIVE state and if yes, it doesn't suspend the parent. As a | ||
107 | * result of this, display && host1x clocks remains ON during | ||
108 | * OS-Idle-Display-ON case | ||
109 | * - The code below fixes this use-case | ||
110 | */ | ||
111 | if (to_platform_device(dev->dev.parent)) | ||
112 | ret = nvhost_module_busy_ext( | ||
113 | to_platform_device(dev->dev.parent)); | ||
114 | |||
115 | return ret; | ||
116 | } | ||
117 | |||
118 | static void gk20a_tegra_channel_idle(struct platform_device *dev) | ||
119 | { | ||
120 | /* Explicitly turn off the host1x clocks */ | ||
121 | if (to_platform_device(dev->dev.parent)) | ||
122 | nvhost_module_idle_ext(to_platform_device(dev->dev.parent)); | ||
123 | } | ||
124 | |||
/* Free a secure buffer created by gk20a_tegra_secure_alloc(): release
 * its sg_table, then return the carveout memory to the VPR device. */
static void gk20a_tegra_secure_destroy(struct platform_device *pdev,
				       struct gr_ctx_buffer_desc *desc)
{
	gk20a_free_sgtable(&desc->sgt);
	/* NOTE(review): the cpu_addr argument is '&desc->iova' cast to
	 * void *.  With DMA_ATTR_NO_KERNEL_MAPPING the alloc path never
	 * produced a CPU address, but passing the address of the iova
	 * field looks suspicious -- confirm against the DMA API. */
	dma_free_attrs(&tegra_vpr_dev, desc->size,
			(void *)(uintptr_t)&desc->iova,
			desc->iova, &desc->attrs);
}
133 | |||
134 | static int gk20a_tegra_secure_alloc(struct platform_device *pdev, | ||
135 | struct gr_ctx_buffer_desc *desc, | ||
136 | size_t size) | ||
137 | { | ||
138 | struct device *dev = &pdev->dev; | ||
139 | DEFINE_DMA_ATTRS(attrs); | ||
140 | dma_addr_t iova; | ||
141 | struct sg_table *sgt; | ||
142 | struct page *page; | ||
143 | int err = 0; | ||
144 | |||
145 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
146 | |||
147 | (void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova, | ||
148 | GFP_KERNEL, &attrs); | ||
149 | if (dma_mapping_error(&tegra_vpr_dev, iova)) | ||
150 | return -ENOMEM; | ||
151 | |||
152 | desc->iova = iova; | ||
153 | desc->size = size; | ||
154 | desc->attrs = attrs; | ||
155 | desc->destroy = gk20a_tegra_secure_destroy; | ||
156 | |||
157 | sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); | ||
158 | if (!sgt) { | ||
159 | gk20a_err(dev, "failed to allocate memory\n"); | ||
160 | goto fail; | ||
161 | } | ||
162 | err = sg_alloc_table(sgt, 1, GFP_KERNEL); | ||
163 | if (err) { | ||
164 | gk20a_err(dev, "failed to allocate sg_table\n"); | ||
165 | goto fail_sgt; | ||
166 | } | ||
167 | page = phys_to_page(iova); | ||
168 | sg_set_page(sgt->sgl, page, size, 0); | ||
169 | sg_dma_address(sgt->sgl) = iova; | ||
170 | |||
171 | desc->sgt = sgt; | ||
172 | |||
173 | return err; | ||
174 | |||
175 | fail_sgt: | ||
176 | kfree(sgt); | ||
177 | fail: | ||
178 | dma_free_attrs(&tegra_vpr_dev, desc->size, | ||
179 | (void *)(uintptr_t)&desc->iova, | ||
180 | desc->iova, &desc->attrs); | ||
181 | return err; | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * gk20a_tegra_get_emc_rate() | ||
186 | * | ||
187 | * This function returns the minimum emc clock based on gpu frequency | ||
188 | */ | ||
189 | |||
190 | long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq) | ||
191 | { | ||
192 | long hz; | ||
193 | |||
194 | freq = INT_TO_FX(HZ_TO_MHZ(freq)); | ||
195 | hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset; | ||
196 | |||
197 | hz -= FXMUL(emc_params->emc_dip_slope, | ||
198 | FXMUL(freq - emc_params->emc_xmid, | ||
199 | freq - emc_params->emc_xmid)) + | ||
200 | emc_params->emc_dip_offset; | ||
201 | |||
202 | hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */ | ||
203 | hz = (hz < 0) ? 0 : hz; | ||
204 | |||
205 | return hz; | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * gk20a_tegra_postscale(profile, freq) | ||
210 | * | ||
211 | * This function sets emc frequency based on current gpu frequency | ||
212 | */ | ||
213 | |||
214 | static void gk20a_tegra_postscale(struct platform_device *pdev, | ||
215 | unsigned long freq) | ||
216 | { | ||
217 | struct gk20a_platform *platform = platform_get_drvdata(pdev); | ||
218 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
219 | struct gk20a_emc_params *emc_params = profile->private_data; | ||
220 | struct gk20a *g = get_gk20a(pdev); | ||
221 | |||
222 | long after = gk20a_clk_get_rate(g); | ||
223 | long emc_target = gk20a_tegra_get_emc_rate(emc_params, after); | ||
224 | |||
225 | clk_set_rate(platform->clk[2], emc_target); | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * gk20a_tegra_prescale(profile, freq) | ||
230 | * | ||
231 | * This function informs EDP about changed constraints. | ||
232 | */ | ||
233 | |||
234 | static void gk20a_tegra_prescale(struct platform_device *pdev) | ||
235 | { | ||
236 | struct gk20a *g = get_gk20a(pdev); | ||
237 | u32 avg = 0; | ||
238 | |||
239 | gk20a_pmu_load_norm(g, &avg); | ||
240 | tegra_edp_notify_gpu_load(avg); | ||
241 | } | ||
242 | |||
243 | /* | ||
244 | * gk20a_tegra_calibrate_emc() | ||
245 | * | ||
246 | * Compute emc scaling parameters | ||
247 | * | ||
248 | * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od) | ||
249 | * | ||
250 | * Remc - 3d.emc rate | ||
251 | * R3d - 3d.cbus rate | ||
252 | * Rm - 3d.cbus 'middle' rate = (max + min)/2 | ||
253 | * S - emc_slope | ||
254 | * O - emc_offset | ||
255 | * Sd - emc_dip_slope | ||
256 | * Od - emc_dip_offset | ||
257 | * | ||
258 | * this superposes a quadratic dip centered around the middle 3d | ||
259 | * frequency over a linear correlation of 3d.emc to 3d clock | ||
260 | * rates. | ||
261 | * | ||
262 | * S, O are chosen so that the maximum 3d rate produces the | ||
263 | * maximum 3d.emc rate exactly, and the minimum 3d rate produces | ||
264 | * at least the minimum 3d.emc rate. | ||
265 | * | ||
266 | * Sd and Od are chosen to produce the largest dip that will | ||
 * keep 3d.emc frequencies monotonically increasing with 3d
268 | * frequencies. To achieve this, the first derivative of Remc | ||
269 | * with respect to R3d should be zero for the minimal 3d rate: | ||
270 | * | ||
271 | * R'emc = S - 2 * Sd * (R3d - Rm) | ||
272 | * R'emc(R3d-min) = 0 | ||
273 | * S = 2 * Sd * (R3d-min - Rm) | ||
274 | * = 2 * Sd * (R3d-min - R3d-max) / 2 | ||
275 | * | ||
276 | * +------------------------------+ | ||
277 | * | Sd = S / (R3d-min - R3d-max) | | ||
278 | * +------------------------------+ | ||
279 | * | ||
280 | * dip = Sd * (R3d - Rm)^2 + Od | ||
281 | * | ||
282 | * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives | ||
283 | * | ||
284 | * Sd * (R3d-min - Rm)^2 + Od = 0 | ||
285 | * Od = -Sd * ((R3d-min - R3d-max) / 2)^2 | ||
286 | * = -Sd * ((R3d-min - R3d-max)^2) / 4 | ||
287 | * | ||
288 | * +------------------------------+ | ||
289 | * | Od = (emc-max - emc-min) / 4 | | ||
290 | * +------------------------------+ | ||
291 | * | ||
292 | */ | ||
293 | |||
/*
 * gk20a_tegra_calibrate_emc() - compute the coefficients used by
 * gk20a_tegra_get_emc_rate() (see the derivation comment above).
 *
 * @emc_params: output; slope/offset/dip coefficients in fixed point
 * @clk_3d:     GPU clock, used to obtain its min/max rates
 * @clk_3d_emc: EMC clock, used to obtain its min/max rates
 *
 * All rates are queried via clk_round_rate(clk, 0) for the minimum and
 * clk_round_rate(clk, UINT_MAX) for the maximum, then converted to
 * fixed-point MHz.
 */
void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params,
		struct clk *clk_3d, struct clk *clk_3d_emc)
{
	long correction;
	unsigned long max_emc;
	unsigned long min_emc;
	unsigned long min_rate_3d;
	unsigned long max_rate_3d;

	max_emc = clk_round_rate(clk_3d_emc, UINT_MAX);
	max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc));

	min_emc = clk_round_rate(clk_3d_emc, 0);
	min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc));

	max_rate_3d = clk_round_rate(clk_3d, UINT_MAX);
	max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d));

	min_rate_3d = clk_round_rate(clk_3d, 0);
	min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d));

	/* S = (emc-max - emc-min) / (R3d-max - R3d-min) */
	emc_params->emc_slope =
		FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d));
	/* O = emc-max - S * R3d-max */
	emc_params->emc_offset = max_emc -
		FXMUL(emc_params->emc_slope, max_rate_3d);
	/* Guarantee max 3d rate maps to max emc rate */
	/* NOTE(review): this correction recomputes the same expression
	 * and is algebraically a no-op; presumably kept to absorb
	 * fixed-point rounding — confirm before removing. */
	emc_params->emc_offset += max_emc -
		(FXMUL(emc_params->emc_slope, max_rate_3d) +
		emc_params->emc_offset);

	/* Od = (emc-max - emc-min) / 4 */
	emc_params->emc_dip_offset = (max_emc - min_emc) / 4;
	/* Sd = S / (R3d-min - R3d-max), i.e. negative */
	emc_params->emc_dip_slope =
		-FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d);
	/* Rm = (max + min) / 2 */
	emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2;
	/* Force the dip to be exactly zero at the maximum 3d rate. */
	correction =
		emc_params->emc_dip_offset +
		FXMUL(emc_params->emc_dip_slope,
			FXMUL(max_rate_3d - emc_params->emc_xmid,
				max_rate_3d - emc_params->emc_xmid));
	emc_params->emc_dip_offset -= correction;
}
335 | |||
336 | /* | ||
337 | * gk20a_tegra_railgate() | ||
338 | * | ||
339 | * Gate (disable) gk20a power rail | ||
340 | */ | ||
341 | |||
342 | static int gk20a_tegra_railgate(struct platform_device *pdev) | ||
343 | { | ||
344 | if (tegra_powergate_is_powered(TEGRA_POWERGATE_GPU)) | ||
345 | tegra_powergate_partition(TEGRA_POWERGATE_GPU); | ||
346 | return 0; | ||
347 | } | ||
348 | |||
/*
 * gk20a_tegra_unrailgate()
 *
 * Ungate (enable) gk20a power rail
 */

static int gk20a_tegra_unrailgate(struct platform_device *pdev)
{
	/* Power the GPU partition back on; always reports success. */
	tegra_unpowergate_partition(TEGRA_POWERGATE_GPU);
	return 0;
}
360 | |||
/*
 * Clocks looked up by gk20a_tegra_get_clocks(), in the order they are
 * stored in platform->clk[]: [0] GPU PLL reference, [1] PWR (PMU) clock,
 * [2] EMC.  UINT_MAX defaults are rounded down to the highest supported
 * rate by clk_round_rate().
 *
 * NOTE(review): this table is only used in this file; consider making
 * it static to avoid polluting the global namespace — confirm no other
 * translation unit references it.
 */
struct {
	char *name;
	unsigned long default_rate;
} tegra_gk20a_clocks[] = {
	{"PLLG_ref", UINT_MAX},
	{"pwr", 204000000},
	{"emc", UINT_MAX} };
368 | |||
369 | /* | ||
370 | * gk20a_tegra_get_clocks() | ||
371 | * | ||
372 | * This function finds clocks in tegra platform and populates | ||
373 | * the clock information to gk20a platform data. | ||
374 | */ | ||
375 | |||
376 | static int gk20a_tegra_get_clocks(struct platform_device *pdev) | ||
377 | { | ||
378 | struct gk20a_platform *platform = platform_get_drvdata(pdev); | ||
379 | char devname[16]; | ||
380 | int i; | ||
381 | int ret = 0; | ||
382 | |||
383 | snprintf(devname, sizeof(devname), | ||
384 | (pdev->id <= 0) ? "tegra_%s" : "tegra_%s.%d\n", | ||
385 | pdev->name, pdev->id); | ||
386 | |||
387 | platform->num_clks = 0; | ||
388 | for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) { | ||
389 | long rate = tegra_gk20a_clocks[i].default_rate; | ||
390 | struct clk *c; | ||
391 | |||
392 | c = clk_get_sys(devname, tegra_gk20a_clocks[i].name); | ||
393 | if (IS_ERR(c)) { | ||
394 | ret = PTR_ERR(c); | ||
395 | goto err_get_clock; | ||
396 | } | ||
397 | rate = clk_round_rate(c, rate); | ||
398 | clk_set_rate(c, rate); | ||
399 | platform->clk[i] = c; | ||
400 | } | ||
401 | platform->num_clks = i; | ||
402 | |||
403 | return 0; | ||
404 | |||
405 | err_get_clock: | ||
406 | |||
407 | while (i--) | ||
408 | clk_put(platform->clk[i]); | ||
409 | return ret; | ||
410 | } | ||
411 | |||
/*
 * gk20a_tegra_scale_init() - set up the gpu->emc scaling parameters.
 *
 * Allocates the emc_params calibration block, fills it from the current
 * GPU and EMC clock ranges, and attaches it to the scale profile where
 * gk20a_tegra_postscale() reads it.  Silently does nothing if there is
 * no scale profile or the allocation fails.
 *
 * NOTE(review): if this were called twice, the previous private_data
 * would leak — confirm it is only invoked once per probe.
 */
static void gk20a_tegra_scale_init(struct platform_device *pdev)
{
	struct gk20a_platform *platform = gk20a_get_platform(pdev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	struct gk20a_emc_params *emc_params;

	if (!profile)
		return;

	emc_params = kzalloc(sizeof(*emc_params), GFP_KERNEL);
	if (!emc_params)
		return;

	/* clk[2] is the "emc" clock (see tegra_gk20a_clocks[]). */
	gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g),
				  platform->clk[2]);

	profile->private_data = emc_params;
}
430 | |||
/* Dump host1x (nvhost) state for this device; hooked up as the
 * dump_platform_dependencies callback in the platform data below. */
static void gk20a_tegra_debug_dump(struct platform_device *pdev)
{
	struct gk20a_platform *platform = gk20a_get_platform(pdev);
	struct gk20a *g = platform->g;
	nvhost_debug_dump_device(g->dev);
}
437 | |||
438 | static int gk20a_tegra_probe(struct platform_device *dev) | ||
439 | { | ||
440 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
441 | |||
442 | if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA13) { | ||
443 | t132_gk20a_tegra_platform.g = platform->g; | ||
444 | *platform = t132_gk20a_tegra_platform; | ||
445 | } | ||
446 | |||
447 | gk20a_tegra_get_clocks(dev); | ||
448 | |||
449 | return 0; | ||
450 | } | ||
451 | |||
/* Late probe: runs after the generic gk20a setup has created the power
 * domain and (optionally) the scale profile. */
static int gk20a_tegra_late_probe(struct platform_device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);

	/* Make gk20a power domain a subdomain of mc */
	tegra_pd_add_sd(&platform->g->pd);

	/* Initialise tegra specific scaling quirks */
	gk20a_tegra_scale_init(dev);

	return 0;
}
464 | |||
/* Suspend callback: report zero GPU load so EDP releases any GPU
 * frequency constraint for the duration of suspend. */
static int gk20a_tegra_suspend(struct device *dev)
{
	tegra_edp_notify_gpu_load(0);
	return 0;
}
470 | |||
/* MMIO apertures (BAR0 registers, BAR1, simulator window) and the stall /
 * non-stall interrupt lines for the statically-declared gk20a device. */
static struct resource gk20a_tegra_resources[] = {
	{
	.start = TEGRA_GK20A_BAR0_BASE,
	.end   = TEGRA_GK20A_BAR0_BASE + TEGRA_GK20A_BAR0_SIZE - 1,
	.flags = IORESOURCE_MEM,
	},
	{
	.start = TEGRA_GK20A_BAR1_BASE,
	.end   = TEGRA_GK20A_BAR1_BASE + TEGRA_GK20A_BAR1_SIZE - 1,
	.flags = IORESOURCE_MEM,
	},
	{ /* Used on ASIM only */
	.start = TEGRA_GK20A_SIM_BASE,
	.end   = TEGRA_GK20A_SIM_BASE + TEGRA_GK20A_SIM_SIZE - 1,
	.flags = IORESOURCE_MEM,
	},
	{
	.start = TEGRA_GK20A_INTR,
	.end   = TEGRA_GK20A_INTR,
	.flags = IORESOURCE_IRQ,
	},
	{
	.start = TEGRA_GK20A_INTR_NONSTALL,
	.end   = TEGRA_GK20A_INTR_NONSTALL,
	.flags = IORESOURCE_IRQ,
	},
};
498 | |||
/* Tegra132 platform data; swapped in over the default platform data by
 * gk20a_tegra_probe() when the chip id is TEGRA13.  Identical to
 * gk20a_tegra_platform except that can_railgate is not set. */
struct gk20a_platform t132_gk20a_tegra_platform = {
	.has_syncpoints = true,

	/* power management configuration */
	.railgate_delay		= 500,
	.clockgate_delay	= 50,

	.probe = gk20a_tegra_probe,
	.late_probe = gk20a_tegra_late_probe,

	/* power management callbacks */
	.suspend = gk20a_tegra_suspend,
	.railgate = gk20a_tegra_railgate,
	.unrailgate = gk20a_tegra_unrailgate,

	/* frequency scaling configuration */
	.prescale = gk20a_tegra_prescale,
	.postscale = gk20a_tegra_postscale,
	.devfreq_governor = "nvhost_podgov",
	.qos_id = PM_QOS_GPU_FREQ_MIN,

	.channel_busy = gk20a_tegra_channel_busy,
	.channel_idle = gk20a_tegra_channel_idle,
	.secure_alloc = gk20a_tegra_secure_alloc,
	.dump_platform_dependencies = gk20a_tegra_debug_dump,
};
525 | |||
/* Default gk20a platform data (used unless probe swaps in the T132
 * variant above).  Unlike the T132 data, rail gating is permitted. */
struct gk20a_platform gk20a_tegra_platform = {
	.has_syncpoints = true,

	/* power management configuration */
	.railgate_delay		= 500,
	.clockgate_delay	= 50,
	.can_railgate		= true,

	.probe = gk20a_tegra_probe,
	.late_probe = gk20a_tegra_late_probe,

	/* power management callbacks */
	.suspend = gk20a_tegra_suspend,
	.railgate = gk20a_tegra_railgate,
	.unrailgate = gk20a_tegra_unrailgate,

	/* frequency scaling configuration */
	.prescale = gk20a_tegra_prescale,
	.postscale = gk20a_tegra_postscale,
	.devfreq_governor = "nvhost_podgov",
	.qos_id = PM_QOS_GPU_FREQ_MIN,

	.channel_busy = gk20a_tegra_channel_busy,
	.channel_idle = gk20a_tegra_channel_idle,
	.secure_alloc = gk20a_tegra_secure_alloc,
	.dump_platform_dependencies = gk20a_tegra_debug_dump,
};
553 | |||
/* Statically-declared gk20a platform device, wired to the default
 * platform data and the MMIO/IRQ resources above. */
struct platform_device tegra_gk20a_device = {
	.name		= "gk20a",
	.resource	= gk20a_tegra_resources,
	.num_resources	= ARRAY_SIZE(gk20a_tegra_resources),
	.dev		= {
		.platform_data = &gk20a_tegra_platform,
	},
};
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c new file mode 100644 index 00000000..a00499a9 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |||
@@ -0,0 +1,3796 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/pmu_gk20a.c | ||
3 | * | ||
4 | * GK20A PMU (aka. gPMU outside gk20a context) | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | |||
22 | #include <linux/delay.h> /* for mdelay */ | ||
23 | #include <linux/firmware.h> | ||
24 | #include <linux/clk.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/debugfs.h> | ||
27 | #include <linux/dma-mapping.h> | ||
28 | |||
29 | #include "gk20a.h" | ||
30 | #include "hw_mc_gk20a.h" | ||
31 | #include "hw_pwr_gk20a.h" | ||
32 | #include "hw_top_gk20a.h" | ||
33 | |||
34 | #define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin" | ||
35 | |||
36 | #define gk20a_dbg_pmu(fmt, arg...) \ | ||
37 | gk20a_dbg(gpu_dbg_pmu, fmt, ##arg) | ||
38 | |||
39 | static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu); | ||
40 | static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g, | ||
41 | u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt); | ||
42 | static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work); | ||
43 | static void pmu_save_zbc(struct gk20a *g, u32 entries); | ||
44 | static void ap_callback_init_and_enable_ctrl( | ||
45 | struct gk20a *g, struct pmu_msg *msg, | ||
46 | void *param, u32 seq_desc, u32 status); | ||
47 | static int gk20a_pmu_ap_send_command(struct gk20a *g, | ||
48 | union pmu_ap_cmd *p_ap_cmd, bool b_block); | ||
49 | |||
/* Size of the boot-time command line argument struct for each PMU
 * ucode ABI version (selected in gk20a_init_pmu()). */
static u32 pmu_cmdline_size_v0(struct pmu_gk20a *pmu)
{
	return sizeof(struct pmu_cmdline_args_v0);
}

static u32 pmu_cmdline_size_v1(struct pmu_gk20a *pmu)
{
	return sizeof(struct pmu_cmdline_args_v1);
}
59 | |||
/* Store the PMU (PWR) clock frequency, in Hz, into the version-specific
 * command line argument block. */
static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
{
	pmu->args_v1.cpu_freq_hz = freq;
}

static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
{
	pmu->args_v0.cpu_freq_hz = freq;
}
69 | |||
70 | static void *get_pmu_cmdline_args_ptr_v1(struct pmu_gk20a *pmu) | ||
71 | { | ||
72 | return (void *)(&pmu->args_v1); | ||
73 | } | ||
74 | |||
75 | static void *get_pmu_cmdline_args_ptr_v0(struct pmu_gk20a *pmu) | ||
76 | { | ||
77 | return (void *)(&pmu->args_v0); | ||
78 | } | ||
79 | |||
/* Size of the version-specific pmu_allocation descriptor. */
static u32 get_pmu_allocation_size_v1(struct pmu_gk20a *pmu)
{
	return sizeof(struct pmu_allocation_v1);
}

static u32 get_pmu_allocation_size_v0(struct pmu_gk20a *pmu)
{
	return sizeof(struct pmu_allocation_v0);
}
89 | |||
/* Point *pmu_alloc_ptr at assign_ptr through the version-specific
 * pmu_allocation type.  Callers handle allocations as opaque void
 * pointers; these helpers hide the v0/v1 layout difference. */
static void set_pmu_allocation_ptr_v1(struct pmu_gk20a *pmu,
	void **pmu_alloc_ptr, void *assign_ptr)
{
	struct pmu_allocation_v1 **pmu_a_ptr =
		(struct pmu_allocation_v1 **)pmu_alloc_ptr;
	*pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
}

static void set_pmu_allocation_ptr_v0(struct pmu_gk20a *pmu,
	void **pmu_alloc_ptr, void *assign_ptr)
{
	struct pmu_allocation_v0 **pmu_a_ptr =
		(struct pmu_allocation_v0 **)pmu_alloc_ptr;
	*pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr;
}
105 | |||
/* Store the DMEM block size in an opaque pmu_allocation. */
static void pmu_allocation_set_dmem_size_v1(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr, u16 size)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;
	pmu_a_ptr->alloc.dmem.size = size;
}

static void pmu_allocation_set_dmem_size_v0(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr, u16 size)
{
	struct pmu_allocation_v0 *pmu_a_ptr =
		(struct pmu_allocation_v0 *)pmu_alloc_ptr;
	pmu_a_ptr->alloc.dmem.size = size;
}
121 | |||
/* Read the DMEM block size from an opaque pmu_allocation. */
static u16 pmu_allocation_get_dmem_size_v1(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;
	return pmu_a_ptr->alloc.dmem.size;
}

static u16 pmu_allocation_get_dmem_size_v0(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v0 *pmu_a_ptr =
		(struct pmu_allocation_v0 *)pmu_alloc_ptr;
	return pmu_a_ptr->alloc.dmem.size;
}
137 | |||
/* Read the DMEM offset from an opaque pmu_allocation. */
static u32 pmu_allocation_get_dmem_offset_v1(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;
	return pmu_a_ptr->alloc.dmem.offset;
}

static u32 pmu_allocation_get_dmem_offset_v0(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v0 *pmu_a_ptr =
		(struct pmu_allocation_v0 *)pmu_alloc_ptr;
	return pmu_a_ptr->alloc.dmem.offset;
}
153 | |||
/* Return the address of the DMEM offset field inside an opaque
 * pmu_allocation, so callers can write it in place. */
static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;
	return &pmu_a_ptr->alloc.dmem.offset;
}

static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr)
{
	struct pmu_allocation_v0 *pmu_a_ptr =
		(struct pmu_allocation_v0 *)pmu_alloc_ptr;
	return &pmu_a_ptr->alloc.dmem.offset;
}
169 | |||
/* Store the DMEM offset in an opaque pmu_allocation. */
static void pmu_allocation_set_dmem_offset_v1(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr, u32 offset)
{
	struct pmu_allocation_v1 *pmu_a_ptr =
		(struct pmu_allocation_v1 *)pmu_alloc_ptr;
	pmu_a_ptr->alloc.dmem.offset = offset;
}

static void pmu_allocation_set_dmem_offset_v0(struct pmu_gk20a *pmu,
	void *pmu_alloc_ptr, u32 offset)
{
	struct pmu_allocation_v0 *pmu_a_ptr =
		(struct pmu_allocation_v0 *)pmu_alloc_ptr;
	pmu_a_ptr->alloc.dmem.offset = offset;
}
185 | |||
/* Accessors for the version-specific PMU INIT message layout: the
 * embedded init payload pointer and the software-managed DMEM area
 * offset/size fields. */
static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
{
	return (void *)(&(init->pmu_init_v1));
}

static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v1 *init =
		(struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
	return init->sw_managed_area_offset;
}

static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v1 *init =
		(struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
	return init->sw_managed_area_size;
}

static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init)
{
	return (void *)(&(init->pmu_init_v0));
}

static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v0 *init =
		(struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
	return init->sw_managed_area_offset;
}

static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg)
{
	struct pmu_init_msg_pmu_v0 *init =
		(struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
	return init->sw_managed_area_size;
}
223 | |||
/* Size of the version-specific PERFMON start command. */
static u32 get_pmu_perfmon_cmd_start_size_v1(void)
{
	return sizeof(struct pmu_perfmon_cmd_start_v1);
}

static u32 get_pmu_perfmon_cmd_start_size_v0(void)
{
	return sizeof(struct pmu_perfmon_cmd_start_v0);
}
233 | |||
234 | static int get_perfmon_cmd_start_offsetofvar_v1( | ||
235 | enum pmu_perfmon_cmd_start_fields field) | ||
236 | { | ||
237 | switch (field) { | ||
238 | case COUNTER_ALLOC: | ||
239 | return offsetof(struct pmu_perfmon_cmd_start_v1, | ||
240 | counter_alloc); | ||
241 | default: | ||
242 | return -EINVAL; | ||
243 | break; | ||
244 | } | ||
245 | return 0; | ||
246 | } | ||
247 | |||
248 | static int get_perfmon_cmd_start_offsetofvar_v0( | ||
249 | enum pmu_perfmon_cmd_start_fields field) | ||
250 | { | ||
251 | switch (field) { | ||
252 | case COUNTER_ALLOC: | ||
253 | return offsetof(struct pmu_perfmon_cmd_start_v0, | ||
254 | counter_alloc); | ||
255 | default: | ||
256 | return -EINVAL; | ||
257 | break; | ||
258 | } | ||
259 | return 0; | ||
260 | } | ||
261 | |||
/* Size of the version-specific PERFMON init command. */
static u32 get_pmu_perfmon_cmd_init_size_v1(void)
{
	return sizeof(struct pmu_perfmon_cmd_init_v1);
}

static u32 get_pmu_perfmon_cmd_init_size_v0(void)
{
	return sizeof(struct pmu_perfmon_cmd_init_v0);
}
271 | |||
272 | static int get_perfmon_cmd_init_offsetofvar_v1( | ||
273 | enum pmu_perfmon_cmd_start_fields field) | ||
274 | { | ||
275 | switch (field) { | ||
276 | case COUNTER_ALLOC: | ||
277 | return offsetof(struct pmu_perfmon_cmd_init_v1, | ||
278 | counter_alloc); | ||
279 | default: | ||
280 | return -EINVAL; | ||
281 | break; | ||
282 | } | ||
283 | return 0; | ||
284 | } | ||
285 | |||
286 | static int get_perfmon_cmd_init_offsetofvar_v0( | ||
287 | enum pmu_perfmon_cmd_start_fields field) | ||
288 | { | ||
289 | switch (field) { | ||
290 | case COUNTER_ALLOC: | ||
291 | return offsetof(struct pmu_perfmon_cmd_init_v0, | ||
292 | counter_alloc); | ||
293 | default: | ||
294 | return -EINVAL; | ||
295 | break; | ||
296 | } | ||
297 | return 0; | ||
298 | } | ||
299 | |||
/* Field setters and flag getter for the version-specific PERFMON start
 * command (cmd_type, group_id, state_id, flags). */
static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
	start->cmd_type = value;
}

static void perfmon_start_set_cmd_type_v0(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
	start->cmd_type = value;
}

static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
	start->group_id = value;
}

static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
	start->group_id = value;
}

static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
	start->state_id = value;
}

static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
	start->state_id = value;
}

static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
	start->flags = value;
}

static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value)
{
	struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
	start->flags = value;
}

static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
{
	struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
	return start->flags;
}

static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc)
{
	struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
	return start->flags;
}
359 | |||
/* Field setters for the version-specific PERFMON init command
 * (sample buffer offset, decrease count, base counter id, sample
 * period, counter count, moving-average depth). */
static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
	u16 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
	init->sample_buffer = value;
}

static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc,
	u16 value)
{
	struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
	init->sample_buffer = value;
}

static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
	init->to_decrease_count = value;
}

static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
	init->to_decrease_count = value;
}

static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
	init->base_counter_id = value;
}

static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
	init->base_counter_id = value;
}

static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
	u32 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
	init->sample_period_us = value;
}

static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc,
	u32 value)
{
	struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
	init->sample_period_us = value;
}

static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
	init->num_counters = value;
}

static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
	init->num_counters = value;
}

static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
	init->samples_in_moving_avg = value;
}

static void perfmon_cmd_init_set_mov_avg_v0(struct pmu_perfmon_cmd *pc,
	u8 value)
{
	struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
	init->samples_in_moving_avg = value;
}
443 | |||
/* Copy index/offset/size of queue 'id' from the version-specific PMU
 * INIT message payload into the driver-side queue descriptor. */
static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue,
	u32 id, void *pmu_init_msg)
{
	struct pmu_init_msg_pmu_v0 *init =
		(struct pmu_init_msg_pmu_v0 *)pmu_init_msg;
	queue->index    = init->queue_info[id].index;
	queue->offset   = init->queue_info[id].offset;
	queue->size     = init->queue_info[id].size;
}

static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue,
	u32 id, void *pmu_init_msg)
{
	struct pmu_init_msg_pmu_v1 *init =
		(struct pmu_init_msg_pmu_v1 *)pmu_init_msg;
	queue->index    = init->queue_info[id].index;
	queue->offset   = init->queue_info[id].offset;
	queue->size     = init->queue_info[id].size;
}
463 | |||
464 | static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq) | ||
465 | { | ||
466 | return (void *)(&seq->in_v1); | ||
467 | } | ||
468 | |||
469 | static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq) | ||
470 | { | ||
471 | return (void *)(&seq->in_v0); | ||
472 | } | ||
473 | |||
474 | static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq) | ||
475 | { | ||
476 | return (void *)(&seq->out_v1); | ||
477 | } | ||
478 | |||
479 | static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq) | ||
480 | { | ||
481 | return (void *)(&seq->out_v0); | ||
482 | } | ||
483 | |||
484 | static int gk20a_init_pmu(struct pmu_gk20a *pmu) | ||
485 | { | ||
486 | struct gk20a *g = pmu->g; | ||
487 | switch (pmu->desc->app_version) { | ||
488 | case APP_VERSION_1: | ||
489 | g->ops.pmu_ver.cmd_id_zbc_table_update = 16; | ||
490 | g->ops.pmu_ver.get_pmu_cmdline_args_size = | ||
491 | pmu_cmdline_size_v1; | ||
492 | g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq = | ||
493 | set_pmu_cmdline_args_cpufreq_v1; | ||
494 | g->ops.pmu_ver.get_pmu_cmdline_args_ptr = | ||
495 | get_pmu_cmdline_args_ptr_v1; | ||
496 | g->ops.pmu_ver.get_pmu_allocation_struct_size = | ||
497 | get_pmu_allocation_size_v1; | ||
498 | g->ops.pmu_ver.set_pmu_allocation_ptr = | ||
499 | set_pmu_allocation_ptr_v1; | ||
500 | g->ops.pmu_ver.pmu_allocation_set_dmem_size = | ||
501 | pmu_allocation_set_dmem_size_v1; | ||
502 | g->ops.pmu_ver.pmu_allocation_get_dmem_size = | ||
503 | pmu_allocation_get_dmem_size_v1; | ||
504 | g->ops.pmu_ver.pmu_allocation_get_dmem_offset = | ||
505 | pmu_allocation_get_dmem_offset_v1; | ||
506 | g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr = | ||
507 | pmu_allocation_get_dmem_offset_addr_v1; | ||
508 | g->ops.pmu_ver.pmu_allocation_set_dmem_offset = | ||
509 | pmu_allocation_set_dmem_offset_v1; | ||
510 | g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params = | ||
511 | get_pmu_init_msg_pmu_queue_params_v1; | ||
512 | g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr = | ||
513 | get_pmu_msg_pmu_init_msg_ptr_v1; | ||
514 | g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off = | ||
515 | get_pmu_init_msg_pmu_sw_mg_off_v1; | ||
516 | g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size = | ||
517 | get_pmu_init_msg_pmu_sw_mg_size_v1; | ||
518 | g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size = | ||
519 | get_pmu_perfmon_cmd_start_size_v1; | ||
520 | g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar = | ||
521 | get_perfmon_cmd_start_offsetofvar_v1; | ||
522 | g->ops.pmu_ver.perfmon_start_set_cmd_type = | ||
523 | perfmon_start_set_cmd_type_v1; | ||
524 | g->ops.pmu_ver.perfmon_start_set_group_id = | ||
525 | perfmon_start_set_group_id_v1; | ||
526 | g->ops.pmu_ver.perfmon_start_set_state_id = | ||
527 | perfmon_start_set_state_id_v1; | ||
528 | g->ops.pmu_ver.perfmon_start_set_flags = | ||
529 | perfmon_start_set_flags_v1; | ||
530 | g->ops.pmu_ver.perfmon_start_get_flags = | ||
531 | perfmon_start_get_flags_v1; | ||
532 | g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size = | ||
533 | get_pmu_perfmon_cmd_init_size_v1; | ||
534 | g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar = | ||
535 | get_perfmon_cmd_init_offsetofvar_v1; | ||
536 | g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer = | ||
537 | perfmon_cmd_init_set_sample_buffer_v1; | ||
538 | g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt = | ||
539 | perfmon_cmd_init_set_dec_cnt_v1; | ||
540 | g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id = | ||
541 | perfmon_cmd_init_set_base_cnt_id_v1; | ||
542 | g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us = | ||
543 | perfmon_cmd_init_set_samp_period_us_v1; | ||
544 | g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt = | ||
545 | perfmon_cmd_init_set_num_cnt_v1; | ||
546 | g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg = | ||
547 | perfmon_cmd_init_set_mov_avg_v1; | ||
548 | g->ops.pmu_ver.get_pmu_seq_in_a_ptr = | ||
549 | get_pmu_sequence_in_alloc_ptr_v1; | ||
550 | g->ops.pmu_ver.get_pmu_seq_out_a_ptr = | ||
551 | get_pmu_sequence_out_alloc_ptr_v1; | ||
552 | break; | ||
553 | case APP_VERSION_0: | ||
554 | g->ops.pmu_ver.cmd_id_zbc_table_update = 14; | ||
555 | g->ops.pmu_ver.get_pmu_cmdline_args_size = | ||
556 | pmu_cmdline_size_v0; | ||
557 | g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq = | ||
558 | set_pmu_cmdline_args_cpufreq_v0; | ||
559 | g->ops.pmu_ver.get_pmu_cmdline_args_ptr = | ||
560 | get_pmu_cmdline_args_ptr_v0; | ||
561 | g->ops.pmu_ver.get_pmu_allocation_struct_size = | ||
562 | get_pmu_allocation_size_v0; | ||
563 | g->ops.pmu_ver.set_pmu_allocation_ptr = | ||
564 | set_pmu_allocation_ptr_v0; | ||
565 | g->ops.pmu_ver.pmu_allocation_set_dmem_size = | ||
566 | pmu_allocation_set_dmem_size_v0; | ||
567 | g->ops.pmu_ver.pmu_allocation_get_dmem_size = | ||
568 | pmu_allocation_get_dmem_size_v0; | ||
569 | g->ops.pmu_ver.pmu_allocation_get_dmem_offset = | ||
570 | pmu_allocation_get_dmem_offset_v0; | ||
571 | g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr = | ||
572 | pmu_allocation_get_dmem_offset_addr_v0; | ||
573 | g->ops.pmu_ver.pmu_allocation_set_dmem_offset = | ||
574 | pmu_allocation_set_dmem_offset_v0; | ||
575 | g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params = | ||
576 | get_pmu_init_msg_pmu_queue_params_v0; | ||
577 | g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr = | ||
578 | get_pmu_msg_pmu_init_msg_ptr_v0; | ||
579 | g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off = | ||
580 | get_pmu_init_msg_pmu_sw_mg_off_v0; | ||
581 | g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size = | ||
582 | get_pmu_init_msg_pmu_sw_mg_size_v0; | ||
583 | g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size = | ||
584 | get_pmu_perfmon_cmd_start_size_v0; | ||
585 | g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar = | ||
586 | get_perfmon_cmd_start_offsetofvar_v0; | ||
587 | g->ops.pmu_ver.perfmon_start_set_cmd_type = | ||
588 | perfmon_start_set_cmd_type_v0; | ||
589 | g->ops.pmu_ver.perfmon_start_set_group_id = | ||
590 | perfmon_start_set_group_id_v0; | ||
591 | g->ops.pmu_ver.perfmon_start_set_state_id = | ||
592 | perfmon_start_set_state_id_v0; | ||
593 | g->ops.pmu_ver.perfmon_start_set_flags = | ||
594 | perfmon_start_set_flags_v0; | ||
595 | g->ops.pmu_ver.perfmon_start_get_flags = | ||
596 | perfmon_start_get_flags_v0; | ||
597 | g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size = | ||
598 | get_pmu_perfmon_cmd_init_size_v0; | ||
599 | g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar = | ||
600 | get_perfmon_cmd_init_offsetofvar_v0; | ||
601 | g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer = | ||
602 | perfmon_cmd_init_set_sample_buffer_v0; | ||
603 | g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt = | ||
604 | perfmon_cmd_init_set_dec_cnt_v0; | ||
605 | g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id = | ||
606 | perfmon_cmd_init_set_base_cnt_id_v0; | ||
607 | g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us = | ||
608 | perfmon_cmd_init_set_samp_period_us_v0; | ||
609 | g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt = | ||
610 | perfmon_cmd_init_set_num_cnt_v0; | ||
611 | g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg = | ||
612 | perfmon_cmd_init_set_mov_avg_v0; | ||
613 | g->ops.pmu_ver.get_pmu_seq_in_a_ptr = | ||
614 | get_pmu_sequence_in_alloc_ptr_v0; | ||
615 | g->ops.pmu_ver.get_pmu_seq_out_a_ptr = | ||
616 | get_pmu_sequence_out_alloc_ptr_v0; | ||
617 | break; | ||
618 | default: | ||
619 | gk20a_err(dev_from_gk20a(pmu->g), | ||
620 | "PMU code version not supported\n"); | ||
621 | return -EINVAL; | ||
622 | break; | ||
623 | } | ||
624 | return 0; | ||
625 | } | ||
626 | |||
/*
 * Read @size bytes from falcon DMEM offset @src into @dst through the
 * indirect DMEMC/DMEMD register pair of access port @port.
 *
 * @src must be 4-byte aligned.  Whole 32-bit words are read first via
 * the auto-incrementing data port; any trailing 1-3 bytes come from one
 * extra word read.  pmu->pmu_copy_lock serializes users of the port,
 * because the DMEMC address/auto-increment state is shared.
 */
static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
		u32 src, u8 *dst, u32 size, u8 port)
{
	struct gk20a *g = pmu->g;
	u32 i, words, bytes;
	u32 data, addr_mask;
	u32 *dst_u32 = (u32*)dst;

	if (size == 0) {
		gk20a_err(dev_from_gk20a(g),
			"size is zero");
		return;
	}

	if (src & 0x3) {
		gk20a_err(dev_from_gk20a(g),
			"src (0x%08x) not 4-byte aligned", src);
		return;
	}

	mutex_lock(&pmu->pmu_copy_lock);

	words = size >> 2;
	bytes = size & 0x3;

	/* keep only the offset/block fields of the DMEM address */
	addr_mask = pwr_falcon_dmemc_offs_m() |
		pwr_falcon_dmemc_blk_m();

	src &= addr_mask;

	/* program the start address with auto-increment-on-read */
	gk20a_writel(g, pwr_falcon_dmemc_r(port),
		src | pwr_falcon_dmemc_aincr_f(1));

	for (i = 0; i < words; i++)
		dst_u32[i] = gk20a_readl(g, pwr_falcon_dmemd_r(port));

	if (bytes > 0) {
		/* partial tail word: read one more word, copy low bytes */
		data = gk20a_readl(g, pwr_falcon_dmemd_r(port));
		for (i = 0; i < bytes; i++) {
			dst[(words << 2) + i] = ((u8 *)&data)[i];
			gk20a_dbg_pmu("read: dst_u8[%d]=0x%08x",
				i, dst[(words << 2) + i]);
		}
	}
	mutex_unlock(&pmu->pmu_copy_lock);
	return;
}
674 | |||
/*
 * Write @size bytes from @src into falcon DMEM offset @dst through the
 * indirect DMEMC/DMEMD register pair of access port @port.
 *
 * @dst must be 4-byte aligned.  Whole words are written through the
 * auto-incrementing data port; a 1-3 byte tail is zero-padded into one
 * final word write.  After the copy, the DMEMC address is read back and
 * compared against the expected end offset to detect a short write.
 * pmu->pmu_copy_lock serializes users of the shared port state.
 */
static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
		u32 dst, u8 *src, u32 size, u8 port)
{
	struct gk20a *g = pmu->g;
	u32 i, words, bytes;
	u32 data, addr_mask;
	u32 *src_u32 = (u32*)src;

	if (size == 0) {
		gk20a_err(dev_from_gk20a(g),
			"size is zero");
		return;
	}

	if (dst & 0x3) {
		gk20a_err(dev_from_gk20a(g),
			"dst (0x%08x) not 4-byte aligned", dst);
		return;
	}

	mutex_lock(&pmu->pmu_copy_lock);

	words = size >> 2;
	bytes = size & 0x3;

	/* keep only the offset/block fields of the DMEM address */
	addr_mask = pwr_falcon_dmemc_offs_m() |
		pwr_falcon_dmemc_blk_m();

	dst &= addr_mask;

	/* program the start address with auto-increment-on-write */
	gk20a_writel(g, pwr_falcon_dmemc_r(port),
		dst | pwr_falcon_dmemc_aincw_f(1));

	for (i = 0; i < words; i++)
		gk20a_writel(g, pwr_falcon_dmemd_r(port), src_u32[i]);

	if (bytes > 0) {
		/* pack the 1-3 byte tail into a zero-padded word */
		data = 0;
		for (i = 0; i < bytes; i++)
			((u8 *)&data)[i] = src[(words << 2) + i];
		gk20a_writel(g, pwr_falcon_dmemd_r(port), data);
	}

	/* the auto-incremented address should now equal dst + aligned size;
	 * anything else means the HW consumed fewer/more words than sent */
	data = gk20a_readl(g, pwr_falcon_dmemc_r(port)) & addr_mask;
	size = ALIGN(size, 4);
	if (data != dst + size) {
		gk20a_err(dev_from_gk20a(g),
			"copy failed. bytes written %d, expected %d",
			data - dst, size);
	}
	mutex_unlock(&pmu->pmu_copy_lock);
	return;
}
728 | |||
729 | static int pmu_idle(struct pmu_gk20a *pmu) | ||
730 | { | ||
731 | struct gk20a *g = pmu->g; | ||
732 | unsigned long end_jiffies = jiffies + | ||
733 | msecs_to_jiffies(2000); | ||
734 | u32 idle_stat; | ||
735 | |||
736 | /* wait for pmu idle */ | ||
737 | do { | ||
738 | idle_stat = gk20a_readl(g, pwr_falcon_idlestate_r()); | ||
739 | |||
740 | if (pwr_falcon_idlestate_falcon_busy_v(idle_stat) == 0 && | ||
741 | pwr_falcon_idlestate_ext_busy_v(idle_stat) == 0) { | ||
742 | break; | ||
743 | } | ||
744 | |||
745 | if (time_after_eq(jiffies, end_jiffies)) { | ||
746 | gk20a_err(dev_from_gk20a(g), | ||
747 | "timeout waiting pmu idle : 0x%08x", | ||
748 | idle_stat); | ||
749 | return -EBUSY; | ||
750 | } | ||
751 | usleep_range(100, 200); | ||
752 | } while (1); | ||
753 | |||
754 | gk20a_dbg_fn("done"); | ||
755 | return 0; | ||
756 | } | ||
757 | |||
/*
 * Mask all PMU interrupt sources, then (when @enable is true) program
 * routing/level for each source and unmask the selected set.
 *
 * The function always starts by disabling the PMU bits in both MC
 * interrupt masks and clearing every falcon interrupt mask bit, so the
 * enable path starts from a clean slate.  On enable, only
 * mc_intr_mask_0 gets its PMU bit set back; mask_1 stays cleared.
 */
static void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable)
{
	struct gk20a *g = pmu->g;

	gk20a_dbg_fn("");

	/* block PMU interrupts at the master controller first */
	gk20a_writel(g, mc_intr_mask_0_r(),
		gk20a_readl(g, mc_intr_mask_0_r()) &
		~mc_intr_mask_0_pmu_enabled_f());
	gk20a_writel(g, mc_intr_mask_1_r(),
		gk20a_readl(g, mc_intr_mask_1_r()) &
		~mc_intr_mask_1_pmu_enabled_f());

	/* mask every falcon interrupt source */
	gk20a_writel(g, pwr_falcon_irqmclr_r(),
		pwr_falcon_irqmclr_gptmr_f(1) |
		pwr_falcon_irqmclr_wdtmr_f(1) |
		pwr_falcon_irqmclr_mthd_f(1)  |
		pwr_falcon_irqmclr_ctxsw_f(1) |
		pwr_falcon_irqmclr_halt_f(1)  |
		pwr_falcon_irqmclr_exterr_f(1) |
		pwr_falcon_irqmclr_swgen0_f(1) |
		pwr_falcon_irqmclr_swgen1_f(1) |
		pwr_falcon_irqmclr_ext_f(0xff));

	if (enable) {
		/* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
		gk20a_writel(g, pwr_falcon_irqdest_r(),
			pwr_falcon_irqdest_host_gptmr_f(0)    |
			pwr_falcon_irqdest_host_wdtmr_f(1)    |
			pwr_falcon_irqdest_host_mthd_f(0)     |
			pwr_falcon_irqdest_host_ctxsw_f(0)    |
			pwr_falcon_irqdest_host_halt_f(1)     |
			pwr_falcon_irqdest_host_exterr_f(0)   |
			pwr_falcon_irqdest_host_swgen0_f(1)   |
			pwr_falcon_irqdest_host_swgen1_f(0)   |
			pwr_falcon_irqdest_host_ext_f(0xff)   |
			pwr_falcon_irqdest_target_gptmr_f(1)  |
			pwr_falcon_irqdest_target_wdtmr_f(0)  |
			pwr_falcon_irqdest_target_mthd_f(0)   |
			pwr_falcon_irqdest_target_ctxsw_f(0)  |
			pwr_falcon_irqdest_target_halt_f(0)   |
			pwr_falcon_irqdest_target_exterr_f(0) |
			pwr_falcon_irqdest_target_swgen0_f(0) |
			pwr_falcon_irqdest_target_swgen1_f(0) |
			pwr_falcon_irqdest_target_ext_f(0xff));

		/* 0=disable, 1=enable */
		gk20a_writel(g, pwr_falcon_irqmset_r(),
			pwr_falcon_irqmset_gptmr_f(1)  |
			pwr_falcon_irqmset_wdtmr_f(1)  |
			pwr_falcon_irqmset_mthd_f(0)   |
			pwr_falcon_irqmset_ctxsw_f(0)  |
			pwr_falcon_irqmset_halt_f(1)   |
			pwr_falcon_irqmset_exterr_f(1) |
			pwr_falcon_irqmset_swgen0_f(1) |
			pwr_falcon_irqmset_swgen1_f(1));

		gk20a_writel(g, mc_intr_mask_0_r(),
			gk20a_readl(g, mc_intr_mask_0_r()) |
			mc_intr_mask_0_pmu_enabled_f());
	}

	gk20a_dbg_fn("done");
}
822 | |||
823 | static int pmu_enable_hw(struct pmu_gk20a *pmu, bool enable) | ||
824 | { | ||
825 | struct gk20a *g = pmu->g; | ||
826 | |||
827 | gk20a_dbg_fn(""); | ||
828 | |||
829 | if (enable) { | ||
830 | int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT; | ||
831 | gk20a_enable(g, mc_enable_pwr_enabled_f()); | ||
832 | |||
833 | do { | ||
834 | u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) & | ||
835 | (pwr_falcon_dmactl_dmem_scrubbing_m() | | ||
836 | pwr_falcon_dmactl_imem_scrubbing_m()); | ||
837 | |||
838 | if (!w) { | ||
839 | gk20a_dbg_fn("done"); | ||
840 | return 0; | ||
841 | } | ||
842 | udelay(GR_IDLE_CHECK_DEFAULT); | ||
843 | } while (--retries || !tegra_platform_is_silicon()); | ||
844 | |||
845 | gk20a_disable(g, mc_enable_pwr_enabled_f()); | ||
846 | gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout"); | ||
847 | |||
848 | return -ETIMEDOUT; | ||
849 | } else { | ||
850 | gk20a_disable(g, mc_enable_pwr_enabled_f()); | ||
851 | return 0; | ||
852 | } | ||
853 | } | ||
854 | |||
855 | static int pmu_enable(struct pmu_gk20a *pmu, bool enable) | ||
856 | { | ||
857 | struct gk20a *g = pmu->g; | ||
858 | u32 pmc_enable; | ||
859 | int err; | ||
860 | |||
861 | gk20a_dbg_fn(""); | ||
862 | |||
863 | if (!enable) { | ||
864 | pmc_enable = gk20a_readl(g, mc_enable_r()); | ||
865 | if (mc_enable_pwr_v(pmc_enable) != | ||
866 | mc_enable_pwr_disabled_v()) { | ||
867 | |||
868 | pmu_enable_irq(pmu, false); | ||
869 | pmu_enable_hw(pmu, false); | ||
870 | } | ||
871 | } else { | ||
872 | err = pmu_enable_hw(pmu, true); | ||
873 | if (err) | ||
874 | return err; | ||
875 | |||
876 | /* TBD: post reset */ | ||
877 | |||
878 | err = pmu_idle(pmu); | ||
879 | if (err) | ||
880 | return err; | ||
881 | |||
882 | pmu_enable_irq(pmu, true); | ||
883 | } | ||
884 | |||
885 | gk20a_dbg_fn("done"); | ||
886 | return 0; | ||
887 | } | ||
888 | |||
889 | static int pmu_reset(struct pmu_gk20a *pmu) | ||
890 | { | ||
891 | int err; | ||
892 | |||
893 | err = pmu_idle(pmu); | ||
894 | if (err) | ||
895 | return err; | ||
896 | |||
897 | /* TBD: release pmu hw mutex */ | ||
898 | |||
899 | err = pmu_enable(pmu, false); | ||
900 | if (err) | ||
901 | return err; | ||
902 | |||
903 | /* TBD: cancel all sequences */ | ||
904 | /* TBD: init all sequences and state tables */ | ||
905 | /* TBD: restore pre-init message handler */ | ||
906 | |||
907 | err = pmu_enable(pmu, true); | ||
908 | if (err) | ||
909 | return err; | ||
910 | |||
911 | return 0; | ||
912 | } | ||
913 | |||
/*
 * Load and start the PMU bootloader.
 *
 * Sequence: bind the PMU instance block, push the command-line args
 * into the top of DMEM, write the bootloader's parameter block through
 * the auto-increment DMEM port, DMA the bootloader image into IMEM in
 * 256-byte blocks, then set the boot vector and start the falcon CPU.
 */
static int pmu_bootstrap(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	struct gk20a_platform *platform = platform_get_drvdata(g->dev);
	struct mm_gk20a *mm = &g->mm;
	struct pmu_ucode_desc *desc = pmu->desc;
	u64 addr_code, addr_data, addr_load;
	u32 i, blocks, addr_args;

	gk20a_dbg_fn("");

	gk20a_writel(g, pwr_falcon_itfen_r(),
		gk20a_readl(g, pwr_falcon_itfen_r()) |
		pwr_falcon_itfen_ctxen_enable_f());
	/* instance block pointer is in 4K units (pa >> 12) */
	gk20a_writel(g, pwr_pmu_new_instblk_r(),
		pwr_pmu_new_instblk_ptr_f(
			mm->pmu.inst_block.cpu_pa >> 12) |
		pwr_pmu_new_instblk_valid_f(1) |
		pwr_pmu_new_instblk_target_sys_coh_f());

	/* TBD: load all other surfaces */

	g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
		clk_get_rate(platform->clk[1]));

	/* place the cmdline args at the very top of DMEM */
	addr_args = (pwr_falcon_hwcfg_dmem_size_v(
		gk20a_readl(g, pwr_falcon_hwcfg_r()))
			<< GK20A_PMU_DMEM_BLKSIZE2) -
		g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);

	pmu_copy_to_dmem(pmu, addr_args,
			(u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
			g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);

	gk20a_writel(g, pwr_falcon_dmemc_r(0),
		pwr_falcon_dmemc_offs_f(0) |
		pwr_falcon_dmemc_blk_f(0)  |
		pwr_falcon_dmemc_aincw_f(1));

	/* GPU VAs are programmed in 256-byte units (>> 8) */
	addr_code = u64_lo32((pmu->ucode.pmu_va +
			desc->app_start_offset +
			desc->app_resident_code_offset) >> 8) ;
	addr_data = u64_lo32((pmu->ucode.pmu_va +
			desc->app_start_offset +
			desc->app_resident_data_offset) >> 8);
	addr_load = u64_lo32((pmu->ucode.pmu_va +
			desc->bootloader_start_offset) >> 8);

	/* bootloader parameter block, written at DMEM offset 0.
	 * NOTE(review): field order follows the PMU ucode boot ABI —
	 * confirm against the matching bootloader/ucode headers. */
	gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
	gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);

	gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
		addr_load - (desc->bootloader_imem_offset >> 8));

	/* round the bootloader size up to whole 256-byte blocks */
	blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;

	for (i = 0; i < blocks; i++) {
		gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
			desc->bootloader_imem_offset + (i << 8));
		gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
			desc->bootloader_imem_offset + (i << 8));
		gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
			pwr_falcon_dmatrfcmd_imem_f(1)  |
			pwr_falcon_dmatrfcmd_write_f(0) |
			pwr_falcon_dmatrfcmd_size_f(6)  |
			pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
	}

	gk20a_writel(g, pwr_falcon_bootvec_r(),
		pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point));

	/* release the falcon CPU from reset */
	gk20a_writel(g, pwr_falcon_cpuctl_r(),
		pwr_falcon_cpuctl_startcpu_f(1));

	gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);

	return 0;
}
1000 | |||
1001 | static void pmu_seq_init(struct pmu_gk20a *pmu) | ||
1002 | { | ||
1003 | u32 i; | ||
1004 | |||
1005 | memset(pmu->seq, 0, | ||
1006 | sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES); | ||
1007 | memset(pmu->pmu_seq_tbl, 0, | ||
1008 | sizeof(pmu->pmu_seq_tbl)); | ||
1009 | |||
1010 | for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++) | ||
1011 | pmu->seq[i].id = i; | ||
1012 | } | ||
1013 | |||
/*
 * Allocate a free PMU sequence slot; returns 0 and stores the slot in
 * *pseq, or -EAGAIN when none is available.  The bitmap lookup and
 * set_bit are done under pmu_seq_lock.
 *
 * NOTE(review): find_first_zero_bit() and the bound check pass
 * sizeof(pmu->pmu_seq_tbl), which is a size in BYTES, while the bitops
 * API takes a size in BITS.  Verify this cannot index pmu->seq past
 * PMU_MAX_NUM_SEQUENCES and only under-uses the bitmap.
 */
static int pmu_seq_acquire(struct pmu_gk20a *pmu,
			struct pmu_sequence **pseq)
{
	struct gk20a *g = pmu->g;
	struct pmu_sequence *seq;
	u32 index;

	mutex_lock(&pmu->pmu_seq_lock);
	index = find_first_zero_bit(pmu->pmu_seq_tbl,
				sizeof(pmu->pmu_seq_tbl));
	if (index >= sizeof(pmu->pmu_seq_tbl)) {
		gk20a_err(dev_from_gk20a(g),
			"no free sequence available");
		mutex_unlock(&pmu->pmu_seq_lock);
		return -EAGAIN;
	}
	set_bit(index, pmu->pmu_seq_tbl);
	mutex_unlock(&pmu->pmu_seq_lock);

	seq = &pmu->seq[index];
	seq->state = PMU_SEQ_STATE_PENDING;

	*pseq = seq;
	return 0;
}
1039 | |||
1040 | static void pmu_seq_release(struct pmu_gk20a *pmu, | ||
1041 | struct pmu_sequence *seq) | ||
1042 | { | ||
1043 | struct gk20a *g = pmu->g; | ||
1044 | seq->state = PMU_SEQ_STATE_FREE; | ||
1045 | seq->desc = PMU_INVALID_SEQ_DESC; | ||
1046 | seq->callback = NULL; | ||
1047 | seq->cb_params = NULL; | ||
1048 | seq->msg = NULL; | ||
1049 | seq->out_payload = NULL; | ||
1050 | g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu, | ||
1051 | g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0); | ||
1052 | g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu, | ||
1053 | g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0); | ||
1054 | |||
1055 | clear_bit(seq->id, pmu->pmu_seq_tbl); | ||
1056 | } | ||
1057 | |||
1058 | static int pmu_queue_init(struct pmu_gk20a *pmu, | ||
1059 | u32 id, union pmu_init_msg_pmu *init) | ||
1060 | { | ||
1061 | struct gk20a *g = pmu->g; | ||
1062 | struct pmu_queue *queue = &pmu->queue[id]; | ||
1063 | queue->id = id; | ||
1064 | g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init); | ||
1065 | |||
1066 | queue->mutex_id = id; | ||
1067 | mutex_init(&queue->mutex); | ||
1068 | |||
1069 | gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x", | ||
1070 | id, queue->index, queue->offset, queue->size); | ||
1071 | |||
1072 | return 0; | ||
1073 | } | ||
1074 | |||
1075 | static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue, | ||
1076 | u32 *head, bool set) | ||
1077 | { | ||
1078 | struct gk20a *g = pmu->g; | ||
1079 | |||
1080 | BUG_ON(!head); | ||
1081 | |||
1082 | if (PMU_IS_COMMAND_QUEUE(queue->id)) { | ||
1083 | |||
1084 | if (queue->index >= pwr_pmu_queue_head__size_1_v()) | ||
1085 | return -EINVAL; | ||
1086 | |||
1087 | if (!set) | ||
1088 | *head = pwr_pmu_queue_head_address_v( | ||
1089 | gk20a_readl(g, | ||
1090 | pwr_pmu_queue_head_r(queue->index))); | ||
1091 | else | ||
1092 | gk20a_writel(g, | ||
1093 | pwr_pmu_queue_head_r(queue->index), | ||
1094 | pwr_pmu_queue_head_address_f(*head)); | ||
1095 | } else { | ||
1096 | if (!set) | ||
1097 | *head = pwr_pmu_msgq_head_val_v( | ||
1098 | gk20a_readl(g, pwr_pmu_msgq_head_r())); | ||
1099 | else | ||
1100 | gk20a_writel(g, | ||
1101 | pwr_pmu_msgq_head_r(), | ||
1102 | pwr_pmu_msgq_head_val_f(*head)); | ||
1103 | } | ||
1104 | |||
1105 | return 0; | ||
1106 | } | ||
1107 | |||
1108 | static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue, | ||
1109 | u32 *tail, bool set) | ||
1110 | { | ||
1111 | struct gk20a *g = pmu->g; | ||
1112 | |||
1113 | BUG_ON(!tail); | ||
1114 | |||
1115 | if (PMU_IS_COMMAND_QUEUE(queue->id)) { | ||
1116 | |||
1117 | if (queue->index >= pwr_pmu_queue_tail__size_1_v()) | ||
1118 | return -EINVAL; | ||
1119 | |||
1120 | if (!set) | ||
1121 | *tail = pwr_pmu_queue_tail_address_v( | ||
1122 | gk20a_readl(g, | ||
1123 | pwr_pmu_queue_tail_r(queue->index))); | ||
1124 | else | ||
1125 | gk20a_writel(g, | ||
1126 | pwr_pmu_queue_tail_r(queue->index), | ||
1127 | pwr_pmu_queue_tail_address_f(*tail)); | ||
1128 | } else { | ||
1129 | if (!set) | ||
1130 | *tail = pwr_pmu_msgq_tail_val_v( | ||
1131 | gk20a_readl(g, pwr_pmu_msgq_tail_r())); | ||
1132 | else | ||
1133 | gk20a_writel(g, | ||
1134 | pwr_pmu_msgq_tail_r(), | ||
1135 | pwr_pmu_msgq_tail_val_f(*tail)); | ||
1136 | } | ||
1137 | |||
1138 | return 0; | ||
1139 | } | ||
1140 | |||
/* Read @size bytes from DMEM at @offset via copy port 0. */
static inline void pmu_queue_read(struct pmu_gk20a *pmu,
			u32 offset, u8 *dst, u32 size)
{
	pmu_copy_from_dmem(pmu, offset, dst, size, 0);
}
1146 | |||
/* Write @size bytes to DMEM at @offset via copy port 0. */
static inline void pmu_queue_write(struct pmu_gk20a *pmu,
			u32 offset, u8 *src, u32 size)
{
	pmu_copy_to_dmem(pmu, offset, src, size, 0);
}
1152 | |||
1153 | int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token) | ||
1154 | { | ||
1155 | struct gk20a *g = pmu->g; | ||
1156 | struct pmu_mutex *mutex; | ||
1157 | u32 data, owner, max_retry; | ||
1158 | |||
1159 | if (!pmu->initialized) | ||
1160 | return 0; | ||
1161 | |||
1162 | BUG_ON(!token); | ||
1163 | BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); | ||
1164 | BUG_ON(id > pmu->mutex_cnt); | ||
1165 | |||
1166 | mutex = &pmu->mutex[id]; | ||
1167 | |||
1168 | owner = pwr_pmu_mutex_value_v( | ||
1169 | gk20a_readl(g, pwr_pmu_mutex_r(mutex->index))); | ||
1170 | |||
1171 | if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) { | ||
1172 | BUG_ON(mutex->ref_cnt == 0); | ||
1173 | gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token); | ||
1174 | mutex->ref_cnt++; | ||
1175 | return 0; | ||
1176 | } | ||
1177 | |||
1178 | max_retry = 40; | ||
1179 | do { | ||
1180 | data = pwr_pmu_mutex_id_value_v( | ||
1181 | gk20a_readl(g, pwr_pmu_mutex_id_r())); | ||
1182 | if (data == pwr_pmu_mutex_id_value_init_v() || | ||
1183 | data == pwr_pmu_mutex_id_value_not_avail_v()) { | ||
1184 | gk20a_warn(dev_from_gk20a(g), | ||
1185 | "fail to generate mutex token: val 0x%08x", | ||
1186 | owner); | ||
1187 | usleep_range(20, 40); | ||
1188 | continue; | ||
1189 | } | ||
1190 | |||
1191 | owner = data; | ||
1192 | gk20a_writel(g, pwr_pmu_mutex_r(mutex->index), | ||
1193 | pwr_pmu_mutex_value_f(owner)); | ||
1194 | |||
1195 | data = pwr_pmu_mutex_value_v( | ||
1196 | gk20a_readl(g, pwr_pmu_mutex_r(mutex->index))); | ||
1197 | |||
1198 | if (owner == data) { | ||
1199 | mutex->ref_cnt = 1; | ||
1200 | gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x", | ||
1201 | mutex->index, *token); | ||
1202 | *token = owner; | ||
1203 | return 0; | ||
1204 | } else { | ||
1205 | gk20a_dbg_info("fail to acquire mutex idx=0x%08x", | ||
1206 | mutex->index); | ||
1207 | |||
1208 | data = gk20a_readl(g, pwr_pmu_mutex_id_release_r()); | ||
1209 | data = set_field(data, | ||
1210 | pwr_pmu_mutex_id_release_value_m(), | ||
1211 | pwr_pmu_mutex_id_release_value_f(owner)); | ||
1212 | gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data); | ||
1213 | |||
1214 | usleep_range(20, 40); | ||
1215 | continue; | ||
1216 | } | ||
1217 | } while (max_retry-- > 0); | ||
1218 | |||
1219 | return -EBUSY; | ||
1220 | } | ||
1221 | |||
1222 | int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token) | ||
1223 | { | ||
1224 | struct gk20a *g = pmu->g; | ||
1225 | struct pmu_mutex *mutex; | ||
1226 | u32 owner, data; | ||
1227 | |||
1228 | if (!pmu->initialized) | ||
1229 | return 0; | ||
1230 | |||
1231 | BUG_ON(!token); | ||
1232 | BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); | ||
1233 | BUG_ON(id > pmu->mutex_cnt); | ||
1234 | |||
1235 | mutex = &pmu->mutex[id]; | ||
1236 | |||
1237 | owner = pwr_pmu_mutex_value_v( | ||
1238 | gk20a_readl(g, pwr_pmu_mutex_r(mutex->index))); | ||
1239 | |||
1240 | if (*token != owner) { | ||
1241 | gk20a_err(dev_from_gk20a(g), | ||
1242 | "requester 0x%08x NOT match owner 0x%08x", | ||
1243 | *token, owner); | ||
1244 | return -EINVAL; | ||
1245 | } | ||
1246 | |||
1247 | if (--mutex->ref_cnt == 0) { | ||
1248 | gk20a_writel(g, pwr_pmu_mutex_r(mutex->index), | ||
1249 | pwr_pmu_mutex_value_initial_lock_f()); | ||
1250 | |||
1251 | data = gk20a_readl(g, pwr_pmu_mutex_id_release_r()); | ||
1252 | data = set_field(data, pwr_pmu_mutex_id_release_value_m(), | ||
1253 | pwr_pmu_mutex_id_release_value_f(owner)); | ||
1254 | gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data); | ||
1255 | |||
1256 | gk20a_dbg_pmu("mutex released: id=%d, token=0x%x", | ||
1257 | mutex->index, *token); | ||
1258 | } | ||
1259 | |||
1260 | return 0; | ||
1261 | } | ||
1262 | |||
1263 | static int pmu_queue_lock(struct pmu_gk20a *pmu, | ||
1264 | struct pmu_queue *queue) | ||
1265 | { | ||
1266 | int err; | ||
1267 | |||
1268 | if (PMU_IS_MESSAGE_QUEUE(queue->id)) | ||
1269 | return 0; | ||
1270 | |||
1271 | if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { | ||
1272 | mutex_lock(&queue->mutex); | ||
1273 | queue->locked = true; | ||
1274 | return 0; | ||
1275 | } | ||
1276 | |||
1277 | err = pmu_mutex_acquire(pmu, queue->mutex_id, | ||
1278 | &queue->mutex_lock); | ||
1279 | if (err == 0) | ||
1280 | queue->locked = true; | ||
1281 | |||
1282 | return err; | ||
1283 | } | ||
1284 | |||
1285 | static int pmu_queue_unlock(struct pmu_gk20a *pmu, | ||
1286 | struct pmu_queue *queue) | ||
1287 | { | ||
1288 | int err; | ||
1289 | |||
1290 | if (PMU_IS_MESSAGE_QUEUE(queue->id)) | ||
1291 | return 0; | ||
1292 | |||
1293 | if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { | ||
1294 | mutex_unlock(&queue->mutex); | ||
1295 | queue->locked = false; | ||
1296 | return 0; | ||
1297 | } | ||
1298 | |||
1299 | if (queue->locked) { | ||
1300 | err = pmu_mutex_release(pmu, queue->mutex_id, | ||
1301 | &queue->mutex_lock); | ||
1302 | if (err == 0) | ||
1303 | queue->locked = false; | ||
1304 | } | ||
1305 | |||
1306 | return 0; | ||
1307 | } | ||
1308 | |||
1309 | /* called by pmu_read_message, no lock */ | ||
1310 | static bool pmu_queue_is_empty(struct pmu_gk20a *pmu, | ||
1311 | struct pmu_queue *queue) | ||
1312 | { | ||
1313 | u32 head, tail; | ||
1314 | |||
1315 | pmu_queue_head(pmu, queue, &head, QUEUE_GET); | ||
1316 | if (queue->opened && queue->oflag == OFLAG_READ) | ||
1317 | tail = queue->position; | ||
1318 | else | ||
1319 | pmu_queue_tail(pmu, queue, &tail, QUEUE_GET); | ||
1320 | |||
1321 | return head == tail; | ||
1322 | } | ||
1323 | |||
/*
 * Check whether @size bytes (rounded up to QUEUE_ALIGNMENT) fit in the
 * queue at the current write position.  If the write would run past the
 * end of the ring, *need_rewind is set and the space check is redone
 * from the start of the ring (queue->offset).
 *
 * The contiguous-tail case reserves PMU_CMD_HDR_SIZE so a REWIND
 * marker can always be appended.
 *
 * NOTE(review): if head lands exactly on tail after the rewind
 * (head == tail, queue empty), 'free' keeps the pre-rewind value and
 * the function can report no room for a fitting payload — confirm this
 * edge case is acceptable to callers.
 */
static bool pmu_queue_has_room(struct pmu_gk20a *pmu,
			struct pmu_queue *queue, u32 size, bool *need_rewind)
{
	u32 head, tail, free;
	bool rewind = false;

	BUG_ON(!queue->locked);

	size = ALIGN(size, QUEUE_ALIGNMENT);

	pmu_queue_head(pmu, queue, &head, QUEUE_GET);
	pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);

	if (head >= tail) {
		/* contiguous space up to the end of the ring */
		free = queue->offset + queue->size - head;
		free -= PMU_CMD_HDR_SIZE;

		if (size > free) {
			rewind = true;
			head = queue->offset;
		}
	}

	if (head < tail)
		free = tail - head - 1;

	if (need_rewind)
		*need_rewind = rewind;

	return size <= free;
}
1355 | |||
1356 | static int pmu_queue_push(struct pmu_gk20a *pmu, | ||
1357 | struct pmu_queue *queue, void *data, u32 size) | ||
1358 | { | ||
1359 | gk20a_dbg_fn(""); | ||
1360 | |||
1361 | if (!queue->opened && queue->oflag == OFLAG_WRITE){ | ||
1362 | gk20a_err(dev_from_gk20a(pmu->g), | ||
1363 | "queue not opened for write"); | ||
1364 | return -EINVAL; | ||
1365 | } | ||
1366 | |||
1367 | pmu_queue_write(pmu, queue->position, data, size); | ||
1368 | queue->position += ALIGN(size, QUEUE_ALIGNMENT); | ||
1369 | return 0; | ||
1370 | } | ||
1371 | |||
1372 | static int pmu_queue_pop(struct pmu_gk20a *pmu, | ||
1373 | struct pmu_queue *queue, void *data, u32 size, | ||
1374 | u32 *bytes_read) | ||
1375 | { | ||
1376 | u32 head, tail, used; | ||
1377 | |||
1378 | *bytes_read = 0; | ||
1379 | |||
1380 | if (!queue->opened && queue->oflag == OFLAG_READ){ | ||
1381 | gk20a_err(dev_from_gk20a(pmu->g), | ||
1382 | "queue not opened for read"); | ||
1383 | return -EINVAL; | ||
1384 | } | ||
1385 | |||
1386 | pmu_queue_head(pmu, queue, &head, QUEUE_GET); | ||
1387 | tail = queue->position; | ||
1388 | |||
1389 | if (head == tail) | ||
1390 | return 0; | ||
1391 | |||
1392 | if (head > tail) | ||
1393 | used = head - tail; | ||
1394 | else | ||
1395 | used = queue->offset + queue->size - tail; | ||
1396 | |||
1397 | if (size > used) { | ||
1398 | gk20a_warn(dev_from_gk20a(pmu->g), | ||
1399 | "queue size smaller than request read"); | ||
1400 | size = used; | ||
1401 | } | ||
1402 | |||
1403 | pmu_queue_read(pmu, tail, data, size); | ||
1404 | queue->position += ALIGN(size, QUEUE_ALIGNMENT); | ||
1405 | *bytes_read = size; | ||
1406 | return 0; | ||
1407 | } | ||
1408 | |||
1409 | static void pmu_queue_rewind(struct pmu_gk20a *pmu, | ||
1410 | struct pmu_queue *queue) | ||
1411 | { | ||
1412 | struct pmu_cmd cmd; | ||
1413 | |||
1414 | gk20a_dbg_fn(""); | ||
1415 | |||
1416 | if (!queue->opened) { | ||
1417 | gk20a_err(dev_from_gk20a(pmu->g), | ||
1418 | "queue not opened"); | ||
1419 | return; | ||
1420 | } | ||
1421 | |||
1422 | if (queue->oflag == OFLAG_WRITE) { | ||
1423 | cmd.hdr.unit_id = PMU_UNIT_REWIND; | ||
1424 | cmd.hdr.size = PMU_CMD_HDR_SIZE; | ||
1425 | pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size); | ||
1426 | gk20a_dbg_pmu("queue %d rewinded", queue->id); | ||
1427 | } | ||
1428 | |||
1429 | queue->position = queue->offset; | ||
1430 | return; | ||
1431 | } | ||
1432 | |||
1433 | /* open for read and lock the queue */ | ||
1434 | static int pmu_queue_open_read(struct pmu_gk20a *pmu, | ||
1435 | struct pmu_queue *queue) | ||
1436 | { | ||
1437 | int err; | ||
1438 | |||
1439 | err = pmu_queue_lock(pmu, queue); | ||
1440 | if (err) | ||
1441 | return err; | ||
1442 | |||
1443 | if (queue->opened) | ||
1444 | BUG(); | ||
1445 | |||
1446 | pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET); | ||
1447 | queue->oflag = OFLAG_READ; | ||
1448 | queue->opened = true; | ||
1449 | |||
1450 | return 0; | ||
1451 | } | ||
1452 | |||
1453 | /* open for write and lock the queue | ||
1454 | make sure there's enough free space for the write */ | ||
1455 | static int pmu_queue_open_write(struct pmu_gk20a *pmu, | ||
1456 | struct pmu_queue *queue, u32 size) | ||
1457 | { | ||
1458 | bool rewind = false; | ||
1459 | int err; | ||
1460 | |||
1461 | err = pmu_queue_lock(pmu, queue); | ||
1462 | if (err) | ||
1463 | return err; | ||
1464 | |||
1465 | if (queue->opened) | ||
1466 | BUG(); | ||
1467 | |||
1468 | if (!pmu_queue_has_room(pmu, queue, size, &rewind)) { | ||
1469 | gk20a_err(dev_from_gk20a(pmu->g), "queue full"); | ||
1470 | return -EAGAIN; | ||
1471 | } | ||
1472 | |||
1473 | pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET); | ||
1474 | queue->oflag = OFLAG_WRITE; | ||
1475 | queue->opened = true; | ||
1476 | |||
1477 | if (rewind) | ||
1478 | pmu_queue_rewind(pmu, queue); | ||
1479 | |||
1480 | return 0; | ||
1481 | } | ||
1482 | |||
1483 | /* close and unlock the queue */ | ||
1484 | static int pmu_queue_close(struct pmu_gk20a *pmu, | ||
1485 | struct pmu_queue *queue, bool commit) | ||
1486 | { | ||
1487 | if (!queue->opened) | ||
1488 | return 0; | ||
1489 | |||
1490 | if (commit) { | ||
1491 | if (queue->oflag == OFLAG_READ) { | ||
1492 | pmu_queue_tail(pmu, queue, | ||
1493 | &queue->position, QUEUE_SET); | ||
1494 | } | ||
1495 | else { | ||
1496 | pmu_queue_head(pmu, queue, | ||
1497 | &queue->position, QUEUE_SET); | ||
1498 | } | ||
1499 | } | ||
1500 | |||
1501 | queue->opened = false; | ||
1502 | |||
1503 | pmu_queue_unlock(pmu, queue); | ||
1504 | |||
1505 | return 0; | ||
1506 | } | ||
1507 | |||
1508 | static void gk20a_save_pmu_sw_state(struct pmu_gk20a *pmu, | ||
1509 | struct gk20a_pmu_save_state *save) | ||
1510 | { | ||
1511 | save->seq = pmu->seq; | ||
1512 | save->next_seq_desc = pmu->next_seq_desc; | ||
1513 | save->mutex = pmu->mutex; | ||
1514 | save->mutex_cnt = pmu->mutex_cnt; | ||
1515 | save->desc = pmu->desc; | ||
1516 | save->ucode = pmu->ucode; | ||
1517 | save->elpg_enable = pmu->elpg_enable; | ||
1518 | save->pg_wq = pmu->pg_wq; | ||
1519 | save->seq_buf = pmu->seq_buf; | ||
1520 | save->pg_buf = pmu->pg_buf; | ||
1521 | save->sw_ready = pmu->sw_ready; | ||
1522 | save->pg_init = pmu->pg_init; | ||
1523 | } | ||
1524 | |||
1525 | static void gk20a_restore_pmu_sw_state(struct pmu_gk20a *pmu, | ||
1526 | struct gk20a_pmu_save_state *save) | ||
1527 | { | ||
1528 | pmu->seq = save->seq; | ||
1529 | pmu->next_seq_desc = save->next_seq_desc; | ||
1530 | pmu->mutex = save->mutex; | ||
1531 | pmu->mutex_cnt = save->mutex_cnt; | ||
1532 | pmu->desc = save->desc; | ||
1533 | pmu->ucode = save->ucode; | ||
1534 | pmu->elpg_enable = save->elpg_enable; | ||
1535 | pmu->pg_wq = save->pg_wq; | ||
1536 | pmu->seq_buf = save->seq_buf; | ||
1537 | pmu->pg_buf = save->pg_buf; | ||
1538 | pmu->sw_ready = save->sw_ready; | ||
1539 | pmu->pg_init = save->pg_init; | ||
1540 | } | ||
1541 | |||
/*
 * Tear down PMU SW support: destroy the DMEM allocator and zero the
 * pmu struct, while preserving (via save/restore) the fields that must
 * outlive a teardown/re-init cycle.
 */
void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
{
	struct gk20a_pmu_save_state save;

	gk20a_dbg_fn("");

	gk20a_allocator_destroy(&pmu->dmem);

	/* Save the stuff you don't want to lose */
	gk20a_save_pmu_sw_state(pmu, &save);

	/* this function is also called by pmu_destroy outside gk20a deinit
	 * which releases the gk20a struct, so fill up with zeros here. */
	memset(pmu, 0, sizeof(struct pmu_gk20a));

	/* Restore stuff you want to keep */
	gk20a_restore_pmu_sw_state(pmu, &save);
}
1560 | |||
/* Bring the PMU falcon out of reset (HW enable only; no SW setup). */
int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;

	gk20a_dbg_fn("");

	pmu_enable_hw(pmu, true);

	return 0;
}
1571 | |||
1572 | static void pmu_elpg_enable_allow(struct work_struct *work); | ||
1573 | |||
1574 | int gk20a_init_pmu_setup_sw(struct gk20a *g) | ||
1575 | { | ||
1576 | struct pmu_gk20a *pmu = &g->pmu; | ||
1577 | struct mm_gk20a *mm = &g->mm; | ||
1578 | struct vm_gk20a *vm = &mm->pmu.vm; | ||
1579 | struct device *d = dev_from_gk20a(g); | ||
1580 | int i, err = 0; | ||
1581 | u8 *ptr; | ||
1582 | void *ucode_ptr; | ||
1583 | struct sg_table *sgt_pmu_ucode; | ||
1584 | struct sg_table *sgt_seq_buf; | ||
1585 | DEFINE_DMA_ATTRS(attrs); | ||
1586 | dma_addr_t iova; | ||
1587 | |||
1588 | gk20a_dbg_fn(""); | ||
1589 | |||
1590 | if (pmu->sw_ready) { | ||
1591 | for (i = 0; i < pmu->mutex_cnt; i++) { | ||
1592 | pmu->mutex[i].id = i; | ||
1593 | pmu->mutex[i].index = i; | ||
1594 | } | ||
1595 | pmu_seq_init(pmu); | ||
1596 | |||
1597 | gk20a_dbg_fn("skip init"); | ||
1598 | goto skip_init; | ||
1599 | } | ||
1600 | |||
1601 | /* no infoRom script from vbios? */ | ||
1602 | |||
1603 | /* TBD: sysmon subtask */ | ||
1604 | |||
1605 | pmu->mutex_cnt = pwr_pmu_mutex__size_1_v(); | ||
1606 | pmu->mutex = kzalloc(pmu->mutex_cnt * | ||
1607 | sizeof(struct pmu_mutex), GFP_KERNEL); | ||
1608 | if (!pmu->mutex) { | ||
1609 | err = -ENOMEM; | ||
1610 | goto err; | ||
1611 | } | ||
1612 | |||
1613 | for (i = 0; i < pmu->mutex_cnt; i++) { | ||
1614 | pmu->mutex[i].id = i; | ||
1615 | pmu->mutex[i].index = i; | ||
1616 | } | ||
1617 | |||
1618 | pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES * | ||
1619 | sizeof(struct pmu_sequence), GFP_KERNEL); | ||
1620 | if (!pmu->seq) { | ||
1621 | err = -ENOMEM; | ||
1622 | goto err_free_mutex; | ||
1623 | } | ||
1624 | |||
1625 | pmu_seq_init(pmu); | ||
1626 | |||
1627 | if (!g->pmu_fw) { | ||
1628 | g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE); | ||
1629 | if (!g->pmu_fw) { | ||
1630 | gk20a_err(d, "failed to load pmu ucode!!"); | ||
1631 | err = -ENOENT; | ||
1632 | goto err_free_seq; | ||
1633 | } | ||
1634 | } | ||
1635 | |||
1636 | gk20a_dbg_fn("firmware loaded"); | ||
1637 | |||
1638 | pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data; | ||
1639 | pmu->ucode_image = (u32 *)((u8 *)pmu->desc + | ||
1640 | pmu->desc->descriptor_size); | ||
1641 | |||
1642 | |||
1643 | INIT_DELAYED_WORK(&pmu->elpg_enable, pmu_elpg_enable_allow); | ||
1644 | INIT_WORK(&pmu->pg_init, gk20a_init_pmu_setup_hw2_workqueue); | ||
1645 | |||
1646 | gk20a_init_pmu_vm(mm); | ||
1647 | |||
1648 | dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); | ||
1649 | pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, | ||
1650 | &iova, | ||
1651 | GFP_KERNEL, | ||
1652 | &attrs); | ||
1653 | if (!pmu->ucode.cpuva) { | ||
1654 | gk20a_err(d, "failed to allocate memory\n"); | ||
1655 | err = -ENOMEM; | ||
1656 | goto err_release_fw; | ||
1657 | } | ||
1658 | |||
1659 | pmu->ucode.iova = iova; | ||
1660 | pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, | ||
1661 | &iova, | ||
1662 | GFP_KERNEL); | ||
1663 | if (!pmu->seq_buf.cpuva) { | ||
1664 | gk20a_err(d, "failed to allocate memory\n"); | ||
1665 | err = -ENOMEM; | ||
1666 | goto err_free_pmu_ucode; | ||
1667 | } | ||
1668 | |||
1669 | pmu->seq_buf.iova = iova; | ||
1670 | init_waitqueue_head(&pmu->pg_wq); | ||
1671 | |||
1672 | err = gk20a_get_sgtable(d, &sgt_pmu_ucode, | ||
1673 | pmu->ucode.cpuva, | ||
1674 | pmu->ucode.iova, | ||
1675 | GK20A_PMU_UCODE_SIZE_MAX); | ||
1676 | if (err) { | ||
1677 | gk20a_err(d, "failed to allocate sg table\n"); | ||
1678 | goto err_free_seq_buf; | ||
1679 | } | ||
1680 | |||
1681 | pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, | ||
1682 | GK20A_PMU_UCODE_SIZE_MAX, | ||
1683 | 0, /* flags */ | ||
1684 | gk20a_mem_flag_read_only); | ||
1685 | if (!pmu->ucode.pmu_va) { | ||
1686 | gk20a_err(d, "failed to map pmu ucode memory!!"); | ||
1687 | goto err_free_ucode_sgt; | ||
1688 | } | ||
1689 | |||
1690 | err = gk20a_get_sgtable(d, &sgt_seq_buf, | ||
1691 | pmu->seq_buf.cpuva, | ||
1692 | pmu->seq_buf.iova, | ||
1693 | GK20A_PMU_SEQ_BUF_SIZE); | ||
1694 | if (err) { | ||
1695 | gk20a_err(d, "failed to allocate sg table\n"); | ||
1696 | goto err_unmap_ucode; | ||
1697 | } | ||
1698 | |||
1699 | pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf, | ||
1700 | GK20A_PMU_SEQ_BUF_SIZE, | ||
1701 | 0, /* flags */ | ||
1702 | gk20a_mem_flag_none); | ||
1703 | if (!pmu->seq_buf.pmu_va) { | ||
1704 | gk20a_err(d, "failed to map pmu ucode memory!!"); | ||
1705 | goto err_free_seq_buf_sgt; | ||
1706 | } | ||
1707 | |||
1708 | ptr = (u8 *)pmu->seq_buf.cpuva; | ||
1709 | if (!ptr) { | ||
1710 | gk20a_err(d, "failed to map cpu ptr for zbc buffer"); | ||
1711 | goto err_unmap_seq_buf; | ||
1712 | } | ||
1713 | |||
1714 | /* TBD: remove this if ZBC save/restore is handled by PMU | ||
1715 | * end an empty ZBC sequence for now */ | ||
1716 | ptr[0] = 0x16; /* opcode EXIT */ | ||
1717 | ptr[1] = 0; ptr[2] = 1; ptr[3] = 0; | ||
1718 | ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0; | ||
1719 | |||
1720 | pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; | ||
1721 | |||
1722 | ucode_ptr = pmu->ucode.cpuva; | ||
1723 | |||
1724 | for (i = 0; i < (pmu->desc->app_start_offset + | ||
1725 | pmu->desc->app_size) >> 2; i++) | ||
1726 | gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]); | ||
1727 | |||
1728 | gk20a_free_sgtable(&sgt_pmu_ucode); | ||
1729 | gk20a_free_sgtable(&sgt_seq_buf); | ||
1730 | |||
1731 | skip_init: | ||
1732 | mutex_init(&pmu->elpg_mutex); | ||
1733 | mutex_init(&pmu->isr_mutex); | ||
1734 | mutex_init(&pmu->pmu_copy_lock); | ||
1735 | mutex_init(&pmu->pmu_seq_lock); | ||
1736 | |||
1737 | pmu->perfmon_counter.index = 3; /* GR & CE2 */ | ||
1738 | pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE; | ||
1739 | |||
1740 | pmu->remove_support = gk20a_remove_pmu_support; | ||
1741 | err = gk20a_init_pmu(pmu); | ||
1742 | if (err) { | ||
1743 | gk20a_err(d, "failed to set function pointers\n"); | ||
1744 | return err; | ||
1745 | } | ||
1746 | |||
1747 | gk20a_dbg_fn("done"); | ||
1748 | return 0; | ||
1749 | |||
1750 | err_unmap_seq_buf: | ||
1751 | gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va, | ||
1752 | GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none); | ||
1753 | err_free_seq_buf_sgt: | ||
1754 | gk20a_free_sgtable(&sgt_seq_buf); | ||
1755 | err_unmap_ucode: | ||
1756 | gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va, | ||
1757 | GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none); | ||
1758 | err_free_ucode_sgt: | ||
1759 | gk20a_free_sgtable(&sgt_pmu_ucode); | ||
1760 | err_free_seq_buf: | ||
1761 | dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, | ||
1762 | pmu->seq_buf.cpuva, pmu->seq_buf.iova); | ||
1763 | pmu->seq_buf.cpuva = NULL; | ||
1764 | pmu->seq_buf.iova = 0; | ||
1765 | err_free_pmu_ucode: | ||
1766 | dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, | ||
1767 | pmu->ucode.cpuva, pmu->ucode.iova, &attrs); | ||
1768 | pmu->ucode.cpuva = NULL; | ||
1769 | pmu->ucode.iova = 0; | ||
1770 | err_release_fw: | ||
1771 | release_firmware(g->pmu_fw); | ||
1772 | err_free_seq: | ||
1773 | kfree(pmu->seq); | ||
1774 | err_free_mutex: | ||
1775 | kfree(pmu->mutex); | ||
1776 | err: | ||
1777 | gk20a_dbg_fn("fail"); | ||
1778 | return err; | ||
1779 | } | ||
1780 | |||
1781 | static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, | ||
1782 | void *param, u32 handle, u32 status); | ||
1783 | |||
1784 | static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg, | ||
1785 | void *param, u32 handle, u32 status) | ||
1786 | { | ||
1787 | struct pmu_gk20a *pmu = param; | ||
1788 | struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat; | ||
1789 | |||
1790 | gk20a_dbg_fn(""); | ||
1791 | |||
1792 | if (status != 0) { | ||
1793 | gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted"); | ||
1794 | /* TBD: disable ELPG */ | ||
1795 | return; | ||
1796 | } | ||
1797 | |||
1798 | if (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_FAILED) { | ||
1799 | gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer"); | ||
1800 | } | ||
1801 | |||
1802 | pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED); | ||
1803 | wake_up(&pmu->pg_wq); | ||
1804 | } | ||
1805 | |||
/*
 * First stage of PMU HW setup: reset the falcon, program the FBIF
 * transfer-config apertures (virtual vs. physical DMA targets) and
 * bootstrap the PMU with the ucode prepared in setup_sw.
 */
int gk20a_init_pmu_setup_hw1(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	int err;

	gk20a_dbg_fn("");

	pmu_reset(pmu);

	/* setup apertures - virtual */
	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
		pwr_fbif_transcfg_mem_type_virtual_f());
	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
		pwr_fbif_transcfg_mem_type_virtual_f());
	/* setup apertures - physical */
	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
		pwr_fbif_transcfg_mem_type_physical_f() |
		pwr_fbif_transcfg_target_local_fb_f());
	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
		pwr_fbif_transcfg_mem_type_physical_f() |
		pwr_fbif_transcfg_target_coherent_sysmem_f());
	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
		pwr_fbif_transcfg_mem_type_physical_f() |
		pwr_fbif_transcfg_target_noncoherent_sysmem_f());

	/* TBD: load pmu ucode */
	err = pmu_bootstrap(pmu);
	if (err)
		return err;

	return 0;

}
1839 | |||
1840 | static int gk20a_aelpg_init(struct gk20a *g); | ||
1841 | static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id); | ||
1842 | |||
1843 | |||
/* Work item wrapper: runs the second HW setup stage from process
 * context (scheduled from the PMU_INIT message handler via pg_init). */
static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work)
{
	struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
	struct gk20a *g = pmu->g;
	gk20a_init_pmu_setup_hw2(g);
}
1850 | |||
/*
 * Second stage of PMU HW setup, run from a work item once the PMU is
 * up: allocate/map the FECS power-gating buffer (first boot only),
 * wait for the PG init ack, bind the reglist, load the FECS and ZBC
 * engine buffers into the PMU, then enable ELPG/AELPG as configured.
 *
 * Returns 0 on success or a negative errno.
 */
int gk20a_init_pmu_setup_hw2(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct mm_gk20a *mm = &g->mm;
	struct vm_gk20a *vm = &mm->pmu.vm;
	struct device *d = dev_from_gk20a(g);
	struct pmu_cmd cmd;
	u32 desc;
	long remain;
	int err;
	bool status;
	u32 size;
	struct sg_table *sgt_pg_buf;
	dma_addr_t iova;

	gk20a_dbg_fn("");

	if (!support_gk20a_pmu())
		return 0;

	/* ask FECS how large the power-gating reglist buffer must be */
	size = 0;
	err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to query fecs pg buffer size");
		return err;
	}

	if (!pmu->sw_ready) {
		/* first boot: allocate and map the pg buffer into PMU VM */
		pmu->pg_buf.cpuva = dma_alloc_coherent(d, size,
						&iova,
						GFP_KERNEL);
		if (!pmu->pg_buf.cpuva) {
			gk20a_err(d, "failed to allocate memory\n");
			err = -ENOMEM;
			goto err;
		}

		pmu->pg_buf.iova = iova;
		pmu->pg_buf.size = size;

		err = gk20a_get_sgtable(d, &sgt_pg_buf,
					pmu->pg_buf.cpuva,
					pmu->pg_buf.iova,
					size);
		if (err) {
			gk20a_err(d, "failed to create sg table\n");
			goto err_free_pg_buf;
		}

		pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm,
					&sgt_pg_buf,
					size,
					0, /* flags */
					gk20a_mem_flag_none);
		if (!pmu->pg_buf.pmu_va) {
			gk20a_err(d, "failed to map fecs pg buffer");
			err = -ENOMEM;
			goto err_free_sgtable;
		}

		gk20a_free_sgtable(&sgt_pg_buf);
	}

	/*
	 * This is the actual point at which sw setup is complete, so set the
	 * sw_ready flag here.
	 */
	pmu->sw_ready = true;

	/* TBD: acquire pmu hw mutex */

	/* TBD: post reset again? */

	/* PMU_INIT message handler will send PG_INIT */
	remain = wait_event_timeout(
			pmu->pg_wq,
			(status = (pmu->elpg_ready &&
				pmu->stat_dmem_offset != 0 &&
				pmu->elpg_stat == PMU_ELPG_STAT_OFF)),
			msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
	if (status == 0) {
		gk20a_err(dev_from_gk20a(g),
			"PG_INIT_ACK failed, remaining timeout : 0x%lx", remain);
		pmu_dump_falcon_stats(pmu);
		return -EBUSY;
	}

	err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to bind pmu inst to gr");
		return err;
	}

	err = gr_gk20a_fecs_set_reglist_virual_addr(g, pmu->pg_buf.pmu_va);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to set pg buffer pmu va");
		return err;
	}

	/* ask the PMU to DMA in the FECS engine buffer */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
	cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
	cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS;
	cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size;
	cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8);
	cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF);
	cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;

	pmu->buf_loaded = false;
	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
			pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);

	/* buf_loaded is set by pmu_handle_pg_buf_config_msg() */
	remain = wait_event_timeout(
			pmu->pg_wq,
			pmu->buf_loaded,
			msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
	if (!pmu->buf_loaded) {
		gk20a_err(dev_from_gk20a(g),
			"PGENG FECS buffer load failed, remaining timeout : 0x%lx",
			remain);
		return -EBUSY;
	}

	/* same again for the ZBC sequence buffer */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
	cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
	cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC;
	cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size;
	cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8);
	cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF);
	cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;

	pmu->buf_loaded = false;
	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
			pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);

	remain = wait_event_timeout(
			pmu->pg_wq,
			pmu->buf_loaded,
			msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
	if (!pmu->buf_loaded) {
		gk20a_err(dev_from_gk20a(g),
			"PGENG ZBC buffer load failed, remaining timeout 0x%lx",
			remain);
		return -EBUSY;
	}

	/*
	 * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to
	 * 7. This prevents PMU stalling on Host register accesses. Once the
	 * cause for this hang is discovered and fixed, this WAR should be
	 * removed.
	 */
	gk20a_writel(g, 0x10a164, 0x109ff);

	pmu->initialized = true;
	pmu->zbc_ready = true;

	/* Save zbc table after PMU is initialized. */
	pmu_save_zbc(g, 0xf);

	/*
	 * We can't guarantee that gr code to enable ELPG will be
	 * invoked, so we explicitly call disable-enable here
	 * to enable elpg.
	 */
	gk20a_pmu_disable_elpg(g);

	if (g->elpg_enabled)
		gk20a_pmu_enable_elpg(g);

	udelay(50);

	/* Enable AELPG */
	if (g->aelpg_enabled) {
		gk20a_aelpg_init(g);
		gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
	}

	return 0;

err_free_sgtable:
	gk20a_free_sgtable(&sgt_pg_buf);
err_free_pg_buf:
	dma_free_coherent(d, size,
		pmu->pg_buf.cpuva, pmu->pg_buf.iova);
	pmu->pg_buf.cpuva = NULL;
	pmu->pg_buf.iova = 0;
err:
	return err;
}
2049 | |||
2050 | int gk20a_init_pmu_support(struct gk20a *g) | ||
2051 | { | ||
2052 | struct pmu_gk20a *pmu = &g->pmu; | ||
2053 | u32 err; | ||
2054 | |||
2055 | gk20a_dbg_fn(""); | ||
2056 | |||
2057 | if (pmu->initialized) | ||
2058 | return 0; | ||
2059 | |||
2060 | pmu->g = g; | ||
2061 | |||
2062 | err = gk20a_init_pmu_reset_enable_hw(g); | ||
2063 | if (err) | ||
2064 | return err; | ||
2065 | |||
2066 | if (support_gk20a_pmu()) { | ||
2067 | err = gk20a_init_pmu_setup_sw(g); | ||
2068 | if (err) | ||
2069 | return err; | ||
2070 | |||
2071 | err = gk20a_init_pmu_setup_hw1(g); | ||
2072 | if (err) | ||
2073 | return err; | ||
2074 | } | ||
2075 | |||
2076 | return err; | ||
2077 | } | ||
2078 | |||
2079 | static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, | ||
2080 | void *param, u32 handle, u32 status) | ||
2081 | { | ||
2082 | struct pmu_gk20a *pmu = param; | ||
2083 | struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg; | ||
2084 | |||
2085 | gk20a_dbg_fn(""); | ||
2086 | |||
2087 | if (status != 0) { | ||
2088 | gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted"); | ||
2089 | /* TBD: disable ELPG */ | ||
2090 | return; | ||
2091 | } | ||
2092 | |||
2093 | switch (elpg_msg->msg) { | ||
2094 | case PMU_PG_ELPG_MSG_INIT_ACK: | ||
2095 | gk20a_dbg_pmu("INIT_PG is acknowledged from PMU"); | ||
2096 | pmu->elpg_ready = true; | ||
2097 | wake_up(&pmu->pg_wq); | ||
2098 | break; | ||
2099 | case PMU_PG_ELPG_MSG_ALLOW_ACK: | ||
2100 | gk20a_dbg_pmu("ALLOW is acknowledged from PMU"); | ||
2101 | pmu->elpg_stat = PMU_ELPG_STAT_ON; | ||
2102 | wake_up(&pmu->pg_wq); | ||
2103 | break; | ||
2104 | case PMU_PG_ELPG_MSG_DISALLOW_ACK: | ||
2105 | gk20a_dbg_pmu("DISALLOW is acknowledged from PMU"); | ||
2106 | pmu->elpg_stat = PMU_ELPG_STAT_OFF; | ||
2107 | wake_up(&pmu->pg_wq); | ||
2108 | break; | ||
2109 | default: | ||
2110 | gk20a_err(dev_from_gk20a(g), | ||
2111 | "unsupported ELPG message : 0x%04x", elpg_msg->msg); | ||
2112 | } | ||
2113 | |||
2114 | return; | ||
2115 | } | ||
2116 | |||
2117 | static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg, | ||
2118 | void *param, u32 handle, u32 status) | ||
2119 | { | ||
2120 | struct pmu_gk20a *pmu = param; | ||
2121 | |||
2122 | gk20a_dbg_fn(""); | ||
2123 | |||
2124 | if (status != 0) { | ||
2125 | gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted"); | ||
2126 | /* TBD: disable ELPG */ | ||
2127 | return; | ||
2128 | } | ||
2129 | |||
2130 | switch (msg->msg.pg.stat.sub_msg_id) { | ||
2131 | case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET: | ||
2132 | gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU"); | ||
2133 | pmu->stat_dmem_offset = msg->msg.pg.stat.data; | ||
2134 | wake_up(&pmu->pg_wq); | ||
2135 | break; | ||
2136 | default: | ||
2137 | break; | ||
2138 | } | ||
2139 | } | ||
2140 | |||
/*
 * Program the power-gating idle thresholds and run the ELPG init
 * command sequence: ELPG INIT, DMEM stat-buffer allocation, then an
 * initial DISALLOW (the PMU ucode requires a disallow before the first
 * allow).  Acks arrive asynchronously via the callbacks above.
 */
static int pmu_init_powergating(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	struct pmu_cmd cmd;
	u32 seq;

	gk20a_dbg_fn("");

	if (tegra_cpu_is_asim()) {
		/* TBD: calculate threshold for silicon */
		gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
			PMU_PG_IDLE_THRESHOLD_SIM);
		gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
			PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM);
	} else {
		/* TBD: calculate threshold for silicon */
		gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
			PMU_PG_IDLE_THRESHOLD);
		gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
			PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
	}

	/* init ELPG */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
	cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
	cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT;

	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			pmu_handle_pg_elpg_msg, pmu, &seq, ~0);

	/* alloc dmem for powergating state log */
	pmu->stat_dmem_offset = 0;
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat);
	cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT;
	cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM;
	cmd.cmd.pg.stat.data = 0;

	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
			pmu_handle_pg_stat_msg, pmu, &seq, ~0);

	/* disallow ELPG initially
	   PMU ucode requires a disallow cmd before allow cmd */
	pmu->elpg_stat = PMU_ELPG_STAT_ON; /* set for wait_event PMU_ELPG_STAT_OFF */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
	cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
	cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;

	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			pmu_handle_pg_elpg_msg, pmu, &seq, ~0);

	/* start with elpg disabled until first enable call */
	pmu->elpg_refcnt = 1;

	return 0;
}
2205 | |||
/*
 * Wire up the PMU idle counters used for load sampling (#3 busy / #6
 * total for perfmon, #1/#2 duplicated for raw readouts), allocate the
 * DMEM sample buffer and post the PERFMON INIT command.
 *
 * Returns 0 on success or -ENOMEM if the DMEM allocation fails.
 */
static int pmu_init_perfmon(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	struct pmu_v *pv = &g->ops.pmu_ver;
	struct pmu_cmd cmd;
	struct pmu_payload payload;
	u32 seq;
	u32 data;
	int err;

	gk20a_dbg_fn("");

	pmu->perfmon_ready = 0;

	/* use counter #3 for GR && CE2 busy cycles */
	gk20a_writel(g, pwr_pmu_idle_mask_r(3),
		pwr_pmu_idle_mask_gr_enabled_f() |
		pwr_pmu_idle_mask_ce_2_enabled_f());

	/* disable idle filtering for counters 3 and 6 */
	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
			pwr_pmu_idle_ctrl_filter_m(),
			pwr_pmu_idle_ctrl_value_busy_f() |
			pwr_pmu_idle_ctrl_filter_disabled_f());
	gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);

	/* use counter #6 for total cycles */
	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
			pwr_pmu_idle_ctrl_filter_m(),
			pwr_pmu_idle_ctrl_value_always_f() |
			pwr_pmu_idle_ctrl_filter_disabled_f());
	gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);

	/*
	 * We don't want to disturb counters #3 and #6, which are used by
	 * perfmon, so we add wiring also to counters #1 and #2 for
	 * exposing raw counter readings.
	 */
	gk20a_writel(g, pwr_pmu_idle_mask_r(1),
		pwr_pmu_idle_mask_gr_enabled_f() |
		pwr_pmu_idle_mask_ce_2_enabled_f());

	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
			pwr_pmu_idle_ctrl_filter_m(),
			pwr_pmu_idle_ctrl_value_busy_f() |
			pwr_pmu_idle_ctrl_filter_disabled_f());
	gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);

	data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
	data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
			pwr_pmu_idle_ctrl_filter_m(),
			pwr_pmu_idle_ctrl_value_always_f() |
			pwr_pmu_idle_ctrl_filter_disabled_f());
	gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);

	/* DMEM buffer where the PMU deposits busy/total sample pairs */
	pmu->sample_buffer = 0;
	err = pmu->dmem.alloc(&pmu->dmem, &pmu->sample_buffer, 2 * sizeof(u16));
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"failed to allocate perfmon sample buffer");
		return -ENOMEM;
	}

	/* init PERFMON */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PERFMON;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
	cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
	/* buffer to save counter values for pmu perfmon */
	pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
	(u16)pmu->sample_buffer);
	/* number of sample periods below lower threshold
	   before pmu triggers perfmon decrease event
	   TBD: = 15 */
	pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
	/* index of base counter, aka. always ticking counter */
	pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
	/* microseconds interval between pmu polls perf counters */
	pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
	/* number of perfmon counters
	   counter #3 (GR and CE2) for gk20a */
	pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
	/* moving average window for sample periods
	   TBD: = 3000000 / sample_period_us = 17 */
	pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);

	/* the counter descriptor travels as the command's IN payload */
	memset(&payload, 0, sizeof(struct pmu_payload));
	payload.in.buf = &pmu->perfmon_counter;
	payload.in.size = sizeof(struct pmu_perfmon_counter);
	payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);

	gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
			NULL, NULL, &seq, ~0);

	return 0;
}
2305 | |||
/*
 * Process the PMU_INIT message: pop it from the message queue in DMEM,
 * validate it, capture the SHA1 GID (once), initialize the command/
 * message queues from the layout the message advertises, and set up
 * the DMEM allocator over the managed region.
 *
 * Returns 0 on success or -EINVAL if the message is not a PMU_INIT.
 */
static int pmu_process_init_msg(struct pmu_gk20a *pmu,
			struct pmu_msg *msg)
{
	struct gk20a *g = pmu->g;
	struct pmu_v *pv = &g->ops.pmu_ver;
	union pmu_init_msg_pmu *init;
	struct pmu_sha1_gid_data gid_data;
	u32 i, tail = 0;

	tail = pwr_pmu_msgq_tail_val_v(
		gk20a_readl(g, pwr_pmu_msgq_tail_r()));

	/* header first, so the payload length is known */
	pmu_copy_from_dmem(pmu, tail,
		(u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);

	if (msg->hdr.unit_id != PMU_UNIT_INIT) {
		gk20a_err(dev_from_gk20a(g),
			"expecting init msg");
		return -EINVAL;
	}

	pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
		(u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);

	if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
		gk20a_err(dev_from_gk20a(g),
			"expecting init msg");
		return -EINVAL;
	}

	/* consume the message: advance the tail past it */
	tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
	gk20a_writel(g, pwr_pmu_msgq_tail_r(),
		pwr_pmu_msgq_tail_val_f(tail));

	init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init));
	if (!pmu->gid_info.valid) {

		/* the GID blob lives at the start of the managed region */
		pmu_copy_from_dmem(pmu,
			pv->get_pmu_init_msg_pmu_sw_mg_off(init),
			(u8 *)&gid_data,
			sizeof(struct pmu_sha1_gid_data), 0);

		pmu->gid_info.valid =
			(*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);

		if (pmu->gid_info.valid) {

			BUG_ON(sizeof(pmu->gid_info.gid) !=
				sizeof(gid_data.gid));

			memcpy(pmu->gid_info.gid, gid_data.gid,
				sizeof(pmu->gid_info.gid));
		}
	}

	for (i = 0; i < PMU_QUEUE_COUNT; i++)
		pmu_queue_init(pmu, i, init);

	/* allocator over the PMU-managed DMEM window */
	gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
			pv->get_pmu_init_msg_pmu_sw_mg_off(init),
			pv->get_pmu_init_msg_pmu_sw_mg_size(init),
			PMU_DMEM_ALLOC_ALIGNMENT);

	pmu->pmu_ready = true;

	return 0;
}
2373 | |||
2374 | static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue, | ||
2375 | struct pmu_msg *msg, int *status) | ||
2376 | { | ||
2377 | struct gk20a *g = pmu->g; | ||
2378 | u32 read_size, bytes_read; | ||
2379 | int err; | ||
2380 | |||
2381 | *status = 0; | ||
2382 | |||
2383 | if (pmu_queue_is_empty(pmu, queue)) | ||
2384 | return false; | ||
2385 | |||
2386 | err = pmu_queue_open_read(pmu, queue); | ||
2387 | if (err) { | ||
2388 | gk20a_err(dev_from_gk20a(g), | ||
2389 | "fail to open queue %d for read", queue->id); | ||
2390 | *status = err; | ||
2391 | return false; | ||
2392 | } | ||
2393 | |||
2394 | err = pmu_queue_pop(pmu, queue, &msg->hdr, | ||
2395 | PMU_MSG_HDR_SIZE, &bytes_read); | ||
2396 | if (err || bytes_read != PMU_MSG_HDR_SIZE) { | ||
2397 | gk20a_err(dev_from_gk20a(g), | ||
2398 | "fail to read msg from queue %d", queue->id); | ||
2399 | *status = err | -EINVAL; | ||
2400 | goto clean_up; | ||
2401 | } | ||
2402 | |||
2403 | if (msg->hdr.unit_id == PMU_UNIT_REWIND) { | ||
2404 | pmu_queue_rewind(pmu, queue); | ||
2405 | /* read again after rewind */ | ||
2406 | err = pmu_queue_pop(pmu, queue, &msg->hdr, | ||
2407 | PMU_MSG_HDR_SIZE, &bytes_read); | ||
2408 | if (err || bytes_read != PMU_MSG_HDR_SIZE) { | ||
2409 | gk20a_err(dev_from_gk20a(g), | ||
2410 | "fail to read msg from queue %d", queue->id); | ||
2411 | *status = err | -EINVAL; | ||
2412 | goto clean_up; | ||
2413 | } | ||
2414 | } | ||
2415 | |||
2416 | if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) { | ||
2417 | gk20a_err(dev_from_gk20a(g), | ||
2418 | "read invalid unit_id %d from queue %d", | ||
2419 | msg->hdr.unit_id, queue->id); | ||
2420 | *status = -EINVAL; | ||
2421 | goto clean_up; | ||
2422 | } | ||
2423 | |||
2424 | if (msg->hdr.size > PMU_MSG_HDR_SIZE) { | ||
2425 | read_size = msg->hdr.size - PMU_MSG_HDR_SIZE; | ||
2426 | err = pmu_queue_pop(pmu, queue, &msg->msg, | ||
2427 | read_size, &bytes_read); | ||
2428 | if (err || bytes_read != read_size) { | ||
2429 | gk20a_err(dev_from_gk20a(g), | ||
2430 | "fail to read msg from queue %d", queue->id); | ||
2431 | *status = err; | ||
2432 | goto clean_up; | ||
2433 | } | ||
2434 | } | ||
2435 | |||
2436 | err = pmu_queue_close(pmu, queue, true); | ||
2437 | if (err) { | ||
2438 | gk20a_err(dev_from_gk20a(g), | ||
2439 | "fail to close queue %d", queue->id); | ||
2440 | *status = err; | ||
2441 | return false; | ||
2442 | } | ||
2443 | |||
2444 | return true; | ||
2445 | |||
2446 | clean_up: | ||
2447 | err = pmu_queue_close(pmu, queue, false); | ||
2448 | if (err) | ||
2449 | gk20a_err(dev_from_gk20a(g), | ||
2450 | "fail to close queue %d", queue->id); | ||
2451 | return false; | ||
2452 | } | ||
2453 | |||
/*
 * Handle a response message for a previously posted command.
 *
 * Looks up the sequence by the message's seq_id, copies the reply (and
 * any DMEM output payload) back to the client's buffers, frees the DMEM
 * in/out allocations made at post time, invokes the client callback and
 * releases the sequence.
 *
 * Returns 0 on success, -EINVAL if the message does not match a live
 * (USED or CANCELLED) sequence.
 */
static int pmu_response_handle(struct pmu_gk20a *pmu,
			struct pmu_msg *msg)
{
	struct gk20a *g = pmu->g;
	struct pmu_sequence *seq;
	struct pmu_v *pv = &g->ops.pmu_ver;
	int ret = 0;

	gk20a_dbg_fn("");

	seq = &pmu->seq[msg->hdr.seq_id];
	if (seq->state != PMU_SEQ_STATE_USED &&
	    seq->state != PMU_SEQ_STATE_CANCELLED) {
		gk20a_err(dev_from_gk20a(g),
			"msg for an unknown sequence %d", seq->id);
		return -EINVAL;
	}

	if (msg->hdr.unit_id == PMU_UNIT_RC &&
	    msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
		gk20a_err(dev_from_gk20a(g),
			"unhandled cmd: seq %d", seq->id);
	}
	else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
		if (seq->msg) {
			if (seq->msg->hdr.size >= msg->hdr.size) {
				memcpy(seq->msg, msg, msg->hdr.size);
				/* Copy back the DMEM output payload, if one
				 * was allocated for this sequence. */
				if (pv->pmu_allocation_get_dmem_size(pmu,
				pv->get_pmu_seq_out_a_ptr(seq)) != 0) {
					pmu_copy_from_dmem(pmu,
					pv->pmu_allocation_get_dmem_offset(pmu,
					pv->get_pmu_seq_out_a_ptr(seq)),
					seq->out_payload,
					pv->pmu_allocation_get_dmem_size(pmu,
					pv->get_pmu_seq_out_a_ptr(seq)), 0);
				}
			} else {
				gk20a_err(dev_from_gk20a(g),
					"sequence %d msg buffer too small",
					seq->id);
			}
		}
	} else
		/* cancelled sequence: drop the client callback */
		seq->callback = NULL;
	/* Free the DMEM in/out allocations made by gk20a_pmu_cmd_post(). */
	if (pv->pmu_allocation_get_dmem_size(pmu,
		pv->get_pmu_seq_in_a_ptr(seq)) != 0)
		pmu->dmem.free(&pmu->dmem,
			pv->pmu_allocation_get_dmem_offset(pmu,
				pv->get_pmu_seq_in_a_ptr(seq)),
			pv->pmu_allocation_get_dmem_size(pmu,
				pv->get_pmu_seq_in_a_ptr(seq)));
	if (pv->pmu_allocation_get_dmem_size(pmu,
		pv->get_pmu_seq_out_a_ptr(seq)) != 0)
		pmu->dmem.free(&pmu->dmem,
			pv->pmu_allocation_get_dmem_offset(pmu,
				pv->get_pmu_seq_out_a_ptr(seq)),
			pv->pmu_allocation_get_dmem_size(pmu,
				pv->get_pmu_seq_out_a_ptr(seq)));

	if (seq->callback)
		seq->callback(g, msg, seq->cb_params, seq->desc, ret);

	pmu_seq_release(pmu, seq);

	/* TBD: notify client waiting for available dmem */

	gk20a_dbg_fn("done");

	return 0;
}
2524 | |||
2525 | static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout, | ||
2526 | u32 *var, u32 val); | ||
2527 | |||
2528 | static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg, | ||
2529 | void *param, u32 handle, u32 status) | ||
2530 | { | ||
2531 | struct pmu_gk20a *pmu = param; | ||
2532 | pmu->zbc_save_done = 1; | ||
2533 | } | ||
2534 | |||
/*
 * Ask PMU ucode to save the first @entries ZBC table entries.
 *
 * Posts a ZBC_TABLE_UPDATE command on the high-priority queue and polls
 * (servicing PMU interrupts inline) until pmu_handle_zbc_msg() flips
 * pmu->zbc_save_done, or the idle timeout expires.  No-op unless the
 * PMU is booted, ZBC is ready and entries is non-zero.
 */
static void pmu_save_zbc(struct gk20a *g, u32 entries)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_cmd cmd;
	u32 seq;

	if (!pmu->pmu_ready || !entries || !pmu->zbc_ready)
		return;

	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
	cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
	cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);

	pmu->zbc_save_done = 0;

	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			   pmu_handle_zbc_msg, pmu, &seq, ~0);
	pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
			      &pmu->zbc_save_done, 1);
	if (!pmu->zbc_save_done)
		gk20a_err(dev_from_gk20a(g), "ZBC save timeout");
}
2559 | |||
2560 | void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) | ||
2561 | { | ||
2562 | if (g->pmu.zbc_ready) | ||
2563 | pmu_save_zbc(g, entries); | ||
2564 | } | ||
2565 | |||
/*
 * Start PMU perfmon sampling for the PSTATE domain group.
 *
 * Event flags are chosen from the current GPU clock: at/above the PLL
 * max only DECREASE events are enabled, at/below the min only INCREASE,
 * otherwise both; CLEAR_PREV is always added.  The perfmon counter
 * (30% upper / 10% lower thresholds) is passed to the PMU as the
 * command's input payload.
 *
 * Always returns 0 — the post status is not checked.
 */
static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	struct pmu_v *pv = &g->ops.pmu_ver;
	struct pmu_cmd cmd;
	struct pmu_payload payload;
	u32 current_rate = 0;
	u32 seq;

	/* PERFMON Start */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PERFMON;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
	pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
		PMU_PERFMON_CMD_ID_START);
	pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
		PMU_DOMAIN_GROUP_PSTATE);
	pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
		pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);

	/* Only enable events that can actually move the clock. */
	current_rate = rate_gpu_to_gpc2clk(gk20a_clk_get_rate(g));
	if (current_rate >= gpc_pll_params.max_freq)
		pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
			PMU_PERFMON_FLAG_ENABLE_DECREASE);
	else if (current_rate <= gpc_pll_params.min_freq)
		pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
			PMU_PERFMON_FLAG_ENABLE_INCREASE);
	else
		pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
			PMU_PERFMON_FLAG_ENABLE_INCREASE |
			PMU_PERFMON_FLAG_ENABLE_DECREASE);

	pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
		pv->perfmon_start_get_flags(&cmd.cmd.perfmon) |
		PMU_PERFMON_FLAG_CLEAR_PREV);

	memset(&payload, 0, sizeof(struct pmu_payload));

	/* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
	pmu->perfmon_counter.upper_threshold = 3000; /* 30% */
	/* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
	pmu->perfmon_counter.lower_threshold = 1000; /* 10% */
	pmu->perfmon_counter.valid = true;

	payload.in.buf = &pmu->perfmon_counter;
	payload.in.size = sizeof(pmu->perfmon_counter);
	payload.in.offset =
		pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);

	gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
			NULL, NULL, &seq, ~0);

	return 0;
}
2620 | |||
2621 | static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu) | ||
2622 | { | ||
2623 | struct gk20a *g = pmu->g; | ||
2624 | struct pmu_cmd cmd; | ||
2625 | u32 seq; | ||
2626 | |||
2627 | /* PERFMON Stop */ | ||
2628 | memset(&cmd, 0, sizeof(struct pmu_cmd)); | ||
2629 | cmd.hdr.unit_id = PMU_UNIT_PERFMON; | ||
2630 | cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop); | ||
2631 | cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP; | ||
2632 | |||
2633 | gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, | ||
2634 | NULL, NULL, &seq, ~0); | ||
2635 | return 0; | ||
2636 | } | ||
2637 | |||
/*
 * Handle a PERFMON event from PMU ucode and rescale the GPU clock.
 *
 * INCREASE raises the clock to 120% of the current rate, DECREASE drops
 * it to 70%, INIT marks perfmon as ready.  When built-in perfmon
 * scaling is configured, sampling is restarted after every event.
 */
static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
			struct pmu_perfmon_msg *msg)
{
	struct gk20a *g = pmu->g;
	u32 rate;

	gk20a_dbg_fn("");

	switch (msg->msg_type) {
	case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
		gk20a_dbg_pmu("perfmon increase event: "
			"state_id %d, ground_id %d, pct %d",
			msg->gen.state_id, msg->gen.group_id, msg->gen.data);
		/* increase gk20a clock freq by 20% */
		rate = gk20a_clk_get_rate(g);
		gk20a_clk_set_rate(g, rate * 6 / 5);
		break;
	case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
		gk20a_dbg_pmu("perfmon decrease event: "
			"state_id %d, ground_id %d, pct %d",
			msg->gen.state_id, msg->gen.group_id, msg->gen.data);
		/* decrease gk20a clock freq to 70% of the current rate.
		 * (The old comment claimed "by 10%", but (rate/10)*7 is
		 * a 30% cut — behavior kept, comment corrected.) */
		rate = gk20a_clk_get_rate(g);
		gk20a_clk_set_rate(g, (rate / 10) * 7);
		break;
	case PMU_PERFMON_MSG_ID_INIT_EVENT:
		pmu->perfmon_ready = 1;
		gk20a_dbg_pmu("perfmon init event");
		break;
	default:
		break;
	}

	/* restart sampling */
	if (IS_ENABLED(CONFIG_GK20A_PERFMON))
		return pmu_perfmon_start_sampling(pmu);
	return 0;
}
2676 | |||
2677 | |||
2678 | static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg) | ||
2679 | { | ||
2680 | int err; | ||
2681 | |||
2682 | gk20a_dbg_fn(""); | ||
2683 | |||
2684 | switch (msg->hdr.unit_id) { | ||
2685 | case PMU_UNIT_PERFMON: | ||
2686 | err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon); | ||
2687 | break; | ||
2688 | default: | ||
2689 | break; | ||
2690 | } | ||
2691 | |||
2692 | return err; | ||
2693 | } | ||
2694 | |||
/*
 * Drain and dispatch everything in the PMU message queue.
 *
 * The very first message after boot is the INIT message: it is consumed
 * specially and powergating/perfmon initialization is kicked off.
 * NOTE(review): the pmu_process_init_msg() return value is ignored, so
 * init of powergating/perfmon proceeds even if INIT parsing failed —
 * confirm whether that is intentional.
 *
 * Afterwards, each message is routed by its ctrl flags: EVENT messages
 * go to pmu_handle_event(), everything else is treated as a command
 * response (pmu_response_handle()).
 */
static int pmu_process_message(struct pmu_gk20a *pmu)
{
	struct pmu_msg msg;
	int status;

	if (unlikely(!pmu->pmu_ready)) {
		pmu_process_init_msg(pmu, &msg);
		pmu_init_powergating(pmu);
		pmu_init_perfmon(pmu);
		return 0;
	}

	while (pmu_read_message(pmu,
		&pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {

		gk20a_dbg_pmu("read msg hdr: "
				"unit_id = 0x%08x, size = 0x%08x, "
				"ctrl_flags = 0x%08x, seq_id = 0x%08x",
				msg.hdr.unit_id, msg.hdr.size,
				msg.hdr.ctrl_flags, msg.hdr.seq_id);

		/* strip PMU-internal flag bits before comparing */
		msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;

		if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) {
			pmu_handle_event(pmu, &msg);
		} else {
			pmu_response_handle(pmu, &msg);
		}
	}

	return 0;
}
2727 | |||
/*
 * Poll until *var == val or @timeout milliseconds elapse.
 *
 * While waiting, any pending PMU interrupt is serviced inline via
 * gk20a_pmu_isr(), so the awaited condition can make progress even when
 * this is called with the regular ISR path unavailable.  The poll delay
 * backs off exponentially from GR_IDLE_CHECK_DEFAULT up to
 * GR_IDLE_CHECK_MAX.
 *
 * NOTE(review): on non-silicon platforms the loop never times out — it
 * exits only once the condition is met.
 * NOTE(review): *var is read as a plain (non-volatile) load; presumably
 * the register read / sleep calls keep the compiler from caching it —
 * confirm, or consider an ACCESS_ONCE()-style read.
 *
 * Returns 0 when the condition was met, -ETIMEDOUT otherwise.
 */
static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
			u32 *var, u32 val)
{
	struct gk20a *g = pmu->g;
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
	unsigned long delay = GR_IDLE_CHECK_DEFAULT;

	do {
		if (*var == val)
			return 0;

		if (gk20a_readl(g, pwr_falcon_irqstat_r()))
			gk20a_pmu_isr(g);

		usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
	} while (time_before(jiffies, end_jiffies) ||
			!tegra_platform_is_silicon());

	return -ETIMEDOUT;
}
2749 | |||
/*
 * Dump ELPG (engine-level power gating) statistics to the PMU debug log.
 *
 * Copies the pmu_pg_stats block out of DMEM at pmu->stat_dmem_offset and
 * prints each field, then prints a selection of idle/power-gating
 * registers.  Debug aid only — no state is modified.
 */
static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	struct pmu_pg_stats stats;

	pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
		(u8 *)&stats, sizeof(struct pmu_pg_stats), 0);

	gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx",
		stats.pg_entry_start_timestamp);
	gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx",
		stats.pg_exit_start_timestamp);
	gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx",
		stats.pg_ingating_start_timestamp);
	gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx",
		stats.pg_ungating_start_timestamp);
	gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x",
		stats.pg_avg_entry_time_us);
	gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x",
		stats.pg_avg_exit_time_us);
	gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x",
		stats.pg_ingating_cnt);
	gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x",
		stats.pg_ingating_time_us);
	gk20a_dbg_pmu("pg_ungating_count : 0x%08x",
		stats.pg_ungating_count);
	gk20a_dbg_pmu("pg_ungating_time_us 0x%08x: ",
		stats.pg_ungating_time_us);
	gk20a_dbg_pmu("pg_gating_cnt : 0x%08x",
		stats.pg_gating_cnt);
	gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x",
		stats.pg_gating_deny_cnt);

	/*
	 Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset
	 in .nm file, e.g. 0x1000066c. use 0x66c.
	u32 i, val[20];
	pmu_copy_from_dmem(pmu, 0x66c,
		(u8 *)val, sizeof(val), 0);
	gk20a_dbg_pmu("elpg log begin");
	for (i = 0; i < 20; i++)
		gk20a_dbg_pmu("0x%08x", val[i]);
	gk20a_dbg_pmu("elpg log end");
	*/

	gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
	gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
	gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
	gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
		gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
	gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x",
		gk20a_readl(g, pwr_pmu_pg_intren_r(0)));

	gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_count_r(3)));
	gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_count_r(4)));
	gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_count_r(7)));

	/*
	 TBD: script can't generate those registers correctly
	gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x",
		gk20a_readl(g, pwr_pmu_idle_status_r()));
	gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x",
		gk20a_readl(g, pwr_pmu_pg_ctrl_r()));
	*/
}
2821 | |||
/*
 * Dump PMU falcon state after a crash or unexpected interrupt.
 *
 * Prints the falcon control/mailbox/interrupt registers, all PMU
 * mailbox and debug registers, and a set of internal falcon registers
 * read through the ICD (in-circuit debug) interface.  BAR0 error state
 * is decoded when present.  Finally dumps FECS state, since a PMU crash
 * is often secondary to a FECS crash.
 *
 * NOTE(review): the ICD reads use write-command-then-read-rdata pairs;
 * the ordering of those register accesses must be preserved.
 */
static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
{
	struct gk20a *g = pmu->g;
	int i;

	gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d",
		gk20a_readl(g, pwr_falcon_os_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x",
		gk20a_readl(g, pwr_falcon_cpuctl_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x",
		gk20a_readl(g, pwr_falcon_idlestate_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x",
		gk20a_readl(g, pwr_falcon_mailbox0_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x",
		gk20a_readl(g, pwr_falcon_mailbox1_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x",
		gk20a_readl(g, pwr_falcon_irqstat_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x",
		gk20a_readl(g, pwr_falcon_irqmode_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x",
		gk20a_readl(g, pwr_falcon_irqmask_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x",
		gk20a_readl(g, pwr_falcon_irqdest_r()));

	for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++)
		gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x",
			i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));

	for (i = 0; i < pwr_pmu_debug__size_1_v(); i++)
		gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x",
			i, gk20a_readl(g, pwr_pmu_debug_r(i)));

	/* internal falcon RSTAT registers, read via ICD */
	for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
		gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
			pwr_pmu_falcon_icd_cmd_opc_rstat_f() |
			pwr_pmu_falcon_icd_cmd_idx_f(i));
		gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x",
			i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
	}

	i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
	gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i);
	if (i != 0) {
		/* decode the failed BAR0 access */
		gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x",
			gk20a_readl(g, pwr_pmu_bar0_addr_r()));
		gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x",
			gk20a_readl(g, pwr_pmu_bar0_data_r()));
		gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x",
			gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
		gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x",
			gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
	}

	i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
	gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i);

	i = gk20a_readl(g, pwr_falcon_exterrstat_r());
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i);
	if (pwr_falcon_exterrstat_valid_v(i) ==
			pwr_falcon_exterrstat_valid_true_v()) {
		gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x",
			gk20a_readl(g, pwr_falcon_exterraddr_r()));
		gk20a_err(dev_from_gk20a(g), "top_fs_status_r : 0x%x",
			gk20a_readl(g, top_fs_status_r()));
		gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
			gk20a_readl(g, mc_enable_r()));
	}

	gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x",
		gk20a_readl(g, pwr_falcon_engctl_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x",
		gk20a_readl(g, pwr_falcon_curctx_r()));
	gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x",
		gk20a_readl(g, pwr_falcon_nxtctx_r()));

	gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
		pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
		pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
	gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x",
		gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));

	gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
		pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
		pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
	gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x",
		gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));

	gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
		pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
		pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
	gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x",
		gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));

	gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
		pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
		pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
	gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x",
		gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));

	gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
		pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
		pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
	gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x",
		gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));

	/* sample PC/SP a few times to see where the falcon is spinning */
	for (i = 0; i < 4; i++) {
		gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
			pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
			pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC));
		gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x",
			gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));

		gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
			pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
			pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP));
		gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x",
			gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
	}

	/* PMU may crash due to FECS crash. Dump FECS status */
	gk20a_fecs_dump_falcon_stats(g);
}
2944 | |||
/*
 * PMU interrupt handler.
 *
 * Reads the masked interrupt status under isr_mutex.  HALT and EXTERR
 * dump the falcon state (EXTERR is additionally cleared in
 * exterrstat); SWGEN0 drains the message queue.  After clearing the
 * handled bits, SWGEN0 is re-raised if more messages arrived while we
 * were processing, so the handler is re-entered.
 */
void gk20a_pmu_isr(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_queue *queue;
	u32 intr, mask;
	bool recheck = false;

	gk20a_dbg_fn("");

	mutex_lock(&pmu->isr_mutex);

	/* only consider interrupts that are enabled and routed to us */
	mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
		gk20a_readl(g, pwr_falcon_irqdest_r());

	intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;

	gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr);

	if (!intr) {
		mutex_unlock(&pmu->isr_mutex);
		return;
	}

	if (intr & pwr_falcon_irqstat_halt_true_f()) {
		gk20a_err(dev_from_gk20a(g),
			"pmu halt intr not implemented");
		pmu_dump_falcon_stats(pmu);
	}
	if (intr & pwr_falcon_irqstat_exterr_true_f()) {
		gk20a_err(dev_from_gk20a(g),
			"pmu exterr intr not implemented. Clearing interrupt.");
		pmu_dump_falcon_stats(pmu);

		gk20a_writel(g, pwr_falcon_exterrstat_r(),
			gk20a_readl(g, pwr_falcon_exterrstat_r()) &
				~pwr_falcon_exterrstat_valid_m());
	}
	if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
		pmu_process_message(pmu);
		recheck = true;
	}

	gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);

	if (recheck) {
		/* messages may have raced in while we were draining */
		queue = &pmu->queue[PMU_MESSAGE_QUEUE];
		if (!pmu_queue_is_empty(pmu, queue))
			gk20a_writel(g, pwr_falcon_irqsset_r(),
				pwr_falcon_irqsset_swgen0_set_f());
	}

	mutex_unlock(&pmu->isr_mutex);
}
2998 | |||
2999 | static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd, | ||
3000 | struct pmu_msg *msg, struct pmu_payload *payload, | ||
3001 | u32 queue_id) | ||
3002 | { | ||
3003 | struct gk20a *g = pmu->g; | ||
3004 | struct pmu_queue *queue; | ||
3005 | u32 in_size, out_size; | ||
3006 | |||
3007 | if (!PMU_IS_SW_COMMAND_QUEUE(queue_id)) | ||
3008 | goto invalid_cmd; | ||
3009 | |||
3010 | queue = &pmu->queue[queue_id]; | ||
3011 | if (cmd->hdr.size < PMU_CMD_HDR_SIZE) | ||
3012 | goto invalid_cmd; | ||
3013 | |||
3014 | if (cmd->hdr.size > (queue->size >> 1)) | ||
3015 | goto invalid_cmd; | ||
3016 | |||
3017 | if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE) | ||
3018 | goto invalid_cmd; | ||
3019 | |||
3020 | if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id)) | ||
3021 | goto invalid_cmd; | ||
3022 | |||
3023 | if (payload == NULL) | ||
3024 | return true; | ||
3025 | |||
3026 | if (payload->in.buf == NULL && payload->out.buf == NULL) | ||
3027 | goto invalid_cmd; | ||
3028 | |||
3029 | if ((payload->in.buf != NULL && payload->in.size == 0) || | ||
3030 | (payload->out.buf != NULL && payload->out.size == 0)) | ||
3031 | goto invalid_cmd; | ||
3032 | |||
3033 | in_size = PMU_CMD_HDR_SIZE; | ||
3034 | if (payload->in.buf) { | ||
3035 | in_size += payload->in.offset; | ||
3036 | in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu); | ||
3037 | } | ||
3038 | |||
3039 | out_size = PMU_CMD_HDR_SIZE; | ||
3040 | if (payload->out.buf) { | ||
3041 | out_size += payload->out.offset; | ||
3042 | out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu); | ||
3043 | } | ||
3044 | |||
3045 | if (in_size > cmd->hdr.size || out_size > cmd->hdr.size) | ||
3046 | goto invalid_cmd; | ||
3047 | |||
3048 | |||
3049 | if ((payload->in.offset != 0 && payload->in.buf == NULL) || | ||
3050 | (payload->out.offset != 0 && payload->out.buf == NULL)) | ||
3051 | goto invalid_cmd; | ||
3052 | |||
3053 | return true; | ||
3054 | |||
3055 | invalid_cmd: | ||
3056 | gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n" | ||
3057 | "queue_id=%d,\n" | ||
3058 | "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n" | ||
3059 | "payload in=%p, in_size=%d, in_offset=%d,\n" | ||
3060 | "payload out=%p, out_size=%d, out_offset=%d", | ||
3061 | queue_id, cmd->hdr.size, cmd->hdr.unit_id, | ||
3062 | msg, msg?msg->hdr.unit_id:~0, | ||
3063 | &payload->in, payload->in.size, payload->in.offset, | ||
3064 | &payload->out, payload->out.size, payload->out.offset); | ||
3065 | |||
3066 | return false; | ||
3067 | } | ||
3068 | |||
/*
 * Write a fully built command into the given PMU command queue.
 *
 * Retries pmu_queue_open_write() with a 1-2 ms sleep while the queue is
 * full (-EAGAIN) until @timeout ms have elapsed, then pushes the
 * command and commits it by closing the queue.
 *
 * Returns 0 on success or a negative errno (open or close failure).
 */
static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
			u32 queue_id, unsigned long timeout)
{
	struct gk20a *g = pmu->g;
	struct pmu_queue *queue;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(timeout);
	int err;

	gk20a_dbg_fn("");

	queue = &pmu->queue[queue_id];

	/* wait (bounded) for enough free space in the queue */
	do {
		err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
		if (err == -EAGAIN && time_before(jiffies, end_jiffies))
			usleep_range(1000, 2000);
		else
			break;
	} while (1);

	if (err)
		goto clean_up;

	pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);

	err = pmu_queue_close(pmu, queue, true);

clean_up:
	if (err)
		gk20a_err(dev_from_gk20a(g),
			"fail to write cmd to queue %d", queue_id);
	else
		gk20a_dbg_fn("done");

	return err;
}
3106 | |||
/*
 * Post a command to the PMU.
 *
 * Validates the command, acquires a sequence to track the response,
 * allocates PMU DMEM for the in/out payloads (copying the input payload
 * into DMEM), records the allocations on the sequence for later copy
 * back / free in pmu_response_handle(), and writes the command into the
 * requested queue.
 *
 * @callback/@cb_param are invoked from pmu_response_handle() when the
 * reply arrives; @seq_desc receives a token identifying this post;
 * @timeout (ms) bounds the wait for queue space.
 *
 * When payload->in.buf == payload->out.buf, a single DMEM region sized
 * for the larger of the two is shared between input and output.
 *
 * NOTE(review): if pmu_write_cmd() fails, the sequence is put in
 * PENDING state but 0 is still returned to the caller — confirm
 * whether the write error should propagate.
 *
 * Returns 0 on success, -EINVAL for an invalid command, or a negative
 * errno from sequence/DMEM allocation.
 */
int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
		struct pmu_msg *msg, struct pmu_payload *payload,
		u32 queue_id, pmu_callback callback, void* cb_param,
		u32 *seq_desc, unsigned long timeout)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_v *pv = &g->ops.pmu_ver;
	struct pmu_sequence *seq;
	void *in = NULL, *out = NULL;
	int err;

	gk20a_dbg_fn("");

	BUG_ON(!cmd);
	BUG_ON(!seq_desc);
	BUG_ON(!pmu->pmu_ready);

	if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
		return -EINVAL;

	err = pmu_seq_acquire(pmu, &seq);
	if (err)
		return err;

	cmd->hdr.seq_id = seq->id;

	cmd->hdr.ctrl_flags = 0;
	cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
	cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;

	seq->callback = callback;
	seq->cb_params = cb_param;
	seq->msg = msg;
	seq->out_payload = NULL;
	seq->desc = pmu->next_seq_desc++;

	if (payload)
		seq->out_payload = payload->out.buf;

	*seq_desc = seq->desc;

	/* set up + copy the input payload into DMEM */
	if (payload && payload->in.offset != 0) {
		pv->set_pmu_allocation_ptr(pmu, &in,
		((u8 *)&cmd->cmd + payload->in.offset));

		/* shared in/out buffer gets one region, sized for both */
		if (payload->in.buf != payload->out.buf)
			pv->pmu_allocation_set_dmem_size(pmu, in,
			(u16)payload->in.size);
		else
			pv->pmu_allocation_set_dmem_size(pmu, in,
			(u16)max(payload->in.size, payload->out.size));

		err = pmu->dmem.alloc(&pmu->dmem,
			pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
			pv->pmu_allocation_get_dmem_size(pmu, in));
		if (err)
			goto clean_up;

		pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu,
		in)),
			payload->in.buf, payload->in.size, 0);
		pv->pmu_allocation_set_dmem_size(pmu,
		pv->get_pmu_seq_in_a_ptr(seq),
		pv->pmu_allocation_get_dmem_size(pmu, in));
		pv->pmu_allocation_set_dmem_offset(pmu,
		pv->get_pmu_seq_in_a_ptr(seq),
		pv->pmu_allocation_get_dmem_offset(pmu, in));
	}

	/* set up the output payload region */
	if (payload && payload->out.offset != 0) {
		pv->set_pmu_allocation_ptr(pmu, &out,
		((u8 *)&cmd->cmd + payload->out.offset));
		pv->pmu_allocation_set_dmem_size(pmu, out,
		(u16)payload->out.size);

		if (payload->out.buf != payload->in.buf) {
			err = pmu->dmem.alloc(&pmu->dmem,
			pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
			pv->pmu_allocation_get_dmem_size(pmu, out));
			if (err)
				goto clean_up;
		} else {
			/* shared buffer: reuse the input's DMEM offset */
			BUG_ON(in == NULL);
			pv->pmu_allocation_set_dmem_offset(pmu, out,
			pv->pmu_allocation_get_dmem_offset(pmu, in));
		}

		pv->pmu_allocation_set_dmem_size(pmu,
		pv->get_pmu_seq_out_a_ptr(seq),
		pv->pmu_allocation_get_dmem_size(pmu, out));
		pv->pmu_allocation_set_dmem_offset(pmu,
		pv->get_pmu_seq_out_a_ptr(seq),
		pv->pmu_allocation_get_dmem_offset(pmu, out));
	}

	seq->state = PMU_SEQ_STATE_USED;
	err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
	if (err)
		seq->state = PMU_SEQ_STATE_PENDING;

	gk20a_dbg_fn("done");

	return 0;

clean_up:
	gk20a_dbg_fn("fail");
	if (in)
		pmu->dmem.free(&pmu->dmem,
			pv->pmu_allocation_get_dmem_offset(pmu, in),
			pv->pmu_allocation_get_dmem_size(pmu, in));
	if (out)
		pmu->dmem.free(&pmu->dmem,
			pv->pmu_allocation_get_dmem_offset(pmu, out),
			pv->pmu_allocation_get_dmem_size(pmu, out));

	pmu_seq_release(pmu, seq);
	return err;
}
3225 | |||
/*
 * Send the ELPG ALLOW command for the GR engine to the PMU.
 *
 * Caller must hold pmu->elpg_mutex and have verified all enable
 * preconditions (see gk20a_pmu_enable_elpg()).  The command is posted
 * to the high-priority queue; completion is reported asynchronously
 * through pmu_handle_pg_elpg_msg().  Always returns 0; a failed post
 * is treated as fatal (BUG_ON).
 */
static int gk20a_pmu_enable_elpg_locked(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_cmd cmd;
	u32 seq, status;

	gk20a_dbg_fn("");

	/* build the PG unit ELPG_CMD/ALLOW command */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
	cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
	cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;

	/* no need to wait ack for ELPG enable but set pending to sync
	   with follow up ELPG disable */
	pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING;

	status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			pmu_handle_pg_elpg_msg, pmu, &seq, ~0);

	/* gk20a_pmu_cmd_post() returns 0 on success; anything else is fatal */
	BUG_ON(status != 0);

	gk20a_dbg_fn("done");
	return 0;
}
3253 | |||
/*
 * Public entry point to (re-)enable ELPG for the GR engine.
 *
 * Increments the ELPG reference count under elpg_mutex and only posts
 * the ALLOW command when every precondition holds: PMU initialized,
 * golden context created, ELPG currently OFF, and enabling currently
 * allowed.  If enabling is temporarily disallowed, the request is
 * recorded as OFF_ON_PENDING and completed later by
 * pmu_elpg_enable_allow().  Returns 0 when enabling was performed,
 * deferred, or legitimately skipped.
 */
int gk20a_pmu_enable_elpg(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct gr_gk20a *gr = &g->gr;

	int ret = 0;

	gk20a_dbg_fn("");

	/* nothing to do before the PMU/ELPG stack is up */
	if (!pmu->elpg_ready || !pmu->initialized)
		goto exit;

	mutex_lock(&pmu->elpg_mutex);

	pmu->elpg_refcnt++;
	if (pmu->elpg_refcnt <= 0)
		goto exit_unlock;

	/* something is not right if we end up in following code path */
	if (unlikely(pmu->elpg_refcnt > 1)) {
		gk20a_warn(dev_from_gk20a(g),
			"%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
			__func__, pmu->elpg_refcnt);
		WARN_ON(1);
	}

	/* do NOT enable elpg until golden ctx is created,
	   which is related with the ctx that ELPG save and restore. */
	if (unlikely(!gr->ctx_vars.golden_image_initialized))
		goto exit_unlock;

	/* return if ELPG is already on or on_pending or off_on_pending */
	if (pmu->elpg_stat != PMU_ELPG_STAT_OFF)
		goto exit_unlock;

	/* if ELPG is not allowed right now, mark that it should be enabled
	 * immediately after it is allowed */
	if (!pmu->elpg_enable_allow) {
		pmu->elpg_stat = PMU_ELPG_STAT_OFF_ON_PENDING;
		goto exit_unlock;
	}

	ret = gk20a_pmu_enable_elpg_locked(g);

exit_unlock:
	mutex_unlock(&pmu->elpg_mutex);
exit:
	gk20a_dbg_fn("done");
	return ret;
}
3304 | |||
/*
 * Delayed-work handler that lifts the temporary "ELPG enable not
 * allowed" window set by gk20a_pmu_disable_elpg_defer_enable().
 *
 * Under elpg_mutex it marks enabling as allowed again and, if an
 * enable request arrived in the meantime (OFF_ON_PENDING), completes
 * it by posting the ALLOW command now.
 */
static void pmu_elpg_enable_allow(struct work_struct *work)
{
	struct pmu_gk20a *pmu = container_of(to_delayed_work(work),
			struct pmu_gk20a, elpg_enable);

	gk20a_dbg_fn("");

	mutex_lock(&pmu->elpg_mutex);

	/* It is ok to enabled powergating now */
	pmu->elpg_enable_allow = true;

	/* do we have pending requests? */
	if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
		/* reset to OFF first so the enable path's state check passes */
		pmu->elpg_stat = PMU_ELPG_STAT_OFF;
		gk20a_pmu_enable_elpg_locked(pmu->g);
	}

	mutex_unlock(&pmu->elpg_mutex);

	gk20a_dbg_fn("done");
}
3327 | |||
/*
 * Disable ELPG for the GR engine, optionally deferring the point at
 * which it may be re-enabled.
 *
 * @g:      gk20a device
 * @enable: when true, re-enabling is blocked for
 *          PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC and then lifted by the
 *          pmu_elpg_enable_allow() delayed work; when false (teardown
 *          path), re-enabling is allowed immediately.
 *
 * Decrements the ELPG refcount under elpg_mutex and walks the ELPG
 * state machine:
 *  - OFF_ON_PENDING: cancel the pending enable, nothing sent to PMU;
 *  - ON_PENDING: wait for the ALLOW ack first (DISALLOW must not race
 *    with an in-flight ALLOW), fail with -EBUSY on timeout;
 *  - already OFF: nothing to do;
 *  - ON: post DISALLOW and wait for its ack, -EBUSY on timeout.
 *
 * Returns 0 on success, -EBUSY if the PMU did not ack in time.
 */
static int gk20a_pmu_disable_elpg_defer_enable(struct gk20a *g, bool enable)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_cmd cmd;
	u32 seq;
	int ret = 0;

	gk20a_dbg_fn("");

	if (!pmu->elpg_ready || !pmu->initialized)
		return 0;

	/* remove the work from queue */
	cancel_delayed_work_sync(&pmu->elpg_enable);

	mutex_lock(&pmu->elpg_mutex);

	pmu->elpg_refcnt--;
	if (pmu->elpg_refcnt > 0) {
		gk20a_warn(dev_from_gk20a(g),
			"%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
			__func__, pmu->elpg_refcnt);
		WARN_ON(1);
		ret = 0;
		goto exit_unlock;
	}

	/* cancel off_on_pending and return */
	if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
		pmu->elpg_stat = PMU_ELPG_STAT_OFF;
		ret = 0;
		goto exit_reschedule;
	}
	/* wait if on_pending */
	else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) {

		pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
				      &pmu->elpg_stat, PMU_ELPG_STAT_ON);

		if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
			gk20a_err(dev_from_gk20a(g),
				"ELPG_ALLOW_ACK failed, elpg_stat=%d",
				pmu->elpg_stat);
			pmu_dump_elpg_stats(pmu);
			pmu_dump_falcon_stats(pmu);
			ret = -EBUSY;
			goto exit_unlock;
		}
	}
	/* return if ELPG is already off */
	else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
		ret = 0;
		goto exit_reschedule;
	}

	/* build and post the PG unit ELPG_CMD/DISALLOW command */
	memset(&cmd, 0, sizeof(struct pmu_cmd));
	cmd.hdr.unit_id = PMU_UNIT_PG;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
	cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
	cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
	cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;

	pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING;

	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
			pmu_handle_pg_elpg_msg, pmu, &seq, ~0);

	/* DISALLOW must be acked before we let the engine proceed */
	pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
			      &pmu->elpg_stat, PMU_ELPG_STAT_OFF);
	if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) {
		gk20a_err(dev_from_gk20a(g),
			"ELPG_DISALLOW_ACK failed");
		pmu_dump_elpg_stats(pmu);
		pmu_dump_falcon_stats(pmu);
		ret = -EBUSY;
		goto exit_unlock;
	}

exit_reschedule:
	if (enable) {
		/* block re-enable, then let the delayed work lift the block */
		pmu->elpg_enable_allow = false;
		schedule_delayed_work(&pmu->elpg_enable,
			msecs_to_jiffies(PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC));
	} else
		pmu->elpg_enable_allow = true;


exit_unlock:
	mutex_unlock(&pmu->elpg_mutex);
	gk20a_dbg_fn("done");
	return ret;
}
3420 | |||
3421 | int gk20a_pmu_disable_elpg(struct gk20a *g) | ||
3422 | { | ||
3423 | return gk20a_pmu_disable_elpg_defer_enable(g, true); | ||
3424 | } | ||
3425 | |||
3426 | int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable) | ||
3427 | { | ||
3428 | struct pmu_gk20a *pmu = &g->pmu; | ||
3429 | int err; | ||
3430 | |||
3431 | gk20a_dbg_fn(""); | ||
3432 | |||
3433 | if (enable) | ||
3434 | err = pmu_perfmon_start_sampling(pmu); | ||
3435 | else | ||
3436 | err = pmu_perfmon_stop_sampling(pmu); | ||
3437 | |||
3438 | return err; | ||
3439 | } | ||
3440 | |||
/*
 * Tear down the PMU: flush pending work, snapshot the ELPG residency
 * counters into software totals, force ELPG off, and disable the PMU
 * engine.  Order matters: pending work must be cancelled before the
 * final disable so no deferred enable/init races the teardown.
 *
 * Returns 0 (also when the platform has no PMU support).
 */
int gk20a_pmu_destroy(struct gk20a *g)
{
	struct pmu_gk20a *pmu = &g->pmu;
	u32 elpg_ingating_time, elpg_ungating_time, gating_cnt;

	gk20a_dbg_fn("");

	if (!support_gk20a_pmu())
		return 0;

	/* make sure the pending operations are finished before we continue */
	cancel_delayed_work_sync(&pmu->elpg_enable);
	cancel_work_sync(&pmu->pg_init);

	/* snapshot HW residency counters before ELPG is shut down */
	gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time,
		&elpg_ungating_time, &gating_cnt);

	/* teardown path: allow immediate re-enable, no deferred work */
	gk20a_pmu_disable_elpg_defer_enable(g, false);
	pmu->initialized = false;

	/* update the s/w ELPG residency counters */
	g->pg_ingating_time_us += (u64)elpg_ingating_time;
	g->pg_ungating_time_us += (u64)elpg_ungating_time;
	g->pg_gating_cnt += gating_cnt;

	pmu_enable(pmu, false);

	if (pmu->remove_support) {
		pmu->remove_support(pmu);
		pmu->remove_support = NULL;
	}

	gk20a_dbg_fn("done");
	return 0;
}
3476 | |||
3477 | int gk20a_pmu_load_norm(struct gk20a *g, u32 *load) | ||
3478 | { | ||
3479 | struct pmu_gk20a *pmu = &g->pmu; | ||
3480 | u16 _load = 0; | ||
3481 | |||
3482 | if (!pmu->perfmon_ready) { | ||
3483 | *load = 0; | ||
3484 | return 0; | ||
3485 | } | ||
3486 | |||
3487 | pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); | ||
3488 | *load = _load / 10; | ||
3489 | |||
3490 | return 0; | ||
3491 | } | ||
3492 | |||
/*
 * Read the raw PMU busy/total idle-counter pair used for load
 * estimation.  Both outputs are zeroed when the GPU is powered off.
 * The rmb() keeps the busy-counter read ordered before the
 * total-counter read, matching the write order used by
 * gk20a_pmu_reset_load_counters().
 */
void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
				 u32 *total_cycles)
{
	if (!g->power_on) {
		*busy_cycles = 0;
		*total_cycles = 0;
		return;
	}

	/* keep the device powered while touching registers */
	gk20a_busy(g->dev);
	*busy_cycles = pwr_pmu_idle_count_value_v(
		gk20a_readl(g, pwr_pmu_idle_count_r(1)));
	rmb();
	*total_cycles = pwr_pmu_idle_count_value_v(
		gk20a_readl(g, pwr_pmu_idle_count_r(2)));
	gk20a_idle(g->dev);
}
3510 | |||
/*
 * Reset both PMU idle counters (total first, then busy).  The wmb()
 * orders the two register writes so a concurrent reader using
 * gk20a_pmu_get_load_counters() (busy first, then total) never sees
 * a reset busy counter paired with a stale total counter.
 * No-op when the GPU is powered off.
 */
void gk20a_pmu_reset_load_counters(struct gk20a *g)
{
	u32 reg_val = pwr_pmu_idle_count_reset_f(1);

	if (!g->power_on)
		return;

	gk20a_busy(g->dev);
	gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
	wmb();
	gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
	gk20a_idle(g->dev);
}
3524 | |||
/*
 * Fetch the ELPG residency statistics maintained by the PMU.
 *
 * Copies the pmu_pg_stats block out of PMU DMEM and returns the
 * in-gated time, un-gated time (both in us) and the gating count.
 * All outputs are zeroed when the PMU is not initialized.
 * Returns 0 always.
 */
static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
			u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
{
	struct pmu_gk20a *pmu = &g->pmu;
	struct pmu_pg_stats stats;

	if (!pmu->initialized) {
		*ingating_time = 0;
		*ungating_time = 0;
		*gating_cnt = 0;
		return 0;
	}

	pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
		(u8 *)&stats, sizeof(struct pmu_pg_stats), 0);

	*ingating_time = stats.pg_ingating_time_us;
	*ungating_time = stats.pg_ungating_time_us;
	*gating_cnt = stats.pg_gating_cnt;

	return 0;
}
3547 | |||
3548 | /* Send an Adaptive Power (AP) related command to PMU */ | ||
3549 | static int gk20a_pmu_ap_send_command(struct gk20a *g, | ||
3550 | union pmu_ap_cmd *p_ap_cmd, bool b_block) | ||
3551 | { | ||
3552 | struct pmu_gk20a *pmu = &g->pmu; | ||
3553 | /* FIXME: where is the PG structure defined?? */ | ||
3554 | u32 status = 0; | ||
3555 | struct pmu_cmd cmd; | ||
3556 | u32 seq; | ||
3557 | pmu_callback p_callback = NULL; | ||
3558 | |||
3559 | memset(&cmd, 0, sizeof(struct pmu_cmd)); | ||
3560 | |||
3561 | /* Copy common members */ | ||
3562 | cmd.hdr.unit_id = PMU_UNIT_PG; | ||
3563 | cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd); | ||
3564 | |||
3565 | cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP; | ||
3566 | cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id; | ||
3567 | |||
3568 | /* Copy other members of command */ | ||
3569 | switch (p_ap_cmd->cmn.cmd_id) { | ||
3570 | case PMU_AP_CMD_ID_INIT: | ||
3571 | cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us = | ||
3572 | p_ap_cmd->init.pg_sampling_period_us; | ||
3573 | p_callback = ap_callback_init_and_enable_ctrl; | ||
3574 | break; | ||
3575 | |||
3576 | case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL: | ||
3577 | cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id = | ||
3578 | p_ap_cmd->init_and_enable_ctrl.ctrl_id; | ||
3579 | memcpy( | ||
3580 | (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params), | ||
3581 | (void *)&(p_ap_cmd->init_and_enable_ctrl.params), | ||
3582 | sizeof(struct pmu_ap_ctrl_init_params)); | ||
3583 | |||
3584 | p_callback = ap_callback_init_and_enable_ctrl; | ||
3585 | break; | ||
3586 | |||
3587 | case PMU_AP_CMD_ID_ENABLE_CTRL: | ||
3588 | cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id = | ||
3589 | p_ap_cmd->enable_ctrl.ctrl_id; | ||
3590 | break; | ||
3591 | |||
3592 | case PMU_AP_CMD_ID_DISABLE_CTRL: | ||
3593 | cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id = | ||
3594 | p_ap_cmd->disable_ctrl.ctrl_id; | ||
3595 | break; | ||
3596 | |||
3597 | case PMU_AP_CMD_ID_KICK_CTRL: | ||
3598 | cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id = | ||
3599 | p_ap_cmd->kick_ctrl.ctrl_id; | ||
3600 | cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count = | ||
3601 | p_ap_cmd->kick_ctrl.skip_count; | ||
3602 | break; | ||
3603 | |||
3604 | default: | ||
3605 | gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n", | ||
3606 | __func__, p_ap_cmd->cmn.cmd_id); | ||
3607 | return 0x2f; | ||
3608 | } | ||
3609 | |||
3610 | status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, | ||
3611 | p_callback, pmu, &seq, ~0); | ||
3612 | |||
3613 | if (!status) { | ||
3614 | gk20a_dbg_pmu( | ||
3615 | "%s: Unable to submit Adaptive Power Command %d\n", | ||
3616 | __func__, p_ap_cmd->cmn.cmd_id); | ||
3617 | goto err_return; | ||
3618 | } | ||
3619 | |||
3620 | /* TODO: Implement blocking calls (b_block) */ | ||
3621 | |||
3622 | err_return: | ||
3623 | return status; | ||
3624 | } | ||
3625 | |||
3626 | static void ap_callback_init_and_enable_ctrl( | ||
3627 | struct gk20a *g, struct pmu_msg *msg, | ||
3628 | void *param, u32 seq_desc, u32 status) | ||
3629 | { | ||
3630 | /* Define p_ap (i.e pointer to pmu_ap structure) */ | ||
3631 | WARN_ON(!msg); | ||
3632 | |||
3633 | if (!status) { | ||
3634 | switch (msg->msg.pg.ap_msg.cmn.msg_id) { | ||
3635 | case PMU_AP_MSG_ID_INIT_ACK: | ||
3636 | break; | ||
3637 | |||
3638 | default: | ||
3639 | gk20a_dbg_pmu( | ||
3640 | "%s: Invalid Adaptive Power Message: %x\n", | ||
3641 | __func__, msg->msg.pg.ap_msg.cmn.msg_id); | ||
3642 | break; | ||
3643 | } | ||
3644 | } | ||
3645 | } | ||
3646 | |||
3647 | static int gk20a_aelpg_init(struct gk20a *g) | ||
3648 | { | ||
3649 | int status = 0; | ||
3650 | |||
3651 | /* Remove reliance on app_ctrl field. */ | ||
3652 | union pmu_ap_cmd ap_cmd; | ||
3653 | |||
3654 | /* TODO: Check for elpg being ready? */ | ||
3655 | ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT; | ||
3656 | ap_cmd.init.pg_sampling_period_us = | ||
3657 | APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US; | ||
3658 | |||
3659 | status = gk20a_pmu_ap_send_command(g, &ap_cmd, false); | ||
3660 | return status; | ||
3661 | } | ||
3662 | |||
3663 | static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id) | ||
3664 | { | ||
3665 | int status = 0; | ||
3666 | union pmu_ap_cmd ap_cmd; | ||
3667 | |||
3668 | /* TODO: Probably check if ELPG is ready? */ | ||
3669 | |||
3670 | ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL; | ||
3671 | ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id; | ||
3672 | ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us = | ||
3673 | APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US; | ||
3674 | ap_cmd.init_and_enable_ctrl.params.min_target_saving_us = | ||
3675 | APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US; | ||
3676 | ap_cmd.init_and_enable_ctrl.params.power_break_even_us = | ||
3677 | APCTRL_POWER_BREAKEVEN_DEFAULT_US; | ||
3678 | ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max = | ||
3679 | APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT; | ||
3680 | |||
3681 | switch (ctrl_id) { | ||
3682 | case PMU_AP_CTRL_ID_GRAPHICS: | ||
3683 | break; | ||
3684 | default: | ||
3685 | break; | ||
3686 | } | ||
3687 | |||
3688 | status = gk20a_pmu_ap_send_command(g, &ap_cmd, true); | ||
3689 | return status; | ||
3690 | } | ||
3691 | |||
3692 | #if CONFIG_DEBUG_FS | ||
/*
 * debugfs show handler: print ELPG residency (time gated, time
 * ungated, and their ratio scaled to 1000).
 *
 * Combines the software totals accumulated at teardown with the live
 * PMU counters when the GPU is powered on; skips the hardware read
 * entirely when powered off so the query never powers up the device.
 */
static int elpg_residency_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u32 ingating_time = 0;
	u32 ungating_time = 0;
	u32 gating_cnt;
	u64 total_ingating, total_ungating, residency, divisor, dividend;

	/* Don't unnecessarily power on the device */
	if (g->power_on) {
		gk20a_busy(g->dev);
		gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
			&ungating_time, &gating_cnt);
		gk20a_idle(g->dev);
	}
	total_ingating = g->pg_ingating_time_us + (u64)ingating_time;
	total_ungating = g->pg_ungating_time_us + (u64)ungating_time;
	divisor = total_ingating + total_ungating;

	/* We compute the residency on a scale of 1000 */
	dividend = total_ingating * 1000;

	/* guard against division by zero before any gating happened */
	if (divisor)
		residency = div64_u64(dividend, divisor);
	else
		residency = 0;

	seq_printf(s, "Time in ELPG: %llu us\n"
			"Time out of ELPG: %llu us\n"
			"ELPG residency ratio: %llu\n",
			total_ingating, total_ungating, residency);
	return 0;

}
3727 | |||
3728 | static int elpg_residency_open(struct inode *inode, struct file *file) | ||
3729 | { | ||
3730 | return single_open(file, elpg_residency_show, inode->i_private); | ||
3731 | } | ||
3732 | |||
/* Read-only seq_file operations for the "elpg_residency" debugfs node. */
static const struct file_operations elpg_residency_fops = {
	.open		= elpg_residency_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
3739 | |||
/*
 * debugfs show handler: print the total number of ELPG gating
 * transitions (software-accumulated count plus the live PMU count
 * when the GPU is powered on).
 */
static int elpg_transitions_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	/* ingating/ungating times are required by the query API but unused */
	u32 ingating_time, ungating_time, total_gating_cnt;
	u32 gating_cnt = 0;

	if (g->power_on) {
		gk20a_busy(g->dev);
		gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
			&ungating_time, &gating_cnt);
		gk20a_idle(g->dev);
	}
	total_gating_cnt = g->pg_gating_cnt + gating_cnt;

	seq_printf(s, "%u\n", total_gating_cnt);
	return 0;

}
3758 | |||
3759 | static int elpg_transitions_open(struct inode *inode, struct file *file) | ||
3760 | { | ||
3761 | return single_open(file, elpg_transitions_show, inode->i_private); | ||
3762 | } | ||
3763 | |||
/* Read-only seq_file operations for the "elpg_transitions" debugfs node. */
static const struct file_operations elpg_transitions_fops = {
	.open		= elpg_transitions_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
3770 | |||
/*
 * Create the PMU debugfs nodes ("elpg_residency" and
 * "elpg_transitions") under the platform's debugfs directory.
 *
 * On any creation failure the whole platform debugfs tree is removed
 * and -ENOMEM is returned; returns 0 on success.
 */
int gk20a_pmu_debugfs_init(struct platform_device *dev)
{
	struct dentry *d;
	struct gk20a_platform *platform = platform_get_drvdata(dev);
	struct gk20a *g = get_gk20a(dev);

	/* NOTE(review): S_IWUSR is set but elpg_residency_fops has no
	 * .write handler — looks like the write bit is unnecessary;
	 * confirm before changing the mode. */
	d = debugfs_create_file(
		"elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
						&elpg_residency_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"elpg_transitions", S_IRUGO, platform->debugfs, g,
						&elpg_transitions_fops);
	if (!d)
		goto err_out;

	return 0;

err_out:
	pr_err("%s: Failed to make debugfs node\n", __func__);
	debugfs_remove_recursive(platform->debugfs);
	return -ENOMEM;
}
3796 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h new file mode 100644 index 00000000..c1b8ff1f --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | |||
@@ -0,0 +1,1097 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/pmu_gk20a.h | ||
3 | * | ||
4 | * GK20A PMU (aka. gPMU outside gk20a context) | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #ifndef __PMU_GK20A_H__ | ||
22 | #define __PMU_GK20A_H__ | ||
23 | |||
/* defined by pmu hw spec */
#define GK20A_PMU_VA_START		((128 * 1024) << 10)
#define GK20A_PMU_VA_SIZE		(512 * 1024 * 1024)
#define GK20A_PMU_INST_SIZE		(4 * 1024)
#define GK20A_PMU_UCODE_SIZE_MAX	(256 * 1024)
#define GK20A_PMU_SEQ_BUF_SIZE		4096

/* bitmask covering ZBC entries 1..i (bit 0 always cleared) */
#define ZBC_MASK(i)			(~(~(0) << ((i)+1)) & 0xfffe)

/* PMU Command/Message Interfaces for Adaptive Power */
/* Macro to get Histogram index */
#define PMU_AP_HISTOGRAM(idx)		(idx)
#define PMU_AP_HISTOGRAM_CONT		(4)

/* Total number of histogram bins */
#define PMU_AP_CFG_HISTOGRAM_BIN_N	(16)

/* Mapping between Idle counters and histograms */
#define PMU_AP_IDLE_MASK_HIST_IDX_0		(2)
#define PMU_AP_IDLE_MASK_HIST_IDX_1		(3)
#define PMU_AP_IDLE_MASK_HIST_IDX_2		(5)
#define PMU_AP_IDLE_MASK_HIST_IDX_3		(6)


/* Mapping between AP_CTRLs and Histograms */
#define PMU_AP_HISTOGRAM_IDX_GRAPHICS	(PMU_AP_HISTOGRAM(1))

/* Mapping between AP_CTRLs and Idle counters */
#define PMU_AP_IDLE_MASK_GRAPHICS	(PMU_AP_IDLE_MASK_HIST_IDX_1)

/* PMU ucode app_version values recognized by this driver
 * (compared against pmu_ucode_desc.app_version) */
#define APP_VERSION_1 17997577
#define APP_VERSION_0 16856675
56 | |||
57 | |||
/* Field selectors for the perfmon START command payload. */
enum pmu_perfmon_cmd_start_fields {
	COUNTER_ALLOC
};

/* Adaptive Power Controls (AP_CTRL) */
enum {
	PMU_AP_CTRL_ID_GRAPHICS = 0x0,
	/* PMU_AP_CTRL_ID_MS ,*/
	PMU_AP_CTRL_ID_MAX ,	/* number of controllers; array bound below */
};
68 | |||
69 | /* AP_CTRL Statistics */ | ||
/* AP_CTRL Statistics — layout shared with PMU firmware; do not reorder. */
struct pmu_ap_ctrl_stat {
	/*
	 * Represents whether AP is active or not
	 * TODO: This is NvBool in RM; is that 1 byte of 4 bytes?
	 */
	u8	b_active;

	/* Idle filter represented by histogram bin index */
	u8	idle_filter_x;
	u8	rsvd[2];	/* padding to align the 32-bit fields below */

	/* Total predicted power saving cycles. */
	s32	power_saving_h_cycles;

	/* Counts how many times AP gave us -ve power benefits. */
	u32	bad_decision_count;

	/*
	 * Number of times ap structure needs to skip AP iterations
	 * KICK_CTRL from kernel updates this parameter.
	 */
	u32	skip_count;

	/* histogram bin counters (see PMU_AP_CFG_HISTOGRAM_BIN_N) */
	u8	bin[PMU_AP_CFG_HISTOGRAM_BIN_N];
};
94 | |||
/* Parameters initialized by INITn APCTRL command */
struct pmu_ap_ctrl_init_params {
	/* Minimum idle filter value in Us */
	u32	min_idle_filter_us;

	/*
	 * Minimum Targeted Saving in Us. AP will update idle thresholds only
	 * if power saving achieved by updating idle thresholds is greater than
	 * Minimum targeted saving.
	 */
	u32	min_target_saving_us;

	/* Minimum targeted residency of power feature in Us */
	u32	power_break_even_us;

	/*
	 * Maximum number of allowed power feature cycles per sample.
	 *
	 * We are allowing at max "pgPerSampleMax" cycles in one iteration of AP
	 * AKA pgPerSampleMax in original algorithm.
	 */
	u32	cycles_per_sample_max;
};
118 | |||
/* AP Commands/Message structures */

/*
 * Structure for Generic AP Commands.
 * Every AP command variant starts with the same (cmd_type, cmd_id)
 * pair so the union below can be dispatched via .cmn.
 */
struct pmu_ap_cmd_common {
	u8	cmd_type;	/* always PMU_PG_CMD_ID_AP */
	u16	cmd_id;		/* one of PMU_AP_CMD_ID_* */
};

/*
 * Structure for INIT AP command
 */
struct pmu_ap_cmd_init {
	u8	cmd_type;
	u16	cmd_id;
	u8	rsvd;
	u32	pg_sampling_period_us;	/* AP sampling period */
};

/*
 * Structure for Enable/Disable ApCtrl Commands
 */
struct pmu_ap_cmd_enable_ctrl {
	u8	cmd_type;
	u16	cmd_id;

	u8	ctrl_id;	/* controller: PMU_AP_CTRL_ID_* */
};

struct pmu_ap_cmd_disable_ctrl {
	u8	cmd_type;
	u16	cmd_id;

	u8	ctrl_id;
};

/*
 * Structure for INIT command
 */
struct pmu_ap_cmd_init_ctrl {
	u8				cmd_type;
	u16				cmd_id;
	u8				ctrl_id;
	struct pmu_ap_ctrl_init_params	params;
};

/* INIT + ENABLE in a single command (same layout as init_ctrl) */
struct pmu_ap_cmd_init_and_enable_ctrl {
	u8				cmd_type;
	u16				cmd_id;
	u8				ctrl_id;
	struct pmu_ap_ctrl_init_params	params;
};

/*
 * Structure for KICK_CTRL command
 */
struct pmu_ap_cmd_kick_ctrl {
	u8	cmd_type;
	u16	cmd_id;
	u8	ctrl_id;

	u32	skip_count;	/* iterations the controller should skip */
};

/*
 * Structure for PARAM command
 */
struct pmu_ap_cmd_param {
	u8	cmd_type;
	u16	cmd_id;
	u8	ctrl_id;

	u32	data;
};

/*
 * Defines for AP commands
 */
enum {
	PMU_AP_CMD_ID_INIT = 0x0,
	PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL,
	PMU_AP_CMD_ID_ENABLE_CTRL,
	PMU_AP_CMD_ID_DISABLE_CTRL,
	PMU_AP_CMD_ID_KICK_CTRL,
};

/*
 * AP Command — variant selected by cmn.cmd_id (PMU_AP_CMD_ID_*).
 */
union pmu_ap_cmd {
	u8					cmd_type;
	struct pmu_ap_cmd_common		cmn;
	struct pmu_ap_cmd_init			init;
	struct pmu_ap_cmd_init_and_enable_ctrl	init_and_enable_ctrl;
	struct pmu_ap_cmd_enable_ctrl		enable_ctrl;
	struct pmu_ap_cmd_disable_ctrl		disable_ctrl;
	struct pmu_ap_cmd_kick_ctrl		kick_ctrl;
};
218 | |||
/*
 * Structure for generic AP Message.
 * All AP messages begin with (msg_type, msg_id) so the union below
 * can be dispatched via .cmn.
 */
struct pmu_ap_msg_common {
	u8	msg_type;
	u16	msg_id;		/* one of PMU_AP_MSG_ID_* */
};

/*
 * Structure for INIT_ACK Message
 */
struct pmu_ap_msg_init_ack {
	u8	msg_type;
	u16	msg_id;
	u8	ctrl_id;
	u32	stats_dmem_offset;	/* DMEM offset of the ctrl's stats */
};

/*
 * Defines for AP messages
 */
enum {
	PMU_AP_MSG_ID_INIT_ACK = 0x0,
};

/*
 * AP Message — variant selected by cmn.msg_id.
 */
union pmu_ap_msg {
	u8				msg_type;
	struct pmu_ap_msg_common	cmn;
	struct pmu_ap_msg_init_ack	init_ack;
};
252 | |||
/* Default Sampling Period of AELPG */
#define APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US	(1000000)

/* Default values of APCTRL parameters (see pmu_ap_ctrl_init_params) */
#define APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US	(100)
#define APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US	(10000)
#define APCTRL_POWER_BREAKEVEN_DEFAULT_US	(2000)
#define APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT	(100)

/*
 * Disable reason for Adaptive Power Controller
 * (bits recorded in ap_ctrl.disable_reason_mask)
 */
enum {
	APCTRL_DISABLE_REASON_RM_UNLOAD,
	APCTRL_DISABLE_REASON_RMCTRL,
};
269 | |||
/*
 * Adaptive Power Controller — per-controller driver-side state.
 */
struct ap_ctrl {
	u32			stats_dmem_offset;	/* stats location in PMU DMEM */
	u32			disable_reason_mask;	/* APCTRL_DISABLE_REASON_* bits */
	struct pmu_ap_ctrl_stat	stat_cache;		/* last stats read from PMU */
	u8			b_ready;		/* controller initialized */
};

/*
 * Adaptive Power structure
 *
 * ap structure provides generic infrastructure to make any power feature
 * adaptive.
 */
struct pmu_ap {
	u32		supported_mask;		/* bitmask of supported AP_CTRL ids */
	struct ap_ctrl	ap_ctrl[PMU_AP_CTRL_ID_MAX];
};
290 | |||
291 | |||
/* PMU DMA aperture indices (dma_idx values used in pmu_mem_*). */
enum {
	GK20A_PMU_DMAIDX_UCODE		= 0,
	GK20A_PMU_DMAIDX_VIRT		= 1,
	GK20A_PMU_DMAIDX_PHYS_VID	= 2,
	GK20A_PMU_DMAIDX_PHYS_SYS_COH	= 3,
	GK20A_PMU_DMAIDX_PHYS_SYS_NCOH	= 4,
	GK20A_PMU_DMAIDX_RSVD		= 5,
	GK20A_PMU_DMAIDX_PELPG		= 6,
	GK20A_PMU_DMAIDX_END		= 7
};

/* Memory descriptor shared with PMU firmware (interface v0). */
struct pmu_mem_v0 {
	u32	dma_base;	/* 256-byte block address */
	u8	dma_offset;
	u8	dma_idx;	/* one of GK20A_PMU_DMAIDX_* */
};

/* Memory descriptor shared with PMU firmware (interface v1, adds size). */
struct pmu_mem_v1 {
	u32	dma_base;
	u8	dma_offset;
	u8	dma_idx;
	u16	fb_size;
};

/* A (size, offset) allocation within PMU DMEM. */
struct pmu_dmem {
	u16	size;
	u32	offset;
};
320 | |||
/* Make sure size of this structure is a multiple of 4 bytes */
struct pmu_cmdline_args_v0 {
	u32 cpu_freq_hz;		/* Frequency of the clock driving PMU */
	u32 falc_trace_size;		/* falctrace buffer size (bytes) */
	u32 falc_trace_dma_base;	/* 256-byte block address */
	u32 falc_trace_dma_idx;		/* dmaIdx for DMA operations */
	struct pmu_mem_v0 gc6_ctx;	/* dmem offset of gc6 context */
};

/* v1 layout adds the secure_mode flag; must also stay 4-byte multiple. */
struct pmu_cmdline_args_v1 {
	u32 cpu_freq_hz;		/* Frequency of the clock driving PMU */
	u32 falc_trace_size;		/* falctrace buffer size (bytes) */
	u32 falc_trace_dma_base;	/* 256-byte block address */
	u32 falc_trace_dma_idx;		/* dmaIdx for DMA operations */
	u8 secure_mode;			/* nonzero when PMU runs secure ucode */
	struct pmu_mem_v1 gc6_ctx;	/* dmem offset of gc6 context */
};
338 | |||
339 | #define GK20A_PMU_DMEM_BLKSIZE2 8 | ||
340 | |||
341 | #define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32 | ||
342 | #define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64 | ||
343 | |||
/*
 * Descriptor describing the layout of the PMU falcon ucode image
 * (bootloader + application).  This mirrors a firmware-defined format:
 * field order and widths are ABI and must not change.  Offsets and sizes
 * are presumably in bytes unless the name says otherwise (imem/dmem
 * offsets are falcon-memory offsets) -- TODO confirm against ucode docs.
 */
struct pmu_ucode_desc {
	u32 descriptor_size;
	u32 image_size;
	u32 tools_version;
	u32 app_version;
	char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH];
	u32 bootloader_start_offset;
	u32 bootloader_size;
	u32 bootloader_imem_offset;
	u32 bootloader_entry_point;
	u32 app_start_offset;
	u32 app_size;
	u32 app_imem_offset;
	u32 app_imem_entry;
	u32 app_dmem_offset;
	u32 app_resident_code_offset; /* Offset from appStartOffset */
	u32 app_resident_code_size; /* Exact size of the resident code ( potentially contains CRC inside at the end ) */
	u32 app_resident_data_offset; /* Offset from appStartOffset */
	u32 app_resident_data_size; /* Exact size of the resident code ( potentially contains CRC inside at the end ) */
	u32 nb_overlays;
	/* per-overlay load ranges; at most GK20A_PMU_UCODE_NB_MAX_OVERLAY */
	struct {u32 start; u32 size;} load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY];
	u32 compressed;
};
367 | |||
368 | #define PMU_UNIT_REWIND (0x00) | ||
369 | #define PMU_UNIT_I2C (0x01) | ||
370 | #define PMU_UNIT_SEQ (0x02) | ||
371 | #define PMU_UNIT_PG (0x03) | ||
372 | #define PMU_UNIT_AVAILABLE1 (0x04) | ||
373 | #define PMU_UNIT_AVAILABLE2 (0x05) | ||
374 | #define PMU_UNIT_MEM (0x06) | ||
375 | #define PMU_UNIT_INIT (0x07) | ||
376 | #define PMU_UNIT_FBBA (0x08) | ||
377 | #define PMU_UNIT_DIDLE (0x09) | ||
378 | #define PMU_UNIT_AVAILABLE3 (0x0A) | ||
379 | #define PMU_UNIT_AVAILABLE4 (0x0B) | ||
380 | #define PMU_UNIT_HDCP_MAIN (0x0C) | ||
381 | #define PMU_UNIT_HDCP_V (0x0D) | ||
382 | #define PMU_UNIT_HDCP_SRM (0x0E) | ||
383 | #define PMU_UNIT_NVDPS (0x0F) | ||
384 | #define PMU_UNIT_DEINIT (0x10) | ||
385 | #define PMU_UNIT_AVAILABLE5 (0x11) | ||
386 | #define PMU_UNIT_PERFMON (0x12) | ||
387 | #define PMU_UNIT_FAN (0x13) | ||
388 | #define PMU_UNIT_PBI (0x14) | ||
389 | #define PMU_UNIT_ISOBLIT (0x15) | ||
390 | #define PMU_UNIT_DETACH (0x16) | ||
391 | #define PMU_UNIT_DISP (0x17) | ||
392 | #define PMU_UNIT_HDCP (0x18) | ||
393 | #define PMU_UNIT_REGCACHE (0x19) | ||
394 | #define PMU_UNIT_SYSMON (0x1A) | ||
395 | #define PMU_UNIT_THERM (0x1B) | ||
396 | #define PMU_UNIT_PMGR (0x1C) | ||
397 | #define PMU_UNIT_PERF (0x1D) | ||
398 | #define PMU_UNIT_PCM (0x1E) | ||
399 | #define PMU_UNIT_RC (0x1F) | ||
400 | #define PMU_UNIT_NULL (0x20) | ||
401 | #define PMU_UNIT_LOGGER (0x21) | ||
402 | #define PMU_UNIT_SMBPBI (0x22) | ||
403 | #define PMU_UNIT_END (0x23) | ||
404 | |||
405 | #define PMU_UNIT_TEST_START (0xFE) | ||
406 | #define PMU_UNIT_END_SIM (0xFF) | ||
407 | #define PMU_UNIT_TEST_END (0xFF) | ||
408 | |||
409 | #define PMU_UNIT_ID_IS_VALID(id) \ | ||
410 | (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) | ||
411 | |||
412 | #define PMU_DMEM_ALLOC_ALIGNMENT (32) | ||
413 | #define PMU_DMEM_ALIGNMENT (4) | ||
414 | |||
415 | #define PMU_CMD_FLAGS_PMU_MASK (0xF0) | ||
416 | |||
417 | #define PMU_CMD_FLAGS_STATUS BIT(0) | ||
418 | #define PMU_CMD_FLAGS_INTR BIT(1) | ||
419 | #define PMU_CMD_FLAGS_EVENT BIT(2) | ||
420 | #define PMU_CMD_FLAGS_WATERMARK BIT(3) | ||
421 | |||
/*
 * Common header carried at the front of every PMU command (struct pmu_cmd)
 * and message (struct pmu_msg).
 */
struct pmu_hdr {
	u8 unit_id;	/* destination/source unit, one of PMU_UNIT_* */
	u8 size;	/* total size in bytes -- presumably includes this header; confirm */
	u8 ctrl_flags;	/* PMU_CMD_FLAGS_* bits */
	u8 seq_id;	/* sequence id -- presumably matches cmd to its response; confirm */
};
428 | #define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr) | ||
429 | #define PMU_CMD_HDR_SIZE sizeof(struct pmu_hdr) | ||
430 | |||
431 | #define PMU_QUEUE_COUNT 5 | ||
432 | |||
433 | struct pmu_allocation_v0 { | ||
434 | u8 pad[3]; | ||
435 | u8 fb_mem_use; | ||
436 | struct { | ||
437 | struct pmu_dmem dmem; | ||
438 | struct pmu_mem_v0 fb; | ||
439 | } alloc; | ||
440 | }; | ||
441 | |||
442 | struct pmu_allocation_v1 { | ||
443 | struct { | ||
444 | struct pmu_dmem dmem; | ||
445 | struct pmu_mem_v1 fb; | ||
446 | } alloc; | ||
447 | }; | ||
448 | |||
449 | enum { | ||
450 | PMU_INIT_MSG_TYPE_PMU_INIT = 0, | ||
451 | }; | ||
452 | |||
453 | struct pmu_init_msg_pmu_v0 { | ||
454 | u8 msg_type; | ||
455 | u8 pad; | ||
456 | |||
457 | struct { | ||
458 | u16 size; | ||
459 | u16 offset; | ||
460 | u8 index; | ||
461 | u8 pad; | ||
462 | } queue_info[PMU_QUEUE_COUNT]; | ||
463 | |||
464 | u16 sw_managed_area_offset; | ||
465 | u16 sw_managed_area_size; | ||
466 | }; | ||
467 | |||
468 | struct pmu_init_msg_pmu_v1 { | ||
469 | u8 msg_type; | ||
470 | u8 pad; | ||
471 | u16 os_debug_entry_point; | ||
472 | |||
473 | struct { | ||
474 | u16 size; | ||
475 | u16 offset; | ||
476 | u8 index; | ||
477 | u8 pad; | ||
478 | } queue_info[PMU_QUEUE_COUNT]; | ||
479 | |||
480 | u16 sw_managed_area_offset; | ||
481 | u16 sw_managed_area_size; | ||
482 | }; | ||
483 | |||
484 | union pmu_init_msg_pmu { | ||
485 | struct pmu_init_msg_pmu_v0 v0; | ||
486 | struct pmu_init_msg_pmu_v1 v1; | ||
487 | }; | ||
488 | |||
489 | struct pmu_init_msg { | ||
490 | union { | ||
491 | u8 msg_type; | ||
492 | struct pmu_init_msg_pmu_v1 pmu_init_v1; | ||
493 | struct pmu_init_msg_pmu_v0 pmu_init_v0; | ||
494 | }; | ||
495 | }; | ||
496 | |||
497 | enum { | ||
498 | PMU_PG_ELPG_MSG_INIT_ACK, | ||
499 | PMU_PG_ELPG_MSG_DISALLOW_ACK, | ||
500 | PMU_PG_ELPG_MSG_ALLOW_ACK, | ||
501 | PMU_PG_ELPG_MSG_FREEZE_ACK, | ||
502 | PMU_PG_ELPG_MSG_FREEZE_ABORT, | ||
503 | PMU_PG_ELPG_MSG_UNFREEZE_ACK, | ||
504 | }; | ||
505 | |||
506 | struct pmu_pg_msg_elpg_msg { | ||
507 | u8 msg_type; | ||
508 | u8 engine_id; | ||
509 | u16 msg; | ||
510 | }; | ||
511 | |||
512 | enum { | ||
513 | PMU_PG_STAT_MSG_RESP_DMEM_OFFSET = 0, | ||
514 | }; | ||
515 | |||
516 | struct pmu_pg_msg_stat { | ||
517 | u8 msg_type; | ||
518 | u8 engine_id; | ||
519 | u16 sub_msg_id; | ||
520 | u32 data; | ||
521 | }; | ||
522 | |||
523 | enum { | ||
524 | PMU_PG_MSG_ENG_BUF_LOADED, | ||
525 | PMU_PG_MSG_ENG_BUF_UNLOADED, | ||
526 | PMU_PG_MSG_ENG_BUF_FAILED, | ||
527 | }; | ||
528 | |||
529 | struct pmu_pg_msg_eng_buf_stat { | ||
530 | u8 msg_type; | ||
531 | u8 engine_id; | ||
532 | u8 buf_idx; | ||
533 | u8 status; | ||
534 | }; | ||
535 | |||
536 | struct pmu_pg_msg { | ||
537 | union { | ||
538 | u8 msg_type; | ||
539 | struct pmu_pg_msg_elpg_msg elpg_msg; | ||
540 | struct pmu_pg_msg_stat stat; | ||
541 | struct pmu_pg_msg_eng_buf_stat eng_buf_stat; | ||
542 | /* TBD: other pg messages */ | ||
543 | union pmu_ap_msg ap_msg; | ||
544 | }; | ||
545 | }; | ||
546 | |||
547 | enum { | ||
548 | PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0, | ||
549 | }; | ||
550 | |||
551 | struct pmu_rc_msg_unhandled_cmd { | ||
552 | u8 msg_type; | ||
553 | u8 unit_id; | ||
554 | }; | ||
555 | |||
556 | struct pmu_rc_msg { | ||
557 | u8 msg_type; | ||
558 | struct pmu_rc_msg_unhandled_cmd unhandled_cmd; | ||
559 | }; | ||
560 | |||
561 | enum { | ||
562 | PMU_PG_CMD_ID_ELPG_CMD = 0, | ||
563 | PMU_PG_CMD_ID_ENG_BUF_LOAD, | ||
564 | PMU_PG_CMD_ID_ENG_BUF_UNLOAD, | ||
565 | PMU_PG_CMD_ID_PG_STAT, | ||
566 | PMU_PG_CMD_ID_PG_LOG_INIT, | ||
567 | PMU_PG_CMD_ID_PG_LOG_FLUSH, | ||
568 | PMU_PG_CMD_ID_PG_PARAM, | ||
569 | PMU_PG_CMD_ID_ELPG_INIT, | ||
570 | PMU_PG_CMD_ID_ELPG_POLL_CTXSAVE, | ||
571 | PMU_PG_CMD_ID_ELPG_ABORT_POLL, | ||
572 | PMU_PG_CMD_ID_ELPG_PWR_UP, | ||
573 | PMU_PG_CMD_ID_ELPG_DISALLOW, | ||
574 | PMU_PG_CMD_ID_ELPG_ALLOW, | ||
575 | PMU_PG_CMD_ID_AP, | ||
576 | RM_PMU_PG_CMD_ID_PSI, | ||
577 | RM_PMU_PG_CMD_ID_CG, | ||
578 | PMU_PG_CMD_ID_ZBC_TABLE_UPDATE, | ||
579 | PMU_PG_CMD_ID_PWR_RAIL_GATE_DISABLE = 0x20, | ||
580 | PMU_PG_CMD_ID_PWR_RAIL_GATE_ENABLE, | ||
581 | PMU_PG_CMD_ID_PWR_RAIL_SMU_MSG_DISABLE | ||
582 | }; | ||
583 | |||
584 | enum { | ||
585 | PMU_PG_ELPG_CMD_INIT, | ||
586 | PMU_PG_ELPG_CMD_DISALLOW, | ||
587 | PMU_PG_ELPG_CMD_ALLOW, | ||
588 | PMU_PG_ELPG_CMD_FREEZE, | ||
589 | PMU_PG_ELPG_CMD_UNFREEZE, | ||
590 | }; | ||
591 | |||
592 | struct pmu_pg_cmd_elpg_cmd { | ||
593 | u8 cmd_type; | ||
594 | u8 engine_id; | ||
595 | u16 cmd; | ||
596 | }; | ||
597 | |||
598 | struct pmu_pg_cmd_eng_buf_load { | ||
599 | u8 cmd_type; | ||
600 | u8 engine_id; | ||
601 | u8 buf_idx; | ||
602 | u8 pad; | ||
603 | u16 buf_size; | ||
604 | u32 dma_base; | ||
605 | u8 dma_offset; | ||
606 | u8 dma_idx; | ||
607 | }; | ||
608 | |||
609 | enum { | ||
610 | PMU_PG_STAT_CMD_ALLOC_DMEM = 0, | ||
611 | }; | ||
612 | |||
613 | struct pmu_pg_cmd_stat { | ||
614 | u8 cmd_type; | ||
615 | u8 engine_id; | ||
616 | u16 sub_cmd_id; | ||
617 | u32 data; | ||
618 | }; | ||
619 | |||
620 | struct pmu_pg_cmd { | ||
621 | union { | ||
622 | u8 cmd_type; | ||
623 | struct pmu_pg_cmd_elpg_cmd elpg_cmd; | ||
624 | struct pmu_pg_cmd_eng_buf_load eng_buf_load; | ||
625 | struct pmu_pg_cmd_stat stat; | ||
626 | /* TBD: other pg commands */ | ||
627 | union pmu_ap_cmd ap_cmd; | ||
628 | }; | ||
629 | }; | ||
630 | |||
631 | /* PERFMON */ | ||
632 | #define PMU_DOMAIN_GROUP_PSTATE 0 | ||
633 | #define PMU_DOMAIN_GROUP_GPC2CLK 1 | ||
634 | #define PMU_DOMAIN_GROUP_NUM 2 | ||
635 | |||
636 | /* TBD: smart strategy */ | ||
637 | #define PMU_PERFMON_PCT_TO_INC 58 | ||
638 | #define PMU_PERFMON_PCT_TO_DEC 23 | ||
639 | |||
640 | struct pmu_perfmon_counter { | ||
641 | u8 index; | ||
642 | u8 flags; | ||
643 | u8 group_id; | ||
644 | u8 valid; | ||
645 | u16 upper_threshold; /* units of 0.01% */ | ||
646 | u16 lower_threshold; /* units of 0.01% */ | ||
647 | }; | ||
648 | |||
649 | #define PMU_PERFMON_FLAG_ENABLE_INCREASE (0x00000001) | ||
650 | #define PMU_PERFMON_FLAG_ENABLE_DECREASE (0x00000002) | ||
651 | #define PMU_PERFMON_FLAG_CLEAR_PREV (0x00000004) | ||
652 | |||
653 | /* PERFMON CMD */ | ||
654 | enum { | ||
655 | PMU_PERFMON_CMD_ID_START = 0, | ||
656 | PMU_PERFMON_CMD_ID_STOP = 1, | ||
657 | PMU_PERFMON_CMD_ID_INIT = 2 | ||
658 | }; | ||
659 | |||
660 | struct pmu_perfmon_cmd_start_v1 { | ||
661 | u8 cmd_type; | ||
662 | u8 group_id; | ||
663 | u8 state_id; | ||
664 | u8 flags; | ||
665 | struct pmu_allocation_v1 counter_alloc; | ||
666 | }; | ||
667 | |||
668 | struct pmu_perfmon_cmd_start_v0 { | ||
669 | u8 cmd_type; | ||
670 | u8 group_id; | ||
671 | u8 state_id; | ||
672 | u8 flags; | ||
673 | struct pmu_allocation_v0 counter_alloc; | ||
674 | }; | ||
675 | |||
676 | struct pmu_perfmon_cmd_stop { | ||
677 | u8 cmd_type; | ||
678 | }; | ||
679 | |||
680 | struct pmu_perfmon_cmd_init_v1 { | ||
681 | u8 cmd_type; | ||
682 | u8 to_decrease_count; | ||
683 | u8 base_counter_id; | ||
684 | u32 sample_period_us; | ||
685 | struct pmu_allocation_v1 counter_alloc; | ||
686 | u8 num_counters; | ||
687 | u8 samples_in_moving_avg; | ||
688 | u16 sample_buffer; | ||
689 | }; | ||
690 | |||
691 | struct pmu_perfmon_cmd_init_v0 { | ||
692 | u8 cmd_type; | ||
693 | u8 to_decrease_count; | ||
694 | u8 base_counter_id; | ||
695 | u32 sample_period_us; | ||
696 | struct pmu_allocation_v0 counter_alloc; | ||
697 | u8 num_counters; | ||
698 | u8 samples_in_moving_avg; | ||
699 | u16 sample_buffer; | ||
700 | }; | ||
701 | |||
702 | struct pmu_perfmon_cmd { | ||
703 | union { | ||
704 | u8 cmd_type; | ||
705 | struct pmu_perfmon_cmd_start_v0 start_v0; | ||
706 | struct pmu_perfmon_cmd_start_v1 start_v1; | ||
707 | struct pmu_perfmon_cmd_stop stop; | ||
708 | struct pmu_perfmon_cmd_init_v0 init_v0; | ||
709 | struct pmu_perfmon_cmd_init_v1 init_v1; | ||
710 | }; | ||
711 | }; | ||
712 | |||
713 | struct pmu_zbc_cmd { | ||
714 | u8 cmd_type; | ||
715 | u8 pad; | ||
716 | u16 entry_mask; | ||
717 | }; | ||
718 | |||
719 | /* PERFMON MSG */ | ||
720 | enum { | ||
721 | PMU_PERFMON_MSG_ID_INCREASE_EVENT = 0, | ||
722 | PMU_PERFMON_MSG_ID_DECREASE_EVENT = 1, | ||
723 | PMU_PERFMON_MSG_ID_INIT_EVENT = 2, | ||
724 | PMU_PERFMON_MSG_ID_ACK = 3 | ||
725 | }; | ||
726 | |||
727 | struct pmu_perfmon_msg_generic { | ||
728 | u8 msg_type; | ||
729 | u8 state_id; | ||
730 | u8 group_id; | ||
731 | u8 data; | ||
732 | }; | ||
733 | |||
734 | struct pmu_perfmon_msg { | ||
735 | union { | ||
736 | u8 msg_type; | ||
737 | struct pmu_perfmon_msg_generic gen; | ||
738 | }; | ||
739 | }; | ||
740 | |||
741 | |||
742 | struct pmu_cmd { | ||
743 | struct pmu_hdr hdr; | ||
744 | union { | ||
745 | struct pmu_perfmon_cmd perfmon; | ||
746 | struct pmu_pg_cmd pg; | ||
747 | struct pmu_zbc_cmd zbc; | ||
748 | } cmd; | ||
749 | }; | ||
750 | |||
751 | struct pmu_msg { | ||
752 | struct pmu_hdr hdr; | ||
753 | union { | ||
754 | struct pmu_init_msg init; | ||
755 | struct pmu_perfmon_msg perfmon; | ||
756 | struct pmu_pg_msg pg; | ||
757 | struct pmu_rc_msg rc; | ||
758 | } msg; | ||
759 | }; | ||
760 | |||
761 | #define PMU_SHA1_GID_SIGNATURE 0xA7C66AD2 | ||
762 | #define PMU_SHA1_GID_SIGNATURE_SIZE 4 | ||
763 | |||
764 | #define PMU_SHA1_GID_SIZE 16 | ||
765 | |||
766 | struct pmu_sha1_gid { | ||
767 | bool valid; | ||
768 | u8 gid[PMU_SHA1_GID_SIZE]; | ||
769 | }; | ||
770 | |||
771 | struct pmu_sha1_gid_data { | ||
772 | u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE]; | ||
773 | u8 gid[PMU_SHA1_GID_SIZE]; | ||
774 | }; | ||
775 | |||
/* Logical PMU queue ids (index into pmu_gk20a.queue[]). */
#define PMU_COMMAND_QUEUE_HPQ		0	/* write by sw, read by pmu, protected by sw mutex lock */
#define PMU_COMMAND_QUEUE_LPQ		1	/* write by sw, read by pmu, protected by sw mutex lock */
#define PMU_COMMAND_QUEUE_BIOS		2	/* read/write by sw/hw, protected by hw pmu mutex, id = 2 */
#define PMU_COMMAND_QUEUE_SMI		3	/* read/write by sw/hw, protected by hw pmu mutex, id = 3 */
#define PMU_MESSAGE_QUEUE		4	/* write by pmu, read by sw, accessed by interrupt handler, no lock */
/* PMU_QUEUE_COUNT (5) is already defined earlier in this header; the
 * duplicate definition that used to live here has been removed. */
782 | |||
/*
 * HW PMU mutex identifiers.  PMU_MUTEX_ID_INVALID doubles as the count
 * of valid ids (see PMU_MUTEX_ID_IS_VALID below).
 */
enum {
	PMU_MUTEX_ID_RSVD1 = 0,
	PMU_MUTEX_ID_GPUSER,
	PMU_MUTEX_ID_QUEUE_BIOS,
	PMU_MUTEX_ID_QUEUE_SMI,
	PMU_MUTEX_ID_GPMUTEX,
	PMU_MUTEX_ID_I2C,
	PMU_MUTEX_ID_RMLOCK,
	PMU_MUTEX_ID_MSGBOX,
	PMU_MUTEX_ID_FIFO,
	PMU_MUTEX_ID_PG,
	PMU_MUTEX_ID_GR,
	PMU_MUTEX_ID_CLK,
	PMU_MUTEX_ID_RSVD6,
	PMU_MUTEX_ID_RSVD7,
	PMU_MUTEX_ID_RSVD8,
	PMU_MUTEX_ID_RSVD9,
	PMU_MUTEX_ID_INVALID
};
802 | |||
803 | #define PMU_IS_COMMAND_QUEUE(id) \ | ||
804 | ((id) < PMU_MESSAGE_QUEUE) | ||
805 | |||
806 | #define PMU_IS_SW_COMMAND_QUEUE(id) \ | ||
807 | (((id) == PMU_COMMAND_QUEUE_HPQ) || \ | ||
808 | ((id) == PMU_COMMAND_QUEUE_LPQ)) | ||
809 | |||
810 | #define PMU_IS_MESSAGE_QUEUE(id) \ | ||
811 | ((id) == PMU_MESSAGE_QUEUE) | ||
812 | |||
813 | enum | ||
814 | { | ||
815 | OFLAG_READ = 0, | ||
816 | OFLAG_WRITE | ||
817 | }; | ||
818 | |||
819 | #define QUEUE_SET (true) | ||
820 | #define QUEUE_GET (false) | ||
821 | |||
822 | #define QUEUE_ALIGNMENT (4) | ||
823 | |||
824 | #define PMU_PGENG_GR_BUFFER_IDX_INIT (0) | ||
825 | #define PMU_PGENG_GR_BUFFER_IDX_ZBC (1) | ||
826 | #define PMU_PGENG_GR_BUFFER_IDX_FECS (2) | ||
827 | |||
828 | enum | ||
829 | { | ||
830 | PMU_DMAIDX_UCODE = 0, | ||
831 | PMU_DMAIDX_VIRT = 1, | ||
832 | PMU_DMAIDX_PHYS_VID = 2, | ||
833 | PMU_DMAIDX_PHYS_SYS_COH = 3, | ||
834 | PMU_DMAIDX_PHYS_SYS_NCOH = 4, | ||
835 | PMU_DMAIDX_RSVD = 5, | ||
836 | PMU_DMAIDX_PELPG = 6, | ||
837 | PMU_DMAIDX_END = 7 | ||
838 | }; | ||
839 | |||
840 | struct pmu_gk20a; | ||
841 | struct pmu_queue; | ||
842 | |||
/*
 * Software state for one PMU command/message queue.  Which locking
 * mechanism applies depends on the queue id: BIOS/SMI queues use the hw
 * PMU mutex (mutex_id/mutex_lock); HPQ/LPQ use the kernel mutex.
 */
struct pmu_queue {

	/* used by hw, for BIOS/SMI queue */
	u32 mutex_id;
	u32 mutex_lock;
	/* used by sw, for LPQ/HPQ queue */
	struct mutex mutex;

	/* current write position */
	u32 position;
	/* physical dmem offset where this queue begins */
	u32 offset;
	/* logical queue identifier */
	u32 id;
	/* physical queue index */
	u32 index;
	/* in bytes */
	u32 size;

	/* open-flag */
	u32 oflag;	/* OFLAG_READ or OFLAG_WRITE */
	bool opened; /* opened implies locked */
	bool locked; /* check free space after setting locked but before setting opened */
};
867 | |||
868 | |||
869 | #define PMU_MUTEX_ID_IS_VALID(id) \ | ||
870 | ((id) < PMU_MUTEX_ID_INVALID) | ||
871 | |||
872 | #define PMU_INVALID_MUTEX_OWNER_ID (0) | ||
873 | |||
874 | struct pmu_mutex { | ||
875 | u32 id; | ||
876 | u32 index; | ||
877 | u32 ref_cnt; | ||
878 | }; | ||
879 | |||
880 | #define PMU_MAX_NUM_SEQUENCES (256) | ||
881 | #define PMU_SEQ_BIT_SHIFT (5) | ||
882 | #define PMU_SEQ_TBL_SIZE \ | ||
883 | (PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT) | ||
884 | |||
885 | #define PMU_INVALID_SEQ_DESC (~0) | ||
886 | |||
887 | enum | ||
888 | { | ||
889 | PMU_SEQ_STATE_FREE = 0, | ||
890 | PMU_SEQ_STATE_PENDING, | ||
891 | PMU_SEQ_STATE_USED, | ||
892 | PMU_SEQ_STATE_CANCELLED | ||
893 | }; | ||
894 | |||
895 | struct pmu_payload { | ||
896 | struct { | ||
897 | void *buf; | ||
898 | u32 offset; | ||
899 | u32 size; | ||
900 | } in, out; | ||
901 | }; | ||
902 | |||
903 | typedef void (*pmu_callback)(struct gk20a *, struct pmu_msg *, void *, u32, | ||
904 | u32); | ||
905 | |||
/*
 * Tracks one in-flight PMU command/response exchange.  Allocated from
 * pmu_gk20a.seq and indexed via pmu_seq_tbl; 'state' is one of the
 * PMU_SEQ_STATE_* values.  The in/out unions hold the dmem/fb allocation
 * for the command payload in either the v0 or v1 interface layout.
 */
struct pmu_sequence {
	u8 id;			/* sequence id */
	u32 state;		/* PMU_SEQ_STATE_* */
	u32 desc;		/* sequence descriptor, PMU_INVALID_SEQ_DESC when unset */
	struct pmu_msg *msg;	/* caller buffer for the response message */
	union {
		struct pmu_allocation_v0 in_v0;
		struct pmu_allocation_v1 in_v1;
	};
	union {
		struct pmu_allocation_v0 out_v0;
		struct pmu_allocation_v1 out_v1;
	};
	u8 *out_payload;	/* caller buffer for the output payload */
	pmu_callback callback;	/* invoked on completion -- presumably from ISR context; confirm */
	void* cb_params;	/* opaque argument passed to callback */
};
923 | |||
924 | struct pmu_pg_stats { | ||
925 | u64 pg_entry_start_timestamp; | ||
926 | u64 pg_ingating_start_timestamp; | ||
927 | u64 pg_exit_start_timestamp; | ||
928 | u64 pg_ungating_start_timestamp; | ||
929 | u32 pg_avg_entry_time_us; | ||
930 | u32 pg_ingating_cnt; | ||
931 | u32 pg_ingating_time_us; | ||
932 | u32 pg_avg_exit_time_us; | ||
933 | u32 pg_ungating_count; | ||
934 | u32 pg_ungating_time_us; | ||
935 | u32 pg_gating_cnt; | ||
936 | u32 pg_gating_deny_cnt; | ||
937 | }; | ||
938 | |||
939 | #define PMU_PG_IDLE_THRESHOLD_SIM 1000 | ||
940 | #define PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM 4000000 | ||
941 | /* TBD: QT or else ? */ | ||
942 | #define PMU_PG_IDLE_THRESHOLD 15000 | ||
943 | #define PMU_PG_POST_POWERUP_IDLE_THRESHOLD 1000000 | ||
944 | |||
945 | /* state transition : | ||
946 | OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF | ||
947 | ON => OFF is always synchronized */ | ||
948 | #define PMU_ELPG_STAT_OFF 0 /* elpg is off */ | ||
949 | #define PMU_ELPG_STAT_ON 1 /* elpg is on */ | ||
950 | #define PMU_ELPG_STAT_ON_PENDING 2 /* elpg is off, ALLOW cmd has been sent, wait for ack */ | ||
951 | #define PMU_ELPG_STAT_OFF_PENDING 3 /* elpg is on, DISALLOW cmd has been sent, wait for ack */ | ||
952 | #define PMU_ELPG_STAT_OFF_ON_PENDING 4 /* elpg is off, caller has requested on, but ALLOW | ||
953 | cmd hasn't been sent due to ENABLE_ALLOW delay */ | ||
954 | |||
955 | /* Falcon Register index */ | ||
956 | #define PMU_FALCON_REG_R0 (0) | ||
957 | #define PMU_FALCON_REG_R1 (1) | ||
958 | #define PMU_FALCON_REG_R2 (2) | ||
959 | #define PMU_FALCON_REG_R3 (3) | ||
960 | #define PMU_FALCON_REG_R4 (4) | ||
961 | #define PMU_FALCON_REG_R5 (5) | ||
962 | #define PMU_FALCON_REG_R6 (6) | ||
963 | #define PMU_FALCON_REG_R7 (7) | ||
964 | #define PMU_FALCON_REG_R8 (8) | ||
965 | #define PMU_FALCON_REG_R9 (9) | ||
966 | #define PMU_FALCON_REG_R10 (10) | ||
967 | #define PMU_FALCON_REG_R11 (11) | ||
968 | #define PMU_FALCON_REG_R12 (12) | ||
969 | #define PMU_FALCON_REG_R13 (13) | ||
970 | #define PMU_FALCON_REG_R14 (14) | ||
971 | #define PMU_FALCON_REG_R15 (15) | ||
972 | #define PMU_FALCON_REG_IV0 (16) | ||
973 | #define PMU_FALCON_REG_IV1 (17) | ||
974 | #define PMU_FALCON_REG_UNDEFINED (18) | ||
975 | #define PMU_FALCON_REG_EV (19) | ||
976 | #define PMU_FALCON_REG_SP (20) | ||
977 | #define PMU_FALCON_REG_PC (21) | ||
978 | #define PMU_FALCON_REG_IMB (22) | ||
979 | #define PMU_FALCON_REG_DMB (23) | ||
980 | #define PMU_FALCON_REG_CSW (24) | ||
981 | #define PMU_FALCON_REG_CCR (25) | ||
982 | #define PMU_FALCON_REG_SEC (26) | ||
983 | #define PMU_FALCON_REG_CTX (27) | ||
984 | #define PMU_FALCON_REG_EXCI (28) | ||
985 | #define PMU_FALCON_REG_RSVD0 (29) | ||
986 | #define PMU_FALCON_REG_RSVD1 (30) | ||
987 | #define PMU_FALCON_REG_RSVD2 (31) | ||
988 | #define PMU_FALCON_REG_SIZE (32) | ||
989 | |||
/*
 * Top-level driver state for the GK20A PMU (power management unit):
 * ucode image and descriptors, command/message queues, sequence tracking,
 * mutexes, ELPG (engine-level power gating) state and perfmon state.
 */
struct pmu_gk20a {

	struct gk20a *g;		/* back-pointer to the gpu device */

	struct pmu_ucode_desc *desc;	/* parsed ucode layout descriptor */
	struct pmu_mem_desc ucode;	/* ucode image memory */

	struct pmu_mem_desc pg_buf;	/* power-gating buffer */
	/* TBD: remove this if ZBC seq is fixed */
	struct pmu_mem_desc seq_buf;
	bool buf_loaded;

	struct pmu_sha1_gid gid_info;	/* ucode SHA1 group id, if present */

	struct pmu_queue queue[PMU_QUEUE_COUNT];

	/* sequence pool and allocation bitmap */
	struct pmu_sequence *seq;
	unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE];
	u32 next_seq_desc;

	struct pmu_mutex *mutex;
	u32 mutex_cnt;

	struct mutex pmu_copy_lock;	/* serializes dmem copy operations */
	struct mutex pmu_seq_lock;	/* protects sequence acquire/release */

	struct gk20a_allocator dmem;	/* allocator for PMU dmem */

	u32 *ucode_image;
	bool pmu_ready;			/* set once the PMU init message arrived */

	u32 zbc_save_done;

	u32 stat_dmem_offset;

	bool elpg_ready;
	u32 elpg_stat;			/* PMU_ELPG_STAT_* state machine value */
	wait_queue_head_t pg_wq;	/* waiters for power-gating acks */

#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */
	struct delayed_work elpg_enable; /* deferred elpg enable */
	struct work_struct pg_init;
	bool elpg_enable_allow; /* true after init, false after disable, true after delay */
	struct mutex elpg_mutex; /* protect elpg enable/disable */
	int elpg_refcnt; /* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */

	struct pmu_perfmon_counter perfmon_counter;
	u32 perfmon_state_id[PMU_DOMAIN_GROUP_NUM];

	bool initialized;

	/* teardown hook installed by the setup code */
	void (*remove_support)(struct pmu_gk20a *pmu);
	bool sw_ready;
	bool perfmon_ready;

	u32 sample_buffer;

	struct mutex isr_mutex;		/* serializes ISR vs. teardown */
	bool zbc_ready;
	/* cmdline args blob passed to the PMU, layout depends on ucode version */
	union {
		struct pmu_cmdline_args_v0 args_v0;
		struct pmu_cmdline_args_v1 args_v1;
	};
};
1054 | |||
/*
 * Snapshot of the pmu_gk20a fields that survive a PMU teardown/re-init
 * cycle -- NOTE(review): presumably used across railgate/suspend; the
 * save/restore code is not in this file, confirm there.  Field meanings
 * match the identically-named members of struct pmu_gk20a.
 */
struct gk20a_pmu_save_state {
	struct pmu_sequence *seq;
	u32 next_seq_desc;
	struct pmu_mutex *mutex;
	u32 mutex_cnt;
	struct pmu_ucode_desc *desc;
	struct pmu_mem_desc ucode;
	struct pmu_mem_desc seq_buf;
	struct pmu_mem_desc pg_buf;
	struct delayed_work elpg_enable;
	wait_queue_head_t pg_wq;
	bool sw_ready;
	struct work_struct pg_init;
};
1069 | |||
1070 | int gk20a_init_pmu_support(struct gk20a *g); | ||
1071 | int gk20a_init_pmu_setup_hw2(struct gk20a *g); | ||
1072 | |||
1073 | void gk20a_pmu_isr(struct gk20a *g); | ||
1074 | |||
1075 | /* send a cmd to pmu */ | ||
1076 | int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, struct pmu_msg *msg, | ||
1077 | struct pmu_payload *payload, u32 queue_id, | ||
1078 | pmu_callback callback, void* cb_param, | ||
1079 | u32 *seq_desc, unsigned long timeout); | ||
1080 | |||
1081 | int gk20a_pmu_enable_elpg(struct gk20a *g); | ||
1082 | int gk20a_pmu_disable_elpg(struct gk20a *g); | ||
1083 | |||
1084 | void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries); | ||
1085 | |||
1086 | int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable); | ||
1087 | |||
1088 | int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token); | ||
1089 | int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token); | ||
1090 | int gk20a_pmu_destroy(struct gk20a *g); | ||
1091 | int gk20a_pmu_load_norm(struct gk20a *g, u32 *load); | ||
1092 | int gk20a_pmu_debugfs_init(struct platform_device *dev); | ||
1093 | void gk20a_pmu_reset_load_counters(struct gk20a *g); | ||
1094 | void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, | ||
1095 | u32 *total_cycles); | ||
1096 | |||
1097 | #endif /*__PMU_GK20A_H__*/ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c new file mode 100644 index 00000000..aea1a80b --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c | |||
@@ -0,0 +1,91 @@ | |||
1 | /* | ||
2 | * GK20A priv ring | ||
3 | * | ||
4 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/delay.h> /* for mdelay */ | ||
20 | |||
21 | #include "gk20a.h" | ||
22 | #include "hw_mc_gk20a.h" | ||
23 | #include "hw_pri_ringmaster_gk20a.h" | ||
24 | #include "hw_pri_ringstation_sys_gk20a.h" | ||
25 | #include "hw_trim_gk20a.h" | ||
26 | |||
27 | void gk20a_reset_priv_ring(struct gk20a *g) | ||
28 | { | ||
29 | u32 data; | ||
30 | |||
31 | if (tegra_platform_is_linsim()) | ||
32 | return; | ||
33 | |||
34 | data = gk20a_readl(g, trim_sys_gpc2clk_out_r()); | ||
35 | data = set_field(data, | ||
36 | trim_sys_gpc2clk_out_bypdiv_m(), | ||
37 | trim_sys_gpc2clk_out_bypdiv_f(0)); | ||
38 | gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); | ||
39 | |||
40 | gk20a_reset(g, mc_enable_priv_ring_enabled_f()); | ||
41 | |||
42 | gk20a_writel(g,pri_ringmaster_command_r(), | ||
43 | 0x4); | ||
44 | |||
45 | gk20a_writel(g, pri_ringstation_sys_decode_config_r(), | ||
46 | 0x2); | ||
47 | |||
48 | gk20a_readl(g, pri_ringstation_sys_decode_config_r()); | ||
49 | } | ||
50 | |||
51 | void gk20a_priv_ring_isr(struct gk20a *g) | ||
52 | { | ||
53 | u32 status0, status1; | ||
54 | u32 cmd; | ||
55 | s32 retry = 100; | ||
56 | |||
57 | if (tegra_platform_is_linsim()) | ||
58 | return; | ||
59 | |||
60 | status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r()); | ||
61 | status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r()); | ||
62 | |||
63 | gk20a_dbg_info("ringmaster intr status0: 0x%08x," | ||
64 | "status1: 0x%08x", status0, status1); | ||
65 | |||
66 | if (status0 & (0x1 | 0x2 | 0x4)) { | ||
67 | gk20a_reset_priv_ring(g); | ||
68 | } | ||
69 | |||
70 | cmd = gk20a_readl(g, pri_ringmaster_command_r()); | ||
71 | cmd = set_field(cmd, pri_ringmaster_command_cmd_m(), | ||
72 | pri_ringmaster_command_cmd_ack_interrupt_f()); | ||
73 | gk20a_writel(g, pri_ringmaster_command_r(), cmd); | ||
74 | |||
75 | do { | ||
76 | cmd = pri_ringmaster_command_cmd_v( | ||
77 | gk20a_readl(g, pri_ringmaster_command_r())); | ||
78 | usleep_range(20, 40); | ||
79 | } while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry); | ||
80 | |||
81 | if (retry <= 0) | ||
82 | gk20a_warn(dev_from_gk20a(g), | ||
83 | "priv ringmaster cmd ack too many retries"); | ||
84 | |||
85 | status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r()); | ||
86 | status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r()); | ||
87 | |||
88 | gk20a_dbg_info("ringmaster intr status0: 0x%08x," | ||
89 | " status1: 0x%08x", status0, status1); | ||
90 | } | ||
91 | |||
diff --git a/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h new file mode 100644 index 00000000..cb9d49c7 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/priv_ring_gk20a.h | ||
3 | * | ||
4 | * GK20A PRIV ringmaster | ||
5 | * | ||
6 | * Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #ifndef __PRIV_RING_GK20A_H__ | ||
22 | #define __PRIV_RING_GK20A_H__ | ||
23 | |||
24 | void gk20a_reset_priv_ring(struct gk20a *g); | ||
25 | void gk20a_priv_ring_isr(struct gk20a *g); | ||
26 | |||
27 | #endif /*__PRIV_RING_GK20A_H__*/ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c new file mode 100644 index 00000000..4a115fb1 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c | |||
@@ -0,0 +1,704 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Tegra GK20A GPU Debugger Driver Register Ops | ||
4 | * | ||
5 | * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms and conditions of the GNU General Public License, | ||
9 | * version 2, as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
14 | * more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
18 | */ | ||
19 | |||
20 | #include <linux/slab.h> | ||
21 | #include <linux/err.h> | ||
22 | #include <linux/bsearch.h> | ||
23 | #include <linux/nvhost_dbg_gpu_ioctl.h> | ||
24 | |||
25 | #include "gk20a.h" | ||
26 | #include "gr_gk20a.h" | ||
27 | #include "dbg_gpu_gk20a.h" | ||
28 | #include "regops_gk20a.h" | ||
29 | |||
30 | |||
31 | |||
/*
 * One whitelist entry: a 24-bit BAR0 byte offset and the number of
 * consecutive 32-bit registers starting there, packed into one 32-bit
 * word via bitfields.
 */
struct regop_offset_range {
	u32 base:24;	/* starting byte offset (24-bit register space) */
	u32 count:8;	/* number of consecutive 4-byte registers */
};
36 | |||
37 | static int regop_bsearch_range_cmp(const void *pkey, const void *pelem) | ||
38 | { | ||
39 | u32 key = *(u32 *)pkey; | ||
40 | struct regop_offset_range *prange = (struct regop_offset_range *)pelem; | ||
41 | if (key < prange->base) | ||
42 | return -1; | ||
43 | else if (prange->base <= key && key < (prange->base + | ||
44 | (prange->count * 4))) | ||
45 | return 0; | ||
46 | return 1; | ||
47 | } | ||
48 | |||
49 | static inline bool linear_search(u32 offset, const u32 *list, int size) | ||
50 | { | ||
51 | int i; | ||
52 | for (i = 0; i < size; i++) | ||
53 | if (list[i] == offset) | ||
54 | return true; | ||
55 | return false; | ||
56 | } | ||
57 | |||
/*
 * BAR0 register ranges any regops session may access (global list).
 * Entries are { base byte offset, count of consecutive 32-bit regs };
 * the table is probed with bsearch()/regop_bsearch_range_cmp(), so it
 * must remain sorted by base.
 */
static const struct regop_offset_range gk20a_global_whitelist_ranges[] = {
	{ 0x000004f0, 1 },
	{ 0x00001a00, 3 },
	{ 0x0000259c, 1 },
	{ 0x0000280c, 1 },
	{ 0x00009400, 1 },
	{ 0x00009410, 1 },
	{ 0x00020200, 1 },
	{ 0x00022430, 7 },
	{ 0x00022548, 1 },
	{ 0x00100c18, 3 },
	{ 0x00100c84, 1 },
	{ 0x00100cc4, 1 },
	{ 0x00106640, 1 },
	{ 0x0010a0a8, 1 },
	{ 0x0010a4f0, 1 },
	{ 0x0010e064, 1 },
	{ 0x0010e164, 1 },
	{ 0x0010e490, 1 },
	{ 0x00110100, 1 },
	{ 0x00140028, 1 },
	{ 0x001408dc, 1 },
	{ 0x00140a5c, 1 },
	{ 0x001410dc, 1 },
	{ 0x0014125c, 1 },
	{ 0x0017e028, 1 },
	{ 0x0017e8dc, 1 },
	{ 0x0017ea5c, 1 },
	{ 0x0017f0dc, 1 },
	{ 0x0017f25c, 1 },
	{ 0x00180000, 68 },
	{ 0x00180200, 68 },
	{ 0x001a0000, 68 },
	{ 0x001b0000, 68 },
	{ 0x001b0200, 68 },
	{ 0x001b0400, 68 },
	{ 0x001b0600, 68 },
	{ 0x001b4000, 3 },
	{ 0x001b4010, 3 },
	{ 0x001b4020, 3 },
	{ 0x001b4040, 3 },
	{ 0x001b4050, 3 },
	{ 0x001b4060, 16 },
	{ 0x001b40a4, 1 },
	{ 0x001b4100, 6 },
	{ 0x001b4124, 2 },
	{ 0x001b8000, 7 },
	{ 0x001bc000, 7 },
	{ 0x001be000, 7 },
	{ 0x00400500, 1 },
	{ 0x00400700, 1 },
	{ 0x0040415c, 1 },
	{ 0x00405850, 1 },
	{ 0x00405908, 1 },
	{ 0x00405b40, 1 },
	{ 0x00405b50, 1 },
	{ 0x00406024, 1 },
	{ 0x00407010, 1 },
	{ 0x00407808, 1 },
	{ 0x0040803c, 1 },
	{ 0x0040880c, 1 },
	{ 0x00408910, 1 },
	{ 0x00408984, 1 },
	{ 0x004090a8, 1 },
	{ 0x004098a0, 1 },
	{ 0x0041000c, 1 },
	{ 0x00410110, 1 },
	{ 0x00410184, 1 },
	{ 0x00418384, 1 },
	{ 0x004184a0, 1 },
	{ 0x00418604, 1 },
	{ 0x00418680, 1 },
	{ 0x00418714, 1 },
	{ 0x0041881c, 1 },
	{ 0x004188c8, 2 },
	{ 0x00418b04, 1 },
	{ 0x00418c04, 1 },
	{ 0x00418c64, 2 },
	{ 0x00418c88, 1 },
	{ 0x00418cb4, 2 },
	{ 0x00418d00, 1 },
	{ 0x00418d28, 2 },
	{ 0x00418e08, 1 },
	{ 0x00418e1c, 2 },
	{ 0x00418f08, 1 },
	{ 0x00418f20, 2 },
	{ 0x00419000, 1 },
	{ 0x0041900c, 1 },
	{ 0x00419018, 1 },
	{ 0x00419854, 1 },
	{ 0x00419ab0, 1 },
	{ 0x00419ab8, 3 },
	{ 0x00419ac8, 1 },
	{ 0x00419c0c, 1 },
	{ 0x00419c8c, 3 },
	{ 0x00419ca8, 1 },
	{ 0x00419d08, 2 },
	{ 0x00419e00, 1 },
	{ 0x00419e0c, 1 },
	{ 0x00419e14, 2 },
	{ 0x00419e24, 2 },
	{ 0x00419e34, 2 },
	{ 0x00419e44, 4 },
	{ 0x00419ea4, 1 },
	{ 0x00419eb0, 1 },
	{ 0x0041a0a0, 1 },
	{ 0x0041a0a8, 1 },
	{ 0x0041a17c, 1 },
	{ 0x0041a890, 2 },
	{ 0x0041a8a0, 3 },
	{ 0x0041a8b0, 2 },
	{ 0x0041b014, 1 },
	{ 0x0041b0a0, 1 },
	{ 0x0041b0cc, 1 },
	{ 0x0041b0e8, 2 },
	{ 0x0041b1dc, 1 },
	{ 0x0041b1f8, 2 },
	{ 0x0041be14, 1 },
	{ 0x0041bea0, 1 },
	{ 0x0041becc, 1 },
	{ 0x0041bee8, 2 },
	{ 0x0041bfdc, 1 },
	{ 0x0041bff8, 2 },
	{ 0x0041c054, 1 },
	{ 0x0041c2b0, 1 },
	{ 0x0041c2b8, 3 },
	{ 0x0041c2c8, 1 },
	{ 0x0041c40c, 1 },
	{ 0x0041c48c, 3 },
	{ 0x0041c4a8, 1 },
	{ 0x0041c508, 2 },
	{ 0x0041c600, 1 },
	{ 0x0041c60c, 1 },
	{ 0x0041c614, 2 },
	{ 0x0041c624, 2 },
	{ 0x0041c634, 2 },
	{ 0x0041c644, 4 },
	{ 0x0041c6a4, 1 },
	{ 0x0041c6b0, 1 },
	{ 0x00500384, 1 },
	{ 0x005004a0, 1 },
	{ 0x00500604, 1 },
	{ 0x00500680, 1 },
	{ 0x00500714, 1 },
	{ 0x0050081c, 1 },
	{ 0x005008c8, 2 },
	{ 0x00500b04, 1 },
	{ 0x00500c04, 1 },
	{ 0x00500c64, 2 },
	{ 0x00500c88, 1 },
	{ 0x00500cb4, 2 },
	{ 0x00500d00, 1 },
	{ 0x00500d28, 2 },
	{ 0x00500e08, 1 },
	{ 0x00500e1c, 2 },
	{ 0x00500f08, 1 },
	{ 0x00500f20, 2 },
	{ 0x00501000, 1 },
	{ 0x0050100c, 1 },
	{ 0x00501018, 1 },
	{ 0x00501854, 1 },
	{ 0x00501ab0, 1 },
	{ 0x00501ab8, 3 },
	{ 0x00501ac8, 1 },
	{ 0x00501c0c, 1 },
	{ 0x00501c8c, 3 },
	{ 0x00501ca8, 1 },
	{ 0x00501d08, 2 },
	{ 0x00501e00, 1 },
	{ 0x00501e0c, 1 },
	{ 0x00501e14, 2 },
	{ 0x00501e24, 2 },
	{ 0x00501e34, 2 },
	{ 0x00501e44, 4 },
	{ 0x00501ea4, 1 },
	{ 0x00501eb0, 1 },
	{ 0x005020a0, 1 },
	{ 0x005020a8, 1 },
	{ 0x0050217c, 1 },
	{ 0x00502890, 2 },
	{ 0x005028a0, 3 },
	{ 0x005028b0, 2 },
	{ 0x00503014, 1 },
	{ 0x005030a0, 1 },
	{ 0x005030cc, 1 },
	{ 0x005030e8, 2 },
	{ 0x005031dc, 1 },
	{ 0x005031f8, 2 },
	{ 0x00503e14, 1 },
	{ 0x00503ea0, 1 },
	{ 0x00503ecc, 1 },
	{ 0x00503ee8, 2 },
	{ 0x00503fdc, 1 },
	{ 0x00503ff8, 2 },
	{ 0x00504054, 1 },
	{ 0x005042b0, 1 },
	{ 0x005042b8, 3 },
	{ 0x005042c8, 1 },
	{ 0x0050440c, 1 },
	{ 0x0050448c, 3 },
	{ 0x005044a8, 1 },
	{ 0x00504508, 2 },
	{ 0x00504600, 1 },
	{ 0x0050460c, 1 },
	{ 0x00504614, 2 },
	{ 0x00504624, 2 },
	{ 0x00504634, 2 },
	{ 0x00504644, 4 },
	{ 0x005046a4, 1 },
	{ 0x005046b0, 1 },
};
static const u32 gk20a_global_whitelist_ranges_count =
	ARRAY_SIZE(gk20a_global_whitelist_ranges);
271 | |||
272 | /* context */ | ||
273 | |||
/*
 * Additional ranges available to a debug session bound to a channel
 * (searched with bsearch(); must remain sorted by base).
 */
static const struct regop_offset_range gk20a_context_whitelist_ranges[] = {
	{ 0x0000280c, 1 },
	{ 0x00100cc4, 1 },
	{ 0x00400500, 1 },
	{ 0x00405b40, 1 },
	{ 0x00419000, 1 },
	{ 0x00419c8c, 3 },
	{ 0x00419d08, 2 },
	{ 0x00419e04, 3 },
	{ 0x00419e14, 2 },
	{ 0x00419e24, 2 },
	{ 0x00419e34, 2 },
	{ 0x00419e44, 4 },
	{ 0x00419e58, 6 },
	{ 0x00419e84, 5 },
	{ 0x00419ea4, 1 },
	{ 0x00419eac, 2 },
	{ 0x00419f30, 8 },
	{ 0x0041c48c, 3 },
	{ 0x0041c508, 2 },
	{ 0x0041c604, 3 },
	{ 0x0041c614, 2 },
	{ 0x0041c624, 2 },
	{ 0x0041c634, 2 },
	{ 0x0041c644, 4 },
	{ 0x0041c658, 6 },
	{ 0x0041c684, 5 },
	{ 0x0041c6a4, 1 },
	{ 0x0041c6ac, 2 },
	{ 0x0041c730, 8 },
	{ 0x00501000, 1 },
	{ 0x00501c8c, 3 },
	{ 0x00501d08, 2 },
	{ 0x00501e04, 3 },
	{ 0x00501e14, 2 },
	{ 0x00501e24, 2 },
	{ 0x00501e34, 2 },
	{ 0x00501e44, 4 },
	{ 0x00501e58, 6 },
	{ 0x00501e84, 5 },
	{ 0x00501ea4, 1 },
	{ 0x00501eac, 2 },
	{ 0x00501f30, 8 },
	{ 0x0050448c, 3 },
	{ 0x00504508, 2 },
	{ 0x00504604, 3 },
	{ 0x00504614, 2 },
	{ 0x00504624, 2 },
	{ 0x00504634, 2 },
	{ 0x00504644, 4 },
	{ 0x00504658, 6 },
	{ 0x00504684, 5 },
	{ 0x005046a4, 1 },
	{ 0x005046ac, 2 },
	{ 0x00504730, 8 },
};
static const u32 gk20a_context_whitelist_ranges_count =
	ARRAY_SIZE(gk20a_context_whitelist_ranges);
332 | |||
333 | /* runcontrol */ | ||
/* Runcontrol registers, probed with linear_search() from check_whitelists(). */
static const u32 gk20a_runcontrol_whitelist[] = {
	0x00419e10,
	0x0041c610,
	0x00501e10,
	0x00504610,
};
static const u32 gk20a_runcontrol_whitelist_count =
	ARRAY_SIZE(gk20a_runcontrol_whitelist);
342 | |||
/* Same runcontrol registers in range form.  NOTE(review): not referenced
 * by the code visible in this file — confirm external users before removal. */
static const struct regop_offset_range gk20a_runcontrol_whitelist_ranges[] = {
	{ 0x00419e10, 1 },
	{ 0x0041c610, 1 },
	{ 0x00501e10, 1 },
	{ 0x00504610, 1 },
};
static const u32 gk20a_runcontrol_whitelist_ranges_count =
	ARRAY_SIZE(gk20a_runcontrol_whitelist_ranges);
351 | |||
352 | |||
353 | /* quad ctl */ | ||
/* Quad-control registers, probed with linear_search() for TYPE_GR_CTX_QUAD ops. */
static const u32 gk20a_qctl_whitelist[] = {
	0x00504670,
	0x00504674,
	0x00504678,
	0x0050467c,
	0x00504680,
	0x00504730,
	0x00504734,
	0x00504738,
	0x0050473c,
};
static const u32 gk20a_qctl_whitelist_count =
	ARRAY_SIZE(gk20a_qctl_whitelist);
367 | |||
/* Quad-control registers in range form.  NOTE(review): not referenced
 * by the code visible in this file — confirm external users before removal. */
static const struct regop_offset_range gk20a_qctl_whitelist_ranges[] = {
	{ 0x00504670, 1 },
	{ 0x00504730, 4 },
};
static const u32 gk20a_qctl_whitelist_ranges_count =
	ARRAY_SIZE(gk20a_qctl_whitelist_ranges);
374 | |||
375 | |||
376 | |||
377 | |||
/* Forward declaration; definition follows the whitelist tables below. */
static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
			    u32 *ctx_rd_count, u32 *ctx_wr_count,
			    struct nvhost_dbg_gpu_reg_op *ops,
			    u32 op_count);
382 | |||
383 | |||
384 | int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s, | ||
385 | struct nvhost_dbg_gpu_reg_op *ops, | ||
386 | u64 num_ops) | ||
387 | { | ||
388 | int err = 0, i; | ||
389 | struct channel_gk20a *ch = NULL; | ||
390 | struct gk20a *g = dbg_s->g; | ||
391 | /*struct gr_gk20a *gr = &g->gr;*/ | ||
392 | u32 data32_lo = 0, data32_hi = 0; | ||
393 | u32 ctx_rd_count = 0, ctx_wr_count = 0; | ||
394 | bool skip_read_lo, skip_read_hi; | ||
395 | bool ok; | ||
396 | |||
397 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | ||
398 | |||
399 | ch = dbg_s->ch; | ||
400 | |||
401 | ok = validate_reg_ops(dbg_s, | ||
402 | &ctx_rd_count, &ctx_wr_count, | ||
403 | ops, num_ops); | ||
404 | if (!ok) { | ||
405 | dev_err(dbg_s->dev, "invalid op(s)"); | ||
406 | err = -EINVAL; | ||
407 | /* each op has its own err/status */ | ||
408 | goto clean_up; | ||
409 | } | ||
410 | |||
411 | for (i = 0; i < num_ops; i++) { | ||
412 | /* if it isn't global then it is done in the ctx ops... */ | ||
413 | if (ops[i].type != REGOP(TYPE_GLOBAL)) | ||
414 | continue; | ||
415 | |||
416 | switch (ops[i].op) { | ||
417 | |||
418 | case REGOP(READ_32): | ||
419 | ops[i].value_hi = 0; | ||
420 | ops[i].value_lo = gk20a_readl(g, ops[i].offset); | ||
421 | gk20a_dbg(gpu_dbg_gpu_dbg, "read_32 0x%08x from 0x%08x", | ||
422 | ops[i].value_lo, ops[i].offset); | ||
423 | |||
424 | break; | ||
425 | |||
426 | case REGOP(READ_64): | ||
427 | ops[i].value_lo = gk20a_readl(g, ops[i].offset); | ||
428 | ops[i].value_hi = | ||
429 | gk20a_readl(g, ops[i].offset + 4); | ||
430 | |||
431 | gk20a_dbg(gpu_dbg_gpu_dbg, "read_64 0x%08x:%08x from 0x%08x", | ||
432 | ops[i].value_hi, ops[i].value_lo, | ||
433 | ops[i].offset); | ||
434 | break; | ||
435 | |||
436 | case REGOP(WRITE_32): | ||
437 | case REGOP(WRITE_64): | ||
438 | /* some of this appears wonky/unnecessary but | ||
439 | we've kept it for compat with existing | ||
440 | debugger code. just in case... */ | ||
441 | skip_read_lo = skip_read_hi = false; | ||
442 | if (ops[i].and_n_mask_lo == ~(u32)0) { | ||
443 | data32_lo = ops[i].value_lo; | ||
444 | skip_read_lo = true; | ||
445 | } | ||
446 | |||
447 | if ((ops[i].op == REGOP(WRITE_64)) && | ||
448 | (ops[i].and_n_mask_hi == ~(u32)0)) { | ||
449 | data32_hi = ops[i].value_hi; | ||
450 | skip_read_hi = true; | ||
451 | } | ||
452 | |||
453 | /* read first 32bits */ | ||
454 | if (unlikely(skip_read_lo == false)) { | ||
455 | data32_lo = gk20a_readl(g, ops[i].offset); | ||
456 | data32_lo &= ~ops[i].and_n_mask_lo; | ||
457 | data32_lo |= ops[i].value_lo; | ||
458 | } | ||
459 | |||
460 | /* if desired, read second 32bits */ | ||
461 | if ((ops[i].op == REGOP(WRITE_64)) && | ||
462 | !skip_read_hi) { | ||
463 | data32_hi = gk20a_readl(g, ops[i].offset + 4); | ||
464 | data32_hi &= ~ops[i].and_n_mask_hi; | ||
465 | data32_hi |= ops[i].value_hi; | ||
466 | } | ||
467 | |||
468 | /* now update first 32bits */ | ||
469 | gk20a_writel(g, ops[i].offset, data32_lo); | ||
470 | gk20a_dbg(gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ", | ||
471 | data32_lo, ops[i].offset); | ||
472 | /* if desired, update second 32bits */ | ||
473 | if (ops[i].op == REGOP(WRITE_64)) { | ||
474 | gk20a_writel(g, ops[i].offset + 4, data32_hi); | ||
475 | gk20a_dbg(gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ", | ||
476 | data32_hi, ops[i].offset + 4); | ||
477 | |||
478 | } | ||
479 | |||
480 | |||
481 | break; | ||
482 | |||
483 | /* shouldn't happen as we've already screened */ | ||
484 | default: | ||
485 | BUG(); | ||
486 | err = -EINVAL; | ||
487 | goto clean_up; | ||
488 | break; | ||
489 | } | ||
490 | } | ||
491 | |||
492 | if (ctx_wr_count | ctx_rd_count) { | ||
493 | err = gr_gk20a_exec_ctx_ops(ch, ops, num_ops, | ||
494 | ctx_wr_count, ctx_rd_count); | ||
495 | if (err) { | ||
496 | dev_warn(dbg_s->dev, | ||
497 | "failed to perform ctx ops\n"); | ||
498 | goto clean_up; | ||
499 | } | ||
500 | } | ||
501 | |||
502 | clean_up: | ||
503 | gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); | ||
504 | return err; | ||
505 | |||
506 | } | ||
507 | |||
508 | |||
509 | static int validate_reg_op_info(struct dbg_session_gk20a *dbg_s, | ||
510 | struct nvhost_dbg_gpu_reg_op *op) | ||
511 | { | ||
512 | int err = 0; | ||
513 | |||
514 | op->status = REGOP(STATUS_SUCCESS); | ||
515 | |||
516 | switch (op->op) { | ||
517 | case REGOP(READ_32): | ||
518 | case REGOP(READ_64): | ||
519 | case REGOP(WRITE_32): | ||
520 | case REGOP(WRITE_64): | ||
521 | break; | ||
522 | default: | ||
523 | op->status |= REGOP(STATUS_UNSUPPORTED_OP); | ||
524 | /*gk20a_err(dbg_s->dev, "Invalid regops op %d!", op->op);*/ | ||
525 | err = -EINVAL; | ||
526 | break; | ||
527 | } | ||
528 | |||
529 | switch (op->type) { | ||
530 | case REGOP(TYPE_GLOBAL): | ||
531 | case REGOP(TYPE_GR_CTX): | ||
532 | case REGOP(TYPE_GR_CTX_TPC): | ||
533 | case REGOP(TYPE_GR_CTX_SM): | ||
534 | case REGOP(TYPE_GR_CTX_CROP): | ||
535 | case REGOP(TYPE_GR_CTX_ZROP): | ||
536 | case REGOP(TYPE_GR_CTX_QUAD): | ||
537 | break; | ||
538 | /* | ||
539 | case NVHOST_DBG_GPU_REG_OP_TYPE_FB: | ||
540 | */ | ||
541 | default: | ||
542 | op->status |= REGOP(STATUS_INVALID_TYPE); | ||
543 | /*gk20a_err(dbg_s->dev, "Invalid regops type %d!", op->type);*/ | ||
544 | err = -EINVAL; | ||
545 | break; | ||
546 | } | ||
547 | |||
548 | return err; | ||
549 | } | ||
550 | |||
551 | static bool check_whitelists(struct dbg_session_gk20a *dbg_s, | ||
552 | struct nvhost_dbg_gpu_reg_op *op, u32 offset) | ||
553 | { | ||
554 | bool valid = false; | ||
555 | |||
556 | if (op->type == REGOP(TYPE_GLOBAL)) { | ||
557 | /* search global list */ | ||
558 | valid = !!bsearch(&offset, | ||
559 | gk20a_global_whitelist_ranges, | ||
560 | gk20a_global_whitelist_ranges_count, | ||
561 | sizeof(*gk20a_global_whitelist_ranges), | ||
562 | regop_bsearch_range_cmp); | ||
563 | |||
564 | /* if debug session and channel is bound search context list */ | ||
565 | if ((!valid) && (!dbg_s->is_profiler && dbg_s->ch)) { | ||
566 | /* binary search context list */ | ||
567 | valid = !!bsearch(&offset, | ||
568 | gk20a_context_whitelist_ranges, | ||
569 | gk20a_context_whitelist_ranges_count, | ||
570 | sizeof(*gk20a_context_whitelist_ranges), | ||
571 | regop_bsearch_range_cmp); | ||
572 | } | ||
573 | |||
574 | /* if debug session and channel is bound search runcontrol list */ | ||
575 | if ((!valid) && (!dbg_s->is_profiler && dbg_s->ch)) { | ||
576 | valid = linear_search(offset, | ||
577 | gk20a_runcontrol_whitelist, | ||
578 | gk20a_runcontrol_whitelist_count); | ||
579 | } | ||
580 | } else if (op->type == REGOP(TYPE_GR_CTX)) { | ||
581 | /* it's a context-relative op */ | ||
582 | if (!dbg_s->ch) { | ||
583 | gk20a_err(dbg_s->dev, "can't perform ctx regop unless bound"); | ||
584 | op->status = REGOP(STATUS_UNSUPPORTED_OP); | ||
585 | return -ENODEV; | ||
586 | } | ||
587 | |||
588 | /* binary search context list */ | ||
589 | valid = !!bsearch(&offset, | ||
590 | gk20a_context_whitelist_ranges, | ||
591 | gk20a_context_whitelist_ranges_count, | ||
592 | sizeof(*gk20a_context_whitelist_ranges), | ||
593 | regop_bsearch_range_cmp); | ||
594 | |||
595 | /* if debug session and channel is bound search runcontrol list */ | ||
596 | if ((!valid) && (!dbg_s->is_profiler && dbg_s->ch)) { | ||
597 | valid = linear_search(offset, | ||
598 | gk20a_runcontrol_whitelist, | ||
599 | gk20a_runcontrol_whitelist_count); | ||
600 | } | ||
601 | |||
602 | } else if (op->type == REGOP(TYPE_GR_CTX_QUAD)) { | ||
603 | valid = linear_search(offset, | ||
604 | gk20a_qctl_whitelist, | ||
605 | gk20a_qctl_whitelist_count); | ||
606 | } | ||
607 | |||
608 | return valid; | ||
609 | } | ||
610 | |||
611 | /* note: the op here has already been through validate_reg_op_info */ | ||
/*
 * Validate op->offset against the whitelists and, for context-relative
 * ops, verify the offset can be located in the context image.  The op
 * has already passed validate_reg_op_info().  Sets op->status and
 * returns 0 on success, -EINVAL on any failure.
 */
static int validate_reg_op_offset(struct dbg_session_gk20a *dbg_s,
			       struct nvhost_dbg_gpu_reg_op *op)
{
	int err;
	u32 buf_offset_lo, buf_offset_addr, num_offsets, offset;
	bool valid = false;

	op->status = 0;
	offset = op->offset;

	/* support only 24-bit 4-byte aligned offsets */
	if (offset & 0xFF000003) {
		gk20a_err(dbg_s->dev, "invalid regop offset: 0x%x\n", offset);
		op->status |= REGOP(STATUS_INVALID_OFFSET);
		return -EINVAL;
	}

	/* 64-bit ops touch two consecutive words; both must be whitelisted */
	valid = check_whitelists(dbg_s, op, offset);
	if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid)
		valid = check_whitelists(dbg_s, op, offset + 4);

	/* ctx-relative ops must additionally resolve to a location in the
	 * context buffer image */
	if (valid && (op->type != REGOP(TYPE_GLOBAL))) {
		err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g,
						      op->offset,
						      1,
						      &buf_offset_lo,
						      &buf_offset_addr,
						      &num_offsets,
						      op->type == REGOP(TYPE_GR_CTX_QUAD),
						      op->quad);
		if (err) {
			op->status |= REGOP(STATUS_INVALID_OFFSET);
			return -EINVAL;
		}
		if (!buf_offset_lo) {
			op->status |= REGOP(STATUS_INVALID_OFFSET);
			return -EINVAL;
		}
	}

	if (!valid) {
		gk20a_err(dbg_s->dev, "invalid regop offset: 0x%x\n", offset);
		op->status |= REGOP(STATUS_INVALID_OFFSET);
		return -EINVAL;
	}

	return 0;
}
660 | |||
661 | static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s, | ||
662 | u32 *ctx_rd_count, u32 *ctx_wr_count, | ||
663 | struct nvhost_dbg_gpu_reg_op *ops, | ||
664 | u32 op_count) | ||
665 | { | ||
666 | u32 i; | ||
667 | int err; | ||
668 | bool ok = true; | ||
669 | |||
670 | /* keep going until the end so every op can get | ||
671 | * a separate error code if needed */ | ||
672 | for (i = 0; i < op_count; i++) { | ||
673 | |||
674 | err = validate_reg_op_info(dbg_s, &ops[i]); | ||
675 | ok &= !err; | ||
676 | |||
677 | if (reg_op_is_gr_ctx(ops[i].type)) { | ||
678 | if (reg_op_is_read(ops[i].op)) | ||
679 | (*ctx_rd_count)++; | ||
680 | else | ||
681 | (*ctx_wr_count)++; | ||
682 | } | ||
683 | |||
684 | err = validate_reg_op_offset(dbg_s, &ops[i]); | ||
685 | ok &= !err; | ||
686 | } | ||
687 | |||
688 | gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d\n", | ||
689 | *ctx_wr_count, *ctx_rd_count); | ||
690 | |||
691 | return ok; | ||
692 | } | ||
693 | |||
694 | /* exported for tools like cyclestats, etc */ | ||
695 | bool is_bar0_global_offset_whitelisted_gk20a(u32 offset) | ||
696 | { | ||
697 | |||
698 | bool valid = !!bsearch(&offset, | ||
699 | gk20a_global_whitelist_ranges, | ||
700 | gk20a_global_whitelist_ranges_count, | ||
701 | sizeof(*gk20a_global_whitelist_ranges), | ||
702 | regop_bsearch_range_cmp); | ||
703 | return valid; | ||
704 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.h b/drivers/gpu/nvgpu/gk20a/regops_gk20a.h new file mode 100644 index 00000000..23b4865b --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.h | |||
@@ -0,0 +1,47 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Tegra GK20A GPU Debugger Driver Register Ops | ||
4 | * | ||
5 | * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms and conditions of the GNU General Public License, | ||
9 | * version 2, as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
14 | * more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
18 | */ | ||
19 | #ifndef __REGOPS_GK20A_H_ | ||
20 | #define __REGOPS_GK20A_H_ | ||
21 | |||
/* Execute a batch of debugger register ops; see regops_gk20a.c. */
int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s,
		struct nvhost_dbg_gpu_reg_op *ops,
		u64 num_ops);

/* turn seriously unwieldy names -> something shorter */
#define REGOP(x) NVHOST_DBG_GPU_REG_OP_##x
28 | |||
29 | |||
30 | static inline bool reg_op_is_gr_ctx(u8 type) | ||
31 | { | ||
32 | return type == REGOP(TYPE_GR_CTX) || | ||
33 | type == REGOP(TYPE_GR_CTX_TPC) || | ||
34 | type == REGOP(TYPE_GR_CTX_SM) || | ||
35 | type == REGOP(TYPE_GR_CTX_CROP) || | ||
36 | type == REGOP(TYPE_GR_CTX_ZROP) || | ||
37 | type == REGOP(TYPE_GR_CTX_QUAD); | ||
38 | } | ||
39 | static inline bool reg_op_is_read(u8 op) | ||
40 | { | ||
41 | return op == REGOP(READ_32) || | ||
42 | op == REGOP(READ_64) ; | ||
43 | } | ||
44 | |||
45 | bool is_bar0_global_offset_whitelisted_gk20a(u32 offset); | ||
46 | |||
47 | #endif /* __REGOPS_GK20A_H_ */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/sim_gk20a.h b/drivers/gpu/nvgpu/gk20a/sim_gk20a.h new file mode 100644 index 00000000..5fc8006e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/sim_gk20a.h | |||
@@ -0,0 +1,62 @@ | |||
1 | /* | ||
 * drivers/gpu/nvgpu/gk20a/sim_gk20a.h
3 | * | ||
4 | * GK20A sim support | ||
5 | * | ||
6 | * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #ifndef __SIM_GK20A_H__ | ||
22 | #define __SIM_GK20A_H__ | ||
23 | |||
24 | |||
struct gk20a;

/* Per-GPU state for talking to the simulator through shared pages. */
struct sim_gk20a {
	struct gk20a *g;
	struct resource *reg_mem;	/* resource backing @regs */
	void __iomem *regs;		/* mapped simulator register aperture */
	struct {
		struct page *page;
		void *kvaddr;		/* kernel mapping of @page */
		phys_addr_t phys;	/* physical address of @page */
	} send_bfr, recv_bfr, msg_bfr;	/* send / receive / message buffers */
	u32 send_ring_put;	/* send ring put cursor */
	u32 recv_ring_get;	/* receive ring get cursor */
	u32 recv_ring_put;	/* receive ring put cursor */
	u32 sequence_base;	/* NOTE(review): semantics not visible here — confirm */
	void (*remove_support)(struct sim_gk20a *);	/* teardown hook */
};
41 | |||
42 | |||
/* Read @count bytes of escape data @path[@index] from the simulator into
 * @data; implemented in the sim support code. */
int gk20a_sim_esc_read(struct gk20a *g, char *path, u32 index,
		u32 count, u32 *data);
45 | |||
46 | static inline int gk20a_sim_esc_read_no_sim(struct gk20a *g, char *p, | ||
47 | u32 i, u32 c, u32 *d) | ||
48 | { | ||
49 | *d = ~(u32)0; | ||
50 | return -1; | ||
51 | } | ||
52 | |||
53 | static inline int gk20a_sim_esc_readl(struct gk20a *g, char * p, u32 i, u32 *d) | ||
54 | { | ||
55 | if (tegra_cpu_is_asim()) | ||
56 | return gk20a_sim_esc_read(g, p, i, sizeof(u32), d); | ||
57 | |||
58 | return gk20a_sim_esc_read_no_sim(g, p, i, sizeof(u32), d); | ||
59 | } | ||
60 | |||
61 | |||
62 | #endif /*__SIM_GK20A_H__*/ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/therm_gk20a.c b/drivers/gpu/nvgpu/gk20a/therm_gk20a.c new file mode 100644 index 00000000..da911979 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/therm_gk20a.c | |||
@@ -0,0 +1,142 @@ | |||
1 | /* | ||
 * drivers/gpu/nvgpu/gk20a/therm_gk20a.c
3 | * | ||
4 | * GK20A Therm | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | |||
22 | #include "gk20a.h" | ||
23 | #include "hw_chiplet_pwr_gk20a.h" | ||
24 | #include "hw_gr_gk20a.h" | ||
25 | #include "hw_therm_gk20a.h" | ||
26 | |||
/* Therm reset/enable step: nothing to do on gk20a; always succeeds. */
static int gk20a_init_therm_reset_enable_hw(struct gk20a *g)
{
	return 0;
}
31 | |||
/* Therm SW setup step: no SW state needed on gk20a; always succeeds. */
static int gk20a_init_therm_setup_sw(struct gk20a *g)
{
	return 0;
}
36 | |||
/*
 * Program the NV_THERM use/event registers with their init values.
 * The large disabled block below is historical block-activity (BA)
 * power-monitoring bring-up code, kept for reference.
 */
static int gk20a_init_therm_setup_hw(struct gk20a *g)
{
	/* program NV_THERM registers */
	gk20a_writel(g, therm_use_a_r(), NV_THERM_USE_A_INIT);
	gk20a_writel(g, therm_evt_ext_therm_0_r(),
		NV_THERM_EVT_EXT_THERM_0_INIT);
	gk20a_writel(g, therm_evt_ext_therm_1_r(),
		NV_THERM_EVT_EXT_THERM_1_INIT);
	gk20a_writel(g, therm_evt_ext_therm_2_r(),
		NV_THERM_EVT_EXT_THERM_2_INIT);

	/*
	u32 data;

	data = gk20a_readl(g, gr_gpcs_tpcs_l1c_cfg_r());
	data = set_field(data, gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_m(),
		gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_enable_f());
	gk20a_writel(g, gr_gpcs_tpcs_l1c_cfg_r(), data);

	data = gk20a_readl(g, gr_gpcs_tpcs_l1c_pm_r());
	data = set_field(data, gr_gpcs_tpcs_l1c_pm_enable_m(),
		gr_gpcs_tpcs_l1c_pm_enable_enable_f());
	gk20a_writel(g, gr_gpcs_tpcs_l1c_pm_r(), data);

	data = gk20a_readl(g, gr_gpcs_tpcs_sm_pm_ctrl_r());
	data = set_field(data, gr_gpcs_tpcs_sm_pm_ctrl_core_enable_m(),
		gr_gpcs_tpcs_sm_pm_ctrl_core_enable_enable_f());
	data = set_field(data, gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_m(),
		gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_enable_f());
	gk20a_writel(g, gr_gpcs_tpcs_sm_pm_ctrl_r(), data);

	data = gk20a_readl(g, gr_gpcs_tpcs_sm_halfctl_ctrl_r());
	data = set_field(data, gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_m(),
		gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_enable_f());
	gk20a_writel(g, gr_gpcs_tpcs_sm_halfctl_ctrl_r(), data);

	data = gk20a_readl(g, gr_gpcs_tpcs_sm_debug_sfe_control_r());
	data = set_field(data, gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_m(),
		gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_enable_f());
	gk20a_writel(g, gr_gpcs_tpcs_sm_debug_sfe_control_r(), data);

	gk20a_writel(g, therm_peakpower_config6_r(0),
		therm_peakpower_config6_trigger_cfg_1h_intr_f() |
		therm_peakpower_config6_trigger_cfg_1l_intr_f());

	gk20a_writel(g, chiplet_pwr_gpcs_config_1_r(),
		chiplet_pwr_gpcs_config_1_ba_enable_yes_f());
	gk20a_writel(g, chiplet_pwr_fbps_config_1_r(),
		chiplet_pwr_fbps_config_1_ba_enable_yes_f());

	data = gk20a_readl(g, therm_config1_r());
	data = set_field(data, therm_config1_ba_enable_m(),
		therm_config1_ba_enable_yes_f());
	gk20a_writel(g, therm_config1_r(), data);

	gk20a_writel(g, gr_gpcs_tpcs_sm_power_throttle_r(), 0x441a);

	gk20a_writel(g, therm_weight_1_r(), 0xd3);
	gk20a_writel(g, chiplet_pwr_gpcs_weight_6_r(), 0x7d);
	gk20a_writel(g, chiplet_pwr_gpcs_weight_7_r(), 0xff);
	gk20a_writel(g, chiplet_pwr_fbps_weight_0_r(), 0x13000000);
	gk20a_writel(g, chiplet_pwr_fbps_weight_1_r(), 0x19);

	gk20a_writel(g, therm_peakpower_config8_r(0), 0x8);
	gk20a_writel(g, therm_peakpower_config9_r(0), 0x0);

	gk20a_writel(g, therm_evt_ba_w0_t1h_r(), 0x100);

	gk20a_writel(g, therm_use_a_r(), therm_use_a_ba_w0_t1h_yes_f());

	gk20a_writel(g, therm_peakpower_config1_r(0),
		therm_peakpower_config1_window_period_2m_f() |
		therm_peakpower_config1_ba_sum_shift_20_f() |
		therm_peakpower_config1_window_en_enabled_f());

	gk20a_writel(g, therm_peakpower_config2_r(0),
		therm_peakpower_config2_ba_threshold_1h_val_f(1) |
		therm_peakpower_config2_ba_threshold_1h_en_enabled_f());

	gk20a_writel(g, therm_peakpower_config4_r(0),
		therm_peakpower_config4_ba_threshold_1l_val_f(1) |
		therm_peakpower_config4_ba_threshold_1l_en_enabled_f());
	*/
	return 0;
}
122 | |||
/*
 * One-time therm init: reset/enable HW, set up SW state, then program
 * the NV_THERM registers.  Returns 0 on success or the (negative)
 * error from the first failing step.
 */
int gk20a_init_therm_support(struct gk20a *g)
{
	/* fix: was u32 — the callees return int error codes, and negative
	 * errnos must not be stored in an unsigned variable */
	int err;

	gk20a_dbg_fn("");

	err = gk20a_init_therm_reset_enable_hw(g);
	if (err)
		return err;

	err = gk20a_init_therm_setup_sw(g);
	if (err)
		return err;

	/* last step's result is the function's result */
	return gk20a_init_therm_setup_hw(g);
}
diff --git a/drivers/gpu/nvgpu/gk20a/therm_gk20a.h b/drivers/gpu/nvgpu/gk20a/therm_gk20a.h new file mode 100644 index 00000000..3f67ee12 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/therm_gk20a.h | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
 * drivers/gpu/nvgpu/gk20a/therm_gk20a.h
3 | * | ||
4 | * GK20A Therm | ||
5 | * | ||
6 | * Copyright (c) 2011 - 2012, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | #ifndef _NVHOST_THERM_GK20A_H_ | ||
22 | #define _NVHOST_THERM_GK20A_H_ | ||
23 | |||
/* priority for EXT_THERM_0 event set to highest */
#define NV_THERM_EVT_EXT_THERM_0_INIT 0x3000100
#define NV_THERM_EVT_EXT_THERM_1_INIT 0x2000200
#define NV_THERM_EVT_EXT_THERM_2_INIT 0x1000300
/* configures the thermal events that may cause clock slowdown */
#define NV_THERM_USE_A_INIT 0x7
/* NOTE(review): raw init values for the therm_evt_*/therm_use_a registers;
 * field layout is defined in hw_therm_gk20a.h — not decoded here */

int gk20a_init_therm_support(struct gk20a *g);
32 | |||
33 | #endif /* _NVHOST_THERM_GK20A_H_ */ | ||