diff options
author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-22 10:38:37 -0500 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-22 10:38:37 -0500 |
commit | fcc9d2e5a6c89d22b8b773a64fb4ad21ac318446 (patch) | |
tree | a57612d1888735a2ec7972891b68c1ac5ec8faea /drivers/video/tegra/host/gr3d | |
parent | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (diff) |
Diffstat (limited to 'drivers/video/tegra/host/gr3d')
-rw-r--r-- | drivers/video/tegra/host/gr3d/Makefile | 10 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/gr3d.c | 211 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/gr3d.h | 54 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/gr3d_t20.c | 395 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/gr3d_t20.h | 30 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/gr3d_t30.c | 435 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/gr3d_t30.h | 30 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/scale3d.c | 661 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/scale3d.h | 47 |
9 files changed, 1873 insertions, 0 deletions
diff --git a/drivers/video/tegra/host/gr3d/Makefile b/drivers/video/tegra/host/gr3d/Makefile new file mode 100644 index 00000000000..dfbd078ab42 --- /dev/null +++ b/drivers/video/tegra/host/gr3d/Makefile | |||
@@ -0,0 +1,10 @@ | |||
# Build this directory with gcov profiling enabled.
GCOV_PROFILE := y
# Add the nvhost top-level directory to the include path so sources here can
# include headers such as "dev.h" and "host1x/..." by relative name.
# ccflags-y is the current Kbuild spelling of the deprecated EXTRA_CFLAGS.
ccflags-y += -Idrivers/video/tegra/host

nvhost-gr3d-objs = \
	gr3d.o \
	gr3d_t20.o \
	gr3d_t30.o \
	scale3d.o

obj-$(CONFIG_TEGRA_GRHOST) += nvhost-gr3d.o
diff --git a/drivers/video/tegra/host/gr3d/gr3d.c b/drivers/video/tegra/host/gr3d/gr3d.c new file mode 100644 index 00000000000..f387d54e585 --- /dev/null +++ b/drivers/video/tegra/host/gr3d/gr3d.c | |||
@@ -0,0 +1,211 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gr3d/gr3d.c | ||
3 | * | ||
4 | * Tegra Graphics Host 3D | ||
5 | * | ||
6 | * Copyright (c) 2012 NVIDIA Corporation. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include <mach/nvmap.h> | ||
22 | #include <linux/slab.h> | ||
23 | |||
24 | #include "t20/t20.h" | ||
25 | #include "host1x/host1x_channel.h" | ||
26 | #include "host1x/host1x_hardware.h" | ||
27 | #include "host1x/host1x_syncpt.h" | ||
28 | #include "nvhost_hwctx.h" | ||
29 | #include "dev.h" | ||
30 | #include "gr3d.h" | ||
31 | #include "bus_client.h" | ||
32 | |||
/* Define TEGRA_POWERGATE_3D1 as -1 when no earlier header has defined it. */
33 | #ifndef TEGRA_POWERGATE_3D1 | ||
34 | #define TEGRA_POWERGATE_3D1 -1 | ||
35 | #endif | ||
36 | |||
37 | void nvhost_3dctx_restore_begin(struct host1x_hwctx_handler *p, u32 *ptr) | ||
38 | { | ||
39 | /* set class to host */ | ||
40 | ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, | ||
41 | NV_CLASS_HOST_INCR_SYNCPT_BASE, 1); | ||
42 | /* increment sync point base */ | ||
43 | ptr[1] = nvhost_class_host_incr_syncpt_base(p->waitbase, | ||
44 | p->restore_incrs); | ||
45 | /* set class to 3D */ | ||
46 | ptr[2] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0); | ||
47 | /* program PSEQ_QUAD_ID */ | ||
48 | ptr[3] = nvhost_opcode_imm(AR3D_PSEQ_QUAD_ID, 0); | ||
49 | } | ||
50 | |||
51 | void nvhost_3dctx_restore_direct(u32 *ptr, u32 start_reg, u32 count) | ||
52 | { | ||
53 | ptr[0] = nvhost_opcode_incr(start_reg, count); | ||
54 | } | ||
55 | |||
56 | void nvhost_3dctx_restore_indirect(u32 *ptr, u32 offset_reg, u32 offset, | ||
57 | u32 data_reg, u32 count) | ||
58 | { | ||
59 | ptr[0] = nvhost_opcode_imm(offset_reg, offset); | ||
60 | ptr[1] = nvhost_opcode_nonincr(data_reg, count); | ||
61 | } | ||
62 | |||
63 | void nvhost_3dctx_restore_end(struct host1x_hwctx_handler *p, u32 *ptr) | ||
64 | { | ||
65 | /* syncpt increment to track restore gather. */ | ||
66 | ptr[0] = nvhost_opcode_imm_incr_syncpt( | ||
67 | NV_SYNCPT_OP_DONE, p->syncpt); | ||
68 | } | ||
69 | |||
70 | /*** ctx3d ***/ | ||
71 | |||
72 | struct host1x_hwctx *nvhost_3dctx_alloc_common(struct host1x_hwctx_handler *p, | ||
73 | struct nvhost_channel *ch, bool map_restore) | ||
74 | { | ||
75 | struct nvmap_client *nvmap = nvhost_get_host(ch->dev)->nvmap; | ||
76 | struct host1x_hwctx *ctx; | ||
77 | |||
78 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); | ||
79 | if (!ctx) | ||
80 | return NULL; | ||
81 | ctx->restore = nvmap_alloc(nvmap, p->restore_size * 4, 32, | ||
82 | map_restore ? NVMAP_HANDLE_WRITE_COMBINE | ||
83 | : NVMAP_HANDLE_UNCACHEABLE, 0); | ||
84 | if (IS_ERR_OR_NULL(ctx->restore)) | ||
85 | goto fail; | ||
86 | |||
87 | if (map_restore) { | ||
88 | ctx->restore_virt = nvmap_mmap(ctx->restore); | ||
89 | if (!ctx->restore_virt) | ||
90 | goto fail; | ||
91 | } else | ||
92 | ctx->restore_virt = NULL; | ||
93 | |||
94 | kref_init(&ctx->hwctx.ref); | ||
95 | ctx->hwctx.h = &p->h; | ||
96 | ctx->hwctx.channel = ch; | ||
97 | ctx->hwctx.valid = false; | ||
98 | ctx->save_incrs = p->save_incrs; | ||
99 | ctx->save_thresh = p->save_thresh; | ||
100 | ctx->save_slots = p->save_slots; | ||
101 | ctx->restore_phys = nvmap_pin(nvmap, ctx->restore); | ||
102 | if (IS_ERR_VALUE(ctx->restore_phys)) | ||
103 | goto fail; | ||
104 | |||
105 | ctx->restore_size = p->restore_size; | ||
106 | ctx->restore_incrs = p->restore_incrs; | ||
107 | return ctx; | ||
108 | |||
109 | fail: | ||
110 | if (map_restore && ctx->restore_virt) { | ||
111 | nvmap_munmap(ctx->restore, ctx->restore_virt); | ||
112 | ctx->restore_virt = NULL; | ||
113 | } | ||
114 | nvmap_free(nvmap, ctx->restore); | ||
115 | ctx->restore = NULL; | ||
116 | kfree(ctx); | ||
117 | return NULL; | ||
118 | } | ||
119 | |||
120 | void nvhost_3dctx_get(struct nvhost_hwctx *ctx) | ||
121 | { | ||
122 | kref_get(&ctx->ref); | ||
123 | } | ||
124 | |||
125 | void nvhost_3dctx_free(struct kref *ref) | ||
126 | { | ||
127 | struct nvhost_hwctx *nctx = container_of(ref, struct nvhost_hwctx, ref); | ||
128 | struct host1x_hwctx *ctx = to_host1x_hwctx(nctx); | ||
129 | struct nvmap_client *nvmap = | ||
130 | nvhost_get_host(nctx->channel->dev)->nvmap; | ||
131 | |||
132 | if (ctx->restore_virt) { | ||
133 | nvmap_munmap(ctx->restore, ctx->restore_virt); | ||
134 | ctx->restore_virt = NULL; | ||
135 | } | ||
136 | nvmap_unpin(nvmap, ctx->restore); | ||
137 | ctx->restore_phys = 0; | ||
138 | nvmap_free(nvmap, ctx->restore); | ||
139 | ctx->restore = NULL; | ||
140 | kfree(ctx); | ||
141 | } | ||
142 | |||
143 | void nvhost_3dctx_put(struct nvhost_hwctx *ctx) | ||
144 | { | ||
145 | kref_put(&ctx->ref, nvhost_3dctx_free); | ||
146 | } | ||
147 | |||
148 | int nvhost_gr3d_prepare_power_off(struct nvhost_device *dev) | ||
149 | { | ||
150 | return host1x_save_context(dev, NVSYNCPT_3D); | ||
151 | } | ||
152 | |||
153 | static int __devinit gr3d_probe(struct nvhost_device *dev) | ||
154 | { | ||
155 | return nvhost_client_device_init(dev); | ||
156 | } | ||
157 | |||
158 | static int __exit gr3d_remove(struct nvhost_device *dev) | ||
159 | { | ||
160 | /* Add clean-up */ | ||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | static int gr3d_suspend(struct nvhost_device *dev, pm_message_t state) | ||
165 | { | ||
166 | return nvhost_client_device_suspend(dev); | ||
167 | } | ||
168 | |||
169 | static int gr3d_resume(struct nvhost_device *dev) | ||
170 | { | ||
171 | dev_info(&dev->dev, "resuming\n"); | ||
172 | return 0; | ||
173 | } | ||
174 | |||
/* The "gr3d" nvhost device; assigned from nvhost_get_device() in gr3d_init(). */
175 | struct nvhost_device *gr3d_device; | ||
176 | |||
/*
 * nvhost driver description for the "gr3d" device. The suspend/resume
 * callbacks are only wired up when CONFIG_PM is enabled.
 */
177 | static struct nvhost_driver gr3d_driver = { | ||
178 | .probe = gr3d_probe, | ||
179 | .remove = __exit_p(gr3d_remove), | ||
180 | #ifdef CONFIG_PM | ||
181 | .suspend = gr3d_suspend, | ||
182 | .resume = gr3d_resume, | ||
183 | #endif | ||
184 | .driver = { | ||
185 | .owner = THIS_MODULE, | ||
186 | .name = "gr3d", | ||
187 | } | ||
188 | }; | ||
189 | |||
190 | static int __init gr3d_init(void) | ||
191 | { | ||
192 | int err; | ||
193 | |||
194 | gr3d_device = nvhost_get_device("gr3d"); | ||
195 | if (!gr3d_device) | ||
196 | return -ENXIO; | ||
197 | |||
198 | err = nvhost_device_register(gr3d_device); | ||
199 | if (err) | ||
200 | return err; | ||
201 | |||
202 | return nvhost_driver_register(&gr3d_driver); | ||
203 | } | ||
204 | |||
205 | static void __exit gr3d_exit(void) | ||
206 | { | ||
207 | nvhost_driver_unregister(&gr3d_driver); | ||
208 | } | ||
209 | |||
/* Register gr3d_init()/gr3d_exit() as the module entry and exit points. */
210 | module_init(gr3d_init); | ||
211 | module_exit(gr3d_exit); | ||
diff --git a/drivers/video/tegra/host/gr3d/gr3d.h b/drivers/video/tegra/host/gr3d/gr3d.h new file mode 100644 index 00000000000..3855b237b70 --- /dev/null +++ b/drivers/video/tegra/host/gr3d/gr3d.h | |||
@@ -0,0 +1,54 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gr3d/gr3d.h | ||
3 | * | ||
4 | * Tegra Graphics Host 3D | ||
5 | * | ||
6 | * Copyright (c) 2011-2012, NVIDIA Corporation. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #ifndef __NVHOST_GR3D_GR3D_H | ||
22 | #define __NVHOST_GR3D_GR3D_H | ||
23 | |||
24 | #include "host1x/host1x_hwctx.h" | ||
25 | #include <linux/types.h> | ||
26 | |||
/*
 * Register offsets within the 3D unit. AR3D_PSEQ_QUAD_ID is the register
 * programmed by nvhost_3dctx_restore_begin().
 */
27 | /* Registers of 3D unit */ | ||
28 | |||
29 | #define AR3D_PSEQ_QUAD_ID 0x545 | ||
30 | #define AR3D_DW_MEMORY_OUTPUT_ADDRESS 0x904 | ||
31 | #define AR3D_DW_MEMORY_OUTPUT_DATA 0x905 | ||
32 | #define AR3D_GSHIM_WRITE_MASK 0xb00 | ||
33 | #define AR3D_GSHIM_READ_SELECT 0xb01 | ||
34 | #define AR3D_GLOBAL_MEMORY_OUTPUT_READS 0xe40 | ||
35 | |||
/*
 * Forward declarations for types that appear only as pointers in the
 * prototypes below. struct nvhost_device is included so that
 * nvhost_gr3d_prepare_power_off() does not declare it inside its own
 * parameter list.
 */
struct nvhost_hwctx;
struct nvhost_channel;
struct nvhost_device;
struct kref;
39 | |||
/*
 * Restore-buffer builders (begin/direct/indirect/end) write command words
 * at ptr; alloc_common creates a context with its restore buffer; get/put
 * manage the context reference count, with free as the release callback.
 */
40 | /* Functions used commonly by all 3D context switch modules */ | ||
41 | void nvhost_3dctx_restore_begin(struct host1x_hwctx_handler *h, u32 *ptr); | ||
42 | void nvhost_3dctx_restore_direct(u32 *ptr, u32 start_reg, u32 count); | ||
43 | void nvhost_3dctx_restore_indirect(u32 *ptr, u32 offset_reg, | ||
44 | u32 offset, u32 data_reg, u32 count); | ||
45 | void nvhost_3dctx_restore_end(struct host1x_hwctx_handler *h, u32 *ptr); | ||
46 | struct host1x_hwctx *nvhost_3dctx_alloc_common( | ||
47 | struct host1x_hwctx_handler *p, | ||
48 | struct nvhost_channel *ch, bool map_restore); | ||
49 | void nvhost_3dctx_get(struct nvhost_hwctx *ctx); | ||
50 | void nvhost_3dctx_free(struct kref *ref); | ||
51 | void nvhost_3dctx_put(struct nvhost_hwctx *ctx); | ||
52 | int nvhost_gr3d_prepare_power_off(struct nvhost_device *dev); | ||
53 | |||
54 | #endif | ||
diff --git a/drivers/video/tegra/host/gr3d/gr3d_t20.c b/drivers/video/tegra/host/gr3d/gr3d_t20.c new file mode 100644 index 00000000000..3604142aaf2 --- /dev/null +++ b/drivers/video/tegra/host/gr3d/gr3d_t20.c | |||
@@ -0,0 +1,395 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gr3d/gr3d_t20.c | ||
3 | * | ||
4 | * Tegra Graphics Host 3D for Tegra2 | ||
5 | * | ||
6 | * Copyright (c) 2010-2012, NVIDIA Corporation. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include "nvhost_hwctx.h" | ||
22 | #include "dev.h" | ||
23 | #include "host1x/host1x_channel.h" | ||
24 | #include "host1x/host1x_hardware.h" | ||
25 | #include "host1x/host1x_syncpt.h" | ||
26 | #include "gr3d.h" | ||
27 | |||
28 | #include <linux/slab.h> | ||
29 | |||
/*
 * Register ranges saved and restored for a Tegra2 3D context. Each entry is
 * (register offset, word count, access type). The same table drives the
 * save command stream, the restore command stream and the FIFO drain in
 * save_regs_v0(), so all three walk the ranges in this order.
 */
30 | static const struct hwctx_reginfo ctxsave_regs_3d_global[] = { | ||
31 | HWCTX_REGINFO(0xe00, 4, DIRECT), | ||
32 | HWCTX_REGINFO(0xe05, 30, DIRECT), | ||
33 | HWCTX_REGINFO(0xe25, 2, DIRECT), | ||
34 | HWCTX_REGINFO(0xe28, 2, DIRECT), | ||
35 | HWCTX_REGINFO(0x001, 2, DIRECT), | ||
36 | HWCTX_REGINFO(0x00c, 10, DIRECT), | ||
37 | HWCTX_REGINFO(0x100, 34, DIRECT), | ||
38 | HWCTX_REGINFO(0x124, 2, DIRECT), | ||
39 | HWCTX_REGINFO(0x200, 5, DIRECT), | ||
40 | HWCTX_REGINFO(0x205, 1024, INDIRECT), | ||
41 | HWCTX_REGINFO(0x207, 1024, INDIRECT), | ||
42 | HWCTX_REGINFO(0x209, 1, DIRECT), | ||
43 | HWCTX_REGINFO(0x300, 64, DIRECT), | ||
44 | HWCTX_REGINFO(0x343, 25, DIRECT), | ||
45 | HWCTX_REGINFO(0x363, 2, DIRECT), | ||
46 | HWCTX_REGINFO(0x400, 16, DIRECT), | ||
47 | HWCTX_REGINFO(0x411, 1, DIRECT), | ||
48 | HWCTX_REGINFO(0x500, 4, DIRECT), | ||
49 | HWCTX_REGINFO(0x520, 32, DIRECT), | ||
50 | HWCTX_REGINFO(0x540, 64, INDIRECT), | ||
51 | HWCTX_REGINFO(0x600, 16, INDIRECT_4X), | ||
52 | HWCTX_REGINFO(0x603, 128, INDIRECT), | ||
53 | HWCTX_REGINFO(0x608, 4, DIRECT), | ||
54 | HWCTX_REGINFO(0x60e, 1, DIRECT), | ||
55 | HWCTX_REGINFO(0x700, 64, INDIRECT), | ||
56 | HWCTX_REGINFO(0x710, 50, DIRECT), | ||
57 | HWCTX_REGINFO(0x800, 16, INDIRECT_4X), | ||
58 | HWCTX_REGINFO(0x803, 512, INDIRECT), | ||
59 | HWCTX_REGINFO(0x805, 64, INDIRECT), | ||
60 | HWCTX_REGINFO(0x820, 32, DIRECT), | ||
61 | HWCTX_REGINFO(0x900, 64, INDIRECT), | ||
62 | HWCTX_REGINFO(0x902, 2, DIRECT), | ||
63 | HWCTX_REGINFO(0xa02, 10, DIRECT), | ||
64 | HWCTX_REGINFO(0xe04, 1, DIRECT), | ||
65 | HWCTX_REGINFO(0xe2a, 1, DIRECT), | ||
66 | }; | ||
67 | |||
/*
 * Sizes, in 32-bit words, of the fixed command fragments. Each *_SIZE value
 * matches the number of words the corresponding builder writes:
 * save_begin_v0 (5), save_direct_v0 (3), save_indirect_v0 (5),
 * save_end_v0 (5), nvhost_3dctx_restore_begin (4), _restore_direct (1),
 * _restore_indirect (2), _restore_end (1).
 * SAVE_INCRS is the initial save_incrs value used by setup_save();
 * SAVE_THRESH_OFFSET is subtracted from save_incrs to form save_thresh.
 */
68 | /* the same context save command sequence is used for all contexts. */ | ||
69 | #define SAVE_BEGIN_V0_SIZE 5 | ||
70 | #define SAVE_DIRECT_V0_SIZE 3 | ||
71 | #define SAVE_INDIRECT_V0_SIZE 5 | ||
72 | #define SAVE_END_V0_SIZE 5 | ||
73 | #define SAVE_INCRS 3 | ||
74 | #define SAVE_THRESH_OFFSET 1 | ||
75 | #define RESTORE_BEGIN_SIZE 4 | ||
76 | #define RESTORE_DIRECT_SIZE 1 | ||
77 | #define RESTORE_INDIRECT_SIZE 2 | ||
78 | #define RESTORE_END_SIZE 1 | ||
79 | |||
/*
 * Running state while building (or merely sizing) the save buffer.
 * ptr is the write cursor and is NULL during the sizing pass, in which case
 * only the counters are updated. save_count and restore_count accumulate
 * buffer sizes in words; save_incrs and restore_incrs are copied into the
 * handler by setup_save().
 */
80 | struct save_info { | ||
81 | u32 *ptr; | ||
82 | unsigned int save_count; | ||
83 | unsigned int restore_count; | ||
84 | unsigned int save_incrs; | ||
85 | unsigned int restore_incrs; | ||
86 | }; | ||
87 | |||
88 | static u32 *setup_restore_regs_v0(u32 *ptr, | ||
89 | const struct hwctx_reginfo *regs, | ||
90 | unsigned int nr_regs) | ||
91 | { | ||
92 | const struct hwctx_reginfo *rend = regs + nr_regs; | ||
93 | |||
94 | for ( ; regs != rend; ++regs) { | ||
95 | u32 offset = regs->offset; | ||
96 | u32 count = regs->count; | ||
97 | u32 indoff = offset + 1; | ||
98 | switch (regs->type) { | ||
99 | case HWCTX_REGINFO_DIRECT: | ||
100 | nvhost_3dctx_restore_direct(ptr, offset, count); | ||
101 | ptr += RESTORE_DIRECT_SIZE; | ||
102 | break; | ||
103 | case HWCTX_REGINFO_INDIRECT_4X: | ||
104 | ++indoff; | ||
105 | /* fall through */ | ||
106 | case HWCTX_REGINFO_INDIRECT: | ||
107 | nvhost_3dctx_restore_indirect(ptr, | ||
108 | offset, 0, indoff, count); | ||
109 | ptr += RESTORE_INDIRECT_SIZE; | ||
110 | break; | ||
111 | } | ||
112 | ptr += count; | ||
113 | } | ||
114 | return ptr; | ||
115 | } | ||
116 | |||
117 | static void setup_restore_v0(struct host1x_hwctx_handler *h, u32 *ptr) | ||
118 | { | ||
119 | nvhost_3dctx_restore_begin(h, ptr); | ||
120 | ptr += RESTORE_BEGIN_SIZE; | ||
121 | |||
122 | ptr = setup_restore_regs_v0(ptr, | ||
123 | ctxsave_regs_3d_global, | ||
124 | ARRAY_SIZE(ctxsave_regs_3d_global)); | ||
125 | |||
126 | nvhost_3dctx_restore_end(h, ptr); | ||
127 | |||
128 | wmb(); | ||
129 | } | ||
130 | |||
131 | /*** v0 saver ***/ | ||
132 | |||
133 | static void save_push_v0(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma) | ||
134 | { | ||
135 | struct host1x_hwctx *ctx = to_host1x_hwctx(nctx); | ||
136 | struct host1x_hwctx_handler *p = host1x_hwctx_handler(ctx); | ||
137 | |||
138 | nvhost_cdma_push_gather(cdma, | ||
139 | (void *)NVHOST_CDMA_PUSH_GATHER_CTXSAVE, | ||
140 | (void *)NVHOST_CDMA_PUSH_GATHER_CTXSAVE, | ||
141 | nvhost_opcode_gather(p->save_size), | ||
142 | p->save_phys); | ||
143 | } | ||
144 | |||
145 | static void __init save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr) | ||
146 | { | ||
147 | /* 3d: when done, increment syncpt to base+1 */ | ||
148 | ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0); | ||
149 | ptr[1] = nvhost_opcode_imm_incr_syncpt(NV_SYNCPT_OP_DONE, | ||
150 | h->syncpt); /* incr 1 */ | ||
151 | /* host: wait for syncpt base+1 */ | ||
152 | ptr[2] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, | ||
153 | NV_CLASS_HOST_WAIT_SYNCPT_BASE, 1); | ||
154 | ptr[3] = nvhost_class_host_wait_syncpt_base(h->syncpt, | ||
155 | h->waitbase, 1); | ||
156 | /* host: signal context read thread to start reading */ | ||
157 | ptr[4] = nvhost_opcode_imm_incr_syncpt(NV_SYNCPT_IMMEDIATE, | ||
158 | h->syncpt); /* incr 2 */ | ||
159 | } | ||
160 | |||
161 | static void __init save_direct_v0(u32 *ptr, u32 start_reg, u32 count) | ||
162 | { | ||
163 | ptr[0] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDOFF, 1); | ||
164 | ptr[1] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D, | ||
165 | start_reg, true); | ||
166 | ptr[2] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, count); | ||
167 | } | ||
168 | |||
169 | static void __init save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset, | ||
170 | u32 data_reg, u32 count) | ||
171 | { | ||
172 | ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, | ||
173 | offset_reg, 1); | ||
174 | ptr[1] = offset; | ||
175 | ptr[2] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, | ||
176 | NV_CLASS_HOST_INDOFF, 1); | ||
177 | ptr[3] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D, | ||
178 | data_reg, false); | ||
179 | ptr[4] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, count); | ||
180 | } | ||
181 | |||
182 | static void __init save_end_v0(struct host1x_hwctx_handler *h, u32 *ptr) | ||
183 | { | ||
184 | /* Wait for context read service to finish (cpu incr 3) */ | ||
185 | ptr[0] = nvhost_opcode_nonincr(NV_CLASS_HOST_WAIT_SYNCPT_BASE, 1); | ||
186 | ptr[1] = nvhost_class_host_wait_syncpt_base(h->syncpt, | ||
187 | h->waitbase, h->save_incrs); | ||
188 | /* Advance syncpoint base */ | ||
189 | ptr[2] = nvhost_opcode_nonincr(NV_CLASS_HOST_INCR_SYNCPT_BASE, 1); | ||
190 | ptr[3] = nvhost_class_host_incr_syncpt_base(NVWAITBASE_3D, | ||
191 | h->save_incrs); | ||
192 | /* set class back to the unit */ | ||
193 | ptr[4] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0); | ||
194 | } | ||
195 | |||
196 | static u32 *save_regs_v0(u32 *ptr, unsigned int *pending, | ||
197 | void __iomem *chan_regs, | ||
198 | const struct hwctx_reginfo *regs, | ||
199 | unsigned int nr_regs) | ||
200 | { | ||
201 | const struct hwctx_reginfo *rend = regs + nr_regs; | ||
202 | int drain_result = 0; | ||
203 | |||
204 | for ( ; regs != rend; ++regs) { | ||
205 | u32 count = regs->count; | ||
206 | switch (regs->type) { | ||
207 | case HWCTX_REGINFO_DIRECT: | ||
208 | ptr += RESTORE_DIRECT_SIZE; | ||
209 | break; | ||
210 | case HWCTX_REGINFO_INDIRECT: | ||
211 | case HWCTX_REGINFO_INDIRECT_4X: | ||
212 | ptr += RESTORE_INDIRECT_SIZE; | ||
213 | break; | ||
214 | } | ||
215 | drain_result = host1x_drain_read_fifo(chan_regs, | ||
216 | ptr, count, pending); | ||
217 | BUG_ON(drain_result < 0); | ||
218 | ptr += count; | ||
219 | } | ||
220 | return ptr; | ||
221 | } | ||
222 | |||
223 | /*** save ***/ | ||
224 | |||
225 | static void __init setup_save_regs(struct save_info *info, | ||
226 | const struct hwctx_reginfo *regs, | ||
227 | unsigned int nr_regs) | ||
228 | { | ||
229 | const struct hwctx_reginfo *rend = regs + nr_regs; | ||
230 | u32 *ptr = info->ptr; | ||
231 | unsigned int save_count = info->save_count; | ||
232 | unsigned int restore_count = info->restore_count; | ||
233 | |||
234 | for ( ; regs != rend; ++regs) { | ||
235 | u32 offset = regs->offset; | ||
236 | u32 count = regs->count; | ||
237 | u32 indoff = offset + 1; | ||
238 | switch (regs->type) { | ||
239 | case HWCTX_REGINFO_DIRECT: | ||
240 | if (ptr) { | ||
241 | save_direct_v0(ptr, offset, count); | ||
242 | ptr += SAVE_DIRECT_V0_SIZE; | ||
243 | } | ||
244 | save_count += SAVE_DIRECT_V0_SIZE; | ||
245 | restore_count += RESTORE_DIRECT_SIZE; | ||
246 | break; | ||
247 | case HWCTX_REGINFO_INDIRECT_4X: | ||
248 | ++indoff; | ||
249 | /* fall through */ | ||
250 | case HWCTX_REGINFO_INDIRECT: | ||
251 | if (ptr) { | ||
252 | save_indirect_v0(ptr, offset, 0, | ||
253 | indoff, count); | ||
254 | ptr += SAVE_INDIRECT_V0_SIZE; | ||
255 | } | ||
256 | save_count += SAVE_INDIRECT_V0_SIZE; | ||
257 | restore_count += RESTORE_INDIRECT_SIZE; | ||
258 | break; | ||
259 | } | ||
260 | if (ptr) { | ||
261 | /* SAVE cases only: reserve room for incoming data */ | ||
262 | u32 k = 0; | ||
263 | /* | ||
264 | * Create a signature pattern for indirect data (which | ||
265 | * will be overwritten by true incoming data) for | ||
266 | * better deducing where we are in a long command | ||
267 | * sequence, when given only a FIFO snapshot for debug | ||
268 | * purposes. | ||
269 | */ | ||
270 | for (k = 0; k < count; k++) | ||
271 | *(ptr + k) = 0xd000d000 | (offset << 16) | k; | ||
272 | ptr += count; | ||
273 | } | ||
274 | save_count += count; | ||
275 | restore_count += count; | ||
276 | } | ||
277 | |||
278 | info->ptr = ptr; | ||
279 | info->save_count = save_count; | ||
280 | info->restore_count = restore_count; | ||
281 | } | ||
282 | |||
283 | static void __init setup_save(struct host1x_hwctx_handler *h, u32 *ptr) | ||
284 | { | ||
285 | struct save_info info = { | ||
286 | ptr, | ||
287 | SAVE_BEGIN_V0_SIZE, | ||
288 | RESTORE_BEGIN_SIZE, | ||
289 | SAVE_INCRS, | ||
290 | 1 | ||
291 | }; | ||
292 | |||
293 | if (info.ptr) { | ||
294 | save_begin_v0(h, info.ptr); | ||
295 | info.ptr += SAVE_BEGIN_V0_SIZE; | ||
296 | } | ||
297 | |||
298 | /* save regs */ | ||
299 | setup_save_regs(&info, | ||
300 | ctxsave_regs_3d_global, | ||
301 | ARRAY_SIZE(ctxsave_regs_3d_global)); | ||
302 | |||
303 | if (info.ptr) { | ||
304 | save_end_v0(h, info.ptr); | ||
305 | info.ptr += SAVE_END_V0_SIZE; | ||
306 | } | ||
307 | |||
308 | wmb(); | ||
309 | |||
310 | h->save_size = info.save_count + SAVE_END_V0_SIZE; | ||
311 | h->restore_size = info.restore_count + RESTORE_END_SIZE; | ||
312 | h->save_incrs = info.save_incrs; | ||
313 | h->save_thresh = h->save_incrs - SAVE_THRESH_OFFSET; | ||
314 | h->restore_incrs = info.restore_incrs; | ||
315 | } | ||
316 | |||
317 | |||
318 | |||
319 | /*** ctx3d ***/ | ||
320 | |||
321 | static struct nvhost_hwctx *ctx3d_alloc_v0(struct nvhost_hwctx_handler *h, | ||
322 | struct nvhost_channel *ch) | ||
323 | { | ||
324 | struct host1x_hwctx_handler *p = to_host1x_hwctx_handler(h); | ||
325 | struct host1x_hwctx *ctx = | ||
326 | nvhost_3dctx_alloc_common(p, ch, true); | ||
327 | if (ctx) { | ||
328 | setup_restore_v0(p, ctx->restore_virt); | ||
329 | return &ctx->hwctx; | ||
330 | } else | ||
331 | return NULL; | ||
332 | } | ||
333 | |||
334 | static void ctx3d_save_service(struct nvhost_hwctx *nctx) | ||
335 | { | ||
336 | struct host1x_hwctx *ctx = to_host1x_hwctx(nctx); | ||
337 | |||
338 | u32 *ptr = (u32 *)ctx->restore_virt + RESTORE_BEGIN_SIZE; | ||
339 | unsigned int pending = 0; | ||
340 | |||
341 | ptr = save_regs_v0(ptr, &pending, nctx->channel->aperture, | ||
342 | ctxsave_regs_3d_global, | ||
343 | ARRAY_SIZE(ctxsave_regs_3d_global)); | ||
344 | |||
345 | wmb(); | ||
346 | nvhost_syncpt_cpu_incr(&nvhost_get_host(nctx->channel->dev)->syncpt, | ||
347 | host1x_hwctx_handler(ctx)->syncpt); | ||
348 | } | ||
349 | |||
350 | struct nvhost_hwctx_handler * __init nvhost_gr3d_t20_ctxhandler_init( | ||
351 | u32 syncpt, u32 waitbase, | ||
352 | struct nvhost_channel *ch) | ||
353 | { | ||
354 | struct nvmap_client *nvmap; | ||
355 | u32 *save_ptr; | ||
356 | struct host1x_hwctx_handler *p; | ||
357 | |||
358 | p = kmalloc(sizeof(*p), GFP_KERNEL); | ||
359 | if (!p) | ||
360 | return NULL; | ||
361 | nvmap = nvhost_get_host(ch->dev)->nvmap; | ||
362 | |||
363 | p->syncpt = syncpt; | ||
364 | p->waitbase = waitbase; | ||
365 | |||
366 | setup_save(p, NULL); | ||
367 | |||
368 | p->save_buf = nvmap_alloc(nvmap, p->save_size * sizeof(u32), 32, | ||
369 | NVMAP_HANDLE_WRITE_COMBINE, 0); | ||
370 | if (IS_ERR(p->save_buf)) { | ||
371 | p->save_buf = NULL; | ||
372 | return NULL; | ||
373 | } | ||
374 | |||
375 | p->save_slots = 1; | ||
376 | |||
377 | save_ptr = nvmap_mmap(p->save_buf); | ||
378 | if (!save_ptr) { | ||
379 | nvmap_free(nvmap, p->save_buf); | ||
380 | p->save_buf = NULL; | ||
381 | return NULL; | ||
382 | } | ||
383 | |||
384 | p->save_phys = nvmap_pin(nvmap, p->save_buf); | ||
385 | |||
386 | setup_save(p, save_ptr); | ||
387 | |||
388 | p->h.alloc = ctx3d_alloc_v0; | ||
389 | p->h.save_push = save_push_v0; | ||
390 | p->h.save_service = ctx3d_save_service; | ||
391 | p->h.get = nvhost_3dctx_get; | ||
392 | p->h.put = nvhost_3dctx_put; | ||
393 | |||
394 | return &p->h; | ||
395 | } | ||
diff --git a/drivers/video/tegra/host/gr3d/gr3d_t20.h b/drivers/video/tegra/host/gr3d/gr3d_t20.h new file mode 100644 index 00000000000..5fe6d50d0c3 --- /dev/null +++ b/drivers/video/tegra/host/gr3d/gr3d_t20.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gr3d/gr3d_t20.h | ||
3 | * | ||
4 | * Tegra Graphics Host 3D for Tegra2 | ||
5 | * | ||
6 | * Copyright (c) 2011-2012, NVIDIA Corporation. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #ifndef __NVHOST_GR3D_GR3D_T20_H | ||
22 | #define __NVHOST_GR3D_GR3D_T20_H | ||
23 | |||
24 | struct nvhost_hwctx_handler; | ||
25 | |||
26 | struct nvhost_hwctx_handler *nvhost_gr3d_t20_ctxhandler_init( | ||
27 | u32 syncpt, u32 waitbase, | ||
28 | struct nvhost_channel *ch); | ||
29 | |||
30 | #endif | ||
diff --git a/drivers/video/tegra/host/gr3d/gr3d_t30.c b/drivers/video/tegra/host/gr3d/gr3d_t30.c new file mode 100644 index 00000000000..e7329e50e3d --- /dev/null +++ b/drivers/video/tegra/host/gr3d/gr3d_t30.c | |||
@@ -0,0 +1,435 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gr3d/gr3d_t30.c | ||
3 | * | ||
4 | * Tegra Graphics Host 3D for Tegra3 | ||
5 | * | ||
6 | * Copyright (c) 2011-2012 NVIDIA Corporation. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include "nvhost_hwctx.h" | ||
22 | #include "dev.h" | ||
23 | #include "host1x/host1x_hardware.h" | ||
24 | #include "host1x/host1x_syncpt.h" | ||
25 | #include "gr3d.h" | ||
26 | |||
27 | #include <mach/gpufuse.h> | ||
28 | #include <mach/hardware.h> | ||
29 | #include <linux/slab.h> | ||
30 | |||
31 | /* 99 > 2: trips the BUG_ON() in setup_save() if the count is never set */ | ||
32 | static int register_sets = 99; | ||
33 | |||
34 | static const struct hwctx_reginfo ctxsave_regs_3d_global[] = { | ||
35 | HWCTX_REGINFO(0xe00, 4, DIRECT), | ||
36 | HWCTX_REGINFO(0xe05, 30, DIRECT), | ||
37 | HWCTX_REGINFO(0xe25, 2, DIRECT), | ||
38 | HWCTX_REGINFO(0xe28, 2, DIRECT), | ||
39 | HWCTX_REGINFO(0xe30, 16, DIRECT), | ||
40 | HWCTX_REGINFO(0x001, 2, DIRECT), | ||
41 | HWCTX_REGINFO(0x00c, 10, DIRECT), | ||
42 | HWCTX_REGINFO(0x100, 34, DIRECT), | ||
43 | HWCTX_REGINFO(0x124, 2, DIRECT), | ||
44 | HWCTX_REGINFO(0x200, 5, DIRECT), | ||
45 | HWCTX_REGINFO(0x205, 1024, INDIRECT), | ||
46 | HWCTX_REGINFO(0x207, 1024, INDIRECT), | ||
47 | HWCTX_REGINFO(0x209, 1, DIRECT), | ||
48 | HWCTX_REGINFO(0x300, 64, DIRECT), | ||
49 | HWCTX_REGINFO(0x343, 25, DIRECT), | ||
50 | HWCTX_REGINFO(0x363, 2, DIRECT), | ||
51 | HWCTX_REGINFO(0x400, 16, DIRECT), | ||
52 | HWCTX_REGINFO(0x411, 1, DIRECT), | ||
53 | HWCTX_REGINFO(0x412, 1, DIRECT), | ||
54 | HWCTX_REGINFO(0x500, 4, DIRECT), | ||
55 | HWCTX_REGINFO(0x520, 32, DIRECT), | ||
56 | HWCTX_REGINFO(0x540, 64, INDIRECT), | ||
57 | HWCTX_REGINFO(0x600, 16, INDIRECT_4X), | ||
58 | HWCTX_REGINFO(0x603, 128, INDIRECT), | ||
59 | HWCTX_REGINFO(0x608, 4, DIRECT), | ||
60 | HWCTX_REGINFO(0x60e, 1, DIRECT), | ||
61 | HWCTX_REGINFO(0x700, 64, INDIRECT), | ||
62 | HWCTX_REGINFO(0x710, 50, DIRECT), | ||
63 | HWCTX_REGINFO(0x750, 16, DIRECT), | ||
64 | HWCTX_REGINFO(0x800, 16, INDIRECT_4X), | ||
65 | HWCTX_REGINFO(0x803, 512, INDIRECT), | ||
66 | HWCTX_REGINFO(0x805, 64, INDIRECT), | ||
67 | HWCTX_REGINFO(0x820, 32, DIRECT), | ||
68 | HWCTX_REGINFO(0x900, 64, INDIRECT), | ||
69 | HWCTX_REGINFO(0x902, 2, DIRECT), | ||
70 | HWCTX_REGINFO(0x90a, 1, DIRECT), | ||
71 | HWCTX_REGINFO(0xa02, 10, DIRECT), | ||
72 | HWCTX_REGINFO(0xb04, 1, DIRECT), | ||
73 | HWCTX_REGINFO(0xb06, 13, DIRECT), | ||
74 | HWCTX_REGINFO(0xe42, 2, DIRECT), /* HW bug workaround */ | ||
75 | }; | ||
76 | |||
77 | static const struct hwctx_reginfo ctxsave_regs_3d_perset[] = { | ||
78 | HWCTX_REGINFO(0xe04, 1, DIRECT), | ||
79 | HWCTX_REGINFO(0xe2a, 1, DIRECT), | ||
80 | HWCTX_REGINFO(0x413, 1, DIRECT), | ||
81 | HWCTX_REGINFO(0x90b, 1, DIRECT), | ||
82 | HWCTX_REGINFO(0xe41, 1, DIRECT), | ||
83 | }; | ||
84 | |||
85 | static unsigned int restore_set1_offset; | ||
86 | |||
87 | #define SAVE_BEGIN_V1_SIZE (1 + RESTORE_BEGIN_SIZE) | ||
88 | #define SAVE_DIRECT_V1_SIZE (4 + RESTORE_DIRECT_SIZE) | ||
89 | #define SAVE_INDIRECT_V1_SIZE (6 + RESTORE_INDIRECT_SIZE) | ||
90 | #define SAVE_END_V1_SIZE (9 + RESTORE_END_SIZE) | ||
91 | #define SAVE_INCRS 3 | ||
92 | #define SAVE_THRESH_OFFSET 0 | ||
93 | #define RESTORE_BEGIN_SIZE 4 | ||
94 | #define RESTORE_DIRECT_SIZE 1 | ||
95 | #define RESTORE_INDIRECT_SIZE 2 | ||
96 | #define RESTORE_END_SIZE 1 | ||
97 | |||
98 | struct save_info { | ||
99 | u32 *ptr; | ||
100 | unsigned int save_count; | ||
101 | unsigned int restore_count; | ||
102 | unsigned int save_incrs; | ||
103 | unsigned int restore_incrs; | ||
104 | }; | ||
105 | |||
106 | /*** v1 saver ***/ | ||
107 | |||
108 | static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma) | ||
109 | { | ||
110 | struct host1x_hwctx *ctx = to_host1x_hwctx(nctx); | ||
111 | struct host1x_hwctx_handler *p = host1x_hwctx_handler(ctx); | ||
112 | |||
113 | /* wait for 3d idle */ | ||
114 | nvhost_cdma_push(cdma, | ||
115 | nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0), | ||
116 | nvhost_opcode_imm_incr_syncpt(NV_SYNCPT_OP_DONE, | ||
117 | p->syncpt)); | ||
118 | nvhost_cdma_push(cdma, | ||
119 | nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, | ||
120 | NV_CLASS_HOST_WAIT_SYNCPT_BASE, 1), | ||
121 | nvhost_class_host_wait_syncpt_base(p->syncpt, | ||
122 | p->waitbase, 1)); | ||
123 | /* back to 3d */ | ||
124 | nvhost_cdma_push(cdma, | ||
125 | nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0), | ||
126 | NVHOST_OPCODE_NOOP); | ||
127 | /* set the memory output addresses for register sets 0 and 1, | ||
128 | and send their register reads to memory */ | ||
129 | if (register_sets == 2) { | ||
130 | nvhost_cdma_push(cdma, | ||
131 | nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK, 2), | ||
132 | nvhost_opcode_imm(AR3D_GLOBAL_MEMORY_OUTPUT_READS, | ||
133 | 1)); | ||
134 | nvhost_cdma_push(cdma, | ||
135 | nvhost_opcode_nonincr(0x904, 1), | ||
136 | ctx->restore_phys + restore_set1_offset * 4); | ||
137 | } | ||
138 | nvhost_cdma_push(cdma, | ||
139 | nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK, 1), | ||
140 | nvhost_opcode_imm(AR3D_GLOBAL_MEMORY_OUTPUT_READS, 1)); | ||
141 | nvhost_cdma_push(cdma, | ||
142 | nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_ADDRESS, 1), | ||
143 | ctx->restore_phys); | ||
144 | /* gather the save buffer */ | ||
145 | nvhost_cdma_push_gather(cdma, | ||
146 | (void *)NVHOST_CDMA_PUSH_GATHER_CTXSAVE, | ||
147 | (void *)NVHOST_CDMA_PUSH_GATHER_CTXSAVE, | ||
148 | nvhost_opcode_gather(p->save_size), | ||
149 | p->save_phys); | ||
150 | } | ||
151 | |||
152 | static void __init save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr) | ||
153 | { | ||
154 | ptr[0] = nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_DATA, | ||
155 | RESTORE_BEGIN_SIZE); | ||
156 | nvhost_3dctx_restore_begin(p, ptr + 1); | ||
157 | ptr += RESTORE_BEGIN_SIZE; | ||
158 | } | ||
159 | |||
160 | static void __init save_direct_v1(u32 *ptr, u32 start_reg, u32 count) | ||
161 | { | ||
162 | ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, | ||
163 | AR3D_DW_MEMORY_OUTPUT_DATA, 1); | ||
164 | nvhost_3dctx_restore_direct(ptr + 1, start_reg, count); | ||
165 | ptr += RESTORE_DIRECT_SIZE; | ||
166 | ptr[1] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, | ||
167 | NV_CLASS_HOST_INDOFF, 1); | ||
168 | ptr[2] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D, | ||
169 | start_reg, true); | ||
170 | /* TODO could do this in the setclass if count < 6 */ | ||
171 | ptr[3] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, count); | ||
172 | } | ||
173 | |||
174 | static void __init save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset, | ||
175 | u32 data_reg, u32 count) | ||
176 | { | ||
177 | ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0); | ||
178 | ptr[1] = nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_DATA, | ||
179 | RESTORE_INDIRECT_SIZE); | ||
180 | nvhost_3dctx_restore_indirect(ptr + 2, offset_reg, offset, data_reg, | ||
181 | count); | ||
182 | ptr += RESTORE_INDIRECT_SIZE; | ||
183 | ptr[2] = nvhost_opcode_imm(offset_reg, offset); | ||
184 | ptr[3] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, | ||
185 | NV_CLASS_HOST_INDOFF, 1); | ||
186 | ptr[4] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D, | ||
187 | data_reg, false); | ||
188 | ptr[5] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, count); | ||
189 | } | ||
190 | |||
191 | static void __init save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr) | ||
192 | { | ||
193 | /* write end of restore buffer */ | ||
194 | ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, | ||
195 | AR3D_DW_MEMORY_OUTPUT_DATA, 1); | ||
196 | nvhost_3dctx_restore_end(p, ptr + 1); | ||
197 | ptr += RESTORE_END_SIZE; | ||
198 | /* reset to dual reg if necessary */ | ||
199 | ptr[1] = nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK, | ||
200 | (1 << register_sets) - 1); | ||
201 | /* op_done syncpt incr to flush FDC */ | ||
202 | ptr[2] = nvhost_opcode_imm_incr_syncpt(NV_SYNCPT_OP_DONE, p->syncpt); | ||
203 | /* host wait for that syncpt incr, and advance the wait base */ | ||
204 | ptr[3] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, | ||
205 | NV_CLASS_HOST_WAIT_SYNCPT_BASE, | ||
206 | nvhost_mask2( | ||
207 | NV_CLASS_HOST_WAIT_SYNCPT_BASE, | ||
208 | NV_CLASS_HOST_INCR_SYNCPT_BASE)); | ||
209 | ptr[4] = nvhost_class_host_wait_syncpt_base(p->syncpt, | ||
210 | p->waitbase, p->save_incrs - 1); | ||
211 | ptr[5] = nvhost_class_host_incr_syncpt_base(p->waitbase, | ||
212 | p->save_incrs); | ||
213 | /* set class back to 3d */ | ||
214 | ptr[6] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0); | ||
215 | /* send reg reads back to host */ | ||
216 | ptr[7] = nvhost_opcode_imm(AR3D_GLOBAL_MEMORY_OUTPUT_READS, 0); | ||
217 | /* final syncpt increment to release waiters */ | ||
218 | ptr[8] = nvhost_opcode_imm(0, p->syncpt); | ||
219 | } | ||
220 | |||
221 | /*** save ***/ | ||
222 | |||
223 | |||
224 | |||
225 | static void __init setup_save_regs(struct save_info *info, | ||
226 | const struct hwctx_reginfo *regs, | ||
227 | unsigned int nr_regs) | ||
228 | { | ||
229 | const struct hwctx_reginfo *rend = regs + nr_regs; | ||
230 | u32 *ptr = info->ptr; | ||
231 | unsigned int save_count = info->save_count; | ||
232 | unsigned int restore_count = info->restore_count; | ||
233 | |||
234 | for ( ; regs != rend; ++regs) { | ||
235 | u32 offset = regs->offset; | ||
236 | u32 count = regs->count; | ||
237 | u32 indoff = offset + 1; | ||
238 | switch (regs->type) { | ||
239 | case HWCTX_REGINFO_DIRECT: | ||
240 | if (ptr) { | ||
241 | save_direct_v1(ptr, offset, count); | ||
242 | ptr += SAVE_DIRECT_V1_SIZE; | ||
243 | } | ||
244 | save_count += SAVE_DIRECT_V1_SIZE; | ||
245 | restore_count += RESTORE_DIRECT_SIZE; | ||
246 | break; | ||
247 | case HWCTX_REGINFO_INDIRECT_4X: | ||
248 | ++indoff; | ||
249 | /* fall through */ | ||
250 | case HWCTX_REGINFO_INDIRECT: | ||
251 | if (ptr) { | ||
252 | save_indirect_v1(ptr, offset, 0, | ||
253 | indoff, count); | ||
254 | ptr += SAVE_INDIRECT_V1_SIZE; | ||
255 | } | ||
256 | save_count += SAVE_INDIRECT_V1_SIZE; | ||
257 | restore_count += RESTORE_INDIRECT_SIZE; | ||
258 | break; | ||
259 | } | ||
260 | if (ptr) { | ||
261 | /* SAVE cases only: reserve room for incoming data */ | ||
262 | u32 k = 0; | ||
263 | /* | ||
264 | * Create a signature pattern for indirect data (which | ||
265 | * will be overwritten by true incoming data) for | ||
266 | * better deducing where we are in a long command | ||
267 | * sequence, when given only a FIFO snapshot for debug | ||
268 | * purposes. | ||
269 | */ | ||
270 | for (k = 0; k < count; k++) | ||
271 | *(ptr + k) = 0xd000d000 | (offset << 16) | k; | ||
272 | ptr += count; | ||
273 | } | ||
274 | save_count += count; | ||
275 | restore_count += count; | ||
276 | } | ||
277 | |||
278 | info->ptr = ptr; | ||
279 | info->save_count = save_count; | ||
280 | info->restore_count = restore_count; | ||
281 | } | ||
282 | |||
283 | static void __init switch_gpu(struct save_info *info, | ||
284 | unsigned int save_src_set, | ||
285 | u32 save_dest_sets, | ||
286 | u32 restore_dest_sets) | ||
287 | { | ||
288 | if (info->ptr) { | ||
289 | info->ptr[0] = nvhost_opcode_setclass( | ||
290 | NV_GRAPHICS_3D_CLASS_ID, | ||
291 | AR3D_DW_MEMORY_OUTPUT_DATA, 1); | ||
292 | info->ptr[1] = nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK, | ||
293 | restore_dest_sets); | ||
294 | info->ptr[2] = nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK, | ||
295 | save_dest_sets); | ||
296 | info->ptr[3] = nvhost_opcode_imm(AR3D_GSHIM_READ_SELECT, | ||
297 | save_src_set); | ||
298 | info->ptr += 4; | ||
299 | } | ||
300 | info->save_count += 4; | ||
301 | info->restore_count += 1; | ||
302 | } | ||
303 | |||
304 | static void __init setup_save(struct host1x_hwctx_handler *p, u32 *ptr) | ||
305 | { | ||
306 | struct save_info info = { | ||
307 | ptr, | ||
308 | SAVE_BEGIN_V1_SIZE, | ||
309 | RESTORE_BEGIN_SIZE, | ||
310 | SAVE_INCRS, | ||
311 | 1 | ||
312 | }; | ||
313 | int save_end_size = SAVE_END_V1_SIZE; | ||
314 | |||
315 | BUG_ON(register_sets > 2); | ||
316 | |||
317 | if (info.ptr) { | ||
318 | save_begin_v1(p, info.ptr); | ||
319 | info.ptr += SAVE_BEGIN_V1_SIZE; | ||
320 | } | ||
321 | |||
322 | /* read from set0, write cmds through set0, restore to set0 and 1 */ | ||
323 | if (register_sets == 2) | ||
324 | switch_gpu(&info, 0, 1, 3); | ||
325 | |||
326 | /* save regs that are common to both sets */ | ||
327 | setup_save_regs(&info, | ||
328 | ctxsave_regs_3d_global, | ||
329 | ARRAY_SIZE(ctxsave_regs_3d_global)); | ||
330 | |||
331 | /* read from set 0, write cmds through set0, restore to set0 */ | ||
332 | if (register_sets == 2) | ||
333 | switch_gpu(&info, 0, 1, 1); | ||
334 | |||
335 | /* save set 0 specific regs */ | ||
336 | setup_save_regs(&info, | ||
337 | ctxsave_regs_3d_perset, | ||
338 | ARRAY_SIZE(ctxsave_regs_3d_perset)); | ||
339 | |||
340 | if (register_sets == 2) { | ||
341 | /* read from set1, write cmds through set1, restore to set1 */ | ||
342 | switch_gpu(&info, 1, 2, 2); | ||
343 | /* note offset at which set 1 restore starts */ | ||
344 | restore_set1_offset = info.restore_count; | ||
345 | /* save set 1 specific regs */ | ||
346 | setup_save_regs(&info, | ||
347 | ctxsave_regs_3d_perset, | ||
348 | ARRAY_SIZE(ctxsave_regs_3d_perset)); | ||
349 | } | ||
350 | |||
351 | /* read from set0, write cmds through set1, restore to set0 and 1 */ | ||
352 | if (register_sets == 2) | ||
353 | switch_gpu(&info, 0, 2, 3); | ||
354 | |||
355 | if (info.ptr) { | ||
356 | save_end_v1(p, info.ptr); | ||
357 | info.ptr += SAVE_END_V1_SIZE; | ||
358 | } | ||
359 | |||
360 | wmb(); | ||
361 | |||
362 | p->save_size = info.save_count + save_end_size; | ||
363 | p->restore_size = info.restore_count + RESTORE_END_SIZE; | ||
364 | p->save_incrs = info.save_incrs; | ||
365 | p->save_thresh = p->save_incrs - SAVE_THRESH_OFFSET; | ||
366 | p->restore_incrs = info.restore_incrs; | ||
367 | } | ||
368 | |||
369 | |||
370 | /*** ctx3d ***/ | ||
371 | |||
372 | static struct nvhost_hwctx *ctx3d_alloc_v1(struct nvhost_hwctx_handler *h, | ||
373 | struct nvhost_channel *ch) | ||
374 | { | ||
375 | struct host1x_hwctx_handler *p = to_host1x_hwctx_handler(h); | ||
376 | struct host1x_hwctx *ctx = nvhost_3dctx_alloc_common(p, ch, false); | ||
377 | |||
378 | if (ctx) | ||
379 | return &ctx->hwctx; | ||
380 | else | ||
381 | return NULL; | ||
382 | } | ||
383 | |||
384 | struct nvhost_hwctx_handler *__init nvhost_gr3d_t30_ctxhandler_init( | ||
385 | u32 syncpt, u32 waitbase, | ||
386 | struct nvhost_channel *ch) | ||
387 | { | ||
388 | struct nvmap_client *nvmap; | ||
389 | u32 *save_ptr; | ||
390 | struct host1x_hwctx_handler *p; | ||
391 | |||
392 | p = kmalloc(sizeof(*p), GFP_KERNEL); | ||
393 | if (!p) | ||
394 | return NULL; | ||
395 | |||
396 | nvmap = nvhost_get_host(ch->dev)->nvmap; | ||
397 | |||
398 | register_sets = tegra_gpu_register_sets(); | ||
399 | BUG_ON(register_sets == 0 || register_sets > 2); | ||
400 | |||
401 | p->syncpt = syncpt; | ||
402 | p->waitbase = waitbase; | ||
403 | |||
404 | setup_save(p, NULL); | ||
405 | |||
406 | p->save_buf = nvmap_alloc(nvmap, p->save_size * 4, 32, | ||
407 | NVMAP_HANDLE_WRITE_COMBINE, 0); | ||
408 | if (IS_ERR(p->save_buf)) { | ||
409 | p->save_buf = NULL; | ||
410 | return NULL; | ||
411 | } | ||
412 | |||
413 | p->save_slots = 6; | ||
414 | if (register_sets == 2) | ||
415 | p->save_slots += 2; | ||
416 | |||
417 | save_ptr = nvmap_mmap(p->save_buf); | ||
418 | if (!save_ptr) { | ||
419 | nvmap_free(nvmap, p->save_buf); | ||
420 | p->save_buf = NULL; | ||
421 | return NULL; | ||
422 | } | ||
423 | |||
424 | p->save_phys = nvmap_pin(nvmap, p->save_buf); | ||
425 | |||
426 | setup_save(p, save_ptr); | ||
427 | |||
428 | p->h.alloc = ctx3d_alloc_v1; | ||
429 | p->h.save_push = save_push_v1; | ||
430 | p->h.save_service = NULL; | ||
431 | p->h.get = nvhost_3dctx_get; | ||
432 | p->h.put = nvhost_3dctx_put; | ||
433 | |||
434 | return &p->h; | ||
435 | } | ||
diff --git a/drivers/video/tegra/host/gr3d/gr3d_t30.h b/drivers/video/tegra/host/gr3d/gr3d_t30.h new file mode 100644 index 00000000000..d1b787e14b4 --- /dev/null +++ b/drivers/video/tegra/host/gr3d/gr3d_t30.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gr3d/gr3d_t30.h | ||
3 | * | ||
4 | * Tegra Graphics Host 3D for Tegra3 | ||
5 | * | ||
6 | * Copyright (c) 2011-2012, NVIDIA Corporation. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #ifndef __NVHOST_GR3D_GR3D_T30_H | ||
22 | #define __NVHOST_GR3D_GR3D_T30_H | ||
23 | |||
24 | struct nvhost_hwctx_handler; | ||
25 | |||
26 | struct nvhost_hwctx_handler *nvhost_gr3d_t30_ctxhandler_init( | ||
27 | u32 syncpt, u32 waitbase, | ||
28 | struct nvhost_channel *ch); | ||
29 | |||
30 | #endif | ||
diff --git a/drivers/video/tegra/host/gr3d/scale3d.c b/drivers/video/tegra/host/gr3d/scale3d.c new file mode 100644 index 00000000000..8a267a127ea --- /dev/null +++ b/drivers/video/tegra/host/gr3d/scale3d.c | |||
@@ -0,0 +1,661 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gr3d/scale3d.c | ||
3 | * | ||
4 | * Tegra Graphics Host 3D clock scaling | ||
5 | * | ||
6 | * Copyright (c) 2010-2012, NVIDIA Corporation. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | /* | ||
22 | * 3d clock scaling | ||
23 | * | ||
24 | * nvhost_scale3d_notify_busy() is called upon submit, and | ||
25 | * nvhost_scale3d_notify_idle() when all outstanding submits are completed. | ||
26 | * Idle times are measured over a fixed time period (scale3d.p_period). If | ||
27 | * the 3d module idle time percentage goes over the limit (set in | ||
28 | * scale3d.p_idle_max), 3d clocks are scaled down. If the percentage goes | ||
29 | * under the minimum limit (set in scale3d.p_idle_min), 3d clocks are scaled | ||
30 | * up. An additional test is made over the time frame given in | ||
31 | * scale3d.p_fast_response for clocking up quickly in response to load peaks. | ||
32 | * | ||
33 | * 3d.emc clock is scaled proportionately to 3d clock, with a quadratic- | ||
34 | * bezier-like factor added to pull 3d.emc rate a bit lower. | ||
35 | */ | ||
36 | |||
37 | #include <linux/debugfs.h> | ||
38 | #include <linux/types.h> | ||
39 | #include <linux/clk.h> | ||
40 | #include <mach/clk.h> | ||
41 | #include <mach/hardware.h> | ||
42 | #include "scale3d.h" | ||
43 | #include "dev.h" | ||
44 | |||
45 | static int scale3d_is_enabled(void); | ||
46 | static void scale3d_enable(int enable); | ||
47 | |||
48 | #define POW2(x) ((x) * (x)) | ||
49 | |||
50 | /* | ||
51 | * debugfs parameters to control 3d clock scaling test | ||
52 | * | ||
53 | * period - time period for clock rate evaluation | ||
54 | * fast_response - time period for evaluation of 'busy' spikes | ||
55 | * idle_min - if less than [idle_min] percent idle over [fast_response] | ||
56 | * microseconds, clock up. | ||
57 | * idle_max - if over [idle_max] percent idle over [period] microseconds, | ||
58 | * clock down. | ||
59 | * max_scale - limits rate changes to no less than (100 - max_scale)% or | ||
60 | * (100 + 2 * max_scale)% of current clock rate | ||
61 | * verbosity - set above 5 for debug printouts | ||
62 | */ | ||
63 | |||
64 | struct scale3d_info_rec { | ||
65 | struct mutex lock; /* lock for timestamps etc */ | ||
66 | int enable; | ||
67 | int init; | ||
68 | ktime_t idle_frame; | ||
69 | ktime_t fast_frame; | ||
70 | ktime_t last_idle; | ||
71 | ktime_t last_short_term_idle; | ||
72 | int is_idle; | ||
73 | ktime_t last_tweak; | ||
74 | ktime_t last_down; | ||
75 | int fast_up_count; | ||
76 | int slow_down_count; | ||
77 | int is_scaled; | ||
78 | int fast_responses; | ||
79 | unsigned long idle_total; | ||
80 | unsigned long idle_short_term_total; | ||
81 | unsigned long max_rate_3d; | ||
82 | long emc_slope; | ||
83 | long emc_offset; | ||
84 | long emc_dip_slope; | ||
85 | long emc_dip_offset; | ||
86 | long emc_xmid; | ||
87 | unsigned long min_rate_3d; | ||
88 | struct work_struct work; | ||
89 | struct delayed_work idle_timer; | ||
90 | unsigned int scale; | ||
91 | unsigned int p_period; | ||
92 | unsigned int period; | ||
93 | unsigned int p_idle_min; | ||
94 | unsigned int idle_min; | ||
95 | unsigned int p_idle_max; | ||
96 | unsigned int idle_max; | ||
97 | unsigned int p_fast_response; | ||
98 | unsigned int fast_response; | ||
99 | unsigned int p_adjust; | ||
100 | unsigned int p_scale_emc; | ||
101 | unsigned int p_emc_dip; | ||
102 | unsigned int p_verbosity; | ||
103 | struct clk *clk_3d; | ||
104 | struct clk *clk_3d2; | ||
105 | struct clk *clk_3d_emc; | ||
106 | }; | ||
107 | |||
108 | static struct scale3d_info_rec scale3d; | ||
109 | |||
110 | static void scale3d_clocks(unsigned long percent) | ||
111 | { | ||
112 | unsigned long hz, curr; | ||
113 | |||
114 | if (!tegra_is_clk_enabled(scale3d.clk_3d)) | ||
115 | return; | ||
116 | |||
117 | if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) | ||
118 | if (!tegra_is_clk_enabled(scale3d.clk_3d2)) | ||
119 | return; | ||
120 | |||
121 | curr = clk_get_rate(scale3d.clk_3d); | ||
122 | hz = percent * (curr / 100); | ||
123 | |||
124 | if (!(hz >= scale3d.max_rate_3d && curr == scale3d.max_rate_3d)) { | ||
125 | if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) | ||
126 | clk_set_rate(scale3d.clk_3d2, 0); | ||
127 | clk_set_rate(scale3d.clk_3d, hz); | ||
128 | |||
129 | if (scale3d.p_scale_emc) { | ||
130 | long after = (long) clk_get_rate(scale3d.clk_3d); | ||
131 | hz = after * scale3d.emc_slope + scale3d.emc_offset; | ||
132 | if (scale3d.p_emc_dip) | ||
133 | hz -= | ||
134 | (scale3d.emc_dip_slope * | ||
135 | POW2(after / 1000 - scale3d.emc_xmid) + | ||
136 | scale3d.emc_dip_offset); | ||
137 | clk_set_rate(scale3d.clk_3d_emc, hz); | ||
138 | } | ||
139 | } | ||
140 | } | ||
141 | |||
142 | static void scale3d_clocks_handler(struct work_struct *work) | ||
143 | { | ||
144 | unsigned int scale; | ||
145 | |||
146 | mutex_lock(&scale3d.lock); | ||
147 | scale = scale3d.scale; | ||
148 | mutex_unlock(&scale3d.lock); | ||
149 | |||
150 | if (scale != 0) | ||
151 | scale3d_clocks(scale); | ||
152 | } | ||
153 | |||
154 | void nvhost_scale3d_suspend(struct nvhost_device *dev) | ||
155 | { | ||
156 | if (!scale3d.enable) | ||
157 | return; | ||
158 | |||
159 | cancel_work_sync(&scale3d.work); | ||
160 | cancel_delayed_work(&scale3d.idle_timer); | ||
161 | } | ||
162 | |||
163 | /* set 3d clocks to max */ | ||
164 | static void reset_3d_clocks(void) | ||
165 | { | ||
166 | if (clk_get_rate(scale3d.clk_3d) != scale3d.max_rate_3d) { | ||
167 | clk_set_rate(scale3d.clk_3d, scale3d.max_rate_3d); | ||
168 | if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) | ||
169 | clk_set_rate(scale3d.clk_3d2, scale3d.max_rate_3d); | ||
170 | if (scale3d.p_scale_emc) | ||
171 | clk_set_rate(scale3d.clk_3d_emc, | ||
172 | clk_round_rate(scale3d.clk_3d_emc, UINT_MAX)); | ||
173 | } | ||
174 | } | ||
175 | |||
176 | static int scale3d_is_enabled(void) | ||
177 | { | ||
178 | int enable; | ||
179 | |||
180 | if (!scale3d.enable) | ||
181 | return 0; | ||
182 | |||
183 | mutex_lock(&scale3d.lock); | ||
184 | enable = scale3d.enable; | ||
185 | mutex_unlock(&scale3d.lock); | ||
186 | |||
187 | return enable; | ||
188 | } | ||
189 | |||
190 | static void scale3d_enable(int enable) | ||
191 | { | ||
192 | int disable = 0; | ||
193 | |||
194 | mutex_lock(&scale3d.lock); | ||
195 | |||
196 | if (enable) { | ||
197 | if (scale3d.max_rate_3d != scale3d.min_rate_3d) | ||
198 | scale3d.enable = 1; | ||
199 | } else { | ||
200 | scale3d.enable = 0; | ||
201 | disable = 1; | ||
202 | } | ||
203 | |||
204 | mutex_unlock(&scale3d.lock); | ||
205 | |||
206 | if (disable) | ||
207 | reset_3d_clocks(); | ||
208 | } | ||
209 | |||
210 | static void reset_scaling_counters(ktime_t time) | ||
211 | { | ||
212 | scale3d.idle_total = 0; | ||
213 | scale3d.idle_short_term_total = 0; | ||
214 | scale3d.last_idle = time; | ||
215 | scale3d.last_short_term_idle = time; | ||
216 | scale3d.idle_frame = time; | ||
217 | } | ||
218 | |||
219 | /* scaling_adjust - use scale up / scale down hint counts to adjust scaling | ||
220 | * parameters. | ||
221 | * | ||
222 | * hint_ratio is 100 x the ratio of scale up to scale down hints. Three cases | ||
223 | * are distinguished: | ||
224 | * | ||
225 | * hint_ratio < HINT_RATIO_MIN - set parameters to maximize scaling effect | ||
226 | * hint_ratio > HINT_RATIO_MAX - set parameters to minimize scaling effect | ||
227 | * hint_ratio between limits - scale parameters linearly | ||
228 | * | ||
229 | * the parameters adjusted are | ||
230 | * | ||
231 | * * fast_response time | ||
232 | * * period - time for scaling down estimate | ||
233 | * * idle_min percentage | ||
234 | * * idle_max percentage | ||
235 | */ | ||
236 | #define SCALING_ADJUST_PERIOD 1000000 | ||
237 | #define HINT_RATIO_MAX 400 | ||
238 | #define HINT_RATIO_MIN 100 | ||
239 | #define HINT_RATIO_MID ((HINT_RATIO_MAX + HINT_RATIO_MIN) / 2) | ||
240 | #define HINT_RATIO_DIFF (HINT_RATIO_MAX - HINT_RATIO_MIN) | ||
241 | |||
242 | static void scaling_adjust(ktime_t time) | ||
243 | { | ||
244 | long hint_ratio; | ||
245 | long fast_response_adjustment; | ||
246 | long period_adjustment; | ||
247 | int idle_min_adjustment; | ||
248 | int idle_max_adjustment; | ||
249 | unsigned long dt; | ||
250 | |||
251 | dt = (unsigned long) ktime_us_delta(time, scale3d.last_tweak); | ||
252 | if (dt < SCALING_ADJUST_PERIOD) | ||
253 | return; | ||
254 | |||
255 | hint_ratio = (100 * (scale3d.fast_up_count + 1)) / | ||
256 | (scale3d.slow_down_count + 1); | ||
257 | |||
258 | if (hint_ratio > HINT_RATIO_MAX) { | ||
259 | fast_response_adjustment = -((int) scale3d.p_fast_response) / 4; | ||
260 | period_adjustment = scale3d.p_period / 2; | ||
261 | idle_min_adjustment = scale3d.p_idle_min; | ||
262 | idle_max_adjustment = scale3d.p_idle_max; | ||
263 | } else if (hint_ratio < HINT_RATIO_MIN) { | ||
264 | fast_response_adjustment = scale3d.p_fast_response / 2; | ||
265 | period_adjustment = -((int) scale3d.p_period) / 4; | ||
266 | idle_min_adjustment = -((int) scale3d.p_idle_min) / 2; | ||
267 | idle_max_adjustment = -((int) scale3d.p_idle_max) / 2; | ||
268 | } else { | ||
269 | int diff; | ||
270 | int factor; | ||
271 | |||
272 | diff = HINT_RATIO_MID - hint_ratio; | ||
273 | if (diff < 0) | ||
274 | factor = -diff * 2; | ||
275 | else { | ||
276 | factor = -diff; | ||
277 | diff *= 2; | ||
278 | } | ||
279 | |||
280 | fast_response_adjustment = diff * | ||
281 | (scale3d.p_fast_response / (HINT_RATIO_DIFF * 2)); | ||
282 | period_adjustment = | ||
283 | diff * (scale3d.p_period / HINT_RATIO_DIFF); | ||
284 | idle_min_adjustment = | ||
285 | (factor * (int) scale3d.p_idle_min) / HINT_RATIO_DIFF; | ||
286 | idle_max_adjustment = | ||
287 | (factor * (int) scale3d.p_idle_max) / HINT_RATIO_DIFF; | ||
288 | } | ||
289 | |||
290 | scale3d.fast_response = | ||
291 | scale3d.p_fast_response + fast_response_adjustment; | ||
292 | scale3d.period = scale3d.p_period + period_adjustment; | ||
293 | scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment; | ||
294 | scale3d.idle_max = scale3d.p_idle_max + idle_max_adjustment; | ||
295 | |||
296 | if (scale3d.p_verbosity >= 10) | ||
297 | pr_info("scale3d stats: + %d - %d (/ %d) f %u p %u min %u max %u\n", | ||
298 | scale3d.fast_up_count, scale3d.slow_down_count, | ||
299 | scale3d.fast_responses, scale3d.fast_response, | ||
300 | scale3d.period, scale3d.idle_min, scale3d.idle_max); | ||
301 | |||
302 | scale3d.fast_up_count = 0; | ||
303 | scale3d.slow_down_count = 0; | ||
304 | scale3d.fast_responses = 0; | ||
305 | scale3d.last_down = time; | ||
306 | scale3d.last_tweak = time; | ||
307 | } | ||
308 | |||
309 | #undef SCALING_ADJUST_PERIOD | ||
310 | #undef HINT_RATIO_MAX | ||
311 | #undef HINT_RATIO_MIN | ||
312 | #undef HINT_RATIO_MID | ||
313 | #undef HINT_RATIO_DIFF | ||
314 | |||
315 | static void scaling_state_check(ktime_t time) | ||
316 | { | ||
317 | unsigned long dt; | ||
318 | |||
319 | /* adjustment: tune scale parameters (fast_response, period, | ||
320 | * idle_min, idle_max) based on ratio of scale up to scale down hints | ||
321 | */ | ||
322 | if (scale3d.p_adjust) | ||
323 | scaling_adjust(time); | ||
324 | else { | ||
325 | scale3d.fast_response = scale3d.p_fast_response; | ||
326 | scale3d.period = scale3d.p_period; | ||
327 | scale3d.idle_min = scale3d.p_idle_min; | ||
328 | scale3d.idle_max = scale3d.p_idle_max; | ||
329 | } | ||
330 | |||
331 | /* check for load peaks */ | ||
332 | dt = (unsigned long) ktime_us_delta(time, scale3d.fast_frame); | ||
333 | if (dt > scale3d.fast_response) { | ||
334 | unsigned long idleness = | ||
335 | (scale3d.idle_short_term_total * 100) / dt; | ||
336 | scale3d.fast_responses++; | ||
337 | scale3d.fast_frame = time; | ||
338 | /* if too busy, scale up */ | ||
339 | if (idleness < scale3d.idle_min) { | ||
340 | scale3d.is_scaled = 0; | ||
341 | scale3d.fast_up_count++; | ||
342 | if (scale3d.p_verbosity >= 5) | ||
343 | pr_info("scale3d: %ld%% busy\n", | ||
344 | 100 - idleness); | ||
345 | |||
346 | reset_3d_clocks(); | ||
347 | reset_scaling_counters(time); | ||
348 | return; | ||
349 | } | ||
350 | scale3d.idle_short_term_total = 0; | ||
351 | scale3d.last_short_term_idle = time; | ||
352 | } | ||
353 | |||
354 | dt = (unsigned long) ktime_us_delta(time, scale3d.idle_frame); | ||
355 | if (dt > scale3d.period) { | ||
356 | unsigned long idleness = (scale3d.idle_total * 100) / dt; | ||
357 | |||
358 | if (scale3d.p_verbosity >= 5) | ||
359 | pr_info("scale3d: idle %lu, ~%lu%%\n", | ||
360 | scale3d.idle_total, idleness); | ||
361 | |||
362 | if (idleness > scale3d.idle_max) { | ||
363 | if (!scale3d.is_scaled) { | ||
364 | scale3d.is_scaled = 1; | ||
365 | scale3d.last_down = time; | ||
366 | } | ||
367 | scale3d.slow_down_count++; | ||
368 | /* if idle time is high, clock down */ | ||
369 | scale3d.scale = 100 - (idleness - scale3d.idle_min); | ||
370 | schedule_work(&scale3d.work); | ||
371 | } | ||
372 | |||
373 | reset_scaling_counters(time); | ||
374 | } | ||
375 | } | ||
376 | |||
377 | void nvhost_scale3d_notify_idle(struct nvhost_device *dev) | ||
378 | { | ||
379 | ktime_t t; | ||
380 | unsigned long dt; | ||
381 | |||
382 | if (!scale3d.enable) | ||
383 | return; | ||
384 | |||
385 | mutex_lock(&scale3d.lock); | ||
386 | |||
387 | t = ktime_get(); | ||
388 | |||
389 | if (scale3d.is_idle) { | ||
390 | dt = ktime_us_delta(t, scale3d.last_idle); | ||
391 | scale3d.idle_total += dt; | ||
392 | dt = ktime_us_delta(t, scale3d.last_short_term_idle); | ||
393 | scale3d.idle_short_term_total += dt; | ||
394 | } else | ||
395 | scale3d.is_idle = 1; | ||
396 | |||
397 | scale3d.last_idle = t; | ||
398 | scale3d.last_short_term_idle = t; | ||
399 | |||
400 | scaling_state_check(scale3d.last_idle); | ||
401 | |||
402 | /* delay idle_max % of 2 * fast_response time (given in microseconds) */ | ||
403 | schedule_delayed_work(&scale3d.idle_timer, | ||
404 | msecs_to_jiffies((scale3d.idle_max * scale3d.fast_response) | ||
405 | / 50000)); | ||
406 | |||
407 | mutex_unlock(&scale3d.lock); | ||
408 | } | ||
409 | |||
410 | void nvhost_scale3d_notify_busy(struct nvhost_device *dev) | ||
411 | { | ||
412 | unsigned long idle; | ||
413 | unsigned long short_term_idle; | ||
414 | ktime_t t; | ||
415 | |||
416 | if (!scale3d.enable) | ||
417 | return; | ||
418 | |||
419 | mutex_lock(&scale3d.lock); | ||
420 | |||
421 | cancel_delayed_work(&scale3d.idle_timer); | ||
422 | |||
423 | t = ktime_get(); | ||
424 | |||
425 | if (scale3d.is_idle) { | ||
426 | idle = (unsigned long) | ||
427 | ktime_us_delta(t, scale3d.last_idle); | ||
428 | scale3d.idle_total += idle; | ||
429 | short_term_idle = | ||
430 | ktime_us_delta(t, scale3d.last_short_term_idle); | ||
431 | scale3d.idle_short_term_total += short_term_idle; | ||
432 | scale3d.is_idle = 0; | ||
433 | } | ||
434 | |||
435 | scaling_state_check(t); | ||
436 | |||
437 | mutex_unlock(&scale3d.lock); | ||
438 | } | ||
439 | |||
440 | static void scale3d_idle_handler(struct work_struct *work) | ||
441 | { | ||
442 | int notify_idle = 0; | ||
443 | |||
444 | if (!scale3d.enable) | ||
445 | return; | ||
446 | |||
447 | mutex_lock(&scale3d.lock); | ||
448 | |||
449 | if (scale3d.is_idle && tegra_is_clk_enabled(scale3d.clk_3d)) { | ||
450 | unsigned long curr = clk_get_rate(scale3d.clk_3d); | ||
451 | if (curr > scale3d.min_rate_3d) | ||
452 | notify_idle = 1; | ||
453 | } | ||
454 | |||
455 | mutex_unlock(&scale3d.lock); | ||
456 | |||
457 | if (notify_idle) | ||
458 | nvhost_scale3d_notify_idle(NULL); | ||
459 | } | ||
460 | |||
461 | void nvhost_scale3d_reset() | ||
462 | { | ||
463 | ktime_t t; | ||
464 | |||
465 | if (!scale3d.enable) | ||
466 | return; | ||
467 | |||
468 | t = ktime_get(); | ||
469 | mutex_lock(&scale3d.lock); | ||
470 | reset_scaling_counters(t); | ||
471 | mutex_unlock(&scale3d.lock); | ||
472 | } | ||
473 | |||
474 | /* | ||
475 | * debugfs parameters to control 3d clock scaling | ||
476 | */ | ||
477 | |||
478 | void nvhost_scale3d_debug_init(struct dentry *de) | ||
479 | { | ||
480 | struct dentry *d, *f; | ||
481 | |||
482 | d = debugfs_create_dir("scaling", de); | ||
483 | if (!d) { | ||
484 | pr_err("scale3d: can\'t create debugfs directory\n"); | ||
485 | return; | ||
486 | } | ||
487 | |||
488 | #define CREATE_SCALE3D_FILE(fname) \ | ||
489 | do {\ | ||
490 | f = debugfs_create_u32(#fname, S_IRUGO | S_IWUSR, d,\ | ||
491 | &scale3d.p_##fname);\ | ||
492 | if (NULL == f) {\ | ||
493 | pr_err("scale3d: can\'t create file " #fname "\n");\ | ||
494 | return;\ | ||
495 | } \ | ||
496 | } while (0) | ||
497 | |||
498 | CREATE_SCALE3D_FILE(fast_response); | ||
499 | CREATE_SCALE3D_FILE(idle_min); | ||
500 | CREATE_SCALE3D_FILE(idle_max); | ||
501 | CREATE_SCALE3D_FILE(period); | ||
502 | CREATE_SCALE3D_FILE(adjust); | ||
503 | CREATE_SCALE3D_FILE(scale_emc); | ||
504 | CREATE_SCALE3D_FILE(emc_dip); | ||
505 | CREATE_SCALE3D_FILE(verbosity); | ||
506 | #undef CREATE_SCALE3D_FILE | ||
507 | } | ||
508 | |||
509 | static ssize_t enable_3d_scaling_show(struct device *device, | ||
510 | struct device_attribute *attr, char *buf) | ||
511 | { | ||
512 | ssize_t res; | ||
513 | |||
514 | res = snprintf(buf, PAGE_SIZE, "%d\n", scale3d_is_enabled()); | ||
515 | |||
516 | return res; | ||
517 | } | ||
518 | |||
519 | static ssize_t enable_3d_scaling_store(struct device *dev, | ||
520 | struct device_attribute *attr, const char *buf, size_t count) | ||
521 | { | ||
522 | unsigned long val = 0; | ||
523 | |||
524 | if (strict_strtoul(buf, 10, &val) < 0) | ||
525 | return -EINVAL; | ||
526 | |||
527 | scale3d_enable(val); | ||
528 | |||
529 | return count; | ||
530 | } | ||
531 | |||
532 | static DEVICE_ATTR(enable_3d_scaling, S_IRUGO | S_IWUSR, | ||
533 | enable_3d_scaling_show, enable_3d_scaling_store); | ||
534 | |||
535 | void nvhost_scale3d_init(struct nvhost_device *d) | ||
536 | { | ||
537 | if (!scale3d.init) { | ||
538 | int error; | ||
539 | unsigned long max_emc, min_emc; | ||
540 | long correction; | ||
541 | mutex_init(&scale3d.lock); | ||
542 | |||
543 | scale3d.clk_3d = d->clk[0]; | ||
544 | if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) { | ||
545 | scale3d.clk_3d2 = d->clk[1]; | ||
546 | scale3d.clk_3d_emc = d->clk[2]; | ||
547 | } else | ||
548 | scale3d.clk_3d_emc = d->clk[1]; | ||
549 | |||
550 | scale3d.max_rate_3d = clk_round_rate(scale3d.clk_3d, UINT_MAX); | ||
551 | scale3d.min_rate_3d = clk_round_rate(scale3d.clk_3d, 0); | ||
552 | |||
553 | if (scale3d.max_rate_3d == scale3d.min_rate_3d) { | ||
554 | pr_warn("scale3d: 3d max rate = min rate (%lu), " | ||
555 | "disabling\n", scale3d.max_rate_3d); | ||
556 | scale3d.enable = 0; | ||
557 | return; | ||
558 | } | ||
559 | |||
560 | /* emc scaling: | ||
561 | * | ||
562 | * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od) | ||
563 | * | ||
564 | * Remc - 3d.emc rate | ||
565 | * R3d - 3d.cbus rate | ||
566 | * Rm - 3d.cbus 'middle' rate = (max + min)/2 | ||
567 | * S - emc_slope | ||
568 | * O - emc_offset | ||
569 | * Sd - emc_dip_slope | ||
570 | * Od - emc_dip_offset | ||
571 | * | ||
572 | * this superposes a quadratic dip centered around the middle 3d | ||
573 | * frequency over a linear correlation of 3d.emc to 3d clock | ||
574 | * rates. | ||
575 | * | ||
576 | * S, O are chosen so that the maximum 3d rate produces the | ||
577 | * maximum 3d.emc rate exactly, and the minimum 3d rate produces | ||
578 | * at least the minimum 3d.emc rate. | ||
579 | * | ||
580 | * Sd and Od are chosen to produce the largest dip that will | ||
581 | * keep 3d.emc frequencies monotonously decreasing with 3d | ||
582 | * frequencies. To achieve this, the first derivative of Remc | ||
583 | * with respect to R3d should be zero for the minimal 3d rate: | ||
584 | * | ||
585 | * R'emc = S - 2 * Sd * (R3d - Rm) | ||
586 | * R'emc(R3d-min) = 0 | ||
587 | * S = 2 * Sd * (R3d-min - Rm) | ||
588 | * = 2 * Sd * (R3d-min - R3d-max) / 2 | ||
589 | * Sd = S / (R3d-min - R3d-max) | ||
590 | * | ||
591 | * +---------------------------------------------------+ | ||
592 | * | Sd = -(emc-max - emc-min) / (R3d-min - R3d-max)^2 | | ||
593 | * +---------------------------------------------------+ | ||
594 | * | ||
595 | * dip = Sd * (R3d - Rm)^2 + Od | ||
596 | * | ||
597 | * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives | ||
598 | * | ||
599 | * Sd * (R3d-min - Rm)^2 + Od = 0 | ||
600 | * Od = -Sd * ((R3d-min - R3d-max) / 2)^2 | ||
601 | * = -Sd * ((R3d-min - R3d-max)^2) / 4 | ||
602 | * | ||
603 | * +------------------------------+ | ||
604 | * | Od = (emc-max - emc-min) / 4 | | ||
605 | * +------------------------------+ | ||
606 | */ | ||
607 | |||
608 | max_emc = clk_round_rate(scale3d.clk_3d_emc, UINT_MAX); | ||
609 | min_emc = clk_round_rate(scale3d.clk_3d_emc, 0); | ||
610 | |||
611 | scale3d.emc_slope = (max_emc - min_emc) / | ||
612 | (scale3d.max_rate_3d - scale3d.min_rate_3d); | ||
613 | scale3d.emc_offset = max_emc - | ||
614 | scale3d.emc_slope * scale3d.max_rate_3d; | ||
615 | /* guarantee max 3d rate maps to max emc rate */ | ||
616 | scale3d.emc_offset += max_emc - | ||
617 | (scale3d.emc_slope * scale3d.max_rate_3d + | ||
618 | scale3d.emc_offset); | ||
619 | |||
620 | scale3d.emc_dip_offset = (max_emc - min_emc) / 4; | ||
621 | scale3d.emc_dip_slope = | ||
622 | -4 * (scale3d.emc_dip_offset / | ||
623 | (POW2(scale3d.max_rate_3d - scale3d.min_rate_3d))); | ||
624 | scale3d.emc_xmid = | ||
625 | (scale3d.max_rate_3d + scale3d.min_rate_3d) / 2; | ||
626 | correction = | ||
627 | scale3d.emc_dip_offset + | ||
628 | scale3d.emc_dip_slope * | ||
629 | POW2(scale3d.max_rate_3d - scale3d.emc_xmid); | ||
630 | scale3d.emc_dip_offset -= correction; | ||
631 | |||
632 | INIT_WORK(&scale3d.work, scale3d_clocks_handler); | ||
633 | INIT_DELAYED_WORK(&scale3d.idle_timer, scale3d_idle_handler); | ||
634 | |||
635 | /* set scaling parameter defaults */ | ||
636 | scale3d.enable = 1; | ||
637 | scale3d.period = scale3d.p_period = 100000; | ||
638 | scale3d.idle_min = scale3d.p_idle_min = 10; | ||
639 | scale3d.idle_max = scale3d.p_idle_max = 15; | ||
640 | scale3d.fast_response = scale3d.p_fast_response = 7000; | ||
641 | scale3d.p_scale_emc = 1; | ||
642 | scale3d.p_emc_dip = 1; | ||
643 | scale3d.p_verbosity = 0; | ||
644 | scale3d.p_adjust = 1; | ||
645 | |||
646 | error = device_create_file(&d->dev, | ||
647 | &dev_attr_enable_3d_scaling); | ||
648 | if (error) | ||
649 | dev_err(&d->dev, "failed to create sysfs attributes"); | ||
650 | |||
651 | scale3d.init = 1; | ||
652 | } | ||
653 | |||
654 | nvhost_scale3d_reset(); | ||
655 | } | ||
656 | |||
657 | void nvhost_scale3d_deinit(struct nvhost_device *dev) | ||
658 | { | ||
659 | device_remove_file(&dev->dev, &dev_attr_enable_3d_scaling); | ||
660 | scale3d.init = 0; | ||
661 | } | ||
diff --git a/drivers/video/tegra/host/gr3d/scale3d.h b/drivers/video/tegra/host/gr3d/scale3d.h new file mode 100644 index 00000000000..f8aae1d591a --- /dev/null +++ b/drivers/video/tegra/host/gr3d/scale3d.h | |||
@@ -0,0 +1,47 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gr3d/scale3d.h | ||
3 | * | ||
4 | * Tegra Graphics Host 3D Clock Scaling | ||
5 | * | ||
6 | * Copyright (c) 2010-2012, NVIDIA Corporation. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
#ifndef NVHOST_T30_SCALE3D_H
#define NVHOST_T30_SCALE3D_H

/* only pointers to these are used below, so forward declarations suffice */
struct nvhost_device;
struct device;
struct dentry;

/* Module initialization and de-initialization */
void nvhost_scale3d_init(struct nvhost_device *dev);
void nvhost_scale3d_deinit(struct nvhost_device *dev);

/* Called when the module is being powered down */
void nvhost_scale3d_suspend(struct nvhost_device *dev);

/* Reset the 3d module load counters; called on resume */
void nvhost_scale3d_reset(void);

/*
 * notify_busy: call when performing a submit, to tell the scaling mechanism
 * that the 3d module is in use.
 * notify_idle: the counterpart, reporting that the 3d module is idle.
 */
void nvhost_scale3d_notify_busy(struct nvhost_device *dev);
void nvhost_scale3d_notify_idle(struct nvhost_device *dev);

/* Create the scaling debugfs entries below the directory de */
void nvhost_scale3d_debug_init(struct dentry *de);

#endif /* NVHOST_T30_SCALE3D_H */