diff options
Diffstat (limited to 'drivers/gpu/nvgpu/clk')
-rw-r--r-- | drivers/gpu/nvgpu/clk/clk.c | 61 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/clk/clk.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/clk/clk_arb.c | 765 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/clk/clk_arb.h | 26 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/clk/clk_mclk.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/clk/clk_mclk.h | 10 |
6 files changed, 698 insertions, 184 deletions
diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c index ef0834f4..bec5fad1 100644 --- a/drivers/gpu/nvgpu/clk/clk.c +++ b/drivers/gpu/nvgpu/clk/clk.c | |||
@@ -255,7 +255,7 @@ static int get_regime_id(struct gk20a *g, u32 domain, u32 *regimeid) | |||
255 | return -EINVAL; | 255 | return -EINVAL; |
256 | } | 256 | } |
257 | 257 | ||
258 | int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk) | 258 | int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk) |
259 | { | 259 | { |
260 | int status = -EINVAL; | 260 | int status = -EINVAL; |
261 | struct clk_domain *pdomain; | 261 | struct clk_domain *pdomain; |
@@ -277,8 +277,6 @@ int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk) | |||
277 | if (fllclk->clkmhz == 0) | 277 | if (fllclk->clkmhz == 0) |
278 | return -EINVAL; | 278 | return -EINVAL; |
279 | 279 | ||
280 | mutex_lock(&pclk->changeclkmutex); | ||
281 | |||
282 | setfllclk.voltuv = fllclk->voltuv; | 280 | setfllclk.voltuv = fllclk->voltuv; |
283 | setfllclk.gpc2clkmhz = fllclk->clkmhz; | 281 | setfllclk.gpc2clkmhz = fllclk->clkmhz; |
284 | 282 | ||
@@ -376,63 +374,6 @@ int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk) | |||
376 | if (status) | 374 | if (status) |
377 | goto done; | 375 | goto done; |
378 | done: | 376 | done: |
379 | mutex_unlock(&pclk->changeclkmutex); | ||
380 | return status; | ||
381 | } | ||
382 | |||
383 | int clk_set_boot_fll_clk(struct gk20a *g) | ||
384 | { | ||
385 | int status; | ||
386 | struct change_fll_clk bootfllclk; | ||
387 | u16 gpc2clk_clkmhz = BOOT_GPC2CLK_MHZ; | ||
388 | u32 gpc2clk_voltuv = 0; | ||
389 | u32 gpc2clk_voltuv_sram = 0; | ||
390 | u16 mclk_clkmhz = BOOT_MCLK_MHZ; | ||
391 | u32 mclk_voltuv = 0; | ||
392 | u32 mclk_voltuv_sram = 0; | ||
393 | u32 voltuv = 0; | ||
394 | u32 voltuv_sram = 0; | ||
395 | |||
396 | mutex_init(&g->clk_pmu.changeclkmutex); | ||
397 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, | ||
398 | &gpc2clk_clkmhz, &gpc2clk_voltuv, CTRL_VOLT_DOMAIN_LOGIC); | ||
399 | if (status) | ||
400 | return status; | ||
401 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, | ||
402 | &gpc2clk_clkmhz, &gpc2clk_voltuv_sram, CTRL_VOLT_DOMAIN_SRAM); | ||
403 | if (status) | ||
404 | return status; | ||
405 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, | ||
406 | &mclk_clkmhz, &mclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC); | ||
407 | if (status) | ||
408 | return status; | ||
409 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, | ||
410 | &mclk_clkmhz, &mclk_voltuv_sram, CTRL_VOLT_DOMAIN_SRAM); | ||
411 | if (status) | ||
412 | return status; | ||
413 | |||
414 | voltuv = ((gpc2clk_voltuv) > (mclk_voltuv)) ? (gpc2clk_voltuv) | ||
415 | : (mclk_voltuv); | ||
416 | |||
417 | voltuv_sram = ((gpc2clk_voltuv_sram) > (mclk_voltuv_sram)) ? | ||
418 | (gpc2clk_voltuv_sram) : (mclk_voltuv_sram); | ||
419 | |||
420 | status = volt_set_voltage(g, voltuv, voltuv_sram); | ||
421 | if (status) | ||
422 | gk20a_err(dev_from_gk20a(g), | ||
423 | "attempt to set boot voltage failed %d %d", | ||
424 | voltuv, voltuv_sram); | ||
425 | |||
426 | bootfllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; | ||
427 | bootfllclk.clkmhz = gpc2clk_clkmhz; | ||
428 | bootfllclk.voltuv = voltuv; | ||
429 | status = clk_program_fllclks(g, &bootfllclk); | ||
430 | if (status) | ||
431 | gk20a_err(dev_from_gk20a(g), "attempt to set boot gpc2clk failed"); | ||
432 | status = g->clk_pmu.clk_mclk.change(g, DEFAULT_BOOT_MCLK_SPEED); | ||
433 | if (status) | ||
434 | gk20a_err(dev_from_gk20a(g), "attempt to set boot mclk failed"); | ||
435 | |||
436 | return status; | 377 | return status; |
437 | } | 378 | } |
438 | 379 | ||
diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h index a0b88dcb..d0e82173 100644 --- a/drivers/gpu/nvgpu/clk/clk.h +++ b/drivers/gpu/nvgpu/clk/clk.h | |||
@@ -119,6 +119,5 @@ u32 clk_domain_get_f_points( | |||
119 | u32 *fpointscount, | 119 | u32 *fpointscount, |
120 | u16 *freqpointsinmhz | 120 | u16 *freqpointsinmhz |
121 | ); | 121 | ); |
122 | int clk_set_boot_fll_clk(struct gk20a *g); | ||
123 | int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk); | 122 | int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk); |
124 | #endif | 123 | #endif |
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index 98b7cb5f..f868100b 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c | |||
@@ -18,9 +18,17 @@ | |||
18 | #include <linux/anon_inodes.h> | 18 | #include <linux/anon_inodes.h> |
19 | #include <linux/nvgpu.h> | 19 | #include <linux/nvgpu.h> |
20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
21 | #include <linux/spinlock.h> | ||
21 | 22 | ||
22 | #include "clk/clk_arb.h" | 23 | #include "clk/clk_arb.h" |
23 | 24 | ||
25 | |||
26 | #define MAX_F_POINTS 127 | ||
27 | |||
28 | #ifdef CONFIG_DEBUG_FS | ||
29 | static int nvgpu_clk_arb_debugfs_init(struct gk20a *g); | ||
30 | #endif | ||
31 | |||
24 | static int nvgpu_clk_arb_release_event_dev(struct inode *inode, | 32 | static int nvgpu_clk_arb_release_event_dev(struct inode *inode, |
25 | struct file *filp); | 33 | struct file *filp); |
26 | static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, | 34 | static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, |
@@ -28,21 +36,57 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, | |||
28 | static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait); | 36 | static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait); |
29 | 37 | ||
30 | static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work); | 38 | static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work); |
39 | static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work); | ||
40 | static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *); | ||
41 | |||
42 | struct nvgpu_clk_vf_point { | ||
43 | u16 mhz; | ||
44 | u32 uvolt; | ||
45 | u32 uvolt_sram; | ||
46 | }; | ||
31 | 47 | ||
32 | struct nvgpu_clk_arb { | 48 | struct nvgpu_clk_arb { |
33 | struct mutex req_lock; | 49 | spinlock_t sessions_lock; |
34 | struct mutex users_lock; | 50 | spinlock_t users_lock; |
51 | spinlock_t req_lock; | ||
52 | |||
35 | struct list_head users; | 53 | struct list_head users; |
54 | struct list_head sessions; | ||
36 | struct list_head requests; | 55 | struct list_head requests; |
37 | 56 | ||
38 | u64 gpc2clk_current_hz; | 57 | struct gk20a *g; |
39 | u64 gpc2clk_target_hz; | 58 | spinlock_t data_lock; |
40 | u64 gpc2clk_default_hz; | 59 | spinlock_t vf_lock; |
41 | u64 mclk_current_hz; | 60 | |
42 | u64 mclk_target_hz; | 61 | u16 gpc2clk_actual_mhz; |
43 | u64 mclk_default_hz; | 62 | u16 gpc2clk_default_mhz; |
44 | atomic_t usercount; | 63 | |
64 | u16 mclk_actual_mhz; | ||
65 | u16 mclk_default_mhz; | ||
66 | u32 voltuv_actual; | ||
67 | |||
45 | struct work_struct update_fn_work; | 68 | struct work_struct update_fn_work; |
69 | struct work_struct vftable_fn_work; | ||
70 | wait_queue_head_t vftable_wq; | ||
71 | |||
72 | u16 *mclk_f_points; | ||
73 | bool vftable_set; | ||
74 | |||
75 | struct nvgpu_clk_vf_point *mclk_vf_points; | ||
76 | u32 mclk_f_numpoints; | ||
77 | u16 *gpc2clk_f_points; | ||
78 | u32 gpc2clk_f_numpoints; | ||
79 | struct nvgpu_clk_vf_point *gpc2clk_vf_points; | ||
80 | |||
81 | #ifdef CONFIG_DEBUG_FS | ||
82 | struct mutex debug_lock; | ||
83 | s64 switch_max; | ||
84 | s64 switch_min; | ||
85 | u64 switch_num; | ||
86 | s64 switch_avg; | ||
87 | s64 switch_std; | ||
88 | bool debugfs_set; | ||
89 | #endif | ||
46 | }; | 90 | }; |
47 | 91 | ||
48 | 92 | ||
@@ -51,15 +95,20 @@ struct nvgpu_clk_dev { | |||
51 | struct list_head link; | 95 | struct list_head link; |
52 | wait_queue_head_t readout_wq; | 96 | wait_queue_head_t readout_wq; |
53 | atomic_t poll_mask; | 97 | atomic_t poll_mask; |
98 | u16 gpc2clk_target_mhz; | ||
99 | u16 mclk_target_mhz; | ||
54 | }; | 100 | }; |
55 | 101 | ||
56 | struct nvgpu_clk_session { | 102 | struct nvgpu_clk_session { |
57 | bool zombie; | 103 | bool zombie; |
58 | struct gk20a *g; | 104 | struct gk20a *g; |
59 | struct kref refcount; | 105 | struct kref refcount; |
106 | struct list_head link; | ||
107 | struct list_head targets; | ||
60 | 108 | ||
61 | u64 gpc2clk_target_hz; | 109 | spinlock_t target_lock; |
62 | u64 mclk_target_hz; | 110 | u16 gpc2clk_target_mhz; |
111 | u16 mclk_target_mhz; | ||
63 | }; | 112 | }; |
64 | 113 | ||
65 | static const struct file_operations completion_dev_ops = { | 114 | static const struct file_operations completion_dev_ops = { |
@@ -77,7 +126,7 @@ static const struct file_operations event_dev_ops = { | |||
77 | int nvgpu_clk_arb_init_arbiter(struct gk20a *g) | 126 | int nvgpu_clk_arb_init_arbiter(struct gk20a *g) |
78 | { | 127 | { |
79 | struct nvgpu_clk_arb *arb; | 128 | struct nvgpu_clk_arb *arb; |
80 | u64 default_hz; | 129 | u16 default_mhz; |
81 | int err; | 130 | int err; |
82 | 131 | ||
83 | gk20a_dbg_fn(""); | 132 | gk20a_dbg_fn(""); |
@@ -86,39 +135,104 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) | |||
86 | return 0; | 135 | return 0; |
87 | 136 | ||
88 | arb = kzalloc(sizeof(struct nvgpu_clk_arb), GFP_KERNEL); | 137 | arb = kzalloc(sizeof(struct nvgpu_clk_arb), GFP_KERNEL); |
89 | if (!arb) | 138 | if (!arb) { |
90 | return -ENOMEM; | 139 | err = -ENOMEM; |
140 | goto init_fail; | ||
141 | } | ||
142 | |||
143 | arb->gpc2clk_f_numpoints = MAX_F_POINTS; | ||
144 | arb->mclk_f_numpoints = MAX_F_POINTS; | ||
145 | |||
146 | arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL); | ||
147 | if (!arb->gpc2clk_f_points) { | ||
148 | err = -ENOMEM; | ||
149 | goto init_fail; | ||
150 | } | ||
151 | |||
152 | arb->mclk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL); | ||
153 | if (!arb->mclk_f_points) { | ||
154 | err = -ENOMEM; | ||
155 | goto init_fail; | ||
156 | } | ||
157 | |||
158 | arb->gpc2clk_vf_points = kcalloc(MAX_F_POINTS, | ||
159 | sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL); | ||
160 | if (!arb->gpc2clk_vf_points) { | ||
161 | err = -ENOMEM; | ||
162 | goto init_fail; | ||
163 | } | ||
164 | |||
165 | arb->mclk_vf_points = kcalloc(MAX_F_POINTS, | ||
166 | sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL); | ||
167 | if (!arb->mclk_vf_points) { | ||
168 | err = -ENOMEM; | ||
169 | goto init_fail; | ||
170 | } | ||
91 | 171 | ||
92 | g->clk_arb = arb; | 172 | g->clk_arb = arb; |
173 | arb->g = g; | ||
93 | 174 | ||
94 | mutex_init(&arb->req_lock); | 175 | spin_lock_init(&arb->sessions_lock); |
95 | mutex_init(&arb->users_lock); | 176 | spin_lock_init(&arb->users_lock); |
177 | spin_lock_init(&arb->req_lock); | ||
178 | spin_lock_init(&arb->data_lock); | ||
179 | spin_lock_init(&arb->vf_lock); | ||
96 | 180 | ||
97 | err = g->ops.clk_arb.get_arbiter_clk_default(g, | 181 | err = g->ops.clk_arb.get_arbiter_clk_default(g, |
98 | NVGPU_GPU_CLK_DOMAIN_MCLK, &default_hz); | 182 | NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz); |
99 | if (err) | 183 | if (err) { |
100 | return -EINVAL; | 184 | err = -EINVAL; |
185 | goto init_fail; | ||
186 | } | ||
101 | 187 | ||
102 | arb->mclk_target_hz = default_hz; | 188 | arb->mclk_default_mhz = default_mhz; |
103 | arb->mclk_current_hz = default_hz; | ||
104 | arb->mclk_default_hz = default_hz; | ||
105 | 189 | ||
106 | err = g->ops.clk_arb.get_arbiter_clk_default(g, | 190 | err = g->ops.clk_arb.get_arbiter_clk_default(g, |
107 | NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_hz); | 191 | NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_mhz); |
108 | if (err) | 192 | if (err) { |
109 | return -EINVAL; | 193 | err = -EINVAL; |
110 | 194 | goto init_fail; | |
111 | arb->gpc2clk_target_hz = default_hz; | 195 | } |
112 | arb->gpc2clk_current_hz = default_hz; | ||
113 | arb->gpc2clk_default_hz = default_hz; | ||
114 | 196 | ||
115 | atomic_set(&arb->usercount, 0); | 197 | arb->gpc2clk_default_mhz = default_mhz; |
116 | 198 | ||
117 | INIT_LIST_HEAD(&arb->users); | 199 | INIT_LIST_HEAD(&arb->users); |
200 | INIT_LIST_HEAD(&arb->sessions); | ||
118 | INIT_LIST_HEAD(&arb->requests); | 201 | INIT_LIST_HEAD(&arb->requests); |
202 | |||
203 | init_waitqueue_head(&arb->vftable_wq); | ||
204 | |||
205 | INIT_WORK(&arb->vftable_fn_work, nvgpu_clk_arb_run_vftable_cb); | ||
206 | |||
119 | INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb); | 207 | INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb); |
120 | 208 | ||
209 | #ifdef CONFIG_DEBUG_FS | ||
210 | mutex_init(&arb->debug_lock); | ||
211 | if (!arb->debugfs_set) { | ||
212 | if (nvgpu_clk_arb_debugfs_init(g)) | ||
213 | arb->debugfs_set = true; | ||
214 | } | ||
215 | #endif | ||
216 | err = nvgpu_clk_arb_update_vftable(arb); | ||
217 | if (err < 0) | ||
218 | goto init_fail; | ||
219 | |||
220 | /* Schedule first run */ | ||
221 | schedule_work(&arb->update_fn_work); | ||
222 | |||
121 | return 0; | 223 | return 0; |
224 | |||
225 | init_fail: | ||
226 | |||
227 | kfree(arb->gpc2clk_f_points); | ||
228 | kfree(arb->gpc2clk_vf_points); | ||
229 | |||
230 | kfree(arb->mclk_f_points); | ||
231 | kfree(arb->mclk_vf_points); | ||
232 | |||
233 | kfree(arb); | ||
234 | |||
235 | return err; | ||
122 | } | 236 | } |
123 | 237 | ||
124 | void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g) | 238 | void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g) |
@@ -170,6 +284,7 @@ static int nvgpu_clk_arb_install_fd(struct gk20a *g, | |||
170 | fail: | 284 | fail: |
171 | kfree(dev); | 285 | kfree(dev); |
172 | put_unused_fd(fd); | 286 | put_unused_fd(fd); |
287 | |||
173 | return err; | 288 | return err; |
174 | } | 289 | } |
175 | 290 | ||
@@ -190,12 +305,16 @@ int nvgpu_clk_arb_init_session(struct gk20a *g, | |||
190 | session->g = g; | 305 | session->g = g; |
191 | 306 | ||
192 | kref_init(&session->refcount); | 307 | kref_init(&session->refcount); |
193 | 308 | spin_lock_init(&session->target_lock); | |
194 | atomic_inc(&arb->usercount); | ||
195 | 309 | ||
196 | session->zombie = false; | 310 | session->zombie = false; |
197 | session->mclk_target_hz = arb->mclk_default_hz; | 311 | session->mclk_target_mhz = arb->mclk_default_mhz; |
198 | session->gpc2clk_target_hz = arb->gpc2clk_default_hz; | 312 | session->gpc2clk_target_mhz = arb->gpc2clk_default_mhz; |
313 | INIT_LIST_HEAD(&session->targets); | ||
314 | |||
315 | spin_lock(&arb->sessions_lock); | ||
316 | list_add_tail(&session->link, &arb->sessions); | ||
317 | spin_unlock(&arb->sessions_lock); | ||
199 | 318 | ||
200 | *_session = session; | 319 | *_session = session; |
201 | 320 | ||
@@ -206,8 +325,15 @@ void nvgpu_clk_arb_free_session(struct kref *refcount) | |||
206 | { | 325 | { |
207 | struct nvgpu_clk_session *session = container_of(refcount, | 326 | struct nvgpu_clk_session *session = container_of(refcount, |
208 | struct nvgpu_clk_session, refcount); | 327 | struct nvgpu_clk_session, refcount); |
328 | struct nvgpu_clk_arb *arb = session->g->clk_arb; | ||
209 | 329 | ||
330 | gk20a_dbg_fn(""); | ||
331 | |||
332 | spin_lock(&arb->sessions_lock); | ||
333 | list_del(&session->link); | ||
334 | spin_unlock(&arb->sessions_lock); | ||
210 | kfree(session); | 335 | kfree(session); |
336 | ; | ||
211 | } | 337 | } |
212 | 338 | ||
213 | void nvgpu_clk_arb_release_session(struct gk20a *g, | 339 | void nvgpu_clk_arb_release_session(struct gk20a *g, |
@@ -215,12 +341,12 @@ void nvgpu_clk_arb_release_session(struct gk20a *g, | |||
215 | { | 341 | { |
216 | struct nvgpu_clk_arb *arb = g->clk_arb; | 342 | struct nvgpu_clk_arb *arb = g->clk_arb; |
217 | 343 | ||
344 | gk20a_dbg_fn(""); | ||
345 | |||
218 | session->zombie = true; | 346 | session->zombie = true; |
219 | kref_put(&session->refcount, nvgpu_clk_arb_free_session); | 347 | kref_put(&session->refcount, nvgpu_clk_arb_free_session); |
220 | 348 | ||
221 | /* schedule arbiter if no more user */ | 349 | schedule_work(&arb->update_fn_work); |
222 | if (!atomic_dec_and_test(&arb->usercount)) | ||
223 | schedule_work(&arb->update_fn_work); | ||
224 | } | 350 | } |
225 | 351 | ||
226 | int nvgpu_clk_arb_install_event_fd(struct gk20a *g, | 352 | int nvgpu_clk_arb_install_event_fd(struct gk20a *g, |
@@ -230,19 +356,155 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g, | |||
230 | struct nvgpu_clk_dev *dev; | 356 | struct nvgpu_clk_dev *dev; |
231 | int fd; | 357 | int fd; |
232 | 358 | ||
359 | gk20a_dbg_fn(""); | ||
360 | |||
233 | fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev); | 361 | fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev); |
234 | if (fd < 0) | 362 | if (fd < 0) |
235 | return fd; | 363 | return fd; |
236 | 364 | ||
237 | mutex_lock(&arb->users_lock); | 365 | spin_lock(&arb->users_lock); |
238 | list_add_tail(&dev->link, &arb->users); | 366 | list_add_tail(&dev->link, &arb->users); |
239 | mutex_unlock(&arb->users_lock); | 367 | spin_unlock(&arb->users_lock); |
240 | 368 | ||
241 | *event_fd = fd; | 369 | *event_fd = fd; |
242 | 370 | ||
243 | return 0; | 371 | return 0; |
244 | } | 372 | } |
245 | 373 | ||
374 | int nvgpu_clk_arb_install_request_fd(struct gk20a *g, | ||
375 | struct nvgpu_clk_session *session, int *request_fd) | ||
376 | { | ||
377 | struct nvgpu_clk_dev *dev; | ||
378 | int fd; | ||
379 | |||
380 | gk20a_dbg_fn(""); | ||
381 | |||
382 | fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); | ||
383 | if (fd < 0) | ||
384 | return fd; | ||
385 | |||
386 | *request_fd = fd; | ||
387 | |||
388 | return 0; | ||
389 | } | ||
390 | |||
391 | static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *arb) | ||
392 | { | ||
393 | struct gk20a *g = arb->g; | ||
394 | |||
395 | int i; | ||
396 | int status = 0; | ||
397 | u32 gpc2clk_voltuv = 0, mclk_voltuv = 0; | ||
398 | u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0; | ||
399 | |||
400 | /* the flag must be visible in all threads */ | ||
401 | mb(); | ||
402 | ACCESS_ONCE(arb->vftable_set) = false; | ||
403 | |||
404 | spin_lock(&arb->vf_lock); | ||
405 | |||
406 | if (!clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK, | ||
407 | &arb->gpc2clk_f_numpoints, arb->gpc2clk_f_points) < 0) { | ||
408 | gk20a_err(dev_from_gk20a(g), | ||
409 | "failed to fetch GPC2CLK frequency points"); | ||
410 | goto exit_vftable; | ||
411 | } | ||
412 | if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK, | ||
413 | &arb->mclk_f_numpoints, arb->mclk_f_points) < 0) { | ||
414 | gk20a_err(dev_from_gk20a(g), | ||
415 | "failed to fetch MCLK frequency points"); | ||
416 | goto exit_vftable; | ||
417 | } | ||
418 | |||
419 | |||
420 | memset(arb->mclk_vf_points, 0, | ||
421 | arb->mclk_f_numpoints*sizeof(struct nvgpu_clk_vf_point)); | ||
422 | memset(arb->gpc2clk_vf_points, 0, | ||
423 | arb->gpc2clk_f_numpoints*sizeof(struct nvgpu_clk_vf_point)); | ||
424 | |||
425 | for (i = 0 ; i < arb->mclk_f_numpoints; i++) { | ||
426 | arb->mclk_vf_points[i].mhz = arb->mclk_f_points[i]; | ||
427 | mclk_voltuv = mclk_voltuv_sram = 0; | ||
428 | |||
429 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, | ||
430 | &arb->mclk_vf_points[i].mhz, &mclk_voltuv, | ||
431 | CTRL_VOLT_DOMAIN_LOGIC); | ||
432 | if (status < 0) { | ||
433 | gk20a_err(dev_from_gk20a(g), | ||
434 | "failed to get MCLK LOGIC voltage"); | ||
435 | goto exit_vftable; | ||
436 | } | ||
437 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, | ||
438 | &arb->mclk_vf_points[i].mhz, &mclk_voltuv_sram, | ||
439 | CTRL_VOLT_DOMAIN_SRAM); | ||
440 | if (status < 0) { | ||
441 | gk20a_err(dev_from_gk20a(g), | ||
442 | "failed to get MCLK SRAM voltage"); | ||
443 | goto exit_vftable; | ||
444 | } | ||
445 | |||
446 | arb->mclk_vf_points[i].uvolt = mclk_voltuv; | ||
447 | arb->mclk_vf_points[i].uvolt_sram = mclk_voltuv_sram; | ||
448 | } | ||
449 | |||
450 | for (i = 0 ; i < arb->gpc2clk_f_numpoints; i++) { | ||
451 | arb->gpc2clk_vf_points[i].mhz = arb->gpc2clk_f_points[i]; | ||
452 | gpc2clk_voltuv = gpc2clk_voltuv_sram = 0; | ||
453 | |||
454 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, | ||
455 | &arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv, | ||
456 | CTRL_VOLT_DOMAIN_LOGIC); | ||
457 | if (status < 0) { | ||
458 | gk20a_err(dev_from_gk20a(g), | ||
459 | "failed to get GPC2CLK LOGIC voltage"); | ||
460 | goto exit_vftable; | ||
461 | } | ||
462 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, | ||
463 | &arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv_sram, | ||
464 | CTRL_VOLT_DOMAIN_SRAM); | ||
465 | if (status < 0) { | ||
466 | gk20a_err(dev_from_gk20a(g), | ||
467 | "failed to get GPC2CLK SRAM voltage"); | ||
468 | goto exit_vftable; | ||
469 | } | ||
470 | |||
471 | arb->gpc2clk_vf_points[i].uvolt = gpc2clk_voltuv; | ||
472 | arb->gpc2clk_vf_points[i].uvolt_sram = gpc2clk_voltuv_sram; | ||
473 | |||
474 | } | ||
475 | |||
476 | /* make flag visible when all data has resolved in the tables */ | ||
477 | wmb(); | ||
478 | ACCESS_ONCE(arb->vftable_set) = true; | ||
479 | |||
480 | wake_up(&arb->vftable_wq); | ||
481 | exit_vftable: | ||
482 | |||
483 | spin_unlock(&arb->vf_lock); | ||
484 | |||
485 | return status; | ||
486 | } | ||
487 | |||
488 | void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g) | ||
489 | { | ||
490 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
491 | |||
492 | ACCESS_ONCE(arb->vftable_set) = false; | ||
493 | /* Disable the flag in case arbiter gets scheduled first */ | ||
494 | mb(); | ||
495 | |||
496 | schedule_work(&arb->vftable_fn_work); | ||
497 | schedule_work(&arb->update_fn_work); | ||
498 | } | ||
499 | |||
500 | static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work) | ||
501 | { | ||
502 | struct nvgpu_clk_arb *arb = | ||
503 | container_of(work, struct nvgpu_clk_arb, update_fn_work); | ||
504 | |||
505 | nvgpu_clk_arb_update_vftable(arb); | ||
506 | } | ||
507 | |||
246 | static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | 508 | static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) |
247 | { | 509 | { |
248 | struct nvgpu_clk_arb *arb = | 510 | struct nvgpu_clk_arb *arb = |
@@ -250,67 +512,270 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
250 | struct nvgpu_clk_session *session; | 512 | struct nvgpu_clk_session *session; |
251 | struct nvgpu_clk_dev *dev; | 513 | struct nvgpu_clk_dev *dev; |
252 | struct nvgpu_clk_dev *tmp; | 514 | struct nvgpu_clk_dev *tmp; |
515 | struct gk20a *g = arb->g; | ||
516 | |||
517 | struct change_fll_clk fllclk; | ||
518 | u32 gpc2clk_voltuv = 0, mclk_voltuv = 0; | ||
519 | u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0; | ||
520 | |||
521 | u32 voltuv, voltuv_sram; | ||
522 | |||
523 | int status; | ||
524 | |||
525 | /* Temporary variables for checking target frequency */ | ||
526 | u16 gpc2clk_target, mclk_target; | ||
253 | 527 | ||
254 | mutex_lock(&arb->req_lock); | 528 | /* iteration index */ |
529 | u32 index; | ||
255 | 530 | ||
256 | arb->mclk_target_hz = arb->mclk_default_hz; | 531 | #ifdef CONFIG_DEBUG_FS |
257 | arb->gpc2clk_target_hz = arb->gpc2clk_default_hz; | 532 | u64 t0, t1; |
533 | #endif | ||
258 | 534 | ||
259 | list_for_each_entry(dev, &arb->requests, link) { | 535 | gk20a_dbg_fn(""); |
260 | session = dev->session; | 536 | |
537 | #ifdef CONFIG_DEBUG_FS | ||
538 | g->ops.read_ptimer(g, &t0); | ||
539 | #endif | ||
540 | |||
541 | /* Only one arbiter should be running */ | ||
542 | gpc2clk_target = 0; | ||
543 | mclk_target = 0; | ||
544 | |||
545 | spin_lock(&arb->sessions_lock); | ||
546 | list_for_each_entry(session, &arb->sessions, link) { | ||
261 | if (!session->zombie) { | 547 | if (!session->zombie) { |
262 | /* TODO: arbiter policy. For now last request wins */ | 548 | spin_lock(&arb->req_lock); |
549 | spin_lock(&session->target_lock); | ||
550 | |||
551 | mclk_target = mclk_target > session->mclk_target_mhz ? | ||
552 | mclk_target : session->mclk_target_mhz; | ||
553 | |||
554 | gpc2clk_target = | ||
555 | gpc2clk_target > session->gpc2clk_target_mhz ? | ||
556 | gpc2clk_target : session->gpc2clk_target_mhz; | ||
557 | /* Move processed requests to notification list*/ | ||
558 | list_for_each_entry_safe(dev, tmp, &session->targets, | ||
559 | link) { | ||
560 | list_del_init(&dev->link); | ||
561 | list_add_tail(&dev->link, &arb->requests); | ||
562 | } | ||
563 | spin_unlock(&session->target_lock); | ||
564 | spin_unlock(&arb->req_lock); | ||
565 | |||
566 | } | ||
567 | } | ||
568 | spin_unlock(&arb->sessions_lock); | ||
569 | |||
570 | gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target : | ||
571 | arb->gpc2clk_actual_mhz ? gpc2clk_target : | ||
572 | arb->gpc2clk_default_mhz; | ||
263 | 573 | ||
264 | arb->mclk_target_hz = session->mclk_target_hz; | 574 | mclk_target = (mclk_target > 0) ? mclk_target : |
265 | arb->gpc2clk_target_hz = session->gpc2clk_target_hz; | 575 | arb->mclk_actual_mhz ? mclk_target : |
576 | arb->mclk_default_mhz; | ||
577 | |||
578 | if (!gpc2clk_target && !mclk_target) { | ||
579 | mclk_target = arb->mclk_default_mhz; | ||
580 | gpc2clk_target = arb->gpc2clk_default_mhz; | ||
581 | } | ||
582 | |||
583 | if (!gpc2clk_target) | ||
584 | gpc2clk_target = arb->gpc2clk_actual_mhz; | ||
585 | |||
586 | do { | ||
587 | /* Check that the table is set */ | ||
588 | mb(); | ||
589 | wait_event(arb->vftable_wq, arb->vftable_set); | ||
590 | } while (!ACCESS_ONCE(arb->vftable_set)); | ||
591 | |||
592 | spin_lock(&arb->vf_lock); | ||
593 | /* round up the freq requests */ | ||
594 | for (index = 0; index < arb->gpc2clk_f_numpoints; index++) { | ||
595 | if (arb->gpc2clk_vf_points[index].mhz >= gpc2clk_target) { | ||
596 | gpc2clk_target = arb->gpc2clk_vf_points[index].mhz; | ||
597 | gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt; | ||
598 | gpc2clk_voltuv_sram = | ||
599 | arb->gpc2clk_vf_points[index].uvolt_sram; | ||
600 | break; | ||
266 | } | 601 | } |
267 | } | 602 | } |
268 | 603 | ||
269 | /* TODO: loop up higher or equal VF points */ | 604 | if (index == arb->gpc2clk_f_numpoints) { |
605 | gpc2clk_target = arb->gpc2clk_vf_points[index].mhz; | ||
606 | gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt; | ||
607 | gpc2clk_voltuv_sram = | ||
608 | arb->gpc2clk_vf_points[index].uvolt_sram; | ||
609 | } | ||
610 | |||
611 | if (!mclk_target) | ||
612 | mclk_target = arb->mclk_actual_mhz; | ||
613 | |||
614 | for (index = 0; index < arb->mclk_f_numpoints; index++) { | ||
615 | if (arb->mclk_vf_points[index].mhz >= mclk_target) { | ||
616 | mclk_target = arb->mclk_vf_points[index].mhz; | ||
617 | mclk_voltuv = arb->mclk_vf_points[index].uvolt; | ||
618 | mclk_voltuv_sram = | ||
619 | arb->mclk_vf_points[index].uvolt_sram; | ||
620 | break; | ||
621 | } | ||
622 | } | ||
623 | if (index == arb->mclk_f_numpoints) { | ||
624 | mclk_target = arb->mclk_vf_points[index].mhz; | ||
625 | mclk_voltuv = arb->mclk_vf_points[index].uvolt; | ||
626 | mclk_voltuv_sram = | ||
627 | arb->mclk_vf_points[index].uvolt_sram; | ||
628 | } | ||
629 | spin_unlock(&arb->vf_lock); | ||
630 | |||
631 | /* Program clocks */ | ||
632 | /* A change in both mclk of gpc2clk may require a change in voltage */ | ||
633 | if ((arb->gpc2clk_actual_mhz == gpc2clk_target) && | ||
634 | (arb->mclk_actual_mhz == mclk_target)) { | ||
635 | goto exit_arb; | ||
636 | } | ||
637 | |||
638 | voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; | ||
639 | voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ? | ||
640 | gpc2clk_voltuv_sram : mclk_voltuv_sram; | ||
641 | |||
642 | /* if voltage ascends we do: | ||
643 | * (1) FLL change | ||
644 | * (2) Voltage change | ||
645 | * (3) MCLK change | ||
646 | * If it goes down | ||
647 | * (1) MCLK change | ||
648 | * (2) Voltage change | ||
649 | * (3) FLL change | ||
650 | */ | ||
651 | |||
652 | /* descending */ | ||
653 | if (voltuv <= arb->voltuv_actual) { | ||
654 | status = g->clk_pmu.clk_mclk.change(g, mclk_target); | ||
655 | if (status < 0) | ||
656 | goto exit_arb; | ||
657 | |||
658 | status = volt_set_voltage(g, voltuv, voltuv_sram); | ||
659 | if (status < 0) | ||
660 | goto exit_arb; | ||
661 | |||
662 | fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; | ||
663 | fllclk.clkmhz = gpc2clk_target; | ||
664 | fllclk.voltuv = voltuv; | ||
665 | status = clk_program_fll_clks(g, &fllclk); | ||
666 | if (status < 0) | ||
667 | goto exit_arb; | ||
668 | } else { | ||
669 | fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; | ||
670 | fllclk.clkmhz = gpc2clk_target; | ||
671 | fllclk.voltuv = voltuv; | ||
672 | status = clk_program_fll_clks(g, &fllclk); | ||
673 | if (status < 0) | ||
674 | goto exit_arb; | ||
675 | |||
676 | status = volt_set_voltage(g, voltuv, voltuv_sram); | ||
677 | if (status < 0) | ||
678 | goto exit_arb; | ||
679 | |||
680 | status = g->clk_pmu.clk_mclk.change(g, mclk_target); | ||
681 | if (status < 0) | ||
682 | goto exit_arb; | ||
683 | } | ||
684 | |||
685 | spin_lock(&arb->data_lock); | ||
686 | arb->gpc2clk_actual_mhz = gpc2clk_target; | ||
687 | arb->mclk_actual_mhz = mclk_target; | ||
688 | arb->voltuv_actual = voltuv; | ||
689 | /* Make changes visible to other threads */ | ||
690 | wmb(); | ||
691 | |||
692 | spin_unlock(&arb->data_lock); | ||
693 | |||
694 | #ifdef CONFIG_DEBUG_FS | ||
695 | g->ops.read_ptimer(g, &t1); | ||
696 | arb->switch_num++; | ||
697 | |||
698 | mutex_lock(&arb->debug_lock); | ||
699 | if (arb->switch_num == 1) { | ||
700 | arb->switch_max = arb->switch_min = | ||
701 | arb->switch_avg = (t1-t0)/1000; | ||
702 | arb->switch_std = 0; | ||
703 | } else { | ||
704 | s64 prev_avg; | ||
705 | u64 curr = (t1-t0)/1000; | ||
706 | |||
707 | arb->switch_max = curr > arb->switch_max ? | ||
708 | curr : arb->switch_max; | ||
709 | arb->switch_min = arb->switch_min ? | ||
710 | (curr < arb->switch_min ? | ||
711 | curr : arb->switch_min) : curr; | ||
712 | prev_avg = arb->switch_avg; | ||
713 | arb->switch_avg = (curr + | ||
714 | (arb->switch_avg * (arb->switch_num-1))) / | ||
715 | arb->switch_num; | ||
716 | arb->switch_std += | ||
717 | (curr - arb->switch_avg) * (curr - prev_avg); | ||
718 | } | ||
719 | mutex_unlock(&arb->debug_lock); | ||
270 | 720 | ||
271 | arb->mclk_current_hz = arb->mclk_target_hz; | 721 | #endif |
272 | arb->gpc2clk_current_hz = arb->gpc2clk_target_hz; | ||
273 | 722 | ||
274 | /* TODO: actually program the clocks */ | 723 | exit_arb: |
275 | 724 | ||
725 | spin_lock(&arb->req_lock); | ||
276 | /* notify completion for all requests */ | 726 | /* notify completion for all requests */ |
277 | list_for_each_entry_safe(dev, tmp, &arb->requests, link) { | 727 | list_for_each_entry_safe(dev, tmp, &arb->requests, link) { |
278 | atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); | 728 | atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); |
279 | wake_up_interruptible(&dev->readout_wq); | 729 | wake_up_interruptible(&dev->readout_wq); |
280 | list_del_init(&dev->link); | 730 | list_del_init(&dev->link); |
281 | } | 731 | } |
282 | mutex_unlock(&arb->req_lock); | 732 | spin_unlock(&arb->req_lock); |
283 | 733 | ||
284 | /* notify event for all users */ | 734 | /* notify event for all users */ |
285 | mutex_lock(&arb->users_lock); | 735 | spin_lock(&arb->users_lock); |
286 | list_for_each_entry(dev, &arb->users, link) { | 736 | list_for_each_entry(dev, &arb->users, link) { |
287 | atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); | 737 | atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); |
288 | wake_up_interruptible(&dev->readout_wq); | 738 | wake_up_interruptible(&dev->readout_wq); |
289 | } | 739 | } |
290 | mutex_unlock(&arb->users_lock); | 740 | spin_unlock(&arb->users_lock); |
291 | |||
292 | } | 741 | } |
293 | 742 | ||
294 | int nvgpu_clk_arb_apply_session_constraints(struct gk20a *g, | 743 | int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, |
295 | struct nvgpu_clk_session *session, int *completion_fd) | 744 | struct nvgpu_clk_session *session, int request_fd) |
296 | { | 745 | { |
297 | struct nvgpu_clk_arb *arb = g->clk_arb; | 746 | struct nvgpu_clk_arb *arb = g->clk_arb; |
298 | struct nvgpu_clk_dev *dev; | 747 | struct nvgpu_clk_dev *dev; |
299 | int fd; | 748 | struct fd fd; |
749 | int err = 0; | ||
300 | 750 | ||
301 | fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); | 751 | gk20a_dbg_fn(""); |
302 | if (fd < 0) | 752 | |
303 | return fd; | 753 | fd = fdget(request_fd); |
754 | |||
755 | if (!fd.file) | ||
756 | return -EINVAL; | ||
757 | |||
758 | dev = (struct nvgpu_clk_dev *) fd.file->private_data; | ||
304 | 759 | ||
305 | *completion_fd = fd; | 760 | if (!dev || dev->session != session) { |
761 | err = -EINVAL; | ||
762 | goto fdput_fd; | ||
763 | } | ||
764 | spin_lock(&session->target_lock); | ||
765 | session->mclk_target_mhz = dev->mclk_target_mhz ? dev->mclk_target_mhz : | ||
766 | session->mclk_target_mhz; | ||
767 | session->gpc2clk_target_mhz = dev->gpc2clk_target_mhz ? | ||
768 | dev->gpc2clk_target_mhz : | ||
769 | session->gpc2clk_target_mhz; | ||
306 | 770 | ||
307 | mutex_lock(&arb->req_lock); | 771 | list_add_tail(&dev->link, &session->targets); |
308 | list_add_tail(&dev->link, &arb->requests); | 772 | spin_unlock(&session->target_lock); |
309 | mutex_unlock(&arb->req_lock); | ||
310 | 773 | ||
311 | schedule_work(&arb->update_fn_work); | 774 | schedule_work(&arb->update_fn_work); |
312 | 775 | ||
313 | return 0; | 776 | fdput_fd: |
777 | fdput(fd); | ||
778 | return err; | ||
314 | } | 779 | } |
315 | 780 | ||
316 | static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) | 781 | static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) |
@@ -328,11 +793,22 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, | |||
328 | { | 793 | { |
329 | struct nvgpu_clk_dev *dev = filp->private_data; | 794 | struct nvgpu_clk_dev *dev = filp->private_data; |
330 | struct nvgpu_clk_session *session = dev->session; | 795 | struct nvgpu_clk_session *session = dev->session; |
796 | struct nvgpu_clk_arb *arb; | ||
797 | |||
798 | arb = session->g->clk_arb; | ||
331 | 799 | ||
332 | gk20a_dbg_fn(""); | 800 | gk20a_dbg_fn(""); |
333 | 801 | ||
802 | spin_lock(&arb->req_lock); | ||
803 | spin_lock(&session->target_lock); | ||
804 | if (!list_empty(&dev->link)) | ||
805 | list_del_init(&dev->link); | ||
806 | spin_unlock(&session->target_lock); | ||
807 | spin_unlock(&arb->req_lock); | ||
808 | |||
334 | kref_put(&session->refcount, nvgpu_clk_arb_free_session); | 809 | kref_put(&session->refcount, nvgpu_clk_arb_free_session); |
335 | kfree(dev); | 810 | kfree(dev); |
811 | |||
336 | return 0; | 812 | return 0; |
337 | } | 813 | } |
338 | 814 | ||
@@ -341,94 +817,123 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode, | |||
341 | { | 817 | { |
342 | struct nvgpu_clk_dev *dev = filp->private_data; | 818 | struct nvgpu_clk_dev *dev = filp->private_data; |
343 | struct nvgpu_clk_session *session = dev->session; | 819 | struct nvgpu_clk_session *session = dev->session; |
344 | struct nvgpu_clk_arb *arb = session->g->clk_arb; | 820 | struct nvgpu_clk_arb *arb; |
821 | |||
822 | arb = session->g->clk_arb; | ||
345 | 823 | ||
346 | gk20a_dbg_fn(""); | 824 | gk20a_dbg_fn(""); |
347 | 825 | ||
348 | mutex_lock(&arb->users_lock); | 826 | spin_lock(&arb->users_lock); |
349 | list_del_init(&dev->link); | 827 | list_del(&dev->link); |
350 | mutex_unlock(&arb->users_lock); | 828 | spin_unlock(&arb->users_lock); |
351 | 829 | ||
352 | kref_put(&session->refcount, nvgpu_clk_arb_free_session); | 830 | kref_put(&session->refcount, nvgpu_clk_arb_free_session); |
353 | kfree(dev); | 831 | kfree(dev); |
832 | |||
354 | return 0; | 833 | return 0; |
355 | } | 834 | } |
356 | 835 | ||
357 | int nvgpu_clk_arb_set_session_target_hz(struct nvgpu_clk_session *session, | 836 | int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, |
358 | u32 api_domain, u64 target_hz) | 837 | int request_fd, u32 api_domain, u16 target_mhz) |
359 | { | 838 | { |
839 | struct nvgpu_clk_dev *dev; | ||
840 | struct fd fd; | ||
841 | int err = 0; | ||
360 | 842 | ||
361 | gk20a_dbg_fn("domain=0x%08x target_hz=%llu", api_domain, target_hz); | 843 | gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz); |
844 | |||
845 | fd = fdget(request_fd); | ||
846 | |||
847 | if (!fd.file) | ||
848 | return -EINVAL; | ||
849 | |||
850 | dev = fd.file->private_data; | ||
851 | if (!dev || dev->session != session) { | ||
852 | err = -EINVAL; | ||
853 | goto fdput_fd; | ||
854 | } | ||
362 | 855 | ||
363 | switch (api_domain) { | 856 | switch (api_domain) { |
364 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | 857 | case NVGPU_GPU_CLK_DOMAIN_MCLK: |
365 | session->mclk_target_hz = target_hz; | 858 | dev->mclk_target_mhz = target_mhz; |
366 | return 0; | 859 | break; |
367 | 860 | ||
368 | case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: | 861 | case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: |
369 | session->gpc2clk_target_hz = target_hz; | 862 | dev->gpc2clk_target_mhz = target_mhz; |
370 | return 0; | 863 | break; |
371 | 864 | ||
372 | default: | 865 | default: |
373 | return -EINVAL; | 866 | err = -EINVAL; |
374 | } | 867 | } |
868 | |||
869 | fdput_fd: | ||
870 | fdput(fd); | ||
871 | return err; | ||
375 | } | 872 | } |
376 | 873 | ||
377 | int nvgpu_clk_arb_get_session_target_hz(struct nvgpu_clk_session *session, | 874 | int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, |
378 | u32 api_domain, u64 *freq_hz) | 875 | u32 api_domain, u16 *freq_mhz) |
379 | { | 876 | { |
877 | int err = 0; | ||
878 | |||
879 | spin_lock(&session->target_lock); | ||
880 | |||
380 | switch (api_domain) { | 881 | switch (api_domain) { |
381 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | 882 | case NVGPU_GPU_CLK_DOMAIN_MCLK: |
382 | *freq_hz = session->mclk_target_hz; | 883 | *freq_mhz = session->mclk_target_mhz; |
383 | return 0; | 884 | break; |
384 | 885 | ||
385 | case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: | 886 | case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: |
386 | *freq_hz = session->gpc2clk_target_hz; | 887 | *freq_mhz = session->gpc2clk_target_mhz; |
387 | return 0; | 888 | break; |
388 | 889 | ||
389 | default: | 890 | default: |
390 | *freq_hz = 0; | 891 | *freq_mhz = 0; |
391 | return -EINVAL; | 892 | err = -EINVAL; |
392 | } | 893 | } |
894 | |||
895 | spin_unlock(&session->target_lock); | ||
896 | return err; | ||
393 | } | 897 | } |
394 | 898 | ||
395 | int nvgpu_clk_arb_get_arbiter_actual_hz(struct gk20a *g, | 899 | int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, |
396 | u32 api_domain, u64 *freq_hz) | 900 | u32 api_domain, u16 *freq_mhz) |
397 | { | 901 | { |
398 | struct nvgpu_clk_arb *arb = g->clk_arb; | 902 | struct nvgpu_clk_arb *arb = g->clk_arb; |
399 | int err = 0; | 903 | int err = 0; |
400 | 904 | ||
401 | mutex_lock(&arb->req_lock); | 905 | spin_lock(&arb->data_lock); |
906 | |||
402 | switch (api_domain) { | 907 | switch (api_domain) { |
403 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | 908 | case NVGPU_GPU_CLK_DOMAIN_MCLK: |
404 | *freq_hz = arb->mclk_current_hz; | 909 | *freq_mhz = arb->mclk_actual_mhz; |
405 | break; | 910 | break; |
406 | 911 | ||
407 | case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: | 912 | case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: |
408 | *freq_hz = arb->gpc2clk_current_hz; | 913 | *freq_mhz = arb->gpc2clk_actual_mhz; |
409 | break; | 914 | break; |
410 | 915 | ||
411 | default: | 916 | default: |
412 | *freq_hz = 0; | 917 | *freq_mhz = 0; |
413 | err = -EINVAL; | 918 | err = -EINVAL; |
414 | } | 919 | } |
415 | mutex_unlock(&arb->req_lock); | ||
416 | 920 | ||
921 | spin_unlock(&arb->data_lock); | ||
417 | return err; | 922 | return err; |
418 | } | 923 | } |
419 | 924 | ||
420 | int nvgpu_clk_arb_get_arbiter_effective_hz(struct gk20a *g, | 925 | int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, |
421 | u32 api_domain, u64 *freq_hz) | 926 | u32 api_domain, u16 *freq_mhz) |
422 | { | 927 | { |
423 | /* TODO: measure clocks from counters */ | 928 | /* TODO: measure clocks from counters */ |
424 | return nvgpu_clk_arb_get_arbiter_actual_hz(g, api_domain, freq_hz); | 929 | return nvgpu_clk_arb_get_arbiter_actual_mhz(g, api_domain, freq_mhz); |
425 | } | 930 | } |
426 | 931 | ||
427 | int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, | 932 | int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, |
428 | u64 *min_hz, u64 *max_hz) | 933 | u16 *min_mhz, u16 *max_mhz) |
429 | { | 934 | { |
430 | return g->ops.clk_arb.get_arbiter_clk_range(g, api_domain, | 935 | return g->ops.clk_arb.get_arbiter_clk_range(g, api_domain, |
431 | min_hz, max_hz); | 936 | min_mhz, max_mhz); |
432 | } | 937 | } |
433 | 938 | ||
434 | u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) | 939 | u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) |
@@ -441,3 +946,67 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, | |||
441 | { | 946 | { |
442 | return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints); | 947 | return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints); |
443 | } | 948 | } |
949 | |||
950 | #ifdef CONFIG_DEBUG_FS | ||
951 | static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) | ||
952 | { | ||
953 | struct gk20a *g = s->private; | ||
954 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
955 | u64 num; | ||
956 | s64 tmp, avg, std, max, min; | ||
957 | |||
958 | /* Make copy of structure to reduce time with lock held */ | ||
959 | mutex_lock(&arb->debug_lock); | ||
960 | std = arb->switch_std; | ||
961 | avg = arb->switch_avg; | ||
962 | max = arb->switch_max; | ||
963 | min = arb->switch_min; | ||
964 | num = arb->switch_num; | ||
965 | mutex_unlock(&arb->debug_lock); | ||
966 | |||
967 | tmp = std; | ||
968 | do_div(tmp, num); | ||
969 | seq_printf(s, "Number of transitions: %lld\n", | ||
970 | num); | ||
971 | seq_printf(s, "max / min : %lld / %lld usec\n", | ||
972 | max, min); | ||
973 | seq_printf(s, "avg / std : %lld / %ld usec\n", | ||
974 | avg, int_sqrt(tmp)); | ||
975 | |||
976 | return 0; | ||
977 | } | ||
978 | |||
979 | static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file) | ||
980 | { | ||
981 | return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private); | ||
982 | } | ||
983 | |||
984 | static const struct file_operations nvgpu_clk_arb_stats_fops = { | ||
985 | .open = nvgpu_clk_arb_stats_open, | ||
986 | .read = seq_read, | ||
987 | .llseek = seq_lseek, | ||
988 | .release = single_release, | ||
989 | }; | ||
990 | |||
991 | |||
992 | static int nvgpu_clk_arb_debugfs_init(struct gk20a *g) | ||
993 | { | ||
994 | struct gk20a_platform *platform = dev_get_drvdata(g->dev); | ||
995 | |||
996 | struct dentry *gpu_root = platform->debugfs; | ||
997 | struct dentry *d; | ||
998 | |||
999 | gk20a_dbg(gpu_dbg_info, "g=%p", g); | ||
1000 | |||
1001 | d = debugfs_create_file( | ||
1002 | "arb_stats", | ||
1003 | S_IRUGO, | ||
1004 | gpu_root, | ||
1005 | g, | ||
1006 | &nvgpu_clk_arb_stats_fops); | ||
1007 | if (!d) | ||
1008 | return -ENOMEM; | ||
1009 | |||
1010 | return 0; | ||
1011 | } | ||
1012 | #endif | ||
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.h b/drivers/gpu/nvgpu/clk/clk_arb.h index 95749369..717cca9b 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.h +++ b/drivers/gpu/nvgpu/clk/clk_arb.h | |||
@@ -22,13 +22,13 @@ struct nvgpu_clk_session; | |||
22 | int nvgpu_clk_arb_init_arbiter(struct gk20a *g); | 22 | int nvgpu_clk_arb_init_arbiter(struct gk20a *g); |
23 | 23 | ||
24 | int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, | 24 | int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, |
25 | u64 *min_hz, u64 *max_hz); | 25 | u16 *min_mhz, u16 *max_mhz); |
26 | 26 | ||
27 | int nvgpu_clk_arb_get_arbiter_actual_hz(struct gk20a *g, | 27 | int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, |
28 | u32 api_domain, u64 *actual_hz); | 28 | u32 api_domain, u16 *actual_mhz); |
29 | 29 | ||
30 | int nvgpu_clk_arb_get_arbiter_effective_hz(struct gk20a *g, | 30 | int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, |
31 | u32 api_domain, u64 *actual_hz); | 31 | u32 api_domain, u16 *effective_mhz); |
32 | 32 | ||
33 | int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, | 33 | int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, |
34 | u32 api_domain, u32 *max_points, u16 *fpoints); | 34 | u32 api_domain, u32 *max_points, u16 *fpoints); |
@@ -46,19 +46,21 @@ int nvgpu_clk_arb_init_session(struct gk20a *g, | |||
46 | void nvgpu_clk_arb_release_session(struct gk20a *g, | 46 | void nvgpu_clk_arb_release_session(struct gk20a *g, |
47 | struct nvgpu_clk_session *session); | 47 | struct nvgpu_clk_session *session); |
48 | 48 | ||
49 | int nvgpu_clk_arb_apply_session_constraints(struct gk20a *g, | 49 | int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, |
50 | struct nvgpu_clk_session *session, int *completion_fd); | 50 | struct nvgpu_clk_session *session, int request_fd); |
51 | 51 | ||
52 | int nvgpu_clk_arb_set_session_target_hz(struct nvgpu_clk_session *session, | 52 | int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, |
53 | u32 api_domain, u64 target_hz); | 53 | int fd, u32 api_domain, u16 target_mhz); |
54 | 54 | ||
55 | int nvgpu_clk_arb_get_session_target_hz(struct nvgpu_clk_session *session, | 55 | int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, |
56 | u32 api_domain, u64 *target_hz); | 56 | u32 api_domain, u16 *target_mhz); |
57 | 57 | ||
58 | int nvgpu_clk_arb_install_event_fd(struct gk20a *g, | 58 | int nvgpu_clk_arb_install_event_fd(struct gk20a *g, |
59 | struct nvgpu_clk_session *session, int *event_fd); | 59 | struct nvgpu_clk_session *session, int *event_fd); |
60 | 60 | ||
61 | int nvgpu_clk_arb_install_request_fd(struct gk20a *g, | ||
62 | struct nvgpu_clk_session *session, int *event_fd); | ||
61 | 63 | ||
62 | 64 | void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g); | |
63 | #endif /* _CLK_ARB_H_ */ | 65 | #endif /* _CLK_ARB_H_ */ |
64 | 66 | ||
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c index 86f4ff6d..6ad6c054 100644 --- a/drivers/gpu/nvgpu/clk/clk_mclk.c +++ b/drivers/gpu/nvgpu/clk/clk_mclk.c | |||
@@ -2222,7 +2222,7 @@ int clk_mclkseq_init_mclk_gddr5(struct gk20a *g) | |||
2222 | return 0; | 2222 | return 0; |
2223 | } | 2223 | } |
2224 | 2224 | ||
2225 | int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed) | 2225 | int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val) |
2226 | { | 2226 | { |
2227 | struct clk_mclk_state *mclk; | 2227 | struct clk_mclk_state *mclk; |
2228 | struct pmu_payload payload = { {0} }; | 2228 | struct pmu_payload payload = { {0} }; |
@@ -2236,6 +2236,7 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed) | |||
2236 | #ifdef CONFIG_DEBUG_FS | 2236 | #ifdef CONFIG_DEBUG_FS |
2237 | u64 t0, t1; | 2237 | u64 t0, t1; |
2238 | #endif | 2238 | #endif |
2239 | enum gk20a_mclk_speed speed; | ||
2239 | 2240 | ||
2240 | gk20a_dbg_info(""); | 2241 | gk20a_dbg_info(""); |
2241 | 2242 | ||
@@ -2246,6 +2247,13 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed) | |||
2246 | if (!mclk->init) | 2247 | if (!mclk->init) |
2247 | goto exit_status; | 2248 | goto exit_status; |
2248 | 2249 | ||
2250 | /* TODO this should be done according to VBIOS tables */ | ||
2251 | |||
2252 | speed = (val <= MCLK_LOW_SPEED_LIMIT) ? gk20a_mclk_low_speed : | ||
2253 | (val <= MCLK_MID_SPEED_LIMIT) ? gk20a_mclk_mid_speed : | ||
2254 | gk20a_mclk_high_speed; | ||
2255 | |||
2256 | |||
2249 | if (speed == mclk->speed) | 2257 | if (speed == mclk->speed) |
2250 | goto exit_status; | 2258 | goto exit_status; |
2251 | 2259 | ||
@@ -2374,20 +2382,13 @@ exit_status: | |||
2374 | #ifdef CONFIG_DEBUG_FS | 2382 | #ifdef CONFIG_DEBUG_FS |
2375 | static int mclk_debug_speed_set(void *data, u64 val) | 2383 | static int mclk_debug_speed_set(void *data, u64 val) |
2376 | { | 2384 | { |
2377 | enum gk20a_mclk_speed speed; | ||
2378 | struct gk20a *g = (struct gk20a *) data; | 2385 | struct gk20a *g = (struct gk20a *) data; |
2379 | struct clk_mclk_state *mclk; | 2386 | struct clk_mclk_state *mclk; |
2380 | 2387 | ||
2381 | mclk = &g->clk_pmu.clk_mclk; | 2388 | mclk = &g->clk_pmu.clk_mclk; |
2382 | 2389 | ||
2383 | /* TODO thia should be done according to VBIOS tables */ | ||
2384 | |||
2385 | speed = (val <= MCLK_LOW_SPEED_LIMIT) ? gk20a_mclk_low_speed : | ||
2386 | (val <= MCLK_MID_SPEED_LIMIT) ? gk20a_mclk_mid_speed : | ||
2387 | gk20a_mclk_high_speed; | ||
2388 | |||
2389 | if (mclk->change) | 2390 | if (mclk->change) |
2390 | return mclk->change(g, speed); | 2391 | return mclk->change(g, (u16) val); |
2391 | return 0; | 2392 | return 0; |
2392 | 2393 | ||
2393 | } | 2394 | } |
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.h b/drivers/gpu/nvgpu/clk/clk_mclk.h index 9d193c96..e3e6c1ee 100644 --- a/drivers/gpu/nvgpu/clk/clk_mclk.h +++ b/drivers/gpu/nvgpu/clk/clk_mclk.h | |||
@@ -22,9 +22,12 @@ enum gk20a_mclk_speed { | |||
22 | gk20a_mclk_high_speed, | 22 | gk20a_mclk_high_speed, |
23 | }; | 23 | }; |
24 | 24 | ||
/*
 * Upper bounds used to classify a requested mclk value into the low / mid /
 * high entries of enum gk20a_mclk_speed (presumably MHz - TODO confirm).
 */
#define MCLK_LOW_SPEED_LIMIT	405
#define MCLK_MID_SPEED_LIMIT	810
#define MCLK_HIGH_SPEED_LIMIT	3003

/* Boot default is expressed as a frequency limit, not as an enum value. */
#define DEFAULT_BOOT_MCLK_SPEED	MCLK_HIGH_SPEED_LIMIT
30 | |||
28 | struct clk_mclk_state { | 31 | struct clk_mclk_state { |
29 | enum gk20a_mclk_speed speed; | 32 | enum gk20a_mclk_speed speed; |
30 | struct mutex mclk_mutex; | 33 | struct mutex mclk_mutex; |
@@ -32,7 +35,7 @@ struct clk_mclk_state { | |||
32 | bool init; | 35 | bool init; |
33 | 36 | ||
34 | /* function pointers */ | 37 | /* function pointers */ |
35 | int (*change)(struct gk20a *g, enum gk20a_mclk_speed speed); | 38 | int (*change)(struct gk20a *g, u16 val); |
36 | 39 | ||
37 | #ifdef CONFIG_DEBUG_FS | 40 | #ifdef CONFIG_DEBUG_FS |
38 | s64 switch_max; | 41 | s64 switch_max; |
@@ -45,7 +48,6 @@ struct clk_mclk_state { | |||
45 | }; | 48 | }; |
46 | 49 | ||
47 | int clk_mclkseq_init_mclk_gddr5(struct gk20a *g); | 50 | int clk_mclkseq_init_mclk_gddr5(struct gk20a *g); |
48 | int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, | 51 | int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val); |
49 | enum gk20a_mclk_speed speed); | ||
50 | 52 | ||
51 | #endif | 53 | #endif |