diff options
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/clk/clk_arb.c | 893 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/pstate/pstate.c | 4 |
2 files changed, 546 insertions, 351 deletions
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index 1f7c2aea..aea32cb8 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c | |||
@@ -19,7 +19,8 @@ | |||
19 | #include <linux/nvgpu.h> | 19 | #include <linux/nvgpu.h> |
20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
21 | #include <linux/spinlock.h> | 21 | #include <linux/spinlock.h> |
22 | 22 | #include <linux/rculist.h> | |
23 | #include <linux/llist.h> | ||
23 | #include "clk/clk_arb.h" | 24 | #include "clk/clk_arb.h" |
24 | 25 | ||
25 | 26 | ||
@@ -36,8 +37,15 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, | |||
36 | static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait); | 37 | static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait); |
37 | 38 | ||
38 | static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work); | 39 | static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work); |
39 | static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work); | 40 | static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work); |
40 | static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *); | 41 | static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb); |
42 | static void nvgpu_clk_arb_free_fd(struct kref *refcount); | ||
43 | static void nvgpu_clk_arb_free_session(struct kref *refcount); | ||
44 | static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk, | ||
45 | u16 mclk, u32 voltuv, u32 voltuv_sram); | ||
46 | static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, | ||
47 | u16 *gpc2clk_target, u16 *mclk_target, u32 *voltuv, | ||
48 | u32 *voltuv_sram); | ||
41 | 49 | ||
42 | struct nvgpu_clk_vf_point { | 50 | struct nvgpu_clk_vf_point { |
43 | u16 mhz; | 51 | u16 mhz; |
@@ -45,58 +53,80 @@ struct nvgpu_clk_vf_point { | |||
45 | u32 uvolt_sram; | 53 | u32 uvolt_sram; |
46 | }; | 54 | }; |
47 | 55 | ||
56 | struct nvgpu_clk_vf_table { | ||
57 | u32 mclk_num_points; | ||
58 | struct nvgpu_clk_vf_point *mclk_points; | ||
59 | u32 gpc2clk_num_points; | ||
60 | struct nvgpu_clk_vf_point *gpc2clk_points; | ||
61 | }; | ||
62 | #ifdef CONFIG_DEBUG_FS | ||
63 | struct nvgpu_clk_arb_debug { | ||
64 | s64 switch_max; | ||
65 | s64 switch_min; | ||
66 | u64 switch_num; | ||
67 | s64 switch_avg; | ||
68 | s64 switch_std; | ||
69 | }; | ||
70 | #endif | ||
71 | |||
72 | struct nvgpu_clk_arb_target { | ||
73 | u16 mclk; | ||
74 | u16 gpc2clk; | ||
75 | }; | ||
76 | |||
48 | struct nvgpu_clk_arb { | 77 | struct nvgpu_clk_arb { |
49 | spinlock_t sessions_lock; | 78 | spinlock_t sessions_lock; |
50 | spinlock_t users_lock; | 79 | spinlock_t users_lock; |
51 | spinlock_t req_lock; | ||
52 | 80 | ||
53 | struct list_head users; | 81 | struct list_head users; |
54 | struct list_head sessions; | 82 | struct list_head sessions; |
55 | struct list_head requests; | 83 | struct llist_head requests; |
56 | 84 | ||
57 | struct gk20a *g; | 85 | struct gk20a *g; |
58 | spinlock_t data_lock; | ||
59 | spinlock_t vf_lock; | ||
60 | 86 | ||
61 | u16 gpc2clk_actual_mhz; | 87 | struct nvgpu_clk_arb_target actual_pool[2]; |
62 | u16 gpc2clk_default_mhz; | 88 | struct nvgpu_clk_arb_target *actual; |
63 | 89 | ||
64 | u16 mclk_actual_mhz; | 90 | u16 gpc2clk_default_mhz; |
65 | u16 mclk_default_mhz; | 91 | u16 mclk_default_mhz; |
66 | u32 voltuv_actual; | 92 | u32 voltuv_actual; |
67 | 93 | ||
68 | struct work_struct update_fn_work; | 94 | struct work_struct update_fn_work; |
69 | struct work_struct vftable_fn_work; | 95 | struct workqueue_struct *update_work_queue; |
70 | wait_queue_head_t vftable_wq; | 96 | struct work_struct vf_table_fn_work; |
97 | struct workqueue_struct *vf_table_work_queue; | ||
98 | |||
99 | wait_queue_head_t request_wq; | ||
100 | |||
101 | struct nvgpu_clk_vf_table *current_vf_table; | ||
102 | struct nvgpu_clk_vf_table vf_table_pool[2]; | ||
103 | u32 vf_table_index; | ||
71 | 104 | ||
72 | u16 *mclk_f_points; | 105 | u16 *mclk_f_points; |
73 | bool vftable_set; | 106 | atomic_t req_nr; |
74 | 107 | ||
75 | struct nvgpu_clk_vf_point *mclk_vf_points; | ||
76 | u32 mclk_f_numpoints; | 108 | u32 mclk_f_numpoints; |
77 | u16 *gpc2clk_f_points; | 109 | u16 *gpc2clk_f_points; |
78 | u32 gpc2clk_f_numpoints; | 110 | u32 gpc2clk_f_numpoints; |
79 | struct nvgpu_clk_vf_point *gpc2clk_vf_points; | ||
80 | 111 | ||
81 | #ifdef CONFIG_DEBUG_FS | 112 | #ifdef CONFIG_DEBUG_FS |
82 | struct mutex debug_lock; | 113 | struct nvgpu_clk_arb_debug debug_pool[2]; |
83 | s64 switch_max; | 114 | struct nvgpu_clk_arb_debug *debug; |
84 | s64 switch_min; | ||
85 | u64 switch_num; | ||
86 | s64 switch_avg; | ||
87 | s64 switch_std; | ||
88 | bool debugfs_set; | 115 | bool debugfs_set; |
89 | #endif | 116 | #endif |
90 | }; | 117 | }; |
91 | 118 | ||
92 | |||
93 | struct nvgpu_clk_dev { | 119 | struct nvgpu_clk_dev { |
94 | struct nvgpu_clk_session *session; | 120 | struct nvgpu_clk_session *session; |
95 | struct list_head link; | 121 | union { |
122 | struct list_head link; | ||
123 | struct llist_node node; | ||
124 | }; | ||
96 | wait_queue_head_t readout_wq; | 125 | wait_queue_head_t readout_wq; |
97 | atomic_t poll_mask; | 126 | atomic_t poll_mask; |
98 | u16 gpc2clk_target_mhz; | 127 | u16 gpc2clk_target_mhz; |
99 | u16 mclk_target_mhz; | 128 | u16 mclk_target_mhz; |
129 | struct kref refcount; | ||
100 | }; | 130 | }; |
101 | 131 | ||
102 | struct nvgpu_clk_session { | 132 | struct nvgpu_clk_session { |
@@ -104,11 +134,10 @@ struct nvgpu_clk_session { | |||
104 | struct gk20a *g; | 134 | struct gk20a *g; |
105 | struct kref refcount; | 135 | struct kref refcount; |
106 | struct list_head link; | 136 | struct list_head link; |
107 | struct list_head targets; | 137 | struct llist_head targets; |
108 | 138 | ||
109 | spinlock_t target_lock; | 139 | struct nvgpu_clk_arb_target target_pool[2]; |
110 | u16 gpc2clk_target_mhz; | 140 | struct nvgpu_clk_arb_target *target; |
111 | u16 mclk_target_mhz; | ||
112 | }; | 141 | }; |
113 | 142 | ||
114 | static const struct file_operations completion_dev_ops = { | 143 | static const struct file_operations completion_dev_ops = { |
@@ -128,6 +157,8 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) | |||
128 | struct nvgpu_clk_arb *arb; | 157 | struct nvgpu_clk_arb *arb; |
129 | u16 default_mhz; | 158 | u16 default_mhz; |
130 | int err; | 159 | int err; |
160 | int index; | ||
161 | struct nvgpu_clk_vf_table *table; | ||
131 | 162 | ||
132 | gk20a_dbg_fn(""); | 163 | gk20a_dbg_fn(""); |
133 | 164 | ||
@@ -140,33 +171,37 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) | |||
140 | goto init_fail; | 171 | goto init_fail; |
141 | } | 172 | } |
142 | 173 | ||
143 | arb->gpc2clk_f_numpoints = MAX_F_POINTS; | ||
144 | arb->mclk_f_numpoints = MAX_F_POINTS; | ||
145 | |||
146 | arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL); | ||
147 | if (!arb->gpc2clk_f_points) { | ||
148 | err = -ENOMEM; | ||
149 | goto init_fail; | ||
150 | } | ||
151 | |||
152 | arb->mclk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL); | 174 | arb->mclk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL); |
153 | if (!arb->mclk_f_points) { | 175 | if (!arb->mclk_f_points) { |
154 | err = -ENOMEM; | 176 | err = -ENOMEM; |
155 | goto init_fail; | 177 | goto init_fail; |
156 | } | 178 | } |
157 | 179 | ||
158 | arb->gpc2clk_vf_points = kcalloc(MAX_F_POINTS, | 180 | arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL); |
159 | sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL); | 181 | if (!arb->gpc2clk_f_points) { |
160 | if (!arb->gpc2clk_vf_points) { | ||
161 | err = -ENOMEM; | 182 | err = -ENOMEM; |
162 | goto init_fail; | 183 | goto init_fail; |
163 | } | 184 | } |
164 | 185 | ||
165 | arb->mclk_vf_points = kcalloc(MAX_F_POINTS, | 186 | for (index = 0; index < 2; index++) { |
166 | sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL); | 187 | table = &arb->vf_table_pool[index]; |
167 | if (!arb->mclk_vf_points) { | 188 | table->gpc2clk_num_points = MAX_F_POINTS; |
168 | err = -ENOMEM; | 189 | table->mclk_num_points = MAX_F_POINTS; |
169 | goto init_fail; | 190 | |
191 | table->gpc2clk_points = kcalloc(MAX_F_POINTS, | ||
192 | sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL); | ||
193 | if (!table->gpc2clk_points) { | ||
194 | err = -ENOMEM; | ||
195 | goto init_fail; | ||
196 | } | ||
197 | |||
198 | |||
199 | table->mclk_points = kcalloc(MAX_F_POINTS, | ||
200 | sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL); | ||
201 | if (!table->mclk_points) { | ||
202 | err = -ENOMEM; | ||
203 | goto init_fail; | ||
204 | } | ||
170 | } | 205 | } |
171 | 206 | ||
172 | g->clk_arb = arb; | 207 | g->clk_arb = arb; |
@@ -174,9 +209,6 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) | |||
174 | 209 | ||
175 | spin_lock_init(&arb->sessions_lock); | 210 | spin_lock_init(&arb->sessions_lock); |
176 | spin_lock_init(&arb->users_lock); | 211 | spin_lock_init(&arb->users_lock); |
177 | spin_lock_init(&arb->req_lock); | ||
178 | spin_lock_init(&arb->data_lock); | ||
179 | spin_lock_init(&arb->vf_lock); | ||
180 | 212 | ||
181 | err = g->ops.clk_arb.get_arbiter_clk_default(g, | 213 | err = g->ops.clk_arb.get_arbiter_clk_default(g, |
182 | NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz); | 214 | NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz); |
@@ -196,39 +228,58 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) | |||
196 | 228 | ||
197 | arb->gpc2clk_default_mhz = default_mhz; | 229 | arb->gpc2clk_default_mhz = default_mhz; |
198 | 230 | ||
199 | INIT_LIST_HEAD(&arb->users); | 231 | arb->actual = &arb->actual_pool[0]; |
200 | INIT_LIST_HEAD(&arb->sessions); | 232 | |
201 | INIT_LIST_HEAD(&arb->requests); | 233 | atomic_set(&arb->req_nr, 0); |
202 | 234 | ||
203 | init_waitqueue_head(&arb->vftable_wq); | 235 | INIT_LIST_HEAD_RCU(&arb->users); |
236 | INIT_LIST_HEAD_RCU(&arb->sessions); | ||
237 | init_llist_head(&arb->requests); | ||
204 | 238 | ||
205 | INIT_WORK(&arb->vftable_fn_work, nvgpu_clk_arb_run_vftable_cb); | 239 | init_waitqueue_head(&arb->request_wq); |
240 | arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1, | ||
241 | "vf_table_update"); | ||
242 | arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1, | ||
243 | "arbiter_update"); | ||
244 | |||
245 | |||
246 | INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb); | ||
206 | 247 | ||
207 | INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb); | 248 | INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb); |
208 | 249 | ||
209 | #ifdef CONFIG_DEBUG_FS | 250 | #ifdef CONFIG_DEBUG_FS |
210 | mutex_init(&arb->debug_lock); | 251 | arb->debug = &arb->debug_pool[0]; |
252 | |||
211 | if (!arb->debugfs_set) { | 253 | if (!arb->debugfs_set) { |
212 | if (nvgpu_clk_arb_debugfs_init(g)) | 254 | if (nvgpu_clk_arb_debugfs_init(g)) |
213 | arb->debugfs_set = true; | 255 | arb->debugfs_set = true; |
214 | } | 256 | } |
215 | #endif | 257 | #endif |
216 | err = nvgpu_clk_arb_update_vftable(arb); | 258 | err = clk_vf_point_cache(g); |
217 | if (err < 0) | 259 | if (err < 0) |
218 | goto init_fail; | 260 | goto init_fail; |
219 | 261 | ||
220 | /* Schedule first run */ | 262 | err = nvgpu_clk_arb_update_vf_table(arb); |
221 | schedule_work(&arb->update_fn_work); | 263 | if (err < 0) |
264 | goto init_fail; | ||
265 | do { | ||
266 | /* Check that first run is completed */ | ||
267 | smp_mb(); | ||
268 | wait_event_interruptible(arb->request_wq, | ||
269 | atomic_read(&arb->req_nr)); | ||
270 | } while (!atomic_read(&arb->req_nr)); | ||
222 | 271 | ||
223 | return 0; | 272 | return 0; |
224 | 273 | ||
225 | init_fail: | 274 | init_fail: |
226 | 275 | ||
227 | kfree(arb->gpc2clk_f_points); | 276 | kfree(arb->gpc2clk_f_points); |
228 | kfree(arb->gpc2clk_vf_points); | ||
229 | |||
230 | kfree(arb->mclk_f_points); | 277 | kfree(arb->mclk_f_points); |
231 | kfree(arb->mclk_vf_points); | 278 | |
279 | for (index = 0; index < 2; index++) { | ||
280 | kfree(arb->vf_table_pool[index].gpc2clk_points); | ||
281 | kfree(arb->vf_table_pool[index].mclk_points); | ||
282 | } | ||
232 | 283 | ||
233 | kfree(arb); | 284 | kfree(arb); |
234 | 285 | ||
@@ -275,6 +326,8 @@ static int nvgpu_clk_arb_install_fd(struct gk20a *g, | |||
275 | atomic_set(&dev->poll_mask, 0); | 326 | atomic_set(&dev->poll_mask, 0); |
276 | 327 | ||
277 | dev->session = session; | 328 | dev->session = session; |
329 | kref_init(&dev->refcount); | ||
330 | |||
278 | kref_get(&session->refcount); | 331 | kref_get(&session->refcount); |
279 | 332 | ||
280 | *_dev = dev; | 333 | *_dev = dev; |
@@ -305,15 +358,15 @@ int nvgpu_clk_arb_init_session(struct gk20a *g, | |||
305 | session->g = g; | 358 | session->g = g; |
306 | 359 | ||
307 | kref_init(&session->refcount); | 360 | kref_init(&session->refcount); |
308 | spin_lock_init(&session->target_lock); | ||
309 | 361 | ||
310 | session->zombie = false; | 362 | session->zombie = false; |
311 | session->mclk_target_mhz = arb->mclk_default_mhz; | 363 | session->target = &session->target_pool[0]; |
312 | session->gpc2clk_target_mhz = arb->gpc2clk_default_mhz; | 364 | session->target->mclk = arb->mclk_default_mhz; |
313 | INIT_LIST_HEAD(&session->targets); | 365 | session->target->gpc2clk = arb->gpc2clk_default_mhz; |
366 | init_llist_head(&session->targets); | ||
314 | 367 | ||
315 | spin_lock(&arb->sessions_lock); | 368 | spin_lock(&arb->sessions_lock); |
316 | list_add_tail(&session->link, &arb->sessions); | 369 | list_add_tail_rcu(&session->link, &arb->sessions); |
317 | spin_unlock(&arb->sessions_lock); | 370 | spin_unlock(&arb->sessions_lock); |
318 | 371 | ||
319 | *_session = session; | 372 | *_session = session; |
@@ -321,19 +374,34 @@ int nvgpu_clk_arb_init_session(struct gk20a *g, | |||
321 | return 0; | 374 | return 0; |
322 | } | 375 | } |
323 | 376 | ||
324 | void nvgpu_clk_arb_free_session(struct kref *refcount) | 377 | static void nvgpu_clk_arb_free_fd(struct kref *refcount) |
378 | { | ||
379 | struct nvgpu_clk_dev *dev = container_of(refcount, | ||
380 | struct nvgpu_clk_dev, refcount); | ||
381 | |||
382 | kfree(dev); | ||
383 | } | ||
384 | |||
385 | static void nvgpu_clk_arb_free_session(struct kref *refcount) | ||
325 | { | 386 | { |
326 | struct nvgpu_clk_session *session = container_of(refcount, | 387 | struct nvgpu_clk_session *session = container_of(refcount, |
327 | struct nvgpu_clk_session, refcount); | 388 | struct nvgpu_clk_session, refcount); |
328 | struct nvgpu_clk_arb *arb = session->g->clk_arb; | 389 | struct nvgpu_clk_arb *arb = session->g->clk_arb; |
390 | struct nvgpu_clk_dev *dev, *tmp; | ||
391 | struct llist_node *head; | ||
329 | 392 | ||
330 | gk20a_dbg_fn(""); | 393 | gk20a_dbg_fn(""); |
331 | 394 | ||
332 | spin_lock(&arb->sessions_lock); | 395 | spin_lock(&arb->sessions_lock); |
333 | list_del(&session->link); | 396 | list_del_rcu(&session->link); |
334 | spin_unlock(&arb->sessions_lock); | 397 | spin_unlock(&arb->sessions_lock); |
398 | |||
399 | head = llist_del_all(&session->targets); | ||
400 | llist_for_each_entry_safe(dev, tmp, head, node) { | ||
401 | kref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | ||
402 | } | ||
403 | synchronize_rcu(); | ||
335 | kfree(session); | 404 | kfree(session); |
336 | ; | ||
337 | } | 405 | } |
338 | 406 | ||
339 | void nvgpu_clk_arb_release_session(struct gk20a *g, | 407 | void nvgpu_clk_arb_release_session(struct gk20a *g, |
@@ -346,7 +414,7 @@ void nvgpu_clk_arb_release_session(struct gk20a *g, | |||
346 | session->zombie = true; | 414 | session->zombie = true; |
347 | kref_put(&session->refcount, nvgpu_clk_arb_free_session); | 415 | kref_put(&session->refcount, nvgpu_clk_arb_free_session); |
348 | 416 | ||
349 | schedule_work(&arb->update_fn_work); | 417 | queue_work(arb->update_work_queue, &arb->update_fn_work); |
350 | } | 418 | } |
351 | 419 | ||
352 | int nvgpu_clk_arb_install_event_fd(struct gk20a *g, | 420 | int nvgpu_clk_arb_install_event_fd(struct gk20a *g, |
@@ -363,7 +431,7 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g, | |||
363 | return fd; | 431 | return fd; |
364 | 432 | ||
365 | spin_lock(&arb->users_lock); | 433 | spin_lock(&arb->users_lock); |
366 | list_add_tail(&dev->link, &arb->users); | 434 | list_add_tail_rcu(&dev->link, &arb->users); |
367 | spin_unlock(&arb->users_lock); | 435 | spin_unlock(&arb->users_lock); |
368 | 436 | ||
369 | *event_fd = fd; | 437 | *event_fd = fd; |
@@ -388,121 +456,159 @@ int nvgpu_clk_arb_install_request_fd(struct gk20a *g, | |||
388 | return 0; | 456 | return 0; |
389 | } | 457 | } |
390 | 458 | ||
391 | static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *arb) | 459 | static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) |
392 | { | 460 | { |
393 | struct gk20a *g = arb->g; | 461 | struct gk20a *g = arb->g; |
462 | struct nvgpu_clk_vf_table *table; | ||
394 | 463 | ||
395 | int i; | 464 | int i, j; |
396 | int status = 0; | 465 | int status = 0; |
397 | u32 gpc2clk_voltuv = 0, mclk_voltuv = 0; | 466 | u32 gpc2clk_voltuv = 0, mclk_voltuv = 0; |
398 | u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0; | 467 | u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0; |
468 | u16 gpc2clk_min, gpc2clk_max, clk_cur; | ||
469 | u16 mclk_min, mclk_max; | ||
470 | u32 num_points; | ||
471 | |||
472 | table = ACCESS_ONCE(arb->current_vf_table); | ||
473 | /* make flag visible when all data has resolved in the tables */ | ||
474 | smp_rmb(); | ||
399 | 475 | ||
400 | /* the flag must be visible in all threads */ | 476 | table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] : |
401 | mb(); | 477 | &arb->vf_table_pool[0]; |
402 | ACCESS_ONCE(arb->vftable_set) = false; | ||
403 | 478 | ||
404 | spin_lock(&arb->vf_lock); | 479 | /* Get allowed memory ranges */ |
480 | if (nvgpu_clk_arb_get_arbiter_clk_range(g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK, | ||
481 | &gpc2clk_min, &gpc2clk_max) < 0) | ||
482 | goto exit_vf_table; | ||
483 | if (nvgpu_clk_arb_get_arbiter_clk_range(g, NVGPU_GPU_CLK_DOMAIN_MCLK, | ||
484 | &mclk_min, &mclk_max) < 0) | ||
485 | goto exit_vf_table; | ||
405 | 486 | ||
406 | if (!clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK, | 487 | if (!clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK, |
407 | &arb->gpc2clk_f_numpoints, arb->gpc2clk_f_points) < 0) { | 488 | &table->gpc2clk_num_points, arb->gpc2clk_f_points) < 0) { |
408 | gk20a_err(dev_from_gk20a(g), | 489 | gk20a_err(dev_from_gk20a(g), |
409 | "failed to fetch GPC2CLK frequency points"); | 490 | "failed to fetch GPC2CLK frequency points"); |
410 | goto exit_vftable; | 491 | goto exit_vf_table; |
411 | } | 492 | } |
412 | if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK, | 493 | if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK, |
413 | &arb->mclk_f_numpoints, arb->mclk_f_points) < 0) { | 494 | &table->mclk_num_points, arb->mclk_f_points) < 0) { |
414 | gk20a_err(dev_from_gk20a(g), | 495 | gk20a_err(dev_from_gk20a(g), |
415 | "failed to fetch MCLK frequency points"); | 496 | "failed to fetch MCLK frequency points"); |
416 | goto exit_vftable; | 497 | goto exit_vf_table; |
417 | } | 498 | } |
418 | 499 | ||
500 | memset(table->mclk_points, 0, | ||
501 | table->mclk_num_points*sizeof(struct nvgpu_clk_vf_point)); | ||
502 | memset(table->gpc2clk_points, 0, | ||
503 | table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point)); | ||
504 | |||
505 | for (i = 0, j = 0, num_points = 0, clk_cur = 0; | ||
506 | i < table->mclk_num_points; i++) { | ||
507 | if ((arb->mclk_f_points[i] >= mclk_min) && | ||
508 | (arb->mclk_f_points[i] <= mclk_max) && | ||
509 | (arb->mclk_f_points[i] != clk_cur)) { | ||
510 | |||
511 | table->mclk_points[j].mhz = arb->mclk_f_points[i]; | ||
512 | mclk_voltuv = mclk_voltuv_sram = 0; | ||
513 | |||
514 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, | ||
515 | &table->mclk_points[j].mhz, &mclk_voltuv, | ||
516 | CTRL_VOLT_DOMAIN_LOGIC); | ||
517 | if (status < 0) { | ||
518 | gk20a_err(dev_from_gk20a(g), | ||
519 | "failed to get MCLK LOGIC voltage"); | ||
520 | goto exit_vf_table; | ||
521 | } | ||
522 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, | ||
523 | &table->mclk_points[j].mhz, &mclk_voltuv_sram, | ||
524 | CTRL_VOLT_DOMAIN_SRAM); | ||
525 | if (status < 0) { | ||
526 | gk20a_err(dev_from_gk20a(g), | ||
527 | "failed to get MCLK SRAM voltage"); | ||
528 | goto exit_vf_table; | ||
529 | } | ||
419 | 530 | ||
420 | memset(arb->mclk_vf_points, 0, | 531 | table->mclk_points[j].uvolt = mclk_voltuv; |
421 | arb->mclk_f_numpoints*sizeof(struct nvgpu_clk_vf_point)); | 532 | table->mclk_points[j].uvolt_sram = mclk_voltuv_sram; |
422 | memset(arb->gpc2clk_vf_points, 0, | 533 | clk_cur = table->mclk_points[j].mhz; |
423 | arb->gpc2clk_f_numpoints*sizeof(struct nvgpu_clk_vf_point)); | 534 | j++; |
424 | 535 | num_points++; | |
425 | for (i = 0 ; i < arb->mclk_f_numpoints; i++) { | ||
426 | arb->mclk_vf_points[i].mhz = arb->mclk_f_points[i]; | ||
427 | mclk_voltuv = mclk_voltuv_sram = 0; | ||
428 | |||
429 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, | ||
430 | &arb->mclk_vf_points[i].mhz, &mclk_voltuv, | ||
431 | CTRL_VOLT_DOMAIN_LOGIC); | ||
432 | if (status < 0) { | ||
433 | gk20a_err(dev_from_gk20a(g), | ||
434 | "failed to get MCLK LOGIC voltage"); | ||
435 | goto exit_vftable; | ||
436 | } | ||
437 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, | ||
438 | &arb->mclk_vf_points[i].mhz, &mclk_voltuv_sram, | ||
439 | CTRL_VOLT_DOMAIN_SRAM); | ||
440 | if (status < 0) { | ||
441 | gk20a_err(dev_from_gk20a(g), | ||
442 | "failed to get MCLK SRAM voltage"); | ||
443 | goto exit_vftable; | ||
444 | } | 536 | } |
445 | |||
446 | arb->mclk_vf_points[i].uvolt = mclk_voltuv; | ||
447 | arb->mclk_vf_points[i].uvolt_sram = mclk_voltuv_sram; | ||
448 | } | 537 | } |
538 | table->mclk_num_points = num_points; | ||
539 | |||
540 | for (i = 0, j = 0, num_points = 0, clk_cur = 0; | ||
541 | i < table->gpc2clk_num_points; i++) { | ||
542 | if ((arb->gpc2clk_f_points[i] >= gpc2clk_min) && | ||
543 | (arb->gpc2clk_f_points[i] <= gpc2clk_max) && | ||
544 | (arb->gpc2clk_f_points[i] != clk_cur)) { | ||
545 | |||
546 | table->gpc2clk_points[j].mhz = arb->gpc2clk_f_points[i]; | ||
547 | gpc2clk_voltuv = gpc2clk_voltuv_sram = 0; | ||
548 | |||
549 | status = clk_domain_get_f_or_v(g, | ||
550 | CTRL_CLK_DOMAIN_GPC2CLK, | ||
551 | &table->gpc2clk_points[j].mhz, &gpc2clk_voltuv, | ||
552 | CTRL_VOLT_DOMAIN_LOGIC); | ||
553 | if (status < 0) { | ||
554 | gk20a_err(dev_from_gk20a(g), | ||
555 | "failed to get GPC2CLK LOGIC voltage"); | ||
556 | goto exit_vf_table; | ||
557 | } | ||
449 | 558 | ||
450 | for (i = 0 ; i < arb->gpc2clk_f_numpoints; i++) { | 559 | status = clk_domain_get_f_or_v(g, |
451 | arb->gpc2clk_vf_points[i].mhz = arb->gpc2clk_f_points[i]; | 560 | CTRL_CLK_DOMAIN_GPC2CLK, |
452 | gpc2clk_voltuv = gpc2clk_voltuv_sram = 0; | 561 | &table->gpc2clk_points[j].mhz, |
453 | 562 | &gpc2clk_voltuv_sram, | |
454 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, | 563 | CTRL_VOLT_DOMAIN_SRAM); |
455 | &arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv, | 564 | if (status < 0) { |
456 | CTRL_VOLT_DOMAIN_LOGIC); | 565 | gk20a_err(dev_from_gk20a(g), |
457 | if (status < 0) { | 566 | "failed to get GPC2CLK SRAM voltage"); |
458 | gk20a_err(dev_from_gk20a(g), | 567 | goto exit_vf_table; |
459 | "failed to get GPC2CLK LOGIC voltage"); | 568 | } |
460 | goto exit_vftable; | ||
461 | } | ||
462 | status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, | ||
463 | &arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv_sram, | ||
464 | CTRL_VOLT_DOMAIN_SRAM); | ||
465 | if (status < 0) { | ||
466 | gk20a_err(dev_from_gk20a(g), | ||
467 | "failed to get GPC2CLK SRAM voltage"); | ||
468 | goto exit_vftable; | ||
469 | } | ||
470 | |||
471 | arb->gpc2clk_vf_points[i].uvolt = gpc2clk_voltuv; | ||
472 | arb->gpc2clk_vf_points[i].uvolt_sram = gpc2clk_voltuv_sram; | ||
473 | 569 | ||
570 | table->gpc2clk_points[j].uvolt = gpc2clk_voltuv; | ||
571 | table->gpc2clk_points[j].uvolt_sram = | ||
572 | gpc2clk_voltuv_sram; | ||
573 | clk_cur = table->gpc2clk_points[j].mhz; | ||
574 | j++; | ||
575 | num_points++; | ||
576 | } | ||
474 | } | 577 | } |
578 | table->gpc2clk_num_points = num_points; | ||
475 | 579 | ||
476 | /* make flag visible when all data has resolved in the tables */ | 580 | /* make table visible when all data has resolved in the tables */ |
477 | wmb(); | 581 | smp_wmb(); |
478 | ACCESS_ONCE(arb->vftable_set) = true; | 582 | xchg(&arb->current_vf_table, table); |
479 | |||
480 | wake_up(&arb->vftable_wq); | ||
481 | exit_vftable: | ||
482 | 583 | ||
483 | spin_unlock(&arb->vf_lock); | 584 | queue_work(arb->update_work_queue, &arb->update_fn_work); |
585 | exit_vf_table: | ||
484 | 586 | ||
485 | return status; | 587 | return status; |
486 | } | 588 | } |
487 | 589 | ||
488 | void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g) | 590 | void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g) |
489 | { | 591 | { |
490 | struct nvgpu_clk_arb *arb = g->clk_arb; | 592 | struct nvgpu_clk_arb *arb = g->clk_arb; |
491 | 593 | ||
492 | ACCESS_ONCE(arb->vftable_set) = false; | 594 | queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work); |
493 | /* Disable the flag in case arbiter gets scheduled first */ | ||
494 | mb(); | ||
495 | |||
496 | schedule_work(&arb->vftable_fn_work); | ||
497 | schedule_work(&arb->update_fn_work); | ||
498 | } | 595 | } |
499 | 596 | ||
500 | static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work) | 597 | static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work) |
501 | { | 598 | { |
502 | struct nvgpu_clk_arb *arb = | 599 | struct nvgpu_clk_arb *arb = |
503 | container_of(work, struct nvgpu_clk_arb, update_fn_work); | 600 | container_of(work, struct nvgpu_clk_arb, vf_table_fn_work); |
601 | struct gk20a *g = arb->g; | ||
602 | u32 err; | ||
504 | 603 | ||
505 | nvgpu_clk_arb_update_vftable(arb); | 604 | /* get latest vf curve from pmu */ |
605 | err = clk_vf_point_cache(g); | ||
606 | if (err) { | ||
607 | gk20a_err(dev_from_gk20a(g), | ||
608 | "failed to get GPC2CLK SRAM voltage"); | ||
609 | return; | ||
610 | } | ||
611 | nvgpu_clk_arb_update_vf_table(arb); | ||
506 | } | 612 | } |
507 | 613 | ||
508 | static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | 614 | static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) |
@@ -512,24 +618,22 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
512 | struct nvgpu_clk_session *session; | 618 | struct nvgpu_clk_session *session; |
513 | struct nvgpu_clk_dev *dev; | 619 | struct nvgpu_clk_dev *dev; |
514 | struct nvgpu_clk_dev *tmp; | 620 | struct nvgpu_clk_dev *tmp; |
621 | struct nvgpu_clk_arb_target *target, *actual; | ||
515 | struct gk20a *g = arb->g; | 622 | struct gk20a *g = arb->g; |
516 | 623 | struct llist_node *head; | |
517 | struct change_fll_clk fllclk; | ||
518 | u32 gpc2clk_voltuv = 0, mclk_voltuv = 0; | ||
519 | u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0; | ||
520 | 624 | ||
521 | u32 voltuv, voltuv_sram; | 625 | u32 voltuv, voltuv_sram; |
626 | bool mclk_set, gpc2clk_set; | ||
522 | 627 | ||
523 | int status; | 628 | int status; |
524 | 629 | ||
525 | /* Temporary variables for checking target frequency */ | 630 | /* Temporary variables for checking target frequency */ |
526 | u16 gpc2clk_target, mclk_target; | 631 | u16 gpc2clk_target, mclk_target; |
527 | 632 | ||
528 | /* iteration index */ | ||
529 | u32 index; | ||
530 | |||
531 | #ifdef CONFIG_DEBUG_FS | 633 | #ifdef CONFIG_DEBUG_FS |
532 | u64 t0, t1; | 634 | u64 t0, t1; |
635 | struct nvgpu_clk_arb_debug *debug; | ||
636 | |||
533 | #endif | 637 | #endif |
534 | 638 | ||
535 | gk20a_dbg_fn(""); | 639 | gk20a_dbg_fn(""); |
@@ -542,37 +646,61 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
542 | gpc2clk_target = 0; | 646 | gpc2clk_target = 0; |
543 | mclk_target = 0; | 647 | mclk_target = 0; |
544 | 648 | ||
545 | spin_lock(&arb->sessions_lock); | 649 | rcu_read_lock(); |
546 | list_for_each_entry(session, &arb->sessions, link) { | 650 | list_for_each_entry_rcu(session, &arb->sessions, link) { |
547 | if (!session->zombie) { | 651 | if (!session->zombie) { |
548 | spin_lock(&arb->req_lock); | 652 | mclk_set = false; |
549 | spin_lock(&session->target_lock); | 653 | gpc2clk_set = false; |
654 | target = ACCESS_ONCE(session->target) == | ||
655 | &session->target_pool[0] ? | ||
656 | &session->target_pool[1] : | ||
657 | &session->target_pool[0]; | ||
658 | /* Do not reorder pointer */ | ||
659 | smp_rmb(); | ||
660 | head = llist_del_all(&session->targets); | ||
661 | if (head) { | ||
662 | |||
663 | /* Copy over state */ | ||
664 | target->mclk = session->target->mclk; | ||
665 | target->gpc2clk = session->target->gpc2clk; | ||
666 | /* Query the latest committed request */ | ||
667 | llist_for_each_entry_safe(dev, tmp, head, | ||
668 | node) { | ||
669 | if (!mclk_set && dev->mclk_target_mhz) { | ||
670 | target->mclk = | ||
671 | dev->mclk_target_mhz; | ||
672 | mclk_set = true; | ||
673 | } | ||
674 | if (!gpc2clk_set && | ||
675 | dev->gpc2clk_target_mhz) { | ||
676 | target->gpc2clk = | ||
677 | dev->gpc2clk_target_mhz; | ||
678 | gpc2clk_set = true; | ||
679 | } | ||
680 | kref_get(&dev->refcount); | ||
681 | llist_add(&dev->node, &arb->requests); | ||
682 | } | ||
684 | /* Ensure target is updated before ptr swap */ | ||
684 | smp_wmb(); | ||
685 | xchg(&session->target, target); | ||
686 | } | ||
550 | 687 | ||
551 | mclk_target = mclk_target > session->mclk_target_mhz ? | 688 | mclk_target = mclk_target > session->target->mclk ? |
552 | mclk_target : session->mclk_target_mhz; | 689 | mclk_target : session->target->mclk; |
553 | 690 | ||
554 | gpc2clk_target = | 691 | gpc2clk_target = |
555 | gpc2clk_target > session->gpc2clk_target_mhz ? | 692 | gpc2clk_target > session->target->gpc2clk ? |
556 | gpc2clk_target : session->gpc2clk_target_mhz; | 693 | gpc2clk_target : session->target->gpc2clk; |
557 | /* Move processed requests to notification list*/ | ||
558 | list_for_each_entry_safe(dev, tmp, &session->targets, | ||
559 | link) { | ||
560 | list_del_init(&dev->link); | ||
561 | list_add_tail(&dev->link, &arb->requests); | ||
562 | } | ||
563 | spin_unlock(&session->target_lock); | ||
564 | spin_unlock(&arb->req_lock); | ||
565 | |||
566 | } | 694 | } |
567 | } | 695 | } |
568 | spin_unlock(&arb->sessions_lock); | 696 | rcu_read_unlock(); |
569 | 697 | ||
570 | gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target : | 698 | gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target : |
571 | arb->gpc2clk_actual_mhz ? gpc2clk_target : | 699 | arb->actual->gpc2clk ? gpc2clk_target : |
572 | arb->gpc2clk_default_mhz; | 700 | arb->gpc2clk_default_mhz; |
573 | 701 | ||
574 | mclk_target = (mclk_target > 0) ? mclk_target : | 702 | mclk_target = (mclk_target > 0) ? mclk_target : |
575 | arb->mclk_actual_mhz ? mclk_target : | 703 | arb->actual->mclk ? mclk_target : |
576 | arb->mclk_default_mhz; | 704 | arb->mclk_default_mhz; |
577 | 705 | ||
578 | if (!gpc2clk_target && !mclk_target) { | 706 | if (!gpc2clk_target && !mclk_target) { |
@@ -581,163 +709,100 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
581 | } | 709 | } |
582 | 710 | ||
583 | if (!gpc2clk_target) | 711 | if (!gpc2clk_target) |
584 | gpc2clk_target = arb->gpc2clk_actual_mhz; | 712 | gpc2clk_target = arb->actual->mclk; |
585 | 713 | ||
586 | do { | 714 | if (!mclk_target) |
587 | /* Check that the table is set */ | 715 | mclk_target = arb->actual->mclk; |
588 | mb(); | ||
589 | wait_event(arb->vftable_wq, arb->vftable_set); | ||
590 | } while (!ACCESS_ONCE(arb->vftable_set)); | ||
591 | |||
592 | spin_lock(&arb->vf_lock); | ||
593 | /* round up the freq requests */ | ||
594 | for (index = 0; index < arb->gpc2clk_f_numpoints; index++) { | ||
595 | if (arb->gpc2clk_vf_points[index].mhz >= gpc2clk_target) { | ||
596 | gpc2clk_target = arb->gpc2clk_vf_points[index].mhz; | ||
597 | gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt; | ||
598 | gpc2clk_voltuv_sram = | ||
599 | arb->gpc2clk_vf_points[index].uvolt_sram; | ||
600 | break; | ||
601 | } | ||
602 | } | ||
603 | 716 | ||
604 | if (index == arb->gpc2clk_f_numpoints) { | ||
605 | gpc2clk_target = arb->gpc2clk_vf_points[index].mhz; | ||
606 | gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt; | ||
607 | gpc2clk_voltuv_sram = | ||
608 | arb->gpc2clk_vf_points[index].uvolt_sram; | ||
609 | } | ||
610 | 717 | ||
611 | if (!mclk_target) | 718 | /* Query the table for the closest vf point to program */ |
612 | mclk_target = arb->mclk_actual_mhz; | 719 | nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target, &mclk_target, &voltuv, |
720 | &voltuv_sram); | ||
613 | 721 | ||
614 | for (index = 0; index < arb->mclk_f_numpoints; index++) { | 722 | if ((arb->actual->gpc2clk == gpc2clk_target) && |
615 | if (arb->mclk_vf_points[index].mhz >= mclk_target) { | 723 | (arb->actual->mclk == mclk_target) && |
616 | mclk_target = arb->mclk_vf_points[index].mhz; | 724 | (arb->voltuv_actual == voltuv)) { |
617 | mclk_voltuv = arb->mclk_vf_points[index].uvolt; | 725 | goto exit_arb; |
618 | mclk_voltuv_sram = | ||
619 | arb->mclk_vf_points[index].uvolt_sram; | ||
620 | break; | ||
621 | } | ||
622 | } | ||
623 | if (index == arb->mclk_f_numpoints) { | ||
624 | mclk_target = arb->mclk_vf_points[index].mhz; | ||
625 | mclk_voltuv = arb->mclk_vf_points[index].uvolt; | ||
626 | mclk_voltuv_sram = | ||
627 | arb->mclk_vf_points[index].uvolt_sram; | ||
628 | } | 726 | } |
629 | spin_unlock(&arb->vf_lock); | ||
630 | 727 | ||
631 | /* Program clocks */ | 728 | /* Program clocks */ |
632 | /* A change in either mclk or gpc2clk may require a change in voltage */ | 729 | /* A change in either mclk or gpc2clk may require a change in voltage */ |
633 | if ((arb->gpc2clk_actual_mhz == gpc2clk_target) && | ||
634 | (arb->mclk_actual_mhz == mclk_target)) { | ||
635 | goto exit_arb; | ||
636 | } | ||
637 | |||
638 | voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; | ||
639 | voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ? | ||
640 | gpc2clk_voltuv_sram : mclk_voltuv_sram; | ||
641 | 730 | ||
642 | /* if voltage ascends we do: | 731 | status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, mclk_target, |
643 | * (1) FLL change | 732 | voltuv, voltuv_sram); |
644 | * (2) Voltage change | ||
645 | * (3) MCLK change | ||
646 | * If it goes down | ||
647 | * (1) MCLK change | ||
648 | * (2) Voltage change | ||
649 | * (3) FLL change | ||
650 | */ | ||
651 | 733 | ||
652 | /* descending */ | 734 | if (status < 0) |
653 | if (voltuv <= arb->voltuv_actual) { | 735 | goto exit_arb; |
654 | status = g->clk_pmu.clk_mclk.change(g, mclk_target); | ||
655 | if (status < 0) | ||
656 | goto exit_arb; | ||
657 | 736 | ||
658 | status = volt_set_voltage(g, voltuv, voltuv_sram); | 737 | actual = ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ? |
659 | if (status < 0) | 738 | &arb->actual_pool[1] : &arb->actual_pool[0]; |
660 | goto exit_arb; | ||
661 | 739 | ||
662 | fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; | 740 | /* do not reorder this pointer */ |
663 | fllclk.clkmhz = gpc2clk_target; | 741 | smp_rmb(); |
664 | fllclk.voltuv = voltuv; | 742 | actual->gpc2clk = gpc2clk_target; |
665 | status = clk_program_fll_clks(g, &fllclk); | 743 | actual->mclk = mclk_target; |
666 | if (status < 0) | 744 | arb->voltuv_actual = voltuv; |
667 | goto exit_arb; | ||
668 | } else { | ||
669 | fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; | ||
670 | fllclk.clkmhz = gpc2clk_target; | ||
671 | fllclk.voltuv = voltuv; | ||
672 | status = clk_program_fll_clks(g, &fllclk); | ||
673 | if (status < 0) | ||
674 | goto exit_arb; | ||
675 | 745 | ||
676 | status = volt_set_voltage(g, voltuv, voltuv_sram); | 746 | /* Make changes visible to other threads */ |
677 | if (status < 0) | 747 | smp_wmb(); |
678 | goto exit_arb; | 748 | xchg(&arb->actual, actual); |
679 | 749 | ||
680 | status = g->clk_pmu.clk_mclk.change(g, mclk_target); | 750 | atomic_inc(&arb->req_nr); |
681 | if (status < 0) | ||
682 | goto exit_arb; | ||
683 | } | ||
684 | 751 | ||
685 | spin_lock(&arb->data_lock); | 752 | wake_up_interruptible(&arb->request_wq); |
686 | arb->gpc2clk_actual_mhz = gpc2clk_target; | ||
687 | arb->mclk_actual_mhz = mclk_target; | ||
688 | arb->voltuv_actual = voltuv; | ||
689 | /* Make changes visible to other threads */ | ||
690 | wmb(); | ||
691 | 753 | ||
692 | spin_unlock(&arb->data_lock); | ||
693 | 754 | ||
694 | #ifdef CONFIG_DEBUG_FS | 755 | #ifdef CONFIG_DEBUG_FS |
695 | g->ops.read_ptimer(g, &t1); | 756 | g->ops.read_ptimer(g, &t1); |
696 | arb->switch_num++; | ||
697 | 757 | ||
698 | mutex_lock(&arb->debug_lock); | 758 | debug = arb->debug == &arb->debug_pool[0] ? |
699 | if (arb->switch_num == 1) { | 759 | &arb->debug_pool[1] : &arb->debug_pool[0]; |
700 | arb->switch_max = arb->switch_min = | 760 | |
701 | arb->switch_avg = (t1-t0)/1000; | 761 | memcpy(debug, arb->debug, sizeof(arb->debug_pool[0])); |
702 | arb->switch_std = 0; | 762 | debug->switch_num++; |
763 | |||
764 | if (debug->switch_num == 1) { | ||
765 | debug->switch_max = debug->switch_min = | ||
766 | debug->switch_avg = (t1-t0)/1000; | ||
767 | debug->switch_std = 0; | ||
703 | } else { | 768 | } else { |
704 | s64 prev_avg; | 769 | s64 prev_avg; |
705 | u64 curr = (t1-t0)/1000; | 770 | u64 curr = (t1-t0)/1000; |
706 | 771 | ||
707 | arb->switch_max = curr > arb->switch_max ? | 772 | debug->switch_max = curr > debug->switch_max ? |
708 | curr : arb->switch_max; | 773 | curr : debug->switch_max; |
709 | arb->switch_min = arb->switch_min ? | 774 | debug->switch_min = debug->switch_min ? |
710 | (curr < arb->switch_min ? | 775 | (curr < debug->switch_min ? |
711 | curr : arb->switch_min) : curr; | 776 | curr : debug->switch_min) : curr; |
712 | prev_avg = arb->switch_avg; | 777 | prev_avg = debug->switch_avg; |
713 | arb->switch_avg = (curr + | 778 | debug->switch_avg = (curr + |
714 | (arb->switch_avg * (arb->switch_num-1))) / | 779 | (debug->switch_avg * (debug->switch_num-1))) / |
715 | arb->switch_num; | 780 | debug->switch_num; |
716 | arb->switch_std += | 781 | debug->switch_std += |
717 | (curr - arb->switch_avg) * (curr - prev_avg); | 782 | (curr - debug->switch_avg) * (curr - prev_avg); |
718 | } | 783 | } |
719 | mutex_unlock(&arb->debug_lock); | 784 | /* commit changes before exchanging debug pointer */ |
720 | 785 | smp_wmb(); | |
786 | xchg(&arb->debug, debug); | ||
721 | #endif | 787 | #endif |
722 | 788 | ||
723 | exit_arb: | 789 | exit_arb: |
724 | 790 | ||
725 | spin_lock(&arb->req_lock); | ||
726 | /* notify completion for all requests */ | 791 | /* notify completion for all requests */ |
727 | list_for_each_entry_safe(dev, tmp, &arb->requests, link) { | 792 | head = llist_del_all(&arb->requests); |
793 | llist_for_each_entry_safe(dev, tmp, head, node) { | ||
728 | atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); | 794 | atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); |
729 | wake_up_interruptible(&dev->readout_wq); | 795 | wake_up_interruptible(&dev->readout_wq); |
730 | list_del_init(&dev->link); | 796 | kref_put(&dev->refcount, nvgpu_clk_arb_free_fd); |
731 | } | 797 | } |
732 | spin_unlock(&arb->req_lock); | ||
733 | 798 | ||
734 | /* notify event for all users */ | 799 | /* notify event for all users */ |
735 | spin_lock(&arb->users_lock); | 800 | rcu_read_lock(); |
736 | list_for_each_entry(dev, &arb->users, link) { | 801 | list_for_each_entry_rcu(dev, &arb->users, link) { |
737 | atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); | 802 | atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); |
738 | wake_up_interruptible(&dev->readout_wq); | 803 | wake_up_interruptible(&dev->readout_wq); |
739 | } | 804 | } |
740 | spin_unlock(&arb->users_lock); | 805 | rcu_read_unlock(); |
741 | } | 806 | } |
742 | 807 | ||
743 | int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, | 808 | int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, |
@@ -761,17 +826,10 @@ int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, | |||
761 | err = -EINVAL; | 826 | err = -EINVAL; |
762 | goto fdput_fd; | 827 | goto fdput_fd; |
763 | } | 828 | } |
764 | spin_lock(&session->target_lock); | 829 | kref_get(&dev->refcount); |
765 | session->mclk_target_mhz = dev->mclk_target_mhz ? dev->mclk_target_mhz : | 830 | llist_add(&dev->node, &session->targets); |
766 | session->mclk_target_mhz; | ||
767 | session->gpc2clk_target_mhz = dev->gpc2clk_target_mhz ? | ||
768 | dev->gpc2clk_target_mhz : | ||
769 | session->gpc2clk_target_mhz; | ||
770 | |||
771 | list_add_tail(&dev->link, &session->targets); | ||
772 | spin_unlock(&session->target_lock); | ||
773 | 831 | ||
774 | schedule_work(&arb->update_fn_work); | 832 | queue_work(arb->update_work_queue, &arb->update_fn_work); |
775 | 833 | ||
776 | fdput_fd: | 834 | fdput_fd: |
777 | fdput(fd); | 835 | fdput(fd); |
@@ -799,15 +857,8 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, | |||
799 | 857 | ||
800 | gk20a_dbg_fn(""); | 858 | gk20a_dbg_fn(""); |
801 | 859 | ||
802 | spin_lock(&arb->req_lock); | ||
803 | spin_lock(&session->target_lock); | ||
804 | if (!list_empty(&dev->link)) | ||
805 | list_del_init(&dev->link); | ||
806 | spin_unlock(&session->target_lock); | ||
807 | spin_unlock(&arb->req_lock); | ||
808 | |||
809 | kref_put(&session->refcount, nvgpu_clk_arb_free_session); | 860 | kref_put(&session->refcount, nvgpu_clk_arb_free_session); |
810 | kfree(dev); | 861 | kref_put(&dev->refcount, nvgpu_clk_arb_free_fd); |
811 | 862 | ||
812 | return 0; | 863 | return 0; |
813 | } | 864 | } |
@@ -824,10 +875,11 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode, | |||
824 | gk20a_dbg_fn(""); | 875 | gk20a_dbg_fn(""); |
825 | 876 | ||
826 | spin_lock(&arb->users_lock); | 877 | spin_lock(&arb->users_lock); |
827 | list_del(&dev->link); | 878 | list_del_rcu(&dev->link); |
828 | spin_unlock(&arb->users_lock); | 879 | spin_unlock(&arb->users_lock); |
829 | 880 | ||
830 | kref_put(&session->refcount, nvgpu_clk_arb_free_session); | 881 | kref_put(&session->refcount, nvgpu_clk_arb_free_session); |
882 | synchronize_rcu(); | ||
831 | kfree(dev); | 883 | kfree(dev); |
832 | 884 | ||
833 | return 0; | 885 | return 0; |
@@ -875,24 +927,27 @@ int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, | |||
875 | u32 api_domain, u16 *freq_mhz) | 927 | u32 api_domain, u16 *freq_mhz) |
876 | { | 928 | { |
877 | int err = 0; | 929 | int err = 0; |
930 | struct nvgpu_clk_arb_target *target; | ||
878 | 931 | ||
879 | spin_lock(&session->target_lock); | 932 | do { |
880 | 933 | target = ACCESS_ONCE(session->target); | |
881 | switch (api_domain) { | 934 | /* no reordering of this pointer */ |
882 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | 935 | smp_rmb(); |
883 | *freq_mhz = session->mclk_target_mhz; | ||
884 | break; | ||
885 | 936 | ||
886 | case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: | 937 | switch (api_domain) { |
887 | *freq_mhz = session->gpc2clk_target_mhz; | 938 | case NVGPU_GPU_CLK_DOMAIN_MCLK: |
888 | break; | 939 | *freq_mhz = target->mclk; |
940 | break; | ||
889 | 941 | ||
890 | default: | 942 | case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: |
891 | *freq_mhz = 0; | 943 | *freq_mhz = target->gpc2clk; |
892 | err = -EINVAL; | 944 | break; |
893 | } | ||
894 | 945 | ||
895 | spin_unlock(&session->target_lock); | 946 | default: |
947 | *freq_mhz = 0; | ||
948 | err = -EINVAL; | ||
949 | } | ||
950 | } while (target != ACCESS_ONCE(session->target)); | ||
896 | return err; | 951 | return err; |
897 | } | 952 | } |
898 | 953 | ||
@@ -901,24 +956,27 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, | |||
901 | { | 956 | { |
902 | struct nvgpu_clk_arb *arb = g->clk_arb; | 957 | struct nvgpu_clk_arb *arb = g->clk_arb; |
903 | int err = 0; | 958 | int err = 0; |
959 | struct nvgpu_clk_arb_target *actual; | ||
904 | 960 | ||
905 | spin_lock(&arb->data_lock); | 961 | do { |
906 | 962 | actual = ACCESS_ONCE(arb->actual); | |
907 | switch (api_domain) { | 963 | /* no reordering of this pointer */ |
908 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | 964 | smp_rmb(); |
909 | *freq_mhz = arb->mclk_actual_mhz; | ||
910 | break; | ||
911 | 965 | ||
912 | case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: | 966 | switch (api_domain) { |
913 | *freq_mhz = arb->gpc2clk_actual_mhz; | 967 | case NVGPU_GPU_CLK_DOMAIN_MCLK: |
914 | break; | 968 | *freq_mhz = actual->mclk; |
969 | break; | ||
915 | 970 | ||
916 | default: | 971 | case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: |
917 | *freq_mhz = 0; | 972 | *freq_mhz = actual->gpc2clk; |
918 | err = -EINVAL; | 973 | break; |
919 | } | ||
920 | 974 | ||
921 | spin_unlock(&arb->data_lock); | 975 | default: |
976 | *freq_mhz = 0; | ||
977 | err = -EINVAL; | ||
978 | } | ||
979 | } while (actual != ACCESS_ONCE(arb->actual)); | ||
922 | return err; | 980 | return err; |
923 | } | 981 | } |
924 | 982 | ||
@@ -948,22 +1006,163 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, | |||
948 | return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints); | 1006 | return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints); |
949 | } | 1007 | } |
950 | 1008 | ||
1009 | static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, | ||
1010 | u16 *gpc2clk, u16 *mclk, u32 *voltuv, | ||
1011 | u32 *voltuv_sram) | ||
1012 | { | ||
1013 | u16 gpc2clk_target, mclk_target; | ||
1014 | u32 gpc2clk_voltuv, gpc2clk_voltuv_sram; | ||
1015 | u32 mclk_voltuv, mclk_voltuv_sram; | ||
1016 | struct nvgpu_clk_vf_table *table; | ||
1017 | int index; | ||
1018 | |||
1019 | gpc2clk_target = *gpc2clk; | ||
1020 | mclk_target = *mclk; | ||
1021 | gpc2clk_voltuv = 0; | ||
1022 | gpc2clk_voltuv_sram = 0; | ||
1023 | mclk_voltuv = 0; | ||
1024 | mclk_voltuv_sram = 0; | ||
1025 | |||
1026 | do { | ||
1027 | table = ACCESS_ONCE(arb->current_vf_table); | ||
1028 | /* pointer to table can be updated by callback */ | ||
1029 | smp_rmb(); | ||
1030 | |||
1031 | if (!table) | ||
1032 | continue; | ||
1033 | /* round up the freq requests */ | ||
1034 | for (index = 0; index < table->gpc2clk_num_points; index++) { | ||
1035 | if (table->gpc2clk_points[index].mhz >= | ||
1036 | gpc2clk_target) { | ||
1037 | gpc2clk_target = | ||
1038 | table->gpc2clk_points[index].mhz; | ||
1039 | gpc2clk_voltuv = | ||
1040 | table->gpc2clk_points[index].uvolt; | ||
1041 | gpc2clk_voltuv_sram = | ||
1042 | table->gpc2clk_points[index].uvolt_sram; | ||
1043 | break; | ||
1044 | } | ||
1045 | } | ||
1046 | |||
1047 | if (index == table->gpc2clk_num_points) { | ||
1048 | gpc2clk_target = table->gpc2clk_points[index].mhz; | ||
1049 | gpc2clk_voltuv = table->gpc2clk_points[index].uvolt; | ||
1050 | gpc2clk_voltuv_sram = | ||
1051 | table->gpc2clk_points[index].uvolt_sram; | ||
1052 | } | ||
1053 | |||
1054 | for (index = 0; index < table->mclk_num_points; index++) { | ||
1055 | if (table->mclk_points[index].mhz >= mclk_target) { | ||
1056 | mclk_target = table->mclk_points[index].mhz; | ||
1057 | mclk_voltuv = table->mclk_points[index].uvolt; | ||
1058 | mclk_voltuv_sram = | ||
1059 | table->mclk_points[index].uvolt_sram; | ||
1060 | break; | ||
1061 | } | ||
1062 | } | ||
1063 | if (index == table->mclk_num_points) { | ||
1064 | mclk_target = table->mclk_points[index].mhz; | ||
1065 | mclk_voltuv = table->mclk_points[index].uvolt; | ||
1066 | mclk_voltuv_sram = | ||
1067 | table->mclk_points[index].uvolt_sram; | ||
1068 | } | ||
1069 | } while (!table || | ||
1070 | (ACCESS_ONCE(arb->current_vf_table) != table)); | ||
1071 | |||
1072 | *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; | ||
1073 | *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ? | ||
1074 | gpc2clk_voltuv_sram : mclk_voltuv_sram; | ||
1075 | |||
1076 | *gpc2clk = gpc2clk_target; | ||
1077 | *mclk = mclk_target; | ||
1078 | } | ||
1079 | |||
1080 | static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, | ||
1081 | u16 mclk_target, u32 voltuv, u32 voltuv_sram) | ||
1082 | { | ||
1083 | struct change_fll_clk fllclk; | ||
1084 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
1085 | int status; | ||
1086 | |||
1087 | /* if voltage ascends we do: | ||
1088 | * (1) FLL change | ||
1089 | * (2) Voltage change | ||
1090 | * (3) MCLK change | ||
1091 | * If it goes down | ||
1092 | * (1) MCLK change | ||
1093 | * (2) Voltage change | ||
1094 | * (3) FLL change | ||
1095 | */ | ||
1096 | |||
1097 | /* descending */ | ||
1098 | if (voltuv < arb->voltuv_actual) { | ||
1099 | status = g->clk_pmu.clk_mclk.change(g, mclk_target); | ||
1100 | if (status < 0) | ||
1101 | return status; | ||
1102 | |||
1103 | status = volt_set_voltage(g, voltuv, voltuv_sram); | ||
1104 | if (status < 0) | ||
1105 | return status; | ||
1106 | |||
1107 | fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; | ||
1108 | fllclk.clkmhz = gpc2clk_target; | ||
1109 | fllclk.voltuv = voltuv; | ||
1110 | status = clk_program_fll_clks(g, &fllclk); | ||
1111 | if (status < 0) | ||
1112 | return status; | ||
1113 | } else if (voltuv > arb->voltuv_actual) { | ||
1114 | fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; | ||
1115 | fllclk.clkmhz = gpc2clk_target; | ||
1116 | fllclk.voltuv = voltuv; | ||
1117 | status = clk_program_fll_clks(g, &fllclk); | ||
1118 | if (status < 0) | ||
1119 | return status; | ||
1120 | |||
1121 | status = volt_set_voltage(g, voltuv, voltuv_sram); | ||
1122 | if (status < 0) | ||
1123 | return status; | ||
1124 | |||
1125 | status = g->clk_pmu.clk_mclk.change(g, mclk_target); | ||
1126 | if (status < 0) | ||
1127 | return status; | ||
1128 | } else { | ||
1129 | status = g->clk_pmu.clk_mclk.change(g, mclk_target); | ||
1130 | if (status < 0) | ||
1131 | return status; | ||
1132 | |||
1133 | fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; | ||
1134 | fllclk.clkmhz = gpc2clk_target; | ||
1135 | fllclk.voltuv = voltuv; | ||
1136 | status = clk_program_fll_clks(g, &fllclk); | ||
1137 | if (status < 0) | ||
1138 | return status; | ||
1139 | |||
1140 | } | ||
1141 | |||
1142 | return 0; | ||
1143 | } | ||
1144 | |||
951 | #ifdef CONFIG_DEBUG_FS | 1145 | #ifdef CONFIG_DEBUG_FS |
952 | static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) | 1146 | static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) |
953 | { | 1147 | { |
954 | struct gk20a *g = s->private; | 1148 | struct gk20a *g = s->private; |
955 | struct nvgpu_clk_arb *arb = g->clk_arb; | 1149 | struct nvgpu_clk_arb *arb = g->clk_arb; |
1150 | struct nvgpu_clk_arb_debug *debug; | ||
1151 | |||
956 | u64 num; | 1152 | u64 num; |
957 | s64 tmp, avg, std, max, min; | 1153 | s64 tmp, avg, std, max, min; |
958 | 1154 | ||
959 | /* Make copy of structure to reduce time with lock held */ | 1155 | debug = ACCESS_ONCE(arb->debug); |
960 | mutex_lock(&arb->debug_lock); | 1156 | /* Make copy of structure and ensure no reordering */ |
961 | std = arb->switch_std; | 1157 | smp_rmb(); |
962 | avg = arb->switch_avg; | 1158 | if (!debug) |
963 | max = arb->switch_max; | 1159 | return -EINVAL; |
964 | min = arb->switch_min; | 1160 | |
965 | num = arb->switch_num; | 1161 | std = debug->switch_std; |
966 | mutex_unlock(&arb->debug_lock); | 1162 | avg = debug->switch_avg; |
1163 | max = debug->switch_max; | ||
1164 | min = debug->switch_min; | ||
1165 | num = debug->switch_num; | ||
967 | 1166 | ||
968 | tmp = std; | 1167 | tmp = std; |
969 | do_div(tmp, num); | 1168 | do_div(tmp, num); |
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c index f01b52ad..cf758023 100644 --- a/drivers/gpu/nvgpu/pstate/pstate.c +++ b/drivers/gpu/nvgpu/pstate/pstate.c | |||
@@ -149,10 +149,6 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g) | |||
149 | if (err) | 149 | if (err) |
150 | return err; | 150 | return err; |
151 | 151 | ||
152 | err = clk_vf_point_cache(g); | ||
153 | if (err) | ||
154 | return err; | ||
155 | |||
156 | err = pmgr_domain_pmu_setup(g); | 152 | err = pmgr_domain_pmu_setup(g); |
157 | return err; | 153 | return err; |
158 | } | 154 | } |