summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Nieto <dmartineznie@nvidia.com>2016-10-07 19:25:04 -0400
committerDeepak Nibade <dnibade@nvidia.com>2016-12-27 04:56:52 -0500
commitc4bb19d46e1c9121a0948fa506098cbf2f64e2a6 (patch)
tree29647922e8374377c05ab976c7616410d85eda4d
parentbfc12d25a41c2b5a4d06f233f16331e43c489d8e (diff)
nvgpu: gpu: arbiter for vf switch management
JIRA DNVGPU-143 The arbiter is charged with selecting the proper frequencies when multiple applications simultaneously submit clock change requests. In the current implementation, the arbiter guarantees that the selected frequency will always be higher than or equal to the request, as long as the request is in range. The current code is not yet realtime friendly, as requests are not pre-allocated. Summary of changes: (1) pstate/vf switch no longer selects boot frequency (2) changed the mclk change code to accept an input frequency (3) added arbiter (4) a single session can now submit concurrent requests; the last request is the one that applies for that session (5) modified locking mechanism to reduce lock contention (6) added callback to notify the arbiter that the VF table has changed and is no longer valid (PMU/Thermals must call this when the VF table is invalid) (7) changed internal API to work with MHz (8) added debugfs for stats Change-Id: I6a7b05c9447761e8536f84ef86b5ab0793164d63 Signed-off-by: David Nieto <dmartineznie@nvidia.com> Reviewed-on: http://git-master/r/1239461 Reviewed-by: Thomas Fleury <tfleury@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/1267120 Reviewed-by: Automatic_Commit_Validation_User
-rw-r--r--drivers/gpu/nvgpu/clk/clk.c61
-rw-r--r--drivers/gpu/nvgpu/clk/clk.h1
-rw-r--r--drivers/gpu/nvgpu/clk/clk_arb.c765
-rw-r--r--drivers/gpu/nvgpu/clk/clk_arb.h26
-rw-r--r--drivers/gpu/nvgpu/clk/clk_mclk.c19
-rw-r--r--drivers/gpu/nvgpu/clk/clk_mclk.h10
-rw-r--r--drivers/gpu/nvgpu/gp106/clk_arb_gp106.c10
-rw-r--r--drivers/gpu/nvgpu/pstate/pstate.c4
8 files changed, 703 insertions, 193 deletions
diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index ef0834f4..bec5fad1 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -255,7 +255,7 @@ static int get_regime_id(struct gk20a *g, u32 domain, u32 *regimeid)
255 return -EINVAL; 255 return -EINVAL;
256} 256}
257 257
258int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk) 258int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk)
259{ 259{
260 int status = -EINVAL; 260 int status = -EINVAL;
261 struct clk_domain *pdomain; 261 struct clk_domain *pdomain;
@@ -277,8 +277,6 @@ int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk)
277 if (fllclk->clkmhz == 0) 277 if (fllclk->clkmhz == 0)
278 return -EINVAL; 278 return -EINVAL;
279 279
280 mutex_lock(&pclk->changeclkmutex);
281
282 setfllclk.voltuv = fllclk->voltuv; 280 setfllclk.voltuv = fllclk->voltuv;
283 setfllclk.gpc2clkmhz = fllclk->clkmhz; 281 setfllclk.gpc2clkmhz = fllclk->clkmhz;
284 282
@@ -376,63 +374,6 @@ int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk)
376 if (status) 374 if (status)
377 goto done; 375 goto done;
378done: 376done:
379 mutex_unlock(&pclk->changeclkmutex);
380 return status;
381}
382
383int clk_set_boot_fll_clk(struct gk20a *g)
384{
385 int status;
386 struct change_fll_clk bootfllclk;
387 u16 gpc2clk_clkmhz = BOOT_GPC2CLK_MHZ;
388 u32 gpc2clk_voltuv = 0;
389 u32 gpc2clk_voltuv_sram = 0;
390 u16 mclk_clkmhz = BOOT_MCLK_MHZ;
391 u32 mclk_voltuv = 0;
392 u32 mclk_voltuv_sram = 0;
393 u32 voltuv = 0;
394 u32 voltuv_sram = 0;
395
396 mutex_init(&g->clk_pmu.changeclkmutex);
397 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
398 &gpc2clk_clkmhz, &gpc2clk_voltuv, CTRL_VOLT_DOMAIN_LOGIC);
399 if (status)
400 return status;
401 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
402 &gpc2clk_clkmhz, &gpc2clk_voltuv_sram, CTRL_VOLT_DOMAIN_SRAM);
403 if (status)
404 return status;
405 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
406 &mclk_clkmhz, &mclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC);
407 if (status)
408 return status;
409 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
410 &mclk_clkmhz, &mclk_voltuv_sram, CTRL_VOLT_DOMAIN_SRAM);
411 if (status)
412 return status;
413
414 voltuv = ((gpc2clk_voltuv) > (mclk_voltuv)) ? (gpc2clk_voltuv)
415 : (mclk_voltuv);
416
417 voltuv_sram = ((gpc2clk_voltuv_sram) > (mclk_voltuv_sram)) ?
418 (gpc2clk_voltuv_sram) : (mclk_voltuv_sram);
419
420 status = volt_set_voltage(g, voltuv, voltuv_sram);
421 if (status)
422 gk20a_err(dev_from_gk20a(g),
423 "attempt to set boot voltage failed %d %d",
424 voltuv, voltuv_sram);
425
426 bootfllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
427 bootfllclk.clkmhz = gpc2clk_clkmhz;
428 bootfllclk.voltuv = voltuv;
429 status = clk_program_fllclks(g, &bootfllclk);
430 if (status)
431 gk20a_err(dev_from_gk20a(g), "attempt to set boot gpc2clk failed");
432 status = g->clk_pmu.clk_mclk.change(g, DEFAULT_BOOT_MCLK_SPEED);
433 if (status)
434 gk20a_err(dev_from_gk20a(g), "attempt to set boot mclk failed");
435
436 return status; 377 return status;
437} 378}
438 379
diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h
index a0b88dcb..d0e82173 100644
--- a/drivers/gpu/nvgpu/clk/clk.h
+++ b/drivers/gpu/nvgpu/clk/clk.h
@@ -119,6 +119,5 @@ u32 clk_domain_get_f_points(
119 u32 *fpointscount, 119 u32 *fpointscount,
120 u16 *freqpointsinmhz 120 u16 *freqpointsinmhz
121); 121);
122int clk_set_boot_fll_clk(struct gk20a *g);
123int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk); 122int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk);
124#endif 123#endif
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 98b7cb5f..f868100b 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -18,9 +18,17 @@
18#include <linux/anon_inodes.h> 18#include <linux/anon_inodes.h>
19#include <linux/nvgpu.h> 19#include <linux/nvgpu.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/spinlock.h>
21 22
22#include "clk/clk_arb.h" 23#include "clk/clk_arb.h"
23 24
25
26#define MAX_F_POINTS 127
27
28#ifdef CONFIG_DEBUG_FS
29static int nvgpu_clk_arb_debugfs_init(struct gk20a *g);
30#endif
31
24static int nvgpu_clk_arb_release_event_dev(struct inode *inode, 32static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
25 struct file *filp); 33 struct file *filp);
26static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, 34static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
@@ -28,21 +36,57 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
28static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait); 36static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait);
29 37
30static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work); 38static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
39static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work);
40static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *);
41
42struct nvgpu_clk_vf_point {
43 u16 mhz;
44 u32 uvolt;
45 u32 uvolt_sram;
46};
31 47
32struct nvgpu_clk_arb { 48struct nvgpu_clk_arb {
33 struct mutex req_lock; 49 spinlock_t sessions_lock;
34 struct mutex users_lock; 50 spinlock_t users_lock;
51 spinlock_t req_lock;
52
35 struct list_head users; 53 struct list_head users;
54 struct list_head sessions;
36 struct list_head requests; 55 struct list_head requests;
37 56
38 u64 gpc2clk_current_hz; 57 struct gk20a *g;
39 u64 gpc2clk_target_hz; 58 spinlock_t data_lock;
40 u64 gpc2clk_default_hz; 59 spinlock_t vf_lock;
41 u64 mclk_current_hz; 60
42 u64 mclk_target_hz; 61 u16 gpc2clk_actual_mhz;
43 u64 mclk_default_hz; 62 u16 gpc2clk_default_mhz;
44 atomic_t usercount; 63
64 u16 mclk_actual_mhz;
65 u16 mclk_default_mhz;
66 u32 voltuv_actual;
67
45 struct work_struct update_fn_work; 68 struct work_struct update_fn_work;
69 struct work_struct vftable_fn_work;
70 wait_queue_head_t vftable_wq;
71
72 u16 *mclk_f_points;
73 bool vftable_set;
74
75 struct nvgpu_clk_vf_point *mclk_vf_points;
76 u32 mclk_f_numpoints;
77 u16 *gpc2clk_f_points;
78 u32 gpc2clk_f_numpoints;
79 struct nvgpu_clk_vf_point *gpc2clk_vf_points;
80
81#ifdef CONFIG_DEBUG_FS
82 struct mutex debug_lock;
83 s64 switch_max;
84 s64 switch_min;
85 u64 switch_num;
86 s64 switch_avg;
87 s64 switch_std;
88 bool debugfs_set;
89#endif
46}; 90};
47 91
48 92
@@ -51,15 +95,20 @@ struct nvgpu_clk_dev {
51 struct list_head link; 95 struct list_head link;
52 wait_queue_head_t readout_wq; 96 wait_queue_head_t readout_wq;
53 atomic_t poll_mask; 97 atomic_t poll_mask;
98 u16 gpc2clk_target_mhz;
99 u16 mclk_target_mhz;
54}; 100};
55 101
56struct nvgpu_clk_session { 102struct nvgpu_clk_session {
57 bool zombie; 103 bool zombie;
58 struct gk20a *g; 104 struct gk20a *g;
59 struct kref refcount; 105 struct kref refcount;
106 struct list_head link;
107 struct list_head targets;
60 108
61 u64 gpc2clk_target_hz; 109 spinlock_t target_lock;
62 u64 mclk_target_hz; 110 u16 gpc2clk_target_mhz;
111 u16 mclk_target_mhz;
63}; 112};
64 113
65static const struct file_operations completion_dev_ops = { 114static const struct file_operations completion_dev_ops = {
@@ -77,7 +126,7 @@ static const struct file_operations event_dev_ops = {
77int nvgpu_clk_arb_init_arbiter(struct gk20a *g) 126int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
78{ 127{
79 struct nvgpu_clk_arb *arb; 128 struct nvgpu_clk_arb *arb;
80 u64 default_hz; 129 u16 default_mhz;
81 int err; 130 int err;
82 131
83 gk20a_dbg_fn(""); 132 gk20a_dbg_fn("");
@@ -86,39 +135,104 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
86 return 0; 135 return 0;
87 136
88 arb = kzalloc(sizeof(struct nvgpu_clk_arb), GFP_KERNEL); 137 arb = kzalloc(sizeof(struct nvgpu_clk_arb), GFP_KERNEL);
89 if (!arb) 138 if (!arb) {
90 return -ENOMEM; 139 err = -ENOMEM;
140 goto init_fail;
141 }
142
143 arb->gpc2clk_f_numpoints = MAX_F_POINTS;
144 arb->mclk_f_numpoints = MAX_F_POINTS;
145
146 arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
147 if (!arb->gpc2clk_f_points) {
148 err = -ENOMEM;
149 goto init_fail;
150 }
151
152 arb->mclk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
153 if (!arb->mclk_f_points) {
154 err = -ENOMEM;
155 goto init_fail;
156 }
157
158 arb->gpc2clk_vf_points = kcalloc(MAX_F_POINTS,
159 sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
160 if (!arb->gpc2clk_vf_points) {
161 err = -ENOMEM;
162 goto init_fail;
163 }
164
165 arb->mclk_vf_points = kcalloc(MAX_F_POINTS,
166 sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
167 if (!arb->mclk_vf_points) {
168 err = -ENOMEM;
169 goto init_fail;
170 }
91 171
92 g->clk_arb = arb; 172 g->clk_arb = arb;
173 arb->g = g;
93 174
94 mutex_init(&arb->req_lock); 175 spin_lock_init(&arb->sessions_lock);
95 mutex_init(&arb->users_lock); 176 spin_lock_init(&arb->users_lock);
177 spin_lock_init(&arb->req_lock);
178 spin_lock_init(&arb->data_lock);
179 spin_lock_init(&arb->vf_lock);
96 180
97 err = g->ops.clk_arb.get_arbiter_clk_default(g, 181 err = g->ops.clk_arb.get_arbiter_clk_default(g,
98 NVGPU_GPU_CLK_DOMAIN_MCLK, &default_hz); 182 NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz);
99 if (err) 183 if (err) {
100 return -EINVAL; 184 err = -EINVAL;
185 goto init_fail;
186 }
101 187
102 arb->mclk_target_hz = default_hz; 188 arb->mclk_default_mhz = default_mhz;
103 arb->mclk_current_hz = default_hz;
104 arb->mclk_default_hz = default_hz;
105 189
106 err = g->ops.clk_arb.get_arbiter_clk_default(g, 190 err = g->ops.clk_arb.get_arbiter_clk_default(g,
107 NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_hz); 191 NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_mhz);
108 if (err) 192 if (err) {
109 return -EINVAL; 193 err = -EINVAL;
110 194 goto init_fail;
111 arb->gpc2clk_target_hz = default_hz; 195 }
112 arb->gpc2clk_current_hz = default_hz;
113 arb->gpc2clk_default_hz = default_hz;
114 196
115 atomic_set(&arb->usercount, 0); 197 arb->gpc2clk_default_mhz = default_mhz;
116 198
117 INIT_LIST_HEAD(&arb->users); 199 INIT_LIST_HEAD(&arb->users);
200 INIT_LIST_HEAD(&arb->sessions);
118 INIT_LIST_HEAD(&arb->requests); 201 INIT_LIST_HEAD(&arb->requests);
202
203 init_waitqueue_head(&arb->vftable_wq);
204
205 INIT_WORK(&arb->vftable_fn_work, nvgpu_clk_arb_run_vftable_cb);
206
119 INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb); 207 INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
120 208
209#ifdef CONFIG_DEBUG_FS
210 mutex_init(&arb->debug_lock);
211 if (!arb->debugfs_set) {
212 if (nvgpu_clk_arb_debugfs_init(g))
213 arb->debugfs_set = true;
214 }
215#endif
216 err = nvgpu_clk_arb_update_vftable(arb);
217 if (err < 0)
218 goto init_fail;
219
220 /* Schedule first run */
221 schedule_work(&arb->update_fn_work);
222
121 return 0; 223 return 0;
224
225init_fail:
226
227 kfree(arb->gpc2clk_f_points);
228 kfree(arb->gpc2clk_vf_points);
229
230 kfree(arb->mclk_f_points);
231 kfree(arb->mclk_vf_points);
232
233 kfree(arb);
234
235 return err;
122} 236}
123 237
124void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g) 238void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
@@ -170,6 +284,7 @@ static int nvgpu_clk_arb_install_fd(struct gk20a *g,
170fail: 284fail:
171 kfree(dev); 285 kfree(dev);
172 put_unused_fd(fd); 286 put_unused_fd(fd);
287
173 return err; 288 return err;
174} 289}
175 290
@@ -190,12 +305,16 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
190 session->g = g; 305 session->g = g;
191 306
192 kref_init(&session->refcount); 307 kref_init(&session->refcount);
193 308 spin_lock_init(&session->target_lock);
194 atomic_inc(&arb->usercount);
195 309
196 session->zombie = false; 310 session->zombie = false;
197 session->mclk_target_hz = arb->mclk_default_hz; 311 session->mclk_target_mhz = arb->mclk_default_mhz;
198 session->gpc2clk_target_hz = arb->gpc2clk_default_hz; 312 session->gpc2clk_target_mhz = arb->gpc2clk_default_mhz;
313 INIT_LIST_HEAD(&session->targets);
314
315 spin_lock(&arb->sessions_lock);
316 list_add_tail(&session->link, &arb->sessions);
317 spin_unlock(&arb->sessions_lock);
199 318
200 *_session = session; 319 *_session = session;
201 320
@@ -206,8 +325,15 @@ void nvgpu_clk_arb_free_session(struct kref *refcount)
206{ 325{
207 struct nvgpu_clk_session *session = container_of(refcount, 326 struct nvgpu_clk_session *session = container_of(refcount,
208 struct nvgpu_clk_session, refcount); 327 struct nvgpu_clk_session, refcount);
328 struct nvgpu_clk_arb *arb = session->g->clk_arb;
209 329
330 gk20a_dbg_fn("");
331
332 spin_lock(&arb->sessions_lock);
333 list_del(&session->link);
334 spin_unlock(&arb->sessions_lock);
210 kfree(session); 335 kfree(session);
336;
211} 337}
212 338
213void nvgpu_clk_arb_release_session(struct gk20a *g, 339void nvgpu_clk_arb_release_session(struct gk20a *g,
@@ -215,12 +341,12 @@ void nvgpu_clk_arb_release_session(struct gk20a *g,
215{ 341{
216 struct nvgpu_clk_arb *arb = g->clk_arb; 342 struct nvgpu_clk_arb *arb = g->clk_arb;
217 343
344 gk20a_dbg_fn("");
345
218 session->zombie = true; 346 session->zombie = true;
219 kref_put(&session->refcount, nvgpu_clk_arb_free_session); 347 kref_put(&session->refcount, nvgpu_clk_arb_free_session);
220 348
221 /* schedule arbiter if no more user */ 349 schedule_work(&arb->update_fn_work);
222 if (!atomic_dec_and_test(&arb->usercount))
223 schedule_work(&arb->update_fn_work);
224} 350}
225 351
226int nvgpu_clk_arb_install_event_fd(struct gk20a *g, 352int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
@@ -230,19 +356,155 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
230 struct nvgpu_clk_dev *dev; 356 struct nvgpu_clk_dev *dev;
231 int fd; 357 int fd;
232 358
359 gk20a_dbg_fn("");
360
233 fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev); 361 fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
234 if (fd < 0) 362 if (fd < 0)
235 return fd; 363 return fd;
236 364
237 mutex_lock(&arb->users_lock); 365 spin_lock(&arb->users_lock);
238 list_add_tail(&dev->link, &arb->users); 366 list_add_tail(&dev->link, &arb->users);
239 mutex_unlock(&arb->users_lock); 367 spin_unlock(&arb->users_lock);
240 368
241 *event_fd = fd; 369 *event_fd = fd;
242 370
243 return 0; 371 return 0;
244} 372}
245 373
374int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
375 struct nvgpu_clk_session *session, int *request_fd)
376{
377 struct nvgpu_clk_dev *dev;
378 int fd;
379
380 gk20a_dbg_fn("");
381
382 fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
383 if (fd < 0)
384 return fd;
385
386 *request_fd = fd;
387
388 return 0;
389}
390
391static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *arb)
392{
393 struct gk20a *g = arb->g;
394
395 int i;
396 int status = 0;
397 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
398 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
399
400 /* the flag must be visible in all threads */
401 mb();
402 ACCESS_ONCE(arb->vftable_set) = false;
403
404 spin_lock(&arb->vf_lock);
405
406 if (!clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
407 &arb->gpc2clk_f_numpoints, arb->gpc2clk_f_points) < 0) {
408 gk20a_err(dev_from_gk20a(g),
409 "failed to fetch GPC2CLK frequency points");
410 goto exit_vftable;
411 }
412 if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK,
413 &arb->mclk_f_numpoints, arb->mclk_f_points) < 0) {
414 gk20a_err(dev_from_gk20a(g),
415 "failed to fetch MCLK frequency points");
416 goto exit_vftable;
417 }
418
419
420 memset(arb->mclk_vf_points, 0,
421 arb->mclk_f_numpoints*sizeof(struct nvgpu_clk_vf_point));
422 memset(arb->gpc2clk_vf_points, 0,
423 arb->gpc2clk_f_numpoints*sizeof(struct nvgpu_clk_vf_point));
424
425 for (i = 0 ; i < arb->mclk_f_numpoints; i++) {
426 arb->mclk_vf_points[i].mhz = arb->mclk_f_points[i];
427 mclk_voltuv = mclk_voltuv_sram = 0;
428
429 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
430 &arb->mclk_vf_points[i].mhz, &mclk_voltuv,
431 CTRL_VOLT_DOMAIN_LOGIC);
432 if (status < 0) {
433 gk20a_err(dev_from_gk20a(g),
434 "failed to get MCLK LOGIC voltage");
435 goto exit_vftable;
436 }
437 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
438 &arb->mclk_vf_points[i].mhz, &mclk_voltuv_sram,
439 CTRL_VOLT_DOMAIN_SRAM);
440 if (status < 0) {
441 gk20a_err(dev_from_gk20a(g),
442 "failed to get MCLK SRAM voltage");
443 goto exit_vftable;
444 }
445
446 arb->mclk_vf_points[i].uvolt = mclk_voltuv;
447 arb->mclk_vf_points[i].uvolt_sram = mclk_voltuv_sram;
448 }
449
450 for (i = 0 ; i < arb->gpc2clk_f_numpoints; i++) {
451 arb->gpc2clk_vf_points[i].mhz = arb->gpc2clk_f_points[i];
452 gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
453
454 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
455 &arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv,
456 CTRL_VOLT_DOMAIN_LOGIC);
457 if (status < 0) {
458 gk20a_err(dev_from_gk20a(g),
459 "failed to get GPC2CLK LOGIC voltage");
460 goto exit_vftable;
461 }
462 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
463 &arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv_sram,
464 CTRL_VOLT_DOMAIN_SRAM);
465 if (status < 0) {
466 gk20a_err(dev_from_gk20a(g),
467 "failed to get GPC2CLK SRAM voltage");
468 goto exit_vftable;
469 }
470
471 arb->gpc2clk_vf_points[i].uvolt = gpc2clk_voltuv;
472 arb->gpc2clk_vf_points[i].uvolt_sram = gpc2clk_voltuv_sram;
473
474 }
475
476 /* make flag visible when all data has resolved in the tables */
477 wmb();
478 ACCESS_ONCE(arb->vftable_set) = true;
479
480 wake_up(&arb->vftable_wq);
481exit_vftable:
482
483 spin_unlock(&arb->vf_lock);
484
485 return status;
486}
487
488void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g)
489{
490 struct nvgpu_clk_arb *arb = g->clk_arb;
491
492 ACCESS_ONCE(arb->vftable_set) = false;
493 /* Disable the flag in case arbiter gets scheduled first */
494 mb();
495
496 schedule_work(&arb->vftable_fn_work);
497 schedule_work(&arb->update_fn_work);
498}
499
500static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work)
501{
502 struct nvgpu_clk_arb *arb =
503 container_of(work, struct nvgpu_clk_arb, update_fn_work);
504
505 nvgpu_clk_arb_update_vftable(arb);
506}
507
246static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) 508static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
247{ 509{
248 struct nvgpu_clk_arb *arb = 510 struct nvgpu_clk_arb *arb =
@@ -250,67 +512,270 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
250 struct nvgpu_clk_session *session; 512 struct nvgpu_clk_session *session;
251 struct nvgpu_clk_dev *dev; 513 struct nvgpu_clk_dev *dev;
252 struct nvgpu_clk_dev *tmp; 514 struct nvgpu_clk_dev *tmp;
515 struct gk20a *g = arb->g;
516
517 struct change_fll_clk fllclk;
518 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
519 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
520
521 u32 voltuv, voltuv_sram;
522
523 int status;
524
525 /* Temporary variables for checking target frequency */
526 u16 gpc2clk_target, mclk_target;
253 527
254 mutex_lock(&arb->req_lock); 528 /* iteration index */
529 u32 index;
255 530
256 arb->mclk_target_hz = arb->mclk_default_hz; 531#ifdef CONFIG_DEBUG_FS
257 arb->gpc2clk_target_hz = arb->gpc2clk_default_hz; 532 u64 t0, t1;
533#endif
258 534
259 list_for_each_entry(dev, &arb->requests, link) { 535 gk20a_dbg_fn("");
260 session = dev->session; 536
537#ifdef CONFIG_DEBUG_FS
538 g->ops.read_ptimer(g, &t0);
539#endif
540
541 /* Only one arbiter should be running */
542 gpc2clk_target = 0;
543 mclk_target = 0;
544
545 spin_lock(&arb->sessions_lock);
546 list_for_each_entry(session, &arb->sessions, link) {
261 if (!session->zombie) { 547 if (!session->zombie) {
262 /* TODO: arbiter policy. For now last request wins */ 548 spin_lock(&arb->req_lock);
549 spin_lock(&session->target_lock);
550
551 mclk_target = mclk_target > session->mclk_target_mhz ?
552 mclk_target : session->mclk_target_mhz;
553
554 gpc2clk_target =
555 gpc2clk_target > session->gpc2clk_target_mhz ?
556 gpc2clk_target : session->gpc2clk_target_mhz;
557 /* Move processed requests to notification list*/
558 list_for_each_entry_safe(dev, tmp, &session->targets,
559 link) {
560 list_del_init(&dev->link);
561 list_add_tail(&dev->link, &arb->requests);
562 }
563 spin_unlock(&session->target_lock);
564 spin_unlock(&arb->req_lock);
565
566 }
567 }
568 spin_unlock(&arb->sessions_lock);
569
570 gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
571 arb->gpc2clk_actual_mhz ? gpc2clk_target :
572 arb->gpc2clk_default_mhz;
263 573
264 arb->mclk_target_hz = session->mclk_target_hz; 574 mclk_target = (mclk_target > 0) ? mclk_target :
265 arb->gpc2clk_target_hz = session->gpc2clk_target_hz; 575 arb->mclk_actual_mhz ? mclk_target :
576 arb->mclk_default_mhz;
577
578 if (!gpc2clk_target && !mclk_target) {
579 mclk_target = arb->mclk_default_mhz;
580 gpc2clk_target = arb->gpc2clk_default_mhz;
581 }
582
583 if (!gpc2clk_target)
584 gpc2clk_target = arb->gpc2clk_actual_mhz;
585
586 do {
587 /* Check that the table is set */
588 mb();
589 wait_event(arb->vftable_wq, arb->vftable_set);
590 } while (!ACCESS_ONCE(arb->vftable_set));
591
592 spin_lock(&arb->vf_lock);
593 /* round up the freq requests */
594 for (index = 0; index < arb->gpc2clk_f_numpoints; index++) {
595 if (arb->gpc2clk_vf_points[index].mhz >= gpc2clk_target) {
596 gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
597 gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
598 gpc2clk_voltuv_sram =
599 arb->gpc2clk_vf_points[index].uvolt_sram;
600 break;
266 } 601 }
267 } 602 }
268 603
269 /* TODO: loop up higher or equal VF points */ 604 if (index == arb->gpc2clk_f_numpoints) {
605 gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
606 gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
607 gpc2clk_voltuv_sram =
608 arb->gpc2clk_vf_points[index].uvolt_sram;
609 }
610
611 if (!mclk_target)
612 mclk_target = arb->mclk_actual_mhz;
613
614 for (index = 0; index < arb->mclk_f_numpoints; index++) {
615 if (arb->mclk_vf_points[index].mhz >= mclk_target) {
616 mclk_target = arb->mclk_vf_points[index].mhz;
617 mclk_voltuv = arb->mclk_vf_points[index].uvolt;
618 mclk_voltuv_sram =
619 arb->mclk_vf_points[index].uvolt_sram;
620 break;
621 }
622 }
623 if (index == arb->mclk_f_numpoints) {
624 mclk_target = arb->mclk_vf_points[index].mhz;
625 mclk_voltuv = arb->mclk_vf_points[index].uvolt;
626 mclk_voltuv_sram =
627 arb->mclk_vf_points[index].uvolt_sram;
628 }
629 spin_unlock(&arb->vf_lock);
630
631 /* Program clocks */
632 /* A change in both mclk of gpc2clk may require a change in voltage */
633 if ((arb->gpc2clk_actual_mhz == gpc2clk_target) &&
634 (arb->mclk_actual_mhz == mclk_target)) {
635 goto exit_arb;
636 }
637
638 voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
639 voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
640 gpc2clk_voltuv_sram : mclk_voltuv_sram;
641
642 /* if voltage ascends we do:
643 * (1) FLL change
644 * (2) Voltage change
645 * (3) MCLK change
646 * If it goes down
647 * (1) MCLK change
648 * (2) Voltage change
649 * (3) FLL change
650 */
651
652 /* descending */
653 if (voltuv <= arb->voltuv_actual) {
654 status = g->clk_pmu.clk_mclk.change(g, mclk_target);
655 if (status < 0)
656 goto exit_arb;
657
658 status = volt_set_voltage(g, voltuv, voltuv_sram);
659 if (status < 0)
660 goto exit_arb;
661
662 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
663 fllclk.clkmhz = gpc2clk_target;
664 fllclk.voltuv = voltuv;
665 status = clk_program_fll_clks(g, &fllclk);
666 if (status < 0)
667 goto exit_arb;
668 } else {
669 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
670 fllclk.clkmhz = gpc2clk_target;
671 fllclk.voltuv = voltuv;
672 status = clk_program_fll_clks(g, &fllclk);
673 if (status < 0)
674 goto exit_arb;
675
676 status = volt_set_voltage(g, voltuv, voltuv_sram);
677 if (status < 0)
678 goto exit_arb;
679
680 status = g->clk_pmu.clk_mclk.change(g, mclk_target);
681 if (status < 0)
682 goto exit_arb;
683 }
684
685 spin_lock(&arb->data_lock);
686 arb->gpc2clk_actual_mhz = gpc2clk_target;
687 arb->mclk_actual_mhz = mclk_target;
688 arb->voltuv_actual = voltuv;
689 /* Make changes visible to other threads */
690 wmb();
691
692 spin_unlock(&arb->data_lock);
693
694#ifdef CONFIG_DEBUG_FS
695 g->ops.read_ptimer(g, &t1);
696 arb->switch_num++;
697
698 mutex_lock(&arb->debug_lock);
699 if (arb->switch_num == 1) {
700 arb->switch_max = arb->switch_min =
701 arb->switch_avg = (t1-t0)/1000;
702 arb->switch_std = 0;
703 } else {
704 s64 prev_avg;
705 u64 curr = (t1-t0)/1000;
706
707 arb->switch_max = curr > arb->switch_max ?
708 curr : arb->switch_max;
709 arb->switch_min = arb->switch_min ?
710 (curr < arb->switch_min ?
711 curr : arb->switch_min) : curr;
712 prev_avg = arb->switch_avg;
713 arb->switch_avg = (curr +
714 (arb->switch_avg * (arb->switch_num-1))) /
715 arb->switch_num;
716 arb->switch_std +=
717 (curr - arb->switch_avg) * (curr - prev_avg);
718 }
719 mutex_unlock(&arb->debug_lock);
270 720
271 arb->mclk_current_hz = arb->mclk_target_hz; 721#endif
272 arb->gpc2clk_current_hz = arb->gpc2clk_target_hz;
273 722
274 /* TODO: actually program the clocks */ 723exit_arb:
275 724
725 spin_lock(&arb->req_lock);
276 /* notify completion for all requests */ 726 /* notify completion for all requests */
277 list_for_each_entry_safe(dev, tmp, &arb->requests, link) { 727 list_for_each_entry_safe(dev, tmp, &arb->requests, link) {
278 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); 728 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
279 wake_up_interruptible(&dev->readout_wq); 729 wake_up_interruptible(&dev->readout_wq);
280 list_del_init(&dev->link); 730 list_del_init(&dev->link);
281 } 731 }
282 mutex_unlock(&arb->req_lock); 732 spin_unlock(&arb->req_lock);
283 733
284 /* notify event for all users */ 734 /* notify event for all users */
285 mutex_lock(&arb->users_lock); 735 spin_lock(&arb->users_lock);
286 list_for_each_entry(dev, &arb->users, link) { 736 list_for_each_entry(dev, &arb->users, link) {
287 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); 737 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
288 wake_up_interruptible(&dev->readout_wq); 738 wake_up_interruptible(&dev->readout_wq);
289 } 739 }
290 mutex_unlock(&arb->users_lock); 740 spin_unlock(&arb->users_lock);
291
292} 741}
293 742
294int nvgpu_clk_arb_apply_session_constraints(struct gk20a *g, 743int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
295 struct nvgpu_clk_session *session, int *completion_fd) 744 struct nvgpu_clk_session *session, int request_fd)
296{ 745{
297 struct nvgpu_clk_arb *arb = g->clk_arb; 746 struct nvgpu_clk_arb *arb = g->clk_arb;
298 struct nvgpu_clk_dev *dev; 747 struct nvgpu_clk_dev *dev;
299 int fd; 748 struct fd fd;
749 int err = 0;
300 750
301 fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); 751 gk20a_dbg_fn("");
302 if (fd < 0) 752
303 return fd; 753 fd = fdget(request_fd);
754
755 if (!fd.file)
756 return -EINVAL;
757
758 dev = (struct nvgpu_clk_dev *) fd.file->private_data;
304 759
305 *completion_fd = fd; 760 if (!dev || dev->session != session) {
761 err = -EINVAL;
762 goto fdput_fd;
763 }
764 spin_lock(&session->target_lock);
765 session->mclk_target_mhz = dev->mclk_target_mhz ? dev->mclk_target_mhz :
766 session->mclk_target_mhz;
767 session->gpc2clk_target_mhz = dev->gpc2clk_target_mhz ?
768 dev->gpc2clk_target_mhz :
769 session->gpc2clk_target_mhz;
306 770
307 mutex_lock(&arb->req_lock); 771 list_add_tail(&dev->link, &session->targets);
308 list_add_tail(&dev->link, &arb->requests); 772 spin_unlock(&session->target_lock);
309 mutex_unlock(&arb->req_lock);
310 773
311 schedule_work(&arb->update_fn_work); 774 schedule_work(&arb->update_fn_work);
312 775
313 return 0; 776fdput_fd:
777 fdput(fd);
778 return err;
314} 779}
315 780
316static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) 781static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
@@ -328,11 +793,22 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
328{ 793{
329 struct nvgpu_clk_dev *dev = filp->private_data; 794 struct nvgpu_clk_dev *dev = filp->private_data;
330 struct nvgpu_clk_session *session = dev->session; 795 struct nvgpu_clk_session *session = dev->session;
796 struct nvgpu_clk_arb *arb;
797
798 arb = session->g->clk_arb;
331 799
332 gk20a_dbg_fn(""); 800 gk20a_dbg_fn("");
333 801
802 spin_lock(&arb->req_lock);
803 spin_lock(&session->target_lock);
804 if (!list_empty(&dev->link))
805 list_del_init(&dev->link);
806 spin_unlock(&session->target_lock);
807 spin_unlock(&arb->req_lock);
808
334 kref_put(&session->refcount, nvgpu_clk_arb_free_session); 809 kref_put(&session->refcount, nvgpu_clk_arb_free_session);
335 kfree(dev); 810 kfree(dev);
811
336 return 0; 812 return 0;
337} 813}
338 814
@@ -341,94 +817,123 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
341{ 817{
342 struct nvgpu_clk_dev *dev = filp->private_data; 818 struct nvgpu_clk_dev *dev = filp->private_data;
343 struct nvgpu_clk_session *session = dev->session; 819 struct nvgpu_clk_session *session = dev->session;
344 struct nvgpu_clk_arb *arb = session->g->clk_arb; 820 struct nvgpu_clk_arb *arb;
821
822 arb = session->g->clk_arb;
345 823
346 gk20a_dbg_fn(""); 824 gk20a_dbg_fn("");
347 825
348 mutex_lock(&arb->users_lock); 826 spin_lock(&arb->users_lock);
349 list_del_init(&dev->link); 827 list_del(&dev->link);
350 mutex_unlock(&arb->users_lock); 828 spin_unlock(&arb->users_lock);
351 829
352 kref_put(&session->refcount, nvgpu_clk_arb_free_session); 830 kref_put(&session->refcount, nvgpu_clk_arb_free_session);
353 kfree(dev); 831 kfree(dev);
832
354 return 0; 833 return 0;
355} 834}
356 835
357int nvgpu_clk_arb_set_session_target_hz(struct nvgpu_clk_session *session, 836int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
358 u32 api_domain, u64 target_hz) 837 int request_fd, u32 api_domain, u16 target_mhz)
359{ 838{
839 struct nvgpu_clk_dev *dev;
840 struct fd fd;
841 int err = 0;
360 842
361 gk20a_dbg_fn("domain=0x%08x target_hz=%llu", api_domain, target_hz); 843 gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz);
844
845 fd = fdget(request_fd);
846
847 if (!fd.file)
848 return -EINVAL;
849
850 dev = fd.file->private_data;
851 if (!dev || dev->session != session) {
852 err = -EINVAL;
853 goto fdput_fd;
854 }
362 855
363 switch (api_domain) { 856 switch (api_domain) {
364 case NVGPU_GPU_CLK_DOMAIN_MCLK: 857 case NVGPU_GPU_CLK_DOMAIN_MCLK:
365 session->mclk_target_hz = target_hz; 858 dev->mclk_target_mhz = target_mhz;
366 return 0; 859 break;
367 860
368 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: 861 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
369 session->gpc2clk_target_hz = target_hz; 862 dev->gpc2clk_target_mhz = target_mhz;
370 return 0; 863 break;
371 864
372 default: 865 default:
373 return -EINVAL; 866 err = -EINVAL;
374 } 867 }
868
869fdput_fd:
870 fdput(fd);
871 return err;
375} 872}
376 873
377int nvgpu_clk_arb_get_session_target_hz(struct nvgpu_clk_session *session, 874int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
378 u32 api_domain, u64 *freq_hz) 875 u32 api_domain, u16 *freq_mhz)
379{ 876{
877 int err = 0;
878
879 spin_lock(&session->target_lock);
880
380 switch (api_domain) { 881 switch (api_domain) {
381 case NVGPU_GPU_CLK_DOMAIN_MCLK: 882 case NVGPU_GPU_CLK_DOMAIN_MCLK:
382 *freq_hz = session->mclk_target_hz; 883 *freq_mhz = session->mclk_target_mhz;
383 return 0; 884 break;
384 885
385 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: 886 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
386 *freq_hz = session->gpc2clk_target_hz; 887 *freq_mhz = session->gpc2clk_target_mhz;
387 return 0; 888 break;
388 889
389 default: 890 default:
390 *freq_hz = 0; 891 *freq_mhz = 0;
391 return -EINVAL; 892 err = -EINVAL;
392 } 893 }
894
895 spin_unlock(&session->target_lock);
896 return err;
393} 897}
394 898
395int nvgpu_clk_arb_get_arbiter_actual_hz(struct gk20a *g, 899int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
396 u32 api_domain, u64 *freq_hz) 900 u32 api_domain, u16 *freq_mhz)
397{ 901{
398 struct nvgpu_clk_arb *arb = g->clk_arb; 902 struct nvgpu_clk_arb *arb = g->clk_arb;
399 int err = 0; 903 int err = 0;
400 904
401 mutex_lock(&arb->req_lock); 905 spin_lock(&arb->data_lock);
906
402 switch (api_domain) { 907 switch (api_domain) {
403 case NVGPU_GPU_CLK_DOMAIN_MCLK: 908 case NVGPU_GPU_CLK_DOMAIN_MCLK:
404 *freq_hz = arb->mclk_current_hz; 909 *freq_mhz = arb->mclk_actual_mhz;
405 break; 910 break;
406 911
407 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: 912 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
408 *freq_hz = arb->gpc2clk_current_hz; 913 *freq_mhz = arb->gpc2clk_actual_mhz;
409 break; 914 break;
410 915
411 default: 916 default:
412 *freq_hz = 0; 917 *freq_mhz = 0;
413 err = -EINVAL; 918 err = -EINVAL;
414 } 919 }
415 mutex_unlock(&arb->req_lock);
416 920
921 spin_unlock(&arb->data_lock);
417 return err; 922 return err;
418} 923}
419 924
420int nvgpu_clk_arb_get_arbiter_effective_hz(struct gk20a *g, 925int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g,
421 u32 api_domain, u64 *freq_hz) 926 u32 api_domain, u16 *freq_mhz)
422{ 927{
423 /* TODO: measure clocks from counters */ 928 /* TODO: measure clocks from counters */
424 return nvgpu_clk_arb_get_arbiter_actual_hz(g, api_domain, freq_hz); 929 return nvgpu_clk_arb_get_arbiter_actual_mhz(g, api_domain, freq_mhz);
425} 930}
426 931
427int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, 932int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
428 u64 *min_hz, u64 *max_hz) 933 u16 *min_mhz, u16 *max_mhz)
429{ 934{
430 return g->ops.clk_arb.get_arbiter_clk_range(g, api_domain, 935 return g->ops.clk_arb.get_arbiter_clk_range(g, api_domain,
431 min_hz, max_hz); 936 min_mhz, max_mhz);
432} 937}
433 938
434u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) 939u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
@@ -441,3 +946,67 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
441{ 946{
442 return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints); 947 return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints);
443} 948}
949
950#ifdef CONFIG_DEBUG_FS
951static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
952{
953 struct gk20a *g = s->private;
954 struct nvgpu_clk_arb *arb = g->clk_arb;
955 u64 num;
956 s64 tmp, avg, std, max, min;
957
958 /* Make copy of structure to reduce time with lock held */
959 mutex_lock(&arb->debug_lock);
960 std = arb->switch_std;
961 avg = arb->switch_avg;
962 max = arb->switch_max;
963 min = arb->switch_min;
964 num = arb->switch_num;
965 mutex_unlock(&arb->debug_lock);
966
967 tmp = std;
968 do_div(tmp, num);
969 seq_printf(s, "Number of transitions: %lld\n",
970 num);
971 seq_printf(s, "max / min : %lld / %lld usec\n",
972 max, min);
973 seq_printf(s, "avg / std : %lld / %ld usec\n",
974 avg, int_sqrt(tmp));
975
976 return 0;
977}
978
979static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
980{
981 return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
982}
983
984static const struct file_operations nvgpu_clk_arb_stats_fops = {
985 .open = nvgpu_clk_arb_stats_open,
986 .read = seq_read,
987 .llseek = seq_lseek,
988 .release = single_release,
989};
990
991
992static int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
993{
994 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
995
996 struct dentry *gpu_root = platform->debugfs;
997 struct dentry *d;
998
999 gk20a_dbg(gpu_dbg_info, "g=%p", g);
1000
1001 d = debugfs_create_file(
1002 "arb_stats",
1003 S_IRUGO,
1004 gpu_root,
1005 g,
1006 &nvgpu_clk_arb_stats_fops);
1007 if (!d)
1008 return -ENOMEM;
1009
1010 return 0;
1011}
1012#endif
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.h b/drivers/gpu/nvgpu/clk/clk_arb.h
index 95749369..717cca9b 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.h
+++ b/drivers/gpu/nvgpu/clk/clk_arb.h
@@ -22,13 +22,13 @@ struct nvgpu_clk_session;
22int nvgpu_clk_arb_init_arbiter(struct gk20a *g); 22int nvgpu_clk_arb_init_arbiter(struct gk20a *g);
23 23
24int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, 24int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
25 u64 *min_hz, u64 *max_hz); 25 u16 *min_mhz, u16 *max_mhz);
26 26
27int nvgpu_clk_arb_get_arbiter_actual_hz(struct gk20a *g, 27int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
28 u32 api_domain, u64 *actual_hz); 28 u32 api_domain, u16 *actual_mhz);
29 29
30int nvgpu_clk_arb_get_arbiter_effective_hz(struct gk20a *g, 30int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g,
31 u32 api_domain, u64 *actual_hz); 31 u32 api_domain, u16 *effective_mhz);
32 32
33int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, 33int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
34 u32 api_domain, u32 *max_points, u16 *fpoints); 34 u32 api_domain, u32 *max_points, u16 *fpoints);
@@ -46,19 +46,21 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
46void nvgpu_clk_arb_release_session(struct gk20a *g, 46void nvgpu_clk_arb_release_session(struct gk20a *g,
47 struct nvgpu_clk_session *session); 47 struct nvgpu_clk_session *session);
48 48
49int nvgpu_clk_arb_apply_session_constraints(struct gk20a *g, 49int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
50 struct nvgpu_clk_session *session, int *completion_fd); 50 struct nvgpu_clk_session *session, int request_fd);
51 51
52int nvgpu_clk_arb_set_session_target_hz(struct nvgpu_clk_session *session, 52int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
53 u32 api_domain, u64 target_hz); 53 int fd, u32 api_domain, u16 target_mhz);
54 54
55int nvgpu_clk_arb_get_session_target_hz(struct nvgpu_clk_session *session, 55int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
56 u32 api_domain, u64 *target_hz); 56 u32 api_domain, u16 *target_mhz);
57 57
58int nvgpu_clk_arb_install_event_fd(struct gk20a *g, 58int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
59 struct nvgpu_clk_session *session, int *event_fd); 59 struct nvgpu_clk_session *session, int *event_fd);
60 60
61int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
62 struct nvgpu_clk_session *session, int *event_fd);
61 63
62 64void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g);
63#endif /* _CLK_ARB_H_ */ 65#endif /* _CLK_ARB_H_ */
64 66
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
index 86f4ff6d..6ad6c054 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.c
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -2222,7 +2222,7 @@ int clk_mclkseq_init_mclk_gddr5(struct gk20a *g)
2222 return 0; 2222 return 0;
2223} 2223}
2224 2224
2225int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed) 2225int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val)
2226{ 2226{
2227 struct clk_mclk_state *mclk; 2227 struct clk_mclk_state *mclk;
2228 struct pmu_payload payload = { {0} }; 2228 struct pmu_payload payload = { {0} };
@@ -2236,6 +2236,7 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed)
2236#ifdef CONFIG_DEBUG_FS 2236#ifdef CONFIG_DEBUG_FS
2237 u64 t0, t1; 2237 u64 t0, t1;
2238#endif 2238#endif
2239 enum gk20a_mclk_speed speed;
2239 2240
2240 gk20a_dbg_info(""); 2241 gk20a_dbg_info("");
2241 2242
@@ -2246,6 +2247,13 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed)
2246 if (!mclk->init) 2247 if (!mclk->init)
2247 goto exit_status; 2248 goto exit_status;
2248 2249
2250 /* TODO thia should be done according to VBIOS tables */
2251
2252 speed = (val <= MCLK_LOW_SPEED_LIMIT) ? gk20a_mclk_low_speed :
2253 (val <= MCLK_MID_SPEED_LIMIT) ? gk20a_mclk_mid_speed :
2254 gk20a_mclk_high_speed;
2255
2256
2249 if (speed == mclk->speed) 2257 if (speed == mclk->speed)
2250 goto exit_status; 2258 goto exit_status;
2251 2259
@@ -2374,20 +2382,13 @@ exit_status:
2374#ifdef CONFIG_DEBUG_FS 2382#ifdef CONFIG_DEBUG_FS
2375static int mclk_debug_speed_set(void *data, u64 val) 2383static int mclk_debug_speed_set(void *data, u64 val)
2376{ 2384{
2377 enum gk20a_mclk_speed speed;
2378 struct gk20a *g = (struct gk20a *) data; 2385 struct gk20a *g = (struct gk20a *) data;
2379 struct clk_mclk_state *mclk; 2386 struct clk_mclk_state *mclk;
2380 2387
2381 mclk = &g->clk_pmu.clk_mclk; 2388 mclk = &g->clk_pmu.clk_mclk;
2382 2389
2383 /* TODO thia should be done according to VBIOS tables */
2384
2385 speed = (val <= MCLK_LOW_SPEED_LIMIT) ? gk20a_mclk_low_speed :
2386 (val <= MCLK_MID_SPEED_LIMIT) ? gk20a_mclk_mid_speed :
2387 gk20a_mclk_high_speed;
2388
2389 if (mclk->change) 2390 if (mclk->change)
2390 return mclk->change(g, speed); 2391 return mclk->change(g, (u16) val);
2391 return 0; 2392 return 0;
2392 2393
2393} 2394}
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.h b/drivers/gpu/nvgpu/clk/clk_mclk.h
index 9d193c96..e3e6c1ee 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.h
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.h
@@ -22,9 +22,12 @@ enum gk20a_mclk_speed {
22 gk20a_mclk_high_speed, 22 gk20a_mclk_high_speed,
23}; 23};
24 24
25#define DEFAULT_BOOT_MCLK_SPEED gk20a_mclk_high_speed
26#define MCLK_LOW_SPEED_LIMIT 405 25#define MCLK_LOW_SPEED_LIMIT 405
27#define MCLK_MID_SPEED_LIMIT 810 26#define MCLK_MID_SPEED_LIMIT 810
27#define MCLK_HIGH_SPEED_LIMIT 3003
28
29#define DEFAULT_BOOT_MCLK_SPEED MCLK_HIGH_SPEED_LIMIT
30
28struct clk_mclk_state { 31struct clk_mclk_state {
29 enum gk20a_mclk_speed speed; 32 enum gk20a_mclk_speed speed;
30 struct mutex mclk_mutex; 33 struct mutex mclk_mutex;
@@ -32,7 +35,7 @@ struct clk_mclk_state {
32 bool init; 35 bool init;
33 36
34 /* function pointers */ 37 /* function pointers */
35 int (*change)(struct gk20a *g, enum gk20a_mclk_speed speed); 38 int (*change)(struct gk20a *g, u16 val);
36 39
37#ifdef CONFIG_DEBUG_FS 40#ifdef CONFIG_DEBUG_FS
38 s64 switch_max; 41 s64 switch_max;
@@ -45,7 +48,6 @@ struct clk_mclk_state {
45}; 48};
46 49
47int clk_mclkseq_init_mclk_gddr5(struct gk20a *g); 50int clk_mclkseq_init_mclk_gddr5(struct gk20a *g);
48int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, 51int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val);
49 enum gk20a_mclk_speed speed);
50 52
51#endif 53#endif
diff --git a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
index 112cb588..d1cbb32b 100644
--- a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
@@ -23,7 +23,7 @@ static u32 gp106_get_arbiter_clk_domains(struct gk20a *g)
23} 23}
24 24
25static int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, 25static int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
26 u64 *min_hz, u64 *max_hz) 26 u16 *min_mhz, u16 *max_mhz)
27{ 27{
28 enum nv_pmu_clk_clkwhich clkwhich; 28 enum nv_pmu_clk_clkwhich clkwhich;
29 struct clk_set_info *p0_info; 29 struct clk_set_info *p0_info;
@@ -52,14 +52,14 @@ static int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
52 if (!p0_info) 52 if (!p0_info)
53 return -EINVAL; 53 return -EINVAL;
54 54
55 *min_hz = (u64)(p5_info->min_mhz) * (u64)MHZ; 55 *min_mhz = p5_info->min_mhz;
56 *max_hz = (u64)(p0_info->max_mhz) * (u64)MHZ; 56 *max_mhz = p0_info->max_mhz;
57 57
58 return 0; 58 return 0;
59} 59}
60 60
61static int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, 61static int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
62 u64 *default_hz) 62 u16 *default_mhz)
63{ 63{
64 enum nv_pmu_clk_clkwhich clkwhich; 64 enum nv_pmu_clk_clkwhich clkwhich;
65 struct clk_set_info *p0_info; 65 struct clk_set_info *p0_info;
@@ -82,7 +82,7 @@ static int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
82 if (!p0_info) 82 if (!p0_info)
83 return -EINVAL; 83 return -EINVAL;
84 84
85 *default_hz = (u64)p0_info->max_mhz * (u64)MHZ; 85 *default_mhz = p0_info->max_mhz;
86 86
87 return 0; 87 return 0;
88} 88}
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index 0dc15201..f01b52ad 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -153,10 +153,6 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g)
153 if (err) 153 if (err)
154 return err; 154 return err;
155 155
156 err = clk_set_boot_fll_clk(g);
157 if (err)
158 return err;
159
160 err = pmgr_domain_pmu_setup(g); 156 err = pmgr_domain_pmu_setup(g);
161 return err; 157 return err;
162} 158}