diff options
author | ddutta <ddutta@nvidia.com> | 2018-09-14 01:18:48 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-09-20 13:50:02 -0400 |
commit | 1c7258411da89aea5279e9a8d117479928f8bf64 (patch) | |
tree | 854c2a248bf7b88141ecb3f27e804fa8065a0983 /drivers/gpu | |
parent | feefb7046a88311d88a37ad2cc934ec7b9a9c28f (diff) |
gpu: nvgpu: expose linux clock controls via HAL
Expose the Linux-specific clock implementations via the HAL
interface to allow nvgpu to use the controls globally. This patch
does the following:
1) Implement a new ops interface and a corresponding Linux-specific
implementation for allowing nvgpu to iterate through a list of
available clock frequencies via nvgpu_linux_clk_get_f_points().
2) Implement nvgpu_clk_get_range() and nvgpu_clk_get_round_rate().
Bug 2061372
Change-Id: I7ce9a999dbdcd9fafcc84301af148545f6ca97a9
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1774280
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/clk.c | 118 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/platform_gk20a.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c | 26 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/scale.c | 1 |
6 files changed, 158 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 8f2881ec..5821f742 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h | |||
@@ -1086,6 +1086,10 @@ struct gpu_ops { | |||
1086 | int (*clk_domain_get_f_points)(struct gk20a *g, | 1086 | int (*clk_domain_get_f_points)(struct gk20a *g, |
1087 | u32 clkapidomain, u32 *pfpointscount, | 1087 | u32 clkapidomain, u32 *pfpointscount, |
1088 | u16 *pfreqpointsinmhz); | 1088 | u16 *pfreqpointsinmhz); |
1089 | int (*clk_get_round_rate)(struct gk20a *g, u32 api_domain, | ||
1090 | unsigned long rate_target, unsigned long *rounded_rate); | ||
1091 | int (*get_clk_range)(struct gk20a *g, u32 api_domain, | ||
1092 | u16 *min_mhz, u16 *max_mhz); | ||
1089 | unsigned long (*measure_freq)(struct gk20a *g, u32 api_domain); | 1093 | unsigned long (*measure_freq)(struct gk20a *g, u32 api_domain); |
1090 | u32 (*get_rate_cntr)(struct gk20a *g, struct namemap_cfg *c); | 1094 | u32 (*get_rate_cntr)(struct gk20a *g, struct namemap_cfg *c); |
1091 | unsigned long (*get_rate)(struct gk20a *g, u32 api_domain); | 1095 | unsigned long (*get_rate)(struct gk20a *g, u32 api_domain); |
diff --git a/drivers/gpu/nvgpu/os/linux/clk.c b/drivers/gpu/nvgpu/os/linux/clk.c index 414b17c4..cc420994 100644 --- a/drivers/gpu/nvgpu/os/linux/clk.c +++ b/drivers/gpu/nvgpu/os/linux/clk.c | |||
@@ -27,6 +27,8 @@ | |||
27 | 27 | ||
28 | #include "gk20a/gk20a.h" | 28 | #include "gk20a/gk20a.h" |
29 | 29 | ||
30 | #define HZ_TO_MHZ(x) ((x) / 1000000) | ||
31 | |||
30 | static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain) | 32 | static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain) |
31 | { | 33 | { |
32 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | 34 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); |
@@ -142,6 +144,119 @@ static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain) | |||
142 | return ret; | 144 | return ret; |
143 | } | 145 | } |
144 | 146 | ||
147 | /* | ||
148 | * This API is used to return a list of supported frequencies by igpu. | ||
149 | * Set *num_points as 0 to get the size of the freqs list, returned | ||
150 | * by *num_points itself. freqs array must be provided by caller. | ||
151 | * If *num_points is non-zero, then freqs array size must at least | ||
152 | * equal *num_points. | ||
153 | */ | ||
154 | static int nvgpu_linux_clk_get_f_points(struct gk20a *g, | ||
155 | u32 api_domain, u32 *num_points, u16 *freqs) | ||
156 | { | ||
157 | struct device *dev = dev_from_gk20a(g); | ||
158 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
159 | unsigned long *gpu_freq_table; | ||
160 | int ret = 0; | ||
161 | int num_supported_freq = 0; | ||
162 | u32 i; | ||
163 | |||
164 | switch (api_domain) { | ||
165 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
166 | ret = platform->get_clk_freqs(dev, &gpu_freq_table, | ||
167 | &num_supported_freq); | ||
168 | |||
169 | if (ret) { | ||
170 | return ret; | ||
171 | } | ||
172 | |||
173 | if (num_points == NULL) { | ||
174 | return -EINVAL; | ||
175 | } | ||
176 | |||
177 | if (*num_points != 0U) { | ||
178 | if (freqs == NULL || (*num_points > (u32)num_supported_freq)) { | ||
179 | return -EINVAL; | ||
180 | } | ||
181 | } | ||
182 | |||
183 | if (*num_points == 0) { | ||
184 | *num_points = num_supported_freq; | ||
185 | } else { | ||
186 | for (i = 0; i < *num_points; i++) { | ||
187 | freqs[i] = HZ_TO_MHZ(gpu_freq_table[i]); | ||
188 | } | ||
189 | } | ||
190 | break; | ||
191 | default: | ||
192 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
193 | ret = -EINVAL; | ||
194 | break; | ||
195 | } | ||
196 | |||
197 | return ret; | ||
198 | } | ||
199 | |||
200 | static int nvgpu_clk_get_range(struct gk20a *g, u32 api_domain, | ||
201 | u16 *min_mhz, u16 *max_mhz) | ||
202 | { | ||
203 | struct device *dev = dev_from_gk20a(g); | ||
204 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
205 | unsigned long *freqs; | ||
206 | int num_freqs; | ||
207 | int ret; | ||
208 | |||
209 | switch (api_domain) { | ||
210 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
211 | ret = platform->get_clk_freqs(dev, &freqs, &num_freqs); | ||
212 | |||
213 | if (!ret) { | ||
214 | *min_mhz = HZ_TO_MHZ(freqs[0]); | ||
215 | *max_mhz = HZ_TO_MHZ(freqs[num_freqs - 1]); | ||
216 | } | ||
217 | break; | ||
218 | default: | ||
219 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
220 | ret = -EINVAL; | ||
221 | break; | ||
222 | } | ||
223 | |||
224 | return ret; | ||
225 | } | ||
226 | |||
227 | /* rate_target should be passed in as Hz | ||
228 | rounded_rate is returned in Hz */ | ||
229 | static int nvgpu_clk_get_round_rate(struct gk20a *g, | ||
230 | u32 api_domain, unsigned long rate_target, | ||
231 | unsigned long *rounded_rate) | ||
232 | { | ||
233 | struct device *dev = dev_from_gk20a(g); | ||
234 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
235 | unsigned long *freqs; | ||
236 | int num_freqs; | ||
237 | int i, ret = 0; | ||
238 | |||
239 | switch (api_domain) { | ||
240 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
241 | ret = platform->get_clk_freqs(dev, &freqs, &num_freqs); | ||
242 | |||
243 | for (i = 0; i < num_freqs; ++i) { | ||
244 | if (freqs[i] >= rate_target) { | ||
245 | *rounded_rate = freqs[i]; | ||
246 | return 0; | ||
247 | } | ||
248 | } | ||
249 | *rounded_rate = freqs[num_freqs - 1]; | ||
250 | break; | ||
251 | default: | ||
252 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
253 | ret = -EINVAL; | ||
254 | break; | ||
255 | } | ||
256 | |||
257 | return ret; | ||
258 | } | ||
259 | |||
145 | static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk) | 260 | static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk) |
146 | { | 261 | { |
147 | return clk_prepare_enable(clk->tegra_clk); | 262 | return clk_prepare_enable(clk->tegra_clk); |
@@ -162,4 +277,7 @@ void nvgpu_linux_init_clk_support(struct gk20a *g) | |||
162 | g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate; | 277 | g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate; |
163 | g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable; | 278 | g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable; |
164 | g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare; | 279 | g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare; |
280 | g->ops.clk.clk_domain_get_f_points = nvgpu_linux_clk_get_f_points; | ||
281 | g->ops.clk.get_clk_range = nvgpu_clk_get_range; | ||
282 | g->ops.clk.clk_get_round_rate = nvgpu_clk_get_round_rate; | ||
165 | } | 283 | } |
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h index f3e80b8c..b5beeefe 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h | |||
@@ -274,6 +274,9 @@ struct gk20a_platform { | |||
274 | 274 | ||
275 | /* scaling rate */ | 275 | /* scaling rate */ |
276 | unsigned long cached_rate; | 276 | unsigned long cached_rate; |
277 | |||
278 | /* synchronized access to platform->get_clk_freqs */ | ||
279 | struct nvgpu_mutex clk_get_freq_lock; | ||
277 | }; | 280 | }; |
278 | 281 | ||
279 | static inline struct gk20a_platform *gk20a_get_platform( | 282 | static inline struct gk20a_platform *gk20a_get_platform( |
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c index c5464d5b..5fdcb05c 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c | |||
@@ -55,6 +55,9 @@ | |||
55 | static unsigned long | 55 | static unsigned long |
56 | gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP]; | 56 | gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP]; |
57 | 57 | ||
58 | static bool freq_table_init_complete; | ||
59 | static int num_supported_freq; | ||
60 | |||
58 | #define TEGRA_GP10B_BW_PER_FREQ 64 | 61 | #define TEGRA_GP10B_BW_PER_FREQ 64 |
59 | #define TEGRA_DDR4_BW_PER_FREQ 16 | 62 | #define TEGRA_DDR4_BW_PER_FREQ 16 |
60 | 63 | ||
@@ -166,6 +169,8 @@ static int gp10b_tegra_probe(struct device *dev) | |||
166 | gp10b_tegra_get_clocks(dev); | 169 | gp10b_tegra_get_clocks(dev); |
167 | nvgpu_linux_init_clk_support(platform->g); | 170 | nvgpu_linux_init_clk_support(platform->g); |
168 | 171 | ||
172 | nvgpu_mutex_init(&platform->clk_get_freq_lock); | ||
173 | |||
169 | return 0; | 174 | return 0; |
170 | } | 175 | } |
171 | 176 | ||
@@ -176,6 +181,8 @@ static int gp10b_tegra_late_probe(struct device *dev) | |||
176 | 181 | ||
177 | static int gp10b_tegra_remove(struct device *dev) | 182 | static int gp10b_tegra_remove(struct device *dev) |
178 | { | 183 | { |
184 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
185 | |||
179 | /* deinitialise tegra specific scaling quirks */ | 186 | /* deinitialise tegra specific scaling quirks */ |
180 | gp10b_tegra_scale_exit(dev); | 187 | gp10b_tegra_scale_exit(dev); |
181 | 188 | ||
@@ -183,6 +190,8 @@ static int gp10b_tegra_remove(struct device *dev) | |||
183 | nvgpu_free_nvhost_dev(get_gk20a(dev)); | 190 | nvgpu_free_nvhost_dev(get_gk20a(dev)); |
184 | #endif | 191 | #endif |
185 | 192 | ||
193 | nvgpu_mutex_destroy(&platform->clk_get_freq_lock); | ||
194 | |||
186 | return 0; | 195 | return 0; |
187 | } | 196 | } |
188 | 197 | ||
@@ -342,6 +351,18 @@ int gp10b_clk_get_freqs(struct device *dev, | |||
342 | int sel_freq_cnt; | 351 | int sel_freq_cnt; |
343 | unsigned long loc_freq_table[GP10B_MAX_SUPPORTED_FREQS]; | 352 | unsigned long loc_freq_table[GP10B_MAX_SUPPORTED_FREQS]; |
344 | 353 | ||
354 | nvgpu_mutex_acquire(&platform->clk_get_freq_lock); | ||
355 | |||
356 | if (freq_table_init_complete) { | ||
357 | |||
358 | *freqs = gp10b_freq_table; | ||
359 | *num_freqs = num_supported_freq; | ||
360 | |||
361 | nvgpu_mutex_release(&platform->clk_get_freq_lock); | ||
362 | |||
363 | return 0; | ||
364 | } | ||
365 | |||
345 | max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1)); | 366 | max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1)); |
346 | 367 | ||
347 | /* | 368 | /* |
@@ -392,10 +413,15 @@ int gp10b_clk_get_freqs(struct device *dev, | |||
392 | /* Fill freq table */ | 413 | /* Fill freq table */ |
393 | *freqs = gp10b_freq_table; | 414 | *freqs = gp10b_freq_table; |
394 | *num_freqs = sel_freq_cnt; | 415 | *num_freqs = sel_freq_cnt; |
416 | num_supported_freq = sel_freq_cnt; | ||
417 | |||
418 | freq_table_init_complete = true; | ||
395 | 419 | ||
396 | nvgpu_log_info(g, "min rate: %ld max rate: %ld num_of_freq %d\n", | 420 | nvgpu_log_info(g, "min rate: %ld max rate: %ld num_of_freq %d\n", |
397 | gp10b_freq_table[0], max_rate, *num_freqs); | 421 | gp10b_freq_table[0], max_rate, *num_freqs); |
398 | 422 | ||
423 | nvgpu_mutex_release(&platform->clk_get_freq_lock); | ||
424 | |||
399 | return 0; | 425 | return 0; |
400 | } | 426 | } |
401 | 427 | ||
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c index 1b4a5456..b055eb6e 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c | |||
@@ -97,6 +97,8 @@ static int gv11b_tegra_probe(struct device *dev) | |||
97 | gp10b_tegra_get_clocks(dev); | 97 | gp10b_tegra_get_clocks(dev); |
98 | nvgpu_linux_init_clk_support(platform->g); | 98 | nvgpu_linux_init_clk_support(platform->g); |
99 | 99 | ||
100 | nvgpu_mutex_init(&platform->clk_get_freq_lock); | ||
101 | |||
100 | return 0; | 102 | return 0; |
101 | } | 103 | } |
102 | 104 | ||
@@ -108,12 +110,16 @@ static int gv11b_tegra_late_probe(struct device *dev) | |||
108 | 110 | ||
109 | static int gv11b_tegra_remove(struct device *dev) | 111 | static int gv11b_tegra_remove(struct device *dev) |
110 | { | 112 | { |
113 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
114 | |||
111 | gv11b_tegra_scale_exit(dev); | 115 | gv11b_tegra_scale_exit(dev); |
112 | 116 | ||
113 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | 117 | #ifdef CONFIG_TEGRA_GK20A_NVHOST |
114 | nvgpu_free_nvhost_dev(get_gk20a(dev)); | 118 | nvgpu_free_nvhost_dev(get_gk20a(dev)); |
115 | #endif | 119 | #endif |
116 | 120 | ||
121 | nvgpu_mutex_destroy(&platform->clk_get_freq_lock); | ||
122 | |||
117 | return 0; | 123 | return 0; |
118 | } | 124 | } |
119 | 125 | ||
diff --git a/drivers/gpu/nvgpu/os/linux/scale.c b/drivers/gpu/nvgpu/os/linux/scale.c index 72ed94bd..ecc8207a 100644 --- a/drivers/gpu/nvgpu/os/linux/scale.c +++ b/drivers/gpu/nvgpu/os/linux/scale.c | |||
@@ -124,6 +124,7 @@ static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile) | |||
124 | /* get gpu frequency table */ | 124 | /* get gpu frequency table */ |
125 | err = platform->get_clk_freqs(profile->dev, &freqs, | 125 | err = platform->get_clk_freqs(profile->dev, &freqs, |
126 | &num_freqs); | 126 | &num_freqs); |
127 | |||
127 | if (err) | 128 | if (err) |
128 | return -ENOSYS; | 129 | return -ENOSYS; |
129 | } else | 130 | } else |