26 files changed, 1503 insertions(+), 1718 deletions(-)
diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt
index f7b12c071d53..e6bd1e6512a5 100644
--- a/Documentation/cpu-freq/intel-pstate.txt
+++ b/Documentation/cpu-freq/intel-pstate.txt
@@ -25,7 +25,7 @@ callback, so cpufreq core can't request a transition to a specific frequency.
 The driver provides minimum and maximum frequency limits and callbacks to set a
 policy. The policy in cpufreq sysfs is referred to as the "scaling governor".
 The cpufreq core can request the driver to operate in any of the two policies:
-"performance: and "powersave". The driver decides which frequency to use based
+"performance" and "powersave". The driver decides which frequency to use based
 on the above policy selection considering minimum and maximum frequency limits.
 
 The Intel P-State driver falls under the latter category, which implements the
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index f93511031177..a7f45853c103 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -19,6 +19,7 @@ config CPU_FREQ
 if CPU_FREQ
 
 config CPU_FREQ_GOV_COMMON
+	select IRQ_WORK
 	bool
 
 config CPU_FREQ_BOOST_SW
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 51eef87bbc37..59a7b380fbe2 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -70,6 +70,8 @@ struct acpi_cpufreq_data {
 	unsigned int cpu_feature;
 	unsigned int acpi_perf_cpu;
 	cpumask_var_t freqdomain_cpus;
+	void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
+	u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
 };
 
 /* acpi_perf_data is a pointer to percpu data. */
@@ -243,125 +245,119 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
 	}
 }
 
-struct msr_addr {
-	u32 reg;
-};
-
-struct io_addr {
-	u16 port;
-	u8 bit_width;
-};
+u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
+{
+	u32 val, dummy;
+
+	rdmsr(MSR_IA32_PERF_CTL, val, dummy);
+	return val;
+}
+
+void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
+{
+	u32 lo, hi;
+
+	rdmsr(MSR_IA32_PERF_CTL, lo, hi);
+	lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE);
+	wrmsr(MSR_IA32_PERF_CTL, lo, hi);
+}
+
+u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
+{
+	u32 val, dummy;
+
+	rdmsr(MSR_AMD_PERF_CTL, val, dummy);
+	return val;
+}
+
+void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
+{
+	wrmsr(MSR_AMD_PERF_CTL, val, 0);
+}
+
+u32 cpu_freq_read_io(struct acpi_pct_register *reg)
+{
+	u32 val;
+
+	acpi_os_read_port(reg->address, &val, reg->bit_width);
+	return val;
+}
+
+void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
+{
+	acpi_os_write_port(reg->address, val, reg->bit_width);
+}
 
 struct drv_cmd {
-	unsigned int type;
-	const struct cpumask *mask;
-	union {
-		struct msr_addr msr;
-		struct io_addr io;
-	} addr;
+	struct acpi_pct_register *reg;
 	u32 val;
+	union {
+		void (*write)(struct acpi_pct_register *reg, u32 val);
+		u32 (*read)(struct acpi_pct_register *reg);
+	} func;
 };
 
 /* Called via smp_call_function_single(), on the target CPU */
 static void do_drv_read(void *_cmd)
 {
 	struct drv_cmd *cmd = _cmd;
-	u32 h;
 
-	switch (cmd->type) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-	case SYSTEM_AMD_MSR_CAPABLE:
-		rdmsr(cmd->addr.msr.reg, cmd->val, h);
-		break;
-	case SYSTEM_IO_CAPABLE:
-		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
-				  &cmd->val,
-				  (u32)cmd->addr.io.bit_width);
-		break;
-	default:
-		break;
-	}
+	cmd->val = cmd->func.read(cmd->reg);
 }
 
-/* Called via smp_call_function_many(), on the target CPUs */
-static void do_drv_write(void *_cmd)
+static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask)
 {
-	struct drv_cmd *cmd = _cmd;
-	u32 lo, hi;
+	struct acpi_processor_performance *perf = to_perf_data(data);
+	struct drv_cmd cmd = {
+		.reg = &perf->control_register,
+		.func.read = data->cpu_freq_read,
+	};
+	int err;
 
-	switch (cmd->type) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		rdmsr(cmd->addr.msr.reg, lo, hi);
-		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
-		wrmsr(cmd->addr.msr.reg, lo, hi);
-		break;
-	case SYSTEM_AMD_MSR_CAPABLE:
-		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
-		break;
-	case SYSTEM_IO_CAPABLE:
-		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
-				   cmd->val,
-				   (u32)cmd->addr.io.bit_width);
-		break;
-	default:
-		break;
-	}
+	err = smp_call_function_any(mask, do_drv_read, &cmd, 1);
+	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
+	return cmd.val;
 }
 
-static void drv_read(struct drv_cmd *cmd)
+/* Called via smp_call_function_many(), on the target CPUs */
+static void do_drv_write(void *_cmd)
 {
-	int err;
-	cmd->val = 0;
+	struct drv_cmd *cmd = _cmd;
 
-	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
-	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
+	cmd->func.write(cmd->reg, cmd->val);
 }
 
-static void drv_write(struct drv_cmd *cmd)
+static void drv_write(struct acpi_cpufreq_data *data,
+		      const struct cpumask *mask, u32 val)
 {
+	struct acpi_processor_performance *perf = to_perf_data(data);
+	struct drv_cmd cmd = {
+		.reg = &perf->control_register,
+		.val = val,
+		.func.write = data->cpu_freq_write,
+	};
 	int this_cpu;
 
 	this_cpu = get_cpu();
-	if (cpumask_test_cpu(this_cpu, cmd->mask))
-		do_drv_write(cmd);
-	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
+	if (cpumask_test_cpu(this_cpu, mask))
+		do_drv_write(&cmd);
+
+	smp_call_function_many(mask, do_drv_write, &cmd, 1);
 	put_cpu();
 }
 
-static u32
-get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
+static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
 {
-	struct acpi_processor_performance *perf;
-	struct drv_cmd cmd;
+	u32 val;
 
 	if (unlikely(cpumask_empty(mask)))
 		return 0;
 
-	switch (data->cpu_feature) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
-		break;
-	case SYSTEM_AMD_MSR_CAPABLE:
-		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
-		break;
-	case SYSTEM_IO_CAPABLE:
-		cmd.type = SYSTEM_IO_CAPABLE;
-		perf = to_perf_data(data);
-		cmd.addr.io.port = perf->control_register.address;
-		cmd.addr.io.bit_width = perf->control_register.bit_width;
-		break;
-	default:
-		return 0;
-	}
-
-	cmd.mask = mask;
-	drv_read(&cmd);
+	val = drv_read(data, mask);
 
-	pr_debug("get_cur_val = %u\n", cmd.val);
+	pr_debug("get_cur_val = %u\n", val);
 
-	return cmd.val;
+	return val;
 }
 
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
@@ -416,7 +412,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 {
 	struct acpi_cpufreq_data *data = policy->driver_data;
 	struct acpi_processor_performance *perf;
-	struct drv_cmd cmd;
+	const struct cpumask *mask;
 	unsigned int next_perf_state = 0; /* Index into perf table */
 	int result = 0;
 
@@ -434,42 +430,21 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	} else {
 		pr_debug("Already at target state (P%d)\n",
 			 next_perf_state);
-			goto out;
+			return 0;
 		}
 	}
 
-	switch (data->cpu_feature) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
-		cmd.val = (u32) perf->states[next_perf_state].control;
-		break;
-	case SYSTEM_AMD_MSR_CAPABLE:
-		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
-		cmd.val = (u32) perf->states[next_perf_state].control;
-		break;
-	case SYSTEM_IO_CAPABLE:
-		cmd.type = SYSTEM_IO_CAPABLE;
-		cmd.addr.io.port = perf->control_register.address;
-		cmd.addr.io.bit_width = perf->control_register.bit_width;
-		cmd.val = (u32) perf->states[next_perf_state].control;
-		break;
-	default:
-		result = -ENODEV;
-		goto out;
-	}
-
-	/* cpufreq holds the hotplug lock, so we are safe from here on */
-	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
-		cmd.mask = policy->cpus;
-	else
-		cmd.mask = cpumask_of(policy->cpu);
+	/*
+	 * The core won't allow CPUs to go away until the governor has been
+	 * stopped, so we can rely on the stability of policy->cpus.
+	 */
+	mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ?
+		cpumask_of(policy->cpu) : policy->cpus;
 
-	drv_write(&cmd);
+	drv_write(data, mask, perf->states[next_perf_state].control);
 
 	if (acpi_pstate_strict) {
-		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
+		if (!check_freqs(mask, data->freq_table[index].frequency,
 				 data)) {
 			pr_debug("acpi_cpufreq_target failed (%d)\n",
 				 policy->cpu);
@@ -480,7 +455,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	if (!result)
 		perf->state = next_perf_state;
 
-out:
 	return result;
 }
 
@@ -740,15 +714,21 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		}
 		pr_debug("SYSTEM IO addr space\n");
 		data->cpu_feature = SYSTEM_IO_CAPABLE;
+		data->cpu_freq_read = cpu_freq_read_io;
+		data->cpu_freq_write = cpu_freq_write_io;
 		break;
 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
 		pr_debug("HARDWARE addr space\n");
 		if (check_est_cpu(cpu)) {
 			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
+			data->cpu_freq_read = cpu_freq_read_intel;
+			data->cpu_freq_write = cpu_freq_write_intel;
 			break;
 		}
 		if (check_amd_hwpstate_cpu(cpu)) {
 			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
+			data->cpu_freq_read = cpu_freq_read_amd;
+			data->cpu_freq_write = cpu_freq_write_amd;
 			break;
 		}
 		result = -ENODEV;
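
The acpi-cpufreq change above is a textbook de-virtualization of a switch: instead of re-testing data->cpu_feature on every register access (in IPI context, no less), the access method is resolved once in acpi_cpufreq_cpu_init() into a read/write function-pointer pair. A minimal userspace sketch of the same pattern, with illustrative names rather than the kernel's:

#include <stdio.h>
#include <stdint.h>

/* One read/write pair per backend, chosen once at init time,
 * analogous to data->cpu_freq_read/data->cpu_freq_write. */
struct freq_ops {
	uint32_t (*read)(void);
	void (*write)(uint32_t val);
};

static uint32_t fake_reg;	/* stand-in for an MSR or I/O port */

static uint32_t backend_read(void) { return fake_reg; }
static void backend_write(uint32_t val) { fake_reg = val; }

/* The hot path just calls through the pointers; no switch on a
 * type tag is left on this path. */
static void set_and_check(const struct freq_ops *ops, uint32_t val)
{
	ops->write(val);
	printf("read back: %u\n", ops->read());
}

int main(void)
{
	/* Init-time selection, like acpi_cpufreq_cpu_init() picking
	 * the _io, _intel or _amd pair based on the ACPI tables. */
	struct freq_ops ops = { backend_read, backend_write };

	set_and_check(&ops, 42);
	return 0;
}

Besides shrinking the cross-CPU code paths, this lets struct drv_cmd stay small enough to build on the stack per call.
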
diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
index f6b79ab0070b..404360cad25c 100644
--- a/drivers/cpufreq/amd_freq_sensitivity.c
+++ b/drivers/cpufreq/amd_freq_sensitivity.c
@@ -21,7 +21,7 @@
 #include <asm/msr.h>
 #include <asm/cpufeature.h>
 
-#include "cpufreq_governor.h"
+#include "cpufreq_ondemand.h"
 
 #define MSR_AMD64_FREQ_SENSITIVITY_ACTUAL	0xc0010080
 #define MSR_AMD64_FREQ_SENSITIVITY_REFERENCE	0xc0010081
@@ -45,10 +45,10 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
 	long d_actual, d_reference;
 	struct msr actual, reference;
 	struct cpu_data_t *data = &per_cpu(cpu_data, policy->cpu);
-	struct dbs_data *od_data = policy->governor_data;
+	struct policy_dbs_info *policy_dbs = policy->governor_data;
+	struct dbs_data *od_data = policy_dbs->dbs_data;
 	struct od_dbs_tuners *od_tuners = od_data->tuners;
-	struct od_cpu_dbs_info_s *od_info =
-		od_data->cdata->get_cpu_dbs_info_s(policy->cpu);
+	struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs);
 
 	if (!od_info->freq_table)
 		return freq_next;
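
This hunk reflects a new layering in the common governor code: policy->governor_data no longer points at struct dbs_data directly but at a per-policy struct policy_dbs_info, with the shared tunables one hop further away. A compilable sketch of that indirection with simplified stand-in types (the real definitions carry many more fields; only the field names here mirror the diff):

#include <stdio.h>

struct dbs_data {
	void *tuners;			/* e.g. struct od_dbs_tuners */
};

struct policy_dbs_info {
	struct dbs_data *dbs_data;	/* shared tunable set */
};

struct cpufreq_policy {
	void *governor_data;		/* now a struct policy_dbs_info * */
};

static void *get_tuners(struct cpufreq_policy *policy)
{
	struct policy_dbs_info *policy_dbs = policy->governor_data;

	/* Two hops instead of one: policy -> policy_dbs -> dbs_data. */
	return policy_dbs->dbs_data->tuners;
}

int main(void)
{
	int od_tuners = 42;
	struct dbs_data dbs = { .tuners = &od_tuners };
	struct policy_dbs_info pdbs = { .dbs_data = &dbs };
	struct cpufreq_policy policy = { .governor_data = &pdbs };

	printf("tuners: %d\n", *(int *)get_tuners(&policy));
	return 0;
}
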
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 0ca74d070058..f951f911786e 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -31,9 +31,8 @@
 
 struct private_data {
 	struct device *cpu_dev;
-	struct regulator *cpu_reg;
 	struct thermal_cooling_device *cdev;
-	unsigned int voltage_tolerance; /* in percentage */
+	const char *reg_name;
 };
 
 static struct freq_attr *cpufreq_dt_attr[] = {
@@ -44,175 +43,128 @@ static struct freq_attr *cpufreq_dt_attr[] = {
 
 static int set_target(struct cpufreq_policy *policy, unsigned int index)
 {
-	struct dev_pm_opp *opp;
-	struct cpufreq_frequency_table *freq_table = policy->freq_table;
-	struct clk *cpu_clk = policy->clk;
 	struct private_data *priv = policy->driver_data;
-	struct device *cpu_dev = priv->cpu_dev;
-	struct regulator *cpu_reg = priv->cpu_reg;
-	unsigned long volt = 0, tol = 0;
-	int volt_old = 0;
-	unsigned int old_freq, new_freq;
-	long freq_Hz, freq_exact;
-	int ret;
-
-	freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
-	if (freq_Hz <= 0)
-		freq_Hz = freq_table[index].frequency * 1000;
 
-	freq_exact = freq_Hz;
-	new_freq = freq_Hz / 1000;
-	old_freq = clk_get_rate(cpu_clk) / 1000;
+	return dev_pm_opp_set_rate(priv->cpu_dev,
+				   policy->freq_table[index].frequency * 1000);
+}
 
-	if (!IS_ERR(cpu_reg)) {
-		unsigned long opp_freq;
+/*
+ * An earlier version of opp-v1 bindings used to name the regulator
+ * "cpu0-supply", we still need to handle that for backwards compatibility.
+ */
+static const char *find_supply_name(struct device *dev)
+{
+	struct device_node *np;
+	struct property *pp;
+	int cpu = dev->id;
+	const char *name = NULL;
 
-		rcu_read_lock();
-		opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_Hz);
-		if (IS_ERR(opp)) {
-			rcu_read_unlock();
-			dev_err(cpu_dev, "failed to find OPP for %ld\n",
-				freq_Hz);
-			return PTR_ERR(opp);
-		}
-		volt = dev_pm_opp_get_voltage(opp);
-		opp_freq = dev_pm_opp_get_freq(opp);
-		rcu_read_unlock();
-		tol = volt * priv->voltage_tolerance / 100;
-		volt_old = regulator_get_voltage(cpu_reg);
-		dev_dbg(cpu_dev, "Found OPP: %ld kHz, %ld uV\n",
-			opp_freq / 1000, volt);
-	}
+	np = of_node_get(dev->of_node);
 
-	dev_dbg(cpu_dev, "%u MHz, %d mV --> %u MHz, %ld mV\n",
-		old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1,
-		new_freq / 1000, volt ? volt / 1000 : -1);
+	/* This must be valid for sure */
+	if (WARN_ON(!np))
+		return NULL;
 
-	/* scaling up? scale voltage before frequency */
-	if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
-		ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
-		if (ret) {
-			dev_err(cpu_dev, "failed to scale voltage up: %d\n",
-				ret);
-			return ret;
+	/* Try "cpu0" for older DTs */
+	if (!cpu) {
+		pp = of_find_property(np, "cpu0-supply", NULL);
+		if (pp) {
+			name = "cpu0";
+			goto node_put;
 		}
 	}
 
-	ret = clk_set_rate(cpu_clk, freq_exact);
-	if (ret) {
-		dev_err(cpu_dev, "failed to set clock rate: %d\n", ret);
-		if (!IS_ERR(cpu_reg) && volt_old > 0)
-			regulator_set_voltage_tol(cpu_reg, volt_old, tol);
-		return ret;
+	pp = of_find_property(np, "cpu-supply", NULL);
+	if (pp) {
+		name = "cpu";
+		goto node_put;
 	}
 
-	/* scaling down? scale voltage after frequency */
-	if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
-		ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
-		if (ret) {
-			dev_err(cpu_dev, "failed to scale voltage down: %d\n",
-				ret);
-			clk_set_rate(cpu_clk, old_freq * 1000);
-		}
-	}
-
-	return ret;
+	dev_dbg(dev, "no regulator for cpu%d\n", cpu);
+node_put:
+	of_node_put(np);
+	return name;
 }
 
-static int allocate_resources(int cpu, struct device **cdev,
-			      struct regulator **creg, struct clk **cclk)
+static int resources_available(void)
 {
 	struct device *cpu_dev;
 	struct regulator *cpu_reg;
 	struct clk *cpu_clk;
 	int ret = 0;
-	char *reg_cpu0 = "cpu0", *reg_cpu = "cpu", *reg;
+	const char *name;
 
-	cpu_dev = get_cpu_device(cpu);
+	cpu_dev = get_cpu_device(0);
 	if (!cpu_dev) {
-		pr_err("failed to get cpu%d device\n", cpu);
+		pr_err("failed to get cpu0 device\n");
 		return -ENODEV;
 	}
 
-	/* Try "cpu0" for older DTs */
-	if (!cpu)
-		reg = reg_cpu0;
-	else
-		reg = reg_cpu;
-
-try_again:
-	cpu_reg = regulator_get_optional(cpu_dev, reg);
-	ret = PTR_ERR_OR_ZERO(cpu_reg);
+	cpu_clk = clk_get(cpu_dev, NULL);
+	ret = PTR_ERR_OR_ZERO(cpu_clk);
 	if (ret) {
 		/*
-		 * If cpu's regulator supply node is present, but regulator is
-		 * not yet registered, we should try defering probe.
+		 * If cpu's clk node is present, but clock is not yet
+		 * registered, we should try defering probe.
 		 */
-		if (ret == -EPROBE_DEFER) {
-			dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n",
-				cpu);
-			return ret;
-		}
-
-		/* Try with "cpu-supply" */
-		if (reg == reg_cpu0) {
-			reg = reg_cpu;
-			goto try_again;
-		}
+		if (ret == -EPROBE_DEFER)
+			dev_dbg(cpu_dev, "clock not ready, retry\n");
+		else
+			dev_err(cpu_dev, "failed to get clock: %d\n", ret);
 
-		dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret);
+		return ret;
 	}
 
-	cpu_clk = clk_get(cpu_dev, NULL);
-	ret = PTR_ERR_OR_ZERO(cpu_clk);
-	if (ret) {
-		/* put regulator */
-		if (!IS_ERR(cpu_reg))
-			regulator_put(cpu_reg);
+	clk_put(cpu_clk);
 
+	name = find_supply_name(cpu_dev);
+	/* Platform doesn't require regulator */
+	if (!name)
+		return 0;
+
+	cpu_reg = regulator_get_optional(cpu_dev, name);
+	ret = PTR_ERR_OR_ZERO(cpu_reg);
+	if (ret) {
 		/*
-		 * If cpu's clk node is present, but clock is not yet
-		 * registered, we should try defering probe.
+		 * If cpu's regulator supply node is present, but regulator is
+		 * not yet registered, we should try defering probe.
 		 */
 		if (ret == -EPROBE_DEFER)
-			dev_dbg(cpu_dev, "cpu%d clock not ready, retry\n", cpu);
+			dev_dbg(cpu_dev, "cpu0 regulator not ready, retry\n");
 		else
-			dev_err(cpu_dev, "failed to get cpu%d clock: %d\n", cpu,
-				ret);
-	} else {
-		*cdev = cpu_dev;
-		*creg = cpu_reg;
-		*cclk = cpu_clk;
+			dev_dbg(cpu_dev, "no regulator for cpu0: %d\n", ret);
+
+		return ret;
 	}
 
-	return ret;
+	regulator_put(cpu_reg);
+	return 0;
 }
 
 static int cpufreq_init(struct cpufreq_policy *policy)
 {
 	struct cpufreq_frequency_table *freq_table;
-	struct device_node *np;
 	struct private_data *priv;
 	struct device *cpu_dev;
-	struct regulator *cpu_reg;
 	struct clk *cpu_clk;
 	struct dev_pm_opp *suspend_opp;
-	unsigned long min_uV = ~0, max_uV = 0;
 	unsigned int transition_latency;
-	bool need_update = false;
+	bool opp_v1 = false;
+	const char *name;
 	int ret;
 
-	ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk);
-	if (ret) {
-		pr_err("%s: Failed to allocate resources: %d\n", __func__, ret);
-		return ret;
+	cpu_dev = get_cpu_device(policy->cpu);
+	if (!cpu_dev) {
+		pr_err("failed to get cpu%d device\n", policy->cpu);
+		return -ENODEV;
 	}
 
-	np = of_node_get(cpu_dev->of_node);
-	if (!np) {
-		dev_err(cpu_dev, "failed to find cpu%d node\n", policy->cpu);
-		ret = -ENOENT;
-		goto out_put_reg_clk;
+	cpu_clk = clk_get(cpu_dev, NULL);
+	if (IS_ERR(cpu_clk)) {
+		ret = PTR_ERR(cpu_clk);
+		dev_err(cpu_dev, "%s: failed to get clk: %d\n", __func__, ret);
+		return ret;
 	}
 
 	/* Get OPP-sharing information from "operating-points-v2" bindings */
@@ -223,9 +175,23 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	 * finding shared-OPPs for backward compatibility.
 	 */
 	if (ret == -ENOENT)
-		need_update = true;
+		opp_v1 = true;
 	else
-		goto out_node_put;
+		goto out_put_clk;
+	}
+
+	/*
+	 * OPP layer will be taking care of regulators now, but it needs to know
+	 * the name of the regulator first.
+	 */
+	name = find_supply_name(cpu_dev);
+	if (name) {
+		ret = dev_pm_opp_set_regulator(cpu_dev, name);
+		if (ret) {
+			dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n",
+				policy->cpu, ret);
+			goto out_put_clk;
+		}
 	}
 
 	/*
@@ -246,12 +212,12 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	 */
 	ret = dev_pm_opp_get_opp_count(cpu_dev);
 	if (ret <= 0) {
-		pr_debug("OPP table is not ready, deferring probe\n");
+		dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n");
 		ret = -EPROBE_DEFER;
 		goto out_free_opp;
 	}
 
-	if (need_update) {
+	if (opp_v1) {
 		struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data();
 
 		if (!pd || !pd->independent_clocks)
@@ -265,10 +231,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		if (ret)
 			dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
 				__func__, ret);
-
-		of_property_read_u32(np, "clock-latency", &transition_latency);
-	} else {
-		transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev);
 	}
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@@ -277,62 +239,16 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		goto out_free_opp;
 	}
 
-	of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance);
-
-	if (!transition_latency)
-		transition_latency = CPUFREQ_ETERNAL;
-
-	if (!IS_ERR(cpu_reg)) {
-		unsigned long opp_freq = 0;
-
-		/*
-		 * Disable any OPPs where the connected regulator isn't able to
-		 * provide the specified voltage and record minimum and maximum
-		 * voltage levels.
-		 */
-		while (1) {
-			struct dev_pm_opp *opp;
-			unsigned long opp_uV, tol_uV;
-
-			rcu_read_lock();
-			opp = dev_pm_opp_find_freq_ceil(cpu_dev, &opp_freq);
-			if (IS_ERR(opp)) {
-				rcu_read_unlock();
-				break;
-			}
-			opp_uV = dev_pm_opp_get_voltage(opp);
-			rcu_read_unlock();
-
-			tol_uV = opp_uV * priv->voltage_tolerance / 100;
-			if (regulator_is_supported_voltage(cpu_reg,
-							   opp_uV - tol_uV,
-							   opp_uV + tol_uV)) {
-				if (opp_uV < min_uV)
-					min_uV = opp_uV;
-				if (opp_uV > max_uV)
-					max_uV = opp_uV;
-			} else {
-				dev_pm_opp_disable(cpu_dev, opp_freq);
-			}
-
-			opp_freq++;
-		}
-
-		ret = regulator_set_voltage_time(cpu_reg, min_uV, max_uV);
-		if (ret > 0)
-			transition_latency += ret * 1000;
-	}
+	priv->reg_name = name;
 
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
-		pr_err("failed to init cpufreq table: %d\n", ret);
+		dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
 		goto out_free_priv;
 	}
 
 	priv->cpu_dev = cpu_dev;
-	priv->cpu_reg = cpu_reg;
 	policy->driver_data = priv;
-
 	policy->clk = cpu_clk;
 
 	rcu_read_lock();
@@ -357,9 +273,11 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs;
 	}
 
-	policy->cpuinfo.transition_latency = transition_latency;
+	transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev);
+	if (!transition_latency)
+		transition_latency = CPUFREQ_ETERNAL;
 
-	of_node_put(np);
+	policy->cpuinfo.transition_latency = transition_latency;
 
 	return 0;
 
@@ -369,12 +287,10 @@ out_free_priv:
 	kfree(priv);
 out_free_opp:
 	dev_pm_opp_of_cpumask_remove_table(policy->cpus);
-out_node_put:
-	of_node_put(np);
-out_put_reg_clk:
+	if (name)
+		dev_pm_opp_put_regulator(cpu_dev);
+out_put_clk:
 	clk_put(cpu_clk);
-	if (!IS_ERR(cpu_reg))
-		regulator_put(cpu_reg);
 
 	return ret;
 }
@@ -386,9 +302,10 @@ static int cpufreq_exit(struct cpufreq_policy *policy)
 	cpufreq_cooling_unregister(priv->cdev);
 	dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
 	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
+	if (priv->reg_name)
+		dev_pm_opp_put_regulator(priv->cpu_dev);
+
 	clk_put(policy->clk);
-	if (!IS_ERR(priv->cpu_reg))
-		regulator_put(priv->cpu_reg);
 	kfree(priv);
 
 	return 0;
@@ -441,9 +358,6 @@ static struct cpufreq_driver dt_cpufreq_driver = {
 
 static int dt_cpufreq_probe(struct platform_device *pdev)
 {
-	struct device *cpu_dev;
-	struct regulator *cpu_reg;
-	struct clk *cpu_clk;
 	int ret;
 
 	/*
@@ -453,19 +367,15 @@ static int dt_cpufreq_probe(struct platform_device *pdev)
 	 *
 	 * FIXME: Is checking this only for CPU0 sufficient ?
 	 */
-	ret = allocate_resources(0, &cpu_dev, &cpu_reg, &cpu_clk);
+	ret = resources_available();
 	if (ret)
 		return ret;
 
-	clk_put(cpu_clk);
-	if (!IS_ERR(cpu_reg))
-		regulator_put(cpu_reg);
-
 	dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev);
 
 	ret = cpufreq_register_driver(&dt_cpufreq_driver);
 	if (ret)
-		dev_err(cpu_dev, "failed register driver: %d\n", ret);
+		dev_err(&pdev->dev, "failed register driver: %d\n", ret);
 
 	return ret;
 }
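
With dev_pm_opp_set_rate(), the driver delegates the whole clk-plus-regulator transition to the OPP core, including the ordering rule the old set_target() implemented by hand: raise the voltage before the clock when scaling up, lower it after the clock when scaling down, and roll back on failure. A standalone sketch of that ordering rule with stub helpers (the helpers are hypothetical, not the OPP core's internals):

#include <stdio.h>

/* Stub "hardware" operations for the sketch. */
static int set_voltage(unsigned long uv)
{
	printf("V -> %lu uV\n", uv);
	return 0;
}

static int set_clock_rate(unsigned long hz)
{
	printf("f -> %lu Hz\n", hz);
	return 0;
}

static int set_rate_sketch(unsigned long new_hz, unsigned long old_hz,
			   unsigned long new_uv, unsigned long old_uv)
{
	int ret;

	if (new_hz > old_hz) {			/* scaling up */
		ret = set_voltage(new_uv);
		if (ret)
			return ret;
	}

	ret = set_clock_rate(new_hz);
	if (ret) {
		if (new_hz > old_hz)
			set_voltage(old_uv);	/* roll the voltage back */
		return ret;
	}

	if (new_hz < old_hz)			/* scaling down */
		ret = set_voltage(new_uv);

	return ret;
}

int main(void)
{
	/* e.g. 500 MHz @ 900 mV -> 1 GHz @ 1.1 V */
	return set_rate_sketch(1000000000UL, 500000000UL,
			       1100000UL, 900000UL);
}
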
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index e979ec78b695..4c7825856eab 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -38,48 +38,10 @@ static inline bool policy_is_inactive(struct cpufreq_policy *policy)
 	return cpumask_empty(policy->cpus);
 }
 
-static bool suitable_policy(struct cpufreq_policy *policy, bool active)
-{
-	return active == !policy_is_inactive(policy);
-}
-
-/* Finds Next Acive/Inactive policy */
-static struct cpufreq_policy *next_policy(struct cpufreq_policy *policy,
-					  bool active)
-{
-	do {
-		/* No more policies in the list */
-		if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
-			return NULL;
-
-		policy = list_next_entry(policy, policy_list);
-	} while (!suitable_policy(policy, active));
-
-	return policy;
-}
-
-static struct cpufreq_policy *first_policy(bool active)
-{
-	struct cpufreq_policy *policy;
-
-	/* No policies in the list */
-	if (list_empty(&cpufreq_policy_list))
-		return NULL;
-
-	policy = list_first_entry(&cpufreq_policy_list, typeof(*policy),
-				  policy_list);
-
-	if (!suitable_policy(policy, active))
-		policy = next_policy(policy, active);
-
-	return policy;
-}
-
 /* Macros to iterate over CPU policies */
-#define for_each_suitable_policy(__policy, __active)	\
-	for (__policy = first_policy(__active);		\
-	     __policy;					\
-	     __policy = next_policy(__policy, __active))
+#define for_each_suitable_policy(__policy, __active)			 \
+	list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) \
+		if ((__active) == !policy_is_inactive(__policy))
 
 #define for_each_active_policy(__policy)		\
 	for_each_suitable_policy(__policy, true)
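
The rewritten macro relies on a well-known C trick: a for-loop macro may end in an if-filter, and the caller's statement (or block) then becomes the body of that if, so non-matching entries are simply skipped. A standalone sketch of the same shape:

#include <stdio.h>

struct item {
	int value;
	int active;
};

/* Same shape as the new for_each_suitable_policy(): an iteration
 * macro ending in an if-filter that swallows the caller's body. */
#define for_each_active_item(it, arr, n)		\
	for ((it) = (arr); (it) < (arr) + (n); (it)++)	\
		if ((it)->active)

int main(void)
{
	struct item items[] = { { 1, 1 }, { 2, 0 }, { 3, 1 } };
	struct item *it;

	for_each_active_item(it, items, 3)
		printf("%d\n", it->value);	/* prints 1 and 3 */

	return 0;
}

One caveat of this shape is the dangling-else ambiguity: an else written directly after the loop body would pair with the macro's hidden if, so callers should brace anything non-trivial.
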
@@ -102,7 +64,6 @@ static LIST_HEAD(cpufreq_governor_list);
 static struct cpufreq_driver *cpufreq_driver;
 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
 static DEFINE_RWLOCK(cpufreq_driver_lock);
-DEFINE_MUTEX(cpufreq_governor_lock);
 
 /* Flag to suspend/resume CPUFreq governors */
 static bool cpufreq_suspended;
@@ -113,10 +74,8 @@ static inline bool has_target(void)
 }
 
 /* internal prototypes */
-static int __cpufreq_governor(struct cpufreq_policy *policy,
-		unsigned int event);
+static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
 static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
-static void handle_update(struct work_struct *work);
 
 /**
  * Two notifier lists: the "policy" list is involved in the
@@ -818,12 +777,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 	ssize_t ret;
 
 	down_read(&policy->rwsem);
-
-	if (fattr->show)
-		ret = fattr->show(policy, buf);
-	else
-		ret = -EIO;
-
+	ret = fattr->show(policy, buf);
 	up_read(&policy->rwsem);
 
 	return ret;
@@ -838,18 +792,12 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 
 	get_online_cpus();
 
-	if (!cpu_online(policy->cpu))
-		goto unlock;
-
-	down_write(&policy->rwsem);
-
-	if (fattr->store)
+	if (cpu_online(policy->cpu)) {
+		down_write(&policy->rwsem);
 		ret = fattr->store(policy, buf, count);
-	else
-		ret = -EIO;
+		up_write(&policy->rwsem);
+	}
 
-	up_write(&policy->rwsem);
-unlock:
 	put_online_cpus();
 
 	return ret;
@@ -959,6 +907,11 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
 	return cpufreq_add_dev_symlink(policy);
 }
 
+__weak struct cpufreq_governor *cpufreq_default_governor(void)
+{
+	return NULL;
+}
+
 static int cpufreq_init_policy(struct cpufreq_policy *policy)
 {
 	struct cpufreq_governor *gov = NULL;
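
cpufreq_default_governor() is introduced here as a __weak stub returning NULL; whichever default governor is built in can then supply the strong definition, replacing the old CPUFREQ_DEFAULT_GOVERNOR plumbing. A userspace sketch of the weak-symbol pattern (illustrative names; __weak is the kernel's wrapper around this GCC attribute):

#include <stdio.h>

/* Weak default: used only if no strong definition of the same
 * symbol is linked into the binary. */
__attribute__((weak)) const char *default_governor(void)
{
	return NULL;
}

int main(void)
{
	const char *name = default_governor();

	/* Mirrors cpufreq_init_policy(): if nothing overrides the
	 * stub there is no default, and the caller must cope
	 * (here: print a placeholder; there: return -ENODATA). */
	printf("default governor: %s\n", name ? name : "(none)");
	return 0;
}

Linking in another object that defines a non-weak default_governor() silently overrides the stub, which is how a built-in governor can provide the real default without #ifdefs in the core.
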
@@ -968,11 +921,14 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) | |||
968 | 921 | ||
969 | /* Update governor of new_policy to the governor used before hotplug */ | 922 | /* Update governor of new_policy to the governor used before hotplug */ |
970 | gov = find_governor(policy->last_governor); | 923 | gov = find_governor(policy->last_governor); |
971 | if (gov) | 924 | if (gov) { |
972 | pr_debug("Restoring governor %s for cpu %d\n", | 925 | pr_debug("Restoring governor %s for cpu %d\n", |
973 | policy->governor->name, policy->cpu); | 926 | policy->governor->name, policy->cpu); |
974 | else | 927 | } else { |
975 | gov = CPUFREQ_DEFAULT_GOVERNOR; | 928 | gov = cpufreq_default_governor(); |
929 | if (!gov) | ||
930 | return -ENODATA; | ||
931 | } | ||
976 | 932 | ||
977 | new_policy.governor = gov; | 933 | new_policy.governor = gov; |
978 | 934 | ||
@@ -996,36 +952,45 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp | |||
996 | if (cpumask_test_cpu(cpu, policy->cpus)) | 952 | if (cpumask_test_cpu(cpu, policy->cpus)) |
997 | return 0; | 953 | return 0; |
998 | 954 | ||
955 | down_write(&policy->rwsem); | ||
999 | if (has_target()) { | 956 | if (has_target()) { |
1000 | ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); | 957 | ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); |
1001 | if (ret) { | 958 | if (ret) { |
1002 | pr_err("%s: Failed to stop governor\n", __func__); | 959 | pr_err("%s: Failed to stop governor\n", __func__); |
1003 | return ret; | 960 | goto unlock; |
1004 | } | 961 | } |
1005 | } | 962 | } |
1006 | 963 | ||
1007 | down_write(&policy->rwsem); | ||
1008 | cpumask_set_cpu(cpu, policy->cpus); | 964 | cpumask_set_cpu(cpu, policy->cpus); |
1009 | up_write(&policy->rwsem); | ||
1010 | 965 | ||
1011 | if (has_target()) { | 966 | if (has_target()) { |
1012 | ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); | 967 | ret = cpufreq_governor(policy, CPUFREQ_GOV_START); |
1013 | if (!ret) | 968 | if (!ret) |
1014 | ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); | 969 | ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); |
1015 | 970 | ||
1016 | if (ret) { | 971 | if (ret) |
1017 | pr_err("%s: Failed to start governor\n", __func__); | 972 | pr_err("%s: Failed to start governor\n", __func__); |
1018 | return ret; | ||
1019 | } | ||
1020 | } | 973 | } |
1021 | 974 | ||
1022 | return 0; | 975 | unlock: |
976 | up_write(&policy->rwsem); | ||
977 | return ret; | ||
978 | } | ||
979 | |||
980 | static void handle_update(struct work_struct *work) | ||
981 | { | ||
982 | struct cpufreq_policy *policy = | ||
983 | container_of(work, struct cpufreq_policy, update); | ||
984 | unsigned int cpu = policy->cpu; | ||
985 | pr_debug("handle_update for cpu %u called\n", cpu); | ||
986 | cpufreq_update_policy(cpu); | ||
1023 | } | 987 | } |
1024 | 988 | ||
1025 | static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) | 989 | static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) |
1026 | { | 990 | { |
1027 | struct device *dev = get_cpu_device(cpu); | 991 | struct device *dev = get_cpu_device(cpu); |
1028 | struct cpufreq_policy *policy; | 992 | struct cpufreq_policy *policy; |
993 | int ret; | ||
1029 | 994 | ||
1030 | if (WARN_ON(!dev)) | 995 | if (WARN_ON(!dev)) |
1031 | return NULL; | 996 | return NULL; |
@@ -1043,7 +1008,13 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) | |||
1043 | if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) | 1008 | if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) |
1044 | goto err_free_rcpumask; | 1009 | goto err_free_rcpumask; |
1045 | 1010 | ||
1046 | kobject_init(&policy->kobj, &ktype_cpufreq); | 1011 | ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, |
1012 | cpufreq_global_kobject, "policy%u", cpu); | ||
1013 | if (ret) { | ||
1014 | pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); | ||
1015 | goto err_free_real_cpus; | ||
1016 | } | ||
1017 | |||
1047 | INIT_LIST_HEAD(&policy->policy_list); | 1018 | INIT_LIST_HEAD(&policy->policy_list); |
1048 | init_rwsem(&policy->rwsem); | 1019 | init_rwsem(&policy->rwsem); |
1049 | spin_lock_init(&policy->transition_lock); | 1020 | spin_lock_init(&policy->transition_lock); |
@@ -1054,6 +1025,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) | |||
1054 | policy->cpu = cpu; | 1025 | policy->cpu = cpu; |
1055 | return policy; | 1026 | return policy; |
1056 | 1027 | ||
1028 | err_free_real_cpus: | ||
1029 | free_cpumask_var(policy->real_cpus); | ||
1057 | err_free_rcpumask: | 1030 | err_free_rcpumask: |
1058 | free_cpumask_var(policy->related_cpus); | 1031 | free_cpumask_var(policy->related_cpus); |
1059 | err_free_cpumask: | 1032 | err_free_cpumask: |
@@ -1158,16 +1131,6 @@ static int cpufreq_online(unsigned int cpu) | |||
1158 | cpumask_copy(policy->related_cpus, policy->cpus); | 1131 | cpumask_copy(policy->related_cpus, policy->cpus); |
1159 | /* Remember CPUs present at the policy creation time. */ | 1132 | /* Remember CPUs present at the policy creation time. */ |
1160 | cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); | 1133 | cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); |
1161 | |||
1162 | /* Name and add the kobject */ | ||
1163 | ret = kobject_add(&policy->kobj, cpufreq_global_kobject, | ||
1164 | "policy%u", | ||
1165 | cpumask_first(policy->related_cpus)); | ||
1166 | if (ret) { | ||
1167 | pr_err("%s: failed to add policy->kobj: %d\n", __func__, | ||
1168 | ret); | ||
1169 | goto out_exit_policy; | ||
1170 | } | ||
1171 | } | 1134 | } |
1172 | 1135 | ||
1173 | /* | 1136 | /* |
@@ -1309,9 +1272,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) | |||
1309 | return ret; | 1272 | return ret; |
1310 | } | 1273 | } |
1311 | 1274 | ||
1312 | static void cpufreq_offline_prepare(unsigned int cpu) | 1275 | static void cpufreq_offline(unsigned int cpu) |
1313 | { | 1276 | { |
1314 | struct cpufreq_policy *policy; | 1277 | struct cpufreq_policy *policy; |
1278 | int ret; | ||
1315 | 1279 | ||
1316 | pr_debug("%s: unregistering CPU %u\n", __func__, cpu); | 1280 | pr_debug("%s: unregistering CPU %u\n", __func__, cpu); |
1317 | 1281 | ||
@@ -1321,13 +1285,13 @@ static void cpufreq_offline_prepare(unsigned int cpu) | |||
1321 | return; | 1285 | return; |
1322 | } | 1286 | } |
1323 | 1287 | ||
1288 | down_write(&policy->rwsem); | ||
1324 | if (has_target()) { | 1289 | if (has_target()) { |
1325 | int ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); | 1290 | ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); |
1326 | if (ret) | 1291 | if (ret) |
1327 | pr_err("%s: Failed to stop governor\n", __func__); | 1292 | pr_err("%s: Failed to stop governor\n", __func__); |
1328 | } | 1293 | } |
1329 | 1294 | ||
1330 | down_write(&policy->rwsem); | ||
1331 | cpumask_clear_cpu(cpu, policy->cpus); | 1295 | cpumask_clear_cpu(cpu, policy->cpus); |
1332 | 1296 | ||
1333 | if (policy_is_inactive(policy)) { | 1297 | if (policy_is_inactive(policy)) { |
@@ -1340,39 +1304,27 @@ static void cpufreq_offline_prepare(unsigned int cpu) | |||
1340 | /* Nominate new CPU */ | 1304 | /* Nominate new CPU */ |
1341 | policy->cpu = cpumask_any(policy->cpus); | 1305 | policy->cpu = cpumask_any(policy->cpus); |
1342 | } | 1306 | } |
1343 | up_write(&policy->rwsem); | ||
1344 | 1307 | ||
1345 | /* Start governor again for active policy */ | 1308 | /* Start governor again for active policy */ |
1346 | if (!policy_is_inactive(policy)) { | 1309 | if (!policy_is_inactive(policy)) { |
1347 | if (has_target()) { | 1310 | if (has_target()) { |
1348 | int ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); | 1311 | ret = cpufreq_governor(policy, CPUFREQ_GOV_START); |
1349 | if (!ret) | 1312 | if (!ret) |
1350 | ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); | 1313 | ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); |
1351 | 1314 | ||
1352 | if (ret) | 1315 | if (ret) |
1353 | pr_err("%s: Failed to start governor\n", __func__); | 1316 | pr_err("%s: Failed to start governor\n", __func__); |
1354 | } | 1317 | } |
1355 | } else if (cpufreq_driver->stop_cpu) { | ||
1356 | cpufreq_driver->stop_cpu(policy); | ||
1357 | } | ||
1358 | } | ||
1359 | 1318 | ||
1360 | static void cpufreq_offline_finish(unsigned int cpu) | 1319 | goto unlock; |
1361 | { | ||
1362 | struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); | ||
1363 | |||
1364 | if (!policy) { | ||
1365 | pr_debug("%s: No cpu_data found\n", __func__); | ||
1366 | return; | ||
1367 | } | 1320 | } |
1368 | 1321 | ||
1369 | /* Only proceed for inactive policies */ | 1322 | if (cpufreq_driver->stop_cpu) |
1370 | if (!policy_is_inactive(policy)) | 1323 | cpufreq_driver->stop_cpu(policy); |
1371 | return; | ||
1372 | 1324 | ||
1373 | /* If cpu is last user of policy, free policy */ | 1325 | /* If cpu is last user of policy, free policy */ |
1374 | if (has_target()) { | 1326 | if (has_target()) { |
1375 | int ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); | 1327 | ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); |
1376 | if (ret) | 1328 | if (ret) |
1377 | pr_err("%s: Failed to exit governor\n", __func__); | 1329 | pr_err("%s: Failed to exit governor\n", __func__); |
1378 | } | 1330 | } |
@@ -1386,6 +1338,9 @@ static void cpufreq_offline_finish(unsigned int cpu) | |||
1386 | cpufreq_driver->exit(policy); | 1338 | cpufreq_driver->exit(policy); |
1387 | policy->freq_table = NULL; | 1339 | policy->freq_table = NULL; |
1388 | } | 1340 | } |
1341 | |||
1342 | unlock: | ||
1343 | up_write(&policy->rwsem); | ||
1389 | } | 1344 | } |
1390 | 1345 | ||
1391 | /** | 1346 | /** |
@@ -1401,10 +1356,8 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) | |||
1401 | if (!policy) | 1356 | if (!policy) |
1402 | return; | 1357 | return; |
1403 | 1358 | ||
1404 | if (cpu_online(cpu)) { | 1359 | if (cpu_online(cpu)) |
1405 | cpufreq_offline_prepare(cpu); | 1360 | cpufreq_offline(cpu); |
1406 | cpufreq_offline_finish(cpu); | ||
1407 | } | ||
1408 | 1361 | ||
1409 | cpumask_clear_cpu(cpu, policy->real_cpus); | 1362 | cpumask_clear_cpu(cpu, policy->real_cpus); |
1410 | remove_cpu_dev_symlink(policy, cpu); | 1363 | remove_cpu_dev_symlink(policy, cpu); |
@@ -1413,15 +1366,6 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) | |||
1413 | cpufreq_policy_free(policy, true); | 1366 | cpufreq_policy_free(policy, true); |
1414 | } | 1367 | } |
1415 | 1368 | ||
1416 | static void handle_update(struct work_struct *work) | ||
1417 | { | ||
1418 | struct cpufreq_policy *policy = | ||
1419 | container_of(work, struct cpufreq_policy, update); | ||
1420 | unsigned int cpu = policy->cpu; | ||
1421 | pr_debug("handle_update for cpu %u called\n", cpu); | ||
1422 | cpufreq_update_policy(cpu); | ||
1423 | } | ||
1424 | |||
1425 | /** | 1369 | /** |
1426 | * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're | 1370 | * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're |
1427 | * in deep trouble. | 1371 | * in deep trouble. |
@@ -1584,6 +1528,7 @@ EXPORT_SYMBOL(cpufreq_generic_suspend); | |||
1584 | void cpufreq_suspend(void) | 1528 | void cpufreq_suspend(void) |
1585 | { | 1529 | { |
1586 | struct cpufreq_policy *policy; | 1530 | struct cpufreq_policy *policy; |
1531 | int ret; | ||
1587 | 1532 | ||
1588 | if (!cpufreq_driver) | 1533 | if (!cpufreq_driver) |
1589 | return; | 1534 | return; |
@@ -1594,7 +1539,11 @@ void cpufreq_suspend(void) | |||
1594 | pr_debug("%s: Suspending Governors\n", __func__); | 1539 | pr_debug("%s: Suspending Governors\n", __func__); |
1595 | 1540 | ||
1596 | for_each_active_policy(policy) { | 1541 | for_each_active_policy(policy) { |
1597 | if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP)) | 1542 | down_write(&policy->rwsem); |
1543 | ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); | ||
1544 | up_write(&policy->rwsem); | ||
1545 | |||
1546 | if (ret) | ||
1598 | pr_err("%s: Failed to stop governor for policy: %p\n", | 1547 | pr_err("%s: Failed to stop governor for policy: %p\n", |
1599 | __func__, policy); | 1548 | __func__, policy); |
1600 | else if (cpufreq_driver->suspend | 1549 | else if (cpufreq_driver->suspend |
@@ -1616,6 +1565,7 @@ suspend: | |||
1616 | void cpufreq_resume(void) | 1565 | void cpufreq_resume(void) |
1617 | { | 1566 | { |
1618 | struct cpufreq_policy *policy; | 1567 | struct cpufreq_policy *policy; |
1568 | int ret; | ||
1619 | 1569 | ||
1620 | if (!cpufreq_driver) | 1570 | if (!cpufreq_driver) |
1621 | return; | 1571 | return; |
@@ -1628,13 +1578,20 @@ void cpufreq_resume(void) | |||
1628 | pr_debug("%s: Resuming Governors\n", __func__); | 1578 | pr_debug("%s: Resuming Governors\n", __func__); |
1629 | 1579 | ||
1630 | for_each_active_policy(policy) { | 1580 | for_each_active_policy(policy) { |
1631 | if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) | 1581 | if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) { |
1632 | pr_err("%s: Failed to resume driver: %p\n", __func__, | 1582 | pr_err("%s: Failed to resume driver: %p\n", __func__, |
1633 | policy); | 1583 | policy); |
1634 | else if (__cpufreq_governor(policy, CPUFREQ_GOV_START) | 1584 | } else { |
1635 | || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS)) | 1585 | down_write(&policy->rwsem); |
1636 | pr_err("%s: Failed to start governor for policy: %p\n", | 1586 | ret = cpufreq_governor(policy, CPUFREQ_GOV_START); |
1637 | __func__, policy); | 1587 | if (!ret) |
1588 | cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); | ||
1589 | up_write(&policy->rwsem); | ||
1590 | |||
1591 | if (ret) | ||
1592 | pr_err("%s: Failed to start governor for policy: %p\n", | ||
1593 | __func__, policy); | ||
1594 | } | ||
1638 | } | 1595 | } |
1639 | 1596 | ||
1640 | /* | 1597 | /* |
@@ -1846,7 +1803,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, | |||
1846 | unsigned int relation) | 1803 | unsigned int relation) |
1847 | { | 1804 | { |
1848 | unsigned int old_target_freq = target_freq; | 1805 | unsigned int old_target_freq = target_freq; |
1849 | int retval = -EINVAL; | 1806 | struct cpufreq_frequency_table *freq_table; |
1807 | int index, retval; | ||
1850 | 1808 | ||
1851 | if (cpufreq_disabled()) | 1809 | if (cpufreq_disabled()) |
1852 | return -ENODEV; | 1810 | return -ENODEV; |
@@ -1873,34 +1831,28 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, | |||
1873 | policy->restore_freq = policy->cur; | 1831 | policy->restore_freq = policy->cur; |
1874 | 1832 | ||
1875 | if (cpufreq_driver->target) | 1833 | if (cpufreq_driver->target) |
1876 | retval = cpufreq_driver->target(policy, target_freq, relation); | 1834 | return cpufreq_driver->target(policy, target_freq, relation); |
1877 | else if (cpufreq_driver->target_index) { | ||
1878 | struct cpufreq_frequency_table *freq_table; | ||
1879 | int index; | ||
1880 | |||
1881 | freq_table = cpufreq_frequency_get_table(policy->cpu); | ||
1882 | if (unlikely(!freq_table)) { | ||
1883 | pr_err("%s: Unable to find freq_table\n", __func__); | ||
1884 | goto out; | ||
1885 | } | ||
1886 | 1835 | ||
1887 | retval = cpufreq_frequency_table_target(policy, freq_table, | 1836 | if (!cpufreq_driver->target_index) |
1888 | target_freq, relation, &index); | 1837 | return -EINVAL; |
1889 | if (unlikely(retval)) { | ||
1890 | pr_err("%s: Unable to find matching freq\n", __func__); | ||
1891 | goto out; | ||
1892 | } | ||
1893 | 1838 | ||
1894 | if (freq_table[index].frequency == policy->cur) { | 1839 | freq_table = cpufreq_frequency_get_table(policy->cpu); |
1895 | retval = 0; | 1840 | if (unlikely(!freq_table)) { |
1896 | goto out; | 1841 | pr_err("%s: Unable to find freq_table\n", __func__); |
1897 | } | 1842 | return -EINVAL; |
1843 | } | ||
1898 | 1844 | ||
1899 | retval = __target_index(policy, freq_table, index); | 1845 | retval = cpufreq_frequency_table_target(policy, freq_table, target_freq, |
1846 | relation, &index); | ||
1847 | if (unlikely(retval)) { | ||
1848 | pr_err("%s: Unable to find matching freq\n", __func__); | ||
1849 | return retval; | ||
1900 | } | 1850 | } |
1901 | 1851 | ||
1902 | out: | 1852 | if (freq_table[index].frequency == policy->cur) |
1903 | return retval; | 1853 | return 0; |
1854 | |||
1855 | return __target_index(policy, freq_table, index); | ||
1904 | } | 1856 | } |
1905 | EXPORT_SYMBOL_GPL(__cpufreq_driver_target); | 1857 | EXPORT_SYMBOL_GPL(__cpufreq_driver_target); |
1906 | 1858 | ||
@@ -1920,20 +1872,14 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, | |||
1920 | } | 1872 | } |
1921 | EXPORT_SYMBOL_GPL(cpufreq_driver_target); | 1873 | EXPORT_SYMBOL_GPL(cpufreq_driver_target); |
1922 | 1874 | ||
1923 | static int __cpufreq_governor(struct cpufreq_policy *policy, | 1875 | __weak struct cpufreq_governor *cpufreq_fallback_governor(void) |
1924 | unsigned int event) | ||
1925 | { | 1876 | { |
1926 | int ret; | 1877 | return NULL; |
1878 | } | ||
1927 | 1879 | ||
1928 | /* This only needs to be defined when the default governor is known to have | 1880 | static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) |
1929 | latency restrictions, e.g. conservative or ondemand. | 1881 | { |
1930 | Kconfig already ensures that this is the case. | 1882 | int ret; |
1931 | */ | ||
1932 | #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE | ||
1933 | struct cpufreq_governor *gov = &cpufreq_gov_performance; | ||
1934 | #else | ||
1935 | struct cpufreq_governor *gov = NULL; | ||
1936 | #endif | ||
1937 | 1883 | ||
1938 | /* Don't start any governor operations if we are entering suspend */ | 1884 | /* Don't start any governor operations if we are entering suspend */ |
1939 | if (cpufreq_suspended) | 1885 | if (cpufreq_suspended) |
@@ -1948,12 +1894,14 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, | |||
1948 | if (policy->governor->max_transition_latency && | 1894 | if (policy->governor->max_transition_latency && |
1949 | policy->cpuinfo.transition_latency > | 1895 | policy->cpuinfo.transition_latency > |
1950 | policy->governor->max_transition_latency) { | 1896 | policy->governor->max_transition_latency) { |
1951 | if (!gov) | 1897 | struct cpufreq_governor *gov = cpufreq_fallback_governor(); |
1952 | return -EINVAL; | 1898 | |
1953 | else { | 1899 | if (gov) { |
1954 | pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", | 1900 | pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", |
1955 | policy->governor->name, gov->name); | 1901 | policy->governor->name, gov->name); |
1956 | policy->governor = gov; | 1902 | policy->governor = gov; |
1903 | } else { | ||
1904 | return -EINVAL; | ||
1957 | } | 1905 | } |
1958 | } | 1906 | } |
1959 | 1907 | ||
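cpufreq_fallback_governor() introduced above leans on weak linkage: the __weak stub returns NULL unless a governor able to serve as a fallback supplies a strong definition elsewhere. A minimal illustration of the pattern; the names here are invented for the demo:

	#include <stdio.h>

	struct gov { const char *name; };

	/* Weak default: no fallback available. A strong definition in another
	 * translation unit overrides this one at link time. */
	__attribute__((weak)) struct gov *fallback_governor(void)
	{
		return NULL;
	}

	int main(void)
	{
		struct gov *gov = fallback_governor();

		if (gov)
			printf("falling back to %s\n", gov->name);
		else
			printf("no fallback, return -EINVAL\n");
		return 0;
	}

Callers must handle the NULL case, which is exactly what the transition-latency check above does before giving up with -EINVAL.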
@@ -1963,21 +1911,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, | |||
1963 | 1911 | ||
1964 | pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); | 1912 | pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); |
1965 | 1913 | ||
1966 | mutex_lock(&cpufreq_governor_lock); | ||
1967 | if ((policy->governor_enabled && event == CPUFREQ_GOV_START) | ||
1968 | || (!policy->governor_enabled | ||
1969 | && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) { | ||
1970 | mutex_unlock(&cpufreq_governor_lock); | ||
1971 | return -EBUSY; | ||
1972 | } | ||
1973 | |||
1974 | if (event == CPUFREQ_GOV_STOP) | ||
1975 | policy->governor_enabled = false; | ||
1976 | else if (event == CPUFREQ_GOV_START) | ||
1977 | policy->governor_enabled = true; | ||
1978 | |||
1979 | mutex_unlock(&cpufreq_governor_lock); | ||
1980 | |||
1981 | ret = policy->governor->governor(policy, event); | 1914 | ret = policy->governor->governor(policy, event); |
1982 | 1915 | ||
1983 | if (!ret) { | 1916 | if (!ret) { |
@@ -1985,14 +1918,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, | |||
1985 | policy->governor->initialized++; | 1918 | policy->governor->initialized++; |
1986 | else if (event == CPUFREQ_GOV_POLICY_EXIT) | 1919 | else if (event == CPUFREQ_GOV_POLICY_EXIT) |
1987 | policy->governor->initialized--; | 1920 | policy->governor->initialized--; |
1988 | } else { | ||
1989 | /* Restore original values */ | ||
1990 | mutex_lock(&cpufreq_governor_lock); | ||
1991 | if (event == CPUFREQ_GOV_STOP) | ||
1992 | policy->governor_enabled = true; | ||
1993 | else if (event == CPUFREQ_GOV_START) | ||
1994 | policy->governor_enabled = false; | ||
1995 | mutex_unlock(&cpufreq_governor_lock); | ||
1996 | } | 1921 | } |
1997 | 1922 | ||
1998 | if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) || | 1923 | if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) || |
@@ -2147,7 +2072,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, | |||
2147 | old_gov = policy->governor; | 2072 | old_gov = policy->governor; |
2148 | /* end old governor */ | 2073 | /* end old governor */ |
2149 | if (old_gov) { | 2074 | if (old_gov) { |
2150 | ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); | 2075 | ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); |
2151 | if (ret) { | 2076 | if (ret) { |
2152 | /* This can happen due to race with other operations */ | 2077 | /* This can happen due to race with other operations */ |
2153 | pr_debug("%s: Failed to Stop Governor: %s (%d)\n", | 2078 | pr_debug("%s: Failed to Stop Governor: %s (%d)\n", |
@@ -2155,10 +2080,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, | |||
2155 | return ret; | 2080 | return ret; |
2156 | } | 2081 | } |
2157 | 2082 | ||
2158 | up_write(&policy->rwsem); | 2083 | ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); |
2159 | ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); | ||
2160 | down_write(&policy->rwsem); | ||
2161 | |||
2162 | if (ret) { | 2084 | if (ret) { |
2163 | pr_err("%s: Failed to Exit Governor: %s (%d)\n", | 2085 | pr_err("%s: Failed to Exit Governor: %s (%d)\n", |
2164 | __func__, old_gov->name, ret); | 2086 | __func__, old_gov->name, ret); |
@@ -2168,32 +2090,30 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, | |||
2168 | 2090 | ||
2169 | /* start new governor */ | 2091 | /* start new governor */ |
2170 | policy->governor = new_policy->governor; | 2092 | policy->governor = new_policy->governor; |
2171 | ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); | 2093 | ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); |
2172 | if (!ret) { | 2094 | if (!ret) { |
2173 | ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); | 2095 | ret = cpufreq_governor(policy, CPUFREQ_GOV_START); |
2174 | if (!ret) | 2096 | if (!ret) |
2175 | goto out; | 2097 | goto out; |
2176 | 2098 | ||
2177 | up_write(&policy->rwsem); | 2099 | cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); |
2178 | __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); | ||
2179 | down_write(&policy->rwsem); | ||
2180 | } | 2100 | } |
2181 | 2101 | ||
2182 | /* new governor failed, so re-start old one */ | 2102 | /* new governor failed, so re-start old one */ |
2183 | pr_debug("starting governor %s failed\n", policy->governor->name); | 2103 | pr_debug("starting governor %s failed\n", policy->governor->name); |
2184 | if (old_gov) { | 2104 | if (old_gov) { |
2185 | policy->governor = old_gov; | 2105 | policy->governor = old_gov; |
2186 | if (__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) | 2106 | if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) |
2187 | policy->governor = NULL; | 2107 | policy->governor = NULL; |
2188 | else | 2108 | else |
2189 | __cpufreq_governor(policy, CPUFREQ_GOV_START); | 2109 | cpufreq_governor(policy, CPUFREQ_GOV_START); |
2190 | } | 2110 | } |
2191 | 2111 | ||
2192 | return ret; | 2112 | return ret; |
2193 | 2113 | ||
2194 | out: | 2114 | out: |
2195 | pr_debug("governor: change or update limits\n"); | 2115 | pr_debug("governor: change or update limits\n"); |
2196 | return __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); | 2116 | return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); |
2197 | } | 2117 | } |
2198 | 2118 | ||
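With the rwsem juggling gone, cpufreq_set_policy() now runs the whole STOP -> EXIT -> INIT -> START sequence in one place and rolls back to the old governor if the new one refuses to start. A condensed userspace model of that sequence; the event names mirror the kernel's, everything else is illustrative:

	#include <stdio.h>

	enum { GOV_STOP, GOV_EXIT, GOV_INIT, GOV_START, GOV_LIMITS };

	struct gov {
		const char *name;
		int (*cb)(int event);	/* 0 on success */
	};

	static int switch_governor(struct gov **cur, struct gov *new)
	{
		struct gov *old = *cur;

		if (old) {
			old->cb(GOV_STOP);
			old->cb(GOV_EXIT);
		}

		*cur = new;
		if (!new->cb(GOV_INIT)) {
			if (!new->cb(GOV_START))
				return new->cb(GOV_LIMITS);	/* success path */
			new->cb(GOV_EXIT);
		}

		/* New governor failed, so re-start the old one. */
		if (old) {
			*cur = old;
			if (old->cb(GOV_INIT))
				*cur = NULL;
			else
				old->cb(GOV_START);
		}
		return -1;
	}

	static int ok(int e)   { (void)e; return 0; }
	static int fail(int e) { return e == GOV_START ? -1 : 0; }

	int main(void)
	{
		struct gov od = { "ondemand", ok }, bad = { "broken", fail };
		struct gov *cur = &od;
		int ret = switch_governor(&cur, &bad);

		/* prints: switch: -1, current: ondemand */
		printf("switch: %d, current: %s\n", ret, cur ? cur->name : "none");
		return 0;
	}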
2199 | /** | 2119 | /** |
@@ -2260,11 +2180,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, | |||
2260 | break; | 2180 | break; |
2261 | 2181 | ||
2262 | case CPU_DOWN_PREPARE: | 2182 | case CPU_DOWN_PREPARE: |
2263 | cpufreq_offline_prepare(cpu); | 2183 | cpufreq_offline(cpu); |
2264 | break; | ||
2265 | |||
2266 | case CPU_POST_DEAD: | ||
2267 | cpufreq_offline_finish(cpu); | ||
2268 | break; | 2184 | break; |
2269 | 2185 | ||
2270 | case CPU_DOWN_FAILED: | 2186 | case CPU_DOWN_FAILED: |
@@ -2297,8 +2213,11 @@ static int cpufreq_boost_set_sw(int state) | |||
2297 | __func__); | 2213 | __func__); |
2298 | break; | 2214 | break; |
2299 | } | 2215 | } |
2216 | |||
2217 | down_write(&policy->rwsem); | ||
2300 | policy->user_policy.max = policy->max; | 2218 | policy->user_policy.max = policy->max; |
2301 | __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); | 2219 | cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); |
2220 | up_write(&policy->rwsem); | ||
2302 | } | 2221 | } |
2303 | } | 2222 | } |
2304 | 2223 | ||
@@ -2384,7 +2303,7 @@ EXPORT_SYMBOL_GPL(cpufreq_boost_enabled); | |||
2384 | * submitted by the CPU Frequency driver. | 2303 | * submitted by the CPU Frequency driver. |
2385 | * | 2304 | * |
2386 | * Registers a CPU Frequency driver to this core code. This code | 2305 | * Registers a CPU Frequency driver to this core code. This code |
2387 | * returns zero on success, -EBUSY when another driver got here first | 2306 | * returns zero on success, -EEXIST when another driver got here first |
2388 | * (and isn't unregistered in the meantime). | 2307 | * (and isn't unregistered in the meantime). |
2389 | * | 2308 | * |
2390 | */ | 2309 | */ |
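The kerneldoc fix above matters because cpufreq_register_driver() implements a first-caller-wins handshake: one global driver slot, and -EEXIST for anyone who arrives while it is occupied. The shape of it, modeled without the kernel's locking:

	#include <errno.h>
	#include <stddef.h>
	#include <stdio.h>

	struct freq_driver { const char *name; };

	static struct freq_driver *registered;	/* the single active driver */

	static int register_driver(struct freq_driver *drv)
	{
		if (registered)
			return -EEXIST;	/* someone got here first */
		registered = drv;
		return 0;
	}

	int main(void)
	{
		struct freq_driver a = { "acpi-cpufreq" }, b = { "intel_pstate" };
		int ra = register_driver(&a);
		int rb = register_driver(&b);

		printf("%d %d\n", ra, rb);	/* prints: 0 -17 */
		return 0;
	}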
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 606ad74abe6e..bf4913f6453b 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c | |||
@@ -14,6 +14,22 @@ | |||
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include "cpufreq_governor.h" | 15 | #include "cpufreq_governor.h" |
16 | 16 | ||
17 | struct cs_policy_dbs_info { | ||
18 | struct policy_dbs_info policy_dbs; | ||
19 | unsigned int down_skip; | ||
20 | unsigned int requested_freq; | ||
21 | }; | ||
22 | |||
23 | static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) | ||
24 | { | ||
25 | return container_of(policy_dbs, struct cs_policy_dbs_info, policy_dbs); | ||
26 | } | ||
27 | |||
28 | struct cs_dbs_tuners { | ||
29 | unsigned int down_threshold; | ||
30 | unsigned int freq_step; | ||
31 | }; | ||
32 | |||
17 | /* Conservative governor macros */ | 33 | /* Conservative governor macros */ |
18 | #define DEF_FREQUENCY_UP_THRESHOLD (80) | 34 | #define DEF_FREQUENCY_UP_THRESHOLD (80) |
19 | #define DEF_FREQUENCY_DOWN_THRESHOLD (20) | 35 | #define DEF_FREQUENCY_DOWN_THRESHOLD (20) |
@@ -21,21 +37,6 @@ | |||
21 | #define DEF_SAMPLING_DOWN_FACTOR (1) | 37 | #define DEF_SAMPLING_DOWN_FACTOR (1) |
22 | #define MAX_SAMPLING_DOWN_FACTOR (10) | 38 | #define MAX_SAMPLING_DOWN_FACTOR (10) |
23 | 39 | ||
24 | static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); | ||
25 | |||
26 | static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, | ||
27 | unsigned int event); | ||
28 | |||
29 | #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE | ||
30 | static | ||
31 | #endif | ||
32 | struct cpufreq_governor cpufreq_gov_conservative = { | ||
33 | .name = "conservative", | ||
34 | .governor = cs_cpufreq_governor_dbs, | ||
35 | .max_transition_latency = TRANSITION_LATENCY_LIMIT, | ||
36 | .owner = THIS_MODULE, | ||
37 | }; | ||
38 | |||
39 | static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, | 40 | static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, |
40 | struct cpufreq_policy *policy) | 41 | struct cpufreq_policy *policy) |
41 | { | 42 | { |
@@ -57,27 +58,28 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, | |||
57 | * Any frequency increase takes it to the maximum frequency. Frequency reduction | 58 | * Any frequency increase takes it to the maximum frequency. Frequency reduction |
58 | * happens in minimum steps of 5% (default) of the maximum frequency. | 59 | * happens in minimum steps of 5% (default) of the maximum frequency. |
59 | */ | 60 | */ |
60 | static void cs_check_cpu(int cpu, unsigned int load) | 61 | static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) |
61 | { | 62 | { |
62 | struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); | 63 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
63 | struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; | 64 | struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); |
64 | struct dbs_data *dbs_data = policy->governor_data; | 65 | struct dbs_data *dbs_data = policy_dbs->dbs_data; |
65 | struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; | 66 | struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; |
67 | unsigned int load = dbs_update(policy); | ||
66 | 68 | ||
67 | /* | 69 | /* |
68 | * break out if we 'cannot' reduce the speed as the user might | 70 | * break out if we 'cannot' reduce the speed as the user might |
69 | * want freq_step to be zero | 71 | * want freq_step to be zero |
70 | */ | 72 | */ |
71 | if (cs_tuners->freq_step == 0) | 73 | if (cs_tuners->freq_step == 0) |
72 | return; | 74 | goto out; |
73 | 75 | ||
74 | /* Check for frequency increase */ | 76 | /* Check for frequency increase */ |
75 | if (load > cs_tuners->up_threshold) { | 77 | if (load > dbs_data->up_threshold) { |
76 | dbs_info->down_skip = 0; | 78 | dbs_info->down_skip = 0; |
77 | 79 | ||
78 | /* if we are already at full speed then break out early */ | 80 | /* if we are already at full speed then break out early */ |
79 | if (dbs_info->requested_freq == policy->max) | 81 | if (dbs_info->requested_freq == policy->max) |
80 | return; | 82 | goto out; |
81 | 83 | ||
82 | dbs_info->requested_freq += get_freq_target(cs_tuners, policy); | 84 | dbs_info->requested_freq += get_freq_target(cs_tuners, policy); |
83 | 85 | ||
@@ -86,12 +88,12 @@ static void cs_check_cpu(int cpu, unsigned int load) | |||
86 | 88 | ||
87 | __cpufreq_driver_target(policy, dbs_info->requested_freq, | 89 | __cpufreq_driver_target(policy, dbs_info->requested_freq, |
88 | CPUFREQ_RELATION_H); | 90 | CPUFREQ_RELATION_H); |
89 | return; | 91 | goto out; |
90 | } | 92 | } |
91 | 93 | ||
92 | /* if sampling_down_factor is active break out early */ | 94 | /* if sampling_down_factor is active break out early */ |
93 | if (++dbs_info->down_skip < cs_tuners->sampling_down_factor) | 95 | if (++dbs_info->down_skip < dbs_data->sampling_down_factor) |
94 | return; | 96 | goto out; |
95 | dbs_info->down_skip = 0; | 97 | dbs_info->down_skip = 0; |
96 | 98 | ||
97 | /* Check for frequency decrease */ | 99 | /* Check for frequency decrease */ |
@@ -101,7 +103,7 @@ static void cs_check_cpu(int cpu, unsigned int load) | |||
101 | * if we cannot reduce the frequency anymore, break out early | 103 | * if we cannot reduce the frequency anymore, break out early |
102 | */ | 104 | */ |
103 | if (policy->cur == policy->min) | 105 | if (policy->cur == policy->min) |
104 | return; | 106 | goto out; |
105 | 107 | ||
106 | freq_target = get_freq_target(cs_tuners, policy); | 108 | freq_target = get_freq_target(cs_tuners, policy); |
107 | if (dbs_info->requested_freq > freq_target) | 109 | if (dbs_info->requested_freq > freq_target) |
@@ -111,58 +113,25 @@ static void cs_check_cpu(int cpu, unsigned int load) | |||
111 | 113 | ||
112 | __cpufreq_driver_target(policy, dbs_info->requested_freq, | 114 | __cpufreq_driver_target(policy, dbs_info->requested_freq, |
113 | CPUFREQ_RELATION_L); | 115 | CPUFREQ_RELATION_L); |
114 | return; | ||
115 | } | 116 | } |
116 | } | ||
117 | |||
118 | static unsigned int cs_dbs_timer(struct cpufreq_policy *policy, bool modify_all) | ||
119 | { | ||
120 | struct dbs_data *dbs_data = policy->governor_data; | ||
121 | struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; | ||
122 | |||
123 | if (modify_all) | ||
124 | dbs_check_cpu(dbs_data, policy->cpu); | ||
125 | 117 | ||
126 | return delay_for_sampling_rate(cs_tuners->sampling_rate); | 118 | out: |
119 | return dbs_data->sampling_rate; | ||
127 | } | 120 | } |
128 | 121 | ||
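cs_dbs_timer() above folds the old cs_check_cpu() into the timer body: above up_threshold the requested frequency climbs by freq_step percent of the maximum, below down_threshold it steps back down, and anything in between leaves it alone. A standalone model of the stepping arithmetic, using this file's default tunables and omitting the down_skip/sampling_down_factor handling:

	#include <stdio.h>

	#define UP_THRESHOLD	80	/* DEF_FREQUENCY_UP_THRESHOLD */
	#define DOWN_THRESHOLD	20	/* DEF_FREQUENCY_DOWN_THRESHOLD */
	#define FREQ_STEP	5	/* DEF_FREQUENCY_STEP, percent of max */

	static unsigned int fmin = 800000, fmax = 2400000;	/* kHz */
	static unsigned int requested = 800000;

	static void sample(unsigned int load)
	{
		unsigned int step = FREQ_STEP * fmax / 100;

		if (load > UP_THRESHOLD) {
			if (requested < fmax - step)
				requested += step;
			else
				requested = fmax;	/* clamp at policy->max */
		} else if (load < DOWN_THRESHOLD) {
			if (requested > fmin + step)
				requested -= step;
			else
				requested = fmin;	/* clamp at policy->min */
		}
		/* loads in between leave the frequency where it is */
	}

	int main(void)
	{
		unsigned int loads[] = { 90, 90, 10, 50 };
		int i;

		for (i = 0; i < 4; i++) {
			sample(loads[i]);
			printf("load %2u -> requested %u kHz\n", loads[i], requested);
		}
		return 0;
	}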
129 | static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | 122 | static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, |
130 | void *data) | 123 | void *data); |
131 | { | ||
132 | struct cpufreq_freqs *freq = data; | ||
133 | struct cs_cpu_dbs_info_s *dbs_info = | ||
134 | &per_cpu(cs_cpu_dbs_info, freq->cpu); | ||
135 | struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); | ||
136 | |||
137 | if (!policy) | ||
138 | return 0; | ||
139 | |||
140 | /* policy isn't governed by conservative governor */ | ||
141 | if (policy->governor != &cpufreq_gov_conservative) | ||
142 | return 0; | ||
143 | |||
144 | /* | ||
145 | * We only care if our internally tracked freq moves outside the 'valid' | ||
146 | * range of frequencies available to us; otherwise we do not change it. | ||
147 | */ | ||
148 | if (dbs_info->requested_freq > policy->max | ||
149 | || dbs_info->requested_freq < policy->min) | ||
150 | dbs_info->requested_freq = freq->new; | ||
151 | |||
152 | return 0; | ||
153 | } | ||
154 | 124 | ||
155 | static struct notifier_block cs_cpufreq_notifier_block = { | 125 | static struct notifier_block cs_cpufreq_notifier_block = { |
156 | .notifier_call = dbs_cpufreq_notifier, | 126 | .notifier_call = dbs_cpufreq_notifier, |
157 | }; | 127 | }; |
158 | 128 | ||
159 | /************************** sysfs interface ************************/ | 129 | /************************** sysfs interface ************************/ |
160 | static struct common_dbs_data cs_dbs_cdata; | 130 | static struct dbs_governor cs_dbs_gov; |
161 | 131 | ||
162 | static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, | 132 | static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, |
163 | const char *buf, size_t count) | 133 | const char *buf, size_t count) |
164 | { | 134 | { |
165 | struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; | ||
166 | unsigned int input; | 135 | unsigned int input; |
167 | int ret; | 136 | int ret; |
168 | ret = sscanf(buf, "%u", &input); | 137 | ret = sscanf(buf, "%u", &input); |
@@ -170,22 +139,7 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, | |||
170 | if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) | 139 | if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) |
171 | return -EINVAL; | 140 | return -EINVAL; |
172 | 141 | ||
173 | cs_tuners->sampling_down_factor = input; | 142 | dbs_data->sampling_down_factor = input; |
174 | return count; | ||
175 | } | ||
176 | |||
177 | static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, | ||
178 | size_t count) | ||
179 | { | ||
180 | struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; | ||
181 | unsigned int input; | ||
182 | int ret; | ||
183 | ret = sscanf(buf, "%u", &input); | ||
184 | |||
185 | if (ret != 1) | ||
186 | return -EINVAL; | ||
187 | |||
188 | cs_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate); | ||
189 | return count; | 143 | return count; |
190 | } | 144 | } |
191 | 145 | ||
@@ -200,7 +154,7 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, | |||
200 | if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold) | 154 | if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold) |
201 | return -EINVAL; | 155 | return -EINVAL; |
202 | 156 | ||
203 | cs_tuners->up_threshold = input; | 157 | dbs_data->up_threshold = input; |
204 | return count; | 158 | return count; |
205 | } | 159 | } |
206 | 160 | ||
@@ -214,7 +168,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, | |||
214 | 168 | ||
215 | /* cannot be lower than 11, otherwise freq will not fall */ | 169 | /* cannot be lower than 11, otherwise freq will not fall */ |
216 | if (ret != 1 || input < 11 || input > 100 || | 170 | if (ret != 1 || input < 11 || input > 100 || |
217 | input >= cs_tuners->up_threshold) | 171 | input >= dbs_data->up_threshold) |
218 | return -EINVAL; | 172 | return -EINVAL; |
219 | 173 | ||
220 | cs_tuners->down_threshold = input; | 174 | cs_tuners->down_threshold = input; |
@@ -224,8 +178,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, | |||
224 | static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, | 178 | static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, |
225 | const char *buf, size_t count) | 179 | const char *buf, size_t count) |
226 | { | 180 | { |
227 | struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; | 181 | unsigned int input; |
228 | unsigned int input, j; | ||
229 | int ret; | 182 | int ret; |
230 | 183 | ||
231 | ret = sscanf(buf, "%u", &input); | 184 | ret = sscanf(buf, "%u", &input); |
@@ -235,21 +188,14 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, | |||
235 | if (input > 1) | 188 | if (input > 1) |
236 | input = 1; | 189 | input = 1; |
237 | 190 | ||
238 | if (input == cs_tuners->ignore_nice_load) /* nothing to do */ | 191 | if (input == dbs_data->ignore_nice_load) /* nothing to do */ |
239 | return count; | 192 | return count; |
240 | 193 | ||
241 | cs_tuners->ignore_nice_load = input; | 194 | dbs_data->ignore_nice_load = input; |
242 | 195 | ||
243 | /* we need to re-evaluate prev_cpu_idle */ | 196 | /* we need to re-evaluate prev_cpu_idle */ |
244 | for_each_online_cpu(j) { | 197 | gov_update_cpu_data(dbs_data); |
245 | struct cs_cpu_dbs_info_s *dbs_info; | 198 | |
246 | dbs_info = &per_cpu(cs_cpu_dbs_info, j); | ||
247 | dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, | ||
248 | &dbs_info->cdbs.prev_cpu_wall, 0); | ||
249 | if (cs_tuners->ignore_nice_load) | ||
250 | dbs_info->cdbs.prev_cpu_nice = | ||
251 | kcpustat_cpu(j).cpustat[CPUTIME_NICE]; | ||
252 | } | ||
253 | return count; | 199 | return count; |
254 | } | 200 | } |
255 | 201 | ||
@@ -275,55 +221,47 @@ static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, | |||
275 | return count; | 221 | return count; |
276 | } | 222 | } |
277 | 223 | ||
278 | show_store_one(cs, sampling_rate); | 224 | gov_show_one_common(sampling_rate); |
279 | show_store_one(cs, sampling_down_factor); | 225 | gov_show_one_common(sampling_down_factor); |
280 | show_store_one(cs, up_threshold); | 226 | gov_show_one_common(up_threshold); |
281 | show_store_one(cs, down_threshold); | 227 | gov_show_one_common(ignore_nice_load); |
282 | show_store_one(cs, ignore_nice_load); | 228 | gov_show_one_common(min_sampling_rate); |
283 | show_store_one(cs, freq_step); | 229 | gov_show_one(cs, down_threshold); |
284 | declare_show_sampling_rate_min(cs); | 230 | gov_show_one(cs, freq_step); |
285 | 231 | ||
286 | gov_sys_pol_attr_rw(sampling_rate); | 232 | gov_attr_rw(sampling_rate); |
287 | gov_sys_pol_attr_rw(sampling_down_factor); | 233 | gov_attr_rw(sampling_down_factor); |
288 | gov_sys_pol_attr_rw(up_threshold); | 234 | gov_attr_rw(up_threshold); |
289 | gov_sys_pol_attr_rw(down_threshold); | 235 | gov_attr_rw(ignore_nice_load); |
290 | gov_sys_pol_attr_rw(ignore_nice_load); | 236 | gov_attr_ro(min_sampling_rate); |
291 | gov_sys_pol_attr_rw(freq_step); | 237 | gov_attr_rw(down_threshold); |
292 | gov_sys_pol_attr_ro(sampling_rate_min); | 238 | gov_attr_rw(freq_step); |
293 | 239 | ||
294 | static struct attribute *dbs_attributes_gov_sys[] = { | 240 | static struct attribute *cs_attributes[] = { |
295 | &sampling_rate_min_gov_sys.attr, | 241 | &min_sampling_rate.attr, |
296 | &sampling_rate_gov_sys.attr, | 242 | &sampling_rate.attr, |
297 | &sampling_down_factor_gov_sys.attr, | 243 | &sampling_down_factor.attr, |
298 | &up_threshold_gov_sys.attr, | 244 | &up_threshold.attr, |
299 | &down_threshold_gov_sys.attr, | 245 | &down_threshold.attr, |
300 | &ignore_nice_load_gov_sys.attr, | 246 | &ignore_nice_load.attr, |
301 | &freq_step_gov_sys.attr, | 247 | &freq_step.attr, |
302 | NULL | 248 | NULL |
303 | }; | 249 | }; |
304 | 250 | ||
305 | static struct attribute_group cs_attr_group_gov_sys = { | 251 | /************************** sysfs end ************************/ |
306 | .attrs = dbs_attributes_gov_sys, | ||
307 | .name = "conservative", | ||
308 | }; | ||
309 | 252 | ||
310 | static struct attribute *dbs_attributes_gov_pol[] = { | 253 | static struct policy_dbs_info *cs_alloc(void) |
311 | &sampling_rate_min_gov_pol.attr, | 254 | { |
312 | &sampling_rate_gov_pol.attr, | 255 | struct cs_policy_dbs_info *dbs_info; |
313 | &sampling_down_factor_gov_pol.attr, | ||
314 | &up_threshold_gov_pol.attr, | ||
315 | &down_threshold_gov_pol.attr, | ||
316 | &ignore_nice_load_gov_pol.attr, | ||
317 | &freq_step_gov_pol.attr, | ||
318 | NULL | ||
319 | }; | ||
320 | 256 | ||
321 | static struct attribute_group cs_attr_group_gov_pol = { | 257 | dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL); |
322 | .attrs = dbs_attributes_gov_pol, | 258 | return dbs_info ? &dbs_info->policy_dbs : NULL; |
323 | .name = "conservative", | 259 | } |
324 | }; | ||
325 | 260 | ||
326 | /************************** sysfs end ************************/ | 261 | static void cs_free(struct policy_dbs_info *policy_dbs) |
262 | { | ||
263 | kfree(to_dbs_info(policy_dbs)); | ||
264 | } | ||
327 | 265 | ||
328 | static int cs_init(struct dbs_data *dbs_data, bool notify) | 266 | static int cs_init(struct dbs_data *dbs_data, bool notify) |
329 | { | 267 | { |
@@ -335,11 +273,11 @@ static int cs_init(struct dbs_data *dbs_data, bool notify) | |||
335 | return -ENOMEM; | 273 | return -ENOMEM; |
336 | } | 274 | } |
337 | 275 | ||
338 | tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; | ||
339 | tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; | 276 | tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; |
340 | tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; | ||
341 | tuners->ignore_nice_load = 0; | ||
342 | tuners->freq_step = DEF_FREQUENCY_STEP; | 277 | tuners->freq_step = DEF_FREQUENCY_STEP; |
278 | dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; | ||
279 | dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; | ||
280 | dbs_data->ignore_nice_load = 0; | ||
343 | 281 | ||
344 | dbs_data->tuners = tuners; | 282 | dbs_data->tuners = tuners; |
345 | dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * | 283 | dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * |
@@ -361,35 +299,66 @@ static void cs_exit(struct dbs_data *dbs_data, bool notify) | |||
361 | kfree(dbs_data->tuners); | 299 | kfree(dbs_data->tuners); |
362 | } | 300 | } |
363 | 301 | ||
364 | define_get_cpu_dbs_routines(cs_cpu_dbs_info); | 302 | static void cs_start(struct cpufreq_policy *policy) |
303 | { | ||
304 | struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); | ||
305 | |||
306 | dbs_info->down_skip = 0; | ||
307 | dbs_info->requested_freq = policy->cur; | ||
308 | } | ||
365 | 309 | ||
366 | static struct common_dbs_data cs_dbs_cdata = { | 310 | static struct dbs_governor cs_dbs_gov = { |
367 | .governor = GOV_CONSERVATIVE, | 311 | .gov = { |
368 | .attr_group_gov_sys = &cs_attr_group_gov_sys, | 312 | .name = "conservative", |
369 | .attr_group_gov_pol = &cs_attr_group_gov_pol, | 313 | .governor = cpufreq_governor_dbs, |
370 | .get_cpu_cdbs = get_cpu_cdbs, | 314 | .max_transition_latency = TRANSITION_LATENCY_LIMIT, |
371 | .get_cpu_dbs_info_s = get_cpu_dbs_info_s, | 315 | .owner = THIS_MODULE, |
316 | }, | ||
317 | .kobj_type = { .default_attrs = cs_attributes }, | ||
372 | .gov_dbs_timer = cs_dbs_timer, | 318 | .gov_dbs_timer = cs_dbs_timer, |
373 | .gov_check_cpu = cs_check_cpu, | 319 | .alloc = cs_alloc, |
320 | .free = cs_free, | ||
374 | .init = cs_init, | 321 | .init = cs_init, |
375 | .exit = cs_exit, | 322 | .exit = cs_exit, |
376 | .mutex = __MUTEX_INITIALIZER(cs_dbs_cdata.mutex), | 323 | .start = cs_start, |
377 | }; | 324 | }; |
378 | 325 | ||
379 | static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, | 326 | #define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov) |
380 | unsigned int event) | 327 | |
328 | static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | ||
329 | void *data) | ||
381 | { | 330 | { |
382 | return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); | 331 | struct cpufreq_freqs *freq = data; |
332 | struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); | ||
333 | struct cs_policy_dbs_info *dbs_info; | ||
334 | |||
335 | if (!policy) | ||
336 | return 0; | ||
337 | |||
338 | /* policy isn't governed by conservative governor */ | ||
339 | if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE) | ||
340 | return 0; | ||
341 | |||
342 | dbs_info = to_dbs_info(policy->governor_data); | ||
343 | /* | ||
344 | * We only care if our internally tracked freq moves outside the 'valid' | ||
345 | * range of frequencies available to us; otherwise we do not change it. | ||
346 | */ | ||
347 | if (dbs_info->requested_freq > policy->max | ||
348 | || dbs_info->requested_freq < policy->min) | ||
349 | dbs_info->requested_freq = freq->new; | ||
350 | |||
351 | return 0; | ||
383 | } | 352 | } |
384 | 353 | ||
385 | static int __init cpufreq_gov_dbs_init(void) | 354 | static int __init cpufreq_gov_dbs_init(void) |
386 | { | 355 | { |
387 | return cpufreq_register_governor(&cpufreq_gov_conservative); | 356 | return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE); |
388 | } | 357 | } |
389 | 358 | ||
390 | static void __exit cpufreq_gov_dbs_exit(void) | 359 | static void __exit cpufreq_gov_dbs_exit(void) |
391 | { | 360 | { |
392 | cpufreq_unregister_governor(&cpufreq_gov_conservative); | 361 | cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE); |
393 | } | 362 | } |
394 | 363 | ||
395 | MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>"); | 364 | MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>"); |
@@ -399,6 +368,11 @@ MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " | |||
399 | MODULE_LICENSE("GPL"); | 368 | MODULE_LICENSE("GPL"); |
400 | 369 | ||
401 | #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE | 370 | #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE |
371 | struct cpufreq_governor *cpufreq_default_governor(void) | ||
372 | { | ||
373 | return CPU_FREQ_GOV_CONSERVATIVE; | ||
374 | } | ||
375 | |||
402 | fs_initcall(cpufreq_gov_dbs_init); | 376 | fs_initcall(cpufreq_gov_dbs_init); |
403 | #else | 377 | #else |
404 | module_init(cpufreq_gov_dbs_init); | 378 | module_init(cpufreq_gov_dbs_init); |
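The CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE block above is the other half of the new governor-lookup scheme: each governor provides cpufreq_default_governor() only when Kconfig selects it as the default, so exactly one strong definition ends up in the build. The pattern in miniature; the macro name and the #else branch exist only to keep this standalone demo linkable:

	#include <stdio.h>

	struct gov { const char *name; };
	static struct gov conservative = { "conservative" };

	/* Compile with: cc -DCONFIG_DEFAULT_GOV_CONSERVATIVE demo.c */
	#ifdef CONFIG_DEFAULT_GOV_CONSERVATIVE
	struct gov *default_governor(void) { return &conservative; }
	#else
	struct gov *default_governor(void) { return NULL; }
	#endif

	int main(void)
	{
		struct gov *g = default_governor();

		printf("default governor: %s\n", g ? g->name : "(none)");
		return 0;
	}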
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e0d111024d48..1c25ef405616 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c | |||
@@ -18,95 +18,193 @@ | |||
18 | 18 | ||
19 | #include <linux/export.h> | 19 | #include <linux/export.h> |
20 | #include <linux/kernel_stat.h> | 20 | #include <linux/kernel_stat.h> |
21 | #include <linux/sched.h> | ||
21 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
22 | 23 | ||
23 | #include "cpufreq_governor.h" | 24 | #include "cpufreq_governor.h" |
24 | 25 | ||
25 | static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) | 26 | static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); |
26 | { | 27 | |
27 | if (have_governor_per_policy()) | 28 | static DEFINE_MUTEX(gov_dbs_data_mutex); |
28 | return dbs_data->cdata->attr_group_gov_pol; | ||
29 | else | ||
30 | return dbs_data->cdata->attr_group_gov_sys; | ||
31 | } | ||
32 | 29 | ||
33 | void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) | 30 | /* Common sysfs tunables */ |
31 | /** | ||
32 | * store_sampling_rate - update sampling rate effective immediately if needed. | ||
33 | * | ||
34 | * If the new rate is smaller than the old one, simply updating | ||
35 | * dbs.sampling_rate might not be appropriate. For example, suppose the | ||
36 | * original sampling_rate was 1 second and the requested new rate is 10 ms | ||
37 | * because the user needs an immediate reaction from the ondemand governor, | ||
38 | * but is not sure whether a higher frequency will be required. The governor | ||
39 | * may then change the sampling rate too late, up to 1 second later. Thus, | ||
40 | * when reducing the sampling rate, we need to make the new value effective | ||
41 | * immediately. | ||
42 | * | ||
43 | * This must be called with dbs_data->mutex held, otherwise traversing | ||
44 | * policy_dbs_list isn't safe. | ||
45 | */ | ||
46 | ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, | ||
47 | size_t count) | ||
34 | { | 48 | { |
35 | struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); | 49 | struct policy_dbs_info *policy_dbs; |
36 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | 50 | unsigned int rate; |
37 | struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; | 51 | int ret; |
38 | struct cpufreq_policy *policy = cdbs->shared->policy; | 52 | ret = sscanf(buf, "%u", &rate); |
39 | unsigned int sampling_rate; | 53 | if (ret != 1) |
40 | unsigned int max_load = 0; | 54 | return -EINVAL; |
41 | unsigned int ignore_nice; | ||
42 | unsigned int j; | ||
43 | 55 | ||
44 | if (dbs_data->cdata->governor == GOV_ONDEMAND) { | 56 | dbs_data->sampling_rate = max(rate, dbs_data->min_sampling_rate); |
45 | struct od_cpu_dbs_info_s *od_dbs_info = | ||
46 | dbs_data->cdata->get_cpu_dbs_info_s(cpu); | ||
47 | 57 | ||
58 | /* | ||
59 | * We are operating under dbs_data->mutex and so the list and its | ||
60 | * entries can't be freed concurrently. | ||
61 | */ | ||
62 | list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { | ||
63 | mutex_lock(&policy_dbs->timer_mutex); | ||
48 | /* | 64 | /* |
49 | * Sometimes, the ondemand governor uses an additional | 65 | * On 32-bit architectures this may race with the |
50 | * multiplier to give long delays. So apply this multiplier to | 66 | * sample_delay_ns read in dbs_update_util_handler(), but that |
51 | * the 'sampling_rate', so as to keep the wake-up-from-idle | 67 | * really doesn't matter. If the read returns a value that's |
52 | * detection logic a bit conservative. | 68 | * too big, the sample will be skipped, but the next invocation |
69 | * of dbs_update_util_handler() (when the update has been | ||
70 | * completed) will take a sample. | ||
71 | * | ||
72 | * If this runs in parallel with dbs_work_handler(), we may end | ||
73 | * up overwriting the sample_delay_ns value that it has just | ||
74 | * written, but it will be corrected next time a sample is | ||
75 | * taken, so it shouldn't be significant. | ||
53 | */ | 76 | */ |
54 | sampling_rate = od_tuners->sampling_rate; | 77 | gov_update_sample_delay(policy_dbs, 0); |
55 | sampling_rate *= od_dbs_info->rate_mult; | 78 | mutex_unlock(&policy_dbs->timer_mutex); |
79 | } | ||
56 | 80 | ||
57 | ignore_nice = od_tuners->ignore_nice_load; | 81 | return count; |
58 | } else { | 82 | } |
59 | sampling_rate = cs_tuners->sampling_rate; | 83 | EXPORT_SYMBOL_GPL(store_sampling_rate); |
60 | ignore_nice = cs_tuners->ignore_nice_load; | 84 | |
85 | /** | ||
86 | * gov_update_cpu_data - Update CPU load data. | ||
87 | * @dbs_data: Top-level governor data pointer. | ||
88 | * | ||
89 | * Update CPU load data for all CPUs in the domain governed by @dbs_data | ||
90 | * (that may be a single policy or a bunch of them if governor tunables are | ||
91 | * system-wide). | ||
92 | * | ||
93 | * Call under the @dbs_data mutex. | ||
94 | */ | ||
95 | void gov_update_cpu_data(struct dbs_data *dbs_data) | ||
96 | { | ||
97 | struct policy_dbs_info *policy_dbs; | ||
98 | |||
99 | list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { | ||
100 | unsigned int j; | ||
101 | |||
102 | for_each_cpu(j, policy_dbs->policy->cpus) { | ||
103 | struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); | ||
104 | |||
105 | j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, | ||
106 | dbs_data->io_is_busy); | ||
107 | if (dbs_data->ignore_nice_load) | ||
108 | j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; | ||
109 | } | ||
61 | } | 110 | } |
111 | } | ||
112 | EXPORT_SYMBOL_GPL(gov_update_cpu_data); | ||
113 | |||
114 | static inline struct dbs_data *to_dbs_data(struct kobject *kobj) | ||
115 | { | ||
116 | return container_of(kobj, struct dbs_data, kobj); | ||
117 | } | ||
118 | |||
119 | static inline struct governor_attr *to_gov_attr(struct attribute *attr) | ||
120 | { | ||
121 | return container_of(attr, struct governor_attr, attr); | ||
122 | } | ||
123 | |||
124 | static ssize_t governor_show(struct kobject *kobj, struct attribute *attr, | ||
125 | char *buf) | ||
126 | { | ||
127 | struct dbs_data *dbs_data = to_dbs_data(kobj); | ||
128 | struct governor_attr *gattr = to_gov_attr(attr); | ||
129 | |||
130 | return gattr->show(dbs_data, buf); | ||
131 | } | ||
132 | |||
133 | static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, | ||
134 | const char *buf, size_t count) | ||
135 | { | ||
136 | struct dbs_data *dbs_data = to_dbs_data(kobj); | ||
137 | struct governor_attr *gattr = to_gov_attr(attr); | ||
138 | int ret = -EBUSY; | ||
139 | |||
140 | mutex_lock(&dbs_data->mutex); | ||
141 | |||
142 | if (dbs_data->usage_count) | ||
143 | ret = gattr->store(dbs_data, buf, count); | ||
144 | |||
145 | mutex_unlock(&dbs_data->mutex); | ||
146 | |||
147 | return ret; | ||
148 | } | ||
149 | |||
150 | /* | ||
151 | * Sysfs Ops for accessing governor attributes. | ||
152 | * | ||
153 | * All show/store invocations for governor-specific sysfs attributes go | ||
154 | * through the show/store callbacks below first; the attribute-specific | ||
155 | * callback is then invoked from within them. | ||
156 | */ | ||
157 | static const struct sysfs_ops governor_sysfs_ops = { | ||
158 | .show = governor_show, | ||
159 | .store = governor_store, | ||
160 | }; | ||
161 | |||
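governor_show()/governor_store() are a thin dispatch layer: sysfs hands back the embedded kobject and attribute pointers, container_of() recovers the enclosing dbs_data and governor_attr, and the per-attribute callback runs. The same recover-the-outer-struct trick, self-contained (all names invented for the demo):

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct kobj { const char *name; };	/* framework-visible handles */
	struct attr { const char *name; };

	struct gov_data {			/* plays the role of dbs_data */
		int sampling_rate;
		struct kobj kobj;		/* embedded, like dbs_data.kobj */
	};

	struct gov_attr {			/* plays the role of governor_attr */
		struct attr attr;
		void (*show)(struct gov_data *gd);
	};

	static void show_sampling_rate(struct gov_data *gd)
	{
		printf("%d\n", gd->sampling_rate);
	}

	/* The generic callback the framework would invoke. */
	static void generic_show(struct kobj *kobj, struct attr *attr)
	{
		struct gov_data *gd = container_of(kobj, struct gov_data, kobj);
		struct gov_attr *ga = container_of(attr, struct gov_attr, attr);

		ga->show(gd);
	}

	int main(void)
	{
		struct gov_data gd = { 20000, { "conservative" } };
		struct gov_attr rate = { { "sampling_rate" }, show_sampling_rate };

		generic_show(&gd.kobj, &rate.attr);	/* prints 20000 */
		return 0;
	}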
162 | unsigned int dbs_update(struct cpufreq_policy *policy) | ||
163 | { | ||
164 | struct policy_dbs_info *policy_dbs = policy->governor_data; | ||
165 | struct dbs_data *dbs_data = policy_dbs->dbs_data; | ||
166 | unsigned int ignore_nice = dbs_data->ignore_nice_load; | ||
167 | unsigned int max_load = 0; | ||
168 | unsigned int sampling_rate, io_busy, j; | ||
169 | |||
170 | /* | ||
171 | * Sometimes governors may use an additional multiplier to increase | ||
172 | * sample delays temporarily. Apply that multiplier to sampling_rate | ||
173 | * so as to keep the wake-up-from-idle detection logic a bit | ||
174 | * conservative. | ||
175 | */ | ||
176 | sampling_rate = dbs_data->sampling_rate * policy_dbs->rate_mult; | ||
177 | /* | ||
178 | * For the purpose of ondemand, waiting for disk IO is an indication | ||
179 | * that you're performance critical, and not that the system is actually | ||
180 | * idle, so in that case do not add the iowait time to the CPU idle time. | ||
181 | */ | ||
182 | io_busy = dbs_data->io_is_busy; | ||
62 | 183 | ||
63 | /* Get Absolute Load */ | 184 | /* Get Absolute Load */ |
64 | for_each_cpu(j, policy->cpus) { | 185 | for_each_cpu(j, policy->cpus) { |
65 | struct cpu_dbs_info *j_cdbs; | 186 | struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); |
66 | u64 cur_wall_time, cur_idle_time; | 187 | u64 cur_wall_time, cur_idle_time; |
67 | unsigned int idle_time, wall_time; | 188 | unsigned int idle_time, wall_time; |
68 | unsigned int load; | 189 | unsigned int load; |
69 | int io_busy = 0; | ||
70 | |||
71 | j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); | ||
72 | 190 | ||
73 | /* | ||
74 | * For the purpose of ondemand, waiting for disk IO is | ||
75 | * an indication that you're performance critical, and | ||
76 | * not that the system is actually idle. So do not add | ||
77 | * the iowait time to the cpu idle time. | ||
78 | */ | ||
79 | if (dbs_data->cdata->governor == GOV_ONDEMAND) | ||
80 | io_busy = od_tuners->io_is_busy; | ||
81 | cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); | 191 | cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); |
82 | 192 | ||
83 | wall_time = (unsigned int) | 193 | wall_time = cur_wall_time - j_cdbs->prev_cpu_wall; |
84 | (cur_wall_time - j_cdbs->prev_cpu_wall); | ||
85 | j_cdbs->prev_cpu_wall = cur_wall_time; | 194 | j_cdbs->prev_cpu_wall = cur_wall_time; |
86 | 195 | ||
87 | if (cur_idle_time < j_cdbs->prev_cpu_idle) | 196 | if (cur_idle_time <= j_cdbs->prev_cpu_idle) { |
88 | cur_idle_time = j_cdbs->prev_cpu_idle; | 197 | idle_time = 0; |
89 | 198 | } else { | |
90 | idle_time = (unsigned int) | 199 | idle_time = cur_idle_time - j_cdbs->prev_cpu_idle; |
91 | (cur_idle_time - j_cdbs->prev_cpu_idle); | 200 | j_cdbs->prev_cpu_idle = cur_idle_time; |
92 | j_cdbs->prev_cpu_idle = cur_idle_time; | 201 | } |
93 | 202 | ||
94 | if (ignore_nice) { | 203 | if (ignore_nice) { |
95 | u64 cur_nice; | 204 | u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; |
96 | unsigned long cur_nice_jiffies; | ||
97 | |||
98 | cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - | ||
99 | cdbs->prev_cpu_nice; | ||
100 | /* | ||
101 | * Assumption: nice time between sampling periods will | ||
102 | * be less than 2^32 jiffies for 32 bit sys | ||
103 | */ | ||
104 | cur_nice_jiffies = (unsigned long) | ||
105 | cputime64_to_jiffies64(cur_nice); | ||
106 | 205 | ||
107 | cdbs->prev_cpu_nice = | 206 | idle_time += cputime_to_usecs(cur_nice - j_cdbs->prev_cpu_nice); |
108 | kcpustat_cpu(j).cpustat[CPUTIME_NICE]; | 207 | j_cdbs->prev_cpu_nice = cur_nice; |
109 | idle_time += jiffies_to_usecs(cur_nice_jiffies); | ||
110 | } | 208 | } |
111 | 209 | ||
112 | if (unlikely(!wall_time || wall_time < idle_time)) | 210 | if (unlikely(!wall_time || wall_time < idle_time)) |
@@ -128,10 +226,10 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) | |||
128 | * dropped down. So we perform the copy only once, upon the | 226 | * dropped down. So we perform the copy only once, upon the |
129 | * first wake-up from idle.) | 227 | * first wake-up from idle.) |
130 | * | 228 | * |
131 | * Detecting this situation is easy: the governor's deferrable | 229 | * Detecting this situation is easy: the governor's utilization |
132 | * timer would not have fired during CPU-idle periods. Hence | 230 | * update handler would not have run during CPU-idle periods. |
133 | * an unusually large 'wall_time' (as compared to the sampling | 231 | * Hence, an unusually large 'wall_time' (as compared to the |
134 | * rate) indicates this scenario. | 232 | * sampling rate) indicates this scenario. |
135 | * | 233 | * |
136 | * prev_load can be zero in two cases and we must recalculate it | 234 | * prev_load can be zero in two cases and we must recalculate it |
137 | * for both cases: | 235 | * for both cases: |
@@ -156,222 +254,224 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) | |||
156 | if (load > max_load) | 254 | if (load > max_load) |
157 | max_load = load; | 255 | max_load = load; |
158 | } | 256 | } |
159 | 257 | return max_load; | |
160 | dbs_data->cdata->gov_check_cpu(cpu, max_load); | ||
161 | } | 258 | } |
162 | EXPORT_SYMBOL_GPL(dbs_check_cpu); | 259 | EXPORT_SYMBOL_GPL(dbs_update); |
163 | 260 | ||
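dbs_update() now just returns the highest load seen across the policy's CPUs; the governor-specific gov_check_cpu() callback is gone. Per CPU the arithmetic is a delta against the previous sample: load = 100 * (wall - idle) / wall over the last window. The core calculation as a standalone function, leaving out the prev_load copy-on-wakeup special case handled further down:

	#include <stdio.h>

	struct cpu_sample {
		unsigned long long prev_wall, prev_idle;	/* us */
	};

	static unsigned int cpu_load(struct cpu_sample *s,
				     unsigned long long wall,
				     unsigned long long idle)
	{
		unsigned long long wall_d = wall - s->prev_wall;
		unsigned long long idle_d = 0;

		s->prev_wall = wall;
		if (idle > s->prev_idle) {	/* mirrors the <= check above */
			idle_d = idle - s->prev_idle;
			s->prev_idle = idle;
		}

		if (!wall_d || wall_d < idle_d)
			return 0;	/* bogus window (the kernel reuses prev_load) */
		return (unsigned int)(100 * (wall_d - idle_d) / wall_d);
	}

	int main(void)
	{
		struct cpu_sample s = { 0, 0 };

		/* 10 ms window, 7 ms of it idle -> 30% load */
		printf("load = %u%%\n", cpu_load(&s, 10000, 7000));
		return 0;
	}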
164 | void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay) | 261 | static void gov_set_update_util(struct policy_dbs_info *policy_dbs, |
262 | unsigned int delay_us) | ||
165 | { | 263 | { |
166 | struct dbs_data *dbs_data = policy->governor_data; | 264 | struct cpufreq_policy *policy = policy_dbs->policy; |
167 | struct cpu_dbs_info *cdbs; | ||
168 | int cpu; | 265 | int cpu; |
169 | 266 | ||
267 | gov_update_sample_delay(policy_dbs, delay_us); | ||
268 | policy_dbs->last_sample_time = 0; | ||
269 | |||
170 | for_each_cpu(cpu, policy->cpus) { | 270 | for_each_cpu(cpu, policy->cpus) { |
171 | cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); | 271 | struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu); |
172 | cdbs->timer.expires = jiffies + delay; | 272 | |
173 | add_timer_on(&cdbs->timer, cpu); | 273 | cpufreq_set_update_util_data(cpu, &cdbs->update_util); |
174 | } | 274 | } |
175 | } | 275 | } |
176 | EXPORT_SYMBOL_GPL(gov_add_timers); | ||
177 | 276 | ||
178 | static inline void gov_cancel_timers(struct cpufreq_policy *policy) | 277 | static inline void gov_clear_update_util(struct cpufreq_policy *policy) |
179 | { | 278 | { |
180 | struct dbs_data *dbs_data = policy->governor_data; | ||
181 | struct cpu_dbs_info *cdbs; | ||
182 | int i; | 279 | int i; |
183 | 280 | ||
184 | for_each_cpu(i, policy->cpus) { | 281 | for_each_cpu(i, policy->cpus) |
185 | cdbs = dbs_data->cdata->get_cpu_cdbs(i); | 282 | cpufreq_set_update_util_data(i, NULL); |
186 | del_timer_sync(&cdbs->timer); | ||
187 | } | ||
188 | } | ||
189 | 283 | ||
190 | void gov_cancel_work(struct cpu_common_dbs_info *shared) | 284 | synchronize_sched(); |
191 | { | ||
192 | /* Tell dbs_timer_handler() to skip queuing up work items. */ | ||
193 | atomic_inc(&shared->skip_work); | ||
194 | /* | ||
195 | * If dbs_timer_handler() is already running, it may not notice the | ||
196 | * incremented skip_work, so wait for it to complete to prevent its work | ||
197 | * item from being queued up after the cancel_work_sync() below. | ||
198 | */ | ||
199 | gov_cancel_timers(shared->policy); | ||
200 | /* | ||
201 | * In case dbs_timer_handler() managed to run and spawn a work item | ||
202 | * before the timers have been canceled, wait for that work item to | ||
203 | * complete and then cancel all of the timers set up by it. If | ||
204 | * dbs_timer_handler() runs again at that point, it will see the | ||
205 | * positive value of skip_work and won't spawn any more work items. | ||
206 | */ | ||
207 | cancel_work_sync(&shared->work); | ||
208 | gov_cancel_timers(shared->policy); | ||
209 | atomic_set(&shared->skip_work, 0); | ||
210 | } | 285 | } |
211 | EXPORT_SYMBOL_GPL(gov_cancel_work); | ||
212 | 286 | ||
213 | /* Will return if we need to evaluate cpu load again or not */ | 287 | static void gov_cancel_work(struct cpufreq_policy *policy) |
214 | static bool need_load_eval(struct cpu_common_dbs_info *shared, | ||
215 | unsigned int sampling_rate) | ||
216 | { | 288 | { |
217 | if (policy_is_shared(shared->policy)) { | 289 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
218 | ktime_t time_now = ktime_get(); | ||
219 | s64 delta_us = ktime_us_delta(time_now, shared->time_stamp); | ||
220 | |||
221 | /* Do nothing if we recently have sampled */ | ||
222 | if (delta_us < (s64)(sampling_rate / 2)) | ||
223 | return false; | ||
224 | else | ||
225 | shared->time_stamp = time_now; | ||
226 | } | ||
227 | 290 | ||
228 | return true; | 291 | gov_clear_update_util(policy_dbs->policy); |
292 | irq_work_sync(&policy_dbs->irq_work); | ||
293 | cancel_work_sync(&policy_dbs->work); | ||
294 | atomic_set(&policy_dbs->work_count, 0); | ||
295 | policy_dbs->work_in_progress = false; | ||
229 | } | 296 | } |
230 | 297 | ||
231 | static void dbs_work_handler(struct work_struct *work) | 298 | static void dbs_work_handler(struct work_struct *work) |
232 | { | 299 | { |
233 | struct cpu_common_dbs_info *shared = container_of(work, struct | 300 | struct policy_dbs_info *policy_dbs; |
234 | cpu_common_dbs_info, work); | ||
235 | struct cpufreq_policy *policy; | 301 | struct cpufreq_policy *policy; |
236 | struct dbs_data *dbs_data; | 302 | struct dbs_governor *gov; |
237 | unsigned int sampling_rate, delay; | ||
238 | bool eval_load; | ||
239 | |||
240 | policy = shared->policy; | ||
241 | dbs_data = policy->governor_data; | ||
242 | 303 | ||
243 | /* Kill all timers */ | 304 | policy_dbs = container_of(work, struct policy_dbs_info, work); |
244 | gov_cancel_timers(policy); | 305 | policy = policy_dbs->policy; |
306 | gov = dbs_governor_of(policy); | ||
245 | 307 | ||
246 | if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { | 308 | /* |
247 | struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; | 309 | * Make sure cpufreq_governor_limits() isn't evaluating load or the |
248 | 310 | * ondemand governor isn't updating the sampling rate in parallel. | |
249 | sampling_rate = cs_tuners->sampling_rate; | 311 | */ |
250 | } else { | 312 | mutex_lock(&policy_dbs->timer_mutex); |
251 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | 313 | gov_update_sample_delay(policy_dbs, gov->gov_dbs_timer(policy)); |
252 | 314 | mutex_unlock(&policy_dbs->timer_mutex); | |
253 | sampling_rate = od_tuners->sampling_rate; | ||
254 | } | ||
255 | |||
256 | eval_load = need_load_eval(shared, sampling_rate); | ||
257 | 315 | ||
316 | /* Allow the utilization update handler to queue up more work. */ | ||
317 | atomic_set(&policy_dbs->work_count, 0); | ||
258 | /* | 318 | /* |
259 | * Make sure cpufreq_governor_limits() isn't evaluating load in | 319 | * If the update below is reordered with respect to the sample delay |
260 | * parallel. | 320 | * modification, the utilization update handler may end up using a stale |
321 | * sample delay value. | ||
261 | */ | 322 | */ |
262 | mutex_lock(&shared->timer_mutex); | 323 | smp_wmb(); |
263 | delay = dbs_data->cdata->gov_dbs_timer(policy, eval_load); | 324 | policy_dbs->work_in_progress = false; |
264 | mutex_unlock(&shared->timer_mutex); | 325 | } |
265 | 326 | ||
266 | atomic_dec(&shared->skip_work); | 327 | static void dbs_irq_work(struct irq_work *irq_work) |
328 | { | ||
329 | struct policy_dbs_info *policy_dbs; | ||
267 | 330 | ||
268 | gov_add_timers(policy, delay); | 331 | policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work); |
332 | schedule_work(&policy_dbs->work); | ||
269 | } | 333 | } |
270 | 334 | ||
271 | static void dbs_timer_handler(unsigned long data) | 335 | static void dbs_update_util_handler(struct update_util_data *data, u64 time, |
336 | unsigned long util, unsigned long max) | ||
272 | { | 337 | { |
273 | struct cpu_dbs_info *cdbs = (struct cpu_dbs_info *)data; | 338 | struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); |
274 | struct cpu_common_dbs_info *shared = cdbs->shared; | 339 | struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; |
340 | u64 delta_ns, lst; | ||
275 | 341 | ||
276 | /* | 342 | /* |
277 | * Timer handler may not be allowed to queue the work at the moment, | 343 | * The work may not be allowed to be queued up right now. |
278 | * because: | 344 | * Possible reasons: |
279 | * - Another timer handler has done that | 345 | * - Work has already been queued up or is in progress. |
280 | * - We are stopping the governor | 346 | * - It is too early (too little time from the previous sample). |
281 | * - Or we are updating the sampling rate of the ondemand governor | ||
282 | */ | 347 | */ |
283 | if (atomic_inc_return(&shared->skip_work) > 1) | 348 | if (policy_dbs->work_in_progress) |
284 | atomic_dec(&shared->skip_work); | 349 | return; |
285 | else | ||
286 | queue_work(system_wq, &shared->work); | ||
287 | } | ||
288 | 350 | ||
289 | static void set_sampling_rate(struct dbs_data *dbs_data, | 351 | /* |
290 | unsigned int sampling_rate) | 352 | * If the reads below are reordered before the check above, the value |
291 | { | 353 | * of sample_delay_ns used in the computation may be stale. |
292 | if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { | 354 | */ |
293 | struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; | 355 | smp_rmb(); |
294 | cs_tuners->sampling_rate = sampling_rate; | 356 | lst = READ_ONCE(policy_dbs->last_sample_time); |
295 | } else { | 357 | delta_ns = time - lst; |
296 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | 358 | if ((s64)delta_ns < policy_dbs->sample_delay_ns) |
297 | od_tuners->sampling_rate = sampling_rate; | 359 | return; |
360 | |||
361 | /* | ||
362 | * If the policy is not shared, the irq_work may be queued up right away | ||
363 | * at this point. Otherwise, we need to ensure that only one of the | ||
364 | * CPUs sharing the policy will do that. | ||
365 | */ | ||
366 | if (policy_dbs->is_shared) { | ||
367 | if (!atomic_add_unless(&policy_dbs->work_count, 1, 1)) | ||
368 | return; | ||
369 | |||
370 | /* | ||
371 | * If another CPU updated last_sample_time in the meantime, we | ||
372 | * shouldn't be here, so clear the work counter and bail out. | ||
373 | */ | ||
374 | if (unlikely(lst != READ_ONCE(policy_dbs->last_sample_time))) { | ||
375 | atomic_set(&policy_dbs->work_count, 0); | ||
376 | return; | ||
377 | } | ||
298 | } | 378 | } |
379 | |||
380 | policy_dbs->last_sample_time = time; | ||
381 | policy_dbs->work_in_progress = true; | ||
382 | irq_work_queue(&policy_dbs->irq_work); | ||
299 | } | 383 | } |
300 | 384 | ||
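dbs_update_util_handler() is the heart of the timer-to-scheduler-hook conversion: it runs on every utilization update, so it must reject most invocations cheaply. Three gates: work already in flight, not enough time elapsed, and, for shared policies, losing the one-winner election on work_count. A compressed C11 model; the memory barriers and the stale-read subtleties discussed in the comments above are deliberately dropped:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct policy_dbs {
		bool work_in_progress;
		unsigned long long last_sample_time, sample_delay;	/* ns */
		atomic_int work_count;
		bool is_shared;
	};

	/* Called on every utilization update of any CPU in the policy;
	 * returns true when the irq_work should be queued. */
	static bool update_handler(struct policy_dbs *pd, unsigned long long now)
	{
		unsigned long long lst = pd->last_sample_time;
		int zero = 0;

		if (pd->work_in_progress)
			return false;			/* gate 1: work pending */
		if (now - lst < pd->sample_delay)
			return false;			/* gate 2: too early */

		if (pd->is_shared) {
			/* gate 3: only one CPU of the policy may queue the work */
			if (!atomic_compare_exchange_strong(&pd->work_count, &zero, 1))
				return false;
			if (lst != pd->last_sample_time) {	/* someone beat us */
				atomic_store(&pd->work_count, 0);
				return false;
			}
		}
		pd->last_sample_time = now;
		pd->work_in_progress = true;
		return true;
	}

	int main(void)
	{
		struct policy_dbs pd = { false, 0, 10000, 0, true };
		int a = update_handler(&pd, 20000);	/* 1: takes a sample */
		int b = update_handler(&pd, 21000);	/* 0: work in progress */

		printf("%d %d\n", a, b);
		return 0;
	}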
301 | static int alloc_common_dbs_info(struct cpufreq_policy *policy, | 385 | static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, |
302 | struct common_dbs_data *cdata) | 386 | struct dbs_governor *gov) |
303 | { | 387 | { |
304 | struct cpu_common_dbs_info *shared; | 388 | struct policy_dbs_info *policy_dbs; |
305 | int j; | 389 | int j; |
306 | 390 | ||
307 | /* Allocate memory for the common information for policy->cpus */ | 391 | /* Allocate memory for per-policy governor data. */ |
308 | shared = kzalloc(sizeof(*shared), GFP_KERNEL); | 392 | policy_dbs = gov->alloc(); |
309 | if (!shared) | 393 | if (!policy_dbs) |
310 | return -ENOMEM; | 394 | return NULL; |
311 | 395 | ||
312 | /* Set shared for all CPUs, online+offline */ | 396 | policy_dbs->policy = policy; |
313 | for_each_cpu(j, policy->related_cpus) | 397 | mutex_init(&policy_dbs->timer_mutex); |
314 | cdata->get_cpu_cdbs(j)->shared = shared; | 398 | atomic_set(&policy_dbs->work_count, 0); |
399 | init_irq_work(&policy_dbs->irq_work, dbs_irq_work); | ||
400 | INIT_WORK(&policy_dbs->work, dbs_work_handler); | ||
315 | 401 | ||
316 | mutex_init(&shared->timer_mutex); | 402 | /* Set policy_dbs for all CPUs, online+offline */ |
317 | atomic_set(&shared->skip_work, 0); | 403 | for_each_cpu(j, policy->related_cpus) { |
318 | INIT_WORK(&shared->work, dbs_work_handler); | 404 | struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); |
319 | return 0; | 405 | |
406 | j_cdbs->policy_dbs = policy_dbs; | ||
407 | j_cdbs->update_util.func = dbs_update_util_handler; | ||
408 | } | ||
409 | return policy_dbs; | ||
320 | } | 410 | } |
321 | 411 | ||
322 | static void free_common_dbs_info(struct cpufreq_policy *policy, | 412 | static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, |
323 | struct common_dbs_data *cdata) | 413 | struct dbs_governor *gov) |
324 | { | 414 | { |
325 | struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); | ||
326 | struct cpu_common_dbs_info *shared = cdbs->shared; | ||
327 | int j; | 415 | int j; |
328 | 416 | ||
329 | mutex_destroy(&shared->timer_mutex); | 417 | mutex_destroy(&policy_dbs->timer_mutex); |
330 | 418 | ||
331 | for_each_cpu(j, policy->cpus) | 419 | for_each_cpu(j, policy_dbs->policy->related_cpus) { |
332 | cdata->get_cpu_cdbs(j)->shared = NULL; | 420 | struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); |
333 | 421 | ||
334 | kfree(shared); | 422 | j_cdbs->policy_dbs = NULL; |
423 | j_cdbs->update_util.func = NULL; | ||
424 | } | ||
425 | gov->free(policy_dbs); | ||
335 | } | 426 | } |
336 | 427 | ||
337 | static int cpufreq_governor_init(struct cpufreq_policy *policy, | 428 | static int cpufreq_governor_init(struct cpufreq_policy *policy) |
338 | struct dbs_data *dbs_data, | ||
339 | struct common_dbs_data *cdata) | ||
340 | { | 429 | { |
430 | struct dbs_governor *gov = dbs_governor_of(policy); | ||
431 | struct dbs_data *dbs_data; | ||
432 | struct policy_dbs_info *policy_dbs; | ||
341 | unsigned int latency; | 433 | unsigned int latency; |
342 | int ret; | 434 | int ret = 0; |
343 | 435 | ||
344 | /* State should be equivalent to EXIT */ | 436 | /* State should be equivalent to EXIT */ |
345 | if (policy->governor_data) | 437 | if (policy->governor_data) |
346 | return -EBUSY; | 438 | return -EBUSY; |
347 | 439 | ||
348 | if (dbs_data) { | 440 | policy_dbs = alloc_policy_dbs_info(policy, gov); |
349 | if (WARN_ON(have_governor_per_policy())) | 441 | if (!policy_dbs) |
350 | return -EINVAL; | 442 | return -ENOMEM; |
351 | 443 | ||
352 | ret = alloc_common_dbs_info(policy, cdata); | 444 | /* Protect gov->gdbs_data against concurrent updates. */ |
353 | if (ret) | 445 | mutex_lock(&gov_dbs_data_mutex); |
354 | return ret; | ||
355 | 446 | ||
447 | dbs_data = gov->gdbs_data; | ||
448 | if (dbs_data) { | ||
449 | if (WARN_ON(have_governor_per_policy())) { | ||
450 | ret = -EINVAL; | ||
451 | goto free_policy_dbs_info; | ||
452 | } | ||
453 | policy_dbs->dbs_data = dbs_data; | ||
454 | policy->governor_data = policy_dbs; | ||
455 | |||
456 | mutex_lock(&dbs_data->mutex); | ||
356 | dbs_data->usage_count++; | 457 | dbs_data->usage_count++; |
357 | policy->governor_data = dbs_data; | 458 | list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); |
358 | return 0; | 459 | mutex_unlock(&dbs_data->mutex); |
460 | goto out; | ||
359 | } | 461 | } |
360 | 462 | ||
361 | dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); | 463 | dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); |
362 | if (!dbs_data) | 464 | if (!dbs_data) { |
363 | return -ENOMEM; | 465 | ret = -ENOMEM; |
364 | 466 | goto free_policy_dbs_info; | |
365 | ret = alloc_common_dbs_info(policy, cdata); | 467 | } |
366 | if (ret) | ||
367 | goto free_dbs_data; | ||
368 | 468 | ||
369 | dbs_data->cdata = cdata; | 469 | INIT_LIST_HEAD(&dbs_data->policy_dbs_list); |
370 | dbs_data->usage_count = 1; | 470 | mutex_init(&dbs_data->mutex); |
371 | 471 | ||
372 | ret = cdata->init(dbs_data, !policy->governor->initialized); | 472 | ret = gov->init(dbs_data, !policy->governor->initialized); |
373 | if (ret) | 473 | if (ret) |
374 | goto free_common_dbs_info; | 474 | goto free_policy_dbs_info; |
375 | 475 | ||
376 | /* policy latency is in ns. Convert it to us first */ | 476 | /* policy latency is in ns. Convert it to us first */ |
377 | latency = policy->cpuinfo.transition_latency / 1000; | 477 | latency = policy->cpuinfo.transition_latency / 1000; |
@@ -381,216 +481,156 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, | |||
381 | /* Bring kernel and HW constraints together */ | 481 | /* Bring kernel and HW constraints together */ |
382 | dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, | 482 | dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, |
383 | MIN_LATENCY_MULTIPLIER * latency); | 483 | MIN_LATENCY_MULTIPLIER * latency); |
384 | set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, | 484 | dbs_data->sampling_rate = max(dbs_data->min_sampling_rate, |
385 | latency * LATENCY_MULTIPLIER)); | 485 | LATENCY_MULTIPLIER * latency); |
386 | 486 | ||
387 | if (!have_governor_per_policy()) | 487 | if (!have_governor_per_policy()) |
388 | cdata->gdbs_data = dbs_data; | 488 | gov->gdbs_data = dbs_data; |
389 | 489 | ||
390 | policy->governor_data = dbs_data; | 490 | policy->governor_data = policy_dbs; |
391 | 491 | ||
392 | ret = sysfs_create_group(get_governor_parent_kobj(policy), | 492 | policy_dbs->dbs_data = dbs_data; |
393 | get_sysfs_attr(dbs_data)); | 493 | dbs_data->usage_count = 1; |
394 | if (ret) | 494 | list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); |
395 | goto reset_gdbs_data; | ||
396 | 495 | ||
397 | return 0; | 496 | gov->kobj_type.sysfs_ops = &governor_sysfs_ops; |
497 | ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, | ||
498 | get_governor_parent_kobj(policy), | ||
499 | "%s", gov->gov.name); | ||
500 | if (!ret) | ||
501 | goto out; | ||
502 | |||
503 | /* Failure, so roll back. */ | ||
504 | pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret); | ||
398 | 505 | ||
399 | reset_gdbs_data: | ||
400 | policy->governor_data = NULL; | 506 | policy->governor_data = NULL; |
401 | 507 | ||
402 | if (!have_governor_per_policy()) | 508 | if (!have_governor_per_policy()) |
403 | cdata->gdbs_data = NULL; | 509 | gov->gdbs_data = NULL; |
404 | cdata->exit(dbs_data, !policy->governor->initialized); | 510 | gov->exit(dbs_data, !policy->governor->initialized); |
405 | free_common_dbs_info: | ||
406 | free_common_dbs_info(policy, cdata); | ||
407 | free_dbs_data: | ||
408 | kfree(dbs_data); | 511 | kfree(dbs_data); |
512 | |||
513 | free_policy_dbs_info: | ||
514 | free_policy_dbs_info(policy_dbs, gov); | ||
515 | |||
516 | out: | ||
517 | mutex_unlock(&gov_dbs_data_mutex); | ||
409 | return ret; | 518 | return ret; |
410 | } | 519 | } |
411 | 520 | ||
412 | static int cpufreq_governor_exit(struct cpufreq_policy *policy, | 521 | static int cpufreq_governor_exit(struct cpufreq_policy *policy) |
413 | struct dbs_data *dbs_data) | ||
414 | { | 522 | { |
415 | struct common_dbs_data *cdata = dbs_data->cdata; | 523 | struct dbs_governor *gov = dbs_governor_of(policy); |
416 | struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); | 524 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
525 | struct dbs_data *dbs_data = policy_dbs->dbs_data; | ||
526 | int count; | ||
417 | 527 | ||
418 | /* State should be equivalent to INIT */ | 528 | /* Protect gov->gdbs_data against concurrent updates. */ |
419 | if (!cdbs->shared || cdbs->shared->policy) | 529 | mutex_lock(&gov_dbs_data_mutex); |
420 | return -EBUSY; | 530 | |
531 | mutex_lock(&dbs_data->mutex); | ||
532 | list_del(&policy_dbs->list); | ||
533 | count = --dbs_data->usage_count; | ||
534 | mutex_unlock(&dbs_data->mutex); | ||
421 | 535 | ||
422 | if (!--dbs_data->usage_count) { | 536 | if (!count) { |
423 | sysfs_remove_group(get_governor_parent_kobj(policy), | 537 | kobject_put(&dbs_data->kobj); |
424 | get_sysfs_attr(dbs_data)); | ||
425 | 538 | ||
426 | policy->governor_data = NULL; | 539 | policy->governor_data = NULL; |
427 | 540 | ||
428 | if (!have_governor_per_policy()) | 541 | if (!have_governor_per_policy()) |
429 | cdata->gdbs_data = NULL; | 542 | gov->gdbs_data = NULL; |
430 | 543 | ||
431 | cdata->exit(dbs_data, policy->governor->initialized == 1); | 544 | gov->exit(dbs_data, policy->governor->initialized == 1); |
545 | mutex_destroy(&dbs_data->mutex); | ||
432 | kfree(dbs_data); | 546 | kfree(dbs_data); |
433 | } else { | 547 | } else { |
434 | policy->governor_data = NULL; | 548 | policy->governor_data = NULL; |
435 | } | 549 | } |
436 | 550 | ||
437 | free_common_dbs_info(policy, cdata); | 551 | free_policy_dbs_info(policy_dbs, gov); |
552 | |||
553 | mutex_unlock(&gov_dbs_data_mutex); | ||
438 | return 0; | 554 | return 0; |
439 | } | 555 | } |
440 | 556 | ||
441 | static int cpufreq_governor_start(struct cpufreq_policy *policy, | 557 | static int cpufreq_governor_start(struct cpufreq_policy *policy) |
442 | struct dbs_data *dbs_data) | ||
443 | { | 558 | { |
444 | struct common_dbs_data *cdata = dbs_data->cdata; | 559 | struct dbs_governor *gov = dbs_governor_of(policy); |
445 | unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; | 560 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
446 | struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); | 561 | struct dbs_data *dbs_data = policy_dbs->dbs_data; |
447 | struct cpu_common_dbs_info *shared = cdbs->shared; | 562 | unsigned int sampling_rate, ignore_nice, j; |
448 | int io_busy = 0; | 563 | unsigned int io_busy; |
449 | 564 | ||
450 | if (!policy->cur) | 565 | if (!policy->cur) |
451 | return -EINVAL; | 566 | return -EINVAL; |
452 | 567 | ||
453 | /* State should be equivalent to INIT */ | 568 | policy_dbs->is_shared = policy_is_shared(policy); |
454 | if (!shared || shared->policy) | 569 | policy_dbs->rate_mult = 1; |
455 | return -EBUSY; | ||
456 | 570 | ||
457 | if (cdata->governor == GOV_CONSERVATIVE) { | 571 | sampling_rate = dbs_data->sampling_rate; |
458 | struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; | 572 | ignore_nice = dbs_data->ignore_nice_load; |
459 | 573 | io_busy = dbs_data->io_is_busy; | |
460 | sampling_rate = cs_tuners->sampling_rate; | ||
461 | ignore_nice = cs_tuners->ignore_nice_load; | ||
462 | } else { | ||
463 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | ||
464 | |||
465 | sampling_rate = od_tuners->sampling_rate; | ||
466 | ignore_nice = od_tuners->ignore_nice_load; | ||
467 | io_busy = od_tuners->io_is_busy; | ||
468 | } | ||
469 | |||
470 | shared->policy = policy; | ||
471 | shared->time_stamp = ktime_get(); | ||
472 | 574 | ||
473 | for_each_cpu(j, policy->cpus) { | 575 | for_each_cpu(j, policy->cpus) { |
474 | struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); | 576 | struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); |
475 | unsigned int prev_load; | 577 | unsigned int prev_load; |
476 | 578 | ||
477 | j_cdbs->prev_cpu_idle = | 579 | j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); |
478 | get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); | ||
479 | 580 | ||
480 | prev_load = (unsigned int)(j_cdbs->prev_cpu_wall - | 581 | prev_load = j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle; |
481 | j_cdbs->prev_cpu_idle); | 582 | j_cdbs->prev_load = 100 * prev_load / (unsigned int)j_cdbs->prev_cpu_wall; |
482 | j_cdbs->prev_load = 100 * prev_load / | ||
483 | (unsigned int)j_cdbs->prev_cpu_wall; | ||
484 | 583 | ||
485 | if (ignore_nice) | 584 | if (ignore_nice) |
486 | j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; | 585 | j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; |
487 | |||
488 | __setup_timer(&j_cdbs->timer, dbs_timer_handler, | ||
489 | (unsigned long)j_cdbs, | ||
490 | TIMER_DEFERRABLE | TIMER_IRQSAFE); | ||
491 | } | 586 | } |
492 | 587 | ||
493 | if (cdata->governor == GOV_CONSERVATIVE) { | 588 | gov->start(policy); |
494 | struct cs_cpu_dbs_info_s *cs_dbs_info = | ||
495 | cdata->get_cpu_dbs_info_s(cpu); | ||
496 | |||
497 | cs_dbs_info->down_skip = 0; | ||
498 | cs_dbs_info->requested_freq = policy->cur; | ||
499 | } else { | ||
500 | struct od_ops *od_ops = cdata->gov_ops; | ||
501 | struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu); | ||
502 | |||
503 | od_dbs_info->rate_mult = 1; | ||
504 | od_dbs_info->sample_type = OD_NORMAL_SAMPLE; | ||
505 | od_ops->powersave_bias_init_cpu(cpu); | ||
506 | } | ||
507 | 589 | ||
508 | gov_add_timers(policy, delay_for_sampling_rate(sampling_rate)); | 590 | gov_set_update_util(policy_dbs, sampling_rate); |
509 | return 0; | 591 | return 0; |
510 | } | 592 | } |
511 | 593 | ||
512 | static int cpufreq_governor_stop(struct cpufreq_policy *policy, | 594 | static int cpufreq_governor_stop(struct cpufreq_policy *policy) |
513 | struct dbs_data *dbs_data) | ||
514 | { | 595 | { |
515 | struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu); | 596 | gov_cancel_work(policy); |
516 | struct cpu_common_dbs_info *shared = cdbs->shared; | ||
517 | |||
518 | /* State should be equivalent to START */ | ||
519 | if (!shared || !shared->policy) | ||
520 | return -EBUSY; | ||
521 | |||
522 | gov_cancel_work(shared); | ||
523 | shared->policy = NULL; | ||
524 | |||
525 | return 0; | 597 | return 0; |
526 | } | 598 | } |
527 | 599 | ||
528 | static int cpufreq_governor_limits(struct cpufreq_policy *policy, | 600 | static int cpufreq_governor_limits(struct cpufreq_policy *policy) |
529 | struct dbs_data *dbs_data) | ||
530 | { | 601 | { |
531 | struct common_dbs_data *cdata = dbs_data->cdata; | 602 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
532 | unsigned int cpu = policy->cpu; | ||
533 | struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); | ||
534 | 603 | ||
535 | /* State should be equivalent to START */ | 604 | mutex_lock(&policy_dbs->timer_mutex); |
536 | if (!cdbs->shared || !cdbs->shared->policy) | 605 | |
537 | return -EBUSY; | 606 | if (policy->max < policy->cur) |
607 | __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); | ||
608 | else if (policy->min > policy->cur) | ||
609 | __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); | ||
610 | |||
611 | gov_update_sample_delay(policy_dbs, 0); | ||
538 | 612 | ||
539 | mutex_lock(&cdbs->shared->timer_mutex); | 613 | mutex_unlock(&policy_dbs->timer_mutex); |
540 | if (policy->max < cdbs->shared->policy->cur) | ||
541 | __cpufreq_driver_target(cdbs->shared->policy, policy->max, | ||
542 | CPUFREQ_RELATION_H); | ||
543 | else if (policy->min > cdbs->shared->policy->cur) | ||
544 | __cpufreq_driver_target(cdbs->shared->policy, policy->min, | ||
545 | CPUFREQ_RELATION_L); | ||
546 | dbs_check_cpu(dbs_data, cpu); | ||
547 | mutex_unlock(&cdbs->shared->timer_mutex); | ||
548 | 614 | ||
549 | return 0; | 615 | return 0; |
550 | } | 616 | } |
551 | 617 | ||
552 | int cpufreq_governor_dbs(struct cpufreq_policy *policy, | 618 | int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) |
553 | struct common_dbs_data *cdata, unsigned int event) | ||
554 | { | 619 | { |
555 | struct dbs_data *dbs_data; | 620 | if (event == CPUFREQ_GOV_POLICY_INIT) { |
556 | int ret; | 621 | return cpufreq_governor_init(policy); |
557 | 622 | } else if (policy->governor_data) { | |
558 | /* Lock governor to block concurrent initialization of governor */ | 623 | switch (event) { |
559 | mutex_lock(&cdata->mutex); | 624 | case CPUFREQ_GOV_POLICY_EXIT: |
560 | 625 | return cpufreq_governor_exit(policy); | |
561 | if (have_governor_per_policy()) | 626 | case CPUFREQ_GOV_START: |
562 | dbs_data = policy->governor_data; | 627 | return cpufreq_governor_start(policy); |
563 | else | 628 | case CPUFREQ_GOV_STOP: |
564 | dbs_data = cdata->gdbs_data; | 629 | return cpufreq_governor_stop(policy); |
565 | 630 | case CPUFREQ_GOV_LIMITS: | |
566 | if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) { | 631 | return cpufreq_governor_limits(policy); |
567 | ret = -EINVAL; | 632 | } |
568 | goto unlock; | ||
569 | } | ||
570 | |||
571 | switch (event) { | ||
572 | case CPUFREQ_GOV_POLICY_INIT: | ||
573 | ret = cpufreq_governor_init(policy, dbs_data, cdata); | ||
574 | break; | ||
575 | case CPUFREQ_GOV_POLICY_EXIT: | ||
576 | ret = cpufreq_governor_exit(policy, dbs_data); | ||
577 | break; | ||
578 | case CPUFREQ_GOV_START: | ||
579 | ret = cpufreq_governor_start(policy, dbs_data); | ||
580 | break; | ||
581 | case CPUFREQ_GOV_STOP: | ||
582 | ret = cpufreq_governor_stop(policy, dbs_data); | ||
583 | break; | ||
584 | case CPUFREQ_GOV_LIMITS: | ||
585 | ret = cpufreq_governor_limits(policy, dbs_data); | ||
586 | break; | ||
587 | default: | ||
588 | ret = -EINVAL; | ||
589 | } | 633 | } |
590 | 634 | return -EINVAL; | |
591 | unlock: | ||
592 | mutex_unlock(&cdata->mutex); | ||
593 | |||
594 | return ret; | ||
595 | } | 635 | } |
596 | EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); | 636 | EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); |
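
The rework above funnels every governor event through the single cpufreq_governor_dbs() entry point, and every event other than CPUFREQ_GOV_POLICY_INIT is refused until a prior init has populated policy->governor_data. A minimal user-space sketch of that dispatch ordering (hypothetical names and error values, not the kernel API):

#include <stdio.h>
#include <stdlib.h>

enum gov_event { GOV_POLICY_INIT, GOV_POLICY_EXIT, GOV_START, GOV_STOP, GOV_LIMITS };

struct policy {
        void *governor_data;            /* set by init, cleared by exit */
};

/* Mirrors the dispatch shape of cpufreq_governor_dbs(): INIT is the only
 * event accepted while governor_data is still NULL. */
static int governor_dbs(struct policy *p, enum gov_event ev)
{
        if (ev == GOV_POLICY_INIT) {
                if (p->governor_data)
                        return -1;      /* like -EBUSY: already initialized */
                p->governor_data = malloc(1);
                return p->governor_data ? 0 : -1;
        }
        if (!p->governor_data)
                return -1;              /* like -EINVAL: no prior init */
        switch (ev) {
        case GOV_POLICY_EXIT:
                free(p->governor_data);
                p->governor_data = NULL;
                return 0;
        case GOV_START:
        case GOV_STOP:
        case GOV_LIMITS:
                return 0;               /* real work elided */
        default:
                return -1;
        }
}

int main(void)
{
        struct policy p = { 0 };

        printf("START before INIT: %d\n", governor_dbs(&p, GOV_START)); /* fails */
        printf("INIT:              %d\n", governor_dbs(&p, GOV_POLICY_INIT));
        printf("START after INIT:  %d\n", governor_dbs(&p, GOV_START));
        printf("EXIT:              %d\n", governor_dbs(&p, GOV_POLICY_EXIT));
        return 0;
}

The governor_data check in front of the switch is what lets the new dispatcher drop the old per-event dbs_data lookup.
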
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 91e767a058a7..61ff82fe0613 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h | |||
@@ -18,6 +18,7 @@ | |||
18 | #define _CPUFREQ_GOVERNOR_H | 18 | #define _CPUFREQ_GOVERNOR_H |
19 | 19 | ||
20 | #include <linux/atomic.h> | 20 | #include <linux/atomic.h> |
21 | #include <linux/irq_work.h> | ||
21 | #include <linux/cpufreq.h> | 22 | #include <linux/cpufreq.h> |
22 | #include <linux/kernel_stat.h> | 23 | #include <linux/kernel_stat.h> |
23 | #include <linux/module.h> | 24 | #include <linux/module.h> |
@@ -41,96 +42,68 @@ | |||
41 | enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; | 42 | enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; |
42 | 43 | ||
43 | /* | 44 | /* |
44 | * Macro for creating governors sysfs routines | 45 | * Abbreviations: |
45 | * | 46 | * dbs: used as a shortform for demand based switching It helps to keep variable |
46 | * - gov_sys: One governor instance per whole system | 47 | * names smaller, simpler |
47 | * - gov_pol: One governor instance per policy | 48 | * cdbs: common dbs |
49 | * od_*: On-demand governor | ||
50 | * cs_*: Conservative governor | ||
48 | */ | 51 | */ |
49 | 52 | ||
50 | /* Create attributes */ | 53 | /* Governor demand based switching data (per-policy or global). */ |
51 | #define gov_sys_attr_ro(_name) \ | 54 | struct dbs_data { |
52 | static struct global_attr _name##_gov_sys = \ | 55 | int usage_count; |
53 | __ATTR(_name, 0444, show_##_name##_gov_sys, NULL) | 56 | void *tuners; |
54 | 57 | unsigned int min_sampling_rate; | |
55 | #define gov_sys_attr_rw(_name) \ | 58 | unsigned int ignore_nice_load; |
56 | static struct global_attr _name##_gov_sys = \ | 59 | unsigned int sampling_rate; |
57 | __ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) | 60 | unsigned int sampling_down_factor; |
58 | 61 | unsigned int up_threshold; | |
59 | #define gov_pol_attr_ro(_name) \ | 62 | unsigned int io_is_busy; |
60 | static struct freq_attr _name##_gov_pol = \ | ||
61 | __ATTR(_name, 0444, show_##_name##_gov_pol, NULL) | ||
62 | |||
63 | #define gov_pol_attr_rw(_name) \ | ||
64 | static struct freq_attr _name##_gov_pol = \ | ||
65 | __ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) | ||
66 | 63 | ||
67 | #define gov_sys_pol_attr_rw(_name) \ | 64 | struct kobject kobj; |
68 | gov_sys_attr_rw(_name); \ | 65 | struct list_head policy_dbs_list; |
69 | gov_pol_attr_rw(_name) | 66 | /* |
67 | * Protect concurrent updates to governor tunables from sysfs, | ||
68 | * policy_dbs_list and usage_count. | ||
69 | */ | ||
70 | struct mutex mutex; | ||
71 | }; | ||
70 | 72 | ||
71 | #define gov_sys_pol_attr_ro(_name) \ | 73 | /* Governor-specific attributes */
72 | gov_sys_attr_ro(_name); \ | 74 | struct dbs_data; |
73 | gov_pol_attr_ro(_name) | 75 | struct governor_attr { |
76 | struct attribute attr; | ||
77 | ssize_t (*show)(struct dbs_data *dbs_data, char *buf); | ||
78 | ssize_t (*store)(struct dbs_data *dbs_data, const char *buf, | ||
79 | size_t count); | ||
80 | }; | ||
74 | 81 | ||
75 | /* Create show/store routines */ | 82 | #define gov_show_one(_gov, file_name) \ |
76 | #define show_one(_gov, file_name) \ | 83 | static ssize_t show_##file_name \ |
77 | static ssize_t show_##file_name##_gov_sys \ | 84 | (struct dbs_data *dbs_data, char *buf) \ |
78 | (struct kobject *kobj, struct attribute *attr, char *buf) \ | ||
79 | { \ | 85 | { \ |
80 | struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \ | ||
81 | return sprintf(buf, "%u\n", tuners->file_name); \ | ||
82 | } \ | ||
83 | \ | ||
84 | static ssize_t show_##file_name##_gov_pol \ | ||
85 | (struct cpufreq_policy *policy, char *buf) \ | ||
86 | { \ | ||
87 | struct dbs_data *dbs_data = policy->governor_data; \ | ||
88 | struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ | 86 | struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ |
89 | return sprintf(buf, "%u\n", tuners->file_name); \ | 87 | return sprintf(buf, "%u\n", tuners->file_name); \ |
90 | } | 88 | } |
91 | 89 | ||
92 | #define store_one(_gov, file_name) \ | 90 | #define gov_show_one_common(file_name) \ |
93 | static ssize_t store_##file_name##_gov_sys \ | 91 | static ssize_t show_##file_name \ |
94 | (struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \ | 92 | (struct dbs_data *dbs_data, char *buf) \ |
95 | { \ | ||
96 | struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ | ||
97 | return store_##file_name(dbs_data, buf, count); \ | ||
98 | } \ | ||
99 | \ | ||
100 | static ssize_t store_##file_name##_gov_pol \ | ||
101 | (struct cpufreq_policy *policy, const char *buf, size_t count) \ | ||
102 | { \ | 93 | { \ |
103 | struct dbs_data *dbs_data = policy->governor_data; \ | 94 | return sprintf(buf, "%u\n", dbs_data->file_name); \ |
104 | return store_##file_name(dbs_data, buf, count); \ | ||
105 | } | 95 | } |
106 | 96 | ||
107 | #define show_store_one(_gov, file_name) \ | 97 | #define gov_attr_ro(_name) \ |
108 | show_one(_gov, file_name); \ | 98 | static struct governor_attr _name = \ |
109 | store_one(_gov, file_name) | 99 | __ATTR(_name, 0444, show_##_name, NULL) |
110 | 100 | ||
111 | /* create helper routines */ | 101 | #define gov_attr_rw(_name) \ |
112 | #define define_get_cpu_dbs_routines(_dbs_info) \ | 102 | static struct governor_attr _name = \ |
113 | static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ | 103 | __ATTR(_name, 0644, show_##_name, store_##_name) |
114 | { \ | ||
115 | return &per_cpu(_dbs_info, cpu).cdbs; \ | ||
116 | } \ | ||
117 | \ | ||
118 | static void *get_cpu_dbs_info_s(int cpu) \ | ||
119 | { \ | ||
120 | return &per_cpu(_dbs_info, cpu); \ | ||
121 | } | ||
122 | |||
123 | /* | ||
124 | * Abbreviations: | ||
125 | * dbs: used as a shortform for demand based switching It helps to keep variable | ||
126 | * names smaller, simpler | ||
127 | * cdbs: common dbs | ||
128 | * od_*: On-demand governor | ||
129 | * cs_*: Conservative governor | ||
130 | */ | ||
131 | 104 | ||
132 | /* Common to all CPUs of a policy */ | 105 | /* Common to all CPUs of a policy */ |
133 | struct cpu_common_dbs_info { | 106 | struct policy_dbs_info { |
134 | struct cpufreq_policy *policy; | 107 | struct cpufreq_policy *policy; |
135 | /* | 108 | /* |
136 | * Per policy mutex that serializes load evaluation from limit-change | 109 | * Per policy mutex that serializes load evaluation from limit-change |
@@ -138,11 +111,27 @@ struct cpu_common_dbs_info { | |||
138 | */ | 111 | */ |
139 | struct mutex timer_mutex; | 112 | struct mutex timer_mutex; |
140 | 113 | ||
141 | ktime_t time_stamp; | 114 | u64 last_sample_time; |
142 | atomic_t skip_work; | 115 | s64 sample_delay_ns; |
116 | atomic_t work_count; | ||
117 | struct irq_work irq_work; | ||
143 | struct work_struct work; | 118 | struct work_struct work; |
119 | /* dbs_data may be shared between multiple policy objects */ | ||
120 | struct dbs_data *dbs_data; | ||
121 | struct list_head list; | ||
122 | /* Multiplier for increasing sample delay temporarily. */ | ||
123 | unsigned int rate_mult; | ||
124 | /* Status indicators */ | ||
125 | bool is_shared; /* This object is used by multiple CPUs */ | ||
126 | bool work_in_progress; /* Work is being queued up or in progress */ | ||
144 | }; | 127 | }; |
145 | 128 | ||
129 | static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, | ||
130 | unsigned int delay_us) | ||
131 | { | ||
132 | policy_dbs->sample_delay_ns = delay_us * NSEC_PER_USEC; | ||
133 | } | ||
134 | |||
146 | /* Per cpu structures */ | 135 | /* Per cpu structures */ |
147 | struct cpu_dbs_info { | 136 | struct cpu_dbs_info { |
148 | u64 prev_cpu_idle; | 137 | u64 prev_cpu_idle; |
@@ -155,54 +144,14 @@ struct cpu_dbs_info { | |||
155 | * wake-up from idle. | 144 | * wake-up from idle. |
156 | */ | 145 | */ |
157 | unsigned int prev_load; | 146 | unsigned int prev_load; |
158 | struct timer_list timer; | 147 | struct update_util_data update_util; |
159 | struct cpu_common_dbs_info *shared; | 148 | struct policy_dbs_info *policy_dbs; |
160 | }; | ||
161 | |||
162 | struct od_cpu_dbs_info_s { | ||
163 | struct cpu_dbs_info cdbs; | ||
164 | struct cpufreq_frequency_table *freq_table; | ||
165 | unsigned int freq_lo; | ||
166 | unsigned int freq_lo_jiffies; | ||
167 | unsigned int freq_hi_jiffies; | ||
168 | unsigned int rate_mult; | ||
169 | unsigned int sample_type:1; | ||
170 | }; | ||
171 | |||
172 | struct cs_cpu_dbs_info_s { | ||
173 | struct cpu_dbs_info cdbs; | ||
174 | unsigned int down_skip; | ||
175 | unsigned int requested_freq; | ||
176 | }; | ||
177 | |||
178 | /* Per policy Governors sysfs tunables */ | ||
179 | struct od_dbs_tuners { | ||
180 | unsigned int ignore_nice_load; | ||
181 | unsigned int sampling_rate; | ||
182 | unsigned int sampling_down_factor; | ||
183 | unsigned int up_threshold; | ||
184 | unsigned int powersave_bias; | ||
185 | unsigned int io_is_busy; | ||
186 | }; | ||
187 | |||
188 | struct cs_dbs_tuners { | ||
189 | unsigned int ignore_nice_load; | ||
190 | unsigned int sampling_rate; | ||
191 | unsigned int sampling_down_factor; | ||
192 | unsigned int up_threshold; | ||
193 | unsigned int down_threshold; | ||
194 | unsigned int freq_step; | ||
195 | }; | 149 | }; |
196 | 150 | ||
197 | /* Common Governor data across policies */ | 151 | /* Common Governor data across policies */ |
198 | struct dbs_data; | 152 | struct dbs_governor { |
199 | struct common_dbs_data { | 153 | struct cpufreq_governor gov; |
200 | /* Common across governors */ | 154 | struct kobj_type kobj_type; |
201 | #define GOV_ONDEMAND 0 | ||
202 | #define GOV_CONSERVATIVE 1 | ||
203 | int governor; | ||
204 | struct attribute_group *attr_group_gov_sys; /* one governor - system */ | ||
205 | struct attribute_group *attr_group_gov_pol; /* one governor - policy */ | ||
206 | 155 | ||
207 | /* | 156 | /* |
208 | * Common data for platforms that don't set | 157 | * Common data for platforms that don't set |
@@ -210,74 +159,32 @@ struct common_dbs_data { | |||
210 | */ | 159 | */ |
211 | struct dbs_data *gdbs_data; | 160 | struct dbs_data *gdbs_data; |
212 | 161 | ||
213 | struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); | 162 | unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); |
214 | void *(*get_cpu_dbs_info_s)(int cpu); | 163 | struct policy_dbs_info *(*alloc)(void); |
215 | unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy, | 164 | void (*free)(struct policy_dbs_info *policy_dbs); |
216 | bool modify_all); | ||
217 | void (*gov_check_cpu)(int cpu, unsigned int load); | ||
218 | int (*init)(struct dbs_data *dbs_data, bool notify); | 165 | int (*init)(struct dbs_data *dbs_data, bool notify); |
219 | void (*exit)(struct dbs_data *dbs_data, bool notify); | 166 | void (*exit)(struct dbs_data *dbs_data, bool notify); |
220 | 167 | void (*start)(struct cpufreq_policy *policy); | |
221 | /* Governor specific ops, see below */ | ||
222 | void *gov_ops; | ||
223 | |||
224 | /* | ||
225 | * Protects governor's data (struct dbs_data and struct common_dbs_data) | ||
226 | */ | ||
227 | struct mutex mutex; | ||
228 | }; | 168 | }; |
229 | 169 | ||
230 | /* Governor Per policy data */ | 170 | static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy) |
231 | struct dbs_data { | 171 | { |
232 | struct common_dbs_data *cdata; | 172 | return container_of(policy->governor, struct dbs_governor, gov); |
233 | unsigned int min_sampling_rate; | 173 | } |
234 | int usage_count; | ||
235 | void *tuners; | ||
236 | }; | ||
237 | 174 | ||
238 | /* Governor specific ops, will be passed to dbs_data->gov_ops */ | 175 | /* Governor specific operations */ |
239 | struct od_ops { | 176 | struct od_ops { |
240 | void (*powersave_bias_init_cpu)(int cpu); | ||
241 | unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, | 177 | unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, |
242 | unsigned int freq_next, unsigned int relation); | 178 | unsigned int freq_next, unsigned int relation); |
243 | void (*freq_increase)(struct cpufreq_policy *policy, unsigned int freq); | ||
244 | }; | 179 | }; |
245 | 180 | ||
246 | static inline int delay_for_sampling_rate(unsigned int sampling_rate) | 181 | unsigned int dbs_update(struct cpufreq_policy *policy); |
247 | { | 182 | int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); |
248 | int delay = usecs_to_jiffies(sampling_rate); | ||
249 | |||
250 | /* We want all CPUs to do sampling nearly on same jiffy */ | ||
251 | if (num_online_cpus() > 1) | ||
252 | delay -= jiffies % delay; | ||
253 | |||
254 | return delay; | ||
255 | } | ||
256 | |||
257 | #define declare_show_sampling_rate_min(_gov) \ | ||
258 | static ssize_t show_sampling_rate_min_gov_sys \ | ||
259 | (struct kobject *kobj, struct attribute *attr, char *buf) \ | ||
260 | { \ | ||
261 | struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ | ||
262 | return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ | ||
263 | } \ | ||
264 | \ | ||
265 | static ssize_t show_sampling_rate_min_gov_pol \ | ||
266 | (struct cpufreq_policy *policy, char *buf) \ | ||
267 | { \ | ||
268 | struct dbs_data *dbs_data = policy->governor_data; \ | ||
269 | return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ | ||
270 | } | ||
271 | |||
272 | extern struct mutex cpufreq_governor_lock; | ||
273 | |||
274 | void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay); | ||
275 | void gov_cancel_work(struct cpu_common_dbs_info *shared); | ||
276 | void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); | ||
277 | int cpufreq_governor_dbs(struct cpufreq_policy *policy, | ||
278 | struct common_dbs_data *cdata, unsigned int event); | ||
279 | void od_register_powersave_bias_handler(unsigned int (*f) | 183 | void od_register_powersave_bias_handler(unsigned int (*f) |
280 | (struct cpufreq_policy *, unsigned int, unsigned int), | 184 | (struct cpufreq_policy *, unsigned int, unsigned int), |
281 | unsigned int powersave_bias); | 185 | unsigned int powersave_bias); |
282 | void od_unregister_powersave_bias_handler(void); | 186 | void od_unregister_powersave_bias_handler(void); |
187 | ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, | ||
188 | size_t count); | ||
189 | void gov_update_cpu_data(struct dbs_data *dbs_data); | ||
283 | #endif /* _CPUFREQ_GOVERNOR_H */ | 190 | #endif /* _CPUFREQ_GOVERNOR_H */ |
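
dbs_governor_of() above recovers the wrapping dbs_governor from the embedded cpufreq_governor with container_of(). A freestanding sketch of that pointer arithmetic, using an offsetof()-based container_of the way the kernel macro works (simplified types, not the kernel headers):

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct cpufreq_governor {
        const char *name;
};

struct dbs_governor {
        struct cpufreq_governor gov;    /* embedded member */
        int gdbs_tunable;               /* wrapper-only data */
};

static struct dbs_governor od = {
        .gov = { .name = "ondemand" },
        .gdbs_tunable = 42,
};

int main(void)
{
        /* The core only ever hands back &od.gov; recover the enclosing
         * wrapper from that inner pointer, as dbs_governor_of() does
         * with policy->governor. */
        struct cpufreq_governor *g = &od.gov;
        struct dbs_governor *dbs = container_of(g, struct dbs_governor, gov);

        printf("%s -> gdbs_tunable = %d\n", g->name, dbs->gdbs_tunable);
        return 0;
}
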
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index eae51070c034..acd80272ded6 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c | |||
@@ -16,7 +16,8 @@ | |||
16 | #include <linux/percpu-defs.h> | 16 | #include <linux/percpu-defs.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/tick.h> | 18 | #include <linux/tick.h> |
19 | #include "cpufreq_governor.h" | 19 | |
20 | #include "cpufreq_ondemand.h" | ||
20 | 21 | ||
21 | /* On-demand governor macros */ | 22 | /* On-demand governor macros */ |
22 | #define DEF_FREQUENCY_UP_THRESHOLD (80) | 23 | #define DEF_FREQUENCY_UP_THRESHOLD (80) |
@@ -27,24 +28,10 @@ | |||
27 | #define MIN_FREQUENCY_UP_THRESHOLD (11) | 28 | #define MIN_FREQUENCY_UP_THRESHOLD (11) |
28 | #define MAX_FREQUENCY_UP_THRESHOLD (100) | 29 | #define MAX_FREQUENCY_UP_THRESHOLD (100) |
29 | 30 | ||
30 | static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); | ||
31 | |||
32 | static struct od_ops od_ops; | 31 | static struct od_ops od_ops; |
33 | 32 | ||
34 | #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND | ||
35 | static struct cpufreq_governor cpufreq_gov_ondemand; | ||
36 | #endif | ||
37 | |||
38 | static unsigned int default_powersave_bias; | 33 | static unsigned int default_powersave_bias; |
39 | 34 | ||
40 | static void ondemand_powersave_bias_init_cpu(int cpu) | ||
41 | { | ||
42 | struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); | ||
43 | |||
44 | dbs_info->freq_table = cpufreq_frequency_get_table(cpu); | ||
45 | dbs_info->freq_lo = 0; | ||
46 | } | ||
47 | |||
48 | /* | 35 | /* |
49 | * Not all CPUs want IO time to be accounted as busy; this depends on how | 36 | * Not all CPUs want IO time to be accounted as busy; this depends on how |
50 | * efficient idling at a higher frequency/voltage is. | 37 | * efficient idling at a higher frequency/voltage is. |
@@ -70,8 +57,8 @@ static int should_io_be_busy(void) | |||
70 | 57 | ||
71 | /* | 58 | /* |
72 | * Find right freq to be set now with powersave_bias on. | 59 | * Find right freq to be set now with powersave_bias on. |
73 | * Returns the freq_hi to be used right now and will set freq_hi_jiffies, | 60 | * Returns the freq_hi to be used right now and will set freq_hi_delay_us, |
74 | * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. | 61 | * freq_lo, and freq_lo_delay_us in percpu area for averaging freqs. |
75 | */ | 62 | */ |
76 | static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, | 63 | static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, |
77 | unsigned int freq_next, unsigned int relation) | 64 | unsigned int freq_next, unsigned int relation) |
@@ -79,15 +66,15 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, | |||
79 | unsigned int freq_req, freq_reduc, freq_avg; | 66 | unsigned int freq_req, freq_reduc, freq_avg; |
80 | unsigned int freq_hi, freq_lo; | 67 | unsigned int freq_hi, freq_lo; |
81 | unsigned int index = 0; | 68 | unsigned int index = 0; |
82 | unsigned int jiffies_total, jiffies_hi, jiffies_lo; | 69 | unsigned int delay_hi_us; |
83 | struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, | 70 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
84 | policy->cpu); | 71 | struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); |
85 | struct dbs_data *dbs_data = policy->governor_data; | 72 | struct dbs_data *dbs_data = policy_dbs->dbs_data; |
86 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | 73 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; |
87 | 74 | ||
88 | if (!dbs_info->freq_table) { | 75 | if (!dbs_info->freq_table) { |
89 | dbs_info->freq_lo = 0; | 76 | dbs_info->freq_lo = 0; |
90 | dbs_info->freq_lo_jiffies = 0; | 77 | dbs_info->freq_lo_delay_us = 0; |
91 | return freq_next; | 78 | return freq_next; |
92 | } | 79 | } |
93 | 80 | ||
@@ -110,31 +97,30 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, | |||
110 | /* Find out how long we have to be in hi and lo freqs */ | 97 | /* Find out how long we have to be in hi and lo freqs */ |
111 | if (freq_hi == freq_lo) { | 98 | if (freq_hi == freq_lo) { |
112 | dbs_info->freq_lo = 0; | 99 | dbs_info->freq_lo = 0; |
113 | dbs_info->freq_lo_jiffies = 0; | 100 | dbs_info->freq_lo_delay_us = 0; |
114 | return freq_lo; | 101 | return freq_lo; |
115 | } | 102 | } |
116 | jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate); | 103 | delay_hi_us = (freq_avg - freq_lo) * dbs_data->sampling_rate; |
117 | jiffies_hi = (freq_avg - freq_lo) * jiffies_total; | 104 | delay_hi_us += (freq_hi - freq_lo) / 2; |
118 | jiffies_hi += ((freq_hi - freq_lo) / 2); | 105 | delay_hi_us /= freq_hi - freq_lo; |
119 | jiffies_hi /= (freq_hi - freq_lo); | 106 | dbs_info->freq_hi_delay_us = delay_hi_us; |
120 | jiffies_lo = jiffies_total - jiffies_hi; | ||
121 | dbs_info->freq_lo = freq_lo; | 107 | dbs_info->freq_lo = freq_lo; |
122 | dbs_info->freq_lo_jiffies = jiffies_lo; | 108 | dbs_info->freq_lo_delay_us = dbs_data->sampling_rate - delay_hi_us; |
123 | dbs_info->freq_hi_jiffies = jiffies_hi; | ||
124 | return freq_hi; | 109 | return freq_hi; |
125 | } | 110 | } |
126 | 111 | ||
127 | static void ondemand_powersave_bias_init(void) | 112 | static void ondemand_powersave_bias_init(struct cpufreq_policy *policy) |
128 | { | 113 | { |
129 | int i; | 114 | struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); |
130 | for_each_online_cpu(i) { | 115 | |
131 | ondemand_powersave_bias_init_cpu(i); | 116 | dbs_info->freq_table = cpufreq_frequency_get_table(policy->cpu); |
132 | } | 117 | dbs_info->freq_lo = 0; |
133 | } | 118 | } |
134 | 119 | ||
135 | static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) | 120 | static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) |
136 | { | 121 | { |
137 | struct dbs_data *dbs_data = policy->governor_data; | 122 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
123 | struct dbs_data *dbs_data = policy_dbs->dbs_data; | ||
138 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | 124 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; |
139 | 125 | ||
140 | if (od_tuners->powersave_bias) | 126 | if (od_tuners->powersave_bias) |
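
For reference, the delay_hi_us computation in generic_powersave_bias_target() above splits one sampling period between freq_hi and freq_lo so that the average frequency over the period matches the biased target; the added (freq_hi - freq_lo) / 2 rounds the integer division to nearest. A standalone check of that arithmetic (all frequency and rate values are made up for illustration):

#include <stdio.h>

int main(void)
{
        unsigned int freq_lo = 1000000;         /* kHz, hypothetical */
        unsigned int freq_hi = 2000000;         /* kHz, hypothetical */
        unsigned int freq_avg = 1300000;        /* biased target */
        unsigned int sampling_rate = 10000;     /* us */

        /* Same rounded integer division as generic_powersave_bias_target(). */
        unsigned int delay_hi_us = (freq_avg - freq_lo) * sampling_rate;
        delay_hi_us += (freq_hi - freq_lo) / 2; /* round to nearest */
        delay_hi_us /= freq_hi - freq_lo;
        unsigned int delay_lo_us = sampling_rate - delay_hi_us;

        /* 3000 us at 2 GHz plus 7000 us at 1 GHz averages to 1.3 GHz. */
        printf("hi %u us, lo %u us\n", delay_hi_us, delay_lo_us);
        return 0;
}
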
@@ -152,21 +138,21 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) | |||
152 | * (default), then we try to increase frequency. Else, we adjust the frequency | 138 | * (default), then we try to increase frequency. Else, we adjust the frequency |
153 | * proportional to load. | 139 | * proportional to load. |
154 | */ | 140 | */ |
155 | static void od_check_cpu(int cpu, unsigned int load) | 141 | static void od_update(struct cpufreq_policy *policy) |
156 | { | 142 | { |
157 | struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); | 143 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
158 | struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; | 144 | struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); |
159 | struct dbs_data *dbs_data = policy->governor_data; | 145 | struct dbs_data *dbs_data = policy_dbs->dbs_data; |
160 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | 146 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; |
147 | unsigned int load = dbs_update(policy); | ||
161 | 148 | ||
162 | dbs_info->freq_lo = 0; | 149 | dbs_info->freq_lo = 0; |
163 | 150 | ||
164 | /* Check for frequency increase */ | 151 | /* Check for frequency increase */ |
165 | if (load > od_tuners->up_threshold) { | 152 | if (load > dbs_data->up_threshold) { |
166 | /* If switching to max speed, apply sampling_down_factor */ | 153 | /* If switching to max speed, apply sampling_down_factor */ |
167 | if (policy->cur < policy->max) | 154 | if (policy->cur < policy->max) |
168 | dbs_info->rate_mult = | 155 | policy_dbs->rate_mult = dbs_data->sampling_down_factor; |
169 | od_tuners->sampling_down_factor; | ||
170 | dbs_freq_increase(policy, policy->max); | 156 | dbs_freq_increase(policy, policy->max); |
171 | } else { | 157 | } else { |
172 | /* Calculate the next frequency proportional to load */ | 158 | /* Calculate the next frequency proportional to load */ |
@@ -177,177 +163,70 @@ static void od_check_cpu(int cpu, unsigned int load) | |||
177 | freq_next = min_f + load * (max_f - min_f) / 100; | 163 | freq_next = min_f + load * (max_f - min_f) / 100; |
178 | 164 | ||
179 | /* No longer fully busy, reset rate_mult */ | 165 | /* No longer fully busy, reset rate_mult */ |
180 | dbs_info->rate_mult = 1; | 166 | policy_dbs->rate_mult = 1; |
181 | 167 | ||
182 | if (!od_tuners->powersave_bias) { | 168 | if (od_tuners->powersave_bias) |
183 | __cpufreq_driver_target(policy, freq_next, | 169 | freq_next = od_ops.powersave_bias_target(policy, |
184 | CPUFREQ_RELATION_C); | 170 | freq_next, |
185 | return; | 171 | CPUFREQ_RELATION_L); |
186 | } | ||
187 | 172 | ||
188 | freq_next = od_ops.powersave_bias_target(policy, freq_next, | ||
189 | CPUFREQ_RELATION_L); | ||
190 | __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C); | 173 | __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C); |
191 | } | 174 | } |
192 | } | 175 | } |
193 | 176 | ||
194 | static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all) | 177 | static unsigned int od_dbs_timer(struct cpufreq_policy *policy) |
195 | { | 178 | { |
196 | struct dbs_data *dbs_data = policy->governor_data; | 179 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
197 | unsigned int cpu = policy->cpu; | 180 | struct dbs_data *dbs_data = policy_dbs->dbs_data; |
198 | struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, | 181 | struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); |
199 | cpu); | 182 | int sample_type = dbs_info->sample_type; |
200 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | ||
201 | int delay = 0, sample_type = dbs_info->sample_type; | ||
202 | |||
203 | if (!modify_all) | ||
204 | goto max_delay; | ||
205 | 183 | ||
206 | /* Common NORMAL_SAMPLE setup */ | 184 | /* Common NORMAL_SAMPLE setup */ |
207 | dbs_info->sample_type = OD_NORMAL_SAMPLE; | 185 | dbs_info->sample_type = OD_NORMAL_SAMPLE; |
208 | if (sample_type == OD_SUB_SAMPLE) { | 186 | /* |
209 | delay = dbs_info->freq_lo_jiffies; | 187 | * OD_SUB_SAMPLE doesn't make sense if sample_delay_ns is 0, so ignore
188 | * it in that case. | ||
189 | */ | ||
190 | if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) { | ||
210 | __cpufreq_driver_target(policy, dbs_info->freq_lo, | 191 | __cpufreq_driver_target(policy, dbs_info->freq_lo, |
211 | CPUFREQ_RELATION_H); | 192 | CPUFREQ_RELATION_H); |
212 | } else { | 193 | return dbs_info->freq_lo_delay_us; |
213 | dbs_check_cpu(dbs_data, cpu); | ||
214 | if (dbs_info->freq_lo) { | ||
215 | /* Setup timer for SUB_SAMPLE */ | ||
216 | dbs_info->sample_type = OD_SUB_SAMPLE; | ||
217 | delay = dbs_info->freq_hi_jiffies; | ||
218 | } | ||
219 | } | 194 | } |
220 | 195 | ||
221 | max_delay: | 196 | od_update(policy); |
222 | if (!delay) | ||
223 | delay = delay_for_sampling_rate(od_tuners->sampling_rate | ||
224 | * dbs_info->rate_mult); | ||
225 | |||
226 | return delay; | ||
227 | } | ||
228 | |||
229 | /************************** sysfs interface ************************/ | ||
230 | static struct common_dbs_data od_dbs_cdata; | ||
231 | |||
232 | /** | ||
233 | * update_sampling_rate - update sampling rate effective immediately if needed. | ||
234 | * @new_rate: new sampling rate | ||
235 | * | ||
236 | * If new rate is smaller than the old, simply updating | ||
237 | * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the | ||
238 | * original sampling_rate was 1 second and the requested new sampling rate is 10 | ||
239 | * ms because the user needs immediate reaction from ondemand governor, but not | ||
240 | * sure if higher frequency will be required or not, then, the governor may | ||
241 | * change the sampling rate too late; up to 1 second later. Thus, if we are | ||
242 | * reducing the sampling rate, we need to make the new value effective | ||
243 | * immediately. | ||
244 | */ | ||
245 | static void update_sampling_rate(struct dbs_data *dbs_data, | ||
246 | unsigned int new_rate) | ||
247 | { | ||
248 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | ||
249 | struct cpumask cpumask; | ||
250 | int cpu; | ||
251 | |||
252 | od_tuners->sampling_rate = new_rate = max(new_rate, | ||
253 | dbs_data->min_sampling_rate); | ||
254 | |||
255 | /* | ||
256 | * Lock governor so that governor start/stop can't execute in parallel. | ||
257 | */ | ||
258 | mutex_lock(&od_dbs_cdata.mutex); | ||
259 | |||
260 | cpumask_copy(&cpumask, cpu_online_mask); | ||
261 | |||
262 | for_each_cpu(cpu, &cpumask) { | ||
263 | struct cpufreq_policy *policy; | ||
264 | struct od_cpu_dbs_info_s *dbs_info; | ||
265 | struct cpu_dbs_info *cdbs; | ||
266 | struct cpu_common_dbs_info *shared; | ||
267 | unsigned long next_sampling, appointed_at; | ||
268 | |||
269 | dbs_info = &per_cpu(od_cpu_dbs_info, cpu); | ||
270 | cdbs = &dbs_info->cdbs; | ||
271 | shared = cdbs->shared; | ||
272 | |||
273 | /* | ||
274 | * A valid shared and shared->policy means governor hasn't | ||
275 | * stopped or exited yet. | ||
276 | */ | ||
277 | if (!shared || !shared->policy) | ||
278 | continue; | ||
279 | |||
280 | policy = shared->policy; | ||
281 | |||
282 | /* clear all CPUs of this policy */ | ||
283 | cpumask_andnot(&cpumask, &cpumask, policy->cpus); | ||
284 | 197 | ||
285 | /* | 198 | if (dbs_info->freq_lo) { |
286 | * Update sampling rate for CPUs whose policy is governed by | 199 | /* Setup timer for SUB_SAMPLE */ |
287 | * dbs_data. In case of governor_per_policy, only a single | 200 | dbs_info->sample_type = OD_SUB_SAMPLE; |
288 | * policy will be governed by dbs_data, otherwise there can be | 201 | return dbs_info->freq_hi_delay_us; |
289 | * multiple policies that are governed by the same dbs_data. | ||
290 | */ | ||
291 | if (dbs_data != policy->governor_data) | ||
292 | continue; | ||
293 | |||
294 | /* | ||
295 | * Checking this for any CPU should be fine, timers for all of | ||
296 | * them are scheduled together. | ||
297 | */ | ||
298 | next_sampling = jiffies + usecs_to_jiffies(new_rate); | ||
299 | appointed_at = dbs_info->cdbs.timer.expires; | ||
300 | |||
301 | if (time_before(next_sampling, appointed_at)) { | ||
302 | gov_cancel_work(shared); | ||
303 | gov_add_timers(policy, usecs_to_jiffies(new_rate)); | ||
304 | |||
305 | } | ||
306 | } | 202 | } |
307 | 203 | ||
308 | mutex_unlock(&od_dbs_cdata.mutex); | 204 | return dbs_data->sampling_rate * policy_dbs->rate_mult; |
309 | } | 205 | } |
310 | 206 | ||
311 | static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, | 207 | /************************** sysfs interface ************************/ |
312 | size_t count) | 208 | static struct dbs_governor od_dbs_gov; |
313 | { | ||
314 | unsigned int input; | ||
315 | int ret; | ||
316 | ret = sscanf(buf, "%u", &input); | ||
317 | if (ret != 1) | ||
318 | return -EINVAL; | ||
319 | |||
320 | update_sampling_rate(dbs_data, input); | ||
321 | return count; | ||
322 | } | ||
323 | 209 | ||
324 | static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, | 210 | static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, |
325 | size_t count) | 211 | size_t count) |
326 | { | 212 | { |
327 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | ||
328 | unsigned int input; | 213 | unsigned int input; |
329 | int ret; | 214 | int ret; |
330 | unsigned int j; | ||
331 | 215 | ||
332 | ret = sscanf(buf, "%u", &input); | 216 | ret = sscanf(buf, "%u", &input); |
333 | if (ret != 1) | 217 | if (ret != 1) |
334 | return -EINVAL; | 218 | return -EINVAL; |
335 | od_tuners->io_is_busy = !!input; | 219 | dbs_data->io_is_busy = !!input; |
336 | 220 | ||
337 | /* we need to re-evaluate prev_cpu_idle */ | 221 | /* we need to re-evaluate prev_cpu_idle */ |
338 | for_each_online_cpu(j) { | 222 | gov_update_cpu_data(dbs_data); |
339 | struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, | 223 | |
340 | j); | ||
341 | dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, | ||
342 | &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); | ||
343 | } | ||
344 | return count; | 224 | return count; |
345 | } | 225 | } |
346 | 226 | ||
347 | static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, | 227 | static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, |
348 | size_t count) | 228 | size_t count) |
349 | { | 229 | { |
350 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | ||
351 | unsigned int input; | 230 | unsigned int input; |
352 | int ret; | 231 | int ret; |
353 | ret = sscanf(buf, "%u", &input); | 232 | ret = sscanf(buf, "%u", &input); |
@@ -357,40 +236,43 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, | |||
357 | return -EINVAL; | 236 | return -EINVAL; |
358 | } | 237 | } |
359 | 238 | ||
360 | od_tuners->up_threshold = input; | 239 | dbs_data->up_threshold = input; |
361 | return count; | 240 | return count; |
362 | } | 241 | } |
363 | 242 | ||
364 | static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, | 243 | static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, |
365 | const char *buf, size_t count) | 244 | const char *buf, size_t count) |
366 | { | 245 | { |
367 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | 246 | struct policy_dbs_info *policy_dbs; |
368 | unsigned int input, j; | 247 | unsigned int input; |
369 | int ret; | 248 | int ret; |
370 | ret = sscanf(buf, "%u", &input); | 249 | ret = sscanf(buf, "%u", &input); |
371 | 250 | ||
372 | if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) | 251 | if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) |
373 | return -EINVAL; | 252 | return -EINVAL; |
374 | od_tuners->sampling_down_factor = input; | 253 | |
254 | dbs_data->sampling_down_factor = input; | ||
375 | 255 | ||
376 | /* Reset down sampling multiplier in case it was active */ | 256 | /* Reset down sampling multiplier in case it was active */ |
377 | for_each_online_cpu(j) { | 257 | list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { |
378 | struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, | 258 | /* |
379 | j); | 259 | * Doing this without locking might lead to using different |
380 | dbs_info->rate_mult = 1; | 260 | * rate_mult values in od_update() and od_dbs_timer(). |
261 | */ | ||
262 | mutex_lock(&policy_dbs->timer_mutex); | ||
263 | policy_dbs->rate_mult = 1; | ||
264 | mutex_unlock(&policy_dbs->timer_mutex); | ||
381 | } | 265 | } |
266 | |||
382 | return count; | 267 | return count; |
383 | } | 268 | } |
384 | 269 | ||
385 | static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, | 270 | static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, |
386 | const char *buf, size_t count) | 271 | const char *buf, size_t count) |
387 | { | 272 | { |
388 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | ||
389 | unsigned int input; | 273 | unsigned int input; |
390 | int ret; | 274 | int ret; |
391 | 275 | ||
392 | unsigned int j; | ||
393 | |||
394 | ret = sscanf(buf, "%u", &input); | 276 | ret = sscanf(buf, "%u", &input); |
395 | if (ret != 1) | 277 | if (ret != 1) |
396 | return -EINVAL; | 278 | return -EINVAL; |
@@ -398,22 +280,14 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, | |||
398 | if (input > 1) | 280 | if (input > 1) |
399 | input = 1; | 281 | input = 1; |
400 | 282 | ||
401 | if (input == od_tuners->ignore_nice_load) { /* nothing to do */ | 283 | if (input == dbs_data->ignore_nice_load) { /* nothing to do */ |
402 | return count; | 284 | return count; |
403 | } | 285 | } |
404 | od_tuners->ignore_nice_load = input; | 286 | dbs_data->ignore_nice_load = input; |
405 | 287 | ||
406 | /* we need to re-evaluate prev_cpu_idle */ | 288 | /* we need to re-evaluate prev_cpu_idle */ |
407 | for_each_online_cpu(j) { | 289 | gov_update_cpu_data(dbs_data); |
408 | struct od_cpu_dbs_info_s *dbs_info; | ||
409 | dbs_info = &per_cpu(od_cpu_dbs_info, j); | ||
410 | dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, | ||
411 | &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); | ||
412 | if (od_tuners->ignore_nice_load) | ||
413 | dbs_info->cdbs.prev_cpu_nice = | ||
414 | kcpustat_cpu(j).cpustat[CPUTIME_NICE]; | ||
415 | 290 | ||
416 | } | ||
417 | return count; | 291 | return count; |
418 | } | 292 | } |
419 | 293 | ||
@@ -421,6 +295,7 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, | |||
421 | size_t count) | 295 | size_t count) |
422 | { | 296 | { |
423 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; | 297 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; |
298 | struct policy_dbs_info *policy_dbs; | ||
424 | unsigned int input; | 299 | unsigned int input; |
425 | int ret; | 300 | int ret; |
426 | ret = sscanf(buf, "%u", &input); | 301 | ret = sscanf(buf, "%u", &input); |
@@ -432,59 +307,54 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, | |||
432 | input = 1000; | 307 | input = 1000; |
433 | 308 | ||
434 | od_tuners->powersave_bias = input; | 309 | od_tuners->powersave_bias = input; |
435 | ondemand_powersave_bias_init(); | 310 | |
311 | list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) | ||
312 | ondemand_powersave_bias_init(policy_dbs->policy); | ||
313 | |||
436 | return count; | 314 | return count; |
437 | } | 315 | } |
438 | 316 | ||
439 | show_store_one(od, sampling_rate); | 317 | gov_show_one_common(sampling_rate); |
440 | show_store_one(od, io_is_busy); | 318 | gov_show_one_common(up_threshold); |
441 | show_store_one(od, up_threshold); | 319 | gov_show_one_common(sampling_down_factor); |
442 | show_store_one(od, sampling_down_factor); | 320 | gov_show_one_common(ignore_nice_load); |
443 | show_store_one(od, ignore_nice_load); | 321 | gov_show_one_common(min_sampling_rate); |
444 | show_store_one(od, powersave_bias); | 322 | gov_show_one_common(io_is_busy); |
445 | declare_show_sampling_rate_min(od); | 323 | gov_show_one(od, powersave_bias); |
446 | 324 | ||
447 | gov_sys_pol_attr_rw(sampling_rate); | 325 | gov_attr_rw(sampling_rate); |
448 | gov_sys_pol_attr_rw(io_is_busy); | 326 | gov_attr_rw(io_is_busy); |
449 | gov_sys_pol_attr_rw(up_threshold); | 327 | gov_attr_rw(up_threshold); |
450 | gov_sys_pol_attr_rw(sampling_down_factor); | 328 | gov_attr_rw(sampling_down_factor); |
451 | gov_sys_pol_attr_rw(ignore_nice_load); | 329 | gov_attr_rw(ignore_nice_load); |
452 | gov_sys_pol_attr_rw(powersave_bias); | 330 | gov_attr_rw(powersave_bias); |
453 | gov_sys_pol_attr_ro(sampling_rate_min); | 331 | gov_attr_ro(min_sampling_rate); |
454 | 332 | ||
455 | static struct attribute *dbs_attributes_gov_sys[] = { | 333 | static struct attribute *od_attributes[] = { |
456 | &sampling_rate_min_gov_sys.attr, | 334 | &min_sampling_rate.attr, |
457 | &sampling_rate_gov_sys.attr, | 335 | &sampling_rate.attr, |
458 | &up_threshold_gov_sys.attr, | 336 | &up_threshold.attr, |
459 | &sampling_down_factor_gov_sys.attr, | 337 | &sampling_down_factor.attr, |
460 | &ignore_nice_load_gov_sys.attr, | 338 | &ignore_nice_load.attr, |
461 | &powersave_bias_gov_sys.attr, | 339 | &powersave_bias.attr, |
462 | &io_is_busy_gov_sys.attr, | 340 | &io_is_busy.attr, |
463 | NULL | 341 | NULL |
464 | }; | 342 | }; |
465 | 343 | ||
466 | static struct attribute_group od_attr_group_gov_sys = { | 344 | /************************** sysfs end ************************/ |
467 | .attrs = dbs_attributes_gov_sys, | ||
468 | .name = "ondemand", | ||
469 | }; | ||
470 | 345 | ||
471 | static struct attribute *dbs_attributes_gov_pol[] = { | 346 | static struct policy_dbs_info *od_alloc(void) |
472 | &sampling_rate_min_gov_pol.attr, | 347 | { |
473 | &sampling_rate_gov_pol.attr, | 348 | struct od_policy_dbs_info *dbs_info; |
474 | &up_threshold_gov_pol.attr, | ||
475 | &sampling_down_factor_gov_pol.attr, | ||
476 | &ignore_nice_load_gov_pol.attr, | ||
477 | &powersave_bias_gov_pol.attr, | ||
478 | &io_is_busy_gov_pol.attr, | ||
479 | NULL | ||
480 | }; | ||
481 | 349 | ||
482 | static struct attribute_group od_attr_group_gov_pol = { | 350 | dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL); |
483 | .attrs = dbs_attributes_gov_pol, | 351 | return dbs_info ? &dbs_info->policy_dbs : NULL; |
484 | .name = "ondemand", | 352 | } |
485 | }; | ||
486 | 353 | ||
487 | /************************** sysfs end ************************/ | 354 | static void od_free(struct policy_dbs_info *policy_dbs) |
355 | { | ||
356 | kfree(to_dbs_info(policy_dbs)); | ||
357 | } | ||
488 | 358 | ||
489 | static int od_init(struct dbs_data *dbs_data, bool notify) | 359 | static int od_init(struct dbs_data *dbs_data, bool notify) |
490 | { | 360 | { |
@@ -503,7 +373,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify) | |||
503 | put_cpu(); | 373 | put_cpu(); |
504 | if (idle_time != -1ULL) { | 374 | if (idle_time != -1ULL) { |
505 | /* Idle micro accounting is supported. Use finer thresholds */ | 375 | /* Idle micro accounting is supported. Use finer thresholds */ |
506 | tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; | 376 | dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; |
507 | /* | 377 | /* |
508 | * In nohz/micro accounting case we set the minimum frequency | 378 | * In nohz/micro accounting case we set the minimum frequency |
509 | * not depending on HZ, but fixed (very low). The deferred | 379 | * not depending on HZ, but fixed (very low). The deferred |
@@ -511,17 +381,17 @@ static int od_init(struct dbs_data *dbs_data, bool notify) | |||
511 | */ | 381 | */ |
512 | dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; | 382 | dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; |
513 | } else { | 383 | } else { |
514 | tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; | 384 | dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; |
515 | 385 | ||
516 | /* For correct statistics, we need 10 ticks for each measure */ | 386 | /* For correct statistics, we need 10 ticks for each measure */ |
517 | dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * | 387 | dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * |
518 | jiffies_to_usecs(10); | 388 | jiffies_to_usecs(10); |
519 | } | 389 | } |
520 | 390 | ||
521 | tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; | 391 | dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; |
522 | tuners->ignore_nice_load = 0; | 392 | dbs_data->ignore_nice_load = 0; |
523 | tuners->powersave_bias = default_powersave_bias; | 393 | tuners->powersave_bias = default_powersave_bias; |
524 | tuners->io_is_busy = should_io_be_busy(); | 394 | dbs_data->io_is_busy = should_io_be_busy(); |
525 | 395 | ||
526 | dbs_data->tuners = tuners; | 396 | dbs_data->tuners = tuners; |
527 | return 0; | 397 | return 0; |
@@ -532,33 +402,38 @@ static void od_exit(struct dbs_data *dbs_data, bool notify) | |||
532 | kfree(dbs_data->tuners); | 402 | kfree(dbs_data->tuners); |
533 | } | 403 | } |
534 | 404 | ||
535 | define_get_cpu_dbs_routines(od_cpu_dbs_info); | 405 | static void od_start(struct cpufreq_policy *policy) |
406 | { | ||
407 | struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); | ||
408 | |||
409 | dbs_info->sample_type = OD_NORMAL_SAMPLE; | ||
410 | ondemand_powersave_bias_init(policy); | ||
411 | } | ||
536 | 412 | ||
537 | static struct od_ops od_ops = { | 413 | static struct od_ops od_ops = { |
538 | .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu, | ||
539 | .powersave_bias_target = generic_powersave_bias_target, | 414 | .powersave_bias_target = generic_powersave_bias_target, |
540 | .freq_increase = dbs_freq_increase, | ||
541 | }; | 415 | }; |
542 | 416 | ||
543 | static struct common_dbs_data od_dbs_cdata = { | 417 | static struct dbs_governor od_dbs_gov = { |
544 | .governor = GOV_ONDEMAND, | 418 | .gov = { |
545 | .attr_group_gov_sys = &od_attr_group_gov_sys, | 419 | .name = "ondemand", |
546 | .attr_group_gov_pol = &od_attr_group_gov_pol, | 420 | .governor = cpufreq_governor_dbs, |
547 | .get_cpu_cdbs = get_cpu_cdbs, | 421 | .max_transition_latency = TRANSITION_LATENCY_LIMIT, |
548 | .get_cpu_dbs_info_s = get_cpu_dbs_info_s, | 422 | .owner = THIS_MODULE, |
423 | }, | ||
424 | .kobj_type = { .default_attrs = od_attributes }, | ||
549 | .gov_dbs_timer = od_dbs_timer, | 425 | .gov_dbs_timer = od_dbs_timer, |
550 | .gov_check_cpu = od_check_cpu, | 426 | .alloc = od_alloc, |
551 | .gov_ops = &od_ops, | 427 | .free = od_free, |
552 | .init = od_init, | 428 | .init = od_init, |
553 | .exit = od_exit, | 429 | .exit = od_exit, |
554 | .mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex), | 430 | .start = od_start, |
555 | }; | 431 | }; |
556 | 432 | ||
433 | #define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov) | ||
434 | |||
557 | static void od_set_powersave_bias(unsigned int powersave_bias) | 435 | static void od_set_powersave_bias(unsigned int powersave_bias) |
558 | { | 436 | { |
559 | struct cpufreq_policy *policy; | ||
560 | struct dbs_data *dbs_data; | ||
561 | struct od_dbs_tuners *od_tuners; | ||
562 | unsigned int cpu; | 437 | unsigned int cpu; |
563 | cpumask_t done; | 438 | cpumask_t done; |
564 | 439 | ||
@@ -567,22 +442,25 @@ static void od_set_powersave_bias(unsigned int powersave_bias) | |||
567 | 442 | ||
568 | get_online_cpus(); | 443 | get_online_cpus(); |
569 | for_each_online_cpu(cpu) { | 444 | for_each_online_cpu(cpu) { |
570 | struct cpu_common_dbs_info *shared; | 445 | struct cpufreq_policy *policy; |
446 | struct policy_dbs_info *policy_dbs; | ||
447 | struct dbs_data *dbs_data; | ||
448 | struct od_dbs_tuners *od_tuners; | ||
571 | 449 | ||
572 | if (cpumask_test_cpu(cpu, &done)) | 450 | if (cpumask_test_cpu(cpu, &done)) |
573 | continue; | 451 | continue; |
574 | 452 | ||
575 | shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared; | 453 | policy = cpufreq_cpu_get_raw(cpu); |
576 | if (!shared) | 454 | if (!policy || policy->governor != CPU_FREQ_GOV_ONDEMAND) |
577 | continue; | 455 | continue; |
578 | 456 | ||
579 | policy = shared->policy; | 457 | policy_dbs = policy->governor_data; |
580 | cpumask_or(&done, &done, policy->cpus); | 458 | if (!policy_dbs) |
581 | |||
582 | if (policy->governor != &cpufreq_gov_ondemand) | ||
583 | continue; | 459 | continue; |
584 | 460 | ||
585 | dbs_data = policy->governor_data; | 461 | cpumask_or(&done, &done, policy->cpus); |
462 | |||
463 | dbs_data = policy_dbs->dbs_data; | ||
586 | od_tuners = dbs_data->tuners; | 464 | od_tuners = dbs_data->tuners; |
587 | od_tuners->powersave_bias = default_powersave_bias; | 465 | od_tuners->powersave_bias = default_powersave_bias; |
588 | } | 466 | } |
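
The loop above visits each cpufreq policy exactly once: every CPU already covered by a previously handled policy is accumulated in the "done" mask and skipped on later iterations. A minimal user-space sketch of that idiom, with plain arrays standing in for cpumask_t (illustrative values, not kernel code):

#include <stdio.h>
#include <stdbool.h>

#define NCPUS 8

int main(void)
{
	/* CPUs 0-1, 2-3 and 4-7 share policies 0, 1 and 2 respectively */
	int policy_of[NCPUS] = { 0, 0, 1, 1, 2, 2, 2, 2 };
	bool done[NCPUS] = { false };

	for (int cpu = 0; cpu < NCPUS; cpu++) {
		if (done[cpu])
			continue;
		/* mark every CPU sharing this policy as handled */
		for (int other = 0; other < NCPUS; other++)
			if (policy_of[other] == policy_of[cpu])
				done[other] = true;
		printf("updated policy %d (first cpu %d)\n",
		       policy_of[cpu], cpu);
	}
	return 0;
}
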
@@ -605,30 +483,14 @@ void od_unregister_powersave_bias_handler(void) | |||
605 | } | 483 | } |
606 | EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); | 484 | EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); |
607 | 485 | ||
608 | static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, | ||
609 | unsigned int event) | ||
610 | { | ||
611 | return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); | ||
612 | } | ||
613 | |||
614 | #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND | ||
615 | static | ||
616 | #endif | ||
617 | struct cpufreq_governor cpufreq_gov_ondemand = { | ||
618 | .name = "ondemand", | ||
619 | .governor = od_cpufreq_governor_dbs, | ||
620 | .max_transition_latency = TRANSITION_LATENCY_LIMIT, | ||
621 | .owner = THIS_MODULE, | ||
622 | }; | ||
623 | |||
624 | static int __init cpufreq_gov_dbs_init(void) | 486 | static int __init cpufreq_gov_dbs_init(void) |
625 | { | 487 | { |
626 | return cpufreq_register_governor(&cpufreq_gov_ondemand); | 488 | return cpufreq_register_governor(CPU_FREQ_GOV_ONDEMAND); |
627 | } | 489 | } |
628 | 490 | ||
629 | static void __exit cpufreq_gov_dbs_exit(void) | 491 | static void __exit cpufreq_gov_dbs_exit(void) |
630 | { | 492 | { |
631 | cpufreq_unregister_governor(&cpufreq_gov_ondemand); | 493 | cpufreq_unregister_governor(CPU_FREQ_GOV_ONDEMAND); |
632 | } | 494 | } |
633 | 495 | ||
634 | MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>"); | 496 | MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>"); |
@@ -638,6 +500,11 @@ MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " | |||
638 | MODULE_LICENSE("GPL"); | 500 | MODULE_LICENSE("GPL"); |
639 | 501 | ||
640 | #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND | 502 | #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND |
503 | struct cpufreq_governor *cpufreq_default_governor(void) | ||
504 | { | ||
505 | return CPU_FREQ_GOV_ONDEMAND; | ||
506 | } | ||
507 | |||
641 | fs_initcall(cpufreq_gov_dbs_init); | 508 | fs_initcall(cpufreq_gov_dbs_init); |
642 | #else | 509 | #else |
643 | module_init(cpufreq_gov_dbs_init); | 510 | module_init(cpufreq_gov_dbs_init); |
diff --git a/drivers/cpufreq/cpufreq_ondemand.h b/drivers/cpufreq/cpufreq_ondemand.h new file mode 100644 index 000000000000..f0121db3cd9e --- /dev/null +++ b/drivers/cpufreq/cpufreq_ondemand.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * Header file for CPUFreq ondemand governor and related code. | ||
3 | * | ||
4 | * Copyright (C) 2016, Intel Corporation | ||
5 | * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include "cpufreq_governor.h" | ||
13 | |||
14 | struct od_policy_dbs_info { | ||
15 | struct policy_dbs_info policy_dbs; | ||
16 | struct cpufreq_frequency_table *freq_table; | ||
17 | unsigned int freq_lo; | ||
18 | unsigned int freq_lo_delay_us; | ||
19 | unsigned int freq_hi_delay_us; | ||
20 | unsigned int sample_type:1; | ||
21 | }; | ||
22 | |||
23 | static inline struct od_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) | ||
24 | { | ||
25 | return container_of(policy_dbs, struct od_policy_dbs_info, policy_dbs); | ||
26 | } | ||
27 | |||
28 | struct od_dbs_tuners { | ||
29 | unsigned int powersave_bias; | ||
30 | }; | ||
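
The new header leans on the standard container_of() pattern: od_policy_dbs_info embeds struct policy_dbs_info, and to_dbs_info() recovers the enclosing object from a pointer to the embedded member. A self-contained user-space sketch of the same pattern (names here are illustrative):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct policy_dbs { int sample_delay; };

struct od_policy_dbs {
	struct policy_dbs policy_dbs;	/* embedded base object */
	unsigned int freq_lo;
};

static struct od_policy_dbs *to_od(struct policy_dbs *p)
{
	return container_of(p, struct od_policy_dbs, policy_dbs);
}

int main(void)
{
	struct od_policy_dbs od = { .policy_dbs = { 10 }, .freq_lo = 800 };
	struct policy_dbs *base = &od.policy_dbs;

	/* round-trip: embedded member back to the enclosing structure */
	printf("freq_lo = %u\n", to_od(base)->freq_lo);
	return 0;
}

Because policy_dbs is the first member the cast is a no-op at runtime, but container_of() stays correct even if the member moves.
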
diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index cf117deb39b1..af9f4b96f5a8 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c | |||
@@ -33,10 +33,7 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy, | |||
33 | return 0; | 33 | return 0; |
34 | } | 34 | } |
35 | 35 | ||
36 | #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE | 36 | static struct cpufreq_governor cpufreq_gov_performance = { |
37 | static | ||
38 | #endif | ||
39 | struct cpufreq_governor cpufreq_gov_performance = { | ||
40 | .name = "performance", | 37 | .name = "performance", |
41 | .governor = cpufreq_governor_performance, | 38 | .governor = cpufreq_governor_performance, |
42 | .owner = THIS_MODULE, | 39 | .owner = THIS_MODULE, |
@@ -52,6 +49,19 @@ static void __exit cpufreq_gov_performance_exit(void) | |||
52 | cpufreq_unregister_governor(&cpufreq_gov_performance); | 49 | cpufreq_unregister_governor(&cpufreq_gov_performance); |
53 | } | 50 | } |
54 | 51 | ||
52 | #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE | ||
53 | struct cpufreq_governor *cpufreq_default_governor(void) | ||
54 | { | ||
55 | return &cpufreq_gov_performance; | ||
56 | } | ||
57 | #endif | ||
58 | #ifndef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE | ||
59 | struct cpufreq_governor *cpufreq_fallback_governor(void) | ||
60 | { | ||
61 | return &cpufreq_gov_performance; | ||
62 | } | ||
63 | #endif | ||
64 | |||
55 | MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); | 65 | MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); |
56 | MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); | 66 | MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); |
57 | MODULE_LICENSE("GPL"); | 67 | MODULE_LICENSE("GPL"); |
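
With the CPUFREQ_DEFAULT_GOVERNOR macro chain gone (see the include/linux/cpufreq.h hunk further down), each governor that can be the build-time default supplies its own cpufreq_default_governor() definition, and Kconfig guarantees exactly one of them is compiled in. A user-space sketch of that linker-level selection, with assumed names:

#include <stdio.h>

struct governor { const char *name; };

static struct governor gov_performance = { "performance" };

/* Only the Kconfig-selected governor's object file defines this symbol,
 * so the linker resolves the default without any #ifdef chain. */
struct governor *cpufreq_default_governor(void)
{
	return &gov_performance;
}

int main(void)
{
	printf("default governor: %s\n", cpufreq_default_governor()->name);
	return 0;
}

cpufreq_fallback_governor() follows the same scheme, except that performance defines it whenever the governor is built in, not only when it is the default.
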
diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index e3b874c235ea..b8b400232a74 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c | |||
@@ -33,10 +33,7 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy, | |||
33 | return 0; | 33 | return 0; |
34 | } | 34 | } |
35 | 35 | ||
36 | #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE | 36 | static struct cpufreq_governor cpufreq_gov_powersave = { |
37 | static | ||
38 | #endif | ||
39 | struct cpufreq_governor cpufreq_gov_powersave = { | ||
40 | .name = "powersave", | 37 | .name = "powersave", |
41 | .governor = cpufreq_governor_powersave, | 38 | .governor = cpufreq_governor_powersave, |
42 | .owner = THIS_MODULE, | 39 | .owner = THIS_MODULE, |
@@ -57,6 +54,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); | |||
57 | MODULE_LICENSE("GPL"); | 54 | MODULE_LICENSE("GPL"); |
58 | 55 | ||
59 | #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE | 56 | #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE |
57 | struct cpufreq_governor *cpufreq_default_governor(void) | ||
58 | { | ||
59 | return &cpufreq_gov_powersave; | ||
60 | } | ||
61 | |||
60 | fs_initcall(cpufreq_gov_powersave_init); | 62 | fs_initcall(cpufreq_gov_powersave_init); |
61 | #else | 63 | #else |
62 | module_init(cpufreq_gov_powersave_init); | 64 | module_init(cpufreq_gov_powersave_init); |
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 4dbf1db16aca..4d16f45ee1da 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c | |||
@@ -89,10 +89,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, | |||
89 | return rc; | 89 | return rc; |
90 | } | 90 | } |
91 | 91 | ||
92 | #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE | 92 | static struct cpufreq_governor cpufreq_gov_userspace = { |
93 | static | ||
94 | #endif | ||
95 | struct cpufreq_governor cpufreq_gov_userspace = { | ||
96 | .name = "userspace", | 93 | .name = "userspace", |
97 | .governor = cpufreq_governor_userspace, | 94 | .governor = cpufreq_governor_userspace, |
98 | .store_setspeed = cpufreq_set, | 95 | .store_setspeed = cpufreq_set, |
@@ -116,6 +113,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); | |||
116 | MODULE_LICENSE("GPL"); | 113 | MODULE_LICENSE("GPL"); |
117 | 114 | ||
118 | #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE | 115 | #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE |
116 | struct cpufreq_governor *cpufreq_default_governor(void) | ||
117 | { | ||
118 | return &cpufreq_gov_userspace; | ||
119 | } | ||
120 | |||
119 | fs_initcall(cpufreq_gov_userspace_init); | 121 | fs_initcall(cpufreq_gov_userspace_init); |
120 | #else | 122 | #else |
121 | module_init(cpufreq_gov_userspace_init); | 123 | module_init(cpufreq_gov_userspace_init); |
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index cd83d477e32d..cb5607495816 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c | |||
@@ -71,7 +71,7 @@ struct sample { | |||
71 | u64 mperf; | 71 | u64 mperf; |
72 | u64 tsc; | 72 | u64 tsc; |
73 | int freq; | 73 | int freq; |
74 | ktime_t time; | 74 | u64 time; |
75 | }; | 75 | }; |
76 | 76 | ||
77 | struct pstate_data { | 77 | struct pstate_data { |
@@ -103,13 +103,13 @@ struct _pid { | |||
103 | struct cpudata { | 103 | struct cpudata { |
104 | int cpu; | 104 | int cpu; |
105 | 105 | ||
106 | struct timer_list timer; | 106 | struct update_util_data update_util; |
107 | 107 | ||
108 | struct pstate_data pstate; | 108 | struct pstate_data pstate; |
109 | struct vid_data vid; | 109 | struct vid_data vid; |
110 | struct _pid pid; | 110 | struct _pid pid; |
111 | 111 | ||
112 | ktime_t last_sample_time; | 112 | u64 last_sample_time; |
113 | u64 prev_aperf; | 113 | u64 prev_aperf; |
114 | u64 prev_mperf; | 114 | u64 prev_mperf; |
115 | u64 prev_tsc; | 115 | u64 prev_tsc; |
@@ -120,6 +120,7 @@ struct cpudata { | |||
120 | static struct cpudata **all_cpu_data; | 120 | static struct cpudata **all_cpu_data; |
121 | struct pstate_adjust_policy { | 121 | struct pstate_adjust_policy { |
122 | int sample_rate_ms; | 122 | int sample_rate_ms; |
123 | s64 sample_rate_ns; | ||
123 | int deadband; | 124 | int deadband; |
124 | int setpoint; | 125 | int setpoint; |
125 | int p_gain_pct; | 126 | int p_gain_pct; |
@@ -197,8 +198,8 @@ static struct perf_limits *limits = &powersave_limits; | |||
197 | 198 | ||
198 | static inline void pid_reset(struct _pid *pid, int setpoint, int busy, | 199 | static inline void pid_reset(struct _pid *pid, int setpoint, int busy, |
199 | int deadband, int integral) { | 200 | int deadband, int integral) { |
200 | pid->setpoint = setpoint; | 201 | pid->setpoint = int_tofp(setpoint); |
201 | pid->deadband = deadband; | 202 | pid->deadband = int_tofp(deadband); |
202 | pid->integral = int_tofp(integral); | 203 | pid->integral = int_tofp(integral); |
203 | pid->last_err = int_tofp(setpoint) - int_tofp(busy); | 204 | pid->last_err = int_tofp(setpoint) - int_tofp(busy); |
204 | } | 205 | } |
@@ -224,9 +225,9 @@ static signed int pid_calc(struct _pid *pid, int32_t busy) | |||
224 | int32_t pterm, dterm, fp_error; | 225 | int32_t pterm, dterm, fp_error; |
225 | int32_t integral_limit; | 226 | int32_t integral_limit; |
226 | 227 | ||
227 | fp_error = int_tofp(pid->setpoint) - busy; | 228 | fp_error = pid->setpoint - busy; |
228 | 229 | ||
229 | if (abs(fp_error) <= int_tofp(pid->deadband)) | 230 | if (abs(fp_error) <= pid->deadband) |
230 | return 0; | 231 | return 0; |
231 | 232 | ||
232 | pterm = mul_fp(pid->p_gain, fp_error); | 233 | pterm = mul_fp(pid->p_gain, fp_error); |
@@ -286,7 +287,7 @@ static inline void update_turbo_state(void) | |||
286 | cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); | 287 | cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); |
287 | } | 288 | } |
288 | 289 | ||
289 | static void intel_pstate_hwp_set(void) | 290 | static void intel_pstate_hwp_set(const struct cpumask *cpumask) |
290 | { | 291 | { |
291 | int min, hw_min, max, hw_max, cpu, range, adj_range; | 292 | int min, hw_min, max, hw_max, cpu, range, adj_range; |
292 | u64 value, cap; | 293 | u64 value, cap; |
@@ -296,9 +297,7 @@ static void intel_pstate_hwp_set(void) | |||
296 | hw_max = HWP_HIGHEST_PERF(cap); | 297 | hw_max = HWP_HIGHEST_PERF(cap); |
297 | range = hw_max - hw_min; | 298 | range = hw_max - hw_min; |
298 | 299 | ||
299 | get_online_cpus(); | 300 | for_each_cpu(cpu, cpumask) { |
300 | |||
301 | for_each_online_cpu(cpu) { | ||
302 | rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); | 301 | rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); |
303 | adj_range = limits->min_perf_pct * range / 100; | 302 | adj_range = limits->min_perf_pct * range / 100; |
304 | min = hw_min + adj_range; | 303 | min = hw_min + adj_range; |
@@ -317,7 +316,12 @@ static void intel_pstate_hwp_set(void) | |||
317 | value |= HWP_MAX_PERF(max); | 316 | value |= HWP_MAX_PERF(max); |
318 | wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); | 317 | wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); |
319 | } | 318 | } |
319 | } | ||
320 | 320 | ||
321 | static void intel_pstate_hwp_set_online_cpus(void) | ||
322 | { | ||
323 | get_online_cpus(); | ||
324 | intel_pstate_hwp_set(cpu_online_mask); | ||
321 | put_online_cpus(); | 325 | put_online_cpus(); |
322 | } | 326 | } |
323 | 327 | ||
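
intel_pstate_hwp_set() maps the sysfs min/max percentages onto the hardware performance range read from the HWP capabilities MSR: adj_range = pct * range / 100, then the limit is hw_min + adj_range. A worked sketch with illustrative register values:

#include <stdio.h>

int main(void)
{
	int hw_min = 8, hw_max = 36;	/* from HWP capabilities (example) */
	int range = hw_max - hw_min;	/* 28 */
	int min_pct = 25, max_pct = 100;

	int min = hw_min + min_pct * range / 100;	/* 8 + 7  = 15 */
	int max = hw_min + max_pct * range / 100;	/* 8 + 28 = 36 */

	printf("HWP_MIN_PERF=%d HWP_MAX_PERF=%d\n", min, max);
	return 0;
}
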
@@ -439,7 +443,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, | |||
439 | limits->no_turbo = clamp_t(int, input, 0, 1); | 443 | limits->no_turbo = clamp_t(int, input, 0, 1); |
440 | 444 | ||
441 | if (hwp_active) | 445 | if (hwp_active) |
442 | intel_pstate_hwp_set(); | 446 | intel_pstate_hwp_set_online_cpus(); |
443 | 447 | ||
444 | return count; | 448 | return count; |
445 | } | 449 | } |
@@ -465,7 +469,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, | |||
465 | int_tofp(100)); | 469 | int_tofp(100)); |
466 | 470 | ||
467 | if (hwp_active) | 471 | if (hwp_active) |
468 | intel_pstate_hwp_set(); | 472 | intel_pstate_hwp_set_online_cpus(); |
469 | return count; | 473 | return count; |
470 | } | 474 | } |
471 | 475 | ||
@@ -490,7 +494,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, | |||
490 | int_tofp(100)); | 494 | int_tofp(100)); |
491 | 495 | ||
492 | if (hwp_active) | 496 | if (hwp_active) |
493 | intel_pstate_hwp_set(); | 497 | intel_pstate_hwp_set_online_cpus(); |
494 | return count; | 498 | return count; |
495 | } | 499 | } |
496 | 500 | ||
@@ -531,6 +535,9 @@ static void __init intel_pstate_sysfs_expose_params(void) | |||
531 | 535 | ||

532 | static void intel_pstate_hwp_enable(struct cpudata *cpudata) | 536 | static void intel_pstate_hwp_enable(struct cpudata *cpudata) |
533 | { | 537 | { |
538 | /* First disable HWP notification interrupts, as we don't process them */ ||
539 | wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); | ||
540 | |||
534 | wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); | 541 | wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); |
535 | } | 542 | } |
536 | 543 | ||
@@ -712,7 +719,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate) | |||
712 | if (limits->no_turbo && !limits->turbo_disabled) | 719 | if (limits->no_turbo && !limits->turbo_disabled) |
713 | val |= (u64)1 << 32; | 720 | val |= (u64)1 << 32; |
714 | 721 | ||
715 | wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); | 722 | wrmsrl(MSR_IA32_PERF_CTL, val); |
716 | } | 723 | } |
717 | 724 | ||
718 | static int knl_get_turbo_pstate(void) | 725 | static int knl_get_turbo_pstate(void) |
@@ -824,11 +831,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) | |||
824 | * policy, or by cpu specific default values determined through | 831 | * policy, or by cpu specific default values determined through |
825 | * experimentation. | 832 | * experimentation. |
826 | */ | 833 | */ |
827 | max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf)); | 834 | max_perf_adj = fp_toint(max_perf * limits->max_perf); |
828 | *max = clamp_t(int, max_perf_adj, | 835 | *max = clamp_t(int, max_perf_adj, |
829 | cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); | 836 | cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); |
830 | 837 | ||
831 | min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf)); | 838 | min_perf = fp_toint(max_perf * limits->min_perf); |
832 | *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); | 839 | *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); |
833 | } | 840 | } |
834 | 841 | ||
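
The hunk above drops a redundant conversion: limits->max_perf and limits->min_perf are already stored in the driver's fixed-point format, so multiplying the plain integer max_perf by them yields a fixed-point product directly, and a single fp_toint() recovers the scaled pstate. A sketch of the arithmetic, assuming the driver's 8-bit fraction:

#include <stdio.h>
#include <stdint.h>

#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

int main(void)
{
	/* 80% as a fixed-point fraction: int_tofp(80) / 100 = 204 ~ 0.797 */
	int64_t max_perf_frac = int_tofp(80) / 100;
	int max_perf = 24;		/* plain-integer pstate */

	/* int * fixed-point is already fixed-point: 24 * 204 >> 8 = 19 */
	printf("max pstate = %d\n", (int)fp_toint(max_perf * max_perf_frac));
	return 0;
}
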
@@ -874,16 +881,10 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) | |||
874 | core_pct = int_tofp(sample->aperf) * int_tofp(100); | 881 | core_pct = int_tofp(sample->aperf) * int_tofp(100); |
875 | core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); | 882 | core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); |
876 | 883 | ||
877 | sample->freq = fp_toint( | ||
878 | mul_fp(int_tofp( | ||
879 | cpu->pstate.max_pstate_physical * | ||
880 | cpu->pstate.scaling / 100), | ||
881 | core_pct)); | ||
882 | |||
883 | sample->core_pct_busy = (int32_t)core_pct; | 884 | sample->core_pct_busy = (int32_t)core_pct; |
884 | } | 885 | } |
885 | 886 | ||
886 | static inline void intel_pstate_sample(struct cpudata *cpu) | 887 | static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) |
887 | { | 888 | { |
888 | u64 aperf, mperf; | 889 | u64 aperf, mperf; |
889 | unsigned long flags; | 890 | unsigned long flags; |
@@ -893,14 +894,14 @@ static inline void intel_pstate_sample(struct cpudata *cpu) | |||
893 | rdmsrl(MSR_IA32_APERF, aperf); | 894 | rdmsrl(MSR_IA32_APERF, aperf); |
894 | rdmsrl(MSR_IA32_MPERF, mperf); | 895 | rdmsrl(MSR_IA32_MPERF, mperf); |
895 | tsc = rdtsc(); | 896 | tsc = rdtsc(); |
896 | if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) { | 897 | if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) { |
897 | local_irq_restore(flags); | 898 | local_irq_restore(flags); |
898 | return; | 899 | return false; |
899 | } | 900 | } |
900 | local_irq_restore(flags); | 901 | local_irq_restore(flags); |
901 | 902 | ||
902 | cpu->last_sample_time = cpu->sample.time; | 903 | cpu->last_sample_time = cpu->sample.time; |
903 | cpu->sample.time = ktime_get(); | 904 | cpu->sample.time = time; |
904 | cpu->sample.aperf = aperf; | 905 | cpu->sample.aperf = aperf; |
905 | cpu->sample.mperf = mperf; | 906 | cpu->sample.mperf = mperf; |
906 | cpu->sample.tsc = tsc; | 907 | cpu->sample.tsc = tsc; |
@@ -908,27 +909,16 @@ static inline void intel_pstate_sample(struct cpudata *cpu) | |||
908 | cpu->sample.mperf -= cpu->prev_mperf; | 909 | cpu->sample.mperf -= cpu->prev_mperf; |
909 | cpu->sample.tsc -= cpu->prev_tsc; | 910 | cpu->sample.tsc -= cpu->prev_tsc; |
910 | 911 | ||
911 | intel_pstate_calc_busy(cpu); | ||
912 | |||
913 | cpu->prev_aperf = aperf; | 912 | cpu->prev_aperf = aperf; |
914 | cpu->prev_mperf = mperf; | 913 | cpu->prev_mperf = mperf; |
915 | cpu->prev_tsc = tsc; | 914 | cpu->prev_tsc = tsc; |
915 | return true; | ||
916 | } | 916 | } |
917 | 917 | ||
918 | static inline void intel_hwp_set_sample_time(struct cpudata *cpu) | 918 | static inline int32_t get_avg_frequency(struct cpudata *cpu) |
919 | { | ||
920 | int delay; | ||
921 | |||
922 | delay = msecs_to_jiffies(50); | ||
923 | mod_timer_pinned(&cpu->timer, jiffies + delay); | ||
924 | } | ||
925 | |||
926 | static inline void intel_pstate_set_sample_time(struct cpudata *cpu) | ||
927 | { | 919 | { |
928 | int delay; | 920 | return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf * |
929 | 921 | cpu->pstate.scaling, cpu->sample.mperf); | |
930 | delay = msecs_to_jiffies(pid_params.sample_rate_ms); | ||
931 | mod_timer_pinned(&cpu->timer, jiffies + delay); | ||
932 | } | 922 | } |
933 | 923 | ||
934 | static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) | 924 | static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) |
@@ -954,7 +944,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) | |||
954 | mperf = cpu->sample.mperf + delta_iowait_mperf; | 944 | mperf = cpu->sample.mperf + delta_iowait_mperf; |
955 | cpu->prev_cummulative_iowait = cummulative_iowait; | 945 | cpu->prev_cummulative_iowait = cummulative_iowait; |
956 | 946 | ||
957 | |||
958 | /* | 947 | /* |
959 | * The load can be estimated as the ratio of the mperf counter | 948 | * The load can be estimated as the ratio of the mperf counter |
960 | * running at a constant frequency during active periods | 949 | * running at a constant frequency during active periods |
@@ -970,8 +959,9 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) | |||
970 | static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) | 959 | static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) |
971 | { | 960 | { |
972 | int32_t core_busy, max_pstate, current_pstate, sample_ratio; | 961 | int32_t core_busy, max_pstate, current_pstate, sample_ratio; |
973 | s64 duration_us; | 962 | u64 duration_ns; |
974 | u32 sample_time; | 963 | |
964 | intel_pstate_calc_busy(cpu); | ||
975 | 965 | ||
976 | /* | 966 | /* |
977 | * core_busy is the ratio of actual performance to max | 967 | * core_busy is the ratio of actual performance to max |
@@ -990,18 +980,16 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) | |||
990 | core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); | 980 | core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); |
991 | 981 | ||
992 | /* | 982 | /* |
993 | * Since we have a deferred timer, it will not fire unless | 983 | * Since our utilization update callback will not run unless we are |
994 | * we are in C0. So, determine if the actual elapsed time | 984 | * in C0, check if the actual elapsed time is significantly greater (3x) |
995 | * is significantly greater (3x) than our sample interval. If it | 985 | * than our sample interval. If it is, then we were idle for a long |
996 | * is, then we were idle for a long enough period of time | 986 | * enough period of time to adjust our busyness. |
997 | * to adjust our busyness. | ||
998 | */ | 987 | */ |
999 | sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; | 988 | duration_ns = cpu->sample.time - cpu->last_sample_time; |
1000 | duration_us = ktime_us_delta(cpu->sample.time, | 989 | if ((s64)duration_ns > pid_params.sample_rate_ns * 3 |
1001 | cpu->last_sample_time); | 990 | && cpu->last_sample_time > 0) { |
1002 | if (duration_us > sample_time * 3) { | 991 | sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns), |
1003 | sample_ratio = div_fp(int_tofp(sample_time), | 992 | int_tofp(duration_ns)); |
1004 | int_tofp(duration_us)); | ||
1005 | core_busy = mul_fp(core_busy, sample_ratio); | 993 | core_busy = mul_fp(core_busy, sample_ratio); |
1006 | } | 994 | } |
1007 | 995 | ||
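
The rewritten check keeps the old intent in nanoseconds: when the time since the previous sample exceeds three sample intervals, the CPU must have been idle, so core_busy is scaled down by sample_rate / duration. With a 10 ms interval and a 50 ms gap, a measured 60% busyness becomes 12%:

#include <stdio.h>

int main(void)
{
	double sample_rate_ns = 10e6;	/* 10 ms PID sample interval */
	double duration_ns = 50e6;	/* woke up after a 50 ms gap */
	double core_busy = 60.0;

	if (duration_ns > sample_rate_ns * 3)	/* 50 ms > 30 ms */
		core_busy *= sample_rate_ns / duration_ns; /* 60% -> 12% */

	printf("scaled core_busy = %.1f%%\n", core_busy);
	return 0;
}
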
@@ -1028,26 +1016,21 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) | |||
1028 | sample->mperf, | 1016 | sample->mperf, |
1029 | sample->aperf, | 1017 | sample->aperf, |
1030 | sample->tsc, | 1018 | sample->tsc, |
1031 | sample->freq); | 1019 | get_avg_frequency(cpu)); |
1032 | } | 1020 | } |
1033 | 1021 | ||
1034 | static void intel_hwp_timer_func(unsigned long __data) | 1022 | static void intel_pstate_update_util(struct update_util_data *data, u64 time, |
1023 | unsigned long util, unsigned long max) | ||
1035 | { | 1024 | { |
1036 | struct cpudata *cpu = (struct cpudata *) __data; | 1025 | struct cpudata *cpu = container_of(data, struct cpudata, update_util); |
1026 | u64 delta_ns = time - cpu->sample.time; | ||
1037 | 1027 | ||
1038 | intel_pstate_sample(cpu); | 1028 | if ((s64)delta_ns >= pid_params.sample_rate_ns) { |
1039 | intel_hwp_set_sample_time(cpu); | 1029 | bool sample_taken = intel_pstate_sample(cpu, time); |
1040 | } | ||
1041 | 1030 | ||
1042 | static void intel_pstate_timer_func(unsigned long __data) | 1031 | if (sample_taken && !hwp_active) |
1043 | { | 1032 | intel_pstate_adjust_busy_pstate(cpu); |
1044 | struct cpudata *cpu = (struct cpudata *) __data; | 1033 | } |
1045 | |||
1046 | intel_pstate_sample(cpu); | ||
1047 | |||
1048 | intel_pstate_adjust_busy_pstate(cpu); | ||
1049 | |||
1050 | intel_pstate_set_sample_time(cpu); | ||
1051 | } | 1034 | } |
1052 | 1035 | ||
1053 | #define ICPU(model, policy) \ | 1036 | #define ICPU(model, policy) \ |
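
intel_pstate_update_util() is the heart of the conversion: instead of a deferrable timer firing once per sample period, the scheduler invokes the callback on every utilization update and the driver rate-limits itself with delta_ns. A simplified, runnable sketch of that gating (kernel types and locking omitted):

#include <stdio.h>
#include <stdint.h>

struct cpu_state {
	uint64_t last_sample_time;
	int64_t sample_rate_ns;
};

static void update_util(struct cpu_state *cpu, uint64_t now)
{
	int64_t delta_ns = now - cpu->last_sample_time;

	if (delta_ns < cpu->sample_rate_ns)
		return;			/* called too soon, skip */

	cpu->last_sample_time = now;	/* take a sample, adjust pstate */
	printf("sample at %llu ns\n", (unsigned long long)now);
}

int main(void)
{
	struct cpu_state cpu = { 0, 10000000 };	/* 10 ms interval */

	/* the hook fires every 4 ms; samples land at 12, 24 and 36 ms */
	for (uint64_t t = 4000000; t <= 40000000; t += 4000000)
		update_util(&cpu, t);
	return 0;
}
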
@@ -1095,24 +1078,19 @@ static int intel_pstate_init_cpu(unsigned int cpunum) | |||
1095 | 1078 | ||
1096 | cpu->cpu = cpunum; | 1079 | cpu->cpu = cpunum; |
1097 | 1080 | ||
1098 | if (hwp_active) | 1081 | if (hwp_active) { |
1099 | intel_pstate_hwp_enable(cpu); | 1082 | intel_pstate_hwp_enable(cpu); |
1083 | pid_params.sample_rate_ms = 50; | ||
1084 | pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC; | ||
1085 | } | ||
1100 | 1086 | ||
1101 | intel_pstate_get_cpu_pstates(cpu); | 1087 | intel_pstate_get_cpu_pstates(cpu); |
1102 | 1088 | ||
1103 | init_timer_deferrable(&cpu->timer); | ||
1104 | cpu->timer.data = (unsigned long)cpu; | ||
1105 | cpu->timer.expires = jiffies + HZ/100; | ||
1106 | |||
1107 | if (!hwp_active) | ||
1108 | cpu->timer.function = intel_pstate_timer_func; | ||
1109 | else | ||
1110 | cpu->timer.function = intel_hwp_timer_func; | ||
1111 | |||
1112 | intel_pstate_busy_pid_reset(cpu); | 1089 | intel_pstate_busy_pid_reset(cpu); |
1113 | intel_pstate_sample(cpu); | 1090 | intel_pstate_sample(cpu, 0); |
1114 | 1091 | ||
1115 | add_timer_on(&cpu->timer, cpunum); | 1092 | cpu->update_util.func = intel_pstate_update_util; |
1093 | cpufreq_set_update_util_data(cpunum, &cpu->update_util); | ||
1116 | 1094 | ||
1117 | pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); | 1095 | pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); |
1118 | 1096 | ||
@@ -1128,7 +1106,7 @@ static unsigned int intel_pstate_get(unsigned int cpu_num) | |||
1128 | if (!cpu) | 1106 | if (!cpu) |
1129 | return 0; | 1107 | return 0; |
1130 | sample = &cpu->sample; | 1108 | sample = &cpu->sample; |
1131 | return sample->freq; | 1109 | return get_avg_frequency(cpu); |
1132 | } | 1110 | } |
1133 | 1111 | ||
1134 | static int intel_pstate_set_policy(struct cpufreq_policy *policy) | 1112 | static int intel_pstate_set_policy(struct cpufreq_policy *policy) |
@@ -1141,7 +1119,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) | |||
1141 | pr_debug("intel_pstate: set performance\n"); | 1119 | pr_debug("intel_pstate: set performance\n"); |
1142 | limits = &performance_limits; | 1120 | limits = &performance_limits; |
1143 | if (hwp_active) | 1121 | if (hwp_active) |
1144 | intel_pstate_hwp_set(); | 1122 | intel_pstate_hwp_set(policy->cpus); |
1145 | return 0; | 1123 | return 0; |
1146 | } | 1124 | } |
1147 | 1125 | ||
@@ -1173,7 +1151,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) | |||
1173 | int_tofp(100)); | 1151 | int_tofp(100)); |
1174 | 1152 | ||
1175 | if (hwp_active) | 1153 | if (hwp_active) |
1176 | intel_pstate_hwp_set(); | 1154 | intel_pstate_hwp_set(policy->cpus); |
1177 | 1155 | ||
1178 | return 0; | 1156 | return 0; |
1179 | } | 1157 | } |
@@ -1196,7 +1174,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) | |||
1196 | 1174 | ||
1197 | pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); | 1175 | pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); |
1198 | 1176 | ||
1199 | del_timer_sync(&all_cpu_data[cpu_num]->timer); | 1177 | cpufreq_set_update_util_data(cpu_num, NULL); |
1178 | synchronize_sched(); | ||
1179 | |||
1200 | if (hwp_active) | 1180 | if (hwp_active) |
1201 | return; | 1181 | return; |
1202 | 1182 | ||
@@ -1260,6 +1240,7 @@ static int intel_pstate_msrs_not_valid(void) | |||
1260 | static void copy_pid_params(struct pstate_adjust_policy *policy) | 1240 | static void copy_pid_params(struct pstate_adjust_policy *policy) |
1261 | { | 1241 | { |
1262 | pid_params.sample_rate_ms = policy->sample_rate_ms; | 1242 | pid_params.sample_rate_ms = policy->sample_rate_ms; |
1243 | pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; | ||
1263 | pid_params.p_gain_pct = policy->p_gain_pct; | 1244 | pid_params.p_gain_pct = policy->p_gain_pct; |
1264 | pid_params.i_gain_pct = policy->i_gain_pct; | 1245 | pid_params.i_gain_pct = policy->i_gain_pct; |
1265 | pid_params.d_gain_pct = policy->d_gain_pct; | 1246 | pid_params.d_gain_pct = policy->d_gain_pct; |
@@ -1397,6 +1378,11 @@ static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; } | |||
1397 | static inline bool intel_pstate_has_acpi_ppc(void) { return false; } | 1378 | static inline bool intel_pstate_has_acpi_ppc(void) { return false; } |
1398 | #endif /* CONFIG_ACPI */ | 1379 | #endif /* CONFIG_ACPI */ |
1399 | 1380 | ||
1381 | static const struct x86_cpu_id hwp_support_ids[] __initconst = { | ||
1382 | { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP }, | ||
1383 | {} | ||
1384 | }; | ||
1385 | |||
1400 | static int __init intel_pstate_init(void) | 1386 | static int __init intel_pstate_init(void) |
1401 | { | 1387 | { |
1402 | int cpu, rc = 0; | 1388 | int cpu, rc = 0; |
@@ -1406,17 +1392,16 @@ static int __init intel_pstate_init(void) | |||
1406 | if (no_load) | 1392 | if (no_load) |
1407 | return -ENODEV; | 1393 | return -ENODEV; |
1408 | 1394 | ||
1395 | if (x86_match_cpu(hwp_support_ids) && !no_hwp) { | ||
1396 | copy_cpu_funcs(&core_params.funcs); | ||
1397 | hwp_active++; | ||
1398 | goto hwp_cpu_matched; | ||
1399 | } | ||
1400 | |||
1409 | id = x86_match_cpu(intel_pstate_cpu_ids); | 1401 | id = x86_match_cpu(intel_pstate_cpu_ids); |
1410 | if (!id) | 1402 | if (!id) |
1411 | return -ENODEV; | 1403 | return -ENODEV; |
1412 | 1404 | ||
1413 | /* | ||
1414 | * The Intel pstate driver will be ignored if the platform | ||
1415 | * firmware has its own power management modes. | ||
1416 | */ | ||
1417 | if (intel_pstate_platform_pwr_mgmt_exists()) | ||
1418 | return -ENODEV; | ||
1419 | |||
1420 | cpu_def = (struct cpu_defaults *)id->driver_data; | 1405 | cpu_def = (struct cpu_defaults *)id->driver_data; |
1421 | 1406 | ||
1422 | copy_pid_params(&cpu_def->pid_policy); | 1407 | copy_pid_params(&cpu_def->pid_policy); |
@@ -1425,17 +1410,20 @@ static int __init intel_pstate_init(void) | |||
1425 | if (intel_pstate_msrs_not_valid()) | 1410 | if (intel_pstate_msrs_not_valid()) |
1426 | return -ENODEV; | 1411 | return -ENODEV; |
1427 | 1412 | ||
1413 | hwp_cpu_matched: | ||
1414 | /* | ||
1415 | * The Intel pstate driver will be ignored if the platform | ||
1416 | * firmware has its own power management modes. | ||
1417 | */ | ||
1418 | if (intel_pstate_platform_pwr_mgmt_exists()) | ||
1419 | return -ENODEV; | ||
1420 | |||
1428 | pr_info("Intel P-state driver initializing.\n"); | 1421 | pr_info("Intel P-state driver initializing.\n"); |
1429 | 1422 | ||
1430 | all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); | 1423 | all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); |
1431 | if (!all_cpu_data) | 1424 | if (!all_cpu_data) |
1432 | return -ENOMEM; | 1425 | return -ENOMEM; |
1433 | 1426 | ||
1434 | if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) { | ||
1435 | pr_info("intel_pstate: HWP enabled\n"); | ||
1436 | hwp_active++; | ||
1437 | } | ||
1438 | |||
1439 | if (!hwp_active && hwp_only) | 1427 | if (!hwp_active && hwp_only) |
1440 | goto out; | 1428 | goto out; |
1441 | 1429 | ||
@@ -1446,12 +1434,16 @@ static int __init intel_pstate_init(void) | |||
1446 | intel_pstate_debug_expose_params(); | 1434 | intel_pstate_debug_expose_params(); |
1447 | intel_pstate_sysfs_expose_params(); | 1435 | intel_pstate_sysfs_expose_params(); |
1448 | 1436 | ||
1437 | if (hwp_active) | ||
1438 | pr_info("intel_pstate: HWP enabled\n"); | ||
1439 | |||
1449 | return rc; | 1440 | return rc; |
1450 | out: | 1441 | out: |
1451 | get_online_cpus(); | 1442 | get_online_cpus(); |
1452 | for_each_online_cpu(cpu) { | 1443 | for_each_online_cpu(cpu) { |
1453 | if (all_cpu_data[cpu]) { | 1444 | if (all_cpu_data[cpu]) { |
1454 | del_timer_sync(&all_cpu_data[cpu]->timer); | 1445 | cpufreq_set_update_util_data(cpu, NULL); |
1446 | synchronize_sched(); | ||
1455 | kfree(all_cpu_data[cpu]); | 1447 | kfree(all_cpu_data[cpu]); |
1456 | } | 1448 | } |
1457 | } | 1449 | } |
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 547890fd9572..50bf12033bbc 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c | |||
@@ -28,6 +28,8 @@ | |||
28 | #include <linux/of.h> | 28 | #include <linux/of.h> |
29 | #include <linux/reboot.h> | 29 | #include <linux/reboot.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/cpu.h> | ||
32 | #include <trace/events/power.h> | ||
31 | 33 | ||
32 | #include <asm/cputhreads.h> | 34 | #include <asm/cputhreads.h> |
33 | #include <asm/firmware.h> | 35 | #include <asm/firmware.h> |
@@ -42,13 +44,24 @@ | |||
42 | 44 | ||
43 | static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; | 45 | static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; |
44 | static bool rebooting, throttled, occ_reset; | 46 | static bool rebooting, throttled, occ_reset; |
47 | static unsigned int *core_to_chip_map; | ||
48 | |||
49 | static const char * const throttle_reason[] = { | ||
50 | "No throttling", | ||
51 | "Power Cap", | ||
52 | "Processor Over Temperature", | ||
53 | "Power Supply Failure", | ||
54 | "Over Current", | ||
55 | "OCC Reset" | ||
56 | }; | ||
45 | 57 | ||
46 | static struct chip { | 58 | static struct chip { |
47 | unsigned int id; | 59 | unsigned int id; |
48 | bool throttled; | 60 | bool throttled; |
61 | bool restore; | ||
62 | u8 throttle_reason; | ||
49 | cpumask_t mask; | 63 | cpumask_t mask; |
50 | struct work_struct throttle; | 64 | struct work_struct throttle; |
51 | bool restore; | ||
52 | } *chips; | 65 | } *chips; |
53 | 66 | ||
54 | static int nr_chips; | 67 | static int nr_chips; |
@@ -312,13 +325,14 @@ static inline unsigned int get_nominal_index(void) | |||
312 | static void powernv_cpufreq_throttle_check(void *data) | 325 | static void powernv_cpufreq_throttle_check(void *data) |
313 | { | 326 | { |
314 | unsigned int cpu = smp_processor_id(); | 327 | unsigned int cpu = smp_processor_id(); |
328 | unsigned int chip_id = core_to_chip_map[cpu_core_index_of_thread(cpu)]; | ||
315 | unsigned long pmsr; | 329 | unsigned long pmsr; |
316 | int pmsr_pmax, i; | 330 | int pmsr_pmax, i; |
317 | 331 | ||
318 | pmsr = get_pmspr(SPRN_PMSR); | 332 | pmsr = get_pmspr(SPRN_PMSR); |
319 | 333 | ||
320 | for (i = 0; i < nr_chips; i++) | 334 | for (i = 0; i < nr_chips; i++) |
321 | if (chips[i].id == cpu_to_chip_id(cpu)) | 335 | if (chips[i].id == chip_id) |
322 | break; | 336 | break; |
323 | 337 | ||
324 | /* Check for Pmax Capping */ | 338 | /* Check for Pmax Capping */ |
@@ -328,17 +342,17 @@ static void powernv_cpufreq_throttle_check(void *data) | |||
328 | goto next; | 342 | goto next; |
329 | chips[i].throttled = true; | 343 | chips[i].throttled = true; |
330 | if (pmsr_pmax < powernv_pstate_info.nominal) | 344 | if (pmsr_pmax < powernv_pstate_info.nominal) |
331 | pr_crit("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", | 345 | pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", |
332 | cpu, chips[i].id, pmsr_pmax, | 346 | cpu, chips[i].id, pmsr_pmax, |
333 | powernv_pstate_info.nominal); | 347 | powernv_pstate_info.nominal); |
334 | else | 348 | trace_powernv_throttle(chips[i].id, |
335 | pr_info("CPU %d on Chip %u has Pmax reduced below turbo frequency (%d < %d)\n", | 349 | throttle_reason[chips[i].throttle_reason], |
336 | cpu, chips[i].id, pmsr_pmax, | 350 | pmsr_pmax); |
337 | powernv_pstate_info.max); | ||
338 | } else if (chips[i].throttled) { | 351 | } else if (chips[i].throttled) { |
339 | chips[i].throttled = false; | 352 | chips[i].throttled = false; |
340 | pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu, | 353 | trace_powernv_throttle(chips[i].id, |
341 | chips[i].id, pmsr_pmax); | 354 | throttle_reason[chips[i].throttle_reason], |
355 | pmsr_pmax); | ||
342 | } | 356 | } |
343 | 357 | ||
344 | /* Check if Psafe_mode_active is set in PMSR. */ | 358 | /* Check if Psafe_mode_active is set in PMSR. */ |
@@ -356,7 +370,7 @@ next: | |||
356 | 370 | ||
357 | if (throttled) { | 371 | if (throttled) { |
358 | pr_info("PMSR = %16lx\n", pmsr); | 372 | pr_info("PMSR = %16lx\n", pmsr); |
359 | pr_crit("CPU Frequency could be throttled\n"); | 373 | pr_warn("CPU Frequency could be throttled\n"); |
360 | } | 374 | } |
361 | } | 375 | } |
362 | 376 | ||
@@ -423,18 +437,19 @@ void powernv_cpufreq_work_fn(struct work_struct *work) | |||
423 | { | 437 | { |
424 | struct chip *chip = container_of(work, struct chip, throttle); | 438 | struct chip *chip = container_of(work, struct chip, throttle); |
425 | unsigned int cpu; | 439 | unsigned int cpu; |
426 | cpumask_var_t mask; | 440 | cpumask_t mask; |
427 | 441 | ||
428 | smp_call_function_any(&chip->mask, | 442 | get_online_cpus(); |
443 | cpumask_and(&mask, &chip->mask, cpu_online_mask); | ||
444 | smp_call_function_any(&mask, | ||
429 | powernv_cpufreq_throttle_check, NULL, 0); | 445 | powernv_cpufreq_throttle_check, NULL, 0); |
430 | 446 | ||
431 | if (!chip->restore) | 447 | if (!chip->restore) |
432 | return; | 448 | goto out; |
433 | 449 | ||
434 | chip->restore = false; | 450 | chip->restore = false; |
435 | cpumask_copy(mask, &chip->mask); | 451 | for_each_cpu(cpu, &mask) { |
436 | for_each_cpu_and(cpu, mask, cpu_online_mask) { | 452 | int index; |
437 | int index, tcpu; | ||
438 | struct cpufreq_policy policy; | 453 | struct cpufreq_policy policy; |
439 | 454 | ||
440 | cpufreq_get_policy(&policy, cpu); | 455 | cpufreq_get_policy(&policy, cpu); |
@@ -442,20 +457,12 @@ void powernv_cpufreq_work_fn(struct work_struct *work) | |||
442 | policy.cur, | 457 | policy.cur, |
443 | CPUFREQ_RELATION_C, &index); | 458 | CPUFREQ_RELATION_C, &index); |
444 | powernv_cpufreq_target_index(&policy, index); | 459 | powernv_cpufreq_target_index(&policy, index); |
445 | for_each_cpu(tcpu, policy.cpus) | 460 | cpumask_andnot(&mask, &mask, policy.cpus); |
446 | cpumask_clear_cpu(tcpu, mask); | ||
447 | } | 461 | } |
462 | out: | ||
463 | put_online_cpus(); | ||
448 | } | 464 | } |
449 | 465 | ||
450 | static char throttle_reason[][30] = { | ||
451 | "No throttling", | ||
452 | "Power Cap", | ||
453 | "Processor Over Temperature", | ||
454 | "Power Supply Failure", | ||
455 | "Over Current", | ||
456 | "OCC Reset" | ||
457 | }; | ||
458 | |||
459 | static int powernv_cpufreq_occ_msg(struct notifier_block *nb, | 466 | static int powernv_cpufreq_occ_msg(struct notifier_block *nb, |
460 | unsigned long msg_type, void *_msg) | 467 | unsigned long msg_type, void *_msg) |
461 | { | 468 | { |
@@ -481,7 +488,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, | |||
481 | */ | 488 | */ |
482 | if (!throttled) { | 489 | if (!throttled) { |
483 | throttled = true; | 490 | throttled = true; |
484 | pr_crit("CPU frequency is throttled for duration\n"); | 491 | pr_warn("CPU frequency is throttled for duration\n"); |
485 | } | 492 | } |
486 | 493 | ||
487 | break; | 494 | break; |
@@ -505,23 +512,18 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, | |||
505 | return 0; | 512 | return 0; |
506 | } | 513 | } |
507 | 514 | ||
508 | if (omsg.throttle_status && | 515 | for (i = 0; i < nr_chips; i++) |
516 | if (chips[i].id == omsg.chip) | ||
517 | break; | ||
518 | |||
519 | if (omsg.throttle_status >= 0 && | ||
509 | omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) | 520 | omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) |
510 | pr_info("OCC: Chip %u Pmax reduced due to %s\n", | 521 | chips[i].throttle_reason = omsg.throttle_status; |
511 | (unsigned int)omsg.chip, | ||
512 | throttle_reason[omsg.throttle_status]); | ||
513 | else if (!omsg.throttle_status) | ||
514 | pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip, | ||
515 | throttle_reason[omsg.throttle_status]); | ||
516 | else | ||
517 | return 0; | ||
518 | 522 | ||
519 | for (i = 0; i < nr_chips; i++) | 523 | if (!omsg.throttle_status) |
520 | if (chips[i].id == omsg.chip) { | 524 | chips[i].restore = true; |
521 | if (!omsg.throttle_status) | 525 | |
522 | chips[i].restore = true; | 526 | schedule_work(&chips[i].throttle); |
523 | schedule_work(&chips[i].throttle); | ||
524 | } | ||
525 | } | 527 | } |
526 | return 0; | 528 | return 0; |
527 | } | 529 | } |
@@ -556,29 +558,54 @@ static int init_chip_info(void) | |||
556 | unsigned int chip[256]; | 558 | unsigned int chip[256]; |
557 | unsigned int cpu, i; | 559 | unsigned int cpu, i; |
558 | unsigned int prev_chip_id = UINT_MAX; | 560 | unsigned int prev_chip_id = UINT_MAX; |
561 | cpumask_t cpu_mask; | ||
562 | int ret = -ENOMEM; | ||
563 | |||
564 | core_to_chip_map = kcalloc(cpu_nr_cores(), sizeof(unsigned int), | ||
565 | GFP_KERNEL); | ||
566 | if (!core_to_chip_map) | ||
567 | goto out; | ||
559 | 568 | ||
560 | for_each_possible_cpu(cpu) { | 569 | cpumask_copy(&cpu_mask, cpu_possible_mask); |
570 | for_each_cpu(cpu, &cpu_mask) { | ||
561 | unsigned int id = cpu_to_chip_id(cpu); | 571 | unsigned int id = cpu_to_chip_id(cpu); |
562 | 572 | ||
563 | if (prev_chip_id != id) { | 573 | if (prev_chip_id != id) { |
564 | prev_chip_id = id; | 574 | prev_chip_id = id; |
565 | chip[nr_chips++] = id; | 575 | chip[nr_chips++] = id; |
566 | } | 576 | } |
577 | core_to_chip_map[cpu_core_index_of_thread(cpu)] = id; | ||
578 | cpumask_andnot(&cpu_mask, &cpu_mask, cpu_sibling_mask(cpu)); | ||
567 | } | 579 | } |
568 | 580 | ||
569 | chips = kmalloc_array(nr_chips, sizeof(struct chip), GFP_KERNEL); | 581 | chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL); |
570 | if (!chips) | 582 | if (!chips) |
571 | return -ENOMEM; | 583 | goto free_chip_map; |
572 | 584 | ||
573 | for (i = 0; i < nr_chips; i++) { | 585 | for (i = 0; i < nr_chips; i++) { |
574 | chips[i].id = chip[i]; | 586 | chips[i].id = chip[i]; |
575 | chips[i].throttled = false; | ||
576 | cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); | 587 | cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); |
577 | INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); | 588 | INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); |
578 | chips[i].restore = false; | ||
579 | } | 589 | } |
580 | 590 | ||
581 | return 0; | 591 | return 0; |
592 | free_chip_map: | ||
593 | kfree(core_to_chip_map); | ||
594 | out: | ||
595 | return ret; | ||
596 | } | ||
597 | |||
598 | static inline void clean_chip_info(void) | ||
599 | { | ||
600 | kfree(chips); | ||
601 | kfree(core_to_chip_map); | ||
602 | } | ||
603 | |||
604 | static inline void unregister_all_notifiers(void) | ||
605 | { | ||
606 | opal_message_notifier_unregister(OPAL_MSG_OCC, | ||
607 | &powernv_cpufreq_opal_nb); | ||
608 | unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); | ||
582 | } | 609 | } |
583 | 610 | ||
584 | static int __init powernv_cpufreq_init(void) | 611 | static int __init powernv_cpufreq_init(void) |
@@ -591,28 +618,35 @@ static int __init powernv_cpufreq_init(void) | |||
591 | 618 | ||
592 | /* Discover pstates from device tree and init */ | 619 | /* Discover pstates from device tree and init */ |
593 | rc = init_powernv_pstates(); | 620 | rc = init_powernv_pstates(); |
594 | if (rc) { | 621 | if (rc) |
595 | pr_info("powernv-cpufreq disabled. System does not support PState control\n"); | 622 | goto out; |
596 | return rc; | ||
597 | } | ||
598 | 623 | ||
599 | /* Populate chip info */ | 624 | /* Populate chip info */ |
600 | rc = init_chip_info(); | 625 | rc = init_chip_info(); |
601 | if (rc) | 626 | if (rc) |
602 | return rc; | 627 | goto out; |
603 | 628 | ||
604 | register_reboot_notifier(&powernv_cpufreq_reboot_nb); | 629 | register_reboot_notifier(&powernv_cpufreq_reboot_nb); |
605 | opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); | 630 | opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); |
606 | return cpufreq_register_driver(&powernv_cpufreq_driver); | 631 | |
632 | rc = cpufreq_register_driver(&powernv_cpufreq_driver); | ||
633 | if (!rc) | ||
634 | return 0; | ||
635 | |||
636 | pr_info("Failed to register the cpufreq driver (%d)\n", rc); | ||
637 | unregister_all_notifiers(); | ||
638 | clean_chip_info(); | ||
639 | out: | ||
640 | pr_info("Platform driver disabled. System does not support PState control\n"); | ||
641 | return rc; | ||
607 | } | 642 | } |
608 | module_init(powernv_cpufreq_init); | 643 | module_init(powernv_cpufreq_init); |
609 | 644 | ||
610 | static void __exit powernv_cpufreq_exit(void) | 645 | static void __exit powernv_cpufreq_exit(void) |
611 | { | 646 | { |
612 | unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); | ||
613 | opal_message_notifier_unregister(OPAL_MSG_OCC, | ||
614 | &powernv_cpufreq_opal_nb); | ||
615 | cpufreq_unregister_driver(&powernv_cpufreq_driver); | 647 | cpufreq_unregister_driver(&powernv_cpufreq_driver); |
648 | unregister_all_notifiers(); | ||
649 | clean_chip_info(); | ||
616 | } | 650 | } |
617 | module_exit(powernv_cpufreq_exit); | 651 | module_exit(powernv_cpufreq_exit); |
618 | 652 | ||
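
The reworked init path now undoes its setup in reverse order when driver registration fails, via unregister_all_notifiers() and clean_chip_info(). A compact sketch of that register-then-unwind idiom (function names illustrative, failure simulated):

#include <stdio.h>

static int init_pstates(void)    { return 0; }
static int init_chips(void)      { return 0; }
static void free_chips(void)     { puts("chips freed"); }
static void add_notifiers(void)  { puts("notifiers registered"); }
static void del_notifiers(void)  { puts("notifiers unregistered"); }
static int register_driver(void) { return -1; /* simulate failure */ }

int main(void)
{
	int rc = init_pstates();
	if (rc)
		goto out;
	rc = init_chips();
	if (rc)
		goto out;
	add_notifiers();
	rc = register_driver();
	if (!rc)
		return 0;
	del_notifiers();	/* unwind in reverse order of setup */
	free_chips();
out:
	puts("platform driver disabled");
	return rc;
}
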
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 88a4215125bc..718e8725de8a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h | |||
@@ -80,7 +80,6 @@ struct cpufreq_policy { | |||
80 | unsigned int last_policy; /* policy before unplug */ | 80 | unsigned int last_policy; /* policy before unplug */ |
81 | struct cpufreq_governor *governor; /* see below */ | 81 | struct cpufreq_governor *governor; /* see below */ |
82 | void *governor_data; | 82 | void *governor_data; |
83 | bool governor_enabled; /* governor start/stop flag */ | ||
84 | char last_governor[CPUFREQ_NAME_LEN]; /* last governor used */ | 83 | char last_governor[CPUFREQ_NAME_LEN]; /* last governor used */ |
85 | 84 | ||
86 | struct work_struct update; /* if update_policy() needs to be | 85 | struct work_struct update; /* if update_policy() needs to be |
@@ -100,10 +99,6 @@ struct cpufreq_policy { | |||
100 | * - Any routine that will write to the policy structure and/or may take away | 99 | * - Any routine that will write to the policy structure and/or may take away |
101 | * the policy altogether (eg. CPU hotplug), will hold this lock in write | 100 | * the policy altogether (eg. CPU hotplug), will hold this lock in write |
102 | * mode before doing so. | 101 | * mode before doing so. |
103 | * | ||
104 | * Additional rules: | ||
105 | * - Lock should not be held across | ||
106 | * __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); | ||
107 | */ | 102 | */ |
108 | struct rw_semaphore rwsem; | 103 | struct rw_semaphore rwsem; |
109 | 104 | ||
@@ -464,29 +459,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, | |||
464 | int cpufreq_register_governor(struct cpufreq_governor *governor); | 459 | int cpufreq_register_governor(struct cpufreq_governor *governor); |
465 | void cpufreq_unregister_governor(struct cpufreq_governor *governor); | 460 | void cpufreq_unregister_governor(struct cpufreq_governor *governor); |
466 | 461 | ||
467 | /* CPUFREQ DEFAULT GOVERNOR */ | 462 | struct cpufreq_governor *cpufreq_default_governor(void); |
468 | /* | 463 | struct cpufreq_governor *cpufreq_fallback_governor(void); |
469 | * Performance governor is fallback governor if any other gov failed to auto | ||
470 | * load due latency restrictions | ||
471 | */ | ||
472 | #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE | ||
473 | extern struct cpufreq_governor cpufreq_gov_performance; | ||
474 | #endif | ||
475 | #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE | ||
476 | #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_performance) | ||
477 | #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE) | ||
478 | extern struct cpufreq_governor cpufreq_gov_powersave; | ||
479 | #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_powersave) | ||
480 | #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE) | ||
481 | extern struct cpufreq_governor cpufreq_gov_userspace; | ||
482 | #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_userspace) | ||
483 | #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND) | ||
484 | extern struct cpufreq_governor cpufreq_gov_ondemand; | ||
485 | #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_ondemand) | ||
486 | #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE) | ||
487 | extern struct cpufreq_governor cpufreq_gov_conservative; | ||
488 | #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative) | ||
489 | #endif | ||
490 | 464 | ||
491 | /********************************************************************* | 465 | /********************************************************************* |
492 | * FREQUENCY TABLE HELPERS * | 466 | * FREQUENCY TABLE HELPERS * |
@@ -525,16 +499,6 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, | |||
525 | } | 499 | } |
526 | #endif | 500 | #endif |
527 | 501 | ||
528 | static inline bool cpufreq_next_valid(struct cpufreq_frequency_table **pos) | ||
529 | { | ||
530 | while ((*pos)->frequency != CPUFREQ_TABLE_END) | ||
531 | if ((*pos)->frequency != CPUFREQ_ENTRY_INVALID) | ||
532 | return true; | ||
533 | else | ||
534 | (*pos)++; | ||
535 | return false; | ||
536 | } | ||
537 | |||
538 | /* | 502 | /* |
539 | * cpufreq_for_each_entry - iterate over a cpufreq_frequency_table | 503 | * cpufreq_for_each_entry - iterate over a cpufreq_frequency_table |
540 | * @pos: the cpufreq_frequency_table * to use as a loop cursor. | 504 | * @pos: the cpufreq_frequency_table * to use as a loop cursor. |
@@ -551,8 +515,11 @@ static inline bool cpufreq_next_valid(struct cpufreq_frequency_table **pos) | |||
551 | * @table: the cpufreq_frequency_table * to iterate over. | 515 | * @table: the cpufreq_frequency_table * to iterate over. |
552 | */ | 516 | */ |
553 | 517 | ||
554 | #define cpufreq_for_each_valid_entry(pos, table) \ | 518 | #define cpufreq_for_each_valid_entry(pos, table) \ |
555 | for (pos = table; cpufreq_next_valid(&pos); pos++) | 519 | for (pos = table; pos->frequency != CPUFREQ_TABLE_END; pos++) \ |
520 | if (pos->frequency == CPUFREQ_ENTRY_INVALID) \ | ||
521 | continue; \ | ||
522 | else | ||
556 | 523 | ||
557 | int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, | 524 | int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, |
558 | struct cpufreq_frequency_table *table); | 525 | struct cpufreq_frequency_table *table); |
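
The rewritten cpufreq_for_each_valid_entry() folds the old helper into the macro itself: the trailing "else" deliberately dangles so that the caller's loop body binds to it, while invalid entries hit the "continue". A standalone sketch showing the construct staying safe as a single statement:

#include <stdio.h>

#define TABLE_END	~0u
#define ENTRY_INVALID	~1u

struct entry { unsigned int frequency; };

#define for_each_valid_entry(pos, table)			\
	for (pos = table; pos->frequency != TABLE_END; pos++)	\
		if (pos->frequency == ENTRY_INVALID)		\
			continue;				\
		else

int main(void)
{
	struct entry t[] = {
		{ 1000000 }, { ENTRY_INVALID }, { 2000000 }, { TABLE_END },
	};
	struct entry *pos;

	for_each_valid_entry(pos, t)	/* body attaches to the else */
		printf("%u kHz\n", pos->frequency);
	return 0;
}
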
diff --git a/include/linux/sched.h b/include/linux/sched.h index a10494a94cc3..913e755ef7b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -3207,4 +3207,13 @@ static inline unsigned long rlimit_max(unsigned int limit) | |||
3207 | return task_rlimit_max(current, limit); | 3207 | return task_rlimit_max(current, limit); |
3208 | } | 3208 | } |
3209 | 3209 | ||
3210 | #ifdef CONFIG_CPU_FREQ | ||
3211 | struct update_util_data { | ||
3212 | void (*func)(struct update_util_data *data, | ||
3213 | u64 time, unsigned long util, unsigned long max); | ||
3214 | }; | ||
3215 | |||
3216 | void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); | ||
3217 | #endif /* CONFIG_CPU_FREQ */ | ||
3218 | |||
3210 | #endif | 3219 | #endif |
diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 284244ebfe8d..19e50300ce7d 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h | |||
@@ -38,6 +38,28 @@ DEFINE_EVENT(cpu, cpu_idle, | |||
38 | TP_ARGS(state, cpu_id) | 38 | TP_ARGS(state, cpu_id) |
39 | ); | 39 | ); |
40 | 40 | ||
41 | TRACE_EVENT(powernv_throttle, | ||
42 | |||
43 | TP_PROTO(int chip_id, const char *reason, int pmax), | ||
44 | |||
45 | TP_ARGS(chip_id, reason, pmax), | ||
46 | |||
47 | TP_STRUCT__entry( | ||
48 | __field(int, chip_id) | ||
49 | __string(reason, reason) | ||
50 | __field(int, pmax) | ||
51 | ), | ||
52 | |||
53 | TP_fast_assign( | ||
54 | __entry->chip_id = chip_id; | ||
55 | __assign_str(reason, reason); | ||
56 | __entry->pmax = pmax; | ||
57 | ), | ||
58 | |||
59 | TP_printk("Chip %d Pmax %d %s", __entry->chip_id, | ||
60 | __entry->pmax, __get_str(reason)) | ||
61 | ); | ||
62 | |||
41 | TRACE_EVENT(pstate_sample, | 63 | TRACE_EVENT(pstate_sample, |
42 | 64 | ||
43 | TP_PROTO(u32 core_busy, | 65 | TP_PROTO(u32 core_busy, |
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 67687973ce80..9507522164ac 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile | |||
@@ -19,3 +19,4 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o | |||
19 | obj-$(CONFIG_SCHEDSTATS) += stats.o | 19 | obj-$(CONFIG_SCHEDSTATS) += stats.o |
20 | obj-$(CONFIG_SCHED_DEBUG) += debug.o | 20 | obj-$(CONFIG_SCHED_DEBUG) += debug.o |
21 | obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o | 21 | obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o |
22 | obj-$(CONFIG_CPU_FREQ) += cpufreq.o | ||
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c new file mode 100644 index 000000000000..928c4ba32f68 --- /dev/null +++ b/kernel/sched/cpufreq.c | |||
@@ -0,0 +1,37 @@ | |||
1 | /* | ||
2 | * Scheduler code and data structures related to cpufreq. | ||
3 | * | ||
4 | * Copyright (C) 2016, Intel Corporation | ||
5 | * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include "sched.h" | ||
13 | |||
14 | DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); | ||
15 | |||
16 | /** | ||
17 | * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. | ||
18 | * @cpu: The CPU to set the pointer for. | ||
19 | * @data: New pointer value. | ||
20 | * | ||
21 | * Set and publish the update_util_data pointer for the given CPU. That pointer | ||
22 | * points to a struct update_util_data object containing a callback function | ||
23 | * to call from cpufreq_update_util(). That function will be called from an RCU | ||
24 | * read-side critical section, so it must not sleep. | ||
25 | * | ||
26 | * Callers must use RCU-sched callbacks to free any memory that might be | ||
27 | * accessed via the old update_util_data pointer or invoke synchronize_sched() | ||
28 | * right after this function to avoid use-after-free. | ||
29 | */ | ||
30 | void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) | ||
31 | { | ||
32 | if (WARN_ON(data && !data->func)) | ||
33 | return; | ||
34 | |||
35 | rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); | ||
36 | } | ||
37 | EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); | ||
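
Drivers pair cpufreq_set_update_util_data() with a teardown that clears the pointer and then waits for in-flight callbacks, exactly as intel_pstate_stop_cpu() does with synchronize_sched() above. A user-space analogue of that publish/unpublish protocol, with an atomic pointer standing in for the RCU-published per-CPU slot:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct update_util_data {
	void (*func)(struct update_util_data *data, unsigned long long time);
};

static _Atomic(struct update_util_data *) hook;

static void my_cb(struct update_util_data *data, unsigned long long time)
{
	(void)data;
	printf("util update at %llu\n", time);
}

static void scheduler_tick(unsigned long long time)
{
	struct update_util_data *d = atomic_load(&hook);

	if (d)
		d->func(d, time);  /* reader side, RCU-protected in-kernel */
}

int main(void)
{
	struct update_util_data *d = malloc(sizeof(*d));

	d->func = my_cb;
	atomic_store(&hook, d);		/* rcu_assign_pointer() analogue */
	scheduler_tick(100);

	atomic_store(&hook, NULL);	/* unpublish... */
	/* ...the kernel would synchronize_sched() here before freeing */
	free(d);
	scheduler_tick(200);		/* callback no longer runs */
	return 0;
}
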
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 57b939c81bce..2037cf432a45 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c | |||
@@ -726,6 +726,10 @@ static void update_curr_dl(struct rq *rq) | |||
726 | if (!dl_task(curr) || !on_dl_rq(dl_se)) | 726 | if (!dl_task(curr) || !on_dl_rq(dl_se)) |
727 | return; | 727 | return; |
728 | 728 | ||
729 | /* Kick cpufreq (see the comment in linux/cpufreq.h). */ | ||
730 | if (cpu_of(rq) == smp_processor_id()) | ||
731 | cpufreq_trigger_update(rq_clock(rq)); | ||
732 | |||
729 | /* | 733 | /* |
730 | * Consumed budget is computed considering the time as | 734 | * Consumed budget is computed considering the time as |
731 | * observed by schedulable tasks (excluding time spent | 735 | * observed by schedulable tasks (excluding time spent |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 56b7d4b83947..e2987a7e489d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -2824,7 +2824,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) | |||
2824 | { | 2824 | { |
2825 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 2825 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
2826 | u64 now = cfs_rq_clock_task(cfs_rq); | 2826 | u64 now = cfs_rq_clock_task(cfs_rq); |
2827 | int cpu = cpu_of(rq_of(cfs_rq)); | 2827 | struct rq *rq = rq_of(cfs_rq); |
2828 | int cpu = cpu_of(rq); | ||
2828 | 2829 | ||
2829 | /* | 2830 | /* |
2830 | * Track task load average for carrying it to new CPU after migrated, and | 2831 | * Track task load average for carrying it to new CPU after migrated, and |
@@ -2836,6 +2837,29 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) | |||
2836 | 2837 | ||
2837 | if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg) | 2838 | if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg) |
2838 | update_tg_load_avg(cfs_rq, 0); | 2839 | update_tg_load_avg(cfs_rq, 0); |
2840 | |||
2841 | if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) { | ||
2842 | unsigned long max = rq->cpu_capacity_orig; | ||
2843 | |||
2844 | /* | ||
2845 | * There are a few boundary cases this might miss, but it should | ||
2846 | * be called often enough that this is (hopefully) not a real | ||
2847 | * problem. Moreover, it is only called on the local CPU, so if | ||
2848 | * we enqueue remotely we'll miss an update, but the next | ||
2849 | * tick/schedule should make up for it. | ||
2850 | * | ||
2851 | * It will not get called when we go idle, because the idle | ||
2852 | * thread is a different class (!fair), nor will the utilization | ||
2853 | * number include things like RT tasks. | ||
2854 | * | ||
2855 | * As is, the util number is not freq-invariant (we'd have to | ||
2856 | * implement arch_scale_freq_capacity() for that). | ||
2857 | * | ||
2858 | * See cpu_util(). | ||
2859 | */ | ||
2860 | cpufreq_update_util(rq_clock(rq), | ||
2861 | min(cfs_rq->avg.util_avg, max), max); | ||
2862 | } | ||
2839 | } | 2863 | } |
2840 | 2864 | ||
2841 | static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) | 2865 | static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8ec86abe0ea1..27f5b03cbdbe 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
@@ -945,6 +945,10 @@ static void update_curr_rt(struct rq *rq) | |||
945 | if (curr->sched_class != &rt_sched_class) | 945 | if (curr->sched_class != &rt_sched_class) |
946 | return; | 946 | return; |
947 | 947 | ||
948 | /* Kick cpufreq (see the comment in linux/cpufreq.h). */ | ||
949 | if (cpu_of(rq) == smp_processor_id()) | ||
950 | cpufreq_trigger_update(rq_clock(rq)); | ||
951 | |||
948 | delta_exec = rq_clock_task(rq) - curr->se.exec_start; | 952 | delta_exec = rq_clock_task(rq) - curr->se.exec_start; |
949 | if (unlikely((s64)delta_exec <= 0)) | 953 | if (unlikely((s64)delta_exec <= 0)) |
950 | return; | 954 | return; |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 10f16374df7f..faf7e2758dd0 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -1738,3 +1738,51 @@ static inline u64 irq_time_read(int cpu) | |||
1738 | } | 1738 | } |
1739 | #endif /* CONFIG_64BIT */ | 1739 | #endif /* CONFIG_64BIT */ |
1740 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | 1740 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
1741 | |||
1742 | #ifdef CONFIG_CPU_FREQ | ||
1743 | DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); | ||
1744 | |||
1745 | /** | ||
1746 | * cpufreq_update_util - Take a note about CPU utilization changes. | ||
1747 | * @time: Current time. | ||
1748 | * @util: Current utilization. | ||
1749 | * @max: Utilization ceiling. | ||
1750 | * | ||
1751 | * This function is called by the scheduler on every invocation of | ||
1752 | * update_load_avg() on the CPU whose utilization is being updated. | ||
1753 | * | ||
1754 | * It can only be called from RCU-sched read-side critical sections. | ||
1755 | */ | ||
1756 | static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) | ||
1757 | { | ||
1758 | struct update_util_data *data; | ||
1759 | |||
1760 | data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); | ||
1761 | if (data) | ||
1762 | data->func(data, time, util, max); | ||
1763 | } | ||
1764 | |||
1765 | /** | ||
1766 | * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. | ||
1767 | * @time: Current time. | ||
1768 | * | ||
1769 | * The way cpufreq is currently arranged requires it to evaluate the CPU | ||
1770 | * performance state (frequency/voltage) on a regular basis, to keep the CPU | ||
1771 | * from being stuck at a completely inadequate performance level for too long. | ||
1772 | * That is not guaranteed to happen if the updates are only triggered from CFS, | ||
1773 | * though, because they may not come in if RT or deadline tasks are active | ||
1774 | * all the time (or if only RT and DL tasks are runnable). | ||
1775 | * | ||
1776 | * As a workaround for that issue, this function is called by the RT and DL | ||
1777 | * sched classes to trigger extra cpufreq updates to prevent it from stalling, | ||
1778 | * but that really is a band-aid. Going forward it should be replaced with | ||
1779 | * solutions targeted more specifically at RT and DL tasks. | ||
1780 | */ | ||
1781 | static inline void cpufreq_trigger_update(u64 time) | ||
1782 | { | ||
1783 | cpufreq_update_util(time, ULONG_MAX, 0); | ||
1784 | } | ||
1785 | #else | ||
1786 | static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {} | ||
1787 | static inline void cpufreq_trigger_update(u64 time) {} | ||
1788 | #endif /* CONFIG_CPU_FREQ */ | ||
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index eb4220a132ec..81b87451c0ea 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c | |||
@@ -15,4 +15,5 @@ | |||
15 | 15 | ||
16 | EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume); | 16 | EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume); |
17 | EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); | 17 | EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); |
18 | EXPORT_TRACEPOINT_SYMBOL_GPL(powernv_throttle); | ||
18 | 19 | ||