30 files changed, 1037 insertions, 154 deletions
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index 106379e2619f..9c58b35a81cb 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -155,14 +155,14 @@ governor uses that information depends on what algorithm is implemented by it
 and that is the primary reason for having more than one governor in the
 ``CPUIdle`` subsystem.
 
-There are two ``CPUIdle`` governors available, ``menu`` and ``ladder``. Which
-of them is used depends on the configuration of the kernel and in particular on
-whether or not the scheduler tick can be `stopped by the idle
-loop <idle-cpus-and-tick_>`_. It is possible to change the governor at run time
-if the ``cpuidle_sysfs_switch`` command line parameter has been passed to the
-kernel, but that is not safe in general, so it should not be done on production
-systems (that may change in the future, though). The name of the ``CPUIdle``
-governor currently used by the kernel can be read from the
+There are three ``CPUIdle`` governors available, ``menu``, `TEO <teo-gov_>`_
+and ``ladder``. Which of them is used by default depends on the configuration
+of the kernel and in particular on whether or not the scheduler tick can be
+`stopped by the idle loop <idle-cpus-and-tick_>`_. It is possible to change the
+governor at run time if the ``cpuidle_sysfs_switch`` command line parameter has
+been passed to the kernel, but that is not safe in general, so it should not be
+done on production systems (that may change in the future, though). The name of
+the ``CPUIdle`` governor currently used by the kernel can be read from the
 :file:`current_governor_ro` (or :file:`current_governor` if
 ``cpuidle_sysfs_switch`` is present in the kernel command line) file under
 :file:`/sys/devices/system/cpu/cpuidle/` in ``sysfs``.
@@ -256,6 +256,8 @@ the ``menu`` governor by default and if it is not tickless, the default
 ``CPUIdle`` governor on it will be ``ladder``.
 
 
+.. _menu-gov:
+
 The ``menu`` Governor
 =====================
 
@@ -333,6 +335,92 @@ that time, the governor may need to select a shallower state with a suitable
 target residency.
 
 
+.. _teo-gov:
+
+The Timer Events Oriented (TEO) Governor
+========================================
+
+The timer events oriented (TEO) governor is an alternative ``CPUIdle`` governor
+for tickless systems. It follows the same basic strategy as the ``menu`` `one
+<menu-gov_>`_: it always tries to find the deepest idle state suitable for the
+given conditions. However, it applies a different approach to that problem.
+
+First, it does not use sleep length correction factors, but instead it attempts
+to correlate the observed idle duration values with the available idle states
+and use that information to pick up the idle state that is most likely to
+"match" the upcoming CPU idle interval. Second, it does not take into account
+the tasks that were running on the given CPU in the past and are waiting on
+some I/O operations to complete now (there is no guarantee that they will run
+on the same CPU when they become runnable again), and the pattern detection
+code in it avoids taking timer wakeups into account. It also only uses idle
+duration values less than the current time till the closest timer (with the
+scheduler tick excluded) for that purpose.
+
+Like in the ``menu`` governor `case <menu-gov_>`_, the first step is to obtain
+the *sleep length*, which is the time until the closest timer event with the
+assumption that the scheduler tick will be stopped (that also is the upper bound
+on the time until the next CPU wakeup). That value is then used to preselect an
+idle state on the basis of three metrics maintained for each idle state provided
+by the ``CPUIdle`` driver: ``hits``, ``misses`` and ``early_hits``.
+
+The ``hits`` and ``misses`` metrics measure the likelihood that a given idle
+state will "match" the observed (post-wakeup) idle duration if it "matches" the
+sleep length. They both are subject to decay (after a CPU wakeup) every time
+the target residency of the idle state corresponding to them is less than or
+equal to the sleep length and the target residency of the next idle state is
+greater than the sleep length (that is, when the idle state corresponding to
+them "matches" the sleep length). The ``hits`` metric is increased if the
+former condition is satisfied and the target residency of the given idle state
+is less than or equal to the observed idle duration and the target residency of
+the next idle state is greater than the observed idle duration at the same time
+(that is, it is increased when the given idle state "matches" both the sleep
+length and the observed idle duration). In turn, the ``misses`` metric is
+increased when the given idle state "matches" the sleep length only and the
+observed idle duration is too short for its target residency.
+
+The ``early_hits`` metric measures the likelihood that a given idle state will
+"match" the observed (post-wakeup) idle duration if it does not "match" the
+sleep length. It is subject to decay on every CPU wakeup and it is increased
+when the idle state corresponding to it "matches" the observed (post-wakeup)
+idle duration and the target residency of the next idle state is less than or
+equal to the sleep length (i.e. the idle state "matching" the sleep length is
+deeper than the given one).
+
+The governor walks the list of idle states provided by the ``CPUIdle`` driver
+and finds the last (deepest) one with the target residency less than or equal
+to the sleep length. Then, the ``hits`` and ``misses`` metrics of that idle
+state are compared with each other and it is preselected if the ``hits`` one is
+greater (which means that that idle state is likely to "match" the observed idle
+duration after CPU wakeup). If the ``misses`` one is greater, the governor
+preselects the shallower idle state with the maximum ``early_hits`` metric
+(or if there are multiple shallower idle states with equal ``early_hits``
+metric which also is the maximum, the shallowest of them will be preselected).
+[If there is a wakeup latency constraint coming from the `PM QoS framework
+<cpu-pm-qos_>`_ which is hit before reaching the deepest idle state with the
+target residency within the sleep length, the deepest idle state with the exit
+latency within the constraint is preselected without consulting the ``hits``,
+``misses`` and ``early_hits`` metrics.]
+
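As a rough illustration of the preselection step just described (simplified from the ``teo.c`` implementation added later in this patch; the ``state_stats`` structure, the function name and the latency handling are invented for the sketch)::

  struct state_stats {
      unsigned int target_residency;  /* in microseconds */
      unsigned int exit_latency;      /* in microseconds */
      unsigned int hits, misses, early_hits;
  };

  /* Pick a candidate state index for the given sleep length. */
  static int teo_preselect(const struct state_stats *st, int count,
                           unsigned int sleep_length_us,
                           unsigned int latency_limit_us)
  {
      int i, idx = 0, early_idx = -1;
      unsigned int max_early = 0;

      /* Deepest state matching the sleep length and the latency limit. */
      for (i = 1; i < count; i++) {
          if (st[i].target_residency > sleep_length_us ||
              st[i].exit_latency > latency_limit_us)
              break;
          idx = i;
      }

      /*
       * If wakeups were usually too early for this state when it matched
       * the sleep length, prefer the shallower state that matched those
       * early wakeups most often (ties go to the shallowest one).
       */
      if (st[idx].misses > st[idx].hits) {
          for (i = 0; i < idx; i++) {
              if (st[i].early_hits > max_early) {
                  max_early = st[i].early_hits;
                  early_idx = i;
              }
          }
          if (early_idx >= 0)
              idx = early_idx;
      }
      return idx;
  }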
+Next, the governor takes several idle duration values observed most recently
+into consideration and if at least a half of them are greater than or equal to
+the target residency of the preselected idle state, that idle state becomes the
+final candidate to ask for. Otherwise, the average of the most recent idle
+duration values below the target residency of the preselected idle state is
+computed and the governor walks the idle states shallower than the preselected
+one and finds the deepest of them with the target residency within that average.
+That idle state is then taken as the final candidate to ask for.
+
+Still, at this point the governor may need to refine the idle state selection if
+it has not decided to `stop the scheduler tick <idle-cpus-and-tick_>`_. That
+generally happens if the target residency of the idle state selected so far is
+less than the tick period and the tick has not been stopped already (in a
+previous iteration of the idle loop). Then, like in the ``menu`` governor
+`case <menu-gov_>`_, the sleep length used in the previous computations may not
+reflect the real time until the closest timer event and if it really is greater
+than that time, a shallower state with a suitable target residency may need to
+be selected.
+
+
 .. _idle-states-representation:
 
 Representation of Idle States
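The refinement step based on the most recent idle durations can be sketched similarly (again with invented names; the real governor keeps the ``INTERVALS`` (8) most recent values per CPU, as the ``teo.c`` code below shows)::

  #define INTERVALS 8  /* most recent idle durations kept per CPU */

  /* Possibly demote the candidate based on recent idle durations. */
  static int teo_refine(const struct state_stats *st, int idx,
                        const unsigned int recent_us[INTERVALS])
  {
      unsigned int sum = 0, count = 0, avg;
      int i;

      /* Collect the recent idle durations below the target residency. */
      for (i = 0; i < INTERVALS; i++) {
          if (recent_us[i] < st[idx].target_residency) {
              sum += recent_us[i];
              count++;
          }
      }

      /* At least half of the recent wakeups matched the candidate. */
      if (2 * count <= INTERVALS)
          return idx;

      /* Deepest shallower state with residency within the average. */
      avg = sum / count;
      while (idx > 0 && st[idx].target_residency > avg)
          idx--;

      return idx;
  }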
diff --git a/Documentation/cpuidle/driver.txt b/Documentation/cpuidle/driver.txt
deleted file mode 100644
index 1b0d81d92583..000000000000
--- a/Documentation/cpuidle/driver.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-
-
-		Supporting multiple CPU idle levels in kernel
-
-				cpuidle drivers
-
-
-
-
-cpuidle driver hooks into the cpuidle infrastructure and handles the
-architecture/platform dependent part of CPU idle states. Driver
-provides the platform idle state detection capability and also
-has mechanisms in place to support actual entry-exit into CPU idle states.
-
-cpuidle driver initializes the cpuidle_device structure for each CPU device
-and registers with cpuidle using cpuidle_register_device.
-
-If all the idle states are the same, the wrapper function cpuidle_register
-could be used instead.
-
-It can also support the dynamic changes (like battery <-> AC), by using
-cpuidle_pause_and_lock, cpuidle_disable_device and cpuidle_enable_device,
-cpuidle_resume_and_unlock.
-
-Interfaces:
-extern int cpuidle_register(struct cpuidle_driver *drv,
-			    const struct cpumask *const coupled_cpus);
-extern int cpuidle_unregister(struct cpuidle_driver *drv);
-extern int cpuidle_register_driver(struct cpuidle_driver *drv);
-extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
-extern int cpuidle_register_device(struct cpuidle_device *dev);
-extern void cpuidle_unregister_device(struct cpuidle_device *dev);
-
-extern void cpuidle_pause_and_lock(void);
-extern void cpuidle_resume_and_unlock(void);
-extern int cpuidle_enable_device(struct cpuidle_device *dev);
-extern void cpuidle_disable_device(struct cpuidle_device *dev);
diff --git a/Documentation/cpuidle/governor.txt b/Documentation/cpuidle/governor.txt
deleted file mode 100644
index d9020f5e847b..000000000000
--- a/Documentation/cpuidle/governor.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-
-
-
-		Supporting multiple CPU idle levels in kernel
-
-			cpuidle governors
-
-
-
-
-cpuidle governor is policy routine that decides what idle state to enter at
-any given time. cpuidle core uses different callbacks to the governor.
-
-* enable() to enable governor for a particular device
-* disable() to disable governor for a particular device
-* select() to select an idle state to enter
-* reflect() called after returning from the idle state, which can be used
-  by the governor for some record keeping.
-
-More than one governor can be registered at the same time and
-users can switch between drivers using /sysfs interface (when enabled).
-More than one governor part is supported for developers to easily experiment
-with different governors. By default, most optimal governor based on your
-kernel configuration and platform will be selected by cpuidle.
-
-Interfaces:
-extern int cpuidle_register_governor(struct cpuidle_governor *gov);
-struct cpuidle_governor
diff --git a/Documentation/driver-api/pm/cpuidle.rst b/Documentation/driver-api/pm/cpuidle.rst
new file mode 100644
index 000000000000..5842ab621a58
--- /dev/null
+++ b/Documentation/driver-api/pm/cpuidle.rst
@@ -0,0 +1,282 @@
+.. |struct cpuidle_governor| replace:: :c:type:`struct cpuidle_governor <cpuidle_governor>`
+.. |struct cpuidle_device| replace:: :c:type:`struct cpuidle_device <cpuidle_device>`
+.. |struct cpuidle_driver| replace:: :c:type:`struct cpuidle_driver <cpuidle_driver>`
+.. |struct cpuidle_state| replace:: :c:type:`struct cpuidle_state <cpuidle_state>`
+
+========================
+CPU Idle Time Management
+========================
+
+::
+
+ Copyright (c) 2019 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+CPU Idle Time Management Subsystem
+==================================
+
+Every time one of the logical CPUs in the system (the entities that appear to
+fetch and execute instructions: hardware threads, if present, or processor
+cores) is idle after an interrupt or equivalent wakeup event, which means that
+there are no tasks to run on it except for the special "idle" task associated
+with it, there is an opportunity to save energy for the processor that it
+belongs to. That can be done by making the idle logical CPU stop fetching
+instructions from memory and putting some of the processor's functional units
+depended on by it into an idle state in which they will draw less power.
+
+However, there may be multiple different idle states that can be used in such a
+situation in principle, so it may be necessary to find the most suitable one
+(from the kernel perspective) and ask the processor to use (or "enter") that
+particular idle state. That is the role of the CPU idle time management
+subsystem in the kernel, called ``CPUIdle``.
+
+The design of ``CPUIdle`` is modular and based on the code duplication avoidance
+principle, so the generic code that in principle need not depend on the hardware
+or platform design details in it is separate from the code that interacts with
+the hardware. It generally is divided into three categories of functional
+units: *governors* responsible for selecting idle states to ask the processor
+to enter, *drivers* that pass the governors' decisions on to the hardware and
+the *core* providing a common framework for them.
+
+
+CPU Idle Time Governors
+=======================
+
+A CPU idle time (``CPUIdle``) governor is a bundle of policy code invoked when
+one of the logical CPUs in the system turns out to be idle. Its role is to
+select an idle state to ask the processor to enter in order to save some energy.
+
+``CPUIdle`` governors are generic and each of them can be used on any hardware
+platform that the Linux kernel can run on. For this reason, data structures
+operated on by them cannot depend on any hardware architecture or platform
+design details as well.
+
+The governor itself is represented by a |struct cpuidle_governor| object
+containing four callback pointers, :c:member:`enable`, :c:member:`disable`,
+:c:member:`select`, :c:member:`reflect`, a :c:member:`rating` field described
+below, and a name (string) used for identifying it.
+
+For the governor to be available at all, that object needs to be registered
+with the ``CPUIdle`` core by calling :c:func:`cpuidle_register_governor()` with
+a pointer to it passed as the argument. If successful, that causes the core to
+add the governor to the global list of available governors and, if it is the
+only one in the list (that is, the list was empty before) or the value of its
+:c:member:`rating` field is greater than the value of that field for the
+governor currently in use, or the name of the new governor was passed to the
+kernel as the value of the ``cpuidle.governor=`` command line parameter, the new
+governor will be used from that point on (there can be only one ``CPUIdle``
+governor in use at a time). Also, if ``cpuidle_sysfs_switch`` is passed to the
+kernel in the command line, user space can choose the ``CPUIdle`` governor to
+use at run time via ``sysfs``.
+
+Once registered, ``CPUIdle`` governors cannot be unregistered, so it is not
+practical to put them into loadable kernel modules.
+
+The interface between ``CPUIdle`` governors and the core consists of four
+callbacks:
+
+:c:member:`enable`
+	::
+
+	  int (*enable) (struct cpuidle_driver *drv, struct cpuidle_device *dev);
+
+	The role of this callback is to prepare the governor for handling the
+	(logical) CPU represented by the |struct cpuidle_device| object pointed
+	to by the ``dev`` argument. The |struct cpuidle_driver| object pointed
+	to by the ``drv`` argument represents the ``CPUIdle`` driver to be used
+	with that CPU (among other things, it should contain the list of
+	|struct cpuidle_state| objects representing idle states that the
+	processor holding the given CPU can be asked to enter).
+
+	It may fail, in which case it is expected to return a negative error
+	code, and that causes the kernel to run the architecture-specific
+	default code for idle CPUs on the CPU in question instead of ``CPUIdle``
+	until the ``->enable()`` governor callback is invoked for that CPU
+	again.
+
+:c:member:`disable`
+	::
+
+	  void (*disable) (struct cpuidle_driver *drv, struct cpuidle_device *dev);
+
+	Called to make the governor stop handling the (logical) CPU represented
+	by the |struct cpuidle_device| object pointed to by the ``dev``
+	argument.
+
+	It is expected to reverse any changes made by the ``->enable()``
+	callback when it was last invoked for the target CPU, free all memory
+	allocated by that callback and so on.
+
+:c:member:`select`
+	::
+
+	  int (*select) (struct cpuidle_driver *drv, struct cpuidle_device *dev,
+	                 bool *stop_tick);
+
+	Called to select an idle state for the processor holding the (logical)
+	CPU represented by the |struct cpuidle_device| object pointed to by the
+	``dev`` argument.
+
+	The list of idle states to take into consideration is represented by the
+	:c:member:`states` array of |struct cpuidle_state| objects held by the
+	|struct cpuidle_driver| object pointed to by the ``drv`` argument (which
+	represents the ``CPUIdle`` driver to be used with the CPU at hand). The
+	value returned by this callback is interpreted as an index into that
+	array (unless it is a negative error code).
+
+	The ``stop_tick`` argument is used to indicate whether or not to stop
+	the scheduler tick before asking the processor to enter the selected
+	idle state. When the ``bool`` variable pointed to by it (which is set
+	to ``true`` before invoking this callback) is cleared to ``false``, the
+	processor will be asked to enter the selected idle state without
+	stopping the scheduler tick on the given CPU (if the tick has been
+	stopped on that CPU already, however, it will not be restarted before
+	asking the processor to enter the idle state).
+
+	This callback is mandatory (i.e. the :c:member:`select` callback pointer
+	in |struct cpuidle_governor| must not be ``NULL`` for the registration
+	of the governor to succeed).
+
+:c:member:`reflect`
+	::
+
+	  void (*reflect) (struct cpuidle_device *dev, int index);
+
+	Called to allow the governor to evaluate the accuracy of the idle state
+	selection made by the ``->select()`` callback (when it was invoked last
+	time) and possibly use the result of that to improve the accuracy of
+	idle state selections in the future.
+
+In addition, ``CPUIdle`` governors are required to take power management
+quality of service (PM QoS) constraints on the processor wakeup latency into
+account when selecting idle states. In order to obtain the current effective
+PM QoS wakeup latency constraint for a given CPU, a ``CPUIdle`` governor is
+expected to pass the number of the CPU to
+:c:func:`cpuidle_governor_latency_req()`. Then, the governor's ``->select()``
+callback must not return the index of an idle state whose
+:c:member:`exit_latency` value is greater than the number returned by that
+function.
+
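A minimal governor skeleton wiring these callbacks together might look as follows (an illustrative sketch only, not code from the tree; the ``example_`` names and the trivial selection policy are invented)::

  #include <linux/cpuidle.h>
  #include <linux/init.h>

  /* Deepest state whose exit latency honors the PM QoS constraint. */
  static int example_select(struct cpuidle_driver *drv,
                            struct cpuidle_device *dev, bool *stop_tick)
  {
      int latency_req = cpuidle_governor_latency_req(dev->cpu);
      int i, idx = 0;

      for (i = 1; i < drv->state_count; i++) {
          if (drv->states[i].exit_latency > latency_req)
              break;
          idx = i;
      }
      /* *stop_tick is left set: the tick may be stopped. */
      return idx;
  }

  static void example_reflect(struct cpuidle_device *dev, int index)
  {
      /* Record the outcome of the last selection here, if needed. */
  }

  static struct cpuidle_governor example_governor = {
      .name    = "example",
      .rating  = 10,               /* low: never preferred by default */
      .select  = example_select,   /* the only mandatory callback */
      .reflect = example_reflect,
  };

  static int __init example_governor_init(void)
  {
      return cpuidle_register_governor(&example_governor);
  }
  postcore_initcall(example_governor_init);

A real governor would, in addition, skip idle states that have been disabled and consult collected statistics, as the ``menu`` and TEO governors do.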
+
+CPU Idle Time Management Drivers
+================================
+
+CPU idle time management (``CPUIdle``) drivers provide an interface between the
+other parts of ``CPUIdle`` and the hardware.
+
+First of all, a ``CPUIdle`` driver has to populate the :c:member:`states` array
+of |struct cpuidle_state| objects included in the |struct cpuidle_driver| object
+representing it. Going forward this array will represent the list of available
+idle states that the processor hardware can be asked to enter, shared by all of
+the logical CPUs handled by the given driver.
+
+The entries in the :c:member:`states` array are expected to be sorted by the
+value of the :c:member:`target_residency` field in |struct cpuidle_state| in
+the ascending order (that is, index 0 should correspond to the idle state with
+the minimum value of :c:member:`target_residency`). [Since the
+:c:member:`target_residency` value is expected to reflect the "depth" of the
+idle state represented by the |struct cpuidle_state| object holding it, this
+sorting order should be the same as the ascending sorting order by the idle
+state "depth".]
+
+Three fields in |struct cpuidle_state| are used by the existing ``CPUIdle``
+governors for computations related to idle state selection:
+
+:c:member:`target_residency`
+	Minimum time to spend in this idle state including the time needed to
+	enter it (which may be substantial) to save more energy than could
+	be saved by staying in a shallower idle state for the same amount of
+	time, in microseconds.
+
+:c:member:`exit_latency`
+	Maximum time it will take a CPU asking the processor to enter this idle
+	state to start executing the first instruction after a wakeup from it,
+	in microseconds.
+
+:c:member:`flags`
+	Flags representing idle state properties. Currently, governors only use
+	the ``CPUIDLE_FLAG_POLLING`` flag which is set if the given object
+	does not represent a real idle state, but an interface to a software
+	"loop" that can be used in order to avoid asking the processor to enter
+	any idle state at all. [There are other flags used by the ``CPUIdle``
+	core in special situations.]
+
+The :c:member:`enter` callback pointer in |struct cpuidle_state|, which must not
+be ``NULL``, points to the routine to execute in order to ask the processor to
+enter this particular idle state:
+
+::
+
+  int (*enter) (struct cpuidle_device *dev, struct cpuidle_driver *drv,
+                int index);
+
+The first two arguments of it point to the |struct cpuidle_device| object
+representing the logical CPU running this callback and the
+|struct cpuidle_driver| object representing the driver itself, respectively,
+and the last one is an index of the |struct cpuidle_state| entry in the driver's
+:c:member:`states` array representing the idle state to ask the processor to
+enter.
+
+The analogous ``->enter_s2idle()`` callback in |struct cpuidle_state| is used
+only for implementing the suspend-to-idle system-wide power management feature.
+The difference between it and ``->enter()`` is that it must not re-enable
+interrupts at any point (even temporarily) or attempt to change the states of
+clock event devices, which the ``->enter()`` callback may do sometimes.
+
+Once the :c:member:`states` array has been populated, the number of valid
+entries in it has to be stored in the :c:member:`state_count` field of the
+|struct cpuidle_driver| object representing the driver. Moreover, if any
+entries in the :c:member:`states` array represent "coupled" idle states (that
+is, idle states that can only be asked for if multiple related logical CPUs are
+idle), the :c:member:`safe_state_index` field in |struct cpuidle_driver| needs
+to be the index of an idle state that is not "coupled" (that is, one that can be
+asked for if only one logical CPU is idle).
+
+In addition to that, if the given ``CPUIdle`` driver is only going to handle a
+subset of logical CPUs in the system, the :c:member:`cpumask` field in its
+|struct cpuidle_driver| object must point to the set (mask) of CPUs that will be
+handled by it.
+
+A ``CPUIdle`` driver can only be used after it has been registered. If there
+are no "coupled" idle state entries in the driver's :c:member:`states` array,
+that can be accomplished by passing the driver's |struct cpuidle_driver| object
+to :c:func:`cpuidle_register_driver()`. Otherwise, :c:func:`cpuidle_register()`
+should be used for this purpose.
+
+However, it also is necessary to register |struct cpuidle_device| objects for
+all of the logical CPUs to be handled by the given ``CPUIdle`` driver with the
+help of :c:func:`cpuidle_register_device()` after the driver has been registered
+and :c:func:`cpuidle_register_driver()`, unlike :c:func:`cpuidle_register()`,
+does not do that automatically. For this reason, the drivers that use
+:c:func:`cpuidle_register_driver()` to register themselves must also take care
+of registering the |struct cpuidle_device| objects as needed, so it is generally
+recommended to use :c:func:`cpuidle_register()` for ``CPUIdle`` driver
+registration in all cases.
+
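Putting the driver-side pieces together, a registration flow without "coupled" states might be sketched like this (illustrative only; the state parameters and the ``my_`` names are invented)::

  #include <linux/cpuidle.h>
  #include <linux/module.h>

  static int my_idle_enter(struct cpuidle_device *dev,
                           struct cpuidle_driver *drv, int index)
  {
      /* Ask the hardware to enter the idle state 'index' here. */
      return index;   /* index of the state actually entered */
  }

  static struct cpuidle_driver my_cpuidle_driver = {
      .name = "my_cpuidle",
      .owner = THIS_MODULE,
      .states = {
          {   /* index 0: shallowest state first */
              .name = "S0",
              .desc = "Shallow state",
              .exit_latency = 1,       /* microseconds */
              .target_residency = 1,   /* microseconds */
              .enter = my_idle_enter,
          },
          {   /* index 1: deeper, so larger target residency */
              .name = "S1",
              .desc = "Deep state",
              .exit_latency = 50,
              .target_residency = 100,
              .enter = my_idle_enter,
          },
      },
      .state_count = 2,
  };

  static int __init my_cpuidle_init(void)
  {
      /*
       * No "coupled" states here, so cpuidle_register() can do both the
       * driver and the per-CPU device registration (for all CPUs, since
       * the cpumask field is left unset).
       */
      return cpuidle_register(&my_cpuidle_driver, NULL);
  }
  device_initcall(my_cpuidle_init);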
+The registration of a |struct cpuidle_device| object causes the ``CPUIdle``
+``sysfs`` interface to be created and the governor's ``->enable()`` callback to
+be invoked for the logical CPU represented by it, so it must take place after
+registering the driver that will handle the CPU in question.
+
+``CPUIdle`` drivers and |struct cpuidle_device| objects can be unregistered
+when they are not necessary any more which allows some resources associated with
+them to be released. Due to dependencies between them, all of the
+|struct cpuidle_device| objects representing CPUs handled by the given
+``CPUIdle`` driver must be unregistered, with the help of
+:c:func:`cpuidle_unregister_device()`, before calling
+:c:func:`cpuidle_unregister_driver()` to unregister the driver. Alternatively,
+:c:func:`cpuidle_unregister()` can be called to unregister a ``CPUIdle`` driver
+along with all of the |struct cpuidle_device| objects representing CPUs handled
+by it.
+
+``CPUIdle`` drivers can respond to runtime system configuration changes that
+lead to modifications of the list of available processor idle states (which can
+happen, for example, when the system's power source is switched from AC to
+battery or the other way around). Upon a notification of such a change,
+a ``CPUIdle`` driver is expected to call :c:func:`cpuidle_pause_and_lock()` to
+turn ``CPUIdle`` off temporarily and then :c:func:`cpuidle_disable_device()` for
+all of the |struct cpuidle_device| objects representing CPUs affected by that
+change. Next, it can update its :c:member:`states` array in accordance with
+the new configuration of the system, call :c:func:`cpuidle_enable_device()` for
+all of the relevant |struct cpuidle_device| objects and invoke
+:c:func:`cpuidle_resume_and_unlock()` to allow ``CPUIdle`` to be used again.
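The reconfiguration sequence from the last paragraph could look roughly like this (a sketch assuming the per-CPU ``cpuidle_devices`` pointers declared in ``<linux/cpuidle.h>`` and a hypothetical ``my_rebuild_states()`` helper)::

  #include <linux/cpuidle.h>
  #include <linux/cpumask.h>
  #include <linux/percpu.h>

  /* Called upon a notification that the available idle states changed. */
  static void my_power_source_changed(struct cpuidle_driver *drv)
  {
      int cpu;

      cpuidle_pause_and_lock();        /* turn CPUIdle off temporarily */

      for_each_cpu(cpu, drv->cpumask)
          cpuidle_disable_device(per_cpu(cpuidle_devices, cpu));

      my_rebuild_states(drv);          /* hypothetical states[] update */

      for_each_cpu(cpu, drv->cpumask)
          cpuidle_enable_device(per_cpu(cpuidle_devices, cpu));

      cpuidle_resume_and_unlock();     /* let CPUIdle be used again */
  }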
diff --git a/Documentation/driver-api/pm/index.rst b/Documentation/driver-api/pm/index.rst
index 2f6d0e9cf6b7..56975c6bc789 100644
--- a/Documentation/driver-api/pm/index.rst
+++ b/Documentation/driver-api/pm/index.rst
@@ -1,9 +1,10 @@
-=======================
-Device Power Management
-=======================
+===============================
+CPU and Device Power Management
+===============================
 
 .. toctree::
 
+   cpuidle
    devices
    notifiers
    types
diff --git a/MAINTAINERS b/MAINTAINERS
index 3f2c6d697cab..d8db9ad441c3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4022,6 +4022,7 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
 B:	https://bugzilla.kernel.org
 F:	Documentation/admin-guide/pm/cpuidle.rst
+F:	Documentation/driver-api/pm/cpuidle.rst
 F:	drivers/cpuidle/*
 F:	include/linux/cpuidle.h
 
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index b2131c4ea124..98d4ec5bf450 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -282,6 +282,13 @@ static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
 			  pr->power.states[ACPI_STATE_C2].address,
 			  pr->power.states[ACPI_STATE_C3].address));
 
+	snprintf(pr->power.states[ACPI_STATE_C2].desc,
+		 ACPI_CX_DESC_LEN, "ACPI P_LVL2 IOPORT 0x%x",
+		 pr->power.states[ACPI_STATE_C2].address);
+	snprintf(pr->power.states[ACPI_STATE_C3].desc,
+		 ACPI_CX_DESC_LEN, "ACPI P_LVL3 IOPORT 0x%x",
+		 pr->power.states[ACPI_STATE_C3].address);
+
 	return 0;
 }
 
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index eb9443d5bae1..6ce93a52bf3f 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -427,6 +427,7 @@ __cpu_device_create(struct device *parent, void *drvdata,
 	dev->parent = parent;
 	dev->groups = groups;
 	dev->release = device_create_release;
+	device_set_pm_not_required(dev);
 	dev_set_drvdata(dev, drvdata);
 
 	retval = kobject_set_name_vargs(&dev->kobj, fmt, args);
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 5a42ae4078c2..365ad751ce0f 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -65,10 +65,15 @@ static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce)
 	if (IS_ERR(ce->clk)) {
 		ce->status = PCE_STATUS_ERROR;
 	} else {
-		clk_prepare(ce->clk);
-		ce->status = PCE_STATUS_ACQUIRED;
-		dev_dbg(dev, "Clock %pC con_id %s managed by runtime PM.\n",
-			ce->clk, ce->con_id);
+		if (clk_prepare(ce->clk)) {
+			ce->status = PCE_STATUS_ERROR;
+			dev_err(dev, "clk_prepare() failed\n");
+		} else {
+			ce->status = PCE_STATUS_ACQUIRED;
+			dev_dbg(dev,
+				"Clock %pC con_id %s managed by runtime PM.\n",
+				ce->clk, ce->con_id);
+		}
 	}
 }
 
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index b413951c6abc..22aedb28aad7 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -160,7 +160,7 @@ EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_id);
  * For a detailed function description, see dev_pm_domain_attach_by_id().
  */
 struct device *dev_pm_domain_attach_by_name(struct device *dev,
-					    char *name)
+					    const char *name)
 {
 	if (dev->pm_domain)
 		return ERR_PTR(-EEXIST);
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 500de1dee967..2c334c01fc43 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2483,7 +2483,7 @@ EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id);
  * power-domain-names DT property. For further description see
  * genpd_dev_pm_attach_by_id().
  */
-struct device *genpd_dev_pm_attach_by_name(struct device *dev, char *name)
+struct device *genpd_dev_pm_attach_by_name(struct device *dev, const char *name)
 {
 	int index;
 
@@ -2948,18 +2948,11 @@ static int __init genpd_debug_init(void)
 
 	genpd_debugfs_dir = debugfs_create_dir("pm_genpd", NULL);
 
-	if (!genpd_debugfs_dir)
-		return -ENOMEM;
-
-	d = debugfs_create_file("pm_genpd_summary", S_IRUGO,
-			genpd_debugfs_dir, NULL, &summary_fops);
-	if (!d)
-		return -ENOMEM;
+	debugfs_create_file("pm_genpd_summary", S_IRUGO, genpd_debugfs_dir,
+			    NULL, &summary_fops);
 
 	list_for_each_entry(genpd, &gpd_list, gpd_list_node) {
 		d = debugfs_create_dir(genpd->name, genpd_debugfs_dir);
-		if (!d)
-			return -ENOMEM;
 
 		debugfs_create_file("current_state", 0444,
 				d, genpd, &status_fops);
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 0992e67e862b..893ae464bfd6 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -124,6 +124,10 @@ void device_pm_unlock(void)
  */
 void device_pm_add(struct device *dev)
 {
+	/* Skip PM setup/initialization. */
+	if (device_pm_not_required(dev))
+		return;
+
 	pr_debug("PM: Adding info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
 	device_pm_check_callbacks(dev);
@@ -142,6 +146,9 @@ void device_pm_add(struct device *dev)
  */
 void device_pm_remove(struct device *dev)
 {
+	if (device_pm_not_required(dev))
+		return;
+
 	pr_debug("PM: Removing info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
 	complete_all(&dev->power.completion);
@@ -1741,8 +1748,10 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 	if (dev->power.direct_complete) {
 		if (pm_runtime_status_suspended(dev)) {
 			pm_runtime_disable(dev);
-			if (pm_runtime_status_suspended(dev))
+			if (pm_runtime_status_suspended(dev)) {
+				pm_dev_dbg(dev, state, "direct-complete ");
 				goto Complete;
+			}
 
 			pm_runtime_enable(dev);
 		}
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 0ea2139c50d8..78937c45278c 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -66,20 +66,30 @@ static int rpm_suspend(struct device *dev, int rpmflags);
  */
 void update_pm_runtime_accounting(struct device *dev)
 {
-	unsigned long now = jiffies;
-	unsigned long delta;
+	u64 now, last, delta;
 
-	delta = now - dev->power.accounting_timestamp;
+	if (dev->power.disable_depth > 0)
+		return;
+
+	last = dev->power.accounting_timestamp;
 
+	now = ktime_get_mono_fast_ns();
 	dev->power.accounting_timestamp = now;
 
-	if (dev->power.disable_depth > 0)
+	/*
+	 * Because ktime_get_mono_fast_ns() is not monotonic during
+	 * timekeeping updates, ensure that 'now' is after the last saved
+	 * timestamp.
+	 */
+	if (now < last)
 		return;
 
+	delta = now - last;
+
 	if (dev->power.runtime_status == RPM_SUSPENDED)
-		dev->power.suspended_jiffies += delta;
+		dev->power.suspended_time += delta;
 	else
-		dev->power.active_jiffies += delta;
+		dev->power.active_time += delta;
 }
 
 static void __update_runtime_status(struct device *dev, enum rpm_status status)
@@ -88,6 +98,22 @@ static void __update_runtime_status(struct device *dev, enum rpm_status status)
 	dev->power.runtime_status = status;
 }
 
+u64 pm_runtime_suspended_time(struct device *dev)
+{
+	u64 time;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->power.lock, flags);
+
+	update_pm_runtime_accounting(dev);
+	time = dev->power.suspended_time;
+
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+
+	return time;
+}
+EXPORT_SYMBOL_GPL(pm_runtime_suspended_time);
+
 /**
  * pm_runtime_deactivate_timer - Deactivate given device's suspend timer.
  * @dev: Device to handle.
@@ -95,7 +121,7 @@ static void __update_runtime_status(struct device *dev, enum rpm_status status)
 static void pm_runtime_deactivate_timer(struct device *dev)
 {
 	if (dev->power.timer_expires > 0) {
-		hrtimer_cancel(&dev->power.suspend_timer);
+		hrtimer_try_to_cancel(&dev->power.suspend_timer);
 		dev->power.timer_expires = 0;
 	}
 }
@@ -129,24 +155,21 @@ static void pm_runtime_cancel_pending(struct device *dev)
 u64 pm_runtime_autosuspend_expiration(struct device *dev)
 {
 	int autosuspend_delay;
-	u64 last_busy, expires = 0;
-	u64 now = ktime_get_mono_fast_ns();
+	u64 expires;
 
 	if (!dev->power.use_autosuspend)
-		goto out;
+		return 0;
 
 	autosuspend_delay = READ_ONCE(dev->power.autosuspend_delay);
 	if (autosuspend_delay < 0)
-		goto out;
-
-	last_busy = READ_ONCE(dev->power.last_busy);
+		return 0;
 
-	expires = last_busy + (u64)autosuspend_delay * NSEC_PER_MSEC;
-	if (expires <= now)
-		expires = 0;	/* Already expired. */
+	expires = READ_ONCE(dev->power.last_busy);
+	expires += (u64)autosuspend_delay * NSEC_PER_MSEC;
+	if (expires > ktime_get_mono_fast_ns())
+		return expires;	/* Expires in the future */
 
-out:
-	return expires;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration);
 
@@ -1276,6 +1299,9 @@ void __pm_runtime_disable(struct device *dev, bool check_resume)
 		pm_runtime_put_noidle(dev);
 	}
 
+	/* Update time accounting before disabling PM-runtime. */
+	update_pm_runtime_accounting(dev);
+
 	if (!dev->power.disable_depth++)
 		__pm_runtime_barrier(dev);
 
@@ -1294,10 +1320,15 @@ void pm_runtime_enable(struct device *dev)
 
 	spin_lock_irqsave(&dev->power.lock, flags);
 
-	if (dev->power.disable_depth > 0)
+	if (dev->power.disable_depth > 0) {
 		dev->power.disable_depth--;
-	else
+
+		/* About to enable runtime pm, set accounting_timestamp to now */
+		if (!dev->power.disable_depth)
+			dev->power.accounting_timestamp = ktime_get_mono_fast_ns();
+	} else {
 		dev_warn(dev, "Unbalanced %s!\n", __func__);
+	}
 
 	WARN(!dev->power.disable_depth &&
 	     dev->power.runtime_status == RPM_SUSPENDED &&
@@ -1494,7 +1525,6 @@ void pm_runtime_init(struct device *dev)
 	dev->power.request_pending = false;
 	dev->power.request = RPM_REQ_NONE;
 	dev->power.deferred_resume = false;
-	dev->power.accounting_timestamp = jiffies;
 	INIT_WORK(&dev->power.work, pm_runtime_work);
 
 	dev->power.timer_expires = 0;
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index d713738ce796..c6bf76124184 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -125,9 +125,12 @@ static ssize_t runtime_active_time_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
 	int ret;
+	u64 tmp;
 	spin_lock_irq(&dev->power.lock);
 	update_pm_runtime_accounting(dev);
-	ret = sprintf(buf, "%i\n", jiffies_to_msecs(dev->power.active_jiffies));
+	tmp = dev->power.active_time;
+	do_div(tmp, NSEC_PER_MSEC);
+	ret = sprintf(buf, "%llu\n", tmp);
 	spin_unlock_irq(&dev->power.lock);
 	return ret;
 }
@@ -138,10 +141,12 @@ static ssize_t runtime_suspended_time_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
 	int ret;
+	u64 tmp;
 	spin_lock_irq(&dev->power.lock);
 	update_pm_runtime_accounting(dev);
-	ret = sprintf(buf, "%i\n",
-		jiffies_to_msecs(dev->power.suspended_jiffies));
+	tmp = dev->power.suspended_time;
+	do_div(tmp, NSEC_PER_MSEC);
+	ret = sprintf(buf, "%llu\n", tmp);
 	spin_unlock_irq(&dev->power.lock);
 	return ret;
 }
@@ -648,6 +653,10 @@ int dpm_sysfs_add(struct device *dev)
 {
 	int rc;
 
+	/* No need to create PM sysfs if explicitly disabled. */
+	if (device_pm_not_required(dev))
+		return 0;
+
 	rc = sysfs_create_group(&dev->kobj, &pm_attr_group);
 	if (rc)
 		return rc;
@@ -727,6 +736,8 @@ void rpm_sysfs_remove(struct device *dev)
 
 void dpm_sysfs_remove(struct device *dev)
 {
+	if (device_pm_not_required(dev))
+		return;
 	sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group);
 	dev_pm_qos_constraints_destroy(dev);
 	rpm_sysfs_remove(dev);
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 5fa1898755a3..f1fee72ed970 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -783,7 +783,7 @@ void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard)
 EXPORT_SYMBOL_GPL(pm_wakeup_ws_event);
 
 /**
- * pm_wakeup_event - Notify the PM core of a wakeup event.
+ * pm_wakeup_dev_event - Notify the PM core of a wakeup event.
  * @dev: Device the wakeup event is related to.
  * @msec: Anticipated event processing time (in milliseconds).
  * @hard: If set, abort suspends in progress and wake up from suspend-to-idle.
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index 7e48eb5bf0a7..8caccbbd7353 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -4,7 +4,7 @@ config CPU_IDLE
 	bool "CPU idle PM support"
 	default y if ACPI || PPC_PSERIES
 	select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE)
-	select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE)
+	select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE) && !CPU_IDLE_GOV_TEO
 	help
 	  CPU idle is a generic framework for supporting software-controlled
 	  idle processor power management. It includes modular cross-platform
@@ -23,6 +23,15 @@ config CPU_IDLE_GOV_LADDER
 config CPU_IDLE_GOV_MENU
 	bool "Menu governor (for tickless system)"
 
+config CPU_IDLE_GOV_TEO
+	bool "Timer events oriented (TEO) governor (for tickless systems)"
+	help
+	  This governor implements a simplified idle state selection method
+	  focused on timer events and does not do any interactivity boosting.
+
+	  Some workloads benefit from using it and it generally should be safe
+	  to use. Say Y here if you are not happy with the alternatives.
+
 config DT_IDLE_STATES
 	bool
 
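For reference, with the new option enabled the governor can also be selected explicitly at boot time via the ``cpuidle.governor=`` parameter mentioned in the new driver-api document above (a hypothetical configuration fragment)::

  # .config fragment: build the TEO governor in
  CONFIG_CPU_IDLE=y
  CONFIG_CPU_IDLE_GOV_TEO=y

  # kernel command line: pick it explicitly
  cpuidle.governor=teo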
diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c
index 53342b7f1010..add9569636b5 100644
--- a/drivers/cpuidle/dt_idle_states.c
+++ b/drivers/cpuidle/dt_idle_states.c
@@ -22,16 +22,12 @@
 #include "dt_idle_states.h"
 
 static int init_state_node(struct cpuidle_state *idle_state,
-			   const struct of_device_id *matches,
+			   const struct of_device_id *match_id,
 			   struct device_node *state_node)
 {
 	int err;
-	const struct of_device_id *match_id;
 	const char *desc;
 
-	match_id = of_match_node(matches, state_node);
-	if (!match_id)
-		return -ENODEV;
 	/*
 	 * CPUidle drivers are expected to initialize the const void *data
 	 * pointer of the passed in struct of_device_id array to the idle
@@ -160,6 +156,7 @@ int dt_init_idle_driver(struct cpuidle_driver *drv,
 {
 	struct cpuidle_state *idle_state;
 	struct device_node *state_node, *cpu_node;
+	const struct of_device_id *match_id;
 	int i, err = 0;
 	const cpumask_t *cpumask;
 	unsigned int state_idx = start_idx;
@@ -180,6 +177,12 @@ int dt_init_idle_driver(struct cpuidle_driver *drv,
 		if (!state_node)
 			break;
 
+		match_id = of_match_node(matches, state_node);
+		if (!match_id) {
+			err = -ENODEV;
+			break;
+		}
+
 		if (!of_device_is_available(state_node)) {
 			of_node_put(state_node);
 			continue;
@@ -198,7 +201,7 @@ int dt_init_idle_driver(struct cpuidle_driver *drv,
 		}
 
 		idle_state = &drv->states[state_idx++];
-		err = init_state_node(idle_state, matches, state_node);
+		err = init_state_node(idle_state, match_id, state_node);
 		if (err) {
 			pr_err("Parsing idle state node %pOF failed with err %d\n",
 			       state_node, err);
diff --git a/drivers/cpuidle/governors/Makefile b/drivers/cpuidle/governors/Makefile
index 1b512722689f..4d8aff5248a8 100644
--- a/drivers/cpuidle/governors/Makefile
+++ b/drivers/cpuidle/governors/Makefile
@@ -4,3 +4,4 @@
 
 obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
 obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o
+obj-$(CONFIG_CPU_IDLE_GOV_TEO) += teo.o
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
new file mode 100644
index 000000000000..7d05efdbd3c6
--- /dev/null
+++ b/drivers/cpuidle/governors/teo.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Timer events oriented CPU idle governor
+ *
+ * Copyright (C) 2018 Intel Corporation
+ * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * The idea of this governor is based on the observation that on many systems
+ * timer events are two or more orders of magnitude more frequent than any
+ * other interrupts, so they are likely to be the most significant source of CPU
+ * wakeups from idle states. Moreover, information about what happened in the
+ * (relatively recent) past can be used to estimate whether or not the deepest
+ * idle state with target residency within the time to the closest timer is
+ * likely to be suitable for the upcoming idle time of the CPU and, if not, then
+ * which of the shallower idle states to choose.
+ *
+ * Of course, non-timer wakeup sources are more important in some use cases and
+ * they can be covered by taking a few most recent idle time intervals of the
+ * CPU into account. However, even in that case it is not necessary to consider
+ * idle duration values greater than the time till the closest timer, as the
+ * patterns that they may belong to produce average values close enough to
+ * the time till the closest timer (sleep length) anyway.
+ *
+ * Thus this governor estimates whether or not the upcoming idle time of the CPU
+ * is likely to be significantly shorter than the sleep length and selects an
+ * idle state for it in accordance with that, as follows:
+ *
+ * - Find an idle state on the basis of the sleep length and state statistics
+ *   collected over time:
+ *
+ *   o Find the deepest idle state whose target residency is less than or equal
+ *     to the sleep length.
+ *
+ *   o Select it if it matched both the sleep length and the observed idle
+ *     duration in the past more often than it matched the sleep length alone
+ *     (i.e. the observed idle duration was significantly shorter than the sleep
+ *     length matched by it).
+ *
+ *   o Otherwise, select the shallower state with the greatest matched "early"
+ *     wakeups metric.
+ *
+ * - If the majority of the most recent idle duration values are below the
+ *   target residency of the idle state selected so far, use those values to
+ *   compute the new expected idle duration and find an idle state matching it
+ *   (which has to be shallower than the one selected so far).
+ */
+
+#include <linux/cpuidle.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/sched/clock.h>
+#include <linux/tick.h>
+
+/*
+ * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
+ * is used for decreasing metrics on a regular basis.
+ */
+#define PULSE		1024
+#define DECAY_SHIFT	3
+
+/*
+ * Number of the most recent idle duration values to take into consideration for
+ * the detection of wakeup patterns.
+ */
+#define INTERVALS	8
+
+/**
+ * struct teo_idle_state - Idle state data used by the TEO cpuidle governor.
+ * @early_hits: "Early" CPU wakeups "matching" this state.
+ * @hits: "On time" CPU wakeups "matching" this state.
+ * @misses: CPU wakeups "missing" this state.
+ *
+ * A CPU wakeup is "matched" by a given idle state if the idle duration measured
+ * after the wakeup is between the target residency of that state and the target
+ * residency of the next one (or if this is the deepest available idle state, it
+ * "matches" a CPU wakeup when the measured idle duration is at least equal to
+ * its target residency).
+ *
+ * Also, from the TEO governor perspective, a CPU wakeup from idle is "early" if
+ * it occurs significantly earlier than the closest expected timer event (that
+ * is, early enough to match an idle state shallower than the one matching the
+ * time till the closest timer event). Otherwise, the wakeup is "on time", or
+ * it is a "hit".
+ *
+ * A "miss" occurs when the given state doesn't match the wakeup, but it matches
+ * the time till the closest timer event used for idle state selection.
+ */
+struct teo_idle_state {
+	unsigned int early_hits;
+	unsigned int hits;
+	unsigned int misses;
+};
+
+/**
+ * struct teo_cpu - CPU data used by the TEO cpuidle governor.
+ * @time_span_ns: Time between idle state selection and post-wakeup update.
+ * @sleep_length_ns: Time till the closest timer event (at the selection time).
+ * @states: Idle states data corresponding to this CPU.
+ * @last_state: Idle state entered by the CPU last time.
+ * @interval_idx: Index of the most recent saved idle interval.
+ * @intervals: Saved idle duration values.
+ */
+struct teo_cpu {
+	u64 time_span_ns;
+	u64 sleep_length_ns;
+	struct teo_idle_state states[CPUIDLE_STATE_MAX];
+	int last_state;
+	int interval_idx;
+	unsigned int intervals[INTERVALS];
+};
+
+static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
+
+/**
+ * teo_update - Update CPU data after wakeup.
+ * @drv: cpuidle driver containing state data.
+ * @dev: Target CPU.
+ */
+static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
+	unsigned int sleep_length_us = ktime_to_us(cpu_data->sleep_length_ns);
+	int i, idx_hit = -1, idx_timer = -1;
+	unsigned int measured_us;
+
+	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
+		/*
+		 * One of the safety nets has triggered or this was a timer
+		 * wakeup (or equivalent).
+		 */
+		measured_us = sleep_length_us;
+	} else {
+		unsigned int lat = drv->states[cpu_data->last_state].exit_latency;
+
+		measured_us = ktime_to_us(cpu_data->time_span_ns);
+		/*
+		 * The delay between the wakeup and the first instruction
+		 * executed by the CPU is not likely to be worst-case every
+		 * time, so take 1/2 of the exit latency as a very rough
+		 * approximation of the average of it.
+		 */
+		if (measured_us >= lat)
+			measured_us -= lat / 2;
+		else
+			measured_us /= 2;
+	}
+
+	/*
+	 * Decay the "early hits" metric for all of the states and find the
+	 * states matching the sleep length and the measured idle duration.
+	 */
+	for (i = 0; i < drv->state_count; i++) {
+		unsigned int early_hits = cpu_data->states[i].early_hits;
+
+		cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT;
+
+		if (drv->states[i].target_residency <= sleep_length_us) {
+			idx_timer = i;
+			if (drv->states[i].target_residency <= measured_us)
+				idx_hit = i;
+		}
+	}
+
+	/*
+	 * Update the "hits" and "misses" data for the state matching the sleep
+	 * length. If it matches the measured idle duration too, this is a hit,
+	 * so increase the "hits" metric for it then. Otherwise, this is a
+	 * miss, so increase the "misses" metric for it. In the latter case
+	 * also increase the "early hits" metric for the state that actually
+	 * matches the measured idle duration.
+	 */
+	if (idx_timer >= 0) {
+		unsigned int hits = cpu_data->states[idx_timer].hits;
+		unsigned int misses = cpu_data->states[idx_timer].misses;
+
+		hits -= hits >> DECAY_SHIFT;
+		misses -= misses >> DECAY_SHIFT;
+
+		if (idx_timer > idx_hit) {
+			misses += PULSE;
+			if (idx_hit >= 0)
+				cpu_data->states[idx_hit].early_hits += PULSE;
+		} else {
+			hits += PULSE;
+		}
+
+		cpu_data->states[idx_timer].misses = misses;
+		cpu_data->states[idx_timer].hits = hits;
+	}
+
+	/*
+	 * If the total time span between idle state selection and the "reflect"
+	 * callback is greater than or equal to the sleep length determined at
+	 * the idle state selection time, the wakeup is likely to be due to a
+	 * timer event.
+	 */
+	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns)
+		measured_us = UINT_MAX;
+
+	/*
+	 * Save idle duration values corresponding to non-timer wakeups for
+	 * pattern detection.
+	 */
+	cpu_data->intervals[cpu_data->interval_idx++] = measured_us;
+	if (cpu_data->interval_idx >= INTERVALS)
+		cpu_data->interval_idx = 0;
+}
+
+/**
+ * teo_find_shallower_state - Find shallower idle state matching given duration.
+ * @drv: cpuidle driver containing state data.
+ * @dev: Target CPU.
+ * @state_idx: Index of the capping idle state.
+ * @duration_us: Idle duration value to match.
+ */
+static int teo_find_shallower_state(struct cpuidle_driver *drv,
+				    struct cpuidle_device *dev, int state_idx,
+				    unsigned int duration_us)
+{
+	int i;
+
+	for (i = state_idx - 1; i >= 0; i--) {
+		if (drv->states[i].disabled || dev->states_usage[i].disable)
+			continue;
+
+		state_idx = i;
+		if (drv->states[i].target_residency <= duration_us)
+			break;
+	}
+	return state_idx;
+}
+
+/**
+ * teo_select - Selects the next idle state to enter.
+ * @drv: cpuidle driver containing state data.
+ * @dev: Target CPU.
+ * @stop_tick: Indication on whether or not to stop the scheduler tick.
+ */
+static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+		      bool *stop_tick)
+{
+	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
+	int latency_req = cpuidle_governor_latency_req(dev->cpu);
+	unsigned int duration_us, count;
+	int max_early_idx, idx, i;
+	ktime_t delta_tick;
+
+	if (cpu_data->last_state >= 0) {
+		teo_update(drv, dev);
+		cpu_data->last_state = -1;
+	}
+
+	cpu_data->time_span_ns = local_clock();
+
+	cpu_data->sleep_length_ns = tick_nohz_get_sleep_length(&delta_tick);
+	duration_us = ktime_to_us(cpu_data->sleep_length_ns);
+
+	count = 0;
+	max_early_idx = -1;
+	idx = -1;
+
+	for (i = 0; i < drv->state_count; i++) {
+		struct cpuidle_state *s = &drv->states[i];
+		struct cpuidle_state_usage *su = &dev->states_usage[i];
+
+		if (s->disabled || su->disable) {
+			/*
+			 * If the "early hits" metric of a disabled state is
+			 * greater than the current maximum, it should be taken
+			 * into account, because it would be a mistake to select
+			 * a deeper state with lower "early hits" metric. The
+			 * index cannot be changed to point to it, however, so
+			 * just increase the max count alone and let the index
+			 * still point to a shallower idle state.
+			 */
+			if (max_early_idx >= 0 &&
+			    count < cpu_data->states[i].early_hits)
+				count = cpu_data->states[i].early_hits;
+
+			continue;
+		}
+
+		if (idx < 0)
+			idx = i; /* first enabled state */
+
+		if (s->target_residency > duration_us)
+			break;
+
+		if (s->exit_latency > latency_req) {
+			/*
+			 * If we break out of the loop for latency reasons, use
+			 * the target residency of the selected state as the
+			 * expected idle duration to avoid stopping the tick
+			 * as long as that target residency is low enough.
+			 */
+			duration_us = drv->states[idx].target_residency;
+			goto refine;
+		}
+
+		idx = i;
+
+		if (count < cpu_data->states[i].early_hits &&
+		    !(tick_nohz_tick_stopped() &&
+		      drv->states[i].target_residency < TICK_USEC)) {
+			count = cpu_data->states[i].early_hits;
+			max_early_idx = i;
+		}
+	}
+
+	/*
+	 * If the "hits" metric of the idle state matching the sleep length is
+	 * greater than its "misses" metric, that is the one to use. Otherwise,
+	 * it is more likely that one of the shallower states will match the
+	 * idle duration observed after wakeup, so take the one with the maximum
+	 * "early hits" metric, but if that cannot be determined, just use the
+	 * state selected so far.
+	 */
+	if (cpu_data->states[idx].hits <= cpu_data->states[idx].misses &&
+	    max_early_idx >= 0) {
+		idx = max_early_idx;
+		duration_us = drv->states[idx].target_residency;
+	}
+
+refine:
+	if (idx < 0) {
+		idx = 0; /* No states enabled. Must use 0. */
+	} else if (idx > 0) {
+		u64 sum = 0;
+
+		count = 0;
+
+		/*
+		 * Count and sum the most recent idle duration values less than
+		 * the target residency of the state selected so far, find the
+		 * max.
+		 */
+		for (i = 0; i < INTERVALS; i++) {
+			unsigned int val = cpu_data->intervals[i];
+
+			if (val >= drv->states[idx].target_residency)
+				continue;
+
+			count++;
+			sum += val;
+		}
+
+		/*
+		 * Give up unless the majority of the most recent idle duration
+		 * values are in the interesting range.
+		 */
+		if (count > INTERVALS / 2) {
+			unsigned int avg_us = div64_u64(sum, count);
+
+			/*
+			 * Avoid spending too much time in an idle state that
+			 * would be too shallow.
+			 */
+			if (!(tick_nohz_tick_stopped() && avg_us < TICK_USEC)) {
+				idx = teo_find_shallower_state(drv, dev, idx, avg_us);
+				duration_us = avg_us;
+			}
+		}
+	}
+
+	/*
+	 * Don't stop the tick if the selected state is a polling one or if the
+	 * expected idle duration is shorter than the tick period length.
+	 */
+	if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
+	     duration_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
+		unsigned int delta_tick_us = ktime_to_us(delta_tick);
+
+		*stop_tick = false;
+
+		/*
+		 * The tick is not going to be stopped, so if the target
+		 * residency of the state to be returned is not within the time
+		 * till the closest timer including the tick, try to correct
+		 * that.
+		 */
+		if (idx > 0 && drv->states[idx].target_residency > delta_tick_us)
+			idx = teo_find_shallower_state(drv, dev, idx, delta_tick_us);
+	}
+
+	return idx;
+}
+
+/**
+ * teo_reflect - Note that governor data for the CPU need to be updated.
+ * @dev: Target CPU.
+ * @state: Entered state.
+ */
+static void teo_reflect(struct cpuidle_device *dev, int state)
+{
+	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
+
+	cpu_data->last_state = state;
+	/*
+	 * If the wakeup was not "natural", but triggered by one of the safety
+	 * nets, assume that the CPU might have been idle for the entire sleep
+	 * length time.
+	 */
+	if (dev->poll_time_limit ||
+	    (tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) {
+		dev->poll_time_limit = false;
+		cpu_data->time_span_ns = cpu_data->sleep_length_ns;
+	} else {
+		cpu_data->time_span_ns = local_clock() - cpu_data->time_span_ns;
+	}
+}
+
+/**
+ * teo_enable_device - Initialize the governor's data for the target CPU.
+ * @drv: cpuidle driver (not used).
+ * @dev: Target CPU.
+ */
+static int teo_enable_device(struct cpuidle_driver *drv,
+			     struct cpuidle_device *dev)
+{
+	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
+	int i;
+
+	memset(cpu_data, 0, sizeof(*cpu_data));
+
+	for (i = 0; i < INTERVALS; i++)
+		cpu_data->intervals[i] = UINT_MAX;
+
+	return 0;
+}
+
+static struct cpuidle_governor teo_governor = {
+	.name =		"teo",
+	.rating =	19,
+	.enable =	teo_enable_device,
+	.select =	teo_select,
+	.reflect =	teo_reflect,
+};
+
+static int __init teo_governor_init(void)
+{
+	return cpuidle_register_governor(&teo_governor);
+}
+
+postcore_initcall(teo_governor_init);
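A note on the PULSE/DECAY_SHIFT bookkeeping in teo_update() above: each update first decays a metric by one eighth and then adds PULSE if the corresponding event occurred, i.e. x' = x - (x >> 3) + 1024, so a metric reinforced on every wakeup converges to PULSE << DECAY_SHIFT = 8192, while one that stops being reinforced decays geometrically toward zero. A standalone user-space sketch of that arithmetic (an illustration, not part of the patch):

	/* Standalone illustration of the teo metric arithmetic; not kernel code. */
	#include <stdio.h>

	#define PULSE		1024
	#define DECAY_SHIFT	3

	int main(void)
	{
		unsigned int hits = 0;
		int i;

		/* Reinforced on every update: converges to PULSE << DECAY_SHIFT. */
		for (i = 0; i < 256; i++)
			hits += PULSE - (hits >> DECAY_SHIFT);

		printf("steady state: %u\n", hits);	/* prints 8192 */

		/* No longer reinforced: decays by 1/8 per update. */
		for (i = 0; i < 16; i++)
			hits -= hits >> DECAY_SHIFT;

		printf("after 16 decays: %u\n", hits);	/* prints 970 */
		return 0;
	}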
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 017fc602a10e..cf7c66bb3ed9 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/irq.h>
+#include <linux/pm_runtime.h>
 #include "i915_pmu.h"
 #include "intel_ringbuffer.h"
 #include "i915_drv.h"
@@ -478,7 +479,6 @@ static u64 get_rc6(struct drm_i915_private *i915)
 	 * counter value.
 	 */
 	spin_lock_irqsave(&i915->pmu.lock, flags);
-	spin_lock(&kdev->power.lock);
 
 	/*
 	 * After the above branch intel_runtime_pm_get_if_in_use failed
@@ -491,16 +491,13 @@ static u64 get_rc6(struct drm_i915_private *i915)
 	 * suspended and if not we cannot do better than report the last
 	 * known RC6 value.
 	 */
-	if (kdev->power.runtime_status == RPM_SUSPENDED) {
-		if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-			i915->pmu.suspended_jiffies_last =
-				kdev->power.suspended_jiffies;
+	if (pm_runtime_status_suspended(kdev)) {
+		val = pm_runtime_suspended_time(kdev);
 
-		val = kdev->power.suspended_jiffies -
-		      i915->pmu.suspended_jiffies_last;
-		val += jiffies - kdev->power.accounting_timestamp;
+		if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
+			i915->pmu.suspended_time_last = val;
 
-		val = jiffies_to_nsecs(val);
+		val -= i915->pmu.suspended_time_last;
 		val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
 
 		i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
@@ -510,7 +507,6 @@ static u64 get_rc6(struct drm_i915_private *i915)
 		val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
 	}
 
-	spin_unlock(&kdev->power.lock);
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
 }
 
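The rewritten branch above estimates RC6 while the device is runtime-suspended as: the PM core's accumulated suspended time, minus that counter's value cached when estimation started, plus the last real RC6 sample. A condensed restatement of that arithmetic, with the helper name estimate_rc6() invented for illustration (the struct fields are the ones from the patch):

	/* Sketch of the estimation logic above; the helper name is made up. */
	static u64 estimate_rc6(struct device *kdev, struct i915_pmu *pmu)
	{
		u64 val = pm_runtime_suspended_time(kdev);

		/* Cache the baseline when the first estimate is made. */
		if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
			pmu->suspended_time_last = val;

		/* Suspended time accrued since the baseline counts as RC6. */
		return val - pmu->suspended_time_last +
		       pmu->sample[__I915_SAMPLE_RC6].cur;
	}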
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index b3728c5f13e7..4fc4f2478301 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -97,9 +97,9 @@ struct i915_pmu {
 	 */
 	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
 	/**
-	 * @suspended_jiffies_last: Cached suspend time from PM core.
+	 * @suspended_time_last: Cached suspend time from PM core.
 	 */
-	unsigned long suspended_jiffies_last;
+	u64 suspended_time_last;
 	/**
 	 * @i915_attr: Memory block holding device attributes.
 	 */
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 8b5d85c91e9d..b8647b5c3d4d 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1103,6 +1103,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	INTEL_CPU_FAM6(ATOM_GOLDMONT,		idle_cpu_bxt),
 	INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS,	idle_cpu_bxt),
 	INTEL_CPU_FAM6(ATOM_GOLDMONT_X,		idle_cpu_dnv),
+	INTEL_CPU_FAM6(ATOM_TREMONT_X,		idle_cpu_dnv),
 	{}
 };
 
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 6cdb2c14eee4..4347f15165f8 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -1156,6 +1156,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
 	INTEL_CPU_FAM6(KABYLAKE_MOBILE, rapl_defaults_core),
 	INTEL_CPU_FAM6(KABYLAKE_DESKTOP, rapl_defaults_core),
 	INTEL_CPU_FAM6(CANNONLAKE_MOBILE, rapl_defaults_core),
+	INTEL_CPU_FAM6(ICELAKE_MOBILE, rapl_defaults_core),
 
 	INTEL_CPU_FAM6(ATOM_SILVERMONT, rapl_defaults_byt),
 	INTEL_CPU_FAM6(ATOM_AIRMONT, rapl_defaults_cht),
@@ -1164,6 +1165,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
 	INTEL_CPU_FAM6(ATOM_GOLDMONT, rapl_defaults_core),
 	INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, rapl_defaults_core),
 	INTEL_CPU_FAM6(ATOM_GOLDMONT_X, rapl_defaults_core),
+	INTEL_CPU_FAM6(ATOM_TREMONT_X, rapl_defaults_core),
 
 	INTEL_CPU_FAM6(XEON_PHI_KNL, rapl_defaults_hsw_server),
 	INTEL_CPU_FAM6(XEON_PHI_KNM, rapl_defaults_hsw_server),
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 4dff74f48d4b..3b39472324a3 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -69,11 +69,9 @@ struct cpuidle_state {
 
 /* Idle State Flags */
 #define CPUIDLE_FLAG_NONE	(0x00)
-#define CPUIDLE_FLAG_POLLING	(0x01) /* polling state */
-#define CPUIDLE_FLAG_COUPLED	(0x02) /* state applies to multiple cpus */
-#define CPUIDLE_FLAG_TIMER_STOP	(0x04) /* timer is stopped on this state */
-
-#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
+#define CPUIDLE_FLAG_POLLING	BIT(0) /* polling state */
+#define CPUIDLE_FLAG_COUPLED	BIT(1) /* state applies to multiple cpus */
+#define CPUIDLE_FLAG_TIMER_STOP	BIT(2) /* timer is stopped on this state */
 
 struct cpuidle_device_kobj;
 struct cpuidle_state_kobj;
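Since BIT(n) expands to (1UL << (n)) per include/linux/bits.h, the flag values are unchanged by this conversion; only the unused CPUIDLE_DRIVER_FLAGS_MASK goes away. If one wanted to assert that equivalence at build time, a check along these lines would do (illustrative only, not in the patch; BUILD_BUG_ON must live inside a function):

	static int __init cpuidle_flags_check(void)
	{
		BUILD_BUG_ON(CPUIDLE_FLAG_POLLING != 0x01);
		BUILD_BUG_ON(CPUIDLE_FLAG_COUPLED != 0x02);
		BUILD_BUG_ON(CPUIDLE_FLAG_TIMER_STOP != 0x04);
		return 0;
	}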
diff --git a/include/linux/device.h b/include/linux/device.h
index 6cb4640b6160..53028636fe39 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1165,6 +1165,16 @@ static inline bool device_async_suspend_enabled(struct device *dev)
 	return !!dev->power.async_suspend;
 }
 
+static inline bool device_pm_not_required(struct device *dev)
+{
+	return dev->power.no_pm;
+}
+
+static inline void device_set_pm_not_required(struct device *dev)
+{
+	dev->power.no_pm = true;
+}
+
 static inline void dev_pm_syscore_device(struct device *dev, bool val)
 {
 #ifdef CONFIG_PM_SLEEP
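These helpers let a bus or subsystem flag a device as needing no power management handling at all; note there is no helper to clear the flag, so it is effectively one-way. A hypothetical use, with foo_* names as placeholders (presumably the flag should be set before the device is registered):

	/* Hypothetical sketch; the foo_* names are placeholders. */
	static void foo_create_virtual(struct device *virt_dev)
	{
		/* Mark the device before it is added to the system. */
		device_set_pm_not_required(virt_dev);
	}

	static int foo_suspend_one(struct device *dev)
	{
		/* PM code paths can then skip such devices entirely. */
		if (device_pm_not_required(dev))
			return 0;

		return pm_generic_suspend(dev);
	}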
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 0bd9de116826..06f7ed893928 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -592,6 +592,7 @@ struct dev_pm_info {
 	bool is_suspended:1;	/* Ditto */
 	bool is_noirq_suspended:1;
 	bool is_late_suspended:1;
+	bool no_pm:1;
 	bool early_init:1;	/* Owned by the PM core */
 	bool direct_complete:1;	/* Owned by the PM core */
 	u32 driver_flags;
@@ -633,9 +634,9 @@ struct dev_pm_info {
 	int runtime_error;
 	int autosuspend_delay;
 	u64 last_busy;
-	unsigned long active_jiffies;
-	unsigned long suspended_jiffies;
-	unsigned long accounting_timestamp;
+	u64 active_time;
+	u64 suspended_time;
+	u64 accounting_timestamp;
 #endif
 	struct pm_subsys_data *subsys_data;  /* Owned by the subsystem. */
 	void (*set_latency_tolerance)(struct device *, s32);
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index dd364abb649a..1ed5874bcee0 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -271,7 +271,7 @@ int genpd_dev_pm_attach(struct device *dev);
 struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 					 unsigned int index);
 struct device *genpd_dev_pm_attach_by_name(struct device *dev,
-					   char *name);
+					   const char *name);
 #else	/* !CONFIG_PM_GENERIC_DOMAINS_OF */
 static inline int of_genpd_add_provider_simple(struct device_node *np,
 					       struct generic_pm_domain *genpd)
@@ -324,7 +324,7 @@ static inline struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 }
 
 static inline struct device *genpd_dev_pm_attach_by_name(struct device *dev,
-							 char *name)
+							 const char *name)
 {
 	return NULL;
 }
@@ -341,7 +341,7 @@ int dev_pm_domain_attach(struct device *dev, bool power_on);
 struct device *dev_pm_domain_attach_by_id(struct device *dev,
 					  unsigned int index);
 struct device *dev_pm_domain_attach_by_name(struct device *dev,
-					    char *name);
+					    const char *name);
 void dev_pm_domain_detach(struct device *dev, bool power_off);
 void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd);
 #else
@@ -355,7 +355,7 @@ static inline struct device *dev_pm_domain_attach_by_id(struct device *dev,
 	return NULL;
 }
 static inline struct device *dev_pm_domain_attach_by_name(struct device *dev,
-							   char *name)
+							   const char *name)
 {
 	return NULL;
 }
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index fed5be706bc9..9dc6eebf62d2 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -113,6 +113,8 @@ static inline bool pm_runtime_is_irq_safe(struct device *dev)
 	return dev->power.irq_safe;
 }
 
+extern u64 pm_runtime_suspended_time(struct device *dev);
+
 #else /* !CONFIG_PM */
 
 static inline bool queue_pm_work(struct work_struct *work) { return false; }
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index d9dc2c38764a..7d66ee68aaaf 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -10,6 +10,7 @@
 
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
+#include <linux/debugfs.h>
 #include <linux/energy_model.h>
 #include <linux/sched/topology.h>
 #include <linux/slab.h>
@@ -23,6 +24,60 @@ static DEFINE_PER_CPU(struct em_perf_domain *, em_data);
  */
 static DEFINE_MUTEX(em_pd_mutex);
 
+#ifdef CONFIG_DEBUG_FS
+static struct dentry *rootdir;
+
+static void em_debug_create_cs(struct em_cap_state *cs, struct dentry *pd)
+{
+	struct dentry *d;
+	char name[24];
+
+	snprintf(name, sizeof(name), "cs:%lu", cs->frequency);
+
+	/* Create per-cs directory */
+	d = debugfs_create_dir(name, pd);
+	debugfs_create_ulong("frequency", 0444, d, &cs->frequency);
+	debugfs_create_ulong("power", 0444, d, &cs->power);
+	debugfs_create_ulong("cost", 0444, d, &cs->cost);
+}
+
+static int em_debug_cpus_show(struct seq_file *s, void *unused)
+{
+	seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
+
+static void em_debug_create_pd(struct em_perf_domain *pd, int cpu)
+{
+	struct dentry *d;
+	char name[8];
+	int i;
+
+	snprintf(name, sizeof(name), "pd%d", cpu);
+
+	/* Create the directory of the performance domain */
+	d = debugfs_create_dir(name, rootdir);
+
+	debugfs_create_file("cpus", 0444, d, pd->cpus, &em_debug_cpus_fops);
+
+	/* Create a sub-directory for each capacity state */
+	for (i = 0; i < pd->nr_cap_states; i++)
+		em_debug_create_cs(&pd->table[i], d);
+}
+
+static int __init em_debug_init(void)
+{
+	/* Create /sys/kernel/debug/energy_model directory */
+	rootdir = debugfs_create_dir("energy_model", NULL);
+
+	return 0;
+}
+core_initcall(em_debug_init);
+#else /* CONFIG_DEBUG_FS */
+static void em_debug_create_pd(struct em_perf_domain *pd, int cpu) {}
+#endif
 static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states,
 					   struct em_data_callback *cb)
 {
@@ -102,6 +157,8 @@ static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states,
 	pd->nr_cap_states = nr_states;
 	cpumask_copy(to_cpumask(pd->cpus), span);
 
+	em_debug_create_pd(pd, cpu);
+
 	return pd;
 
 free_cs_table:
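With CONFIG_DEBUG_FS enabled, the code above should yield a tree of roughly this shape under /sys/kernel/debug/energy_model/, with one pdN directory per performance domain and one cs:<frequency> subdirectory per capacity state (the frequencies below are made-up examples):

	energy_model/
	└── pd0/
	    ├── cpus		(e.g. "0-3")
	    ├── cs:816000/
	    │   ├── frequency
	    │   ├── power
	    │   └── cost
	    └── cs:1800000/
	        ├── frequency
	        ├── power
	        └── cost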
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index b7a82502857a..9d22131afc1e 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -582,10 +582,8 @@ static int register_pm_qos_misc(struct pm_qos_object *qos, struct dentry *d)
 	qos->pm_qos_power_miscdev.name = qos->name;
 	qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops;
 
-	if (d) {
-		(void)debugfs_create_file(qos->name, S_IRUGO, d,
-					  (void *)qos, &pm_qos_debug_fops);
-	}
+	debugfs_create_file(qos->name, S_IRUGO, d, (void *)qos,
+			    &pm_qos_debug_fops);
 
 	return misc_register(&qos->pm_qos_power_miscdev);
 }
@@ -685,8 +683,6 @@ static int __init pm_qos_power_init(void)
 	BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES);
 
 	d = debugfs_create_dir("pm_qos", NULL);
-	if (IS_ERR_OR_NULL(d))
-		d = NULL;
 
 	for (i = PM_QOS_CPU_DMA_LATENCY; i < PM_QOS_NUM_CLASSES; i++) {
 		ret = register_pm_qos_misc(pm_qos_array[i], d);
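The dropped checks rely on the debugfs convention that callers need not inspect return values: the creation helpers are meant to tolerate a missing or unusable parent and simply degrade gracefully. The same pattern in isolation, as a sketch with placeholder foo_* names:

	static u32 foo_count;

	static void foo_debug_init(void)
	{
		struct dentry *d = debugfs_create_dir("foo", NULL);

		/* No error check needed even if the directory was not created. */
		debugfs_create_u32("count", 0444, d, &foo_count);
	}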
