summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-07-15 23:44:49 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2019-07-15 23:44:49 -0400
commitfb4da215ed92f564f7ca090bb81a199b0d6cab8a (patch)
tree38d4e18e1db026bec42c8b58ee40a245db313af3
parent2a3c389a0fde49b241430df806a34276568cfb29 (diff)
parent7b4b0f6b34d893be569da81ffad865a9d3a7d014 (diff)
Merge tag 'pci-v5.3-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
Pull PCI updates from Bjorn Helgaas: "Enumeration changes: - Evaluate PCI Boot Configuration _DSM to learn if firmware wants us to preserve its resource assignments (Benjamin Herrenschmidt) - Simplify resource distribution (Nicholas Johnson) - Decode 32 GT/s link speed (Gustavo Pimentel) Virtualization: - Fix incorrect caching of VF config space size (Alex Williamson) - Fix VF driver probing sysfs knobs (Alex Williamson) Peer-to-peer DMA: - Fix dma_virt_ops check (Logan Gunthorpe) Altera host bridge driver: - Allow building as module (Ley Foon Tan) Armada 8K host bridge driver: - add PHYs support (Miquel Raynal) DesignWare host bridge driver: - Export APIs to support removable loadable module (Vidya Sagar) - Enable Relaxed Ordering erratum workaround only on Tegra20 & Tegra30 (Vidya Sagar) Hyper-V host bridge driver: - Fix use-after-free in eject (Dexuan Cui) Mobiveil host bridge driver: - Clean up and fix many issues, including non-identify mapped windows, 64-bit windows, multi-MSI, class code, INTx clearing (Hou Zhiqiang) Qualcomm host bridge driver: - Use clk bulk API for 2.4.0 controllers (Bjorn Andersson) - Add QCS404 support (Bjorn Andersson) - Assert PERST for at least 100ms (Niklas Cassel) R-Car host bridge driver: - Add r8a774a1 DT support (Biju Das) Tegra host bridge driver: - Add support for Gen2, opportunistic UpdateFC and ACK (PCIe protocol details) AER, GPIO-based PERST# (Manikanta Maddireddy) - Fix many issues, including power-on failure cases, interrupt masking in suspend, UPHY settings, AFI dynamic clock gating, pending DLL transactions (Manikanta Maddireddy) Xilinx host bridge driver: - Fix NWL Multi-MSI programming (Bharat Kumar Gogada) Endpoint support: - Fix 64bit BAR support (Alan Mikhak) - Fix pcitest build issues (Alan Mikhak, Andy Shevchenko) Bug fixes: - Fix NVIDIA GPU multi-function power dependencies (Abhishek Sahu) - Fix NVIDIA GPU HDA enablement issue (Lukas Wunner) - Ignore lockdep for sysfs "remove" (Marek Vasut) Misc: - Convert docs 
to reST (Changbin Du, Mauro Carvalho Chehab)" * tag 'pci-v5.3-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci: (107 commits) PCI: Enable NVIDIA HDA controllers tools: PCI: Fix installation when `make tools/pci_install` PCI: dwc: pci-dra7xx: Fix compilation when !CONFIG_GPIOLIB PCI: Fix typos and whitespace errors PCI: mobiveil: Fix INTx interrupt clearing in mobiveil_pcie_isr() PCI: mobiveil: Fix infinite-loop in the INTx handling function PCI: mobiveil: Move PCIe PIO enablement out of inbound window routine PCI: mobiveil: Add upper 32-bit PCI base address setup in inbound window PCI: mobiveil: Add upper 32-bit CPU base address setup in outbound window PCI: mobiveil: Mask out hardcoded bits in inbound/outbound windows setup PCI: mobiveil: Clear the control fields before updating it PCI: mobiveil: Add configured inbound windows counter PCI: mobiveil: Fix the valid check for inbound and outbound windows PCI: mobiveil: Clean-up program_{ib/ob}_windows() PCI: mobiveil: Remove an unnecessary return value check PCI: mobiveil: Fix error return values PCI: mobiveil: Refactor the MEM/IO outbound window initialization PCI: mobiveil: Make some register updates more readable PCI: mobiveil: Reformat the code for readability dt-bindings: PCI: mobiveil: Change gpio_slave and apb_csr to optional ...
-rw-r--r--Documentation/ABI/testing/sysfs-class-powercap2
-rw-r--r--Documentation/PCI/acpi-info.rst (renamed from Documentation/PCI/acpi-info.txt)15
-rw-r--r--Documentation/PCI/endpoint/index.rst13
-rw-r--r--Documentation/PCI/endpoint/pci-endpoint-cfs.rst (renamed from Documentation/PCI/endpoint/pci-endpoint-cfs.txt)99
-rw-r--r--Documentation/PCI/endpoint/pci-endpoint.rst (renamed from Documentation/PCI/endpoint/pci-endpoint.txt)92
-rw-r--r--Documentation/PCI/endpoint/pci-test-function.rst (renamed from Documentation/PCI/endpoint/pci-test-function.txt)84
-rw-r--r--Documentation/PCI/endpoint/pci-test-howto.rst (renamed from Documentation/PCI/endpoint/pci-test-howto.txt)81
-rw-r--r--Documentation/PCI/index.rst18
-rw-r--r--Documentation/PCI/msi-howto.rst (renamed from Documentation/PCI/MSI-HOWTO.txt)85
-rw-r--r--Documentation/PCI/pci-error-recovery.rst (renamed from Documentation/PCI/pci-error-recovery.txt)287
-rw-r--r--Documentation/PCI/pci-iov-howto.rst (renamed from Documentation/PCI/pci-iov-howto.txt)161
-rw-r--r--Documentation/PCI/pci.rst (renamed from Documentation/PCI/pci.txt)356
-rw-r--r--Documentation/PCI/pcieaer-howto.rst (renamed from Documentation/PCI/pcieaer-howto.txt)156
-rw-r--r--Documentation/PCI/picebus-howto.rst (renamed from Documentation/PCI/PCIEBUS-HOWTO.txt)140
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt6
-rw-r--r--Documentation/cpu-freq/core.txt2
-rw-r--r--Documentation/devicetree/bindings/pci/mobiveil-pcie.txt2
-rw-r--r--Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt8
-rw-r--r--Documentation/devicetree/bindings/pci/pci.txt3
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie.txt25
-rw-r--r--Documentation/devicetree/bindings/pci/rcar-pci.txt1
-rw-r--r--Documentation/driver-api/pm/devices.rst6
-rw-r--r--Documentation/driver-api/usb/power-management.rst2
-rw-r--r--Documentation/index.rst1
-rw-r--r--Documentation/power/apm-acpi.rst (renamed from Documentation/power/apm-acpi.txt)10
-rw-r--r--Documentation/power/basic-pm-debugging.rst (renamed from Documentation/power/basic-pm-debugging.txt)79
-rw-r--r--Documentation/power/charger-manager.rst (renamed from Documentation/power/charger-manager.txt)105
-rw-r--r--Documentation/power/drivers-testing.rst (renamed from Documentation/power/drivers-testing.txt)15
-rw-r--r--Documentation/power/energy-model.rst (renamed from Documentation/power/energy-model.txt)105
-rw-r--r--Documentation/power/freezing-of-tasks.rst (renamed from Documentation/power/freezing-of-tasks.txt)91
-rw-r--r--Documentation/power/index.rst46
-rw-r--r--Documentation/power/interface.rst (renamed from Documentation/power/interface.txt)24
-rw-r--r--Documentation/power/opp.rst (renamed from Documentation/power/opp.txt)175
-rw-r--r--Documentation/power/pci.rst (renamed from Documentation/power/pci.txt)87
-rw-r--r--Documentation/power/pm_qos_interface.rst (renamed from Documentation/power/pm_qos_interface.txt)127
-rw-r--r--Documentation/power/power_supply_class.rst282
-rw-r--r--Documentation/power/power_supply_class.txt231
-rw-r--r--Documentation/power/powercap/powercap.rst257
-rw-r--r--Documentation/power/powercap/powercap.txt236
-rw-r--r--Documentation/power/regulator/consumer.rst (renamed from Documentation/power/regulator/consumer.txt)141
-rw-r--r--Documentation/power/regulator/design.rst (renamed from Documentation/power/regulator/design.txt)9
-rw-r--r--Documentation/power/regulator/machine.rst (renamed from Documentation/power/regulator/machine.txt)47
-rw-r--r--Documentation/power/regulator/overview.rst (renamed from Documentation/power/regulator/overview.txt)57
-rw-r--r--Documentation/power/regulator/regulator.rst32
-rw-r--r--Documentation/power/regulator/regulator.txt30
-rw-r--r--Documentation/power/runtime_pm.rst (renamed from Documentation/power/runtime_pm.txt)234
-rw-r--r--Documentation/power/s2ram.rst (renamed from Documentation/power/s2ram.txt)20
-rw-r--r--Documentation/power/suspend-and-cpuhotplug.rst (renamed from Documentation/power/suspend-and-cpuhotplug.txt)42
-rw-r--r--Documentation/power/suspend-and-interrupts.rst (renamed from Documentation/power/suspend-and-interrupts.txt)2
-rw-r--r--Documentation/power/swsusp-and-swap-files.rst (renamed from Documentation/power/swsusp-and-swap-files.txt)17
-rw-r--r--Documentation/power/swsusp-dmcrypt.rst (renamed from Documentation/power/swsusp-dmcrypt.txt)122
-rw-r--r--Documentation/power/swsusp.rst501
-rw-r--r--Documentation/power/swsusp.txt446
-rw-r--r--Documentation/power/tricks.rst (renamed from Documentation/power/tricks.txt)6
-rw-r--r--Documentation/power/userland-swsusp.rst (renamed from Documentation/power/userland-swsusp.txt)55
-rw-r--r--Documentation/power/video.rst (renamed from Documentation/power/video.txt)156
-rw-r--r--Documentation/process/submitting-drivers.rst2
-rw-r--r--Documentation/scheduler/sched-energy.rst6
-rw-r--r--Documentation/trace/coresight-cpu-debug.txt2
-rw-r--r--Documentation/translations/zh_CN/process/submitting-drivers.rst2
-rw-r--r--MAINTAINERS8
-rw-r--r--arch/arm64/kernel/pci.c13
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--drivers/acpi/pci_root.c12
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.h2
-rw-r--r--drivers/opp/Kconfig2
-rw-r--r--drivers/pci/ats.c2
-rw-r--r--drivers/pci/controller/Kconfig4
-rw-r--r--drivers/pci/controller/dwc/Kconfig2
-rw-r--r--drivers/pci/controller/dwc/pci-dra7xx.c1
-rw-r--r--drivers/pci/controller/dwc/pcie-armada8k.c84
-rw-r--r--drivers/pci/controller/dwc/pcie-designware-host.c12
-rw-r--r--drivers/pci/controller/dwc/pcie-designware.c61
-rw-r--r--drivers/pci/controller/dwc/pcie-designware.h39
-rw-r--r--drivers/pci/controller/dwc/pcie-kirin.c2
-rw-r--r--drivers/pci/controller/dwc/pcie-qcom.c115
-rw-r--r--drivers/pci/controller/pci-aardvark.c2
-rw-r--r--drivers/pci/controller/pci-hyperv.c15
-rw-r--r--drivers/pci/controller/pci-tegra.c589
-rw-r--r--drivers/pci/controller/pcie-altera-msi.c10
-rw-r--r--drivers/pci/controller/pcie-altera.c69
-rw-r--r--drivers/pci/controller/pcie-iproc-platform.c2
-rw-r--r--drivers/pci/controller/pcie-iproc.c2
-rw-r--r--drivers/pci/controller/pcie-mobiveil.c525
-rw-r--r--drivers/pci/controller/pcie-xilinx-nwl.c11
-rw-r--r--drivers/pci/controller/vmd.c2
-rw-r--r--drivers/pci/endpoint/functions/pci-epf-test.c35
-rw-r--r--drivers/pci/endpoint/pci-epc-core.c3
-rw-r--r--drivers/pci/iov.c2
-rw-r--r--drivers/pci/mmap.c2
-rw-r--r--drivers/pci/msi.c43
-rw-r--r--drivers/pci/p2pdma.c16
-rw-r--r--drivers/pci/pci-bridge-emul.c2
-rw-r--r--drivers/pci/pci-driver.c16
-rw-r--r--drivers/pci/pci-pf-stub.c2
-rw-r--r--drivers/pci/pci-sysfs.c5
-rw-r--r--drivers/pci/pci.c6
-rw-r--r--drivers/pci/pci.h1
-rw-r--r--drivers/pci/pcie/aer_inject.c2
-rw-r--r--drivers/pci/probe.c28
-rw-r--r--drivers/pci/proc.c2
-rw-r--r--drivers/pci/quirks.c110
-rw-r--r--drivers/pci/setup-bus.c60
-rw-r--r--drivers/pci/slot.c1
-rw-r--r--drivers/power/supply/power_supply_core.c2
-rw-r--r--drivers/soc/tegra/pmc.c1
-rw-r--r--include/linux/interrupt.h2
-rw-r--r--include/linux/mod_devicetable.h29
-rw-r--r--include/linux/pci-acpi.h7
-rw-r--r--include/linux/pci.h53
-rw-r--r--include/linux/pci_ids.h7
-rw-r--r--include/linux/pm.h2
-rw-r--r--include/uapi/linux/pci_regs.h4
-rw-r--r--kernel/power/Kconfig6
-rw-r--r--net/wireless/Kconfig2
-rw-r--r--tools/pci/Makefile5
-rw-r--r--tools/pci/pcitest.c8
117 files changed, 4502 insertions, 2994 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-powercap b/Documentation/ABI/testing/sysfs-class-powercap
index f333a0ccc29b..ca491ec4e693 100644
--- a/Documentation/ABI/testing/sysfs-class-powercap
+++ b/Documentation/ABI/testing/sysfs-class-powercap
@@ -5,7 +5,7 @@ Contact: linux-pm@vger.kernel.org
5Description: 5Description:
6 The powercap/ class sub directory belongs to the power cap 6 The powercap/ class sub directory belongs to the power cap
7 subsystem. Refer to 7 subsystem. Refer to
8 Documentation/power/powercap/powercap.txt for details. 8 Documentation/power/powercap/powercap.rst for details.
9 9
10What: /sys/class/powercap/<control type> 10What: /sys/class/powercap/<control type>
11Date: September 2013 11Date: September 2013
diff --git a/Documentation/PCI/acpi-info.txt b/Documentation/PCI/acpi-info.rst
index 3ffa3b03970e..060217081c79 100644
--- a/Documentation/PCI/acpi-info.txt
+++ b/Documentation/PCI/acpi-info.rst
@@ -1,4 +1,8 @@
1 ACPI considerations for PCI host bridges 1.. SPDX-License-Identifier: GPL-2.0
2
3========================================
4ACPI considerations for PCI host bridges
5========================================
2 6
3The general rule is that the ACPI namespace should describe everything the 7The general rule is that the ACPI namespace should describe everything the
4OS might use unless there's another way for the OS to find it [1, 2]. 8OS might use unless there's another way for the OS to find it [1, 2].
@@ -131,12 +135,13 @@ address always corresponds to bus 0, even if the bus range below the bridge
131 135
132[4] ACPI 6.2, sec 6.4.3.5.1, 2, 3, 4: 136[4] ACPI 6.2, sec 6.4.3.5.1, 2, 3, 4:
133 QWord/DWord/Word Address Space Descriptor (.1, .2, .3) 137 QWord/DWord/Word Address Space Descriptor (.1, .2, .3)
134 General Flags: Bit [0] Ignored 138 General Flags: Bit [0] Ignored
135 139
136 Extended Address Space Descriptor (.4) 140 Extended Address Space Descriptor (.4)
137 General Flags: Bit [0] Consumer/Producer: 141 General Flags: Bit [0] Consumer/Producer:
138 1–This device consumes this resource 142
139 0–This device produces and consumes this resource 143 * 1 – This device consumes this resource
144 * 0 – This device produces and consumes this resource
140 145
141[5] ACPI 6.2, sec 19.6.43: 146[5] ACPI 6.2, sec 19.6.43:
142 ResourceUsage specifies whether the Memory range is consumed by 147 ResourceUsage specifies whether the Memory range is consumed by
diff --git a/Documentation/PCI/endpoint/index.rst b/Documentation/PCI/endpoint/index.rst
new file mode 100644
index 000000000000..d114ea74b444
--- /dev/null
+++ b/Documentation/PCI/endpoint/index.rst
@@ -0,0 +1,13 @@
1.. SPDX-License-Identifier: GPL-2.0
2
3======================
4PCI Endpoint Framework
5======================
6
7.. toctree::
8 :maxdepth: 2
9
10 pci-endpoint
11 pci-endpoint-cfs
12 pci-test-function
13 pci-test-howto
diff --git a/Documentation/PCI/endpoint/pci-endpoint-cfs.txt b/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
index d740f29960a4..b6d39cdec56e 100644
--- a/Documentation/PCI/endpoint/pci-endpoint-cfs.txt
+++ b/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
@@ -1,41 +1,51 @@
1 CONFIGURING PCI ENDPOINT USING CONFIGFS 1.. SPDX-License-Identifier: GPL-2.0
2 Kishon Vijay Abraham I <kishon@ti.com> 2
3=======================================
4Configuring PCI Endpoint Using CONFIGFS
5=======================================
6
7:Author: Kishon Vijay Abraham I <kishon@ti.com>
3 8
4The PCI Endpoint Core exposes configfs entry (pci_ep) to configure the 9The PCI Endpoint Core exposes configfs entry (pci_ep) to configure the
5PCI endpoint function and to bind the endpoint function 10PCI endpoint function and to bind the endpoint function
6with the endpoint controller. (For introducing other mechanisms to 11with the endpoint controller. (For introducing other mechanisms to
7configure the PCI Endpoint Function refer to [1]). 12configure the PCI Endpoint Function refer to [1]).
8 13
9*) Mounting configfs 14Mounting configfs
15=================
10 16
11The PCI Endpoint Core layer creates pci_ep directory in the mounted configfs 17The PCI Endpoint Core layer creates pci_ep directory in the mounted configfs
12directory. configfs can be mounted using the following command. 18directory. configfs can be mounted using the following command::
13 19
14 mount -t configfs none /sys/kernel/config 20 mount -t configfs none /sys/kernel/config
15 21
16*) Directory Structure 22Directory Structure
23===================
17 24
18The pci_ep configfs has two directories at its root: controllers and 25The pci_ep configfs has two directories at its root: controllers and
19functions. Every EPC device present in the system will have an entry in 26functions. Every EPC device present in the system will have an entry in
20the *controllers* directory and every EPF driver present in the system 27the *controllers* directory and every EPF driver present in the system
21will have an entry in the *functions* directory. 28will have an entry in the *functions* directory.
29::
22 30
23/sys/kernel/config/pci_ep/ 31 /sys/kernel/config/pci_ep/
24 .. controllers/ 32 .. controllers/
25 .. functions/ 33 .. functions/
26 34
27*) Creating EPF Device 35Creating EPF Device
36===================
28 37
29Every registered EPF driver will be listed in functions directory. The 38Every registered EPF driver will be listed in functions directory. The
30entries corresponding to EPF driver will be created by the EPF core. 39entries corresponding to EPF driver will be created by the EPF core.
40::
31 41
32/sys/kernel/config/pci_ep/functions/ 42 /sys/kernel/config/pci_ep/functions/
33 .. <EPF Driver1>/ 43 .. <EPF Driver1>/
34 ... <EPF Device 11>/ 44 ... <EPF Device 11>/
35 ... <EPF Device 21>/ 45 ... <EPF Device 21>/
36 .. <EPF Driver2>/ 46 .. <EPF Driver2>/
37 ... <EPF Device 12>/ 47 ... <EPF Device 12>/
38 ... <EPF Device 22>/ 48 ... <EPF Device 22>/
39 49
40In order to create a <EPF device> of the type probed by <EPF Driver>, the 50In order to create a <EPF device> of the type probed by <EPF Driver>, the
41user has to create a directory inside <EPF DriverN>. 51user has to create a directory inside <EPF DriverN>.
@@ -44,34 +54,37 @@ Every <EPF device> directory consists of the following entries that can be
44used to configure the standard configuration header of the endpoint function. 54used to configure the standard configuration header of the endpoint function.
45(These entries are created by the framework when any new <EPF Device> is 55(These entries are created by the framework when any new <EPF Device> is
46created) 56created)
47 57::
48 .. <EPF Driver1>/ 58
49 ... <EPF Device 11>/ 59 .. <EPF Driver1>/
50 ... vendorid 60 ... <EPF Device 11>/
51 ... deviceid 61 ... vendorid
52 ... revid 62 ... deviceid
53 ... progif_code 63 ... revid
54 ... subclass_code 64 ... progif_code
55 ... baseclass_code 65 ... subclass_code
56 ... cache_line_size 66 ... baseclass_code
57 ... subsys_vendor_id 67 ... cache_line_size
58 ... subsys_id 68 ... subsys_vendor_id
59 ... interrupt_pin 69 ... subsys_id
60 70 ... interrupt_pin
61*) EPC Device 71
72EPC Device
73==========
62 74
63Every registered EPC device will be listed in controllers directory. The 75Every registered EPC device will be listed in controllers directory. The
64entries corresponding to EPC device will be created by the EPC core. 76entries corresponding to EPC device will be created by the EPC core.
65 77::
66/sys/kernel/config/pci_ep/controllers/ 78
67 .. <EPC Device1>/ 79 /sys/kernel/config/pci_ep/controllers/
68 ... <Symlink EPF Device11>/ 80 .. <EPC Device1>/
69 ... <Symlink EPF Device12>/ 81 ... <Symlink EPF Device11>/
70 ... start 82 ... <Symlink EPF Device12>/
71 .. <EPC Device2>/ 83 ... start
72 ... <Symlink EPF Device21>/ 84 .. <EPC Device2>/
73 ... <Symlink EPF Device22>/ 85 ... <Symlink EPF Device21>/
74 ... start 86 ... <Symlink EPF Device22>/
87 ... start
75 88
76The <EPC Device> directory will have a list of symbolic links to 89The <EPC Device> directory will have a list of symbolic links to
77<EPF Device>. These symbolic links should be created by the user to 90<EPF Device>. These symbolic links should be created by the user to
@@ -81,7 +94,7 @@ The <EPC Device> directory will also have a *start* field. Once
81"1" is written to this field, the endpoint device will be ready to 94"1" is written to this field, the endpoint device will be ready to
82establish the link with the host. This is usually done after 95establish the link with the host. This is usually done after
83all the EPF devices are created and linked with the EPC device. 96all the EPF devices are created and linked with the EPC device.
84 97::
85 98
86 | controllers/ 99 | controllers/
87 | <Directory: EPC name>/ 100 | <Directory: EPC name>/
@@ -102,4 +115,4 @@ all the EPF devices are created and linked with the EPC device.
102 | interrupt_pin 115 | interrupt_pin
103 | function 116 | function
104 117
105[1] -> Documentation/PCI/endpoint/pci-endpoint.txt 118[1] :doc:`pci-endpoint`
diff --git a/Documentation/PCI/endpoint/pci-endpoint.txt b/Documentation/PCI/endpoint/pci-endpoint.rst
index e86a96b66a6a..0e2311b5617b 100644
--- a/Documentation/PCI/endpoint/pci-endpoint.txt
+++ b/Documentation/PCI/endpoint/pci-endpoint.rst
@@ -1,11 +1,13 @@
1 PCI ENDPOINT FRAMEWORK 1.. SPDX-License-Identifier: GPL-2.0
2 Kishon Vijay Abraham I <kishon@ti.com> 2
3:Author: Kishon Vijay Abraham I <kishon@ti.com>
3 4
4This document is a guide to use the PCI Endpoint Framework in order to create 5This document is a guide to use the PCI Endpoint Framework in order to create
5endpoint controller driver, endpoint function driver, and using configfs 6endpoint controller driver, endpoint function driver, and using configfs
6interface to bind the function driver to the controller driver. 7interface to bind the function driver to the controller driver.
7 8
81. Introduction 9Introduction
10============
9 11
10Linux has a comprehensive PCI subsystem to support PCI controllers that 12Linux has a comprehensive PCI subsystem to support PCI controllers that
11operate in Root Complex mode. The subsystem has capability to scan PCI bus, 13operate in Root Complex mode. The subsystem has capability to scan PCI bus,
@@ -19,26 +21,30 @@ add endpoint mode support in Linux. This will help to run Linux in an
19EP system which can have a wide variety of use cases from testing or 21EP system which can have a wide variety of use cases from testing or
20validation, co-processor accelerator, etc. 22validation, co-processor accelerator, etc.
21 23
222. PCI Endpoint Core 24PCI Endpoint Core
25=================
23 26
24The PCI Endpoint Core layer comprises 3 components: the Endpoint Controller 27The PCI Endpoint Core layer comprises 3 components: the Endpoint Controller
25library, the Endpoint Function library, and the configfs layer to bind the 28library, the Endpoint Function library, and the configfs layer to bind the
26endpoint function with the endpoint controller. 29endpoint function with the endpoint controller.
27 30
282.1 PCI Endpoint Controller(EPC) Library 31PCI Endpoint Controller(EPC) Library
32------------------------------------
29 33
30The EPC library provides APIs to be used by the controller that can operate 34The EPC library provides APIs to be used by the controller that can operate
31in endpoint mode. It also provides APIs to be used by function driver/library 35in endpoint mode. It also provides APIs to be used by function driver/library
32in order to implement a particular endpoint function. 36in order to implement a particular endpoint function.
33 37
342.1.1 APIs for the PCI controller Driver 38APIs for the PCI controller Driver
39~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
35 40
36This section lists the APIs that the PCI Endpoint core provides to be used 41This section lists the APIs that the PCI Endpoint core provides to be used
37by the PCI controller driver. 42by the PCI controller driver.
38 43
39*) devm_pci_epc_create()/pci_epc_create() 44* devm_pci_epc_create()/pci_epc_create()
40 45
41 The PCI controller driver should implement the following ops: 46 The PCI controller driver should implement the following ops:
47
42 * write_header: ops to populate configuration space header 48 * write_header: ops to populate configuration space header
43 * set_bar: ops to configure the BAR 49 * set_bar: ops to configure the BAR
44 * clear_bar: ops to reset the BAR 50 * clear_bar: ops to reset the BAR
@@ -51,110 +57,116 @@ by the PCI controller driver.
51 The PCI controller driver can then create a new EPC device by invoking 57 The PCI controller driver can then create a new EPC device by invoking
52 devm_pci_epc_create()/pci_epc_create(). 58 devm_pci_epc_create()/pci_epc_create().
53 59
54*) devm_pci_epc_destroy()/pci_epc_destroy() 60* devm_pci_epc_destroy()/pci_epc_destroy()
55 61
56 The PCI controller driver can destroy the EPC device created by either 62 The PCI controller driver can destroy the EPC device created by either
57 devm_pci_epc_create() or pci_epc_create() using devm_pci_epc_destroy() or 63 devm_pci_epc_create() or pci_epc_create() using devm_pci_epc_destroy() or
58 pci_epc_destroy(). 64 pci_epc_destroy().
59 65
60*) pci_epc_linkup() 66* pci_epc_linkup()
61 67
62 In order to notify all the function devices that the EPC device to which 68 In order to notify all the function devices that the EPC device to which
63 they are linked has established a link with the host, the PCI controller 69 they are linked has established a link with the host, the PCI controller
64 driver should invoke pci_epc_linkup(). 70 driver should invoke pci_epc_linkup().
65 71
66*) pci_epc_mem_init() 72* pci_epc_mem_init()
67 73
68 Initialize the pci_epc_mem structure used for allocating EPC addr space. 74 Initialize the pci_epc_mem structure used for allocating EPC addr space.
69 75
70*) pci_epc_mem_exit() 76* pci_epc_mem_exit()
71 77
72 Cleanup the pci_epc_mem structure allocated during pci_epc_mem_init(). 78 Cleanup the pci_epc_mem structure allocated during pci_epc_mem_init().
73 79
742.1.2 APIs for the PCI Endpoint Function Driver 80
81APIs for the PCI Endpoint Function Driver
82~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
75 83
76This section lists the APIs that the PCI Endpoint core provides to be used 84This section lists the APIs that the PCI Endpoint core provides to be used
77by the PCI endpoint function driver. 85by the PCI endpoint function driver.
78 86
79*) pci_epc_write_header() 87* pci_epc_write_header()
80 88
81 The PCI endpoint function driver should use pci_epc_write_header() to 89 The PCI endpoint function driver should use pci_epc_write_header() to
82 write the standard configuration header to the endpoint controller. 90 write the standard configuration header to the endpoint controller.
83 91
84*) pci_epc_set_bar() 92* pci_epc_set_bar()
85 93
86 The PCI endpoint function driver should use pci_epc_set_bar() to configure 94 The PCI endpoint function driver should use pci_epc_set_bar() to configure
87 the Base Address Register in order for the host to assign PCI addr space. 95 the Base Address Register in order for the host to assign PCI addr space.
88 Register space of the function driver is usually configured 96 Register space of the function driver is usually configured
89 using this API. 97 using this API.
90 98
91*) pci_epc_clear_bar() 99* pci_epc_clear_bar()
92 100
93 The PCI endpoint function driver should use pci_epc_clear_bar() to reset 101 The PCI endpoint function driver should use pci_epc_clear_bar() to reset
94 the BAR. 102 the BAR.
95 103
96*) pci_epc_raise_irq() 104* pci_epc_raise_irq()
97 105
98 The PCI endpoint function driver should use pci_epc_raise_irq() to raise 106 The PCI endpoint function driver should use pci_epc_raise_irq() to raise
99 Legacy Interrupt, MSI or MSI-X Interrupt. 107 Legacy Interrupt, MSI or MSI-X Interrupt.
100 108
101*) pci_epc_mem_alloc_addr() 109* pci_epc_mem_alloc_addr()
102 110
103 The PCI endpoint function driver should use pci_epc_mem_alloc_addr(), to 111 The PCI endpoint function driver should use pci_epc_mem_alloc_addr(), to
104 allocate memory address from EPC addr space which is required to access 112 allocate memory address from EPC addr space which is required to access
105 RC's buffer 113 RC's buffer
106 114
107*) pci_epc_mem_free_addr() 115* pci_epc_mem_free_addr()
108 116
109 The PCI endpoint function driver should use pci_epc_mem_free_addr() to 117 The PCI endpoint function driver should use pci_epc_mem_free_addr() to
110 free the memory space allocated using pci_epc_mem_alloc_addr(). 118 free the memory space allocated using pci_epc_mem_alloc_addr().
111 119
1122.1.3 Other APIs 120Other APIs
121~~~~~~~~~~
113 122
114There are other APIs provided by the EPC library. These are used for binding 123There are other APIs provided by the EPC library. These are used for binding
115the EPF device with EPC device. pci-ep-cfs.c can be used as reference for 124the EPF device with EPC device. pci-ep-cfs.c can be used as reference for
116using these APIs. 125using these APIs.
117 126
118*) pci_epc_get() 127* pci_epc_get()
119 128
120 Get a reference to the PCI endpoint controller based on the device name of 129 Get a reference to the PCI endpoint controller based on the device name of
121 the controller. 130 the controller.
122 131
123*) pci_epc_put() 132* pci_epc_put()
124 133
125 Release the reference to the PCI endpoint controller obtained using 134 Release the reference to the PCI endpoint controller obtained using
126 pci_epc_get() 135 pci_epc_get()
127 136
128*) pci_epc_add_epf() 137* pci_epc_add_epf()
129 138
130 Add a PCI endpoint function to a PCI endpoint controller. A PCIe device 139 Add a PCI endpoint function to a PCI endpoint controller. A PCIe device
131 can have up to 8 functions according to the specification. 140 can have up to 8 functions according to the specification.
132 141
133*) pci_epc_remove_epf() 142* pci_epc_remove_epf()
134 143
135 Remove the PCI endpoint function from PCI endpoint controller. 144 Remove the PCI endpoint function from PCI endpoint controller.
136 145
137*) pci_epc_start() 146* pci_epc_start()
138 147
139 The PCI endpoint function driver should invoke pci_epc_start() once it 148 The PCI endpoint function driver should invoke pci_epc_start() once it
140 has configured the endpoint function and wants to start the PCI link. 149 has configured the endpoint function and wants to start the PCI link.
141 150
142*) pci_epc_stop() 151* pci_epc_stop()
143 152
144 The PCI endpoint function driver should invoke pci_epc_stop() to stop 153 The PCI endpoint function driver should invoke pci_epc_stop() to stop
145 the PCI LINK. 154 the PCI LINK.
146 155
1472.2 PCI Endpoint Function(EPF) Library 156
157PCI Endpoint Function(EPF) Library
158----------------------------------
148 159
149The EPF library provides APIs to be used by the function driver and the EPC 160The EPF library provides APIs to be used by the function driver and the EPC
150library to provide endpoint mode functionality. 161library to provide endpoint mode functionality.
151 162
1522.2.1 APIs for the PCI Endpoint Function Driver 163APIs for the PCI Endpoint Function Driver
164~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
153 165
154This section lists the APIs that the PCI Endpoint core provides to be used 166This section lists the APIs that the PCI Endpoint core provides to be used
155by the PCI endpoint function driver. 167by the PCI endpoint function driver.
156 168
157*) pci_epf_register_driver() 169* pci_epf_register_driver()
158 170
159 The PCI Endpoint Function driver should implement the following ops: 171 The PCI Endpoint Function driver should implement the following ops:
160 * bind: ops to perform when a EPC device has been bound to EPF device 172 * bind: ops to perform when a EPC device has been bound to EPF device
@@ -166,50 +178,54 @@ by the PCI endpoint function driver.
166 The PCI Function driver can then register the PCI EPF driver by using 178 The PCI Function driver can then register the PCI EPF driver by using
167 pci_epf_register_driver(). 179 pci_epf_register_driver().
168 180
169*) pci_epf_unregister_driver() 181* pci_epf_unregister_driver()
170 182
171 The PCI Function driver can unregister the PCI EPF driver by using 183 The PCI Function driver can unregister the PCI EPF driver by using
172 pci_epf_unregister_driver(). 184 pci_epf_unregister_driver().
173 185
174*) pci_epf_alloc_space() 186* pci_epf_alloc_space()
175 187
176 The PCI Function driver can allocate space for a particular BAR using 188 The PCI Function driver can allocate space for a particular BAR using
177 pci_epf_alloc_space(). 189 pci_epf_alloc_space().
178 190
179*) pci_epf_free_space() 191* pci_epf_free_space()
180 192
181 The PCI Function driver can free the allocated space 193 The PCI Function driver can free the allocated space
182 (using pci_epf_alloc_space) by invoking pci_epf_free_space(). 194 (using pci_epf_alloc_space) by invoking pci_epf_free_space().
183 195
1842.2.2 APIs for the PCI Endpoint Controller Library 196APIs for the PCI Endpoint Controller Library
197~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
198
185This section lists the APIs that the PCI Endpoint core provides to be used 199This section lists the APIs that the PCI Endpoint core provides to be used
186by the PCI endpoint controller library. 200by the PCI endpoint controller library.
187 201
188*) pci_epf_linkup() 202* pci_epf_linkup()
189 203
190 The PCI endpoint controller library invokes pci_epf_linkup() when the 204 The PCI endpoint controller library invokes pci_epf_linkup() when the
191 EPC device has established the connection to the host. 205 EPC device has established the connection to the host.
192 206
1932.2.2 Other APIs 207Other APIs
208~~~~~~~~~~
209
194There are other APIs provided by the EPF library. These are used to notify 210There are other APIs provided by the EPF library. These are used to notify
195the function driver when the EPF device is bound to the EPC device. 211the function driver when the EPF device is bound to the EPC device.
196pci-ep-cfs.c can be used as reference for using these APIs. 212pci-ep-cfs.c can be used as reference for using these APIs.
197 213
198*) pci_epf_create() 214* pci_epf_create()
199 215
200 Create a new PCI EPF device by passing the name of the PCI EPF device. 216 Create a new PCI EPF device by passing the name of the PCI EPF device.
201 This name will be used to bind the EPF device to an EPF driver. 217 This name will be used to bind the EPF device to an EPF driver.
202 218
203*) pci_epf_destroy() 219* pci_epf_destroy()
204 220
205 Destroy the created PCI EPF device. 221 Destroy the created PCI EPF device.
206 222
207*) pci_epf_bind() 223* pci_epf_bind()
208 224
209 pci_epf_bind() should be invoked when the EPF device has been bound to 225 pci_epf_bind() should be invoked when the EPF device has been bound to
210 an EPC device. 226 an EPC device.
211 227
212*) pci_epf_unbind() 228* pci_epf_unbind()
213 229
214 pci_epf_unbind() should be invoked when the binding between EPC device 230 pci_epf_unbind() should be invoked when the binding between EPC device
215 and EPF device is lost. 231 and EPF device is lost.
diff --git a/Documentation/PCI/endpoint/pci-test-function.txt b/Documentation/PCI/endpoint/pci-test-function.rst
index 5916f1f592bb..3c8521d7aa31 100644
--- a/Documentation/PCI/endpoint/pci-test-function.txt
+++ b/Documentation/PCI/endpoint/pci-test-function.rst
@@ -1,5 +1,10 @@
1 PCI TEST 1.. SPDX-License-Identifier: GPL-2.0
2 Kishon Vijay Abraham I <kishon@ti.com> 2
3=================
4PCI Test Function
5=================
6
7:Author: Kishon Vijay Abraham I <kishon@ti.com>
3 8
4Traditionally PCI RC has always been validated by using standard 9Traditionally PCI RC has always been validated by using standard
5PCI cards like ethernet PCI cards or USB PCI cards or SATA PCI cards. 10PCI cards like ethernet PCI cards or USB PCI cards or SATA PCI cards.
@@ -23,65 +28,76 @@ The PCI endpoint test device has the following registers:
23 8) PCI_ENDPOINT_TEST_IRQ_TYPE 28 8) PCI_ENDPOINT_TEST_IRQ_TYPE
24 9) PCI_ENDPOINT_TEST_IRQ_NUMBER 29 9) PCI_ENDPOINT_TEST_IRQ_NUMBER
25 30
26*) PCI_ENDPOINT_TEST_MAGIC 31* PCI_ENDPOINT_TEST_MAGIC
27 32
28This register will be used to test BAR0. A known pattern will be written 33This register will be used to test BAR0. A known pattern will be written
29and read back from MAGIC register to verify BAR0. 34and read back from MAGIC register to verify BAR0.
30 35
31*) PCI_ENDPOINT_TEST_COMMAND: 36* PCI_ENDPOINT_TEST_COMMAND
32 37
33This register will be used by the host driver to indicate the function 38This register will be used by the host driver to indicate the function
34that the endpoint device must perform. 39that the endpoint device must perform.
35 40
36Bitfield Description: 41======== ================================================================
37 Bit 0 : raise legacy IRQ 42Bitfield Description
38 Bit 1 : raise MSI IRQ 43======== ================================================================
39 Bit 2 : raise MSI-X IRQ 44Bit 0 raise legacy IRQ
40 Bit 3 : read command (read data from RC buffer) 45Bit 1 raise MSI IRQ
41 Bit 4 : write command (write data to RC buffer) 46Bit 2 raise MSI-X IRQ
42 Bit 5 : copy command (copy data from one RC buffer to another 47Bit 3 read command (read data from RC buffer)
43 RC buffer) 48Bit 4 write command (write data to RC buffer)
49Bit 5 copy command (copy data from one RC buffer to another RC buffer)
50======== ================================================================
44 51
45*) PCI_ENDPOINT_TEST_STATUS 52* PCI_ENDPOINT_TEST_STATUS
46 53
47This register reflects the status of the PCI endpoint device. 54This register reflects the status of the PCI endpoint device.
48 55
49Bitfield Description: 56======== ==============================
50 Bit 0 : read success 57Bitfield Description
51 Bit 1 : read fail 58======== ==============================
52 Bit 2 : write success 59Bit 0 read success
53 Bit 3 : write fail 60Bit 1 read fail
54 Bit 4 : copy success 61Bit 2 write success
55 Bit 5 : copy fail 62Bit 3 write fail
56 Bit 6 : IRQ raised 63Bit 4 copy success
57 Bit 7 : source address is invalid 64Bit 5 copy fail
58 Bit 8 : destination address is invalid 65Bit 6 IRQ raised
59 66Bit 7 source address is invalid
60*) PCI_ENDPOINT_TEST_SRC_ADDR 67Bit 8 destination address is invalid
68======== ==============================
69
70* PCI_ENDPOINT_TEST_SRC_ADDR
61 71
62This register contains the source address (RC buffer address) for the 72This register contains the source address (RC buffer address) for the
63COPY/READ command. 73COPY/READ command.
64 74
65*) PCI_ENDPOINT_TEST_DST_ADDR 75* PCI_ENDPOINT_TEST_DST_ADDR
66 76
67This register contains the destination address (RC buffer address) for 77This register contains the destination address (RC buffer address) for
68the COPY/WRITE command. 78the COPY/WRITE command.
69 79
70*) PCI_ENDPOINT_TEST_IRQ_TYPE 80* PCI_ENDPOINT_TEST_IRQ_TYPE
71 81
72This register contains the interrupt type (Legacy/MSI) triggered 82This register contains the interrupt type (Legacy/MSI) triggered
73for the READ/WRITE/COPY and raise IRQ (Legacy/MSI) commands. 83for the READ/WRITE/COPY and raise IRQ (Legacy/MSI) commands.
74 84
75Possible types: 85Possible types:
76 - Legacy : 0
77 - MSI : 1
78 - MSI-X : 2
79 86
80*) PCI_ENDPOINT_TEST_IRQ_NUMBER 87====== ==
88Legacy 0
89MSI 1
90MSI-X 2
91====== ==
92
93* PCI_ENDPOINT_TEST_IRQ_NUMBER
81 94
82This register contains the ID of the triggered interrupt. 95This register contains the ID of the triggered interrupt.
83 96
84Admissible values: 97Admissible values:
85 - Legacy : 0 98
86 - MSI : [1 .. 32] 99====== ===========
87 - MSI-X : [1 .. 2048] 100Legacy 0
101MSI [1 .. 32]
102MSI-X [1 .. 2048]
103====== ===========
diff --git a/Documentation/PCI/endpoint/pci-test-howto.txt b/Documentation/PCI/endpoint/pci-test-howto.rst
index 040479f437a5..909f770a07d6 100644
--- a/Documentation/PCI/endpoint/pci-test-howto.txt
+++ b/Documentation/PCI/endpoint/pci-test-howto.rst
@@ -1,38 +1,51 @@
1 PCI TEST USERGUIDE 1.. SPDX-License-Identifier: GPL-2.0
2 Kishon Vijay Abraham I <kishon@ti.com> 2
3===================
4PCI Test User Guide
5===================
6
7:Author: Kishon Vijay Abraham I <kishon@ti.com>
3 8
4This document is a guide to help users use pci-epf-test function driver 9This document is a guide to help users use pci-epf-test function driver
5and pci_endpoint_test host driver for testing PCI. The list of steps to 10and pci_endpoint_test host driver for testing PCI. The list of steps to
6be followed in the host side and EP side is given below. 11be followed in the host side and EP side is given below.
7 12
81. Endpoint Device 13Endpoint Device
14===============
9 15
101.1 Endpoint Controller Devices 16Endpoint Controller Devices
17---------------------------
11 18
12To find the list of endpoint controller devices in the system: 19To find the list of endpoint controller devices in the system::
13 20
14 # ls /sys/class/pci_epc/ 21 # ls /sys/class/pci_epc/
15 51000000.pcie_ep 22 51000000.pcie_ep
16 23
17If PCI_ENDPOINT_CONFIGFS is enabled 24If PCI_ENDPOINT_CONFIGFS is enabled::
25
18 # ls /sys/kernel/config/pci_ep/controllers 26 # ls /sys/kernel/config/pci_ep/controllers
19 51000000.pcie_ep 27 51000000.pcie_ep
20 28
211.2 Endpoint Function Drivers
22 29
23To find the list of endpoint function drivers in the system: 30Endpoint Function Drivers
31-------------------------
32
33To find the list of endpoint function drivers in the system::
24 34
25 # ls /sys/bus/pci-epf/drivers 35 # ls /sys/bus/pci-epf/drivers
26 pci_epf_test 36 pci_epf_test
27 37
28If PCI_ENDPOINT_CONFIGFS is enabled 38If PCI_ENDPOINT_CONFIGFS is enabled::
39
29 # ls /sys/kernel/config/pci_ep/functions 40 # ls /sys/kernel/config/pci_ep/functions
30 pci_epf_test 41 pci_epf_test
31 42
321.3 Creating pci-epf-test Device 43
44Creating pci-epf-test Device
45----------------------------
33 46
34PCI endpoint function device can be created using the configfs. To create 47PCI endpoint function device can be created using the configfs. To create
35pci-epf-test device, the following commands can be used 48pci-epf-test device, the following commands can be used::
36 49
37 # mount -t configfs none /sys/kernel/config 50 # mount -t configfs none /sys/kernel/config
38 # cd /sys/kernel/config/pci_ep/ 51 # cd /sys/kernel/config/pci_ep/
@@ -42,7 +55,7 @@ The "mkdir func1" above creates the pci-epf-test function device that will
42be probed by pci_epf_test driver. 55be probed by pci_epf_test driver.
43 56
44The PCI endpoint framework populates the directory with the following 57The PCI endpoint framework populates the directory with the following
45configurable fields. 58configurable fields::
46 59
47 # ls functions/pci_epf_test/func1 60 # ls functions/pci_epf_test/func1
48 baseclass_code interrupt_pin progif_code subsys_id 61 baseclass_code interrupt_pin progif_code subsys_id
@@ -51,67 +64,83 @@ configurable fields.
51 64
52The PCI endpoint function driver populates these entries with default values 65The PCI endpoint function driver populates these entries with default values
53when the device is bound to the driver. The pci-epf-test driver populates 66when the device is bound to the driver. The pci-epf-test driver populates
54vendorid with 0xffff and interrupt_pin with 0x0001 67vendorid with 0xffff and interrupt_pin with 0x0001::
55 68
56 # cat functions/pci_epf_test/func1/vendorid 69 # cat functions/pci_epf_test/func1/vendorid
57 0xffff 70 0xffff
58 # cat functions/pci_epf_test/func1/interrupt_pin 71 # cat functions/pci_epf_test/func1/interrupt_pin
59 0x0001 72 0x0001
60 73
611.4 Configuring pci-epf-test Device 74
75Configuring pci-epf-test Device
76-------------------------------
62 77
63The user can configure the pci-epf-test device using configfs entry. In order 78The user can configure the pci-epf-test device using configfs entry. In order
64to change the vendorid and the number of MSI interrupts used by the function 79to change the vendorid and the number of MSI interrupts used by the function
65device, the following commands can be used. 80device, the following commands can be used::
66 81
67 # echo 0x104c > functions/pci_epf_test/func1/vendorid 82 # echo 0x104c > functions/pci_epf_test/func1/vendorid
68 # echo 0xb500 > functions/pci_epf_test/func1/deviceid 83 # echo 0xb500 > functions/pci_epf_test/func1/deviceid
69 # echo 16 > functions/pci_epf_test/func1/msi_interrupts 84 # echo 16 > functions/pci_epf_test/func1/msi_interrupts
70 # echo 8 > functions/pci_epf_test/func1/msix_interrupts 85 # echo 8 > functions/pci_epf_test/func1/msix_interrupts
71 86
721.5 Binding pci-epf-test Device to EP Controller 87
88Binding pci-epf-test Device to EP Controller
89--------------------------------------------
73 90
74In order for the endpoint function device to be useful, it has to be bound to 91In order for the endpoint function device to be useful, it has to be bound to
75a PCI endpoint controller driver. Use the configfs to bind the function 92a PCI endpoint controller driver. Use the configfs to bind the function
76device to one of the controller drivers present in the system. 93device to one of the controller drivers present in the system::
77 94
78 # ln -s functions/pci_epf_test/func1 controllers/51000000.pcie_ep/ 95 # ln -s functions/pci_epf_test/func1 controllers/51000000.pcie_ep/
79 96
80Once the above step is completed, the PCI endpoint is ready to establish a link 97Once the above step is completed, the PCI endpoint is ready to establish a link
81with the host. 98with the host.
82 99
831.6 Start the Link 100
101Start the Link
102--------------
84 103
85In order for the endpoint device to establish a link with the host, the _start_ 104In order for the endpoint device to establish a link with the host, the _start_
86field should be populated with '1'. 105field should be populated with '1'::
87 106
88 # echo 1 > controllers/51000000.pcie_ep/start 107 # echo 1 > controllers/51000000.pcie_ep/start
89 108
902. RootComplex Device
91 109
922.1 lspci Output 110RootComplex Device
111==================
112
113lspci Output
114------------
93 115
94Note that the devices listed here correspond to the value populated in 1.4 above 116Note that the devices listed here correspond to the value populated in
117"Configuring pci-epf-test Device" above::
95 118
96 00:00.0 PCI bridge: Texas Instruments Device 8888 (rev 01) 119 00:00.0 PCI bridge: Texas Instruments Device 8888 (rev 01)
97 01:00.0 Unassigned class [ff00]: Texas Instruments Device b500 120 01:00.0 Unassigned class [ff00]: Texas Instruments Device b500
98 121
992.2 Using Endpoint Test function Device 122
123Using Endpoint Test function Device
124-----------------------------------
100 125
101pcitest.sh added in tools/pci/ can be used to run all the default PCI endpoint 126pcitest.sh added in tools/pci/ can be used to run all the default PCI endpoint
102tests. To compile this tool the following commands should be used: 127tests. To compile this tool the following commands should be used::
103 128
104 # cd <kernel-dir> 129 # cd <kernel-dir>
105 # make -C tools/pci 130 # make -C tools/pci
106 131
107or if you desire to compile and install in your system: 132or if you desire to compile and install in your system::
108 133
109 # cd <kernel-dir> 134 # cd <kernel-dir>
110 # make -C tools/pci install 135 # make -C tools/pci install
111 136
112The tool and script will be located in <rootfs>/usr/bin/ 137The tool and script will be located in <rootfs>/usr/bin/
113 138
1142.2.1 pcitest.sh Output 139
140pcitest.sh Output
141~~~~~~~~~~~~~~~~~
142::
143
115 # pcitest.sh 144 # pcitest.sh
116 BAR tests 145 BAR tests
117 146
diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
new file mode 100644
index 000000000000..f4c6121868c3
--- /dev/null
+++ b/Documentation/PCI/index.rst
@@ -0,0 +1,18 @@
1.. SPDX-License-Identifier: GPL-2.0
2
3=======================
4Linux PCI Bus Subsystem
5=======================
6
7.. toctree::
8 :maxdepth: 2
9 :numbered:
10
11 pci
12 picebus-howto
13 pci-iov-howto
14 msi-howto
15 acpi-info
16 pci-error-recovery
17 pcieaer-howto
18 endpoint/index
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/msi-howto.rst
index 618e13d5e276..994cbb660ade 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/msi-howto.rst
@@ -1,13 +1,16 @@
1 The MSI Driver Guide HOWTO 1.. SPDX-License-Identifier: GPL-2.0
2 Tom L Nguyen tom.l.nguyen@intel.com 2.. include:: <isonum.txt>
3 10/03/2003
4 Revised Feb 12, 2004 by Martine Silbermann
5 email: Martine.Silbermann@hp.com
6 Revised Jun 25, 2004 by Tom L Nguyen
7 Revised Jul 9, 2008 by Matthew Wilcox <willy@linux.intel.com>
8 Copyright 2003, 2008 Intel Corporation
9 3
101. About this guide 4==========================
5The MSI Driver Guide HOWTO
6==========================
7
8:Authors: Tom L Nguyen; Martine Silbermann; Matthew Wilcox
9
10:Copyright: 2003, 2008 Intel Corporation
11
12About this guide
13================
11 14
12This guide describes the basics of Message Signaled Interrupts (MSIs), 15This guide describes the basics of Message Signaled Interrupts (MSIs),
13the advantages of using MSI over traditional interrupt mechanisms, how 16the advantages of using MSI over traditional interrupt mechanisms, how
@@ -15,7 +18,8 @@ to change your driver to use MSI or MSI-X and some basic diagnostics to
15try if a device doesn't support MSIs. 18try if a device doesn't support MSIs.
16 19
17 20
182. What are MSIs? 21What are MSIs?
22==============
19 23
20A Message Signaled Interrupt is a write from the device to a special 24A Message Signaled Interrupt is a write from the device to a special
21address which causes an interrupt to be received by the CPU. 25address which causes an interrupt to be received by the CPU.
@@ -29,7 +33,8 @@ Devices may support both MSI and MSI-X, but only one can be enabled at
29a time. 33a time.
30 34
31 35
323. Why use MSIs? 36Why use MSIs?
37=============
33 38
34There are three reasons why using MSIs can give an advantage over 39There are three reasons why using MSIs can give an advantage over
35traditional pin-based interrupts. 40traditional pin-based interrupts.
@@ -61,14 +66,16 @@ Other possible designs include giving one interrupt to each packet queue
61in a network card or each port in a storage controller. 66in a network card or each port in a storage controller.
62 67
63 68
644. How to use MSIs 69How to use MSIs
70===============
65 71
66PCI devices are initialised to use pin-based interrupts. The device 72PCI devices are initialised to use pin-based interrupts. The device
67driver has to set up the device to use MSI or MSI-X. Not all machines 73driver has to set up the device to use MSI or MSI-X. Not all machines
68support MSIs correctly, and for those machines, the APIs described below 74support MSIs correctly, and for those machines, the APIs described below
69will simply fail and the device will continue to use pin-based interrupts. 75will simply fail and the device will continue to use pin-based interrupts.
70 76
714.1 Include kernel support for MSIs 77Include kernel support for MSIs
78-------------------------------
72 79
73To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI 80To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI
74option enabled. This option is only available on some architectures, 81option enabled. This option is only available on some architectures,
@@ -76,14 +83,15 @@ and it may depend on some other options also being set. For example,
76on x86, you must also enable X86_UP_APIC or SMP in order to see the 83on x86, you must also enable X86_UP_APIC or SMP in order to see the
77CONFIG_PCI_MSI option. 84CONFIG_PCI_MSI option.
78 85
794.2 Using MSI 86Using MSI
87---------
80 88
81Most of the hard work is done for the driver in the PCI layer. The driver 89Most of the hard work is done for the driver in the PCI layer. The driver
82simply has to request that the PCI layer set up the MSI capability for this 90simply has to request that the PCI layer set up the MSI capability for this
83device. 91device.
84 92
85To automatically use MSI or MSI-X interrupt vectors, use the following 93To automatically use MSI or MSI-X interrupt vectors, use the following
86function: 94function::
87 95
88 int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, 96 int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
89 unsigned int max_vecs, unsigned int flags); 97 unsigned int max_vecs, unsigned int flags);
@@ -101,12 +109,12 @@ any possible kind of interrupt. If the PCI_IRQ_AFFINITY flag is set,
101pci_alloc_irq_vectors() will spread the interrupts around the available CPUs. 109pci_alloc_irq_vectors() will spread the interrupts around the available CPUs.
102 110
103To get the Linux IRQ numbers passed to request_irq() and free_irq() and the 111To get the Linux IRQ numbers passed to request_irq() and free_irq() and the
104vectors, use the following function: 112vectors, use the following function::
105 113
106 int pci_irq_vector(struct pci_dev *dev, unsigned int nr); 114 int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
107 115
108Any allocated resources should be freed before removing the device using 116Any allocated resources should be freed before removing the device using
109the following function: 117the following function::
110 118
111 void pci_free_irq_vectors(struct pci_dev *dev); 119 void pci_free_irq_vectors(struct pci_dev *dev);
112 120
@@ -126,7 +134,7 @@ The typical usage of MSI or MSI-X interrupts is to allocate as many vectors
126as possible, likely up to the limit supported by the device. If nvec is 134as possible, likely up to the limit supported by the device. If nvec is
127larger than the number supported by the device it will automatically be 135larger than the number supported by the device it will automatically be
128capped to the supported limit, so there is no need to query the number of 136capped to the supported limit, so there is no need to query the number of
129vectors supported beforehand: 137vectors supported beforehand::
130 138
131 nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_ALL_TYPES) 139 nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_ALL_TYPES)
132 if (nvec < 0) 140 if (nvec < 0)
@@ -135,7 +143,7 @@ vectors supported beforehand:
135If a driver is unable or unwilling to deal with a variable number of MSI 143If a driver is unable or unwilling to deal with a variable number of MSI
136interrupts it can request a particular number of interrupts by passing that 144interrupts it can request a particular number of interrupts by passing that
137number to pci_alloc_irq_vectors() function as both 'min_vecs' and 145number to pci_alloc_irq_vectors() function as both 'min_vecs' and
138'max_vecs' parameters: 146'max_vecs' parameters::
139 147
140 ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_ALL_TYPES); 148 ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_ALL_TYPES);
141 if (ret < 0) 149 if (ret < 0)
@@ -143,23 +151,24 @@ number to pci_alloc_irq_vectors() function as both 'min_vecs' and
143 151
144The most notorious example of the request type described above is enabling 152The most notorious example of the request type described above is enabling
145the single MSI mode for a device. It could be done by passing two 1s as 153the single MSI mode for a device. It could be done by passing two 1s as
146'min_vecs' and 'max_vecs': 154'min_vecs' and 'max_vecs'::
147 155
148 ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); 156 ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
149 if (ret < 0) 157 if (ret < 0)
150 goto out_err; 158 goto out_err;
151 159
152Some devices might not support using legacy line interrupts, in which case 160Some devices might not support using legacy line interrupts, in which case
153the driver can specify that only MSI or MSI-X is acceptable: 161the driver can specify that only MSI or MSI-X is acceptable::
154 162
155 nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI | PCI_IRQ_MSIX); 163 nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI | PCI_IRQ_MSIX);
156 if (nvec < 0) 164 if (nvec < 0)
157 goto out_err; 165 goto out_err;
158 166
1594.3 Legacy APIs 167Legacy APIs
168-----------
160 169
161The following old APIs to enable and disable MSI or MSI-X interrupts should 170The following old APIs to enable and disable MSI or MSI-X interrupts should
162not be used in new code: 171not be used in new code::
163 172
164 pci_enable_msi() /* deprecated */ 173 pci_enable_msi() /* deprecated */
165 pci_disable_msi() /* deprecated */ 174 pci_disable_msi() /* deprecated */
@@ -174,9 +183,11 @@ number of vectors. If you have a legitimate special use case for the count
174of vectors we might have to revisit that decision and add a 183of vectors we might have to revisit that decision and add a
175pci_nr_irq_vectors() helper that handles MSI and MSI-X transparently. 184pci_nr_irq_vectors() helper that handles MSI and MSI-X transparently.
176 185
1774.4 Considerations when using MSIs 186Considerations when using MSIs
187------------------------------
178 188
1794.4.1 Spinlocks 189Spinlocks
190~~~~~~~~~
180 191
181Most device drivers have a per-device spinlock which is taken in the 192Most device drivers have a per-device spinlock which is taken in the
182interrupt handler. With pin-based interrupts or a single MSI, it is not 193interrupt handler. With pin-based interrupts or a single MSI, it is not
@@ -188,7 +199,8 @@ acquire the spinlock. Such deadlocks can be avoided by using
188spin_lock_irqsave() or spin_lock_irq() which disable local interrupts 199spin_lock_irqsave() or spin_lock_irq() which disable local interrupts
189and acquire the lock (see Documentation/kernel-hacking/locking.rst). 200and acquire the lock (see Documentation/kernel-hacking/locking.rst).
190 201
1914.5 How to tell whether MSI/MSI-X is enabled on a device 202How to tell whether MSI/MSI-X is enabled on a device
203----------------------------------------------------
192 204
193Using 'lspci -v' (as root) may show some devices with "MSI", "Message 205Using 'lspci -v' (as root) may show some devices with "MSI", "Message
194Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities 206Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities
@@ -196,7 +208,8 @@ has an 'Enable' flag which is followed with either "+" (enabled)
196or "-" (disabled). 208or "-" (disabled).
197 209
198 210
1995. MSI quirks 211MSI quirks
212==========
200 213
201Several PCI chipsets or devices are known not to support MSIs. 214Several PCI chipsets or devices are known not to support MSIs.
202The PCI stack provides three ways to disable MSIs: 215The PCI stack provides three ways to disable MSIs:
@@ -205,7 +218,8 @@ The PCI stack provides three ways to disable MSIs:
2052. on all devices behind a specific bridge 2182. on all devices behind a specific bridge
2063. on a single device 2193. on a single device
207 220
2085.1. Disabling MSIs globally 221Disabling MSIs globally
222-----------------------
209 223
210Some host chipsets simply don't support MSIs properly. If we're 224Some host chipsets simply don't support MSIs properly. If we're
211lucky, the manufacturer knows this and has indicated it in the ACPI 225lucky, the manufacturer knows this and has indicated it in the ACPI
@@ -219,7 +233,8 @@ on the kernel command line to disable MSIs on all devices. It would be
219in your best interests to report the problem to linux-pci@vger.kernel.org 233in your best interests to report the problem to linux-pci@vger.kernel.org
220including a full 'lspci -v' so we can add the quirks to the kernel. 234including a full 'lspci -v' so we can add the quirks to the kernel.
221 235
2225.2. Disabling MSIs below a bridge 236Disabling MSIs below a bridge
237-----------------------------
223 238
224Some PCI bridges are not able to route MSIs between busses properly. 239Some PCI bridges are not able to route MSIs between busses properly.
225In this case, MSIs must be disabled on all devices behind the bridge. 240In this case, MSIs must be disabled on all devices behind the bridge.
@@ -230,7 +245,7 @@ as the nVidia nForce and Serverworks HT2000). As with host chipsets,
230Linux mostly knows about them and automatically enables MSIs if it can. 245Linux mostly knows about them and automatically enables MSIs if it can.
231If you have a bridge unknown to Linux, you can enable 246If you have a bridge unknown to Linux, you can enable
232MSIs in configuration space using whatever method you know works, then 247MSIs in configuration space using whatever method you know works, then
233enable MSIs on that bridge by doing: 248enable MSIs on that bridge by doing::
234 249
235 echo 1 > /sys/bus/pci/devices/$bridge/msi_bus 250 echo 1 > /sys/bus/pci/devices/$bridge/msi_bus
236 251
@@ -244,7 +259,8 @@ below this bridge.
244Again, please notify linux-pci@vger.kernel.org of any bridges that need 259Again, please notify linux-pci@vger.kernel.org of any bridges that need
245special handling. 260special handling.
246 261
2475.3. Disabling MSIs on a single device 262Disabling MSIs on a single device
263---------------------------------
248 264
249Some devices are known to have faulty MSI implementations. Usually this 265Some devices are known to have faulty MSI implementations. Usually this
250is handled in the individual device driver, but occasionally it's necessary 266is handled in the individual device driver, but occasionally it's necessary
@@ -252,7 +268,8 @@ to handle this with a quirk. Some drivers have an option to disable use
252of MSI. While this is a convenient workaround for the driver author, 268of MSI. While this is a convenient workaround for the driver author,
253it is not good practice, and should not be emulated. 269it is not good practice, and should not be emulated.
254 270
2555.4. Finding why MSIs are disabled on a device 271Finding why MSIs are disabled on a device
272-----------------------------------------
256 273
257From the above three sections, you can see that there are many reasons 274From the above three sections, you can see that there are many reasons
258why MSIs may not be enabled for a given device. Your first step should 275why MSIs may not be enabled for a given device. Your first step should
@@ -260,8 +277,8 @@ be to examine your dmesg carefully to determine whether MSIs are enabled
260for your machine. You should also check your .config to be sure you 277for your machine. You should also check your .config to be sure you
261have enabled CONFIG_PCI_MSI. 278have enabled CONFIG_PCI_MSI.
262 279
263Then, 'lspci -t' gives the list of bridges above a device. Reading 280Then, 'lspci -t' gives the list of bridges above a device. Reading
264/sys/bus/pci/devices/*/msi_bus will tell you whether MSIs are enabled (1) 281`/sys/bus/pci/devices/*/msi_bus` will tell you whether MSIs are enabled (1)
265or disabled (0). If 0 is found in any of the msi_bus files belonging 282or disabled (0). If 0 is found in any of the msi_bus files belonging
266to bridges between the PCI root and the device, MSIs are disabled. 283to bridges between the PCI root and the device, MSIs are disabled.
267 284
diff --git a/Documentation/PCI/pci-error-recovery.txt b/Documentation/PCI/pci-error-recovery.rst
index 0b6bb3ef449e..83db42092935 100644
--- a/Documentation/PCI/pci-error-recovery.txt
+++ b/Documentation/PCI/pci-error-recovery.rst
@@ -1,12 +1,13 @@
1.. SPDX-License-Identifier: GPL-2.0
1 2
2 PCI Error Recovery 3==================
3 ------------------ 4PCI Error Recovery
4 February 2, 2006 5==================
5 6
6 Current document maintainer: 7
7 Linas Vepstas <linasvepstas@gmail.com> 8:Authors: - Linas Vepstas <linasvepstas@gmail.com>
8 updated by Richard Lary <rlary@us.ibm.com> 9 - Richard Lary <rlary@us.ibm.com>
9 and Mike Mason <mmlnx@us.ibm.com> on 27-Jul-2009 10 - Mike Mason <mmlnx@us.ibm.com>
10 11
11 12
12Many PCI bus controllers are able to detect a variety of hardware 13Many PCI bus controllers are able to detect a variety of hardware
@@ -63,7 +64,8 @@ mechanisms for dealing with SCSI bus errors and SCSI bus resets.
63 64
64 65
65Detailed Design 66Detailed Design
66--------------- 67===============
68
67Design and implementation details below, based on a chain of 69Design and implementation details below, based on a chain of
68public email discussions with Ben Herrenschmidt, circa 5 April 2005. 70public email discussions with Ben Herrenschmidt, circa 5 April 2005.
69 71
@@ -73,30 +75,33 @@ pci_driver. A driver that fails to provide the structure is "non-aware",
73and the actual recovery steps taken are platform dependent. The 75and the actual recovery steps taken are platform dependent. The
74arch/powerpc implementation will simulate a PCI hotplug remove/add. 76arch/powerpc implementation will simulate a PCI hotplug remove/add.
75 77
76This structure has the form: 78This structure has the form::
77struct pci_error_handlers 79
78{ 80 struct pci_error_handlers
79 int (*error_detected)(struct pci_dev *dev, enum pci_channel_state); 81 {
80 int (*mmio_enabled)(struct pci_dev *dev); 82 int (*error_detected)(struct pci_dev *dev, enum pci_channel_state);
81 int (*slot_reset)(struct pci_dev *dev); 83 int (*mmio_enabled)(struct pci_dev *dev);
82 void (*resume)(struct pci_dev *dev); 84 int (*slot_reset)(struct pci_dev *dev);
83}; 85 void (*resume)(struct pci_dev *dev);
84 86 };
85The possible channel states are: 87
86enum pci_channel_state { 88The possible channel states are::
87 pci_channel_io_normal, /* I/O channel is in normal state */ 89
88 pci_channel_io_frozen, /* I/O to channel is blocked */ 90 enum pci_channel_state {
89 pci_channel_io_perm_failure, /* PCI card is dead */ 91 pci_channel_io_normal, /* I/O channel is in normal state */
90}; 92 pci_channel_io_frozen, /* I/O to channel is blocked */
91 93 pci_channel_io_perm_failure, /* PCI card is dead */
92Possible return values are: 94 };
93enum pci_ers_result { 95
94 PCI_ERS_RESULT_NONE, /* no result/none/not supported in device driver */ 96Possible return values are::
95 PCI_ERS_RESULT_CAN_RECOVER, /* Device driver can recover without slot reset */ 97
96 PCI_ERS_RESULT_NEED_RESET, /* Device driver wants slot to be reset. */ 98 enum pci_ers_result {
97 PCI_ERS_RESULT_DISCONNECT, /* Device has completely failed, is unrecoverable */ 99 PCI_ERS_RESULT_NONE, /* no result/none/not supported in device driver */
98 PCI_ERS_RESULT_RECOVERED, /* Device driver is fully recovered and operational */ 100 PCI_ERS_RESULT_CAN_RECOVER, /* Device driver can recover without slot reset */
99}; 101 PCI_ERS_RESULT_NEED_RESET, /* Device driver wants slot to be reset. */
102 PCI_ERS_RESULT_DISCONNECT, /* Device has completely failed, is unrecoverable */
103 PCI_ERS_RESULT_RECOVERED, /* Device driver is fully recovered and operational */
104 };
100 105
101A driver does not have to implement all of these callbacks; however, 106A driver does not have to implement all of these callbacks; however,
102if it implements any, it must implement error_detected(). If a callback 107if it implements any, it must implement error_detected(). If a callback
@@ -134,16 +139,17 @@ shouldn't do any new IOs. Called in task context. This is sort of a
134 139
135All drivers participating in this system must implement this call. 140All drivers participating in this system must implement this call.
136The driver must return one of the following result codes: 141The driver must return one of the following result codes:
137 - PCI_ERS_RESULT_CAN_RECOVER: 142
138 Driver returns this if it thinks it might be able to recover 143 - PCI_ERS_RESULT_CAN_RECOVER
139 the HW by just banging IOs or if it wants to be given 144 Driver returns this if it thinks it might be able to recover
140 a chance to extract some diagnostic information (see 145 the HW by just banging IOs or if it wants to be given
141 mmio_enabled, below). 146 a chance to extract some diagnostic information (see
142 - PCI_ERS_RESULT_NEED_RESET: 147 mmio_enabled, below).
143 Driver returns this if it can't recover without a 148 - PCI_ERS_RESULT_NEED_RESET
144 slot reset. 149 Driver returns this if it can't recover without a
145 - PCI_ERS_RESULT_DISCONNECT: 150 slot reset.
146 Driver returns this if it doesn't want to recover at all. 151 - PCI_ERS_RESULT_DISCONNECT
152 Driver returns this if it doesn't want to recover at all.
147 153
148The next step taken will depend on the result codes returned by the 154The next step taken will depend on the result codes returned by the
149drivers. 155drivers.
@@ -159,25 +165,27 @@ then recovery proceeds to STEP 4 (Slot Reset).
159If the platform is unable to recover the slot, the next step 165If the platform is unable to recover the slot, the next step
160is STEP 6 (Permanent Failure). 166is STEP 6 (Permanent Failure).
161 167
162>>> The current powerpc implementation assumes that a device driver will 168.. note::
163>>> *not* schedule or semaphore in this routine; the current powerpc 169
164>>> implementation uses one kernel thread to notify all devices; 170 The current powerpc implementation assumes that a device driver will
165>>> thus, if one device sleeps/schedules, all devices are affected. 171 *not* schedule or semaphore in this routine; the current powerpc
166>>> Doing better requires complex multi-threaded logic in the error 172 implementation uses one kernel thread to notify all devices;
167>>> recovery implementation (e.g. waiting for all notification threads 173 thus, if one device sleeps/schedules, all devices are affected.
168>>> to "join" before proceeding with recovery.) This seems excessively 174 Doing better requires complex multi-threaded logic in the error
169>>> complex and not worth implementing. 175 recovery implementation (e.g. waiting for all notification threads
170 176 to "join" before proceeding with recovery.) This seems excessively
171>>> The current powerpc implementation doesn't much care if the device 177 complex and not worth implementing.
172>>> attempts I/O at this point, or not. I/O's will fail, returning 178
173>>> a value of 0xff on read, and writes will be dropped. If more than 179 The current powerpc implementation doesn't much care if the device
174>>> EEH_MAX_FAILS I/O's are attempted to a frozen adapter, EEH 180 attempts I/O at this point, or not. I/O's will fail, returning
175>>> assumes that the device driver has gone into an infinite loop 181 a value of 0xff on read, and writes will be dropped. If more than
176>>> and prints an error to syslog. A reboot is then required to 182 EEH_MAX_FAILS I/O's are attempted to a frozen adapter, EEH
177>>> get the device working again. 183 assumes that the device driver has gone into an infinite loop
184 and prints an error to syslog. A reboot is then required to
185 get the device working again.
178 186
179STEP 2: MMIO Enabled 187STEP 2: MMIO Enabled
180------------------- 188--------------------
181The platform re-enables MMIO to the device (but typically not the 189The platform re-enables MMIO to the device (but typically not the
182DMA), and then calls the mmio_enabled() callback on all affected 190DMA), and then calls the mmio_enabled() callback on all affected
183device drivers. 191device drivers.
@@ -192,34 +200,36 @@ link reset was performed by the HW. If the platform can't just re-enable IOs
192without a slot reset or a link reset, it will not call this callback, and 200without a slot reset or a link reset, it will not call this callback, and
193instead will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset) 201instead will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset)
194 202
195>>> The following is proposed; no platform implements this yet: 203.. note::
196>>> Proposal: All I/O's should be done _synchronously_ from within 204
197>>> this callback, errors triggered by them will be returned via 205 The following is proposed; no platform implements this yet:
198>>> the normal pci_check_whatever() API, no new error_detected() 206 Proposal: All I/O's should be done _synchronously_ from within
199>>> callback will be issued due to an error happening here. However, 207 this callback, errors triggered by them will be returned via
200>>> such an error might cause IOs to be re-blocked for the whole 208 the normal pci_check_whatever() API, no new error_detected()
201>>> segment, and thus invalidate the recovery that other devices 209 callback will be issued due to an error happening here. However,
202>>> on the same segment might have done, forcing the whole segment 210 such an error might cause IOs to be re-blocked for the whole
203>>> into one of the next states, that is, link reset or slot reset. 211 segment, and thus invalidate the recovery that other devices
212 on the same segment might have done, forcing the whole segment
213 into one of the next states, that is, link reset or slot reset.
204 214
205The driver should return one of the following result codes: 215The driver should return one of the following result codes:
206 - PCI_ERS_RESULT_RECOVERED 216 - PCI_ERS_RESULT_RECOVERED
207 Driver returns this if it thinks the device is fully 217 Driver returns this if it thinks the device is fully
208 functional and thinks it is ready to start 218 functional and thinks it is ready to start
209 normal driver operations again. There is no 219 normal driver operations again. There is no
210 guarantee that the driver will actually be 220 guarantee that the driver will actually be
211 allowed to proceed, as another driver on the 221 allowed to proceed, as another driver on the
212 same segment might have failed and thus triggered a 222 same segment might have failed and thus triggered a
213 slot reset on platforms that support it. 223 slot reset on platforms that support it.
214 224
215 - PCI_ERS_RESULT_NEED_RESET 225 - PCI_ERS_RESULT_NEED_RESET
216 Driver returns this if it thinks the device is not 226 Driver returns this if it thinks the device is not
217 recoverable in its current state and it needs a slot 227 recoverable in its current state and it needs a slot
218 reset to proceed. 228 reset to proceed.
219 229
220 - PCI_ERS_RESULT_DISCONNECT 230 - PCI_ERS_RESULT_DISCONNECT
221 Same as above. Total failure, no recovery even after 231 Same as above. Total failure, no recovery even after
222 reset driver dead. (To be defined more precisely) 232 reset driver dead. (To be defined more precisely)
223 233
224The next step taken depends on the results returned by the drivers. 234The next step taken depends on the results returned by the drivers.
225If all drivers returned PCI_ERS_RESULT_RECOVERED, then the platform 235If all drivers returned PCI_ERS_RESULT_RECOVERED, then the platform
@@ -293,31 +303,33 @@ device will be considered "dead" in this case.
293Drivers for multi-function cards will need to coordinate among 303Drivers for multi-function cards will need to coordinate among
294themselves as to which driver instance will perform any "one-shot" 304themselves as to which driver instance will perform any "one-shot"
295or global device initialization. For example, the Symbios sym53cxx2 305or global device initialization. For example, the Symbios sym53cxx2
296driver performs device init only from PCI function 0: 306driver performs device init only from PCI function 0::
297 307
298+ if (PCI_FUNC(pdev->devfn) == 0) 308 + if (PCI_FUNC(pdev->devfn) == 0)
299+ sym_reset_scsi_bus(np, 0); 309 + sym_reset_scsi_bus(np, 0);
300 310
301 Result codes: 311Result codes:
302 - PCI_ERS_RESULT_DISCONNECT 312 - PCI_ERS_RESULT_DISCONNECT
303 Same as above. 313 Same as above.
304 314
305Drivers for PCI Express cards that require a fundamental reset must 315Drivers for PCI Express cards that require a fundamental reset must
306set the needs_freset bit in the pci_dev structure in their probe function. 316set the needs_freset bit in the pci_dev structure in their probe function.
307For example, the QLogic qla2xxx driver sets the needs_freset bit for certain 317For example, the QLogic qla2xxx driver sets the needs_freset bit for certain
308PCI card types: 318PCI card types::
309 319
310+ /* Set EEH reset type to fundamental if required by hba */ 320 + /* Set EEH reset type to fundamental if required by hba */
311+ if (IS_QLA24XX(ha) || IS_QLA25XX(ha) || IS_QLA81XX(ha)) 321 + if (IS_QLA24XX(ha) || IS_QLA25XX(ha) || IS_QLA81XX(ha))
312+ pdev->needs_freset = 1; 322 + pdev->needs_freset = 1;
313+ 323 +
314 324
315Platform proceeds either to STEP 5 (Resume Operations) or STEP 6 (Permanent 325Platform proceeds either to STEP 5 (Resume Operations) or STEP 6 (Permanent
316Failure). 326Failure).
317 327
318>>> The current powerpc implementation does not try a power-cycle 328.. note::
319>>> reset if the driver returned PCI_ERS_RESULT_DISCONNECT. 329
320>>> However, it probably should. 330 The current powerpc implementation does not try a power-cycle
331 reset if the driver returned PCI_ERS_RESULT_DISCONNECT.
332 However, it probably should.
321 333
322 334
323STEP 5: Resume Operations 335STEP 5: Resume Operations
@@ -370,44 +382,43 @@ The current policy is to turn this into a platform policy.
370That is, the recovery API only requires that: 382That is, the recovery API only requires that:
371 383
372 - There is no guarantee that interrupt delivery can proceed from any 384 - There is no guarantee that interrupt delivery can proceed from any
373device on the segment starting from the error detection and until the 385 device on the segment starting from the error detection and until the
374slot_reset callback is called, at which point interrupts are expected 386 slot_reset callback is called, at which point interrupts are expected
375to be fully operational. 387 to be fully operational.
376 388
377 - There is no guarantee that interrupt delivery is stopped, that is, 389 - There is no guarantee that interrupt delivery is stopped, that is,
378a driver that gets an interrupt after detecting an error, or that detects 390 a driver that gets an interrupt after detecting an error, or that detects
379an error within the interrupt handler such that it prevents proper 391 an error within the interrupt handler such that it prevents proper
380ack'ing of the interrupt (and thus removal of the source) should just 392 ack'ing of the interrupt (and thus removal of the source) should just
381return IRQ_NONE. It's up to the platform to deal with that 393 return IRQ_NONE. It's up to the platform to deal with that
382condition, typically by masking the IRQ source during the duration of 394 condition, typically by masking the IRQ source during the duration of
383the error handling. It is expected that the platform "knows" which 395 the error handling. It is expected that the platform "knows" which
384interrupts are routed to error-management capable slots and can deal 396 interrupts are routed to error-management capable slots and can deal
385with temporarily disabling that IRQ number during error processing (this 397 with temporarily disabling that IRQ number during error processing (this
386isn't terribly complex). That means some IRQ latency for other devices 398 isn't terribly complex). That means some IRQ latency for other devices
387sharing the interrupt, but there is simply no other way. High end 399 sharing the interrupt, but there is simply no other way. High end
388platforms aren't supposed to share interrupts between many devices 400 platforms aren't supposed to share interrupts between many devices
389anyway :) 401 anyway :)
390 402
391>>> Implementation details for the powerpc platform are discussed in 403.. note::
392>>> the file Documentation/powerpc/eeh-pci-error-recovery.txt 404
393 405 Implementation details for the powerpc platform are discussed in
394>>> As of this writing, there is a growing list of device drivers with 406 the file Documentation/powerpc/eeh-pci-error-recovery.txt
395>>> patches implementing error recovery. Not all of these patches are in 407
396>>> mainline yet. These may be used as "examples": 408 As of this writing, there is a growing list of device drivers with
397>>> 409 patches implementing error recovery. Not all of these patches are in
398>>> drivers/scsi/ipr 410 mainline yet. These may be used as "examples":
399>>> drivers/scsi/sym53c8xx_2 411
400>>> drivers/scsi/qla2xxx 412 - drivers/scsi/ipr
401>>> drivers/scsi/lpfc 413 - drivers/scsi/sym53c8xx_2
402>>> drivers/net/bnx2.c 414 - drivers/scsi/qla2xxx
403>>> drivers/net/e100.c 415 - drivers/scsi/lpfc
404>>> drivers/net/e1000 416 - drivers/net/bnx2.c
405>>> drivers/net/e1000e 417 - drivers/net/e100.c
406>>> drivers/net/ixgb 418 - drivers/net/e1000
407>>> drivers/net/ixgbe 419 - drivers/net/e1000e
408>>> drivers/net/cxgb3 420 - drivers/net/ixgb
409>>> drivers/net/s2io.c 421 - drivers/net/ixgbe
410>>> drivers/net/qlge 422 - drivers/net/cxgb3
411 423 - drivers/net/s2io.c
412The End 424 - drivers/net/qlge
413-------
diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.rst
index d2a84151e99c..b9fd003206f1 100644
--- a/Documentation/PCI/pci-iov-howto.txt
+++ b/Documentation/PCI/pci-iov-howto.rst
@@ -1,14 +1,19 @@
1 PCI Express I/O Virtualization Howto 1.. SPDX-License-Identifier: GPL-2.0
2 Copyright (C) 2009 Intel Corporation 2.. include:: <isonum.txt>
3 Yu Zhao <yu.zhao@intel.com>
4 3
5 Update: November 2012 4====================================
6 -- sysfs-based SRIOV enable-/disable-ment 5PCI Express I/O Virtualization Howto
7 Donald Dutile <ddutile@redhat.com> 6====================================
8 7
91. Overview 8:Copyright: |copy| 2009 Intel Corporation
9:Authors: - Yu Zhao <yu.zhao@intel.com>
10 - Donald Dutile <ddutile@redhat.com>
10 11
111.1 What is SR-IOV 12Overview
13========
14
15What is SR-IOV
16--------------
12 17
13Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended 18Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended
14capability which makes one physical device appear as multiple virtual 19capability which makes one physical device appear as multiple virtual
@@ -23,9 +28,11 @@ Memory Space, which is used to map its register set. VF device driver
23operates on the register set so it can be functional and appear as a 28operates on the register set so it can be functional and appear as a
24real existing PCI device. 29real existing PCI device.
25 30
262. User Guide 31User Guide
32==========
27 33
282.1 How can I enable SR-IOV capability 34How can I enable SR-IOV capability
35----------------------------------
29 36
30Multiple methods are available for SR-IOV enablement. 37Multiple methods are available for SR-IOV enablement.
31In the first method, the device driver (PF driver) will control the 38In the first method, the device driver (PF driver) will control the
@@ -43,105 +50,123 @@ checks, e.g., check numvfs == 0 if enabling VFs, ensure
43numvfs <= totalvfs. 50numvfs <= totalvfs.
44The second method is the recommended method for new/future VF devices. 51The second method is the recommended method for new/future VF devices.
45 52
462.2 How can I use the Virtual Functions 53How can I use the Virtual Functions
54-----------------------------------
47 55
48VFs are treated as hot-plugged PCI devices in the kernel, so they 56VFs are treated as hot-plugged PCI devices in the kernel, so they
49should be able to work in the same way as real PCI devices. A VF 57should be able to work in the same way as real PCI devices. A VF
50requires a device driver that is the same as a normal PCI device's. 58requires a device driver that is the same as a normal PCI device's.
51 59
523. Developer Guide 60Developer Guide
61===============
53 62
543.1 SR-IOV API 63SR-IOV API
64----------
55 65
56To enable SR-IOV capability: 66To enable SR-IOV capability:
57(a) For the first method, in the driver: 67
68(a) For the first method, in the driver::
69
58 int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); 70 int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
59 'nr_virtfn' is the number of VFs to be enabled. 71
60(b) For the second method, from sysfs: 72'nr_virtfn' is the number of VFs to be enabled.
73
74(b) For the second method, from sysfs::
75
61 echo 'nr_virtfn' > \ 76 echo 'nr_virtfn' > \
62 /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs 77 /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
63 78
64To disable SR-IOV capability: 79To disable SR-IOV capability:
65(a) For the first method, in the driver: 80
81(a) For the first method, in the driver::
82
66 void pci_disable_sriov(struct pci_dev *dev); 83 void pci_disable_sriov(struct pci_dev *dev);
67(b) For the second method, from sysfs: 84
85(b) For the second method, from sysfs::
86
68 echo 0 > \ 87 echo 0 > \
69 /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs 88 /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
70 89
71To enable auto probing VFs by a compatible driver on the host, run 90To enable auto probing VFs by a compatible driver on the host, run
72command below before enabling SR-IOV capabilities. This is the 91command below before enabling SR-IOV capabilities. This is the
73default behavior. 92default behavior.
93::
94
74 echo 1 > \ 95 echo 1 > \
75 /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe 96 /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe
76 97
77To disable auto probing VFs by a compatible driver on the host, run 98To disable auto probing VFs by a compatible driver on the host, run
78command below before enabling SR-IOV capabilities. Updating this 99command below before enabling SR-IOV capabilities. Updating this
79entry will not affect VFs which are already probed. 100entry will not affect VFs which are already probed.
101::
102
80 echo 0 > \ 103 echo 0 > \
81 /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe 104 /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe
82 105
833.2 Usage example 106Usage example
107-------------
84 108
85The following piece of code illustrates the usage of the SR-IOV API. 109The following piece of code illustrates the usage of the SR-IOV API.
110::
86 111
87static int dev_probe(struct pci_dev *dev, const struct pci_device_id *id) 112 static int dev_probe(struct pci_dev *dev, const struct pci_device_id *id)
88{ 113 {
89 pci_enable_sriov(dev, NR_VIRTFN); 114 pci_enable_sriov(dev, NR_VIRTFN);
90 115
91 ... 116 ...
92
93 return 0;
94}
95 117
96static void dev_remove(struct pci_dev *dev) 118 return 0;
97{ 119 }
98 pci_disable_sriov(dev);
99 120
100 ... 121 static void dev_remove(struct pci_dev *dev)
101} 122 {
123 pci_disable_sriov(dev);
102 124
103static int dev_suspend(struct pci_dev *dev, pm_message_t state) 125 ...
104{ 126 }
105 ...
106 127
107 return 0; 128 static int dev_suspend(struct pci_dev *dev, pm_message_t state)
108} 129 {
130 ...
109 131
110static int dev_resume(struct pci_dev *dev) 132 return 0;
111{ 133 }
112 ...
113 134
114 return 0; 135 static int dev_resume(struct pci_dev *dev)
115} 136 {
137 ...
116 138
117static void dev_shutdown(struct pci_dev *dev) 139 return 0;
118{ 140 }
119 ...
120}
121 141
122static int dev_sriov_configure(struct pci_dev *dev, int numvfs) 142 static void dev_shutdown(struct pci_dev *dev)
123{ 143 {
124 if (numvfs > 0) {
125 ...
126 pci_enable_sriov(dev, numvfs);
127 ... 144 ...
128 return numvfs;
129 } 145 }
130 if (numvfs == 0) { 146
131 .... 147 static int dev_sriov_configure(struct pci_dev *dev, int numvfs)
132 pci_disable_sriov(dev); 148 {
133 ... 149 if (numvfs > 0) {
134 return 0; 150 ...
151 pci_enable_sriov(dev, numvfs);
152 ...
153 return numvfs;
154 }
155 if (numvfs == 0) {
156 ....
157 pci_disable_sriov(dev);
158 ...
159 return 0;
160 }
135 } 161 }
136} 162
137 163 static struct pci_driver dev_driver = {
138static struct pci_driver dev_driver = { 164 .name = "SR-IOV Physical Function driver",
139 .name = "SR-IOV Physical Function driver", 165 .id_table = dev_id_table,
140 .id_table = dev_id_table, 166 .probe = dev_probe,
141 .probe = dev_probe, 167 .remove = dev_remove,
142 .remove = dev_remove, 168 .suspend = dev_suspend,
143 .suspend = dev_suspend, 169 .resume = dev_resume,
144 .resume = dev_resume, 170 .shutdown = dev_shutdown,
145 .shutdown = dev_shutdown, 171 .sriov_configure = dev_sriov_configure,
146 .sriov_configure = dev_sriov_configure, 172 };
147};
diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.rst
index badb26ac33dc..6864f9a70f5f 100644
--- a/Documentation/PCI/pci.txt
+++ b/Documentation/PCI/pci.rst
@@ -1,10 +1,12 @@
1.. SPDX-License-Identifier: GPL-2.0
1 2
2 How To Write Linux PCI Drivers 3==============================
4How To Write Linux PCI Drivers
5==============================
3 6
4 by Martin Mares <mj@ucw.cz> on 07-Feb-2000 7:Authors: - Martin Mares <mj@ucw.cz>
5 updated by Grant Grundler <grundler@parisc-linux.org> on 23-Dec-2006 8 - Grant Grundler <grundler@parisc-linux.org>
6 9
7~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8The world of PCI is vast and full of (mostly unpleasant) surprises. 10The world of PCI is vast and full of (mostly unpleasant) surprises.
9Since each CPU architecture implements different chip-sets and PCI devices 11Since each CPU architecture implements different chip-sets and PCI devices
10have different requirements (erm, "features"), the result is the PCI support 12have different requirements (erm, "features"), the result is the PCI support
@@ -15,8 +17,7 @@ PCI device drivers.
15A more complete resource is the third edition of "Linux Device Drivers" 17A more complete resource is the third edition of "Linux Device Drivers"
16by Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman. 18by Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman.
17LDD3 is available for free (under Creative Commons License) from: 19LDD3 is available for free (under Creative Commons License) from:
18 20http://lwn.net/Kernel/LDD3/.
19 http://lwn.net/Kernel/LDD3/
20 21
21However, keep in mind that all documents are subject to "bit rot". 22However, keep in mind that all documents are subject to "bit rot".
22Refer to the source code if things are not working as described here. 23Refer to the source code if things are not working as described here.
@@ -25,9 +26,8 @@ Please send questions/comments/patches about Linux PCI API to the
25"Linux PCI" <linux-pci@atrey.karlin.mff.cuni.cz> mailing list. 26"Linux PCI" <linux-pci@atrey.karlin.mff.cuni.cz> mailing list.
26 27
27 28
28 29Structure of PCI drivers
290. Structure of PCI drivers 30========================
30~~~~~~~~~~~~~~~~~~~~~~~~~~~
31PCI drivers "discover" PCI devices in a system via pci_register_driver(). 31PCI drivers "discover" PCI devices in a system via pci_register_driver().
32Actually, it's the other way around. When the PCI generic code discovers 32Actually, it's the other way around. When the PCI generic code discovers
33a new device, the driver with a matching "description" will be notified. 33a new device, the driver with a matching "description" will be notified.
@@ -42,24 +42,25 @@ pointers and thus dictates the high level structure of a driver.
42Once the driver knows about a PCI device and takes ownership, the 42Once the driver knows about a PCI device and takes ownership, the
43driver generally needs to perform the following initialization: 43driver generally needs to perform the following initialization:
44 44
45 Enable the device 45 - Enable the device
46 Request MMIO/IOP resources 46 - Request MMIO/IOP resources
47 Set the DMA mask size (for both coherent and streaming DMA) 47 - Set the DMA mask size (for both coherent and streaming DMA)
48 Allocate and initialize shared control data (pci_allocate_coherent()) 48 - Allocate and initialize shared control data (pci_allocate_coherent())
49 Access device configuration space (if needed) 49 - Access device configuration space (if needed)
50 Register IRQ handler (request_irq()) 50 - Register IRQ handler (request_irq())
51 Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip) 51 - Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
52 Enable DMA/processing engines 52 - Enable DMA/processing engines
53 53
54When done using the device, and perhaps the module needs to be unloaded, 54When done using the device, and perhaps the module needs to be unloaded,
55the driver needs to take the following steps: 55the driver needs to take the following steps:
56 Disable the device from generating IRQs 56
57 Release the IRQ (free_irq()) 57 - Disable the device from generating IRQs
58 Stop all DMA activity 58 - Release the IRQ (free_irq())
59 Release DMA buffers (both streaming and coherent) 59 - Stop all DMA activity
60 Unregister from other subsystems (e.g. scsi or netdev) 60 - Release DMA buffers (both streaming and coherent)
61 Release MMIO/IOP resources 61 - Unregister from other subsystems (e.g. scsi or netdev)
62 Disable the device 62 - Release MMIO/IOP resources
63 - Disable the device
63 64
64Most of these topics are covered in the following sections. 65Most of these topics are covered in the following sections.
65For the rest look at LDD3 or <linux/pci.h> . 66For the rest look at LDD3 or <linux/pci.h> .
@@ -70,99 +71,38 @@ completely empty or just returning an appropriate error codes to avoid
70lots of ifdefs in the drivers. 71lots of ifdefs in the drivers.
71 72
72 73
74pci_register_driver() call
75==========================
73 76
741. pci_register_driver() call 77PCI device drivers call ``pci_register_driver()`` during their
75~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
76
77PCI device drivers call pci_register_driver() during their
78initialization with a pointer to a structure describing the driver 78initialization with a pointer to a structure describing the driver
79(struct pci_driver): 79(``struct pci_driver``):
80
81 field name Description
82 ---------- ------------------------------------------------------
83 id_table Pointer to table of device ID's the driver is
84 interested in. Most drivers should export this
85 table using MODULE_DEVICE_TABLE(pci,...).
86
87 probe This probing function gets called (during execution
88 of pci_register_driver() for already existing
89 devices or later if a new device gets inserted) for
90 all PCI devices which match the ID table and are not
91 "owned" by the other drivers yet. This function gets
92 passed a "struct pci_dev *" for each device whose
93 entry in the ID table matches the device. The probe
94 function returns zero when the driver chooses to
95 take "ownership" of the device or an error code
96 (negative number) otherwise.
97 The probe function always gets called from process
98 context, so it can sleep.
99
100 remove The remove() function gets called whenever a device
101 being handled by this driver is removed (either during
102 deregistration of the driver or when it's manually
103 pulled out of a hot-pluggable slot).
104 The remove function always gets called from process
105 context, so it can sleep.
106
107 suspend Put device into low power state.
108 suspend_late Put device into low power state.
109
110 resume_early Wake device from low power state.
111 resume Wake device from low power state.
112
113 (Please see Documentation/power/pci.txt for descriptions
114 of PCI Power Management and the related functions.)
115
116 shutdown Hook into reboot_notifier_list (kernel/sys.c).
117 Intended to stop any idling DMA operations.
118 Useful for enabling wake-on-lan (NIC) or changing
119 the power state of a device before reboot.
120 e.g. drivers/net/e100.c.
121
122 err_handler See Documentation/PCI/pci-error-recovery.txt
123
124
125The ID table is an array of struct pci_device_id entries ending with an
126all-zero entry. Definitions with static const are generally preferred.
127
128Each entry consists of:
129
130 vendor,device Vendor and device ID to match (or PCI_ANY_ID)
131 80
132 subvendor, Subsystem vendor and device ID to match (or PCI_ANY_ID) 81.. kernel-doc:: include/linux/pci.h
133 subdevice, 82 :functions: pci_driver
134 83
135 class Device class, subclass, and "interface" to match. 84The ID table is an array of ``struct pci_device_id`` entries ending with an
136 See Appendix D of the PCI Local Bus Spec or 85all-zero entry. Definitions with static const are generally preferred.
137 include/linux/pci_ids.h for a full list of classes.
138 Most drivers do not need to specify class/class_mask
139 as vendor/device is normally sufficient.
140
141 class_mask limit which sub-fields of the class field are compared.
142 See drivers/scsi/sym53c8xx_2/ for example of usage.
143
144 driver_data Data private to the driver.
145 Most drivers don't need to use driver_data field.
146 Best practice is to use driver_data as an index
147 into a static list of equivalent device types,
148 instead of using it as a pointer.
149 86
87.. kernel-doc:: include/linux/mod_devicetable.h
88 :functions: pci_device_id
150 89
151Most drivers only need PCI_DEVICE() or PCI_DEVICE_CLASS() to set up 90Most drivers only need ``PCI_DEVICE()`` or ``PCI_DEVICE_CLASS()`` to set up
152a pci_device_id table. 91a pci_device_id table.
153 92
154New PCI IDs may be added to a device driver pci_ids table at runtime 93New PCI IDs may be added to a device driver pci_ids table at runtime
155as shown below: 94as shown below::
156 95
157echo "vendor device subvendor subdevice class class_mask driver_data" > \ 96 echo "vendor device subvendor subdevice class class_mask driver_data" > \
158/sys/bus/pci/drivers/{driver}/new_id 97 /sys/bus/pci/drivers/{driver}/new_id
159 98
160All fields are passed in as hexadecimal values (no leading 0x). 99All fields are passed in as hexadecimal values (no leading 0x).
161The vendor and device fields are mandatory, the others are optional. Users 100The vendor and device fields are mandatory, the others are optional. Users
162need pass only as many optional fields as necessary: 101need pass only as many optional fields as necessary:
163 o subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF) 102
164 o class and classmask fields default to 0 103 - subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF)
165 o driver_data defaults to 0UL. 104 - class and classmask fields default to 0
105 - driver_data defaults to 0UL.
166 106
167Note that driver_data must match the value used by any of the pci_device_id 107Note that driver_data must match the value used by any of the pci_device_id
168entries defined in the driver. This makes the driver_data field mandatory 108entries defined in the driver. This makes the driver_data field mandatory
@@ -175,29 +115,31 @@ When the driver exits, it just calls pci_unregister_driver() and the PCI layer
175automatically calls the remove hook for all devices handled by the driver. 115automatically calls the remove hook for all devices handled by the driver.
176 116
177 117
1781.1 "Attributes" for driver functions/data 118"Attributes" for driver functions/data
119--------------------------------------
179 120
180Please mark the initialization and cleanup functions where appropriate 121Please mark the initialization and cleanup functions where appropriate
181(the corresponding macros are defined in <linux/init.h>): 122(the corresponding macros are defined in <linux/init.h>):
182 123
124 ====== =================================================
183 __init Initialization code. Thrown away after the driver 125 __init Initialization code. Thrown away after the driver
184 initializes. 126 initializes.
185 __exit Exit code. Ignored for non-modular drivers. 127 __exit Exit code. Ignored for non-modular drivers.
128 ====== =================================================
186 129
187Tips on when/where to use the above attributes: 130Tips on when/where to use the above attributes:
188 o The module_init()/module_exit() functions (and all 131 - The module_init()/module_exit() functions (and all
189 initialization functions called _only_ from these) 132 initialization functions called _only_ from these)
190 should be marked __init/__exit. 133 should be marked __init/__exit.
191 134
192 o Do not mark the struct pci_driver. 135 - Do not mark the struct pci_driver.
193 136
194 o Do NOT mark a function if you are not sure which mark to use. 137 - Do NOT mark a function if you are not sure which mark to use.
195 Better to not mark the function than mark the function wrong. 138 Better to not mark the function than mark the function wrong.
196 139
197 140
198 141How to find PCI devices manually
1992. How to find PCI devices manually 142================================
200~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
201 143
202PCI drivers should have a really good reason for not using the 144PCI drivers should have a really good reason for not using the
203pci_register_driver() interface to search for PCI devices. 145pci_register_driver() interface to search for PCI devices.
@@ -207,17 +149,17 @@ E.g. combined serial/parallel port/floppy controller.
207 149
208A manual search may be performed using the following constructs: 150A manual search may be performed using the following constructs:
209 151
210Searching by vendor and device ID: 152Searching by vendor and device ID::
211 153
212 struct pci_dev *dev = NULL; 154 struct pci_dev *dev = NULL;
213 while (dev = pci_get_device(VENDOR_ID, DEVICE_ID, dev)) 155 while (dev = pci_get_device(VENDOR_ID, DEVICE_ID, dev))
214 configure_device(dev); 156 configure_device(dev);
215 157
216Searching by class ID (iterate in a similar way): 158Searching by class ID (iterate in a similar way)::
217 159
218 pci_get_class(CLASS_ID, dev) 160 pci_get_class(CLASS_ID, dev)
219 161
220Searching by both vendor/device and subsystem vendor/device ID: 162Searching by both vendor/device and subsystem vendor/device ID::
221 163
222 pci_get_subsys(VENDOR_ID,DEVICE_ID, SUBSYS_VENDOR_ID, SUBSYS_DEVICE_ID, dev). 164 pci_get_subsys(VENDOR_ID,DEVICE_ID, SUBSYS_VENDOR_ID, SUBSYS_DEVICE_ID, dev).
223 165
@@ -230,21 +172,20 @@ the pci_dev that they return. You must eventually (possibly at module unload)
230decrement the reference count on these devices by calling pci_dev_put(). 172decrement the reference count on these devices by calling pci_dev_put().
231 173
232 174
233 175Device Initialization Steps
2343. Device Initialization Steps 176===========================
235~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
236 177
237As noted in the introduction, most PCI drivers need the following steps 178As noted in the introduction, most PCI drivers need the following steps
238for device initialization: 179for device initialization:
239 180
240 Enable the device 181 - Enable the device
241 Request MMIO/IOP resources 182 - Request MMIO/IOP resources
242 Set the DMA mask size (for both coherent and streaming DMA) 183 - Set the DMA mask size (for both coherent and streaming DMA)
243 Allocate and initialize shared control data (pci_allocate_coherent()) 184 - Allocate and initialize shared control data (pci_allocate_coherent())
244 Access device configuration space (if needed) 185 - Access device configuration space (if needed)
245 Register IRQ handler (request_irq()) 186 - Register IRQ handler (request_irq())
246 Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip) 187 - Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
247 Enable DMA/processing engines. 188 - Enable DMA/processing engines.
248 189
249The driver can access PCI config space registers at any time. 190The driver can access PCI config space registers at any time.
250(Well, almost. When running BIST, config space can go away...but 191(Well, almost. When running BIST, config space can go away...but
@@ -252,26 +193,29 @@ that will just result in a PCI Bus Master Abort and config reads
252will return garbage). 193will return garbage).
253 194
254 195
2553.1 Enable the PCI device 196Enable the PCI device
256~~~~~~~~~~~~~~~~~~~~~~~~~ 197---------------------
257Before touching any device registers, the driver needs to enable 198Before touching any device registers, the driver needs to enable
258the PCI device by calling pci_enable_device(). This will: 199the PCI device by calling pci_enable_device(). This will:
259 o wake up the device if it was in suspended state,
260 o allocate I/O and memory regions of the device (if BIOS did not),
261 o allocate an IRQ (if BIOS did not).
262 200
263NOTE: pci_enable_device() can fail! Check the return value. 201 - wake up the device if it was in suspended state,
202 - allocate I/O and memory regions of the device (if BIOS did not),
203 - allocate an IRQ (if BIOS did not).
264 204
265[ OS BUG: we don't check resource allocations before enabling those 205.. note::
266 resources. The sequence would make more sense if we called 206 pci_enable_device() can fail! Check the return value.
267 pci_request_resources() before calling pci_enable_device(). 207
268 Currently, the device drivers can't detect the bug when two 208.. warning::
269 devices have been allocated the same range. This is not a common 209 OS BUG: we don't check resource allocations before enabling those
270 problem and unlikely to get fixed soon. 210 resources. The sequence would make more sense if we called
211 pci_request_resources() before calling pci_enable_device().
212 Currently, the device drivers can't detect the bug when two
213 devices have been allocated the same range. This is not a common
214 problem and unlikely to get fixed soon.
215
216 This has been discussed before but not changed as of 2.6.19:
217 http://lkml.org/lkml/2006/3/2/194
271 218
272 This has been discussed before but not changed as of 2.6.19:
273 http://lkml.org/lkml/2006/3/2/194
274]
275 219
276pci_set_master() will enable DMA by setting the bus master bit 220pci_set_master() will enable DMA by setting the bus master bit
277in the PCI_COMMAND register. It also fixes the latency timer value if 221in the PCI_COMMAND register. It also fixes the latency timer value if
@@ -288,8 +232,8 @@ pci_try_set_mwi() to have the system do its best effort at enabling
288Mem-Wr-Inval. 232Mem-Wr-Inval.
289 233
290 234
2913.2 Request MMIO/IOP resources 235Request MMIO/IOP resources
292~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 236--------------------------
293Memory (MMIO), and I/O port addresses should NOT be read directly 237Memory (MMIO), and I/O port addresses should NOT be read directly
294from the PCI device config space. Use the values in the pci_dev structure 238from the PCI device config space. Use the values in the pci_dev structure
295as the PCI "bus address" might have been remapped to a "host physical" 239as the PCI "bus address" might have been remapped to a "host physical"
@@ -304,9 +248,10 @@ Conversely, drivers should call pci_release_region() AFTER
304calling pci_disable_device(). 248calling pci_disable_device().
305The idea is to prevent two devices colliding on the same address range. 249The idea is to prevent two devices colliding on the same address range.
306 250
307[ See OS BUG comment above. Currently (2.6.19), the driver can only 251.. tip::
308 determine MMIO and IO Port resource availability _after_ calling 252 See OS BUG comment above. Currently (2.6.19), the driver can only
309 pci_enable_device(). ] 253 determine MMIO and IO Port resource availability _after_ calling
254 pci_enable_device().
310 255
311Generic flavors of pci_request_region() are request_mem_region() 256Generic flavors of pci_request_region() are request_mem_region()
312(for MMIO ranges) and request_region() (for IO Port ranges). 257(for MMIO ranges) and request_region() (for IO Port ranges).
@@ -316,12 +261,13 @@ BARs.
316Also see pci_request_selected_regions() below. 261Also see pci_request_selected_regions() below.
317 262
318 263
3193.3 Set the DMA mask size 264Set the DMA mask size
320~~~~~~~~~~~~~~~~~~~~~~~~~ 265---------------------
321[ If anything below doesn't make sense, please refer to 266.. note::
322 Documentation/DMA-API.txt. This section is just a reminder that 267 If anything below doesn't make sense, please refer to
323 drivers need to indicate DMA capabilities of the device and is not 268 Documentation/DMA-API.txt. This section is just a reminder that
324 an authoritative source for DMA interfaces. ] 269 drivers need to indicate DMA capabilities of the device and is not
270 an authoritative source for DMA interfaces.
325 271
326While all drivers should explicitly indicate the DMA capability 272While all drivers should explicitly indicate the DMA capability
327(e.g. 32 or 64 bit) of the PCI bus master, devices with more than 273(e.g. 32 or 64 bit) of the PCI bus master, devices with more than
@@ -342,23 +288,23 @@ Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are
342("consistent") data. 288("consistent") data.
343 289
344 290
3453.4 Setup shared control data 291Setup shared control data
346~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 292-------------------------
347Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared) 293Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared)
348memory. See Documentation/DMA-API.txt for a full description of 294memory. See Documentation/DMA-API.txt for a full description of
349the DMA APIs. This section is just a reminder that it needs to be done 295the DMA APIs. This section is just a reminder that it needs to be done
350before enabling DMA on the device. 296before enabling DMA on the device.
351 297
352 298
3533.5 Initialize device registers 299Initialize device registers
354~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 300---------------------------
355Some drivers will need specific "capability" fields programmed 301Some drivers will need specific "capability" fields programmed
356or other "vendor specific" register initialized or reset. 302or other "vendor specific" register initialized or reset.
357E.g. clearing pending interrupts. 303E.g. clearing pending interrupts.
358 304
359 305
3603.6 Register IRQ handler 306Register IRQ handler
361~~~~~~~~~~~~~~~~~~~~~~~~ 307--------------------
362While calling request_irq() is the last step described here, 308While calling request_irq() is the last step described here,
363this is often just another intermediate step to initialize a device. 309this is often just another intermediate step to initialize a device.
364This step can often be deferred until the device is opened for use. 310This step can often be deferred until the device is opened for use.
@@ -396,6 +342,7 @@ and msix_enabled flags in the pci_dev structure after calling
396pci_alloc_irq_vectors. 342pci_alloc_irq_vectors.
397 343
398There are (at least) two really good reasons for using MSI: 344There are (at least) two really good reasons for using MSI:
345
3991) MSI is an exclusive interrupt vector by definition. 3461) MSI is an exclusive interrupt vector by definition.
400 This means the interrupt handler doesn't have to verify 347 This means the interrupt handler doesn't have to verify
401 its device caused the interrupt. 348 its device caused the interrupt.
@@ -410,24 +357,23 @@ See drivers/infiniband/hw/mthca/ or drivers/net/tg3.c for examples
410of MSI/MSI-X usage. 357of MSI/MSI-X usage.
411 358
412 359
413 360PCI device shutdown
4144. PCI device shutdown 361===================
415~~~~~~~~~~~~~~~~~~~~~~~
416 362
417When a PCI device driver is being unloaded, most of the following 363When a PCI device driver is being unloaded, most of the following
418steps need to be performed: 364steps need to be performed:
419 365
420 Disable the device from generating IRQs 366 - Disable the device from generating IRQs
421 Release the IRQ (free_irq()) 367 - Release the IRQ (free_irq())
422 Stop all DMA activity 368 - Stop all DMA activity
423 Release DMA buffers (both streaming and consistent) 369 - Release DMA buffers (both streaming and consistent)
424 Unregister from other subsystems (e.g. scsi or netdev) 370 - Unregister from other subsystems (e.g. scsi or netdev)
425 Disable device from responding to MMIO/IO Port addresses 371 - Disable device from responding to MMIO/IO Port addresses
426 Release MMIO/IO Port resource(s) 372 - Release MMIO/IO Port resource(s)
427 373
428 374
4294.1 Stop IRQs on the device 375Stop IRQs on the device
430~~~~~~~~~~~~~~~~~~~~~~~~~~~ 376-----------------------
431How to do this is chip/device specific. If it's not done, it opens 377How to do this is chip/device specific. If it's not done, it opens
432the possibility of a "screaming interrupt" if (and only if) 378the possibility of a "screaming interrupt" if (and only if)
433the IRQ is shared with another device. 379the IRQ is shared with another device.
@@ -446,16 +392,16 @@ MSI and MSI-X are defined to be exclusive interrupts and thus
446are not susceptible to the "screaming interrupt" problem. 392are not susceptible to the "screaming interrupt" problem.
447 393
448 394
4494.2 Release the IRQ 395Release the IRQ
450~~~~~~~~~~~~~~~~~~~ 396---------------
451Once the device is quiesced (no more IRQs), one can call free_irq(). 397Once the device is quiesced (no more IRQs), one can call free_irq().
452This function will return control once any pending IRQs are handled, 398This function will return control once any pending IRQs are handled,
453"unhook" the driver's IRQ handler from that IRQ, and finally release 399"unhook" the driver's IRQ handler from that IRQ, and finally release
454the IRQ if no one else is using it. 400the IRQ if no one else is using it.
455 401
456 402
4574.3 Stop all DMA activity 403Stop all DMA activity
458~~~~~~~~~~~~~~~~~~~~~~~~~ 404---------------------
459It's extremely important to stop all DMA operations BEFORE attempting 405It's extremely important to stop all DMA operations BEFORE attempting
460to deallocate DMA control data. Failure to do so can result in memory 406to deallocate DMA control data. Failure to do so can result in memory
461corruption, hangs, and on some chip-sets a hard crash. 407corruption, hangs, and on some chip-sets a hard crash.
@@ -467,8 +413,8 @@ While this step sounds obvious and trivial, several "mature" drivers
467didn't get this step right in the past. 413didn't get this step right in the past.
468 414
469 415
4704.4 Release DMA buffers 416Release DMA buffers
471~~~~~~~~~~~~~~~~~~~~~~~ 417-------------------
472Once DMA is stopped, clean up streaming DMA first. 418Once DMA is stopped, clean up streaming DMA first.
473I.e. unmap data buffers and return buffers to "upstream" 419I.e. unmap data buffers and return buffers to "upstream"
474owners if there is one. 420owners if there is one.
@@ -478,8 +424,8 @@ Then clean up "consistent" buffers which contain the control data.
478See Documentation/DMA-API.txt for details on unmapping interfaces. 424See Documentation/DMA-API.txt for details on unmapping interfaces.
479 425
480 426
4814.5 Unregister from other subsystems 427Unregister from other subsystems
482~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 428--------------------------------
483Most low level PCI device drivers support some other subsystem 429Most low level PCI device drivers support some other subsystem
484like USB, ALSA, SCSI, NetDev, Infiniband, etc. Make sure your 430like USB, ALSA, SCSI, NetDev, Infiniband, etc. Make sure your
485driver isn't losing resources from that other subsystem. 431driver isn't losing resources from that other subsystem.
@@ -487,31 +433,30 @@ If this happens, typically the symptom is an Oops (panic) when
487the subsystem attempts to call into a driver that has been unloaded. 433the subsystem attempts to call into a driver that has been unloaded.
488 434
489 435
4904.6 Disable Device from responding to MMIO/IO Port addresses 436Disable Device from responding to MMIO/IO Port addresses
491~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 437--------------------------------------------------------
492io_unmap() MMIO or IO Port resources and then call pci_disable_device(). 438io_unmap() MMIO or IO Port resources and then call pci_disable_device().
493This is the symmetric opposite of pci_enable_device(). 439This is the symmetric opposite of pci_enable_device().
494Do not access device registers after calling pci_disable_device(). 440Do not access device registers after calling pci_disable_device().
495 441
496 442
4974.7 Release MMIO/IO Port Resource(s) 443Release MMIO/IO Port Resource(s)
498~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 444--------------------------------
499Call pci_release_region() to mark the MMIO or IO Port range as available. 445Call pci_release_region() to mark the MMIO or IO Port range as available.
500Failure to do so usually results in the inability to reload the driver. 446Failure to do so usually results in the inability to reload the driver.
501 447
502 448
449How to access PCI config space
450==============================
503 451
5045. How to access PCI config space 452You can use `pci_(read|write)_config_(byte|word|dword)` to access the config
505~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 453space of a device represented by `struct pci_dev *`. All these functions return
506 4540 when successful or an error code (`PCIBIOS_...`) which can be translated to a
507You can use pci_(read|write)_config_(byte|word|dword) to access the config 455text string by pcibios_strerror. Most drivers expect that accesses to valid PCI
508space of a device represented by struct pci_dev *. All these functions return 0
509when successful or an error code (PCIBIOS_...) which can be translated to a text
510string by pcibios_strerror. Most drivers expect that accesses to valid PCI
511devices don't fail. 456devices don't fail.
512 457
513If you don't have a struct pci_dev available, you can call 458If you don't have a struct pci_dev available, you can call
514pci_bus_(read|write)_config_(byte|word|dword) to access a given device 459`pci_bus_(read|write)_config_(byte|word|dword)` to access a given device
515and function on that bus. 460and function on that bus.
516 461
517If you access fields in the standard portion of the config header, please 462If you access fields in the standard portion of the config header, please
@@ -522,10 +467,10 @@ pci_find_capability() for the particular capability and it will find the
522corresponding register block for you. 467corresponding register block for you.
523 468
524 469
470Other interesting functions
471===========================
525 472
5266. Other interesting functions 473============================= ================================================
527~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
528
529pci_get_domain_bus_and_slot() Find pci_dev corresponding to given domain, 474pci_get_domain_bus_and_slot() Find pci_dev corresponding to given domain,
530 bus and slot and number. If the device is 475 bus and slot and number. If the device is
531 found, its reference count is increased. 476 found, its reference count is increased.
@@ -539,11 +484,11 @@ pci_set_drvdata() Set private driver data pointer for a pci_dev
539pci_get_drvdata() Return private driver data pointer for a pci_dev 484pci_get_drvdata() Return private driver data pointer for a pci_dev
540pci_set_mwi() Enable Memory-Write-Invalidate transactions. 485pci_set_mwi() Enable Memory-Write-Invalidate transactions.
541pci_clear_mwi() Disable Memory-Write-Invalidate transactions. 486pci_clear_mwi() Disable Memory-Write-Invalidate transactions.
487============================= ================================================
542 488
543 489
544 490Miscellaneous hints
5457. Miscellaneous hints 491===================
546~~~~~~~~~~~~~~~~~~~~~~
547 492
548When displaying PCI device names to the user (for example when a driver wants 493When displaying PCI device names to the user (for example when a driver wants
549to tell the user what card it has found), please use pci_name(pci_dev). 494to tell the user what card it has found), please use pci_name(pci_dev).
@@ -559,9 +504,8 @@ on the bus need to be capable of doing it, so this is something which needs
559to be handled by platform and generic code, not individual drivers. 504to be handled by platform and generic code, not individual drivers.
560 505
561 506
562 507Vendor and device identifications
5638. Vendor and device identifications 508=================================
564~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
565 509
566Do not add new device or vendor IDs to include/linux/pci_ids.h unless they 510Do not add new device or vendor IDs to include/linux/pci_ids.h unless they
567are shared across multiple drivers. You can add private definitions in 511are shared across multiple drivers. You can add private definitions in
@@ -575,28 +519,27 @@ There are mirrors of the pci.ids file at http://pciids.sourceforge.net/
575and https://github.com/pciutils/pciids. 519and https://github.com/pciutils/pciids.
576 520
577 521
578 522Obsolete functions
5799. Obsolete functions 523==================
580~~~~~~~~~~~~~~~~~~~~~
581 524
582There are several functions which you might come across when trying to 525There are several functions which you might come across when trying to
583port an old driver to the new PCI interface. They are no longer present 526port an old driver to the new PCI interface. They are no longer present
584in the kernel as they aren't compatible with hotplug or PCI domains or 527in the kernel as they aren't compatible with hotplug or PCI domains or
585having sane locking. 528having sane locking.
586 529
530================= ===========================================
587pci_find_device() Superseded by pci_get_device() 531pci_find_device() Superseded by pci_get_device()
588pci_find_subsys() Superseded by pci_get_subsys() 532pci_find_subsys() Superseded by pci_get_subsys()
589pci_find_slot() Superseded by pci_get_domain_bus_and_slot() 533pci_find_slot() Superseded by pci_get_domain_bus_and_slot()
590pci_get_slot() Superseded by pci_get_domain_bus_and_slot() 534pci_get_slot() Superseded by pci_get_domain_bus_and_slot()
591 535================= ===========================================
592 536
593The alternative is the traditional PCI device driver that walks PCI 537The alternative is the traditional PCI device driver that walks PCI
594device lists. This is still possible but discouraged. 538device lists. This is still possible but discouraged.
595 539
596 540
597 541MMIO Space and "Write Posting"
59810. MMIO Space and "Write Posting" 542==============================
599~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
600 543
601Converting a driver from using I/O Port space to using MMIO space 544Converting a driver from using I/O Port space to using MMIO space
602often requires some additional changes. Specifically, "write posting" 545often requires some additional changes. Specifically, "write posting"
@@ -609,14 +552,14 @@ the CPU before the transaction has reached its destination.
609 552
610Thus, timing sensitive code should add readl() where the CPU is 553Thus, timing sensitive code should add readl() where the CPU is
611expected to wait before doing other work. The classic "bit banging" 554expected to wait before doing other work. The classic "bit banging"
612sequence works fine for I/O Port space: 555sequence works fine for I/O Port space::
613 556
614 for (i = 8; --i; val >>= 1) { 557 for (i = 8; --i; val >>= 1) {
615 outb(val & 1, ioport_reg); /* write bit */ 558 outb(val & 1, ioport_reg); /* write bit */
616 udelay(10); 559 udelay(10);
617 } 560 }
618 561
619The same sequence for MMIO space should be: 562The same sequence for MMIO space should be::
620 563
621 for (i = 8; --i; val >>= 1) { 564 for (i = 8; --i; val >>= 1) {
622 writeb(val & 1, mmio_reg); /* write bit */ 565 writeb(val & 1, mmio_reg); /* write bit */
@@ -633,4 +576,3 @@ handle the PCI master abort on all platforms if the PCI device is
633expected to not respond to a readl(). Most x86 platforms will allow 576expected to not respond to a readl(). Most x86 platforms will allow
634MMIO reads to master abort (a.k.a. "Soft Fail") and return garbage 577MMIO reads to master abort (a.k.a. "Soft Fail") and return garbage
635(e.g. ~0). But many RISC platforms will crash (a.k.a. "Hard Fail"). 578(e.g. ~0). But many RISC platforms will crash (a.k.a. "Hard Fail").
636
diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.rst
index 48ce7903e3c6..18bdefaafd1a 100644
--- a/Documentation/PCI/pcieaer-howto.txt
+++ b/Documentation/PCI/pcieaer-howto.rst
@@ -1,21 +1,29 @@
1 The PCI Express Advanced Error Reporting Driver Guide HOWTO 1.. SPDX-License-Identifier: GPL-2.0
2 T. Long Nguyen <tom.l.nguyen@intel.com> 2.. include:: <isonum.txt>
3 Yanmin Zhang <yanmin.zhang@intel.com>
4 07/29/2006
5 3
4===========================================================
5The PCI Express Advanced Error Reporting Driver Guide HOWTO
6===========================================================
6 7
71. Overview 8:Authors: - T. Long Nguyen <tom.l.nguyen@intel.com>
9 - Yanmin Zhang <yanmin.zhang@intel.com>
8 10
91.1 About this guide 11:Copyright: |copy| 2006 Intel Corporation
12
13Overview
14===========
15
16About this guide
17----------------
10 18
11This guide describes the basics of the PCI Express Advanced Error 19This guide describes the basics of the PCI Express Advanced Error
12Reporting (AER) driver and provides information on how to use it, as 20Reporting (AER) driver and provides information on how to use it, as
13well as how to enable the drivers of endpoint devices to conform with 21well as how to enable the drivers of endpoint devices to conform with
14PCI Express AER driver. 22PCI Express AER driver.
15 23
161.2 Copyright (C) Intel Corporation 2006.
17 24
181.3 What is the PCI Express AER Driver? 25What is the PCI Express AER Driver?
26-----------------------------------
19 27
20PCI Express error signaling can occur on the PCI Express link itself 28PCI Express error signaling can occur on the PCI Express link itself
21or on behalf of transactions initiated on the link. PCI Express 29or on behalf of transactions initiated on the link. PCI Express
@@ -30,17 +38,19 @@ The PCI Express AER driver provides the infrastructure to support PCI
30Express Advanced Error Reporting capability. The PCI Express AER 38Express Advanced Error Reporting capability. The PCI Express AER
31driver provides three basic functions: 39driver provides three basic functions:
32 40
33- Gathers the comprehensive error information if errors occurred. 41 - Gathers the comprehensive error information if errors occurred.
34- Reports error to the users. 42 - Reports error to the users.
35- Performs error recovery actions. 43 - Performs error recovery actions.
36 44
37AER driver only attaches root ports which support PCI-Express AER 45AER driver only attaches root ports which support PCI-Express AER
38capability. 46capability.
39 47
40 48
412. User Guide 49User Guide
50==========
42 51
432.1 Include the PCI Express AER Root Driver into the Linux Kernel 52Include the PCI Express AER Root Driver into the Linux Kernel
53-------------------------------------------------------------
44 54
45The PCI Express AER Root driver is a Root Port service driver attached 55The PCI Express AER Root driver is a Root Port service driver attached
46to the PCI Express Port Bus driver. If a user wants to use it, the driver 56to the PCI Express Port Bus driver. If a user wants to use it, the driver
@@ -48,7 +58,8 @@ has to be compiled. Option CONFIG_PCIEAER supports this capability. It
48depends on CONFIG_PCIEPORTBUS, so please set CONFIG_PCIEPORTBUS=y and 58depends on CONFIG_PCIEPORTBUS, so please set CONFIG_PCIEPORTBUS=y and
49CONFIG_PCIEAER=y. 59CONFIG_PCIEAER=y.
50 60
512.2 Load PCI Express AER Root Driver 61Load PCI Express AER Root Driver
62--------------------------------
52 63
53Some systems have AER support in firmware. Enabling Linux AER support at 64Some systems have AER support in firmware. Enabling Linux AER support at
54the same time the firmware handles AER may result in unpredictable 65the same time the firmware handles AER may result in unpredictable
@@ -56,30 +67,34 @@ behavior. Therefore, Linux does not handle AER events unless the firmware
56grants AER control to the OS via the ACPI _OSC method. See the PCI FW 3.0 67grants AER control to the OS via the ACPI _OSC method. See the PCI FW 3.0
57Specification for details regarding _OSC usage. 68Specification for details regarding _OSC usage.
58 69
592.3 AER error output 70AER error output
71----------------
60 72
61When a PCIe AER error is captured, an error message will be output to 73When a PCIe AER error is captured, an error message will be output to
62console. If it's a correctable error, it is output as a warning. 74console. If it's a correctable error, it is output as a warning.
63Otherwise, it is printed as an error. So users could choose different 75Otherwise, it is printed as an error. So users could choose different
64log level to filter out correctable error messages. 76log level to filter out correctable error messages.
65 77
66Below shows an example: 78Below shows an example::
670000:50:00.0: PCIe Bus Error: severity=Uncorrected (Fatal), type=Transaction Layer, id=0500(Requester ID) 79
680000:50:00.0: device [8086:0329] error status/mask=00100000/00000000 80 0000:50:00.0: PCIe Bus Error: severity=Uncorrected (Fatal), type=Transaction Layer, id=0500(Requester ID)
690000:50:00.0: [20] Unsupported Request (First) 81 0000:50:00.0: device [8086:0329] error status/mask=00100000/00000000
700000:50:00.0: TLP Header: 04000001 00200a03 05010000 00050100 82 0000:50:00.0: [20] Unsupported Request (First)
83 0000:50:00.0: TLP Header: 04000001 00200a03 05010000 00050100
71 84
72In the example, 'Requester ID' means the ID of the device that sends 85In the example, 'Requester ID' means the ID of the device that sends
73the error message to the root port. Please refer to the PCI Express specs for 86the error message to the root port. Please refer to the PCI Express specs for
74other fields. 87other fields.
75 88
762.4 AER Statistics / Counters 89AER Statistics / Counters
90-------------------------
77 91
78When PCIe AER errors are captured, the counters / statistics are also exposed 92When PCIe AER errors are captured, the counters / statistics are also exposed
79in the form of sysfs attributes which are documented at 93in the form of sysfs attributes which are documented at
80Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats 94Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
81 95
823. Developer Guide 96Developer Guide
97===============
83 98
84To enable AER aware support requires a software driver to configure 99To enable AER aware support requires a software driver to configure
85the AER capability structure within its device and to provide callbacks. 100the AER capability structure within its device and to provide callbacks.
@@ -120,7 +135,8 @@ hierarchy and links. These errors do not include any device specific
120errors because device specific errors will still get sent directly to 135errors because device specific errors will still get sent directly to
121the device driver. 136the device driver.
122 137
1233.1 Configure the AER capability structure 138Configure the AER capability structure
139--------------------------------------
124 140
125AER aware drivers of PCI Express component need change the device 141AER aware drivers of PCI Express component need change the device
126control registers to enable AER. They also could change AER registers, 142control registers to enable AER. They also could change AER registers,
@@ -128,9 +144,11 @@ including mask and severity registers. Helper function
128pci_enable_pcie_error_reporting could be used to enable AER. See 144pci_enable_pcie_error_reporting could be used to enable AER. See
129section 3.3. 145section 3.3.
130 146
1313.2. Provide callbacks 147Provide callbacks
148-----------------
132 149
1333.2.1 callback reset_link to reset pci express link 150callback reset_link to reset pci express link
151~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
134 152
135This callback is used to reset the pci express physical link when a 153This callback is used to reset the pci express physical link when a
136fatal error happens. The root port aer service driver provides a 154fatal error happens. The root port aer service driver provides a
@@ -140,13 +158,15 @@ upstream ports should provide their own reset_link functions.
140 158
141In struct pcie_port_service_driver, a new pointer, reset_link, is 159In struct pcie_port_service_driver, a new pointer, reset_link, is
142added. 160added.
161::
143 162
144pci_ers_result_t (*reset_link) (struct pci_dev *dev); 163 pci_ers_result_t (*reset_link) (struct pci_dev *dev);
145 164
146Section 3.2.2.2 provides more detailed info on when to call 165Section 3.2.2.2 provides more detailed info on when to call
147reset_link. 166reset_link.
148 167
1493.2.2 PCI error-recovery callbacks 168PCI error-recovery callbacks
169~~~~~~~~~~~~~~~~~~~~~~~~~~~~
150 170
151The PCI Express AER Root driver uses error callbacks to coordinate 171The PCI Express AER Root driver uses error callbacks to coordinate
152with downstream device drivers associated with a hierarchy in question 172with downstream device drivers associated with a hierarchy in question
@@ -161,7 +181,8 @@ definitions of the callbacks.
161 181
162Below sections specify when to call the error callback functions. 182Below sections specify when to call the error callback functions.
163 183
1643.2.2.1 Correctable errors 184Correctable errors
185~~~~~~~~~~~~~~~~~~
165 186
166Correctable errors pose no impacts on the functionality of 187Correctable errors pose no impacts on the functionality of
167the interface. The PCI Express protocol can recover without any 188the interface. The PCI Express protocol can recover without any
@@ -169,13 +190,16 @@ software intervention or any loss of data. These errors do not
169require any recovery actions. The AER driver clears the device's 190require any recovery actions. The AER driver clears the device's
170correctable error status register accordingly and logs these errors. 191correctable error status register accordingly and logs these errors.
171 192
1723.2.2.2 Non-correctable (non-fatal and fatal) errors 193Non-correctable (non-fatal and fatal) errors
194~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
173 195
174If an error message indicates a non-fatal error, performing link reset 196If an error message indicates a non-fatal error, performing link reset
175at upstream is not required. The AER driver calls error_detected(dev, 197at upstream is not required. The AER driver calls error_detected(dev,
176pci_channel_io_normal) to all drivers associated within a hierarchy in 198pci_channel_io_normal) to all drivers associated within a hierarchy in
177question. For example, 199question. For example::
178EndPoint<==>DownstreamPort B<==>UpstreamPort A<==>RootPort. 200
201 EndPoint<==>DownstreamPort B<==>UpstreamPort A<==>RootPort
202
179If Upstream port A captures an AER error, the hierarchy consists of 203If Upstream port A captures an AER error, the hierarchy consists of
180Downstream port B and EndPoint. 204Downstream port B and EndPoint.
181 205
@@ -199,53 +223,72 @@ function. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER and
199reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes 223reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes
200to mmio_enabled. 224to mmio_enabled.
201 225
2023.3 helper functions 226helper functions
227----------------
228::
229
230 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
203 231
2043.3.1 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
205pci_enable_pcie_error_reporting enables the device to send error 232pci_enable_pcie_error_reporting enables the device to send error
206messages to root port when an error is detected. Note that devices 233messages to root port when an error is detected. Note that devices
207don't enable the error reporting by default, so device drivers need 234don't enable the error reporting by default, so device drivers need
208call this function to enable it. 235call this function to enable it.
209 236
2103.3.2 int pci_disable_pcie_error_reporting(struct pci_dev *dev); 237::
238
239 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
240
211pci_disable_pcie_error_reporting disables the device to send error 241pci_disable_pcie_error_reporting disables the device to send error
212messages to root port when an error is detected. 242messages to root port when an error is detected.
213 243
2143.3.3 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); 244::
245
246 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
247
215pci_cleanup_aer_uncorrect_error_status cleans up the uncorrectable 248pci_cleanup_aer_uncorrect_error_status cleans up the uncorrectable
216error status register. 249error status register.
217 250
2183.4 Frequent Asked Questions 251Frequent Asked Questions
252------------------------
219 253
220Q: What happens if a PCI Express device driver does not provide an 254Q:
221error recovery handler (pci_driver->err_handler is equal to NULL)? 255 What happens if a PCI Express device driver does not provide an
256 error recovery handler (pci_driver->err_handler is equal to NULL)?
222 257
223A: The devices attached with the driver won't be recovered. If the 258A:
224error is fatal, kernel will print out warning messages. Please refer 259 The devices attached with the driver won't be recovered. If the
225to section 3 for more information. 260 error is fatal, kernel will print out warning messages. Please refer
261 to section 3 for more information.
226 262
227Q: What happens if an upstream port service driver does not provide 263Q:
228callback reset_link? 264 What happens if an upstream port service driver does not provide
265 callback reset_link?
229 266
230A: Fatal error recovery will fail if the errors are reported by the 267A:
231upstream ports who are attached by the service driver. 268 Fatal error recovery will fail if the errors are reported by the
269 upstream ports who are attached by the service driver.
232 270
233Q: How does this infrastructure deal with driver that is not PCI 271Q:
234Express aware? 272 How does this infrastructure deal with driver that is not PCI
273 Express aware?
235 274
236A: This infrastructure calls the error callback functions of the 275A:
237driver when an error happens. But if the driver is not aware of 276 This infrastructure calls the error callback functions of the
238PCI Express, the device might not report its own errors to root 277 driver when an error happens. But if the driver is not aware of
239port. 278 PCI Express, the device might not report its own errors to root
279 port.
240 280
241Q: What modifications will that driver need to make it compatible 281Q:
242with the PCI Express AER Root driver? 282 What modifications will that driver need to make it compatible
283 with the PCI Express AER Root driver?
243 284
244A: It could call the helper functions to enable AER in devices and 285A:
245cleanup uncorrectable status register. Pls. refer to section 3.3. 286 It could call the helper functions to enable AER in devices and
287 cleanup uncorrectable status register. Pls. refer to section 3.3.
246 288
247 289
2484. Software error injection 290Software error injection
291========================
249 292
250Debugging PCIe AER error recovery code is quite difficult because it 293Debugging PCIe AER error recovery code is quite difficult because it
251is hard to trigger real hardware errors. Software based error 294is hard to trigger real hardware errors. Software based error
@@ -261,6 +304,7 @@ After reboot with new kernel or insert the module, a device file named
261 304
262Then, you need a user space tool named aer-inject, which can be gotten 305Then, you need a user space tool named aer-inject, which can be gotten
263from: 306from:
307
264 https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/ 308 https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/
265 309
266More information about aer-inject can be found in the document comes 310More information about aer-inject can be found in the document comes
diff --git a/Documentation/PCI/PCIEBUS-HOWTO.txt b/Documentation/PCI/picebus-howto.rst
index 15f0bb3b5045..f882ff62c51f 100644
--- a/Documentation/PCI/PCIEBUS-HOWTO.txt
+++ b/Documentation/PCI/picebus-howto.rst
@@ -1,16 +1,23 @@
1 The PCI Express Port Bus Driver Guide HOWTO 1.. SPDX-License-Identifier: GPL-2.0
2 Tom L Nguyen tom.l.nguyen@intel.com 2.. include:: <isonum.txt>
3 11/03/2004
4 3
51. About this guide 4===========================================
5The PCI Express Port Bus Driver Guide HOWTO
6===========================================
7
8:Author: Tom L Nguyen tom.l.nguyen@intel.com 11/03/2004
9:Copyright: |copy| 2004 Intel Corporation
10
11About this guide
12================
6 13
7This guide describes the basics of the PCI Express Port Bus driver 14This guide describes the basics of the PCI Express Port Bus driver
8and provides information on how to enable the service drivers to 15and provides information on how to enable the service drivers to
9register/unregister with the PCI Express Port Bus Driver. 16register/unregister with the PCI Express Port Bus Driver.
10 17
112. Copyright 2004 Intel Corporation
12 18
133. What is the PCI Express Port Bus Driver 19What is the PCI Express Port Bus Driver
20=======================================
14 21
15A PCI Express Port is a logical PCI-PCI Bridge structure. There 22A PCI Express Port is a logical PCI-PCI Bridge structure. There
16are two types of PCI Express Port: the Root Port and the Switch 23are two types of PCI Express Port: the Root Port and the Switch
@@ -30,7 +37,8 @@ support (AER), and virtual channel support (VC). These services may
30be handled by a single complex driver or be individually distributed 37be handled by a single complex driver or be individually distributed
31and handled by corresponding service drivers. 38and handled by corresponding service drivers.
32 39
334. Why use the PCI Express Port Bus Driver? 40Why use the PCI Express Port Bus Driver?
41========================================
34 42
35In existing Linux kernels, the Linux Device Driver Model allows a 43In existing Linux kernels, the Linux Device Driver Model allows a
36physical device to be handled by only a single driver. The PCI 44physical device to be handled by only a single driver. The PCI
@@ -51,28 +59,31 @@ PCI Express Ports and distributes all provided service requests
51to the corresponding service drivers as required. Some key 59to the corresponding service drivers as required. Some key
52advantages of using the PCI Express Port Bus driver are listed below: 60advantages of using the PCI Express Port Bus driver are listed below:
53 61
54 - Allow multiple service drivers to run simultaneously on 62 - Allow multiple service drivers to run simultaneously on
55 a PCI-PCI Bridge Port device. 63 a PCI-PCI Bridge Port device.
56 64
57 - Allow service drivers implemented in an independent 65 - Allow service drivers implemented in an independent
58 staged approach. 66 staged approach.
59 67
60 - Allow one service driver to run on multiple PCI-PCI Bridge 68 - Allow one service driver to run on multiple PCI-PCI Bridge
61 Port devices. 69 Port devices.
62 70
63 - Manage and distribute resources of a PCI-PCI Bridge Port 71 - Manage and distribute resources of a PCI-PCI Bridge Port
64 device to requested service drivers. 72 device to requested service drivers.
65 73
665. Configuring the PCI Express Port Bus Driver vs. Service Drivers 74Configuring the PCI Express Port Bus Driver vs. Service Drivers
75===============================================================
67 76
685.1 Including the PCI Express Port Bus Driver Support into the Kernel 77Including the PCI Express Port Bus Driver Support into the Kernel
78-----------------------------------------------------------------
69 79
70Including the PCI Express Port Bus driver depends on whether the PCI 80Including the PCI Express Port Bus driver depends on whether the PCI
71Express support is included in the kernel config. The kernel will 81Express support is included in the kernel config. The kernel will
72automatically include the PCI Express Port Bus driver as a kernel 82automatically include the PCI Express Port Bus driver as a kernel
73driver when the PCI Express support is enabled in the kernel. 83driver when the PCI Express support is enabled in the kernel.
74 84
755.2 Enabling Service Driver Support 85Enabling Service Driver Support
86-------------------------------
76 87
77PCI device drivers are implemented based on Linux Device Driver Model. 88PCI device drivers are implemented based on Linux Device Driver Model.
78All service drivers are PCI device drivers. As discussed above, it is 89All service drivers are PCI device drivers. As discussed above, it is
@@ -89,9 +100,11 @@ header file /include/linux/pcieport_if.h, before calling these APIs.
89Failure to do so will result in an identity mismatch, which prevents 100Failure to do so will result in an identity mismatch, which prevents
90the PCI Express Port Bus driver from loading a service driver. 101the PCI Express Port Bus driver from loading a service driver.
91 102
925.2.1 pcie_port_service_register 103pcie_port_service_register
104~~~~~~~~~~~~~~~~~~~~~~~~~~
105::
93 106
94int pcie_port_service_register(struct pcie_port_service_driver *new) 107 int pcie_port_service_register(struct pcie_port_service_driver *new)
95 108
96This API replaces the Linux Driver Model's pci_register_driver API. A 109This API replaces the Linux Driver Model's pci_register_driver API. A
97service driver should always call pcie_port_service_register at 110service driver should always call pcie_port_service_register at
@@ -99,69 +112,76 @@ module init. Note that after service driver being loaded, calls
99such as pci_enable_device(dev) and pci_set_master(dev) are no longer 112such as pci_enable_device(dev) and pci_set_master(dev) are no longer
100necessary since these calls are executed by the PCI Port Bus driver. 113necessary since these calls are executed by the PCI Port Bus driver.
101 114
1025.2.2 pcie_port_service_unregister 115pcie_port_service_unregister
116~~~~~~~~~~~~~~~~~~~~~~~~~~~~
117::
103 118
104void pcie_port_service_unregister(struct pcie_port_service_driver *new) 119 void pcie_port_service_unregister(struct pcie_port_service_driver *new)
105 120
106pcie_port_service_unregister replaces the Linux Driver Model's 121pcie_port_service_unregister replaces the Linux Driver Model's
107pci_unregister_driver. It's always called by service driver when a 122pci_unregister_driver. It's always called by service driver when a
108module exits. 123module exits.
109 124
1105.2.3 Sample Code 125Sample Code
126~~~~~~~~~~~
111 127
112Below is sample service driver code to initialize the port service 128Below is sample service driver code to initialize the port service
113driver data structure. 129driver data structure.
130::
114 131
115static struct pcie_port_service_id service_id[] = { { 132 static struct pcie_port_service_id service_id[] = { {
116 .vendor = PCI_ANY_ID, 133 .vendor = PCI_ANY_ID,
117 .device = PCI_ANY_ID, 134 .device = PCI_ANY_ID,
118 .port_type = PCIE_RC_PORT, 135 .port_type = PCIE_RC_PORT,
119 .service_type = PCIE_PORT_SERVICE_AER, 136 .service_type = PCIE_PORT_SERVICE_AER,
120 }, { /* end: all zeroes */ } 137 }, { /* end: all zeroes */ }
121}; 138 };
122 139
123static struct pcie_port_service_driver root_aerdrv = { 140 static struct pcie_port_service_driver root_aerdrv = {
124 .name = (char *)device_name, 141 .name = (char *)device_name,
125 .id_table = &service_id[0], 142 .id_table = &service_id[0],
126 143
127 .probe = aerdrv_load, 144 .probe = aerdrv_load,
128 .remove = aerdrv_unload, 145 .remove = aerdrv_unload,
129 146
130 .suspend = aerdrv_suspend, 147 .suspend = aerdrv_suspend,
131 .resume = aerdrv_resume, 148 .resume = aerdrv_resume,
132}; 149 };
133 150
134Below is a sample code for registering/unregistering a service 151Below is a sample code for registering/unregistering a service
135driver. 152driver.
153::
136 154
137static int __init aerdrv_service_init(void) 155 static int __init aerdrv_service_init(void)
138{ 156 {
139 int retval = 0; 157 int retval = 0;
140 158
141 retval = pcie_port_service_register(&root_aerdrv); 159 retval = pcie_port_service_register(&root_aerdrv);
142 if (!retval) { 160 if (!retval) {
143 /* 161 /*
144 * FIX ME 162 * FIX ME
145 */ 163 */
146 } 164 }
147 return retval; 165 return retval;
148} 166 }
149 167
150static void __exit aerdrv_service_exit(void) 168 static void __exit aerdrv_service_exit(void)
151{ 169 {
152 pcie_port_service_unregister(&root_aerdrv); 170 pcie_port_service_unregister(&root_aerdrv);
153} 171 }
154 172
155module_init(aerdrv_service_init); 173 module_init(aerdrv_service_init);
156module_exit(aerdrv_service_exit); 174 module_exit(aerdrv_service_exit);
157 175
1586. Possible Resource Conflicts 176Possible Resource Conflicts
177===========================
159 178
160Since all service drivers of a PCI-PCI Bridge Port device are 179Since all service drivers of a PCI-PCI Bridge Port device are
161allowed to run simultaneously, below lists a few of possible resource 180allowed to run simultaneously, below lists a few of possible resource
162conflicts with proposed solutions. 181conflicts with proposed solutions.
163 182
1646.1 MSI and MSI-X Vector Resource 183MSI and MSI-X Vector Resource
184-----------------------------
165 185
166Once MSI or MSI-X interrupts are enabled on a device, it stays in this 186Once MSI or MSI-X interrupts are enabled on a device, it stays in this
167mode until they are disabled again. Since service drivers of the same 187mode until they are disabled again. Since service drivers of the same
@@ -179,7 +199,8 @@ driver. Service drivers should use (struct pcie_device*)dev->irq to
179call request_irq/free_irq. In addition, the interrupt mode is stored 199call request_irq/free_irq. In addition, the interrupt mode is stored
180in the field interrupt_mode of struct pcie_device. 200in the field interrupt_mode of struct pcie_device.
181 201
1826.3 PCI Memory/IO Mapped Regions 202PCI Memory/IO Mapped Regions
203----------------------------
183 204
184Service drivers for PCI Express Power Management (PME), Advanced 205Service drivers for PCI Express Power Management (PME), Advanced
185Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access 206Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access
@@ -188,7 +209,8 @@ registers accessed are independent of each other. This patch assumes
188that all service drivers will be well behaved and not overwrite 209that all service drivers will be well behaved and not overwrite
189other service driver's configuration settings. 210other service driver's configuration settings.
190 211
1916.4 PCI Config Registers 212PCI Config Registers
213--------------------
192 214
193Each service driver runs its PCI config operations on its own 215Each service driver runs its PCI config operations on its own
194capability structure except the PCI Express capability structure, in 216capability structure except the PCI Express capability structure, in
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ed104a44e8b2..f8b62360b18c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -13,7 +13,7 @@
13 For ARM64, ONLY "acpi=off", "acpi=on" or "acpi=force" 13 For ARM64, ONLY "acpi=off", "acpi=on" or "acpi=force"
14 are available 14 are available
15 15
16 See also Documentation/power/runtime_pm.txt, pci=noacpi 16 See also Documentation/power/runtime_pm.rst, pci=noacpi
17 17
18 acpi_apic_instance= [ACPI, IOAPIC] 18 acpi_apic_instance= [ACPI, IOAPIC]
19 Format: <int> 19 Format: <int>
@@ -223,7 +223,7 @@
223 acpi_sleep= [HW,ACPI] Sleep options 223 acpi_sleep= [HW,ACPI] Sleep options
224 Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, 224 Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
225 old_ordering, nonvs, sci_force_enable, nobl } 225 old_ordering, nonvs, sci_force_enable, nobl }
226 See Documentation/power/video.txt for information on 226 See Documentation/power/video.rst for information on
227 s3_bios and s3_mode. 227 s3_bios and s3_mode.
228 s3_beep is for debugging; it makes the PC's speaker beep 228 s3_beep is for debugging; it makes the PC's speaker beep
229 as soon as the kernel's real-mode entry point is called. 229 as soon as the kernel's real-mode entry point is called.
@@ -4119,7 +4119,7 @@
4119 Specify the offset from the beginning of the partition 4119 Specify the offset from the beginning of the partition
4120 given by "resume=" at which the swap header is located, 4120 given by "resume=" at which the swap header is located,
4121 in <PAGE_SIZE> units (needed only for swap files). 4121 in <PAGE_SIZE> units (needed only for swap files).
4122 See Documentation/power/swsusp-and-swap-files.txt 4122 See Documentation/power/swsusp-and-swap-files.rst
4123 4123
4124 resumedelay= [HIBERNATION] Delay (in seconds) to pause before attempting to 4124 resumedelay= [HIBERNATION] Delay (in seconds) to pause before attempting to
4125 read the resume files 4125 read the resume files
diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt
index 073f128af5a7..55193e680250 100644
--- a/Documentation/cpu-freq/core.txt
+++ b/Documentation/cpu-freq/core.txt
@@ -95,7 +95,7 @@ flags - flags of the cpufreq driver
95 95
963. CPUFreq Table Generation with Operating Performance Point (OPP) 963. CPUFreq Table Generation with Operating Performance Point (OPP)
97================================================================== 97==================================================================
98For details about OPP, see Documentation/power/opp.txt 98For details about OPP, see Documentation/power/opp.rst
99 99
100dev_pm_opp_init_cpufreq_table - 100dev_pm_opp_init_cpufreq_table -
101 This function provides a ready to use conversion routine to translate 101 This function provides a ready to use conversion routine to translate
diff --git a/Documentation/devicetree/bindings/pci/mobiveil-pcie.txt b/Documentation/devicetree/bindings/pci/mobiveil-pcie.txt
index a618d4787dd7..64156993e052 100644
--- a/Documentation/devicetree/bindings/pci/mobiveil-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/mobiveil-pcie.txt
@@ -10,8 +10,10 @@ Required properties:
10 interrupt source. The value must be 1. 10 interrupt source. The value must be 1.
11- compatible: Should contain "mbvl,gpex40-pcie" 11- compatible: Should contain "mbvl,gpex40-pcie"
12- reg: Should contain PCIe registers location and length 12- reg: Should contain PCIe registers location and length
13 Mandatory:
13 "config_axi_slave": PCIe controller registers 14 "config_axi_slave": PCIe controller registers
14 "csr_axi_slave" : Bridge config registers 15 "csr_axi_slave" : Bridge config registers
16 Optional:
15 "gpio_slave" : GPIO registers to control slot power 17 "gpio_slave" : GPIO registers to control slot power
16 "apb_csr" : MSI registers 18 "apb_csr" : MSI registers
17 19
diff --git a/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt b/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
index 145a4f04194f..7939bca47861 100644
--- a/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
@@ -65,6 +65,14 @@ Required properties:
65 - afi 65 - afi
66 - pcie_x 66 - pcie_x
67 67
68Optional properties:
69- pinctrl-names: A list of pinctrl state names. Must contain the following
70 entries:
71 - "default": active state, puts PCIe I/O out of deep power down state
72 - "idle": puts PCIe I/O into deep power down state
73- pinctrl-0: phandle for the default/active state of pin configurations.
74- pinctrl-1: phandle for the idle state of pin configurations.
75
68Required properties on Tegra124 and later (deprecated): 76Required properties on Tegra124 and later (deprecated):
69- phys: Must contain an entry for each entry in phy-names. 77- phys: Must contain an entry for each entry in phy-names.
70- phy-names: Must include the following entries: 78- phy-names: Must include the following entries:
diff --git a/Documentation/devicetree/bindings/pci/pci.txt b/Documentation/devicetree/bindings/pci/pci.txt
index 92c01db610df..2a5d91024059 100644
--- a/Documentation/devicetree/bindings/pci/pci.txt
+++ b/Documentation/devicetree/bindings/pci/pci.txt
@@ -24,6 +24,9 @@ driver implementation may support the following properties:
24 unsupported link speed, for instance, trying to do training for 24 unsupported link speed, for instance, trying to do training for
25 unsupported link speed, etc. Must be '4' for gen4, '3' for gen3, '2' 25 unsupported link speed, etc. Must be '4' for gen4, '3' for gen3, '2'
26 for gen2, and '1' for gen1. Any other values are invalid. 26 for gen2, and '1' for gen1. Any other values are invalid.
27- reset-gpios:
28 If present this property specifies PERST# GPIO. Host drivers can parse the
29 GPIO and apply fundamental reset to endpoints.
27 30
28PCI-PCI Bridge properties 31PCI-PCI Bridge properties
29------------------------- 32-------------------------
diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie.txt b/Documentation/devicetree/bindings/pci/qcom,pcie.txt
index 1fd703bd73e0..ada80b01bf0c 100644
--- a/Documentation/devicetree/bindings/pci/qcom,pcie.txt
+++ b/Documentation/devicetree/bindings/pci/qcom,pcie.txt
@@ -10,6 +10,7 @@
10 - "qcom,pcie-msm8996" for msm8996 or apq8096 10 - "qcom,pcie-msm8996" for msm8996 or apq8096
11 - "qcom,pcie-ipq4019" for ipq4019 11 - "qcom,pcie-ipq4019" for ipq4019
12 - "qcom,pcie-ipq8074" for ipq8074 12 - "qcom,pcie-ipq8074" for ipq8074
13 - "qcom,pcie-qcs404" for qcs404
13 14
14- reg: 15- reg:
15 Usage: required 16 Usage: required
@@ -116,6 +117,15 @@
116 - "ahb" AHB clock 117 - "ahb" AHB clock
117 - "aux" Auxiliary clock 118 - "aux" Auxiliary clock
118 119
120- clock-names:
121 Usage: required for qcs404
122 Value type: <stringlist>
123 Definition: Should contain the following entries
124 - "iface" AHB clock
125 - "aux" Auxiliary clock
126 - "master_bus" AXI Master clock
127 - "slave_bus" AXI Slave clock
128
119- resets: 129- resets:
120 Usage: required 130 Usage: required
121 Value type: <prop-encoded-array> 131 Value type: <prop-encoded-array>
@@ -167,6 +177,17 @@
167 - "ahb" AHB Reset 177 - "ahb" AHB Reset
168 - "axi_m_sticky" AXI Master Sticky reset 178 - "axi_m_sticky" AXI Master Sticky reset
169 179
180- reset-names:
181 Usage: required for qcs404
182 Value type: <stringlist>
183 Definition: Should contain the following entries
184 - "axi_m" AXI Master reset
185 - "axi_s" AXI Slave reset
186 - "axi_m_sticky" AXI Master Sticky reset
187 - "pipe_sticky" PIPE sticky reset
188 - "pwr" PWR reset
189 - "ahb" AHB reset
190
170- power-domains: 191- power-domains:
171 Usage: required for apq8084 and msm8996/apq8096 192 Usage: required for apq8084 and msm8996/apq8096
172 Value type: <prop-encoded-array> 193 Value type: <prop-encoded-array>
@@ -195,12 +216,12 @@
195 Definition: A phandle to the PCIe endpoint power supply 216 Definition: A phandle to the PCIe endpoint power supply
196 217
197- phys: 218- phys:
198 Usage: required for apq8084 219 Usage: required for apq8084 and qcs404
199 Value type: <phandle> 220 Value type: <phandle>
200 Definition: List of phandle(s) as listed in phy-names property 221 Definition: List of phandle(s) as listed in phy-names property
201 222
202- phy-names: 223- phy-names:
203 Usage: required for apq8084 224 Usage: required for apq8084 and qcs404
204 Value type: <stringlist> 225 Value type: <stringlist>
205 Definition: Should contain "pciephy" 226 Definition: Should contain "pciephy"
206 227
diff --git a/Documentation/devicetree/bindings/pci/rcar-pci.txt b/Documentation/devicetree/bindings/pci/rcar-pci.txt
index 6904882a0e94..45bba9f88a51 100644
--- a/Documentation/devicetree/bindings/pci/rcar-pci.txt
+++ b/Documentation/devicetree/bindings/pci/rcar-pci.txt
@@ -3,6 +3,7 @@
3Required properties: 3Required properties:
4compatible: "renesas,pcie-r8a7743" for the R8A7743 SoC; 4compatible: "renesas,pcie-r8a7743" for the R8A7743 SoC;
5 "renesas,pcie-r8a7744" for the R8A7744 SoC; 5 "renesas,pcie-r8a7744" for the R8A7744 SoC;
6 "renesas,pcie-r8a774a1" for the R8A774A1 SoC;
6 "renesas,pcie-r8a774c0" for the R8A774C0 SoC; 7 "renesas,pcie-r8a774c0" for the R8A774C0 SoC;
7 "renesas,pcie-r8a7779" for the R8A7779 SoC; 8 "renesas,pcie-r8a7779" for the R8A7779 SoC;
8 "renesas,pcie-r8a7790" for the R8A7790 SoC; 9 "renesas,pcie-r8a7790" for the R8A7790 SoC;
diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst
index 30835683616a..f66c7b9126ea 100644
--- a/Documentation/driver-api/pm/devices.rst
+++ b/Documentation/driver-api/pm/devices.rst
@@ -225,7 +225,7 @@ system-wide transition to a sleep state even though its :c:member:`runtime_auto`
225flag is clear. 225flag is clear.
226 226
227For more information about the runtime power management framework, refer to 227For more information about the runtime power management framework, refer to
228:file:`Documentation/power/runtime_pm.txt`. 228:file:`Documentation/power/runtime_pm.rst`.
229 229
230 230
231Calling Drivers to Enter and Leave System Sleep States 231Calling Drivers to Enter and Leave System Sleep States
@@ -728,7 +728,7 @@ it into account in any way.
728 728
729Devices may be defined as IRQ-safe which indicates to the PM core that their 729Devices may be defined as IRQ-safe which indicates to the PM core that their
730runtime PM callbacks may be invoked with disabled interrupts (see 730runtime PM callbacks may be invoked with disabled interrupts (see
731:file:`Documentation/power/runtime_pm.txt` for more information). If an 731:file:`Documentation/power/runtime_pm.rst` for more information). If an
732IRQ-safe device belongs to a PM domain, the runtime PM of the domain will be 732IRQ-safe device belongs to a PM domain, the runtime PM of the domain will be
733disallowed, unless the domain itself is defined as IRQ-safe. However, it 733disallowed, unless the domain itself is defined as IRQ-safe. However, it
734makes sense to define a PM domain as IRQ-safe only if all the devices in it 734makes sense to define a PM domain as IRQ-safe only if all the devices in it
@@ -795,7 +795,7 @@ so on) and the final state of the device must reflect the "active" runtime PM
795status in that case. 795status in that case.
796 796
797During system-wide resume from a sleep state it's easiest to put devices into 797During system-wide resume from a sleep state it's easiest to put devices into
798the full-power state, as explained in :file:`Documentation/power/runtime_pm.txt`. 798the full-power state, as explained in :file:`Documentation/power/runtime_pm.rst`.
799[Refer to that document for more information regarding this particular issue as 799[Refer to that document for more information regarding this particular issue as
800well as for information on the device runtime power management framework in 800well as for information on the device runtime power management framework in
801general.] 801general.]
diff --git a/Documentation/driver-api/usb/power-management.rst b/Documentation/driver-api/usb/power-management.rst
index 4a74cf6f2797..2525c3622cae 100644
--- a/Documentation/driver-api/usb/power-management.rst
+++ b/Documentation/driver-api/usb/power-management.rst
@@ -46,7 +46,7 @@ device is turned off while the system as a whole remains running, we
46call it a "dynamic suspend" (also known as a "runtime suspend" or 46call it a "dynamic suspend" (also known as a "runtime suspend" or
47"selective suspend"). This document concentrates mostly on how 47"selective suspend"). This document concentrates mostly on how
48dynamic PM is implemented in the USB subsystem, although system PM is 48dynamic PM is implemented in the USB subsystem, although system PM is
49covered to some extent (see ``Documentation/power/*.txt`` for more 49covered to some extent (see ``Documentation/power/*.rst`` for more
50information about system PM). 50information about system PM).
51 51
52System PM support is present only if the kernel was built with 52System PM support is present only if the kernel was built with
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 71a77feb779b..00a0fe4241a4 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -103,6 +103,7 @@ needed).
103 vm/index 103 vm/index
104 bpf/index 104 bpf/index
105 usb/index 105 usb/index
106 PCI/index
106 misc-devices/index 107 misc-devices/index
107 108
108Architecture-specific documentation 109Architecture-specific documentation
diff --git a/Documentation/power/apm-acpi.txt b/Documentation/power/apm-acpi.rst
index 6cc423d3662e..5b90d947126d 100644
--- a/Documentation/power/apm-acpi.txt
+++ b/Documentation/power/apm-acpi.rst
@@ -1,5 +1,7 @@
1============
1APM or ACPI? 2APM or ACPI?
2------------ 3============
4
3If you have a relatively recent x86 mobile, desktop, or server system, 5If you have a relatively recent x86 mobile, desktop, or server system,
4odds are it supports either Advanced Power Management (APM) or 6odds are it supports either Advanced Power Management (APM) or
5Advanced Configuration and Power Interface (ACPI). ACPI is the newer 7Advanced Configuration and Power Interface (ACPI). ACPI is the newer
@@ -28,5 +30,7 @@ and be sure that they are started sometime in the system boot process.
28Go ahead and start both. If ACPI or APM is not available on your 30Go ahead and start both. If ACPI or APM is not available on your
29system the associated daemon will exit gracefully. 31system the associated daemon will exit gracefully.
30 32
31 apmd: http://ftp.debian.org/pool/main/a/apmd/ 33 ===== =======================================
32 acpid: http://acpid.sf.net/ 34 apmd http://ftp.debian.org/pool/main/a/apmd/
35 acpid http://acpid.sf.net/
36 ===== =======================================
diff --git a/Documentation/power/basic-pm-debugging.txt b/Documentation/power/basic-pm-debugging.rst
index 708f87f78a75..69862e759c30 100644
--- a/Documentation/power/basic-pm-debugging.txt
+++ b/Documentation/power/basic-pm-debugging.rst
@@ -1,12 +1,16 @@
1=================================
1Debugging hibernation and suspend 2Debugging hibernation and suspend
3=================================
4
2 (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL 5 (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
3 6
41. Testing hibernation (aka suspend to disk or STD) 71. Testing hibernation (aka suspend to disk or STD)
8===================================================
5 9
6To check if hibernation works, you can try to hibernate in the "reboot" mode: 10To check if hibernation works, you can try to hibernate in the "reboot" mode::
7 11
8# echo reboot > /sys/power/disk 12 # echo reboot > /sys/power/disk
9# echo disk > /sys/power/state 13 # echo disk > /sys/power/state
10 14
11and the system should create a hibernation image, reboot, resume and get back to 15and the system should create a hibernation image, reboot, resume and get back to
12the command prompt where you have started the transition. If that happens, 16the command prompt where you have started the transition. If that happens,
@@ -15,20 +19,21 @@ test at least a couple of times in a row for confidence. [This is necessary,
15because some problems only show up on a second attempt at suspending and 19because some problems only show up on a second attempt at suspending and
16resuming the system.] Moreover, hibernating in the "reboot" and "shutdown" 20resuming the system.] Moreover, hibernating in the "reboot" and "shutdown"
17modes causes the PM core to skip some platform-related callbacks which on ACPI 21modes causes the PM core to skip some platform-related callbacks which on ACPI
18systems might be necessary to make hibernation work. Thus, if your machine fails 22systems might be necessary to make hibernation work. Thus, if your machine
19to hibernate or resume in the "reboot" mode, you should try the "platform" mode: 23fails to hibernate or resume in the "reboot" mode, you should try the
24"platform" mode::
20 25
21# echo platform > /sys/power/disk 26 # echo platform > /sys/power/disk
22# echo disk > /sys/power/state 27 # echo disk > /sys/power/state
23 28
24which is the default and recommended mode of hibernation. 29which is the default and recommended mode of hibernation.
25 30
26Unfortunately, the "platform" mode of hibernation does not work on some systems 31Unfortunately, the "platform" mode of hibernation does not work on some systems
27with broken BIOSes. In such cases the "shutdown" mode of hibernation might 32with broken BIOSes. In such cases the "shutdown" mode of hibernation might
28work: 33work::
29 34
30# echo shutdown > /sys/power/disk 35 # echo shutdown > /sys/power/disk
31# echo disk > /sys/power/state 36 # echo disk > /sys/power/state
32 37
33(it is similar to the "reboot" mode, but it requires you to press the power 38(it is similar to the "reboot" mode, but it requires you to press the power
34button to make the system resume). 39button to make the system resume).
@@ -37,6 +42,7 @@ If neither "platform" nor "shutdown" hibernation mode works, you will need to
37identify what goes wrong. 42identify what goes wrong.
38 43
39a) Test modes of hibernation 44a) Test modes of hibernation
45----------------------------
40 46
41To find out why hibernation fails on your system, you can use a special testing 47To find out why hibernation fails on your system, you can use a special testing
42facility available if the kernel is compiled with CONFIG_PM_DEBUG set. Then, 48facility available if the kernel is compiled with CONFIG_PM_DEBUG set. Then,
@@ -44,36 +50,38 @@ there is the file /sys/power/pm_test that can be used to make the hibernation
44core run in a test mode. There are 5 test modes available: 50core run in a test mode. There are 5 test modes available:
45 51
46freezer 52freezer
47- test the freezing of processes 53 - test the freezing of processes
48 54
49devices 55devices
50- test the freezing of processes and suspending of devices 56 - test the freezing of processes and suspending of devices
51 57
52platform 58platform
53- test the freezing of processes, suspending of devices and platform 59 - test the freezing of processes, suspending of devices and platform
54 global control methods(*) 60 global control methods [1]_
55 61
56processors 62processors
57- test the freezing of processes, suspending of devices, platform 63 - test the freezing of processes, suspending of devices, platform
58 global control methods(*) and the disabling of nonboot CPUs 64 global control methods [1]_ and the disabling of nonboot CPUs
59 65
60core 66core
61- test the freezing of processes, suspending of devices, platform global 67 - test the freezing of processes, suspending of devices, platform global
62 control methods(*), the disabling of nonboot CPUs and suspending of 68 control methods\ [1]_, the disabling of nonboot CPUs and suspending
63 platform/system devices 69 of platform/system devices
70
71.. [1]
64 72
65(*) the platform global control methods are only available on ACPI systems 73 the platform global control methods are only available on ACPI systems
66 and are only tested if the hibernation mode is set to "platform" 74 and are only tested if the hibernation mode is set to "platform"
67 75
68To use one of them it is necessary to write the corresponding string to 76To use one of them it is necessary to write the corresponding string to
69/sys/power/pm_test (eg. "devices" to test the freezing of processes and 77/sys/power/pm_test (eg. "devices" to test the freezing of processes and
70suspending devices) and issue the standard hibernation commands. For example, 78suspending devices) and issue the standard hibernation commands. For example,
71to use the "devices" test mode along with the "platform" mode of hibernation, 79to use the "devices" test mode along with the "platform" mode of hibernation,
72you should do the following: 80you should do the following::
73 81
74# echo devices > /sys/power/pm_test 82 # echo devices > /sys/power/pm_test
75# echo platform > /sys/power/disk 83 # echo platform > /sys/power/disk
76# echo disk > /sys/power/state 84 # echo disk > /sys/power/state
77 85
78Then, the kernel will try to freeze processes, suspend devices, wait a few 86Then, the kernel will try to freeze processes, suspend devices, wait a few
79seconds (5 by default, but configurable by the suspend.pm_test_delay module 87seconds (5 by default, but configurable by the suspend.pm_test_delay module
@@ -108,11 +116,12 @@ If the "devices" test fails, most likely there is a driver that cannot suspend
108or resume its device (in the latter case the system may hang or become unstable 116or resume its device (in the latter case the system may hang or become unstable
109after the test, so please take that into consideration). To find this driver, 117after the test, so please take that into consideration). To find this driver,
110you can carry out a binary search according to the rules: 118you can carry out a binary search according to the rules:
119
111- if the test fails, unload a half of the drivers currently loaded and repeat 120- if the test fails, unload a half of the drivers currently loaded and repeat
112(that would probably involve rebooting the system, so always note what drivers 121 (that would probably involve rebooting the system, so always note what drivers
113have been loaded before the test), 122 have been loaded before the test),
114- if the test succeeds, load a half of the drivers you have unloaded most 123- if the test succeeds, load a half of the drivers you have unloaded most
115recently and repeat. 124 recently and repeat.
116 125
117Once you have found the failing driver (there can be more than just one of 126Once you have found the failing driver (there can be more than just one of
118them), you have to unload it every time before hibernation. In that case please 127them), you have to unload it every time before hibernation. In that case please
@@ -146,6 +155,7 @@ indicates a serious problem that very well may be related to the hardware, but
146please report it anyway. 155please report it anyway.
147 156
148b) Testing minimal configuration 157b) Testing minimal configuration
158--------------------------------
149 159
150If all of the hibernation test modes work, you can boot the system with the 160If all of the hibernation test modes work, you can boot the system with the
151"init=/bin/bash" command line parameter and attempt to hibernate in the 161"init=/bin/bash" command line parameter and attempt to hibernate in the
@@ -165,14 +175,15 @@ Again, if you find the offending module(s), it(they) must be unloaded every time
165before hibernation, and please report the problem with it(them). 175before hibernation, and please report the problem with it(them).
166 176
167c) Using the "test_resume" hibernation option 177c) Using the "test_resume" hibernation option
178---------------------------------------------
168 179
169/sys/power/disk generally tells the kernel what to do after creating a 180/sys/power/disk generally tells the kernel what to do after creating a
170hibernation image. One of the available options is "test_resume" which 181hibernation image. One of the available options is "test_resume" which
171causes the just created image to be used for immediate restoration. Namely, 182causes the just created image to be used for immediate restoration. Namely,
172after doing: 183after doing::
173 184
174# echo test_resume > /sys/power/disk 185 # echo test_resume > /sys/power/disk
175# echo disk > /sys/power/state 186 # echo disk > /sys/power/state
176 187
177a hibernation image will be created and a resume from it will be triggered 188a hibernation image will be created and a resume from it will be triggered
178immediately without involving the platform firmware in any way. 189immediately without involving the platform firmware in any way.
@@ -190,6 +201,7 @@ to resume may be related to the differences between the restore and image
190kernels. 201kernels.
191 202
192d) Advanced debugging 203d) Advanced debugging
204---------------------
193 205
194In case that hibernation does not work on your system even in the minimal 206In case that hibernation does not work on your system even in the minimal
195configuration and compiling more drivers as modules is not practical or some 207configuration and compiling more drivers as modules is not practical or some
@@ -200,9 +212,10 @@ kernel messages using the serial console. This may provide you with some
200information about the reasons of the suspend (resume) failure. Alternatively, 212information about the reasons of the suspend (resume) failure. Alternatively,
201it may be possible to use a FireWire port for debugging with firescope 213it may be possible to use a FireWire port for debugging with firescope
202(http://v3.sk/~lkundrak/firescope/). On x86 it is also possible to 214(http://v3.sk/~lkundrak/firescope/). On x86 it is also possible to
203use the PM_TRACE mechanism documented in Documentation/power/s2ram.txt . 215use the PM_TRACE mechanism documented in Documentation/power/s2ram.rst .
204 216
2052. Testing suspend to RAM (STR) 2172. Testing suspend to RAM (STR)
218===============================
206 219
207To verify that the STR works, it is generally more convenient to use the s2ram 220To verify that the STR works, it is generally more convenient to use the s2ram
208tool available from http://suspend.sf.net and documented at 221tool available from http://suspend.sf.net and documented at
@@ -230,7 +243,8 @@ you will have to unload them every time before an STR transition (ie. before
230you run s2ram), and please report the problems with them. 243you run s2ram), and please report the problems with them.
231 244
232There is a debugfs entry which shows the suspend to RAM statistics. Here is an 245There is a debugfs entry which shows the suspend to RAM statistics. Here is an
233example of its output. 246example of its output::
247
234 # mount -t debugfs none /sys/kernel/debug 248 # mount -t debugfs none /sys/kernel/debug
235 # cat /sys/kernel/debug/suspend_stats 249 # cat /sys/kernel/debug/suspend_stats
236 success: 20 250 success: 20
@@ -248,6 +262,7 @@ example of its output.
248 -16 262 -16
249 last_failed_step: suspend 263 last_failed_step: suspend
250 suspend 264 suspend
265
251Field success means the success number of suspend to RAM, and field fail means 266Field success means the success number of suspend to RAM, and field fail means
252the failure number. Others are the failure number of different steps of suspend 267the failure number. Others are the failure number of different steps of suspend
253to RAM. suspend_stats just lists the last 2 failed devices, error number and 268to RAM. suspend_stats just lists the last 2 failed devices, error number and
diff --git a/Documentation/power/charger-manager.txt b/Documentation/power/charger-manager.rst
index 9ff1105e58d6..84fab9376792 100644
--- a/Documentation/power/charger-manager.txt
+++ b/Documentation/power/charger-manager.rst
@@ -1,4 +1,7 @@
1===============
1Charger Manager 2Charger Manager
3===============
4
2 (C) 2011 MyungJoo Ham <myungjoo.ham@samsung.com>, GPL 5 (C) 2011 MyungJoo Ham <myungjoo.ham@samsung.com>, GPL
3 6
4Charger Manager provides in-kernel battery charger management that 7Charger Manager provides in-kernel battery charger management that
@@ -55,41 +58,39 @@ Charger Manager supports the following:
55 notification to users with UEVENT. 58 notification to users with UEVENT.
56 59
572. Global Charger-Manager Data related with suspend_again 602. Global Charger-Manager Data related with suspend_again
58======================================================== 61=========================================================
59In order to setup Charger Manager with suspend-again feature 62In order to setup Charger Manager with suspend-again feature
60(in-suspend monitoring), the user should provide charger_global_desc 63(in-suspend monitoring), the user should provide charger_global_desc
61with setup_charger_manager(struct charger_global_desc *). 64with setup_charger_manager(`struct charger_global_desc *`).
62This charger_global_desc data for in-suspend monitoring is global 65This charger_global_desc data for in-suspend monitoring is global
63as the name suggests. Thus, the user needs to provide it only once even 66as the name suggests. Thus, the user needs to provide it only once even
64if there are multiple batteries. If there are multiple batteries, the 67if there are multiple batteries. If there are multiple batteries, the
65multiple instances of Charger Manager share the same charger_global_desc 68multiple instances of Charger Manager share the same charger_global_desc
66and it will manage in-suspend monitoring for all instances of Charger Manager. 69and it will manage in-suspend monitoring for all instances of Charger Manager.
67 70
68The user needs to provide all the three entries properly in order to activate 71The user needs to provide all the three entries to `struct charger_global_desc`
69in-suspend monitoring: 72properly in order to activate in-suspend monitoring:
70
71struct charger_global_desc {
72 73
73char *rtc_name; 74`char *rtc_name;`
74 : The name of rtc (e.g., "rtc0") used to wakeup the system from 75 The name of rtc (e.g., "rtc0") used to wakeup the system from
75 suspend for Charger Manager. The alarm interrupt (AIE) of the rtc 76 suspend for Charger Manager. The alarm interrupt (AIE) of the rtc
76 should be able to wake up the system from suspend. Charger Manager 77 should be able to wake up the system from suspend. Charger Manager
77 saves and restores the alarm value and use the previously-defined 78 saves and restores the alarm value and use the previously-defined
78 alarm if it is going to go off earlier than Charger Manager so that 79 alarm if it is going to go off earlier than Charger Manager so that
79 Charger Manager does not interfere with previously-defined alarms. 80 Charger Manager does not interfere with previously-defined alarms.
80 81
81bool (*rtc_only_wakeup)(void); 82`bool (*rtc_only_wakeup)(void);`
82 : This callback should let CM know whether 83 This callback should let CM know whether
83 the wakeup-from-suspend is caused only by the alarm of "rtc" in the 84 the wakeup-from-suspend is caused only by the alarm of "rtc" in the
84 same struct. If any other wakeup source triggered the 85 same struct. If any other wakeup source triggered the
85 wakeup, it should return false. If the "rtc" is the only wakeup 86 wakeup, it should return false. If the "rtc" is the only wakeup
86 reason, it should return true. 87 reason, it should return true.
87 88
88bool assume_timer_stops_in_suspend; 89`bool assume_timer_stops_in_suspend;`
89 : if true, Charger Manager assumes that 90 if true, Charger Manager assumes that
90 the timer (CM uses jiffies as timer) stops during suspend. Then, CM 91 the timer (CM uses jiffies as timer) stops during suspend. Then, CM
91 assumes that the suspend-duration is same as the alarm length. 92 assumes that the suspend-duration is same as the alarm length.
92}; 93
93 94
943. How to setup suspend_again 953. How to setup suspend_again
95============================= 96=============================
@@ -109,26 +110,28 @@ if the system was woken up by Charger Manager and the polling
109============================================= 110=============================================
110For each battery charged independently from other batteries (if a series of 111For each battery charged independently from other batteries (if a series of
111batteries are charged by a single charger, they are counted as one independent 112batteries are charged by a single charger, they are counted as one independent
112battery), an instance of Charger Manager is attached to it. 113battery), an instance of Charger Manager is attached to it. The following
113 114
114struct charger_desc { 115struct charger_desc elements:
115 116
116char *psy_name; 117`char *psy_name;`
117 : The power-supply-class name of the battery. Default is 118 The power-supply-class name of the battery. Default is
118 "battery" if psy_name is NULL. Users can access the psy entries 119 "battery" if psy_name is NULL. Users can access the psy entries
119 at "/sys/class/power_supply/[psy_name]/". 120 at "/sys/class/power_supply/[psy_name]/".
120 121
121enum polling_modes polling_mode; 122`enum polling_modes polling_mode;`
122 : CM_POLL_DISABLE: do not poll this battery. 123 CM_POLL_DISABLE:
123 CM_POLL_ALWAYS: always poll this battery. 124 do not poll this battery.
124 CM_POLL_EXTERNAL_POWER_ONLY: poll this battery if and only if 125 CM_POLL_ALWAYS:
125 an external power source is attached. 126 always poll this battery.
126 CM_POLL_CHARGING_ONLY: poll this battery if and only if the 127 CM_POLL_EXTERNAL_POWER_ONLY:
127 battery is being charged. 128 poll this battery if and only if an external power
128 129 source is attached.
129unsigned int fullbatt_vchkdrop_ms; 130 CM_POLL_CHARGING_ONLY:
130unsigned int fullbatt_vchkdrop_uV; 131 poll this battery if and only if the battery is being charged.
131 : If both have non-zero values, Charger Manager will check the 132
133`unsigned int fullbatt_vchkdrop_ms; / unsigned int fullbatt_vchkdrop_uV;`
134 If both have non-zero values, Charger Manager will check the
132 battery voltage drop fullbatt_vchkdrop_ms after the battery is fully 135 battery voltage drop fullbatt_vchkdrop_ms after the battery is fully
133 charged. If the voltage drop is over fullbatt_vchkdrop_uV, Charger 136 charged. If the voltage drop is over fullbatt_vchkdrop_uV, Charger
134 Manager will try to recharge the battery by disabling and enabling 137 Manager will try to recharge the battery by disabling and enabling
@@ -136,50 +139,52 @@ unsigned int fullbatt_vchkdrop_uV;
136 condition) is needed to be implemented with hardware interrupts from 139 condition) is needed to be implemented with hardware interrupts from
137 fuel gauges or charger devices/chips. 140 fuel gauges or charger devices/chips.
138 141
139unsigned int fullbatt_uV; 142`unsigned int fullbatt_uV;`
140 : If specified with a non-zero value, Charger Manager assumes 143 If specified with a non-zero value, Charger Manager assumes
141 that the battery is full (capacity = 100) if the battery is not being 144 that the battery is full (capacity = 100) if the battery is not being
142 charged and the battery voltage is equal to or greater than 145 charged and the battery voltage is equal to or greater than
143 fullbatt_uV. 146 fullbatt_uV.
144 147
145unsigned int polling_interval_ms; 148`unsigned int polling_interval_ms;`
146 : Required polling interval in ms. Charger Manager will poll 149 Required polling interval in ms. Charger Manager will poll
147 this battery every polling_interval_ms or more frequently. 150 this battery every polling_interval_ms or more frequently.
148 151
149enum data_source battery_present; 152`enum data_source battery_present;`
150 : CM_BATTERY_PRESENT: assume that the battery exists. 153 CM_BATTERY_PRESENT:
151 CM_NO_BATTERY: assume that the battery does not exist. 155 CM_NO_BATTERY:
152 CM_FUEL_GAUGE: get battery presence information from fuel gauge. 156 assume that the battery does not exist.
153 CM_CHARGER_STAT: get battery presence from chargers. 156 assume that the battery does not exists.
154 157 CM_FUEL_GAUGE:
155char **psy_charger_stat; 158 get battery presence information from fuel gauge.
156 : An array ending with NULL that has power-supply-class names of 159 CM_CHARGER_STAT:
160 get battery presence from chargers.
161
162`char **psy_charger_stat;`
163 An array ending with NULL that has power-supply-class names of
157 chargers. Each power-supply-class should provide "PRESENT" (if 164 chargers. Each power-supply-class should provide "PRESENT" (if
158 battery_present is "CM_CHARGER_STAT"), "ONLINE" (shows whether an 165 battery_present is "CM_CHARGER_STAT"), "ONLINE" (shows whether an
159 external power source is attached or not), and "STATUS" (shows whether 166 external power source is attached or not), and "STATUS" (shows whether
160 the battery is {"FULL" or not FULL} or {"FULL", "Charging", 167 the battery is {"FULL" or not FULL} or {"FULL", "Charging",
161 "Discharging", "NotCharging"}). 168 "Discharging", "NotCharging"}).
162 169
163int num_charger_regulators; 170`int num_charger_regulators; / struct regulator_bulk_data *charger_regulators;`
164struct regulator_bulk_data *charger_regulators; 171 Regulators representing the chargers in the form for
165 : Regulators representing the chargers in the form for
166 regulator framework's bulk functions. 172 regulator framework's bulk functions.
167 173
168char *psy_fuel_gauge; 174`char *psy_fuel_gauge;`
169 : Power-supply-class name of the fuel gauge. 175 Power-supply-class name of the fuel gauge.
170 176
171int (*temperature_out_of_range)(int *mC); 177`int (*temperature_out_of_range)(int *mC); / bool measure_battery_temp;`
172bool measure_battery_temp; 178 This callback returns 0 if the temperature is safe for charging,
173 : This callback returns 0 if the temperature is safe for charging,
174 a positive number if it is too hot to charge, and a negative number 179 a positive number if it is too hot to charge, and a negative number
175 if it is too cold to charge. With the variable mC, the callback returns 180 if it is too cold to charge. With the variable mC, the callback returns
176 the temperature in 1/1000 of centigrade. 181 the temperature in 1/1000 of centigrade.
177 The source of temperature can be battery or ambient one according to 182 The source of temperature can be battery or ambient one according to
178 the value of measure_battery_temp. 183 the value of measure_battery_temp.
179}; 184
180 185
1815. Notify Charger-Manager of charger events: cm_notify_event() 1865. Notify Charger-Manager of charger events: cm_notify_event()
182========================================================= 187==============================================================
183If a charger event needs to be reported to 188If a charger event needs to be reported to
184Charger Manager, a charger device driver that triggers the event can call 189Charger Manager, a charger device driver that triggers the event can call
185cm_notify_event(psy, type, msg) to notify the corresponding Charger Manager. 190cm_notify_event(psy, type, msg) to notify the corresponding Charger Manager.
diff --git a/Documentation/power/drivers-testing.txt b/Documentation/power/drivers-testing.rst
index 638afdf4d6b8..e53f1999fc39 100644
--- a/Documentation/power/drivers-testing.txt
+++ b/Documentation/power/drivers-testing.rst
@@ -1,7 +1,11 @@
1====================================================
1Testing suspend and resume support in device drivers 2Testing suspend and resume support in device drivers
3====================================================
4
2 (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL 5 (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
3 6
41. Preparing the test system 71. Preparing the test system
8============================
5 9
6Unfortunately, to effectively test the support for the system-wide suspend and 10Unfortunately, to effectively test the support for the system-wide suspend and
7resume transitions in a driver, it is necessary to suspend and resume a fully 11resume transitions in a driver, it is necessary to suspend and resume a fully
@@ -14,19 +18,20 @@ the machine's BIOS.
14Of course, for this purpose the test system has to be known to suspend and 18Of course, for this purpose the test system has to be known to suspend and
15resume without the driver being tested. Thus, if possible, you should first 19resume without the driver being tested. Thus, if possible, you should first
16resolve all suspend/resume-related problems in the test system before you start 20resolve all suspend/resume-related problems in the test system before you start
17testing the new driver. Please see Documentation/power/basic-pm-debugging.txt 21testing the new driver. Please see Documentation/power/basic-pm-debugging.rst
18for more information about the debugging of suspend/resume functionality. 22for more information about the debugging of suspend/resume functionality.
19 23
202. Testing the driver 242. Testing the driver
25=====================
21 26
22Once you have resolved the suspend/resume-related problems with your test system 27Once you have resolved the suspend/resume-related problems with your test system
23without the new driver, you are ready to test it: 28without the new driver, you are ready to test it:
24 29
25a) Build the driver as a module, load it and try the test modes of hibernation 30a) Build the driver as a module, load it and try the test modes of hibernation
26 (see: Documentation/power/basic-pm-debugging.txt, 1). 31 (see: Documentation/power/basic-pm-debugging.rst, 1).
27 32
28b) Load the driver and attempt to hibernate in the "reboot", "shutdown" and 33b) Load the driver and attempt to hibernate in the "reboot", "shutdown" and
29 "platform" modes (see: Documentation/power/basic-pm-debugging.txt, 1). 34 "platform" modes (see: Documentation/power/basic-pm-debugging.rst, 1).
30 35
31c) Compile the driver directly into the kernel and try the test modes of 36c) Compile the driver directly into the kernel and try the test modes of
32 hibernation. 37 hibernation.
@@ -34,12 +39,12 @@ c) Compile the driver directly into the kernel and try the test modes of
34d) Attempt to hibernate with the driver compiled directly into the kernel 39d) Attempt to hibernate with the driver compiled directly into the kernel
35 in the "reboot", "shutdown" and "platform" modes. 40 in the "reboot", "shutdown" and "platform" modes.
36 41
37e) Try the test modes of suspend (see: Documentation/power/basic-pm-debugging.txt, 42e) Try the test modes of suspend (see: Documentation/power/basic-pm-debugging.rst,
38 2). [As far as the STR tests are concerned, it should not matter whether or 43 2). [As far as the STR tests are concerned, it should not matter whether or
39 not the driver is built as a module.] 44 not the driver is built as a module.]
40 45
41f) Attempt to suspend to RAM using the s2ram tool with the driver loaded 46f) Attempt to suspend to RAM using the s2ram tool with the driver loaded
42 (see: Documentation/power/basic-pm-debugging.txt, 2). 47 (see: Documentation/power/basic-pm-debugging.rst, 2).
43 48
44Each of the above tests should be repeated several times and the STD tests 49Each of the above tests should be repeated several times and the STD tests
45should be mixed with the STR tests. If any of them fails, the driver cannot be 50should be mixed with the STR tests. If any of them fails, the driver cannot be
diff --git a/Documentation/power/energy-model.txt b/Documentation/power/energy-model.rst
index a2b0ae4c76bd..90a345d57ae9 100644
--- a/Documentation/power/energy-model.txt
+++ b/Documentation/power/energy-model.rst
@@ -1,6 +1,6 @@
1 ==================== 1====================
2 Energy Model of CPUs 2Energy Model of CPUs
3 ==================== 3====================
4 4
51. Overview 51. Overview
6----------- 6-----------
@@ -20,7 +20,7 @@ kernel, hence enabling to avoid redundant work.
20 20
21The figure below depicts an example of drivers (Arm-specific here, but the 21The figure below depicts an example of drivers (Arm-specific here, but the
22approach is applicable to any architecture) providing power costs to the EM 22approach is applicable to any architecture) providing power costs to the EM
23framework, and interested clients reading the data from it. 23framework, and interested clients reading the data from it::
24 24
25 +---------------+ +-----------------+ +---------------+ 25 +---------------+ +-----------------+ +---------------+
26 | Thermal (IPA) | | Scheduler (EAS) | | Other | 26 | Thermal (IPA) | | Scheduler (EAS) | | Other |
@@ -58,15 +58,17 @@ micro-architectures.
582. Core APIs 582. Core APIs
59------------ 59------------
60 60
61 2.1 Config options 612.1 Config options
62^^^^^^^^^^^^^^^^^^
62 63
63CONFIG_ENERGY_MODEL must be enabled to use the EM framework. 64CONFIG_ENERGY_MODEL must be enabled to use the EM framework.
64 65
65 66
66 2.2 Registration of performance domains 672.2 Registration of performance domains
68^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
67 69
68Drivers are expected to register performance domains into the EM framework by 70Drivers are expected to register performance domains into the EM framework by
69calling the following API: 71calling the following API::
70 72
71 int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, 73 int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
72 struct em_data_callback *cb); 74 struct em_data_callback *cb);
@@ -80,7 +82,8 @@ callback, and kernel/power/energy_model.c for further documentation on this
80API. 82API.
81 83
82 84
83 2.3 Accessing performance domains 852.3 Accessing performance domains
86^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
84 87
85Subsystems interested in the energy model of a CPU can retrieve it using the 88Subsystems interested in the energy model of a CPU can retrieve it using the
86em_cpu_get() API. The energy model tables are allocated once upon creation of 89em_cpu_get() API. The energy model tables are allocated once upon creation of
@@ -99,46 +102,46 @@ More details about the above APIs can be found in include/linux/energy_model.h.
99This section provides a simple example of a CPUFreq driver registering a 102This section provides a simple example of a CPUFreq driver registering a
100performance domain in the Energy Model framework using the (fake) 'foo' 103performance domain in the Energy Model framework using the (fake) 'foo'
101protocol. The driver implements an est_power() function to be provided to the 104protocol. The driver implements an est_power() function to be provided to the
102EM framework. 105EM framework::
103 106
104 -> drivers/cpufreq/foo_cpufreq.c 107 -> drivers/cpufreq/foo_cpufreq.c
105 108
10601 static int est_power(unsigned long *mW, unsigned long *KHz, int cpu) 109 01 static int est_power(unsigned long *mW, unsigned long *KHz, int cpu)
10702 { 110 02 {
10803 long freq, power; 111 03 long freq, power;
10904 112 04
11005 /* Use the 'foo' protocol to ceil the frequency */ 113 05 /* Use the 'foo' protocol to ceil the frequency */
11106 freq = foo_get_freq_ceil(cpu, *KHz); 114 06 freq = foo_get_freq_ceil(cpu, *KHz);
11207 if (freq < 0) 115 07 if (freq < 0)
11308 return freq; 116 08 return freq;
11409 117 09
11510 /* Estimate the power cost for the CPU at the relevant freq. */ 118 10 /* Estimate the power cost for the CPU at the relevant freq. */
11611 power = foo_estimate_power(cpu, freq); 119 11 power = foo_estimate_power(cpu, freq);
11712 if (power < 0) 120 12 if (power < 0)
11813 return power; 121 13 return power;
11914 122 14
12015 /* Return the values to the EM framework */ 123 15 /* Return the values to the EM framework */
12116 *mW = power; 124 16 *mW = power;
12217 *KHz = freq; 125 17 *KHz = freq;
12318 126 18
12419 return 0; 127 19 return 0;
12520 } 128 20 }
12621 129 21
12722 static int foo_cpufreq_init(struct cpufreq_policy *policy) 130 22 static int foo_cpufreq_init(struct cpufreq_policy *policy)
12823 { 131 23 {
12924 struct em_data_callback em_cb = EM_DATA_CB(est_power); 132 24 struct em_data_callback em_cb = EM_DATA_CB(est_power);
13025 int nr_opp, ret; 133 25 int nr_opp, ret;
13126 134 26
13227 /* Do the actual CPUFreq init work ... */ 135 27 /* Do the actual CPUFreq init work ... */
13328 ret = do_foo_cpufreq_init(policy); 136 28 ret = do_foo_cpufreq_init(policy);
13429 if (ret) 137 29 if (ret)
13530 return ret; 138 30 return ret;
13631 139 31
13732 /* Find the number of OPPs for this policy */ 140 32 /* Find the number of OPPs for this policy */
13833 nr_opp = foo_get_nr_opp(policy); 141 33 nr_opp = foo_get_nr_opp(policy);
13934 142 34
14035 /* And register the new performance domain */ 143 35 /* And register the new performance domain */
14136 em_register_perf_domain(policy->cpus, nr_opp, &em_cb); 144 36 em_register_perf_domain(policy->cpus, nr_opp, &em_cb);
14237 145 37
14338 return 0; 146 38 return 0;
14439 } 147 39 }
diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.rst
index cd283190855a..ef110fe55e82 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.rst
@@ -1,13 +1,18 @@
1=================
1Freezing of tasks 2Freezing of tasks
2 (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL 3=================
4
5(C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
3 6
4I. What is the freezing of tasks? 7I. What is the freezing of tasks?
8=================================
5 9
6The freezing of tasks is a mechanism by which user space processes and some 10The freezing of tasks is a mechanism by which user space processes and some
7kernel threads are controlled during hibernation or system-wide suspend (on some 11kernel threads are controlled during hibernation or system-wide suspend (on some
8architectures). 12architectures).
9 13
10II. How does it work? 14II. How does it work?
15=====================
11 16
12There are three per-task flags used for that, PF_NOFREEZE, PF_FROZEN 17There are three per-task flags used for that, PF_NOFREEZE, PF_FROZEN
13and PF_FREEZER_SKIP (the last one is auxiliary). The tasks that have 18and PF_FREEZER_SKIP (the last one is auxiliary). The tasks that have
@@ -41,7 +46,7 @@ explicitly in suitable places or use the wait_event_freezable() or
41wait_event_freezable_timeout() macros (defined in include/linux/freezer.h) 46wait_event_freezable_timeout() macros (defined in include/linux/freezer.h)
42that combine interruptible sleep with checking if the task is to be frozen and 47that combine interruptible sleep with checking if the task is to be frozen and
43calling try_to_freeze(). The main loop of a freezable kernel thread may look 48calling try_to_freeze(). The main loop of a freezable kernel thread may look
44like the following one: 49like the following one::
45 50
46 set_freezable(); 51 set_freezable();
47 do { 52 do {
@@ -65,7 +70,7 @@ order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that
65have been frozen leave __refrigerator() and continue running. 70have been frozen leave __refrigerator() and continue running.
66 71
67 72
68Rationale behind the functions dealing with freezing and thawing of tasks: 73Rationale behind the functions dealing with freezing and thawing of tasks
69------------------------------------------------------------------------- 74-------------------------------------------------------------------------
70 75
71freeze_processes(): 76freeze_processes():
@@ -86,6 +91,7 @@ thaw_processes():
86 91
87 92
88III. Which kernel threads are freezable? 93III. Which kernel threads are freezable?
94========================================
89 95
90Kernel threads are not freezable by default. However, a kernel thread may clear 96Kernel threads are not freezable by default. However, a kernel thread may clear
91PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE 97PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE
@@ -93,37 +99,39 @@ directly is not allowed). From this point it is regarded as freezable
93and must call try_to_freeze() in a suitable place. 99and must call try_to_freeze() in a suitable place.
94 100
95IV. Why do we do that? 101IV. Why do we do that?
102======================
96 103
97Generally speaking, there are a couple of reasons to use the freezing of tasks: 104Generally speaking, there are a couple of reasons to use the freezing of tasks:
98 105
991. The principal reason is to prevent filesystems from being damaged after 1061. The principal reason is to prevent filesystems from being damaged after
100hibernation. At the moment we have no simple means of checkpointing 107 hibernation. At the moment we have no simple means of checkpointing
101filesystems, so if there are any modifications made to filesystem data and/or 108 filesystems, so if there are any modifications made to filesystem data and/or
102metadata on disks, we cannot bring them back to the state from before the 109 metadata on disks, we cannot bring them back to the state from before the
103modifications. At the same time each hibernation image contains some 110 modifications. At the same time each hibernation image contains some
104filesystem-related information that must be consistent with the state of the 111 filesystem-related information that must be consistent with the state of the
105on-disk data and metadata after the system memory state has been restored from 112 on-disk data and metadata after the system memory state has been restored
106the image (otherwise the filesystems will be damaged in a nasty way, usually 113 from the image (otherwise the filesystems will be damaged in a nasty way,
107making them almost impossible to repair). We therefore freeze tasks that might 114 usually making them almost impossible to repair). We therefore freeze
108cause the on-disk filesystems' data and metadata to be modified after the 115 tasks that might cause the on-disk filesystems' data and metadata to be
109hibernation image has been created and before the system is finally powered off. 116 modified after the hibernation image has been created and before the
110The majority of these are user space processes, but if any of the kernel threads 117 system is finally powered off. The majority of these are user space
111may cause something like this to happen, they have to be freezable. 118 processes, but if any of the kernel threads may cause something like this
119 to happen, they have to be freezable.
112 120
1132. Next, to create the hibernation image we need to free a sufficient amount of 1212. Next, to create the hibernation image we need to free a sufficient amount of
114memory (approximately 50% of available RAM) and we need to do that before 122 memory (approximately 50% of available RAM) and we need to do that before
115devices are deactivated, because we generally need them for swapping out. Then, 123 devices are deactivated, because we generally need them for swapping out.
116after the memory for the image has been freed, we don't want tasks to allocate 124 Then, after the memory for the image has been freed, we don't want tasks
117additional memory and we prevent them from doing that by freezing them earlier. 125 to allocate additional memory and we prevent them from doing that by
118[Of course, this also means that device drivers should not allocate substantial 126 freezing them earlier. [Of course, this also means that device drivers
119amounts of memory from their .suspend() callbacks before hibernation, but this 127 should not allocate substantial amounts of memory from their .suspend()
120is a separate issue.] 128 callbacks before hibernation, but this is a separate issue.]
121 129
1223. The third reason is to prevent user space processes and some kernel threads 1303. The third reason is to prevent user space processes and some kernel threads
123from interfering with the suspending and resuming of devices. A user space 131 from interfering with the suspending and resuming of devices. A user space
124process running on a second CPU while we are suspending devices may, for 132 process running on a second CPU while we are suspending devices may, for
125example, be troublesome and without the freezing of tasks we would need some 133 example, be troublesome and without the freezing of tasks we would need some
126safeguards against race conditions that might occur in such a case. 134 safeguards against race conditions that might occur in such a case.
127 135
128Although Linus Torvalds doesn't like the freezing of tasks, he said this in one 136Although Linus Torvalds doesn't like the freezing of tasks, he said this in one
129of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608): 137of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608):
@@ -132,7 +140,7 @@ of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608):
132 140
133Linus: In many ways, 'at all'. 141Linus: In many ways, 'at all'.
134 142
135I _do_ realize the IO request queue issues, and that we cannot actually do 143I **do** realize the IO request queue issues, and that we cannot actually do
136s2ram with some devices in the middle of a DMA. So we want to be able to 144s2ram with some devices in the middle of a DMA. So we want to be able to
137avoid *that*, there's no question about that. And I suspect that stopping 145avoid *that*, there's no question about that. And I suspect that stopping
138user threads and then waiting for a sync is practically one of the easier 146user threads and then waiting for a sync is practically one of the easier
@@ -150,17 +158,18 @@ thawed after the driver's .resume() callback has run, so it won't be accessing
150the device while it's suspended. 158the device while it's suspended.
151 159
1524. Another reason for freezing tasks is to prevent user space processes from 1604. Another reason for freezing tasks is to prevent user space processes from
153realizing that hibernation (or suspend) operation takes place. Ideally, user 161 realizing that hibernation (or suspend) operation takes place. Ideally, user
154space processes should not notice that such a system-wide operation has occurred 162 space processes should not notice that such a system-wide operation has
155and should continue running without any problems after the restore (or resume 163 occurred and should continue running without any problems after the restore
156from suspend). Unfortunately, in the most general case this is quite difficult 164 (or resume from suspend). Unfortunately, in the most general case this
157to achieve without the freezing of tasks. Consider, for example, a process 165 is quite difficult to achieve without the freezing of tasks. Consider,
158that depends on all CPUs being online while it's running. Since we need to 166 for example, a process that depends on all CPUs being online while it's
159disable nonboot CPUs during the hibernation, if this process is not frozen, it 167 running. Since we need to disable nonboot CPUs during the hibernation,
160may notice that the number of CPUs has changed and may start to work incorrectly 168 if this process is not frozen, it may notice that the number of CPUs has
161because of that. 169 changed and may start to work incorrectly because of that.
162 170
163V. Are there any problems related to the freezing of tasks? 171V. Are there any problems related to the freezing of tasks?
172===========================================================
164 173
165Yes, there are. 174Yes, there are.
166 175
@@ -172,11 +181,12 @@ may be undesirable. That's why kernel threads are not freezable by default.
172 181
173Second, there are the following two problems related to the freezing of user 182Second, there are the following two problems related to the freezing of user
174space processes: 183space processes:
184
1751. Putting processes into an uninterruptible sleep distorts the load average. 1851. Putting processes into an uninterruptible sleep distorts the load average.
1762. Now that we have FUSE, plus the framework for doing device drivers in 1862. Now that we have FUSE, plus the framework for doing device drivers in
177userspace, it gets even more complicated because some userspace processes are 187 userspace, it gets even more complicated because some userspace processes are
178now doing the sorts of things that kernel threads do 188 now doing the sorts of things that kernel threads do
179(https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012309.html). 189 (https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012309.html).
180 190
181The problem 1. seems to be fixable, although it hasn't been fixed so far. The 191The problem 1. seems to be fixable, although it hasn't been fixed so far. The
182other one is more serious, but it seems that we can work around it by using 192other one is more serious, but it seems that we can work around it by using
@@ -201,6 +211,7 @@ requested early enough using the suspend notifier API described in
201Documentation/driver-api/pm/notifiers.rst. 211Documentation/driver-api/pm/notifiers.rst.
202 212
203VI. Are there any precautions to be taken to prevent freezing failures? 213VI. Are there any precautions to be taken to prevent freezing failures?
214=======================================================================
204 215
205Yes, there are. 216Yes, there are.
206 217
@@ -226,6 +237,8 @@ So, to summarize, use [un]lock_system_sleep() instead of directly using
226mutex_[un]lock(&system_transition_mutex). That would prevent freezing failures. 237mutex_[un]lock(&system_transition_mutex). That would prevent freezing failures.
227 238
228VII. Miscellaneous 239VII. Miscellaneous
240==================
241
229/sys/power/pm_freeze_timeout controls how long it will cost at most to freeze 242/sys/power/pm_freeze_timeout controls how long it will cost at most to freeze
230all user space processes or all freezable kernel threads, in units of milliseconds. 243all user space processes or all freezable kernel threads, in units of milliseconds.
231The default value is 20000, with range of unsigned integer. 244The default value is 20000, with range of unsigned integer.
diff --git a/Documentation/power/index.rst b/Documentation/power/index.rst
new file mode 100644
index 000000000000..20415f21e48a
--- /dev/null
+++ b/Documentation/power/index.rst
@@ -0,0 +1,46 @@
1:orphan:
2
3================
4Power Management
5================
6
7.. toctree::
8 :maxdepth: 1
9
10 apm-acpi
11 basic-pm-debugging
12 charger-manager
13 drivers-testing
14 energy-model
15 freezing-of-tasks
16 interface
17 opp
18 pci
19 pm_qos_interface
20 power_supply_class
21 runtime_pm
22 s2ram
23 suspend-and-cpuhotplug
24 suspend-and-interrupts
25 swsusp-and-swap-files
26 swsusp-dmcrypt
27 swsusp
28 video
29 tricks
30
31 userland-swsusp
32
33 powercap/powercap
34
35 regulator/consumer
36 regulator/design
37 regulator/machine
38 regulator/overview
39 regulator/regulator
40
41.. only:: subproject and html
42
43 Indices
44 =======
45
46 * :ref:`genindex`
diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.rst
index 27df7f98668a..8d270ed27228 100644
--- a/Documentation/power/interface.txt
+++ b/Documentation/power/interface.rst
@@ -1,4 +1,6 @@
1===========================================
1Power Management Interface for System Sleep 2Power Management Interface for System Sleep
3===========================================
2 4
3Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com> 5Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
4 6
@@ -11,10 +13,10 @@ mounted at /sys).
11 13
12Reading from it returns a list of supported sleep states, encoded as: 14Reading from it returns a list of supported sleep states, encoded as:
13 15
14'freeze' (Suspend-to-Idle) 16- 'freeze' (Suspend-to-Idle)
15'standby' (Power-On Suspend) 17- 'standby' (Power-On Suspend)
16'mem' (Suspend-to-RAM) 18- 'mem' (Suspend-to-RAM)
17'disk' (Suspend-to-Disk) 19- 'disk' (Suspend-to-Disk)
18 20
19Suspend-to-Idle is always supported. Suspend-to-Disk is always supported 21Suspend-to-Idle is always supported. Suspend-to-Disk is always supported
20too as long as the kernel has been configured to support hibernation at all 22too as long as the kernel has been configured to support hibernation at all
@@ -32,18 +34,18 @@ Specifically, it tells the kernel what to do after creating a hibernation image.
32 34
33Reading from it returns a list of supported options encoded as: 35Reading from it returns a list of supported options encoded as:
34 36
35'platform' (put the system into sleep using a platform-provided method) 37- 'platform' (put the system into sleep using a platform-provided method)
36'shutdown' (shut the system down) 38- 'shutdown' (shut the system down)
37'reboot' (reboot the system) 39- 'reboot' (reboot the system)
38'suspend' (trigger a Suspend-to-RAM transition) 40- 'suspend' (trigger a Suspend-to-RAM transition)
39'test_resume' (resume-after-hibernation test mode) 41- 'test_resume' (resume-after-hibernation test mode)
40 42
41The currently selected option is printed in square brackets. 43The currently selected option is printed in square brackets.
42 44
43The 'platform' option is only available if the platform provides a special 45The 'platform' option is only available if the platform provides a special
44mechanism to put the system to sleep after creating a hibernation image (ACPI 46mechanism to put the system to sleep after creating a hibernation image (ACPI
45does that, for example). The 'suspend' option is available if Suspend-to-RAM 47does that, for example). The 'suspend' option is available if Suspend-to-RAM
46is supported. Refer to Documentation/power/basic-pm-debugging.txt for the 48is supported. Refer to Documentation/power/basic-pm-debugging.rst for the
47description of the 'test_resume' option. 49description of the 'test_resume' option.
48 50
49To select an option, write the string representing it to /sys/power/disk. 51To select an option, write the string representing it to /sys/power/disk.
@@ -71,7 +73,7 @@ If /sys/power/pm_trace contains '1', the fingerprint of each suspend/resume
71event point in turn will be stored in the RTC memory (overwriting the actual 73event point in turn will be stored in the RTC memory (overwriting the actual
72RTC information), so it will survive a system crash if one occurs right after 74RTC information), so it will survive a system crash if one occurs right after
73storing it and it can be used later to identify the driver that caused the crash 75storing it and it can be used later to identify the driver that caused the crash
74to happen (see Documentation/power/s2ram.txt for more information). 76to happen (see Documentation/power/s2ram.rst for more information).
75 77
76Initially it contains '0' which may be changed to '1' by writing a string 78Initially it contains '0' which may be changed to '1' by writing a string
77representing a nonzero integer into it. 79representing a nonzero integer into it.
diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.rst
index 0c007e250cd1..b3cf1def9dee 100644
--- a/Documentation/power/opp.txt
+++ b/Documentation/power/opp.rst
@@ -1,20 +1,23 @@
1==========================================
1Operating Performance Points (OPP) Library 2Operating Performance Points (OPP) Library
2========================================== 3==========================================
3 4
4(C) 2009-2010 Nishanth Menon <nm@ti.com>, Texas Instruments Incorporated 5(C) 2009-2010 Nishanth Menon <nm@ti.com>, Texas Instruments Incorporated
5 6
6Contents 7.. Contents
7-------- 8
81. Introduction 9 1. Introduction
92. Initial OPP List Registration 10 2. Initial OPP List Registration
103. OPP Search Functions 11 3. OPP Search Functions
114. OPP Availability Control Functions 12 4. OPP Availability Control Functions
125. OPP Data Retrieval Functions 13 5. OPP Data Retrieval Functions
136. Data Structures 14 6. Data Structures
14 15
151. Introduction 161. Introduction
16=============== 17===============
18
171.1 What is an Operating Performance Point (OPP)? 191.1 What is an Operating Performance Point (OPP)?
20-------------------------------------------------
18 21
19Complex SoCs of today consist of multiple sub-modules working in conjunction. 22Complex SoCs of today consist of multiple sub-modules working in conjunction.
20In an operational system executing varied use cases, not all modules in the SoC 23In an operational system executing varied use cases, not all modules in the SoC
@@ -28,16 +31,19 @@ the device will support per domain are called Operating Performance Points or
28OPPs. 31OPPs.
29 32
30As an example: 33As an example:
34
31Let us consider an MPU device which supports the following: 35Let us consider an MPU device which supports the following:
32{300MHz at minimum voltage of 1V}, {800MHz at minimum voltage of 1.2V}, 36{300MHz at minimum voltage of 1V}, {800MHz at minimum voltage of 1.2V},
33{1GHz at minimum voltage of 1.3V} 37{1GHz at minimum voltage of 1.3V}
34 38
35We can represent these as three OPPs as the following {Hz, uV} tuples: 39We can represent these as three OPPs as the following {Hz, uV} tuples:
36{300000000, 1000000} 40
37{800000000, 1200000} 41- {300000000, 1000000}
38{1000000000, 1300000} 42- {800000000, 1200000}
43- {1000000000, 1300000}
39 44
401.2 Operating Performance Points Library 451.2 Operating Performance Points Library
46----------------------------------------
41 47
42OPP library provides a set of helper functions to organize and query the OPP 48OPP library provides a set of helper functions to organize and query the OPP
43information. The library is located in drivers/base/power/opp.c and the header 49information. The library is located in drivers/base/power/opp.c and the header
@@ -46,9 +52,10 @@ CONFIG_PM_OPP from power management menuconfig menu. OPP library depends on
46CONFIG_PM as certain SoCs such as Texas Instrument's OMAP framework allows to 52CONFIG_PM as certain SoCs such as Texas Instrument's OMAP framework allows to
47optionally boot at a certain OPP without needing cpufreq. 53optionally boot at a certain OPP without needing cpufreq.
48 54
49Typical usage of the OPP library is as follows: 55Typical usage of the OPP library is as follows::
50(users) -> registers a set of default OPPs -> (library) 56
51SoC framework -> modifies on required cases certain OPPs -> OPP layer 57 (users) -> registers a set of default OPPs -> (library)
58 SoC framework -> modifies on required cases certain OPPs -> OPP layer
52 -> queries to search/retrieve information -> 59 -> queries to search/retrieve information ->
53 60
54OPP layer expects each domain to be represented by a unique device pointer. SoC 61OPP layer expects each domain to be represented by a unique device pointer. SoC
@@ -57,8 +64,9 @@ list is expected to be an optimally small number typically around 5 per device.
57This initial list contains a set of OPPs that the framework expects to be safely 64This initial list contains a set of OPPs that the framework expects to be safely
58enabled by default in the system. 65enabled by default in the system.
59 66
60Note on OPP Availability: 67Note on OPP Availability
61------------------------ 68^^^^^^^^^^^^^^^^^^^^^^^^
69
62As the system proceeds to operate, SoC framework may choose to make certain 70As the system proceeds to operate, SoC framework may choose to make certain
63OPPs available or not available on each device based on various external 71OPPs available or not available on each device based on various external
64factors. Example usage: Thermal management or other exceptional situations where 72factors. Example usage: Thermal management or other exceptional situations where
@@ -88,7 +96,8 @@ registering the OPPs is maintained by OPP library throughout the device
88operation. The SoC framework can subsequently control the availability of the 96operation. The SoC framework can subsequently control the availability of the
89OPPs dynamically using the dev_pm_opp_enable / disable functions. 97OPPs dynamically using the dev_pm_opp_enable / disable functions.
90 98
91dev_pm_opp_add - Add a new OPP for a specific domain represented by the device pointer. 99dev_pm_opp_add
100 Add a new OPP for a specific domain represented by the device pointer.
92 The OPP is defined using the frequency and voltage. Once added, the OPP 101 The OPP is defined using the frequency and voltage. Once added, the OPP
93 is assumed to be available and control of its availability can be done 102 is assumed to be available and control of its availability can be done
94 with the dev_pm_opp_enable/disable functions. OPP library internally stores 103 with the dev_pm_opp_enable/disable functions. OPP library internally stores
@@ -96,9 +105,11 @@ dev_pm_opp_add - Add a new OPP for a specific domain represented by the device p
96 used by SoC framework to define an optimal list as per the demands of 105 used by SoC framework to define an optimal list as per the demands of
97 SoC usage environment. 106 SoC usage environment.
98 107
99 WARNING: Do not use this function in interrupt context. 108 WARNING:
109 Do not use this function in interrupt context.
110
111 Example::
100 112
101 Example:
102 soc_pm_init() 113 soc_pm_init()
103 { 114 {
104 /* Do things */ 115 /* Do things */
@@ -125,12 +136,15 @@ Callers of these functions shall call dev_pm_opp_put() after they have used the
125OPP. Otherwise the memory for the OPP will never get freed and result in 136OPP. Otherwise the memory for the OPP will never get freed and result in
126memleak. 137memleak.
127 138
128dev_pm_opp_find_freq_exact - Search for an OPP based on an *exact* frequency and 139dev_pm_opp_find_freq_exact
140 Search for an OPP based on an *exact* frequency and
129 availability. This function is especially useful to enable an OPP which 141 availability. This function is especially useful to enable an OPP which
130 is not available by default. 142 is not available by default.
131 Example: In a case when SoC framework detects a situation where a 143 Example: In a case when SoC framework detects a situation where a
132 higher frequency could be made available, it can use this function to 144 higher frequency could be made available, it can use this function to
133 find the OPP prior to calling dev_pm_opp_enable to actually make it available. 145 find the OPP prior to calling dev_pm_opp_enable to actually make
146 it available::
147
134 opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false); 148 opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
135 dev_pm_opp_put(opp); 149 dev_pm_opp_put(opp);
136 /* don't operate on the pointer.. just do a sanity check.. */ 150 /* don't operate on the pointer.. just do a sanity check.. */
@@ -141,27 +155,34 @@ dev_pm_opp_find_freq_exact - Search for an OPP based on an *exact* frequency and
141 dev_pm_opp_enable(dev,1000000000); 155 dev_pm_opp_enable(dev,1000000000);
142 } 156 }
143 157
144 NOTE: This is the only search function that operates on OPPs which are 158 NOTE:
145 not available. 159 This is the only search function that operates on OPPs which are
160 not available.
146 161
147dev_pm_opp_find_freq_floor - Search for an available OPP which is *at most* the 162dev_pm_opp_find_freq_floor
163 Search for an available OPP which is *at most* the
148 provided frequency. This function is useful while searching for a lesser 164 provided frequency. This function is useful while searching for a lesser
149 match OR operating on OPP information in the order of decreasing 165 match OR operating on OPP information in the order of decreasing
150 frequency. 166 frequency.
151 Example: To find the highest opp for a device: 167 Example: To find the highest opp for a device::
168
152 freq = ULONG_MAX; 169 freq = ULONG_MAX;
153 opp = dev_pm_opp_find_freq_floor(dev, &freq); 170 opp = dev_pm_opp_find_freq_floor(dev, &freq);
154 dev_pm_opp_put(opp); 171 dev_pm_opp_put(opp);
155 172
156dev_pm_opp_find_freq_ceil - Search for an available OPP which is *at least* the 173dev_pm_opp_find_freq_ceil
174 Search for an available OPP which is *at least* the
157 provided frequency. This function is useful while searching for a 175 provided frequency. This function is useful while searching for a
158 higher match OR operating on OPP information in the order of increasing 176 higher match OR operating on OPP information in the order of increasing
159 frequency. 177 frequency.
160 Example 1: To find the lowest opp for a device: 178 Example 1: To find the lowest opp for a device::
179
161 freq = 0; 180 freq = 0;
162 opp = dev_pm_opp_find_freq_ceil(dev, &freq); 181 opp = dev_pm_opp_find_freq_ceil(dev, &freq);
163 dev_pm_opp_put(opp); 182 dev_pm_opp_put(opp);
164 Example 2: A simplified implementation of a SoC cpufreq_driver->target: 183
184 Example 2: A simplified implementation of a SoC cpufreq_driver->target::
185
165 soc_cpufreq_target(..) 186 soc_cpufreq_target(..)
166 { 187 {
167 /* Do stuff like policy checks etc. */ 188 /* Do stuff like policy checks etc. */
@@ -184,12 +205,15 @@ fine grained dynamic control of which sets of OPPs are operationally available.
184These functions are intended to *temporarily* remove an OPP in conditions such 205These functions are intended to *temporarily* remove an OPP in conditions such
185as thermal considerations (e.g. don't use OPPx until the temperature drops). 206as thermal considerations (e.g. don't use OPPx until the temperature drops).
186 207
187WARNING: Do not use these functions in interrupt context. 208WARNING:
209 Do not use these functions in interrupt context.
188 210
189dev_pm_opp_enable - Make an OPP available for operation. 211dev_pm_opp_enable
212 Make an OPP available for operation.
190 Example: Lets say that 1GHz OPP is to be made available only if the 213 Example: Lets say that 1GHz OPP is to be made available only if the
191 SoC temperature is lower than a certain threshold. The SoC framework 214 SoC temperature is lower than a certain threshold. The SoC framework
192 implementation might choose to do something as follows: 215 implementation might choose to do something as follows::
216
193 if (cur_temp < temp_low_thresh) { 217 if (cur_temp < temp_low_thresh) {
194 /* Enable 1GHz if it was disabled */ 218 /* Enable 1GHz if it was disabled */
195 opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false); 219 opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
@@ -201,10 +225,12 @@ dev_pm_opp_enable - Make a OPP available for operation.
201 goto try_something_else; 225 goto try_something_else;
202 } 226 }
203 227
204dev_pm_opp_disable - Make an OPP to be not available for operation 228dev_pm_opp_disable
229 Make an OPP to be not available for operation
205 Example: Lets say that 1GHz OPP is to be disabled if the temperature 230 Example: Lets say that 1GHz OPP is to be disabled if the temperature
206 exceeds a threshold value. The SoC framework implementation might 231 exceeds a threshold value. The SoC framework implementation might
207 choose to do something as follows: 232 choose to do something as follows::
233
208 if (cur_temp > temp_high_thresh) { 234 if (cur_temp > temp_high_thresh) {
209 /* Disable 1GHz if it was enabled */ 235 /* Disable 1GHz if it was enabled */
210 opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true); 236 opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true);
@@ -223,11 +249,13 @@ information from the OPP structure is necessary. Once an OPP pointer is
223retrieved using the search functions, the following functions can be used by SoC 249retrieved using the search functions, the following functions can be used by SoC
224framework to retrieve the information represented inside the OPP layer. 250framework to retrieve the information represented inside the OPP layer.
225 251
226dev_pm_opp_get_voltage - Retrieve the voltage represented by the opp pointer. 252dev_pm_opp_get_voltage
253 Retrieve the voltage represented by the opp pointer.
227 Example: At a cpufreq transition to a different frequency, SoC 254 Example: At a cpufreq transition to a different frequency, SoC
228 framework requires to set the voltage represented by the OPP using 255 framework requires to set the voltage represented by the OPP using
229 the regulator framework to the Power Management chip providing the 256 the regulator framework to the Power Management chip providing the
230 voltage. 257 voltage::
258
231 soc_switch_to_freq_voltage(freq) 259 soc_switch_to_freq_voltage(freq)
232 { 260 {
233 /* do things */ 261 /* do things */
@@ -239,10 +267,12 @@ dev_pm_opp_get_voltage - Retrieve the voltage represented by the opp pointer.
239 /* do other things */ 267 /* do other things */
240 } 268 }
241 269
242dev_pm_opp_get_freq - Retrieve the freq represented by the opp pointer. 270dev_pm_opp_get_freq
271 Retrieve the freq represented by the opp pointer.
243 Example: Lets say the SoC framework uses a couple of helper functions 272 Example: Lets say the SoC framework uses a couple of helper functions
244 we could pass opp pointers instead of doing additional parameters to 273 we could pass opp pointers instead of doing additional parameters to
245 handle quite a bit of data parameters. 274 handle quite a bit of data parameters::
275
246 soc_cpufreq_target(..) 276 soc_cpufreq_target(..)
247 { 277 {
248 /* do things.. */ 278 /* do things.. */
@@ -264,9 +294,11 @@ dev_pm_opp_get_freq - Retrieve the freq represented by the opp pointer.
264 /* do things.. */ 294 /* do things.. */
265 } 295 }
266 296
267dev_pm_opp_get_opp_count - Retrieve the number of available opps for a device 297dev_pm_opp_get_opp_count
298 Retrieve the number of available opps for a device
268 Example: Lets say a co-processor in the SoC needs to know the available 299 Example: Lets say a co-processor in the SoC needs to know the available
269 frequencies in a table, the main processor can notify as following: 300 frequencies in a table, the main processor can notify as following::
301
270 soc_notify_coproc_available_frequencies() 302 soc_notify_coproc_available_frequencies()
271 { 303 {
272 /* Do things */ 304 /* Do things */
@@ -289,54 +321,59 @@ dev_pm_opp_get_opp_count - Retrieve the number of available opps for a device
289================== 321==================
290Typically an SoC contains multiple voltage domains which are variable. Each 322Typically an SoC contains multiple voltage domains which are variable. Each
291domain is represented by a device pointer. The relationship to OPP can be 323domain is represented by a device pointer. The relationship to OPP can be
292represented as follows: 324represented as follows::
293SoC 325
294 |- device 1 326 SoC
295 | |- opp 1 (availability, freq, voltage) 327 |- device 1
296 | |- opp 2 .. 328 | |- opp 1 (availability, freq, voltage)
297 ... ... 329 | |- opp 2 ..
298 | `- opp n .. 330 ... ...
299 |- device 2 331 | `- opp n ..
300 ... 332 |- device 2
301 `- device m 333 ...
334 `- device m
302 335
303OPP library maintains an internal list that the SoC framework populates and 336OPP library maintains an internal list that the SoC framework populates and
304accessed by various functions as described above. However, the structures 337accessed by various functions as described above. However, the structures
305representing the actual OPPs and domains are internal to the OPP library itself 338representing the actual OPPs and domains are internal to the OPP library itself
306to allow for suitable abstraction reusable across systems. 339to allow for suitable abstraction reusable across systems.
307 340
308struct dev_pm_opp - The internal data structure of OPP library which is used to 341struct dev_pm_opp
342 The internal data structure of OPP library which is used to
309 represent an OPP. In addition to the freq, voltage, availability 343 represent an OPP. In addition to the freq, voltage, availability
310 information, it also contains internal book keeping information required 344 information, it also contains internal book keeping information required
311 for the OPP library to operate on. Pointer to this structure is 345 for the OPP library to operate on. Pointer to this structure is
312 provided back to the users such as SoC framework to be used as a 346 provided back to the users such as SoC framework to be used as a
313 provided back to the users such as SoC framework to be used as an 346 provided back to the users such as SoC framework to be used as an
314 348
315 WARNING: The struct dev_pm_opp pointer should not be parsed or modified by the 349 WARNING:
316 users. The defaults for an instance are populated by dev_pm_opp_add, but the 350 The struct dev_pm_opp pointer should not be parsed or modified by the
317 availability of the OPP can be modified by dev_pm_opp_enable/disable functions. 351 users. The defaults for an instance are populated by
352 dev_pm_opp_add, but the availability of the OPP can be modified
353 by dev_pm_opp_enable/disable functions.
318 354
319struct device - This is used to identify a domain to the OPP layer. The 355struct device
356 This is used to identify a domain to the OPP layer. The
320 nature of the device and its implementation is left to the user of 357 nature of the device and its implementation is left to the user of
321 OPP library such as the SoC framework. 358 OPP library such as the SoC framework.
322 359
323Overall, in a simplistic view, the data structure operations is represented as 360Overall, in a simplistic view, the data structure operations is represented as
324following: 361following::
325 362
326Initialization / modification: 363 Initialization / modification:
327 +-----+ /- dev_pm_opp_enable 364 +-----+ /- dev_pm_opp_enable
328dev_pm_opp_add --> | opp | <------- 365 dev_pm_opp_add --> | opp | <-------
329 | +-----+ \- dev_pm_opp_disable 366 | +-----+ \- dev_pm_opp_disable
330 \-------> domain_info(device) 367 \-------> domain_info(device)
331 368
332Search functions: 369 Search functions:
333 /-- dev_pm_opp_find_freq_ceil ---\ +-----+ 370 /-- dev_pm_opp_find_freq_ceil ---\ +-----+
334domain_info<---- dev_pm_opp_find_freq_exact -----> | opp | 371 domain_info<---- dev_pm_opp_find_freq_exact -----> | opp |
335 \-- dev_pm_opp_find_freq_floor ---/ +-----+ 372 \-- dev_pm_opp_find_freq_floor ---/ +-----+
336 373
337Retrieval functions: 374 Retrieval functions:
338+-----+ /- dev_pm_opp_get_voltage 375 +-----+ /- dev_pm_opp_get_voltage
339| opp | <--- 376 | opp | <---
340+-----+ \- dev_pm_opp_get_freq 377 +-----+ \- dev_pm_opp_get_freq
341 378
342domain_info <- dev_pm_opp_get_opp_count 379 domain_info <- dev_pm_opp_get_opp_count
diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.rst
index 8eaf9ee24d43..0e2ef7429304 100644
--- a/Documentation/power/pci.txt
+++ b/Documentation/power/pci.rst
@@ -1,4 +1,6 @@
1====================
1PCI Power Management 2PCI Power Management
3====================
2 4
3Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. 5Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
4 6
@@ -9,14 +11,14 @@ management. Based on previous work by Patrick Mochel <mochel@transmeta.com>
9This document only covers the aspects of power management specific to PCI 11This document only covers the aspects of power management specific to PCI
10devices. For general description of the kernel's interfaces related to device 12devices. For general description of the kernel's interfaces related to device
11power management refer to Documentation/driver-api/pm/devices.rst and 13power management refer to Documentation/driver-api/pm/devices.rst and
12Documentation/power/runtime_pm.txt. 14Documentation/power/runtime_pm.rst.
13 15
14--------------------------------------------------------------------------- 16.. contents:
15 17
161. Hardware and Platform Support for PCI Power Management 18 1. Hardware and Platform Support for PCI Power Management
172. PCI Subsystem and Device Power Management 19 2. PCI Subsystem and Device Power Management
183. PCI Device Drivers and Power Management 20 3. PCI Device Drivers and Power Management
194. Resources 21 4. Resources
20 22
21 23
221. Hardware and Platform Support for PCI Power Management 241. Hardware and Platform Support for PCI Power Management
@@ -24,6 +26,7 @@ Documentation/power/runtime_pm.txt.
24 26
251.1. Native and Platform-Based Power Management 271.1. Native and Platform-Based Power Management
26----------------------------------------------- 28-----------------------------------------------
29
27In general, power management is a feature allowing one to save energy by putting 30In general, power management is a feature allowing one to save energy by putting
28devices into states in which they draw less power (low-power states) at the 31devices into states in which they draw less power (low-power states) at the
29price of reduced functionality or performance. 32price of reduced functionality or performance.
@@ -67,6 +70,7 @@ mechanisms have to be used simultaneously to obtain the desired result.
67 70
681.2. Native PCI Power Management 711.2. Native PCI Power Management
69-------------------------------- 72--------------------------------
73
70The PCI Bus Power Management Interface Specification (PCI PM Spec) was 74The PCI Bus Power Management Interface Specification (PCI PM Spec) was
71introduced between the PCI 2.1 and PCI 2.2 Specifications. It defined a 75introduced between the PCI 2.1 and PCI 2.2 Specifications. It defined a
72standard interface for performing various operations related to power 76standard interface for performing various operations related to power
@@ -134,6 +138,7 @@ sufficiently active to generate a wakeup signal.
134 138
1351.3. ACPI Device Power Management 1391.3. ACPI Device Power Management
136--------------------------------- 140---------------------------------
141
137The platform firmware support for the power management of PCI devices is 142The platform firmware support for the power management of PCI devices is
138system-specific. However, if the system in question is compliant with the 143system-specific. However, if the system in question is compliant with the
139Advanced Configuration and Power Interface (ACPI) Specification, like the 144Advanced Configuration and Power Interface (ACPI) Specification, like the
@@ -194,6 +199,7 @@ enabled for the device to be able to generate wakeup signals.
194 199
1951.4. Wakeup Signaling 2001.4. Wakeup Signaling
196--------------------- 201---------------------
202
197Wakeup signals generated by PCI devices, either as native PCI PMEs, or as 203Wakeup signals generated by PCI devices, either as native PCI PMEs, or as
198a result of the execution of the _DSW (or _PSW) ACPI control method before 204a result of the execution of the _DSW (or _PSW) ACPI control method before
199putting the device into a low-power state, have to be caught and handled as 205putting the device into a low-power state, have to be caught and handled as
@@ -265,14 +271,15 @@ the native PCI Express PME signaling cannot be used by the kernel in that case.
265 271
2662.1. Device Power Management Callbacks 2722.1. Device Power Management Callbacks
267-------------------------------------- 273--------------------------------------
274
268The PCI Subsystem participates in the power management of PCI devices in a 275The PCI Subsystem participates in the power management of PCI devices in a
269number of ways. First of all, it provides an intermediate code layer between 276number of ways. First of all, it provides an intermediate code layer between
270the device power management core (PM core) and PCI device drivers. 277the device power management core (PM core) and PCI device drivers.
271Specifically, the pm field of the PCI subsystem's struct bus_type object, 278Specifically, the pm field of the PCI subsystem's struct bus_type object,
272pci_bus_type, points to a struct dev_pm_ops object, pci_dev_pm_ops, containing 279pci_bus_type, points to a struct dev_pm_ops object, pci_dev_pm_ops, containing
273pointers to several device power management callbacks: 280pointers to several device power management callbacks::
274 281
275const struct dev_pm_ops pci_dev_pm_ops = { 282 const struct dev_pm_ops pci_dev_pm_ops = {
276 .prepare = pci_pm_prepare, 283 .prepare = pci_pm_prepare,
277 .complete = pci_pm_complete, 284 .complete = pci_pm_complete,
278 .suspend = pci_pm_suspend, 285 .suspend = pci_pm_suspend,
@@ -290,7 +297,7 @@ const struct dev_pm_ops pci_dev_pm_ops = {
290 .runtime_suspend = pci_pm_runtime_suspend, 297 .runtime_suspend = pci_pm_runtime_suspend,
291 .runtime_resume = pci_pm_runtime_resume, 298 .runtime_resume = pci_pm_runtime_resume,
292 .runtime_idle = pci_pm_runtime_idle, 299 .runtime_idle = pci_pm_runtime_idle,
293}; 300 };
294 301
295These callbacks are executed by the PM core in various situations related to 302These callbacks are executed by the PM core in various situations related to
296device power management and they, in turn, execute power management callbacks 303device power management and they, in turn, execute power management callbacks
@@ -299,9 +306,9 @@ involving some standard configuration registers of PCI devices that device
299drivers need not know or care about. 306drivers need not know or care about.
300 307
301The structure representing a PCI device, struct pci_dev, contains several fields 308The structure representing a PCI device, struct pci_dev, contains several fields
302that these callbacks operate on: 309that these callbacks operate on::
303 310
304struct pci_dev { 311 struct pci_dev {
305 ... 312 ...
306 pci_power_t current_state; /* Current operating state. */ 313 pci_power_t current_state; /* Current operating state. */
307 int pm_cap; /* PM capability offset in the 314 int pm_cap; /* PM capability offset in the
@@ -315,13 +322,14 @@ struct pci_dev {
315 unsigned int wakeup_prepared:1; /* Device prepared for wake up */ 322 unsigned int wakeup_prepared:1; /* Device prepared for wake up */
316 unsigned int d3_delay; /* D3->D0 transition time in ms */ 323 unsigned int d3_delay; /* D3->D0 transition time in ms */
317 ... 324 ...
318}; 325 };
319 326
320They also indirectly use some fields of the struct device that is embedded in 327They also indirectly use some fields of the struct device that is embedded in
321struct pci_dev. 328struct pci_dev.
322 329
3232.2. Device Initialization 3302.2. Device Initialization
324-------------------------- 331--------------------------
332
325The PCI subsystem's first task related to device power management is to 333The PCI subsystem's first task related to device power management is to
326prepare the device for power management and initialize the fields of struct 334prepare the device for power management and initialize the fields of struct
327pci_dev used for this purpose. This happens in two functions defined in 335pci_dev used for this purpose. This happens in two functions defined in
@@ -348,10 +356,11 @@ during system-wide transitions to a sleep state and back to the working state.
348 356
3492.3. Runtime Device Power Management 3572.3. Runtime Device Power Management
350------------------------------------ 358------------------------------------
359
351The PCI subsystem plays a vital role in the runtime power management of PCI 360The PCI subsystem plays a vital role in the runtime power management of PCI
352devices. For this purpose it uses the general runtime power management 361devices. For this purpose it uses the general runtime power management
353(runtime PM) framework described in Documentation/power/runtime_pm.txt. 362(runtime PM) framework described in Documentation/power/runtime_pm.rst.
354Namely, it provides subsystem-level callbacks: 363Namely, it provides subsystem-level callbacks::
355 364
356 pci_pm_runtime_suspend() 365 pci_pm_runtime_suspend()
357 pci_pm_runtime_resume() 366 pci_pm_runtime_resume()
@@ -425,13 +434,14 @@ to the given subsystem before the next phase begins. These phases always run
425after tasks have been frozen. 434after tasks have been frozen.
426 435
4272.4.1. System Suspend 4362.4.1. System Suspend
437^^^^^^^^^^^^^^^^^^^^^
428 438
429When the system is going into a sleep state in which the contents of memory will 439When the system is going into a sleep state in which the contents of memory will
430be preserved, such as one of the ACPI sleep states S1-S3, the phases are: 440be preserved, such as one of the ACPI sleep states S1-S3, the phases are:
431 441
432 prepare, suspend, suspend_noirq. 442 prepare, suspend, suspend_noirq.
433 443
434The following PCI bus type's callbacks, respectively, are used in these phases: 444The following PCI bus type's callbacks, respectively, are used in these phases::
435 445
436 pci_pm_prepare() 446 pci_pm_prepare()
437 pci_pm_suspend() 447 pci_pm_suspend()
@@ -492,6 +502,7 @@ this purpose). PCI device drivers are not encouraged to do that, but in some
492rare cases doing that in the driver may be the optimum approach. 502rare cases doing that in the driver may be the optimum approach.
493 503
4942.4.2. System Resume 5042.4.2. System Resume
505^^^^^^^^^^^^^^^^^^^^
495 506
496When the system is undergoing a transition from a sleep state in which the 507When the system is undergoing a transition from a sleep state in which the
497contents of memory have been preserved, such as one of the ACPI sleep states 508contents of memory have been preserved, such as one of the ACPI sleep states
@@ -500,7 +511,7 @@ S1-S3, into the working state (ACPI S0), the phases are:
500 resume_noirq, resume, complete. 511 resume_noirq, resume, complete.
501 512
502The following PCI bus type's callbacks, respectively, are executed in these 513The following PCI bus type's callbacks, respectively, are executed in these
503phases: 514phases::
504 515
505 pci_pm_resume_noirq() 516 pci_pm_resume_noirq()
506 pci_pm_resume() 517 pci_pm_resume()
@@ -539,6 +550,7 @@ The pci_pm_complete() routine only executes the device driver's pm->complete()
539callback, if defined. 550callback, if defined.
540 551
5412.4.3. System Hibernation 5522.4.3. System Hibernation
553^^^^^^^^^^^^^^^^^^^^^^^^^
542 554
543System hibernation is more complicated than system suspend, because it requires 555System hibernation is more complicated than system suspend, because it requires
544a system image to be created and written into a persistent storage medium. The 556a system image to be created and written into a persistent storage medium. The
@@ -551,7 +563,7 @@ to be free) in the following three phases:
551 563
552 prepare, freeze, freeze_noirq 564 prepare, freeze, freeze_noirq
553 565
554that correspond to the PCI bus type's callbacks: 566that correspond to the PCI bus type's callbacks::
555 567
556 pci_pm_prepare() 568 pci_pm_prepare()
557 pci_pm_freeze() 569 pci_pm_freeze()
@@ -580,7 +592,7 @@ back to the fully functional state and this is done in the following phases:
580 592
581 thaw_noirq, thaw, complete 593 thaw_noirq, thaw, complete
582 594
583using the following PCI bus type's callbacks: 595using the following PCI bus type's callbacks::
584 596
585 pci_pm_thaw_noirq() 597 pci_pm_thaw_noirq()
586 pci_pm_thaw() 598 pci_pm_thaw()
@@ -608,7 +620,7 @@ three phases:
608 620
609where the prepare phase is exactly the same as for system suspend. The other 621where the prepare phase is exactly the same as for system suspend. The other
610two phases are analogous to the suspend and suspend_noirq phases, respectively. 622two phases are analogous to the suspend and suspend_noirq phases, respectively.
611The PCI subsystem-level callbacks they correspond to 623The PCI subsystem-level callbacks they correspond to::
612 624
613 pci_pm_poweroff() 625 pci_pm_poweroff()
614 pci_pm_poweroff_noirq() 626 pci_pm_poweroff_noirq()
@@ -618,6 +630,7 @@ although they don't attempt to save the device's standard configuration
618registers. 630registers.
619 631
6202.4.4. System Restore 6322.4.4. System Restore
633^^^^^^^^^^^^^^^^^^^^^
621 634
622System restore requires a hibernation image to be loaded into memory and the 635System restore requires a hibernation image to be loaded into memory and the
623pre-hibernation memory contents to be restored before the pre-hibernation system 636pre-hibernation memory contents to be restored before the pre-hibernation system
@@ -653,7 +666,7 @@ phases:
653 666
654The first two of these are analogous to the resume_noirq and resume phases 667The first two of these are analogous to the resume_noirq and resume phases
655described above, respectively, and correspond to the following PCI subsystem 668described above, respectively, and correspond to the following PCI subsystem
656callbacks: 669callbacks::
657 670
658 pci_pm_restore_noirq() 671 pci_pm_restore_noirq()
659 pci_pm_restore() 672 pci_pm_restore()
@@ -671,6 +684,7 @@ resume.
671 684
6723.1. Power Management Callbacks 6853.1. Power Management Callbacks
673------------------------------- 686-------------------------------
687
674PCI device drivers participate in power management by providing callbacks to be 688PCI device drivers participate in power management by providing callbacks to be
675executed by the PCI subsystem's power management routines described above and by 689executed by the PCI subsystem's power management routines described above and by
676controlling the runtime power management of their devices. 690controlling the runtime power management of their devices.
@@ -698,6 +712,7 @@ defined, though, they are expected to behave as described in the following
698subsections. 712subsections.
699 713
7003.1.1. prepare() 7143.1.1. prepare()
715^^^^^^^^^^^^^^^^
701 716
702The prepare() callback is executed during system suspend, during hibernation 717The prepare() callback is executed during system suspend, during hibernation
703(when a hibernation image is about to be created), during power-off after 718(when a hibernation image is about to be created), during power-off after
@@ -716,6 +731,7 @@ preallocated earlier, for example in a suspend/hibernate notifier as described
716in Documentation/driver-api/pm/notifiers.rst). 731in Documentation/driver-api/pm/notifiers.rst).
717 732
7183.1.2. suspend() 7333.1.2. suspend()
734^^^^^^^^^^^^^^^^
719 735
720The suspend() callback is only executed during system suspend, after prepare() 736The suspend() callback is only executed during system suspend, after prepare()
721callbacks have been executed for all devices in the system. 737callbacks have been executed for all devices in the system.
@@ -742,6 +758,7 @@ operations relying on the driver's ability to handle interrupts should be
742carried out in this callback. 758carried out in this callback.
743 759
7443.1.3. suspend_noirq() 7603.1.3. suspend_noirq()
761^^^^^^^^^^^^^^^^^^^^^^
745 762
746The suspend_noirq() callback is only executed during system suspend, after 763The suspend_noirq() callback is only executed during system suspend, after
747suspend() callbacks have been executed for all devices in the system and 764suspend() callbacks have been executed for all devices in the system and
@@ -753,6 +770,7 @@ suspend_noirq() can carry out operations that would cause race conditions to
753arise if they were performed in suspend(). 770arise if they were performed in suspend().
754 771
7553.1.4. freeze() 7723.1.4. freeze()
773^^^^^^^^^^^^^^^
756 774
757The freeze() callback is hibernation-specific and is executed in two situations, 775The freeze() callback is hibernation-specific and is executed in two situations,
758during hibernation, after prepare() callbacks have been executed for all devices 776during hibernation, after prepare() callbacks have been executed for all devices
@@ -770,6 +788,7 @@ or put it into a low-power state. Still, either it or freeze_noirq() should
770save the device's standard configuration registers using pci_save_state(). 788save the device's standard configuration registers using pci_save_state().
771 789
7723.1.5. freeze_noirq() 7903.1.5. freeze_noirq()
791^^^^^^^^^^^^^^^^^^^^^
773 792
774The freeze_noirq() callback is hibernation-specific. It is executed during 793The freeze_noirq() callback is hibernation-specific. It is executed during
775hibernation, after prepare() and freeze() callbacks have been executed for all 794hibernation, after prepare() and freeze() callbacks have been executed for all
@@ -786,6 +805,7 @@ The difference between freeze_noirq() and freeze() is analogous to the
786difference between suspend_noirq() and suspend(). 805difference between suspend_noirq() and suspend().
787 806
7883.1.6. poweroff() 8073.1.6. poweroff()
808^^^^^^^^^^^^^^^^^
789 809
790The poweroff() callback is hibernation-specific. It is executed when the system 810The poweroff() callback is hibernation-specific. It is executed when the system
791is about to be powered off after saving a hibernation image to a persistent 811is about to be powered off after saving a hibernation image to a persistent
@@ -802,6 +822,7 @@ into a low-power state, respectively, but it need not save the device's standard
802configuration registers. 822configuration registers.
803 823
8043.1.7. poweroff_noirq() 8243.1.7. poweroff_noirq()
825^^^^^^^^^^^^^^^^^^^^^^^
805 826
806The poweroff_noirq() callback is hibernation-specific. It is executed after 827The poweroff_noirq() callback is hibernation-specific. It is executed after
807poweroff() callbacks have been executed for all devices in the system. 828poweroff() callbacks have been executed for all devices in the system.
@@ -814,6 +835,7 @@ The difference between poweroff_noirq() and poweroff() is analogous to the
814difference between suspend_noirq() and suspend(). 835difference between suspend_noirq() and suspend().
815 836
8163.1.8. resume_noirq() 8373.1.8. resume_noirq()
838^^^^^^^^^^^^^^^^^^^^^
817 839
818The resume_noirq() callback is only executed during system resume, after the 840The resume_noirq() callback is only executed during system resume, after the
819PM core has enabled the non-boot CPUs. The driver's interrupt handler will not 841PM core has enabled the non-boot CPUs. The driver's interrupt handler will not
@@ -827,6 +849,7 @@ it should only be used for performing operations that would lead to race
827conditions if carried out by resume(). 849conditions if carried out by resume().
828 850
8293.1.9. resume() 8513.1.9. resume()
852^^^^^^^^^^^^^^^
830 853
831The resume() callback is only executed during system resume, after 854The resume() callback is only executed during system resume, after
832resume_noirq() callbacks have been executed for all devices in the system and 855resume_noirq() callbacks have been executed for all devices in the system and
@@ -837,6 +860,7 @@ device and bringing it back to the fully functional state. The device should be
837able to process I/O in a usual way after resume() has returned. 860able to process I/O in a usual way after resume() has returned.
838 861
8393.1.10. thaw_noirq() 8623.1.10. thaw_noirq()
863^^^^^^^^^^^^^^^^^^^^
840 864
841The thaw_noirq() callback is hibernation-specific. It is executed after a 865The thaw_noirq() callback is hibernation-specific. It is executed after a
842system image has been created and the non-boot CPUs have been enabled by the PM 866system image has been created and the non-boot CPUs have been enabled by the PM
@@ -851,6 +875,7 @@ freeze() and freeze_noirq(), so in general it does not need to modify the
851contents of the device's registers. 875contents of the device's registers.
852 876
8533.1.11. thaw() 8773.1.11. thaw()
878^^^^^^^^^^^^^^
854 879
855The thaw() callback is hibernation-specific. It is executed after thaw_noirq() 880The thaw() callback is hibernation-specific. It is executed after thaw_noirq()
856callbacks have been executed for all devices in the system and after device 881callbacks have been executed for all devices in the system and after device
@@ -860,6 +885,7 @@ This callback is responsible for restoring the pre-freeze configuration of
860the device, so that it will work in a usual way after thaw() has returned. 885the device, so that it will work in a usual way after thaw() has returned.
861 886
8623.1.12. restore_noirq() 8873.1.12. restore_noirq()
888^^^^^^^^^^^^^^^^^^^^^^^
863 889
864The restore_noirq() callback is hibernation-specific. It is executed in the 890The restore_noirq() callback is hibernation-specific. It is executed in the
865restore_noirq phase of hibernation, when the boot kernel has passed control to 891restore_noirq phase of hibernation, when the boot kernel has passed control to
@@ -875,6 +901,7 @@ For the vast majority of PCI device drivers there is no difference between
875resume_noirq() and restore_noirq(). 901resume_noirq() and restore_noirq().
876 902
8773.1.13. restore() 9033.1.13. restore()
904^^^^^^^^^^^^^^^^^
878 905
879The restore() callback is hibernation-specific. It is executed after 906The restore() callback is hibernation-specific. It is executed after
880restore_noirq() callbacks have been executed for all devices in the system and 907restore_noirq() callbacks have been executed for all devices in the system and
@@ -888,14 +915,17 @@ For the vast majority of PCI device drivers there is no difference between
888resume() and restore(). 915resume() and restore().
889 916
8903.1.14. complete() 9173.1.14. complete()
918^^^^^^^^^^^^^^^^^^
891 919
892The complete() callback is executed in the following situations: 920The complete() callback is executed in the following situations:
921
893 - during system resume, after resume() callbacks have been executed for all 922 - during system resume, after resume() callbacks have been executed for all
894 devices, 923 devices,
895 - during hibernation, before saving the system image, after thaw() callbacks 924 - during hibernation, before saving the system image, after thaw() callbacks
896 have been executed for all devices, 925 have been executed for all devices,
897 - during system restore, when the system is going back to its pre-hibernation 926 - during system restore, when the system is going back to its pre-hibernation
898 state, after restore() callbacks have been executed for all devices. 927 state, after restore() callbacks have been executed for all devices.
928
899It also may be executed if the loading of a hibernation image into memory fails 929It also may be executed if the loading of a hibernation image into memory fails
900(in that case it is run after thaw() callbacks have been executed for all 930(in that case it is run after thaw() callbacks have been executed for all
901devices that have drivers in the boot kernel). 931devices that have drivers in the boot kernel).
@@ -904,6 +934,7 @@ This callback is entirely optional, although it may be necessary if the
904prepare() callback performs operations that need to be reversed. 934prepare() callback performs operations that need to be reversed.
905 935
9063.1.15. runtime_suspend() 9363.1.15. runtime_suspend()
937^^^^^^^^^^^^^^^^^^^^^^^^^
907 938
908The runtime_suspend() callback is specific to device runtime power management 939The runtime_suspend() callback is specific to device runtime power management
909(runtime PM). It is executed by the PM core's runtime PM framework when the 940(runtime PM). It is executed by the PM core's runtime PM framework when the
@@ -915,6 +946,7 @@ put into a low-power state, but it must allow the PCI subsystem to perform all
915of the PCI-specific actions necessary for suspending the device. 946of the PCI-specific actions necessary for suspending the device.
916 947
9173.1.16. runtime_resume() 9483.1.16. runtime_resume()
949^^^^^^^^^^^^^^^^^^^^^^^^
918 950
919The runtime_resume() callback is specific to device runtime PM. It is executed 951The runtime_resume() callback is specific to device runtime PM. It is executed
920by the PM core's runtime PM framework when the device is about to be resumed 952by the PM core's runtime PM framework when the device is about to be resumed
@@ -927,6 +959,7 @@ The device is expected to be able to process I/O in the usual way after
927runtime_resume() has returned. 959runtime_resume() has returned.
928 960
9293.1.17. runtime_idle() 9613.1.17. runtime_idle()
962^^^^^^^^^^^^^^^^^^^^^^
930 963
931The runtime_idle() callback is specific to device runtime PM. It is executed 964The runtime_idle() callback is specific to device runtime PM. It is executed
932by the PM core's runtime PM framework whenever it may be desirable to suspend 965by the PM core's runtime PM framework whenever it may be desirable to suspend
@@ -939,6 +972,7 @@ PCI subsystem will call pm_runtime_suspend() for the device, which in turn will
939cause the driver's runtime_suspend() callback to be executed. 972cause the driver's runtime_suspend() callback to be executed.
940 973
9413.1.18. Pointing Multiple Callback Pointers to One Routine 9743.1.18. Pointing Multiple Callback Pointers to One Routine
975^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
942 976
943Although in principle each of the callbacks described in the previous 977Although in principle each of the callbacks described in the previous
944subsections can be defined as a separate function, it often is convenient to 978subsections can be defined as a separate function, it often is convenient to
@@ -962,6 +996,7 @@ dev_pm_ops to indicate that one suspend routine is to be pointed to by the
962be pointed to by the .resume(), .thaw(), and .restore() members. 996be pointed to by the .resume(), .thaw(), and .restore() members.
963 997
9643.1.19. Driver Flags for Power Management 9983.1.19. Driver Flags for Power Management
999^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
965 1000
966The PM core allows device drivers to set flags that influence the handling of 1001The PM core allows device drivers to set flags that influence the handling of
967power management for the devices by the core itself and by middle layer code 1002power management for the devices by the core itself and by middle layer code
@@ -1007,6 +1042,7 @@ it.
1007 1042
10083.2. Device Runtime Power Management 10433.2. Device Runtime Power Management
1009------------------------------------ 1044------------------------------------
1045
1010In addition to providing device power management callbacks PCI device drivers 1046In addition to providing device power management callbacks PCI device drivers
1011are responsible for controlling the runtime power management (runtime PM) of 1047are responsible for controlling the runtime power management (runtime PM) of
1012their devices. 1048their devices.
@@ -1073,22 +1109,27 @@ device the PM core automatically queues a request to check if the device is
1073idle), device drivers are generally responsible for queuing power management 1109idle), device drivers are generally responsible for queuing power management
1074requests for their devices. For this purpose they should use the runtime PM 1110requests for their devices. For this purpose they should use the runtime PM
1075helper functions provided by the PM core, discussed in 1111helper functions provided by the PM core, discussed in
1076Documentation/power/runtime_pm.txt. 1112Documentation/power/runtime_pm.rst.
1077 1113
1078Devices can also be suspended and resumed synchronously, without placing a 1114Devices can also be suspended and resumed synchronously, without placing a
1079request into pm_wq. In the majority of cases this also is done by their 1115request into pm_wq. In the majority of cases this also is done by their
1080drivers that use helper functions provided by the PM core for this purpose. 1116drivers that use helper functions provided by the PM core for this purpose.
1081 1117
1082For more information on the runtime PM of devices refer to 1118For more information on the runtime PM of devices refer to
1083Documentation/power/runtime_pm.txt. 1119Documentation/power/runtime_pm.rst.
1084 1120
1085 1121
10864. Resources 11224. Resources
1087============ 1123============
1088 1124
1089PCI Local Bus Specification, Rev. 3.0 1125PCI Local Bus Specification, Rev. 3.0
1126
1090PCI Bus Power Management Interface Specification, Rev. 1.2 1127PCI Bus Power Management Interface Specification, Rev. 1.2
1128
1091Advanced Configuration and Power Interface (ACPI) Specification, Rev. 3.0b 1129Advanced Configuration and Power Interface (ACPI) Specification, Rev. 3.0b
1130
1092PCI Express Base Specification, Rev. 2.0 1131PCI Express Base Specification, Rev. 2.0
1132
1093Documentation/driver-api/pm/devices.rst 1133Documentation/driver-api/pm/devices.rst
1094Documentation/power/runtime_pm.txt 1134
1135Documentation/power/runtime_pm.rst
diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.rst
index 19c5f7b1a7ba..945fc6d760c9 100644
--- a/Documentation/power/pm_qos_interface.txt
+++ b/Documentation/power/pm_qos_interface.rst
@@ -1,4 +1,6 @@
1PM Quality Of Service Interface. 1===============================
2PM Quality Of Service Interface
3===============================
2 4
3This interface provides a kernel and user mode interface for registering 5This interface provides a kernel and user mode interface for registering
4performance expectations by drivers, subsystems and user space applications on 6performance expectations by drivers, subsystems and user space applications on
@@ -11,6 +13,7 @@ memory_bandwidth.
11constraints and PM QoS flags. 13constraints and PM QoS flags.
12 14
13Each parameters have defined units: 15Each parameters have defined units:
16
14 * latency: usec 17 * latency: usec
15 * timeout: usec 18 * timeout: usec
16 * throughput: kbs (kilo bit / sec) 19 * throughput: kbs (kilo bit / sec)
@@ -18,6 +21,7 @@ Each parameters have defined units:
18 21
19 22
201. PM QoS framework 231. PM QoS framework
24===================
21 25
22The infrastructure exposes multiple misc device nodes one per implemented 26The infrastructure exposes multiple misc device nodes one per implemented
23parameter. The set of parameters implement is defined by pm_qos_power_init() 27parameter. The set of parameters implement is defined by pm_qos_power_init()
@@ -37,38 +41,39 @@ reading the aggregated value does not require any locking mechanism.
37From kernel mode the use of this interface is simple: 41From kernel mode the use of this interface is simple:
38 42
39void pm_qos_add_request(handle, param_class, target_value): 43void pm_qos_add_request(handle, param_class, target_value):
40Will insert an element into the list for that identified PM QoS class with the 44 Will insert an element into the list for that identified PM QoS class with the
41target value. Upon change to this list the new target is recomputed and any 45 target value. Upon change to this list the new target is recomputed and any
42registered notifiers are called only if the target value is now different. 46 registered notifiers are called only if the target value is now different.
43Clients of pm_qos need to save the returned handle for future use in other 47 Clients of pm_qos need to save the returned handle for future use in other
44pm_qos API functions. 48 pm_qos API functions.
45 49
46void pm_qos_update_request(handle, new_target_value): 50void pm_qos_update_request(handle, new_target_value):
47Will update the list element pointed to by the handle with the new target value 51 Will update the list element pointed to by the handle with the new target value
48and recompute the new aggregated target, calling the notification tree if the 52 and recompute the new aggregated target, calling the notification tree if the
49target is changed. 53 target is changed.
50 54
51void pm_qos_remove_request(handle): 55void pm_qos_remove_request(handle):
52Will remove the element. After removal it will update the aggregate target and 56 Will remove the element. After removal it will update the aggregate target and
53call the notification tree if the target was changed as a result of removing 57 call the notification tree if the target was changed as a result of removing
54the request. 58 the request.
55 59
56int pm_qos_request(param_class): 60int pm_qos_request(param_class):
57Returns the aggregated value for a given PM QoS class. 61 Returns the aggregated value for a given PM QoS class.
58 62
59int pm_qos_request_active(handle): 63int pm_qos_request_active(handle):
60Returns if the request is still active, i.e. it has not been removed from a 64 Returns if the request is still active, i.e. it has not been removed from a
61PM QoS class constraints list. 65 PM QoS class constraints list.
62 66
63int pm_qos_add_notifier(param_class, notifier): 67int pm_qos_add_notifier(param_class, notifier):
64Adds a notification callback function to the PM QoS class. The callback is 68 Adds a notification callback function to the PM QoS class. The callback is
65called when the aggregated value for the PM QoS class is changed. 69 called when the aggregated value for the PM QoS class is changed.
66 70
67int pm_qos_remove_notifier(int param_class, notifier): 71int pm_qos_remove_notifier(int param_class, notifier):
68Removes the notification callback function for the PM QoS class. 72 Removes the notification callback function for the PM QoS class.
69 73
70 74
71From user mode: 75From user mode:
76
72Only processes can register a pm_qos request. To provide for automatic 77Only processes can register a pm_qos request. To provide for automatic
73cleanup of a process, the interface requires the process to register its 78cleanup of a process, the interface requires the process to register its
74parameter requests in the following way: 79parameter requests in the following way:
@@ -89,6 +94,7 @@ node.
89 94
90 95
912. PM QoS per-device latency and flags framework 962. PM QoS per-device latency and flags framework
97================================================
92 98
93For each device, there are three lists of PM QoS requests. Two of them are 99For each device, there are three lists of PM QoS requests. Two of them are
94maintained along with the aggregated targets of resume latency and active 100maintained along with the aggregated targets of resume latency and active
@@ -107,73 +113,80 @@ the aggregated value does not require any locking mechanism.
107From kernel mode the use of this interface is the following: 113From kernel mode the use of this interface is the following:
108 114
109int dev_pm_qos_add_request(device, handle, type, value): 115int dev_pm_qos_add_request(device, handle, type, value):
110Will insert an element into the list for that identified device with the 116 Will insert an element into the list for that identified device with the
111target value. Upon change to this list the new target is recomputed and any 117 target value. Upon change to this list the new target is recomputed and any
112registered notifiers are called only if the target value is now different. 118 registered notifiers are called only if the target value is now different.
113Clients of dev_pm_qos need to save the handle for future use in other 119 Clients of dev_pm_qos need to save the handle for future use in other
114dev_pm_qos API functions. 120 dev_pm_qos API functions.
115 121
116int dev_pm_qos_update_request(handle, new_value): 122int dev_pm_qos_update_request(handle, new_value):
117Will update the list element pointed to by the handle with the new target value 123 Will update the list element pointed to by the handle with the new target
118and recompute the new aggregated target, calling the notification trees if the 124 value and recompute the new aggregated target, calling the notification
119target is changed. 125 trees if the target is changed.
120 126
121int dev_pm_qos_remove_request(handle): 127int dev_pm_qos_remove_request(handle):
122Will remove the element. After removal it will update the aggregate target and 128 Will remove the element. After removal it will update the aggregate target
123call the notification trees if the target was changed as a result of removing 129 and call the notification trees if the target was changed as a result of
124the request. 130 removing the request.
125 131
126s32 dev_pm_qos_read_value(device): 132s32 dev_pm_qos_read_value(device):
127Returns the aggregated value for a given device's constraints list. 133 Returns the aggregated value for a given device's constraints list.
128 134
129enum pm_qos_flags_status dev_pm_qos_flags(device, mask) 135enum pm_qos_flags_status dev_pm_qos_flags(device, mask)
130Check PM QoS flags of the given device against the given mask of flags. 136 Check PM QoS flags of the given device against the given mask of flags.
131The meaning of the return values is as follows: 137 The meaning of the return values is as follows:
132 PM_QOS_FLAGS_ALL: All flags from the mask are set 138
133 PM_QOS_FLAGS_SOME: Some flags from the mask are set 139 PM_QOS_FLAGS_ALL:
134 PM_QOS_FLAGS_NONE: No flags from the mask are set 140 All flags from the mask are set
135 PM_QOS_FLAGS_UNDEFINED: The device's PM QoS structure has not been 141 PM_QOS_FLAGS_SOME:
136 initialized or the list of requests is empty. 142 Some flags from the mask are set
143 PM_QOS_FLAGS_NONE:
144 No flags from the mask are set
145 PM_QOS_FLAGS_UNDEFINED:
146 The device's PM QoS structure has not been initialized
147 or the list of requests is empty.
137 148
138int dev_pm_qos_add_ancestor_request(dev, handle, type, value) 149int dev_pm_qos_add_ancestor_request(dev, handle, type, value)
139Add a PM QoS request for the first direct ancestor of the given device whose 150 Add a PM QoS request for the first direct ancestor of the given device whose
140power.ignore_children flag is unset (for DEV_PM_QOS_RESUME_LATENCY requests) 151 power.ignore_children flag is unset (for DEV_PM_QOS_RESUME_LATENCY requests)
141or whose power.set_latency_tolerance callback pointer is not NULL (for 152 or whose power.set_latency_tolerance callback pointer is not NULL (for
142DEV_PM_QOS_LATENCY_TOLERANCE requests). 153 DEV_PM_QOS_LATENCY_TOLERANCE requests).
143 154
144int dev_pm_qos_expose_latency_limit(device, value) 155int dev_pm_qos_expose_latency_limit(device, value)
145Add a request to the device's PM QoS list of resume latency constraints and 156 Add a request to the device's PM QoS list of resume latency constraints and
146create a sysfs attribute pm_qos_resume_latency_us under the device's power 157 create a sysfs attribute pm_qos_resume_latency_us under the device's power
147directory allowing user space to manipulate that request. 158 directory allowing user space to manipulate that request.
148 159
149void dev_pm_qos_hide_latency_limit(device) 160void dev_pm_qos_hide_latency_limit(device)
150Drop the request added by dev_pm_qos_expose_latency_limit() from the device's 161 Drop the request added by dev_pm_qos_expose_latency_limit() from the device's
151PM QoS list of resume latency constraints and remove sysfs attribute 162 PM QoS list of resume latency constraints and remove sysfs attribute
152pm_qos_resume_latency_us from the device's power directory. 163 pm_qos_resume_latency_us from the device's power directory.
153 164
154int dev_pm_qos_expose_flags(device, value) 165int dev_pm_qos_expose_flags(device, value)
155Add a request to the device's PM QoS list of flags and create sysfs attribute 166 Add a request to the device's PM QoS list of flags and create sysfs attribute
156pm_qos_no_power_off under the device's power directory allowing user space to 167 pm_qos_no_power_off under the device's power directory allowing user space to
157change the value of the PM_QOS_FLAG_NO_POWER_OFF flag. 168 change the value of the PM_QOS_FLAG_NO_POWER_OFF flag.
158 169
159void dev_pm_qos_hide_flags(device) 170void dev_pm_qos_hide_flags(device)
160Drop the request added by dev_pm_qos_expose_flags() from the device's PM QoS list 171 Drop the request added by dev_pm_qos_expose_flags() from the device's PM QoS list
161of flags and remove sysfs attribute pm_qos_no_power_off from the device's power 172 of flags and remove sysfs attribute pm_qos_no_power_off from the device's power
162directory. 173 directory.
163 174
164Notification mechanisms: 175Notification mechanisms:
176
165The per-device PM QoS framework has a per-device notification tree. 177The per-device PM QoS framework has a per-device notification tree.
166 178
167int dev_pm_qos_add_notifier(device, notifier): 179int dev_pm_qos_add_notifier(device, notifier):
168Adds a notification callback function for the device. 180 Adds a notification callback function for the device.
169The callback is called when the aggregated value of the device constraints list 181 The callback is called when the aggregated value of the device constraints list
170is changed (for resume latency device PM QoS only). 182 is changed (for resume latency device PM QoS only).
171 183
172int dev_pm_qos_remove_notifier(device, notifier): 184int dev_pm_qos_remove_notifier(device, notifier):
173Removes the notification callback function for the device. 185 Removes the notification callback function for the device.
174 186
175 187
176Active state latency tolerance 188Active state latency tolerance
189^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
177 190
178This device PM QoS type is used to support systems in which hardware may switch 191This device PM QoS type is used to support systems in which hardware may switch
179to energy-saving operation modes on the fly. In those systems, if the operation 192to energy-saving operation modes on the fly. In those systems, if the operation
diff --git a/Documentation/power/power_supply_class.rst b/Documentation/power/power_supply_class.rst
new file mode 100644
index 000000000000..3f2c3fe38a61
--- /dev/null
+++ b/Documentation/power/power_supply_class.rst
@@ -0,0 +1,282 @@
1========================
2Linux power supply class
3========================
4
5Synopsis
6~~~~~~~~
7Power supply class used to represent battery, UPS, AC or DC power supply
8properties to user-space.
9
10It defines core set of attributes, which should be applicable to (almost)
11every power supply out there. Attributes are available via sysfs and uevent
12interfaces.
13
14Each attribute has well defined meaning, up to unit of measure used. While
15the attributes provided are believed to be universally applicable to any
16power supply, specific monitoring hardware may not be able to provide them
17all, so any of them may be skipped.
18
19Power supply class is extensible, and allows drivers to define their own attributes.
20The core attribute set is subject to the standard Linux evolution (i.e.
21if it will be found that some attribute is applicable to many power supply
22types or their drivers, it can be added to the core set).
23
24It also integrates with LED framework, for the purpose of providing
25typically expected feedback of battery charging/fully charged status and
26AC/USB power supply online status. (Note that specific details of the
27indication (including whether to use it at all) are fully controllable by
28user and/or specific machine defaults, per design principles of LED
29framework).
30
31
32Attributes/properties
33~~~~~~~~~~~~~~~~~~~~~
34Power supply class has predefined set of attributes, this eliminates code
35duplication across drivers. Power supply class insists on reusing its
36predefined attributes *and* their units.
37
38So, userspace gets predictable set of attributes and their units for any
39kind of power supply, and can process/present them to a user in consistent
40manner. Results for different power supplies and machines are also directly
41comparable.
42
43See drivers/power/supply/ds2760_battery.c and drivers/power/supply/pda_power.c
44for the example how to declare and handle attributes.
45
46
47Units
48~~~~~
49Quoting include/linux/power_supply.h:
50
51 All voltages, currents, charges, energies, time and temperatures in µV,
52 µA, µAh, µWh, seconds and tenths of degree Celsius unless otherwise
53 stated. It's driver's job to convert its raw values to units in which
54 this class operates.
55
56
57Attributes/properties detailed
58~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
59
60+--------------------------------------------------------------------------+
61| **Charge/Energy/Capacity - how to not confuse** |
62+--------------------------------------------------------------------------+
63| **Because both "charge" (µAh) and "energy" (µWh) represent "capacity" |
64| of battery, this class distinguishes these terms. Don't mix them!** |
65| |
66| - `CHARGE_*` |
67| attributes represent capacity in µAh only. |
68| - `ENERGY_*` |
69| attributes represent capacity in µWh only. |
70| - `CAPACITY` |
71| attribute represents capacity in *percents*, from 0 to 100. |
72+--------------------------------------------------------------------------+
73
74Postfixes:
75
76_AVG
77 *hardware* averaged value, use it if your hardware is really able to
78 report averaged values.
79_NOW
80 momentary/instantaneous values.
81
82STATUS
83 this attribute represents operating status (charging, full,
84 discharging (i.e. powering a load), etc.). This corresponds to
85 `BATTERY_STATUS_*` values, as defined in battery.h.
86
87CHARGE_TYPE
88 batteries can typically charge at different rates.
89 This defines trickle and fast charges. For batteries that
90 are already charged or discharging, 'n/a' can be displayed (or
91 'unknown', if the status is not known).
92
93AUTHENTIC
94 indicates the power supply (battery or charger) connected
95 to the platform is authentic(1) or non authentic(0).
96
97HEALTH
98 represents health of the battery, values correspond to
99 POWER_SUPPLY_HEALTH_*, defined in battery.h.
100
101VOLTAGE_OCV
102 open circuit voltage of the battery.
103
104VOLTAGE_MAX_DESIGN, VOLTAGE_MIN_DESIGN
105 design values for maximal and minimal power supply voltages.
106 Maximal/minimal means values of voltages when battery considered
107 "full"/"empty" at normal conditions. Yes, there is no direct relation
108 between voltage and battery capacity, but some dumb
109 batteries use voltage for very approximated calculation of capacity.
110 Battery driver also can use this attribute just to inform userspace
111 about maximal and minimal voltage thresholds of a given battery.
112
113VOLTAGE_MAX, VOLTAGE_MIN
114 same as _DESIGN voltage values except that these ones should be used
115 if hardware could only guess (measure and retain) the thresholds of a
116 given power supply.
117
118VOLTAGE_BOOT
119 Reports the voltage measured during boot
120
121CURRENT_BOOT
122 Reports the current measured during boot
123
124CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN
125 design charge values, when battery considered full/empty.
126
127ENERGY_FULL_DESIGN, ENERGY_EMPTY_DESIGN
128 same as above but for energy.
129
130CHARGE_FULL, CHARGE_EMPTY
131 These attributes mean "last remembered value of charge when battery
132 became full/empty". It also could mean "value of charge when battery
133 considered full/empty at given conditions (temperature, age)".
134 I.e. these attributes represent real thresholds, not design values.
135
136ENERGY_FULL, ENERGY_EMPTY
137 same as above but for energy.
138
139CHARGE_COUNTER
140 the current charge counter (in µAh). This could easily
141 be negative; there is no empty or full value. It is only useful for
142 relative, time-based measurements.
143
144PRECHARGE_CURRENT
145 the maximum charge current during precharge phase of charge cycle
146 (typically 20% of battery capacity).
147
148CHARGE_TERM_CURRENT
149 Charge termination current. The charge cycle terminates when battery
150 voltage is above recharge threshold, and charge current is below
151 this setting (typically 10% of battery capacity).
152
153CONSTANT_CHARGE_CURRENT
154 constant charge current programmed by charger.
155
156
157CONSTANT_CHARGE_CURRENT_MAX
158 maximum charge current supported by the power supply object.
159
160CONSTANT_CHARGE_VOLTAGE
161 constant charge voltage programmed by charger.
162CONSTANT_CHARGE_VOLTAGE_MAX
163 maximum charge voltage supported by the power supply object.
164
165INPUT_CURRENT_LIMIT
166 input current limit programmed by charger. Indicates
167 the current drawn from a charging source.
168
169CHARGE_CONTROL_LIMIT
170 current charge control limit setting
171CHARGE_CONTROL_LIMIT_MAX
172 maximum charge control limit setting
173
174CALIBRATE
175 battery or coulomb counter calibration status
176
177CAPACITY
178 capacity in percents.
179CAPACITY_ALERT_MIN
180 minimum capacity alert value in percents.
181CAPACITY_ALERT_MAX
182 maximum capacity alert value in percents.
183CAPACITY_LEVEL
184 capacity level. This corresponds to POWER_SUPPLY_CAPACITY_LEVEL_*.
185
186TEMP
187 temperature of the power supply.
188TEMP_ALERT_MIN
189 minimum battery temperature alert.
190TEMP_ALERT_MAX
191 maximum battery temperature alert.
192TEMP_AMBIENT
193 ambient temperature.
194TEMP_AMBIENT_ALERT_MIN
195 minimum ambient temperature alert.
196TEMP_AMBIENT_ALERT_MAX
197 maximum ambient temperature alert.
198TEMP_MIN
199 minimum operatable temperature
200TEMP_MAX
201 maximum operatable temperature
202
203TIME_TO_EMPTY
204 seconds left for battery to be considered empty
205 (i.e. while battery powers a load)
206TIME_TO_FULL
207 seconds left for battery to be considered full
208 (i.e. while battery is charging)
209
210
211Battery <-> external power supply interaction
212~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
213Often power supplies are acting as supplies and supplicants at the same
214time. Batteries are good example. So, batteries usually care if they're
215externally powered or not.
216
217For that case, power supply class implements notification mechanism for
218batteries.
219
220External power supply (AC) lists supplicants (batteries) names in
221"supplied_to" struct member, and each power_supply_changed() call
222issued by external power supply will notify supplicants via
223external_power_changed callback.
224
225
226Devicetree battery characteristics
227~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
228Drivers should call power_supply_get_battery_info() to obtain battery
229characteristics from a devicetree battery node, defined in
230Documentation/devicetree/bindings/power/supply/battery.txt. This is
231implemented in drivers/power/supply/bq27xxx_battery.c.
232
233Properties in struct power_supply_battery_info and their counterparts in the
234battery node have names corresponding to elements in enum power_supply_property,
235for naming consistency between sysfs attributes and battery node properties.
236
237
238QA
239~~
240
241Q:
242 Where is POWER_SUPPLY_PROP_XYZ attribute?
243A:
244 If you cannot find attribute suitable for your driver needs, feel free
245 to add it and send patch along with your driver.
246
247 The attributes available currently are the ones currently provided by the
248 drivers written.
249
250 Good candidates to add in future: model/part#, cycle_time, manufacturer,
251 etc.
252
253
254Q:
255 I have some very specific attribute (e.g. battery color), should I add
256 this attribute to standard ones?
257A:
258 Most likely, no. Such attribute can be placed in the driver itself, if
259 it is useful. Of course, if the attribute in question is applicable to
260 large set of batteries, provided by many drivers, and/or comes from
261 some general battery specification/standard, it may be a candidate to
262 be added to the core attribute set.
263
264
265Q:
266 Suppose, my battery monitoring chip/firmware does not provide capacity
267 in percents, but provides charge_{now,full,empty}. Should I calculate
268 percentage capacity manually, inside the driver, and register CAPACITY
269 attribute? The same question about time_to_empty/time_to_full.
270A:
271 Most likely, no. This class is designed to export properties which are
272 directly measurable by the specific hardware available.
273
274 Inferring not available properties using some heuristics or mathematical
275 model is not subject of work for a battery driver. Such functionality
276 should be factored out, and in fact, apm_power, the driver to serve
277 legacy APM API on top of power supply class, uses a simple heuristic of
278 approximating remaining battery capacity based on its charge, current,
279 voltage and so on. But full-fledged battery model is likely not subject
280 for kernel at all, as it would require floating point calculation to deal
281 with things like differential equations and Kalman filters. This is
282 better handled by batteryd/libbattery, yet to be written.
diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt
deleted file mode 100644
index 300d37896e51..000000000000
--- a/Documentation/power/power_supply_class.txt
+++ /dev/null
@@ -1,231 +0,0 @@
1Linux power supply class
2========================
3
4Synopsis
5~~~~~~~~
6Power supply class used to represent battery, UPS, AC or DC power supply
7properties to user-space.
8
9It defines core set of attributes, which should be applicable to (almost)
10every power supply out there. Attributes are available via sysfs and uevent
11interfaces.
12
13Each attribute has well defined meaning, up to unit of measure used. While
14the attributes provided are believed to be universally applicable to any
15power supply, specific monitoring hardware may not be able to provide them
16all, so any of them may be skipped.
17
18Power supply class is extensible, and allows drivers to define their own attributes.
19The core attribute set is subject to the standard Linux evolution (i.e.
20if it will be found that some attribute is applicable to many power supply
21types or their drivers, it can be added to the core set).
22
23It also integrates with LED framework, for the purpose of providing
24typically expected feedback of battery charging/fully charged status and
25AC/USB power supply online status. (Note that specific details of the
26indication (including whether to use it at all) are fully controllable by
27user and/or specific machine defaults, per design principles of LED
28framework).
29
30
31Attributes/properties
32~~~~~~~~~~~~~~~~~~~~~
33Power supply class has predefined set of attributes, this eliminates code
34duplication across drivers. Power supply class insists on reusing its
35predefined attributes *and* their units.
36
37So, userspace gets predictable set of attributes and their units for any
38kind of power supply, and can process/present them to a user in consistent
39manner. Results for different power supplies and machines are also directly
40comparable.
41
42See drivers/power/supply/ds2760_battery.c and drivers/power/supply/pda_power.c
43for the example how to declare and handle attributes.
44
45
46Units
47~~~~~
48Quoting include/linux/power_supply.h:
49
50 All voltages, currents, charges, energies, time and temperatures in µV,
51 µA, µAh, µWh, seconds and tenths of degree Celsius unless otherwise
52 stated. It's driver's job to convert its raw values to units in which
53 this class operates.
54
55
56Attributes/properties detailed
57~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
58
59~ ~ ~ ~ ~ ~ ~ Charge/Energy/Capacity - how to not confuse ~ ~ ~ ~ ~ ~ ~
60~ ~
61~ Because both "charge" (µAh) and "energy" (µWh) represents "capacity" ~
62~ of battery, this class distinguish these terms. Don't mix them! ~
63~ ~
64~ CHARGE_* attributes represents capacity in µAh only. ~
65~ ENERGY_* attributes represents capacity in µWh only. ~
66~ CAPACITY attribute represents capacity in *percents*, from 0 to 100. ~
67~ ~
68~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
69
70Postfixes:
71_AVG - *hardware* averaged value, use it if your hardware is really able to
72report averaged values.
73_NOW - momentary/instantaneous values.
74
75STATUS - this attribute represents operating status (charging, full,
76discharging (i.e. powering a load), etc.). This corresponds to
77BATTERY_STATUS_* values, as defined in battery.h.
78
79CHARGE_TYPE - batteries can typically charge at different rates.
80This defines trickle and fast charges. For batteries that
81are already charged or discharging, 'n/a' can be displayed (or
82'unknown', if the status is not known).
83
84AUTHENTIC - indicates the power supply (battery or charger) connected
85to the platform is authentic(1) or non authentic(0).
86
87HEALTH - represents health of the battery, values correspond to
88POWER_SUPPLY_HEALTH_*, defined in battery.h.
89
90VOLTAGE_OCV - open circuit voltage of the battery.
91
92VOLTAGE_MAX_DESIGN, VOLTAGE_MIN_DESIGN - design values for maximal and
93minimal power supply voltages. Maximal/minimal means values of voltages
94when battery considered "full"/"empty" at normal conditions. Yes, there is
95no direct relation between voltage and battery capacity, but some dumb
96batteries use voltage for very approximated calculation of capacity.
97Battery driver also can use this attribute just to inform userspace
98about maximal and minimal voltage thresholds of a given battery.
99
100VOLTAGE_MAX, VOLTAGE_MIN - same as _DESIGN voltage values except that
101these ones should be used if hardware could only guess (measure and
102retain) the thresholds of a given power supply.
103
104VOLTAGE_BOOT - Reports the voltage measured during boot
105
106CURRENT_BOOT - Reports the current measured during boot
107
108CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN - design charge values, when
109battery considered full/empty.
110
111ENERGY_FULL_DESIGN, ENERGY_EMPTY_DESIGN - same as above but for energy.
112
113CHARGE_FULL, CHARGE_EMPTY - These attributes mean "last remembered value
114of charge when battery became full/empty". It also could mean "value of
115charge when battery is considered full/empty at given conditions (temperature,
116age)". I.e. these attributes represent real thresholds, not design values.
117
118ENERGY_FULL, ENERGY_EMPTY - same as above but for energy.
119
120CHARGE_COUNTER - the current charge counter (in µAh). This could easily
121be negative; there is no empty or full value. It is only useful for
122relative, time-based measurements.
123
124PRECHARGE_CURRENT - the maximum charge current during precharge phase
125of charge cycle (typically 20% of battery capacity).
126CHARGE_TERM_CURRENT - Charge termination current. The charge cycle
127terminates when battery voltage is above recharge threshold, and charge
128current is below this setting (typically 10% of battery capacity).
129
130CONSTANT_CHARGE_CURRENT - constant charge current programmed by charger.
131CONSTANT_CHARGE_CURRENT_MAX - maximum charge current supported by the
132power supply object.
133
134CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger.
135CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the
136power supply object.
137
138INPUT_CURRENT_LIMIT - input current limit programmed by charger. Indicates
139the current drawn from a charging source.
140
141CHARGE_CONTROL_LIMIT - current charge control limit setting
142CHARGE_CONTROL_LIMIT_MAX - maximum charge control limit setting
143
144CALIBRATE - battery or coulomb counter calibration status
145
146CAPACITY - capacity in percents.
147CAPACITY_ALERT_MIN - minimum capacity alert value in percents.
148CAPACITY_ALERT_MAX - maximum capacity alert value in percents.
149CAPACITY_LEVEL - capacity level. This corresponds to
150POWER_SUPPLY_CAPACITY_LEVEL_*.
151
152TEMP - temperature of the power supply.
153TEMP_ALERT_MIN - minimum battery temperature alert.
154TEMP_ALERT_MAX - maximum battery temperature alert.
155TEMP_AMBIENT - ambient temperature.
156TEMP_AMBIENT_ALERT_MIN - minimum ambient temperature alert.
157TEMP_AMBIENT_ALERT_MAX - maximum ambient temperature alert.
158TEMP_MIN - minimum operable temperature
159TEMP_MAX - maximum operable temperature
160
161TIME_TO_EMPTY - seconds left for battery to be considered empty (i.e.
162while battery powers a load)
163TIME_TO_FULL - seconds left for battery to be considered full (i.e.
164while battery is charging)
165
166
167Battery <-> external power supply interaction
168~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
169Often power supplies are acting as supplies and supplicants at the same
170time. Batteries are good example. So, batteries usually care if they're
171externally powered or not.
172
173For that case, power supply class implements notification mechanism for
174batteries.
175
176External power supply (AC) lists supplicants (batteries) names in
177"supplied_to" struct member, and each power_supply_changed() call
178issued by external power supply will notify supplicants via
179external_power_changed callback.
180
181
182Devicetree battery characteristics
183~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
184Drivers should call power_supply_get_battery_info() to obtain battery
185characteristics from a devicetree battery node, defined in
186Documentation/devicetree/bindings/power/supply/battery.txt. This is
187implemented in drivers/power/supply/bq27xxx_battery.c.
188
189Properties in struct power_supply_battery_info and their counterparts in the
190battery node have names corresponding to elements in enum power_supply_property,
191for naming consistency between sysfs attributes and battery node properties.
192
193
194QA
195~~
196Q: Where is POWER_SUPPLY_PROP_XYZ attribute?
197A: If you cannot find attribute suitable for your driver needs, feel free
198 to add it and send patch along with your driver.
199
200 The attributes available currently are the ones currently provided by the
201 drivers written.
202
203 Good candidates to add in future: model/part#, cycle_time, manufacturer,
204 etc.
205
206
207Q: I have some very specific attribute (e.g. battery color), should I add
208 this attribute to standard ones?
209A: Most likely, no. Such attribute can be placed in the driver itself, if
210 it is useful. Of course, if the attribute in question applicable to
211 large set of batteries, provided by many drivers, and/or comes from
212 some general battery specification/standard, it may be a candidate to
213 be added to the core attribute set.
214
215
216Q: Suppose, my battery monitoring chip/firmware does not provides capacity
217 in percents, but provides charge_{now,full,empty}. Should I calculate
218 percentage capacity manually, inside the driver, and register CAPACITY
219 attribute? The same question about time_to_empty/time_to_full.
220A: Most likely, no. This class is designed to export properties which are
221 directly measurable by the specific hardware available.
222
223 Inferring not available properties using some heuristics or mathematical
224 model is not subject of work for a battery driver. Such functionality
225 should be factored out, and in fact, apm_power, the driver to serve
226 legacy APM API on top of power supply class, uses a simple heuristic of
227 approximating remaining battery capacity based on its charge, current,
228 voltage and so on. But full-fledged battery model is likely not subject
229 for kernel at all, as it would require floating point calculation to deal
230 with things like differential equations and Kalman filters. This is
231 better be handled by batteryd/libbattery, yet to be written.
diff --git a/Documentation/power/powercap/powercap.rst b/Documentation/power/powercap/powercap.rst
new file mode 100644
index 000000000000..7ae3b44c7624
--- /dev/null
+++ b/Documentation/power/powercap/powercap.rst
@@ -0,0 +1,257 @@
1=======================
2Power Capping Framework
3=======================
4
5The power capping framework provides a consistent interface between the kernel
6and the user space that allows power capping drivers to expose the settings to
7user space in a uniform way.
8
9Terminology
10===========
11
12The framework exposes power capping devices to user space via sysfs in the
13form of a tree of objects. The objects at the root level of the tree represent
14'control types', which correspond to different methods of power capping. For
15example, the intel-rapl control type represents the Intel "Running Average
16Power Limit" (RAPL) technology, whereas the 'idle-injection' control type
17corresponds to the use of idle injection for controlling power.
18
19Power zones represent different parts of the system, which can be controlled and
20monitored using the power capping method determined by the control type the
21given zone belongs to. They each contain attributes for monitoring power, as
22well as controls represented in the form of power constraints. If the parts of
23the system represented by different power zones are hierarchical (that is, one
24bigger part consists of multiple smaller parts that each have their own power
25controls), those power zones may also be organized in a hierarchy with one
26parent power zone containing multiple subzones and so on to reflect the power
27control topology of the system. In that case, it is possible to apply power
28capping to a set of devices together using the parent power zone and if more
29fine grained control is required, it can be applied through the subzones.
30
31
32Example sysfs interface tree::
33
34 /sys/devices/virtual/powercap
35 └──intel-rapl
36 ├──intel-rapl:0
37 │   ├──constraint_0_name
38 │   ├──constraint_0_power_limit_uw
39 │   ├──constraint_0_time_window_us
40 │   ├──constraint_1_name
41 │   ├──constraint_1_power_limit_uw
42 │   ├──constraint_1_time_window_us
43 │   ├──device -> ../../intel-rapl
44 │   ├──energy_uj
45 │   ├──intel-rapl:0:0
46 │   │   ├──constraint_0_name
47 │   │   ├──constraint_0_power_limit_uw
48 │   │   ├──constraint_0_time_window_us
49 │   │   ├──constraint_1_name
50 │   │   ├──constraint_1_power_limit_uw
51 │   │   ├──constraint_1_time_window_us
52 │   │   ├──device -> ../../intel-rapl:0
53 │   │   ├──energy_uj
54 │   │   ├──max_energy_range_uj
55 │   │   ├──name
56 │   │   ├──enabled
57 │   │   ├──power
58 │   │   │   ├──async
59 │   │   │   []
60 │   │   ├──subsystem -> ../../../../../../class/power_cap
61 │   │   └──uevent
62 │   ├──intel-rapl:0:1
63 │   │   ├──constraint_0_name
64 │   │   ├──constraint_0_power_limit_uw
65 │   │   ├──constraint_0_time_window_us
66 │   │   ├──constraint_1_name
67 │   │   ├──constraint_1_power_limit_uw
68 │   │   ├──constraint_1_time_window_us
69 │   │   ├──device -> ../../intel-rapl:0
70 │   │   ├──energy_uj
71 │   │   ├──max_energy_range_uj
72 │   │   ├──name
73 │   │   ├──enabled
74 │   │   ├──power
75 │   │   │   ├──async
76 │   │   │   []
77 │   │   ├──subsystem -> ../../../../../../class/power_cap
78 │   │   └──uevent
79 │   ├──max_energy_range_uj
80 │   ├──max_power_range_uw
81 │   ├──name
82 │   ├──enabled
83 │   ├──power
84 │   │   ├──async
85 │   │   []
86 │   ├──subsystem -> ../../../../../class/power_cap
87 │   ├──enabled
88 │   ├──uevent
89 ├──intel-rapl:1
90 │   ├──constraint_0_name
91 │   ├──constraint_0_power_limit_uw
92 │   ├──constraint_0_time_window_us
93 │   ├──constraint_1_name
94 │   ├──constraint_1_power_limit_uw
95 │   ├──constraint_1_time_window_us
96 │   ├──device -> ../../intel-rapl
97 │   ├──energy_uj
98 │   ├──intel-rapl:1:0
99 │   │   ├──constraint_0_name
100 │   │   ├──constraint_0_power_limit_uw
101 │   │   ├──constraint_0_time_window_us
102 │   │   ├──constraint_1_name
103 │   │   ├──constraint_1_power_limit_uw
104 │   │   ├──constraint_1_time_window_us
105 │   │   ├──device -> ../../intel-rapl:1
106 │   │   ├──energy_uj
107 │   │   ├──max_energy_range_uj
108 │   │   ├──name
109 │   │   ├──enabled
110 │   │   ├──power
111 │   │   │   ├──async
112 │   │   │   []
113 │   │   ├──subsystem -> ../../../../../../class/power_cap
114 │   │   └──uevent
115 │   ├──intel-rapl:1:1
116 │   │   ├──constraint_0_name
117 │   │   ├──constraint_0_power_limit_uw
118 │   │   ├──constraint_0_time_window_us
119 │   │   ├──constraint_1_name
120 │   │   ├──constraint_1_power_limit_uw
121 │   │   ├──constraint_1_time_window_us
122 │   │   ├──device -> ../../intel-rapl:1
123 │   │   ├──energy_uj
124 │   │   ├──max_energy_range_uj
125 │   │   ├──name
126 │   │   ├──enabled
127 │   │   ├──power
128 │   │   │   ├──async
129 │   │   │   []
130 │   │   ├──subsystem -> ../../../../../../class/power_cap
131 │   │   └──uevent
132 │   ├──max_energy_range_uj
133 │   ├──max_power_range_uw
134 │   ├──name
135 │   ├──enabled
136 │   ├──power
137 │   │   ├──async
138 │   │   []
139 │   ├──subsystem -> ../../../../../class/power_cap
140 │   ├──uevent
141 ├──power
142 │   ├──async
143 │   []
144 ├──subsystem -> ../../../../class/power_cap
145 ├──enabled
146 └──uevent
147
148The above example illustrates a case in which the Intel RAPL technology,
149available in Intel® IA-64 and IA-32 Processor Architectures, is used. There is one
150control type called intel-rapl which contains two power zones, intel-rapl:0 and
151intel-rapl:1, representing CPU packages. Each of these power zones contains
152two subzones, intel-rapl:j:0 and intel-rapl:j:1 (j = 0, 1), representing the
153"core" and the "uncore" parts of the given CPU package, respectively. All of
154the zones and subzones contain energy monitoring attributes (energy_uj,
155max_energy_range_uj) and constraint attributes (constraint_*) allowing controls
156to be applied (the constraints in the 'package' power zones apply to the whole
157CPU packages and the subzone constraints only apply to the respective parts of
158the given package individually). Since Intel RAPL doesn't provide instantaneous
159power value, there is no power_uw attribute.
160
161In addition to that, each power zone contains a name attribute, allowing the
162part of the system represented by that zone to be identified.
163For example::
164
165 cat /sys/class/power_cap/intel-rapl/intel-rapl:0/name
166 package-0
167
168
169
170The Intel RAPL technology allows two constraints, short term and long term,
171with two different time windows to be applied to each power zone. Thus for
172each zone there are 2 attributes representing the constraint names, 2 power
173limits and 2 attributes representing the sizes of the time windows. That is, the
174constraint_j_* attributes correspond to the jth constraint (j = 0, 1).
175
176For example::
177
178 constraint_0_name
179 constraint_0_power_limit_uw
180 constraint_0_time_window_us
181 constraint_1_name
182 constraint_1_power_limit_uw
183 constraint_1_time_window_us
184
185Power Zone Attributes
186=====================
187
188Monitoring attributes
189---------------------
190
191energy_uj (rw)
192 Current energy counter in micro joules. Write "0" to reset.
193 If the counter can not be reset, then this attribute is read only.
194
195max_energy_range_uj (ro)
196 Range of the above energy counter in micro-joules.
197
198power_uw (ro)
199 Current power in micro watts.
200
201max_power_range_uw (ro)
202 Range of the above power value in micro-watts.
203
204name (ro)
205 Name of this power zone.
206
207It is possible that some domains have both power ranges and energy counter ranges;
208however, only one is mandatory.
209
210Constraints
211-----------
212
213constraint_X_power_limit_uw (rw)
214 Power limit in micro watts, which should be applicable for the
215 time window specified by "constraint_X_time_window_us".
216
217constraint_X_time_window_us (rw)
218 Time window in micro seconds.
219
220constraint_X_name (ro)
221 An optional name of the constraint.
222
223constraint_X_max_power_uw(ro)
224 Maximum allowed power in micro watts.
225
226constraint_X_min_power_uw(ro)
227 Minimum allowed power in micro watts.
228
229constraint_X_max_time_window_us(ro)
230 Maximum allowed time window in micro seconds.
231
232constraint_X_min_time_window_us(ro)
233 Minimum allowed time window in micro seconds.
234
235Except for power_limit_uw and time_window_us, all other fields are optional.
236
237Common zone and control type attributes
238---------------------------------------
239
240enabled (rw): Enable/Disable controls at zone level or for all zones using
241a control type.
242
243Power Cap Client Driver Interface
244=================================
245
246The API summary:
247
248Call powercap_register_control_type() to register control type object.
249Call powercap_register_zone() to register a power zone (under a given
250control type), either as a top-level power zone or as a subzone of another
251power zone registered earlier.
252The number of constraints in a power zone and the corresponding callbacks have
253to be defined prior to calling powercap_register_zone() to register that zone.
254
255To free a power zone, call powercap_unregister_zone().
256To free a control type object, call powercap_unregister_control_type().
257Detailed API can be generated using kernel-doc on include/linux/powercap.h.
diff --git a/Documentation/power/powercap/powercap.txt b/Documentation/power/powercap/powercap.txt
deleted file mode 100644
index 1e6ef164e07a..000000000000
--- a/Documentation/power/powercap/powercap.txt
+++ /dev/null
@@ -1,236 +0,0 @@
1Power Capping Framework
2==================================
3
4The power capping framework provides a consistent interface between the kernel
5and the user space that allows power capping drivers to expose the settings to
6user space in a uniform way.
7
8Terminology
9=========================
10The framework exposes power capping devices to user space via sysfs in the
11form of a tree of objects. The objects at the root level of the tree represent
12'control types', which correspond to different methods of power capping. For
13example, the intel-rapl control type represents the Intel "Running Average
14Power Limit" (RAPL) technology, whereas the 'idle-injection' control type
15corresponds to the use of idle injection for controlling power.
16
17Power zones represent different parts of the system, which can be controlled and
18monitored using the power capping method determined by the control type the
19given zone belongs to. They each contain attributes for monitoring power, as
20well as controls represented in the form of power constraints. If the parts of
21the system represented by different power zones are hierarchical (that is, one
22bigger part consists of multiple smaller parts that each have their own power
23controls), those power zones may also be organized in a hierarchy with one
24parent power zone containing multiple subzones and so on to reflect the power
25control topology of the system. In that case, it is possible to apply power
26capping to a set of devices together using the parent power zone and if more
27fine grained control is required, it can be applied through the subzones.
28
29
30Example sysfs interface tree:
31
32/sys/devices/virtual/powercap
33??? intel-rapl
34 ??? intel-rapl:0
35 ?   ??? constraint_0_name
36 ?   ??? constraint_0_power_limit_uw
37 ?   ??? constraint_0_time_window_us
38 ?   ??? constraint_1_name
39 ?   ??? constraint_1_power_limit_uw
40 ?   ??? constraint_1_time_window_us
41 ?   ??? device -> ../../intel-rapl
42 ?   ??? energy_uj
43 ?   ??? intel-rapl:0:0
44 ?   ?   ??? constraint_0_name
45 ?   ?   ??? constraint_0_power_limit_uw
46 ?   ?   ??? constraint_0_time_window_us
47 ?   ?   ??? constraint_1_name
48 ?   ?   ??? constraint_1_power_limit_uw
49 ?   ?   ??? constraint_1_time_window_us
50 ?   ?   ??? device -> ../../intel-rapl:0
51 ?   ?   ??? energy_uj
52 ?   ?   ??? max_energy_range_uj
53 ?   ?   ??? name
54 ?   ?   ??? enabled
55 ?   ?   ??? power
56 ?   ?   ?   ??? async
57 ?   ?   ?   []
58 ?   ?   ??? subsystem -> ../../../../../../class/power_cap
59 ?   ?   ??? uevent
60 ?   ??? intel-rapl:0:1
61 ?   ?   ??? constraint_0_name
62 ?   ?   ??? constraint_0_power_limit_uw
63 ?   ?   ??? constraint_0_time_window_us
64 ?   ?   ??? constraint_1_name
65 ?   ?   ??? constraint_1_power_limit_uw
66 ?   ?   ??? constraint_1_time_window_us
67 ?   ?   ??? device -> ../../intel-rapl:0
68 ?   ?   ??? energy_uj
69 ?   ?   ??? max_energy_range_uj
70 ?   ?   ??? name
71 ?   ?   ??? enabled
72 ?   ?   ??? power
73 ?   ?   ?   ??? async
74 ?   ?   ?   []
75 ?   ?   ??? subsystem -> ../../../../../../class/power_cap
76 ?   ?   ??? uevent
77 ?   ??? max_energy_range_uj
78 ?   ??? max_power_range_uw
79 ?   ??? name
80 ?   ??? enabled
81 ?   ??? power
82 ?   ?   ??? async
83 ?   ?   []
84 ?   ??? subsystem -> ../../../../../class/power_cap
85 ?   ??? enabled
86 ?   ??? uevent
87 ??? intel-rapl:1
88 ?   ??? constraint_0_name
89 ?   ??? constraint_0_power_limit_uw
90 ?   ??? constraint_0_time_window_us
91 ?   ??? constraint_1_name
92 ?   ??? constraint_1_power_limit_uw
93 ?   ??? constraint_1_time_window_us
94 ?   ??? device -> ../../intel-rapl
95 ?   ??? energy_uj
96 ?   ??? intel-rapl:1:0
97 ?   ?   ??? constraint_0_name
98 ?   ?   ??? constraint_0_power_limit_uw
99 ?   ?   ??? constraint_0_time_window_us
100 ?   ?   ??? constraint_1_name
101 ?   ?   ??? constraint_1_power_limit_uw
102 ?   ?   ??? constraint_1_time_window_us
103 ?   ?   ??? device -> ../../intel-rapl:1
104 ?   ?   ??? energy_uj
105 ?   ?   ??? max_energy_range_uj
106 ?   ?   ??? name
107 ?   ?   ??? enabled
108 ?   ?   ??? power
109 ?   ?   ?   ??? async
110 ?   ?   ?   []
111 ?   ?   ??? subsystem -> ../../../../../../class/power_cap
112 ?   ?   ??? uevent
113 ?   ??? intel-rapl:1:1
114 ?   ?   ??? constraint_0_name
115 ?   ?   ??? constraint_0_power_limit_uw
116 ?   ?   ??? constraint_0_time_window_us
117 ?   ?   ??? constraint_1_name
118 ?   ?   ??? constraint_1_power_limit_uw
119 ?   ?   ??? constraint_1_time_window_us
120 ?   ?   ??? device -> ../../intel-rapl:1
121 ?   ?   ??? energy_uj
122 ?   ?   ??? max_energy_range_uj
123 ?   ?   ??? name
124 ?   ?   ??? enabled
125 ?   ?   ??? power
126 ?   ?   ?   ??? async
127 ?   ?   ?   []
128 ?   ?   ??? subsystem -> ../../../../../../class/power_cap
129 ?   ?   ??? uevent
130 ?   ??? max_energy_range_uj
131 ?   ??? max_power_range_uw
132 ?   ??? name
133 ?   ??? enabled
134 ?   ??? power
135 ?   ?   ??? async
136 ?   ?   []
137 ?   ??? subsystem -> ../../../../../class/power_cap
138 ?   ??? uevent
139 ??? power
140 ?   ??? async
141 ?   []
142 ??? subsystem -> ../../../../class/power_cap
143 ??? enabled
144 ??? uevent
145
146The above example illustrates a case in which the Intel RAPL technology,
147available in Intel® IA-64 and IA-32 Processor Architectures, is used. There is one
148control type called intel-rapl which contains two power zones, intel-rapl:0 and
149intel-rapl:1, representing CPU packages. Each of these power zones contains
150two subzones, intel-rapl:j:0 and intel-rapl:j:1 (j = 0, 1), representing the
151"core" and the "uncore" parts of the given CPU package, respectively. All of
152the zones and subzones contain energy monitoring attributes (energy_uj,
153max_energy_range_uj) and constraint attributes (constraint_*) allowing controls
154to be applied (the constraints in the 'package' power zones apply to the whole
155CPU packages and the subzone constraints only apply to the respective parts of
156the given package individually). Since Intel RAPL doesn't provide instantaneous
157power value, there is no power_uw attribute.
158
159In addition to that, each power zone contains a name attribute, allowing the
160part of the system represented by that zone to be identified.
161For example:
162
163cat /sys/class/power_cap/intel-rapl/intel-rapl:0/name
164package-0
165
166The Intel RAPL technology allows two constraints, short term and long term,
167with two different time windows to be applied to each power zone. Thus for
168each zone there are 2 attributes representing the constraint names, 2 power
169limits and 2 attributes representing the sizes of the time windows. Such that,
170constraint_j_* attributes correspond to the jth constraint (j = 0,1).
171
172For example:
173 constraint_0_name
174 constraint_0_power_limit_uw
175 constraint_0_time_window_us
176 constraint_1_name
177 constraint_1_power_limit_uw
178 constraint_1_time_window_us
179
180Power Zone Attributes
181=================================
182Monitoring attributes
183----------------------
184
185energy_uj (rw): Current energy counter in micro joules. Write "0" to reset.
186If the counter can not be reset, then this attribute is read only.
187
188max_energy_range_uj (ro): Range of the above energy counter in micro-joules.
189
190power_uw (ro): Current power in micro watts.
191
192max_power_range_uw (ro): Range of the above power value in micro-watts.
193
194name (ro): Name of this power zone.
195
196It is possible that some domains have both power ranges and energy counter ranges;
197however, only one is mandatory.
198
199Constraints
200----------------
201constraint_X_power_limit_uw (rw): Power limit in micro watts, which should be
202applicable for the time window specified by "constraint_X_time_window_us".
203
204constraint_X_time_window_us (rw): Time window in micro seconds.
205
206constraint_X_name (ro): An optional name of the constraint
207
208constraint_X_max_power_uw(ro): Maximum allowed power in micro watts.
209
210constraint_X_min_power_uw(ro): Minimum allowed power in micro watts.
211
212constraint_X_max_time_window_us(ro): Maximum allowed time window in micro seconds.
213
214constraint_X_min_time_window_us(ro): Minimum allowed time window in micro seconds.
215
216Except power_limit_uw and time_window_us other fields are optional.
217
218Common zone and control type attributes
219----------------------------------------
220enabled (rw): Enable/Disable controls at zone level or for all zones using
221a control type.
222
223Power Cap Client Driver Interface
224==================================
225The API summary:
226
227Call powercap_register_control_type() to register control type object.
228Call powercap_register_zone() to register a power zone (under a given
229control type), either as a top-level power zone or as a subzone of another
230power zone registered earlier.
231The number of constraints in a power zone and the corresponding callbacks have
232to be defined prior to calling powercap_register_zone() to register that zone.
233
234To Free a power zone call powercap_unregister_zone().
235To free a control type object call powercap_unregister_control_type().
236Detailed API can be generated using kernel-doc on include/linux/powercap.h.
diff --git a/Documentation/power/regulator/consumer.txt b/Documentation/power/regulator/consumer.rst
index e51564c1a140..0cd8cc1275a7 100644
--- a/Documentation/power/regulator/consumer.txt
+++ b/Documentation/power/regulator/consumer.rst
@@ -1,3 +1,4 @@
1===================================
1Regulator Consumer Driver Interface 2Regulator Consumer Driver Interface
2=================================== 3===================================
3 4
@@ -8,73 +9,77 @@ Please see overview.txt for a description of the terms used in this text.
81. Consumer Regulator Access (static & dynamic drivers) 91. Consumer Regulator Access (static & dynamic drivers)
9======================================================= 10=======================================================
10 11
11A consumer driver can get access to its supply regulator by calling :- 12A consumer driver can get access to its supply regulator by calling ::
12 13
13regulator = regulator_get(dev, "Vcc"); 14 regulator = regulator_get(dev, "Vcc");
14 15
15The consumer passes in its struct device pointer and power supply ID. The core 16The consumer passes in its struct device pointer and power supply ID. The core
16then finds the correct regulator by consulting a machine specific lookup table. 17then finds the correct regulator by consulting a machine specific lookup table.
17If the lookup is successful then this call will return a pointer to the struct 18If the lookup is successful then this call will return a pointer to the struct
18regulator that supplies this consumer. 19regulator that supplies this consumer.
19 20
20To release the regulator the consumer driver should call :- 21To release the regulator the consumer driver should call ::
21 22
22regulator_put(regulator); 23 regulator_put(regulator);
23 24
24Consumers can be supplied by more than one regulator e.g. codec consumer with 25Consumers can be supplied by more than one regulator e.g. codec consumer with
25analog and digital supplies :- 26analog and digital supplies ::
26 27
27digital = regulator_get(dev, "Vcc"); /* digital core */ 28 digital = regulator_get(dev, "Vcc"); /* digital core */
28analog = regulator_get(dev, "Avdd"); /* analog */ 29 analog = regulator_get(dev, "Avdd"); /* analog */
29 30
30The regulator access functions regulator_get() and regulator_put() will 31The regulator access functions regulator_get() and regulator_put() will
31usually be called in your device driver's probe() and remove() respectively. 32usually be called in your device driver's probe() and remove() respectively.
32 33
33 34
342. Regulator Output Enable & Disable (static & dynamic drivers) 352. Regulator Output Enable & Disable (static & dynamic drivers)
35==================================================================== 36===============================================================
37
36 38
37A consumer can enable its power supply by calling:- 39A consumer can enable its power supply by calling::
38 40
39int regulator_enable(regulator); 41 int regulator_enable(regulator);
40 42
41NOTE: The supply may already be enabled before regulator_enable() is called. 43NOTE:
42This may happen if the consumer shares the regulator or the regulator has been 44 The supply may already be enabled before regulator_enable() is called.
43previously enabled by bootloader or kernel board initialization code. 45 This may happen if the consumer shares the regulator or the regulator has been
46 previously enabled by bootloader or kernel board initialization code.
44 47
45A consumer can determine if a regulator is enabled by calling :- 48A consumer can determine if a regulator is enabled by calling::
46 49
47int regulator_is_enabled(regulator); 50 int regulator_is_enabled(regulator);
48 51
49This will return > zero when the regulator is enabled. 52This will return > zero when the regulator is enabled.
50 53
51 54
52A consumer can disable its supply when no longer needed by calling :- 55A consumer can disable its supply when no longer needed by calling::
53 56
54int regulator_disable(regulator); 57 int regulator_disable(regulator);
55 58
56NOTE: This may not disable the supply if it's shared with other consumers. The 59NOTE:
57regulator will only be disabled when the enabled reference count is zero. 60 This may not disable the supply if it's shared with other consumers. The
61 regulator will only be disabled when the enabled reference count is zero.
58 62
59Finally, a regulator can be forcefully disabled in the case of an emergency :- 63Finally, a regulator can be forcefully disabled in the case of an emergency::
60 64
61int regulator_force_disable(regulator); 65 int regulator_force_disable(regulator);
62 66
63NOTE: this will immediately and forcefully shutdown the regulator output. All 67NOTE:
64consumers will be powered off. 68 this will immediately and forcefully shutdown the regulator output. All
69 consumers will be powered off.
65 70
66 71
673. Regulator Voltage Control & Status (dynamic drivers) 723. Regulator Voltage Control & Status (dynamic drivers)
68====================================================== 73=======================================================
69 74
70Some consumer drivers need to be able to dynamically change their supply 75Some consumer drivers need to be able to dynamically change their supply
71voltage to match system operating points. e.g. CPUfreq drivers can scale 76voltage to match system operating points. e.g. CPUfreq drivers can scale
72voltage along with frequency to save power, SD drivers may need to select the 77voltage along with frequency to save power, SD drivers may need to select the
73correct card voltage, etc. 78correct card voltage, etc.
74 79
75Consumers can control their supply voltage by calling :- 80Consumers can control their supply voltage by calling::
76 81
77int regulator_set_voltage(regulator, min_uV, max_uV); 82 int regulator_set_voltage(regulator, min_uV, max_uV);
78 83
79Where min_uV and max_uV are the minimum and maximum acceptable voltages in 84Where min_uV and max_uV are the minimum and maximum acceptable voltages in
80microvolts. 85microvolts.
@@ -84,47 +89,50 @@ when enabled, then the voltage changes instantly, otherwise the voltage
84configuration changes and the voltage is physically set when the regulator is 89configuration changes and the voltage is physically set when the regulator is
85next enabled. 90next enabled.
86 91
87The regulator's configured voltage output can be found by calling :- 92The regulator's configured voltage output can be found by calling::
88 93
89int regulator_get_voltage(regulator); 94 int regulator_get_voltage(regulator);
90 95
91NOTE: get_voltage() will return the configured output voltage whether the 96NOTE:
92regulator is enabled or disabled and should NOT be used to determine regulator 97 get_voltage() will return the configured output voltage whether the
93output state. However this can be used in conjunction with is_enabled() to 98 regulator is enabled or disabled and should NOT be used to determine regulator
94determine the regulator physical output voltage. 99 output state. However this can be used in conjunction with is_enabled() to
100 determine the regulator physical output voltage.
95 101
96 102
974. Regulator Current Limit Control & Status (dynamic drivers) 1034. Regulator Current Limit Control & Status (dynamic drivers)
98=========================================================== 104=============================================================
99 105
100Some consumer drivers need to be able to dynamically change their supply 106Some consumer drivers need to be able to dynamically change their supply
101current limit to match system operating points. e.g. LCD backlight driver can 107current limit to match system operating points. e.g. LCD backlight driver can
102change the current limit to vary the backlight brightness, USB drivers may want 108change the current limit to vary the backlight brightness, USB drivers may want
103to set the limit to 500mA when supplying power. 109to set the limit to 500mA when supplying power.
104 110
105Consumers can control their supply current limit by calling :- 111Consumers can control their supply current limit by calling::
106 112
107int regulator_set_current_limit(regulator, min_uA, max_uA); 113 int regulator_set_current_limit(regulator, min_uA, max_uA);
108 114
109Where min_uA and max_uA are the minimum and maximum acceptable current limit in 115Where min_uA and max_uA are the minimum and maximum acceptable current limit in
110microamps. 116microamps.
111 117
112NOTE: this can be called when the regulator is enabled or disabled. If called 118NOTE:
113when enabled, then the current limit changes instantly, otherwise the current 119 this can be called when the regulator is enabled or disabled. If called
114limit configuration changes and the current limit is physically set when the 120 when enabled, then the current limit changes instantly, otherwise the current
115regulator is next enabled. 121 limit configuration changes and the current limit is physically set when the
122 regulator is next enabled.
116 123
117A regulator's current limit can be found by calling :- 124A regulator's current limit can be found by calling::
118 125
119int regulator_get_current_limit(regulator); 126 int regulator_get_current_limit(regulator);
120 127
121NOTE: get_current_limit() will return the current limit whether the regulator 128NOTE:
122is enabled or disabled and should not be used to determine regulator current 129 get_current_limit() will return the current limit whether the regulator
123load. 130 is enabled or disabled and should not be used to determine regulator current
131 load.
124 132
125 133
1265. Regulator Operating Mode Control & Status (dynamic drivers) 1345. Regulator Operating Mode Control & Status (dynamic drivers)
127============================================================= 135==============================================================
128 136
129Some consumers can further save system power by changing the operating mode of 137Some consumers can further save system power by changing the operating mode of
130their supply regulator to be more efficient when the consumers operating state 138their supply regulator to be more efficient when the consumers operating state
@@ -135,9 +143,9 @@ Regulator operating mode can be changed indirectly or directly.
135Indirect operating mode control. 143Indirect operating mode control.
136-------------------------------- 144--------------------------------
137Consumer drivers can request a change in their supply regulator operating mode 145Consumer drivers can request a change in their supply regulator operating mode
138by calling :- 146by calling::
139 147
140int regulator_set_load(struct regulator *regulator, int load_uA); 148 int regulator_set_load(struct regulator *regulator, int load_uA);
141 149
142This will cause the core to recalculate the total load on the regulator (based 150This will cause the core to recalculate the total load on the regulator (based
143on all its consumers) and change operating mode (if necessary and permitted) 151on all its consumers) and change operating mode (if necessary and permitted)
@@ -153,12 +161,13 @@ consumers.
153 161
154Direct operating mode control. 162Direct operating mode control.
155------------------------------ 163------------------------------
164
156Bespoke or tightly coupled drivers may want to directly control regulator 165Bespoke or tightly coupled drivers may want to directly control regulator
157operating mode depending on their operating point. This can be achieved by 166operating mode depending on their operating point. This can be achieved by
158calling :- 167calling::
159 168
160int regulator_set_mode(struct regulator *regulator, unsigned int mode); 169 int regulator_set_mode(struct regulator *regulator, unsigned int mode);
161unsigned int regulator_get_mode(struct regulator *regulator); 170 unsigned int regulator_get_mode(struct regulator *regulator);
162 171
163Direct mode will only be used by consumers that *know* about the regulator and 172Direct mode will only be used by consumers that *know* about the regulator and
164are not sharing the regulator with other consumers. 173are not sharing the regulator with other consumers.
@@ -166,24 +175,26 @@ are not sharing the regulator with other consumers.
166 175
1676. Regulator Events 1766. Regulator Events
168=================== 177===================
178
169Regulators can notify consumers of external events. Events could be received by 179Regulators can notify consumers of external events. Events could be received by
170consumers under regulator stress or failure conditions. 180consumers under regulator stress or failure conditions.
171 181
172Consumers can register interest in regulator events by calling :- 182Consumers can register interest in regulator events by calling::
173 183
174int regulator_register_notifier(struct regulator *regulator, 184 int regulator_register_notifier(struct regulator *regulator,
175 struct notifier_block *nb); 185 struct notifier_block *nb);
176 186
177Consumers can unregister interest by calling :- 187Consumers can unregister interest by calling::
178 188
179int regulator_unregister_notifier(struct regulator *regulator, 189 int regulator_unregister_notifier(struct regulator *regulator,
180 struct notifier_block *nb); 190 struct notifier_block *nb);
181 191
182Regulators use the kernel notifier framework to send events to their interested 192Regulators use the kernel notifier framework to send events to their interested
183consumers. 193consumers.
184 194
1857. Regulator Direct Register Access 1957. Regulator Direct Register Access
186=================================== 196===================================
197
187Some kinds of power management hardware or firmware are designed such that 198Some kinds of power management hardware or firmware are designed such that
188they need to do low-level hardware access to regulators, with no involvement 199they need to do low-level hardware access to regulators, with no involvement
189from the kernel. Examples of such devices are: 200from the kernel. Examples of such devices are:
@@ -199,20 +210,20 @@ to it. The regulator framework provides the following helpers for querying
199these details. 210these details.
200 211
201Bus-specific details, like I2C addresses or transfer rates are handled by the 212Bus-specific details, like I2C addresses or transfer rates are handled by the
202regmap framework. To get the regulator's regmap (if supported), use :- 213regmap framework. To get the regulator's regmap (if supported), use::
203 214
204struct regmap *regulator_get_regmap(struct regulator *regulator); 215 struct regmap *regulator_get_regmap(struct regulator *regulator);
205 216
206To obtain the hardware register offset and bitmask for the regulator's voltage 217To obtain the hardware register offset and bitmask for the regulator's voltage
207selector register, use :- 218selector register, use::
208 219
209int regulator_get_hardware_vsel_register(struct regulator *regulator, 220 int regulator_get_hardware_vsel_register(struct regulator *regulator,
210 unsigned *vsel_reg, 221 unsigned *vsel_reg,
211 unsigned *vsel_mask); 222 unsigned *vsel_mask);
212 223
213To convert a regulator framework voltage selector code (used by 224To convert a regulator framework voltage selector code (used by
214regulator_list_voltage) to a hardware-specific voltage selector that can be 225regulator_list_voltage) to a hardware-specific voltage selector that can be
215directly written to the voltage selector register, use :- 226directly written to the voltage selector register, use::
216 227
217int regulator_list_hardware_vsel(struct regulator *regulator, 228 int regulator_list_hardware_vsel(struct regulator *regulator,
218 unsigned selector); 229 unsigned selector);
diff --git a/Documentation/power/regulator/design.txt b/Documentation/power/regulator/design.rst
index fdd919b96830..3b09c6841dc4 100644
--- a/Documentation/power/regulator/design.txt
+++ b/Documentation/power/regulator/design.rst
@@ -1,3 +1,4 @@
1==========================
1Regulator API design notes 2Regulator API design notes
2========================== 3==========================
3 4
@@ -14,7 +15,9 @@ Safety
14 have different power requirements, and not all components with power 15 have different power requirements, and not all components with power
15 requirements are visible to software. 16 requirements are visible to software.
16 17
17 => The API should make no changes to the hardware state unless it has 18.. note::
19
20 The API should make no changes to the hardware state unless it has
18 specific knowledge that these changes are safe to perform on this 21 specific knowledge that these changes are safe to perform on this
19 particular system. 22 particular system.
20 23
@@ -28,6 +31,8 @@ Consumer use cases
28 - Many of the power supplies in the system will be shared between many 31 - Many of the power supplies in the system will be shared between many
29 different consumers. 32 different consumers.
30 33
31 => The consumer API should be structured so that these use cases are 34.. note::
35
36 The consumer API should be structured so that these use cases are
32 very easy to handle and so that consumers will work with shared 37 very easy to handle and so that consumers will work with shared
33 supplies without any additional effort. 38 supplies without any additional effort.
diff --git a/Documentation/power/regulator/machine.txt b/Documentation/power/regulator/machine.rst
index eff4dcaaa252..22fffefaa3ad 100644
--- a/Documentation/power/regulator/machine.txt
+++ b/Documentation/power/regulator/machine.rst
@@ -1,10 +1,11 @@
1==================================
1Regulator Machine Driver Interface 2Regulator Machine Driver Interface
2=================================== 3==================================
3 4
4The regulator machine driver interface is intended for board/machine specific 5The regulator machine driver interface is intended for board/machine specific
5initialisation code to configure the regulator subsystem. 6initialisation code to configure the regulator subsystem.
6 7
7Consider the following machine :- 8Consider the following machine::
8 9
9 Regulator-1 -+-> Regulator-2 --> [Consumer A @ 1.8 - 2.0V] 10 Regulator-1 -+-> Regulator-2 --> [Consumer A @ 1.8 - 2.0V]
10 | 11 |
@@ -13,31 +14,31 @@ Consider the following machine :-
13The drivers for consumers A & B must be mapped to the correct regulator in 14The drivers for consumers A & B must be mapped to the correct regulator in
14order to control their power supplies. This mapping can be achieved in machine 15order to control their power supplies. This mapping can be achieved in machine
15initialisation code by creating a struct regulator_consumer_supply for 16initialisation code by creating a struct regulator_consumer_supply for
16each regulator. 17each regulator::
17 18
18struct regulator_consumer_supply { 19 struct regulator_consumer_supply {
19 const char *dev_name; /* consumer dev_name() */ 20 const char *dev_name; /* consumer dev_name() */
20 const char *supply; /* consumer supply - e.g. "vcc" */ 21 const char *supply; /* consumer supply - e.g. "vcc" */
21}; 22 };
22 23
23e.g. for the machine above 24e.g. for the machine above::
24 25
25static struct regulator_consumer_supply regulator1_consumers[] = { 26 static struct regulator_consumer_supply regulator1_consumers[] = {
26 REGULATOR_SUPPLY("Vcc", "consumer B"), 27 REGULATOR_SUPPLY("Vcc", "consumer B"),
27}; 28 };
28 29
29static struct regulator_consumer_supply regulator2_consumers[] = { 30 static struct regulator_consumer_supply regulator2_consumers[] = {
30 REGULATOR_SUPPLY("Vcc", "consumer A"), 31 REGULATOR_SUPPLY("Vcc", "consumer A"),
31}; 32 };
32 33
33This maps Regulator-1 to the 'Vcc' supply for Consumer B and maps Regulator-2 34This maps Regulator-1 to the 'Vcc' supply for Consumer B and maps Regulator-2
34to the 'Vcc' supply for Consumer A. 35to the 'Vcc' supply for Consumer A.
35 36
36Constraints can now be registered by defining a struct regulator_init_data 37Constraints can now be registered by defining a struct regulator_init_data
37for each regulator power domain. This structure also maps the consumers 38for each regulator power domain. This structure also maps the consumers
38to their supply regulators :- 39to their supply regulators::
39 40
40static struct regulator_init_data regulator1_data = { 41 static struct regulator_init_data regulator1_data = {
41 .constraints = { 42 .constraints = {
42 .name = "Regulator-1", 43 .name = "Regulator-1",
43 .min_uV = 3300000, 44 .min_uV = 3300000,
@@ -46,7 +47,7 @@ static struct regulator_init_data regulator1_data = {
46 }, 47 },
47 .num_consumer_supplies = ARRAY_SIZE(regulator1_consumers), 48 .num_consumer_supplies = ARRAY_SIZE(regulator1_consumers),
48 .consumer_supplies = regulator1_consumers, 49 .consumer_supplies = regulator1_consumers,
49}; 50 };
50 51
51The name field should be set to something that is usefully descriptive 52The name field should be set to something that is usefully descriptive
52for the board for configuration of supplies for other regulators and 53for the board for configuration of supplies for other regulators and
@@ -57,9 +58,9 @@ name is provided then the subsystem will choose one.
57Regulator-1 supplies power to Regulator-2. This relationship must be registered 58Regulator-1 supplies power to Regulator-2. This relationship must be registered
58with the core so that Regulator-1 is also enabled when Consumer A enables its 59with the core so that Regulator-1 is also enabled when Consumer A enables its
59supply (Regulator-2). The supply regulator is set by the supply_regulator 60supply (Regulator-2). The supply regulator is set by the supply_regulator
60field below and co:- 61field below and co::
61 62
62static struct regulator_init_data regulator2_data = { 63 static struct regulator_init_data regulator2_data = {
63 .supply_regulator = "Regulator-1", 64 .supply_regulator = "Regulator-1",
64 .constraints = { 65 .constraints = {
65 .min_uV = 1800000, 66 .min_uV = 1800000,
@@ -69,11 +70,11 @@ static struct regulator_init_data regulator2_data = {
69 }, 70 },
70 .num_consumer_supplies = ARRAY_SIZE(regulator2_consumers), 71 .num_consumer_supplies = ARRAY_SIZE(regulator2_consumers),
71 .consumer_supplies = regulator2_consumers, 72 .consumer_supplies = regulator2_consumers,
72}; 73 };
73 74
74Finally the regulator devices must be registered in the usual manner. 75Finally the regulator devices must be registered in the usual manner::
75 76
76static struct platform_device regulator_devices[] = { 77 static struct platform_device regulator_devices[] = {
77 { 78 {
78 .name = "regulator", 79 .name = "regulator",
79 .id = DCDC_1, 80 .id = DCDC_1,
@@ -88,9 +89,9 @@ static struct platform_device regulator_devices[] = {
88 .platform_data = &regulator2_data, 89 .platform_data = &regulator2_data,
89 }, 90 },
90 }, 91 },
91}; 92 };
92/* register regulator 1 device */ 93 /* register regulator 1 device */
93platform_device_register(&regulator_devices[0]); 94 platform_device_register(&regulator_devices[0]);
94 95
95/* register regulator 2 device */ 96 /* register regulator 2 device */
96platform_device_register(&regulator_devices[1]); 97 platform_device_register(&regulator_devices[1]);
diff --git a/Documentation/power/regulator/overview.txt b/Documentation/power/regulator/overview.rst
index 721b4739ec32..ee494c70a7c4 100644
--- a/Documentation/power/regulator/overview.txt
+++ b/Documentation/power/regulator/overview.rst
@@ -1,3 +1,4 @@
1=============================================
1Linux voltage and current regulator framework 2Linux voltage and current regulator framework
2============================================= 3=============================================
3 4
@@ -13,26 +14,30 @@ regulators (where voltage output is controllable) and current sinks (where
13current limit is controllable). 14current limit is controllable).
14 15
15(C) 2008 Wolfson Microelectronics PLC. 16(C) 2008 Wolfson Microelectronics PLC.
17
16Author: Liam Girdwood <lrg@slimlogic.co.uk> 18Author: Liam Girdwood <lrg@slimlogic.co.uk>
17 19
18 20
19Nomenclature 21Nomenclature
20============ 22============
21 23
22Some terms used in this document:- 24Some terms used in this document:
23 25
24 o Regulator - Electronic device that supplies power to other devices. 26 - Regulator
27 - Electronic device that supplies power to other devices.
25 Most regulators can enable and disable their output while 28 Most regulators can enable and disable their output while
26 some can control their output voltage and or current. 29 some can control their output voltage and or current.
27 30
28 Input Voltage -> Regulator -> Output Voltage 31 Input Voltage -> Regulator -> Output Voltage
29 32
30 33
31 o PMIC - Power Management IC. An IC that contains numerous regulators 34 - PMIC
32 and often contains other subsystems. 35 - Power Management IC. An IC that contains numerous
36 regulators and often contains other subsystems.
33 37
34 38
35 o Consumer - Electronic device that is supplied power by a regulator. 39 - Consumer
40 - Electronic device that is supplied power by a regulator.
36 Consumers can be classified into two types:- 41 Consumers can be classified into two types:-
37 42
38 Static: consumer does not change its supply voltage or 43 Static: consumer does not change its supply voltage or
@@ -44,46 +49,48 @@ Some terms used in this document:-
44 current limit to meet operation demands. 49 current limit to meet operation demands.
45 50
46 51
47 o Power Domain - Electronic circuit that is supplied its input power by the 52 - Power Domain
53 - Electronic circuit that is supplied its input power by the
48 output power of a regulator, switch or by another power 54 output power of a regulator, switch or by another power
49 domain. 55 domain.
50 56
51 The supply regulator may be behind a switch(s). i.e. 57 The supply regulator may be behind a switch(s). i.e.::
52 58
53 Regulator -+-> Switch-1 -+-> Switch-2 --> [Consumer A] 59 Regulator -+-> Switch-1 -+-> Switch-2 --> [Consumer A]
54 | | 60 | |
55 | +-> [Consumer B], [Consumer C] 61 | +-> [Consumer B], [Consumer C]
56 | 62 |
57 +-> [Consumer D], [Consumer E] 63 +-> [Consumer D], [Consumer E]
58 64
59 That is one regulator and three power domains: 65 That is one regulator and three power domains:
60 66
61 Domain 1: Switch-1, Consumers D & E. 67 - Domain 1: Switch-1, Consumers D & E.
62 Domain 2: Switch-2, Consumers B & C. 68 - Domain 2: Switch-2, Consumers B & C.
63 Domain 3: Consumer A. 69 - Domain 3: Consumer A.
64 70
65 and this represents a "supplies" relationship: 71 and this represents a "supplies" relationship:
66 72
67 Domain-1 --> Domain-2 --> Domain-3. 73 Domain-1 --> Domain-2 --> Domain-3.
68 74
69 A power domain may have regulators that are supplied power 75 A power domain may have regulators that are supplied power
70 by other regulators. i.e. 76 by other regulators. i.e.::
71 77
72 Regulator-1 -+-> Regulator-2 -+-> [Consumer A] 78 Regulator-1 -+-> Regulator-2 -+-> [Consumer A]
73 | 79 |
74 +-> [Consumer B] 80 +-> [Consumer B]
75 81
76 This gives us two regulators and two power domains: 82 This gives us two regulators and two power domains:
77 83
78 Domain 1: Regulator-2, Consumer B. 84 - Domain 1: Regulator-2, Consumer B.
79 Domain 2: Consumer A. 85 - Domain 2: Consumer A.
80 86
81 and a "supplies" relationship: 87 and a "supplies" relationship:
82 88
83 Domain-1 --> Domain-2 89 Domain-1 --> Domain-2
84 90
85 91
86 o Constraints - Constraints are used to define power levels for performance 92 - Constraints
93 - Constraints are used to define power levels for performance
87 and hardware protection. Constraints exist at three levels: 94 and hardware protection. Constraints exist at three levels:
88 95
89 Regulator Level: This is defined by the regulator hardware 96 Regulator Level: This is defined by the regulator hardware
@@ -141,7 +148,7 @@ relevant to non SoC devices and is split into the following four interfaces:-
141 limit. This also compiles out if not in use so drivers can be reused in 148 limit. This also compiles out if not in use so drivers can be reused in
142 systems with no regulator based power control. 149 systems with no regulator based power control.
143 150
144 See Documentation/power/regulator/consumer.txt 151 See Documentation/power/regulator/consumer.rst
145 152
146 2. Regulator driver interface. 153 2. Regulator driver interface.
147 154
@@ -149,7 +156,7 @@ relevant to non SoC devices and is split into the following four interfaces:-
149 operations to the core. It also has a notifier call chain for propagating 156 operations to the core. It also has a notifier call chain for propagating
150 regulator events to clients. 157 regulator events to clients.
151 158
152 See Documentation/power/regulator/regulator.txt 159 See Documentation/power/regulator/regulator.rst
153 160
154 3. Machine interface. 161 3. Machine interface.
155 162
@@ -160,7 +167,7 @@ relevant to non SoC devices and is split into the following four interfaces:-
160 allows the creation of a regulator tree whereby some regulators are 167 allows the creation of a regulator tree whereby some regulators are
161 supplied by others (similar to a clock tree). 168 supplied by others (similar to a clock tree).
162 169
163 See Documentation/power/regulator/machine.txt 170 See Documentation/power/regulator/machine.rst
164 171
165 4. Userspace ABI. 172 4. Userspace ABI.
166 173
diff --git a/Documentation/power/regulator/regulator.rst b/Documentation/power/regulator/regulator.rst
new file mode 100644
index 000000000000..794b3256fbb9
--- /dev/null
+++ b/Documentation/power/regulator/regulator.rst
@@ -0,0 +1,32 @@
1==========================
2Regulator Driver Interface
3==========================
4
5The regulator driver interface is relatively simple and designed to allow
6regulator drivers to register their services with the core framework.
7
8
9Registration
10============
11
12Drivers can register a regulator by calling::
13
14 struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
15 const struct regulator_config *config);
16
17This will register the regulator's capabilities and operations to the regulator
18core.
19
20Regulators can be unregistered by calling::
21
22 void regulator_unregister(struct regulator_dev *rdev);
23
24
25Regulator Events
26================
27
28Regulators can send events (e.g. overtemperature, undervoltage, etc) to
29consumer drivers by calling::
30
31 int regulator_notifier_call_chain(struct regulator_dev *rdev,
32 unsigned long event, void *data);
diff --git a/Documentation/power/regulator/regulator.txt b/Documentation/power/regulator/regulator.txt
deleted file mode 100644
index b17e5833ce21..000000000000
--- a/Documentation/power/regulator/regulator.txt
+++ /dev/null
@@ -1,30 +0,0 @@
1Regulator Driver Interface
2==========================
3
4The regulator driver interface is relatively simple and designed to allow
5regulator drivers to register their services with the core framework.
6
7
8Registration
9============
10
11Drivers can register a regulator by calling :-
12
13struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
14 const struct regulator_config *config);
15
16This will register the regulator's capabilities and operations to the regulator
17core.
18
19Regulators can be unregistered by calling :-
20
21void regulator_unregister(struct regulator_dev *rdev);
22
23
24Regulator Events
25================
26Regulators can send events (e.g. overtemperature, undervoltage, etc) to
27consumer drivers by calling :-
28
29int regulator_notifier_call_chain(struct regulator_dev *rdev,
30 unsigned long event, void *data);
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.rst
index 937e33c46211..2c2ec99b5088 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.rst
@@ -1,10 +1,15 @@
1==================================================
1Runtime Power Management Framework for I/O Devices 2Runtime Power Management Framework for I/O Devices
3==================================================
2 4
3(C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. 5(C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
6
4(C) 2010 Alan Stern <stern@rowland.harvard.edu> 7(C) 2010 Alan Stern <stern@rowland.harvard.edu>
8
5(C) 2014 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com> 9(C) 2014 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
6 10
71. Introduction 111. Introduction
12===============
8 13
9Support for runtime power management (runtime PM) of I/O devices is provided 14Support for runtime power management (runtime PM) of I/O devices is provided
10at the power management core (PM core) level by means of: 15at the power management core (PM core) level by means of:
@@ -33,16 +38,17 @@ fields of 'struct dev_pm_info' and the core helper functions provided for
33runtime PM are described below. 38runtime PM are described below.
34 39
352. Device Runtime PM Callbacks 402. Device Runtime PM Callbacks
41==============================
36 42
37There are three device runtime PM callbacks defined in 'struct dev_pm_ops': 43There are three device runtime PM callbacks defined in 'struct dev_pm_ops'::
38 44
39struct dev_pm_ops { 45 struct dev_pm_ops {
40 ... 46 ...
41 int (*runtime_suspend)(struct device *dev); 47 int (*runtime_suspend)(struct device *dev);
42 int (*runtime_resume)(struct device *dev); 48 int (*runtime_resume)(struct device *dev);
43 int (*runtime_idle)(struct device *dev); 49 int (*runtime_idle)(struct device *dev);
44 ... 50 ...
45}; 51 };
46 52
47The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks 53The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks
48are executed by the PM core for the device's subsystem that may be either of 54are executed by the PM core for the device's subsystem that may be either of
@@ -112,7 +118,7 @@ low-power state during the execution of the suspend callback, it is expected
112that remote wakeup will be enabled for the device. Generally, remote wakeup 118that remote wakeup will be enabled for the device. Generally, remote wakeup
113should be enabled for all input devices put into low-power states at run time. 119should be enabled for all input devices put into low-power states at run time.
114 120
115The subsystem-level resume callback, if present, is _entirely_ _responsible_ for 121The subsystem-level resume callback, if present, is **entirely responsible** for
116handling the resume of the device as appropriate, which may, but need not 122handling the resume of the device as appropriate, which may, but need not
117include executing the device driver's own ->runtime_resume() callback (from the 123include executing the device driver's own ->runtime_resume() callback (from the
118PM core's point of view it is not necessary to implement a ->runtime_resume() 124PM core's point of view it is not necessary to implement a ->runtime_resume()
@@ -197,95 +203,96 @@ rules:
197 except for scheduled autosuspends. 203 except for scheduled autosuspends.
198 204
1993. Runtime PM Device Fields 2053. Runtime PM Device Fields
206===========================
200 207
201The following device runtime PM fields are present in 'struct dev_pm_info', as 208The following device runtime PM fields are present in 'struct dev_pm_info', as
202defined in include/linux/pm.h: 209defined in include/linux/pm.h:
203 210
204 struct timer_list suspend_timer; 211 `struct timer_list suspend_timer;`
205 - timer used for scheduling (delayed) suspend and autosuspend requests 212 - timer used for scheduling (delayed) suspend and autosuspend requests
206 213
207 unsigned long timer_expires; 214 `unsigned long timer_expires;`
208 - timer expiration time, in jiffies (if this is different from zero, the 215 - timer expiration time, in jiffies (if this is different from zero, the
209 timer is running and will expire at that time, otherwise the timer is not 216 timer is running and will expire at that time, otherwise the timer is not
210 running) 217 running)
211 218
212 struct work_struct work; 219 `struct work_struct work;`
213 - work structure used for queuing up requests (i.e. work items in pm_wq) 220 - work structure used for queuing up requests (i.e. work items in pm_wq)
214 221
215 wait_queue_head_t wait_queue; 222 `wait_queue_head_t wait_queue;`
216 - wait queue used if any of the helper functions needs to wait for another 223 - wait queue used if any of the helper functions needs to wait for another
217 one to complete 224 one to complete
218 225
219 spinlock_t lock; 226 `spinlock_t lock;`
220 - lock used for synchronization 227 - lock used for synchronization
221 228
222 atomic_t usage_count; 229 `atomic_t usage_count;`
223 - the usage counter of the device 230 - the usage counter of the device
224 231
225 atomic_t child_count; 232 `atomic_t child_count;`
226 - the count of 'active' children of the device 233 - the count of 'active' children of the device
227 234
228 unsigned int ignore_children; 235 `unsigned int ignore_children;`
229 - if set, the value of child_count is ignored (but still updated) 236 - if set, the value of child_count is ignored (but still updated)
230 237
231 unsigned int disable_depth; 238 `unsigned int disable_depth;`
232 - used for disabling the helper functions (they work normally if this is 239 - used for disabling the helper functions (they work normally if this is
233 equal to zero); the initial value of it is 1 (i.e. runtime PM is 240 equal to zero); the initial value of it is 1 (i.e. runtime PM is
234 initially disabled for all devices) 241 initially disabled for all devices)
235 242
236 int runtime_error; 243 `int runtime_error;`
237 - if set, there was a fatal error (one of the callbacks returned error code 244 - if set, there was a fatal error (one of the callbacks returned error code
238 as described in Section 2), so the helper functions will not work until 245 as described in Section 2), so the helper functions will not work until
239 this flag is cleared; this is the error code returned by the failing 246 this flag is cleared; this is the error code returned by the failing
240 callback 247 callback
241 248
242 unsigned int idle_notification; 249 `unsigned int idle_notification;`
243 - if set, ->runtime_idle() is being executed 250 - if set, ->runtime_idle() is being executed
244 251
245 unsigned int request_pending; 252 `unsigned int request_pending;`
246 - if set, there's a pending request (i.e. a work item queued up into pm_wq) 253 - if set, there's a pending request (i.e. a work item queued up into pm_wq)
247 254
248 enum rpm_request request; 255 `enum rpm_request request;`
249 - type of request that's pending (valid if request_pending is set) 256 - type of request that's pending (valid if request_pending is set)
250 257
251 unsigned int deferred_resume; 258 `unsigned int deferred_resume;`
252 - set if ->runtime_resume() is about to be run while ->runtime_suspend() is 259 - set if ->runtime_resume() is about to be run while ->runtime_suspend() is
253 being executed for that device and it is not practical to wait for the 260 being executed for that device and it is not practical to wait for the
254 suspend to complete; means "start a resume as soon as you've suspended" 261 suspend to complete; means "start a resume as soon as you've suspended"
255 262
256 enum rpm_status runtime_status; 263 `enum rpm_status runtime_status;`
257 - the runtime PM status of the device; this field's initial value is 264 - the runtime PM status of the device; this field's initial value is
258 RPM_SUSPENDED, which means that each device is initially regarded by the 265 RPM_SUSPENDED, which means that each device is initially regarded by the
259 PM core as 'suspended', regardless of its real hardware status 266 PM core as 'suspended', regardless of its real hardware status
260 267
261 unsigned int runtime_auto; 268 `unsigned int runtime_auto;`
262 - if set, indicates that the user space has allowed the device driver to 269 - if set, indicates that the user space has allowed the device driver to
263 power manage the device at run time via the /sys/devices/.../power/control 270 power manage the device at run time via the /sys/devices/.../power/control
264 interface; it may only be modified with the help of the pm_runtime_allow() 271 interface; it may only be modified with the help of the pm_runtime_allow()
265 and pm_runtime_forbid() helper functions 272 and pm_runtime_forbid() helper functions
266 273
267 unsigned int no_callbacks; 274 `unsigned int no_callbacks;`
268 - indicates that the device does not use the runtime PM callbacks (see 275 - indicates that the device does not use the runtime PM callbacks (see
269 Section 8); it may be modified only by the pm_runtime_no_callbacks() 276 Section 8); it may be modified only by the pm_runtime_no_callbacks()
270 helper function 277 helper function
271 278
272 unsigned int irq_safe; 279 `unsigned int irq_safe;`
273 - indicates that the ->runtime_suspend() and ->runtime_resume() callbacks 280 - indicates that the ->runtime_suspend() and ->runtime_resume() callbacks
274 will be invoked with the spinlock held and interrupts disabled 281 will be invoked with the spinlock held and interrupts disabled
275 282
276 unsigned int use_autosuspend; 283 `unsigned int use_autosuspend;`
277 - indicates that the device's driver supports delayed autosuspend (see 284 - indicates that the device's driver supports delayed autosuspend (see
278 Section 9); it may be modified only by the 285 Section 9); it may be modified only by the
279 pm_runtime{_dont}_use_autosuspend() helper functions 286 pm_runtime{_dont}_use_autosuspend() helper functions
280 287
281 unsigned int timer_autosuspends; 288 `unsigned int timer_autosuspends;`
282 - indicates that the PM core should attempt to carry out an autosuspend 289 - indicates that the PM core should attempt to carry out an autosuspend
283 when the timer expires rather than a normal suspend 290 when the timer expires rather than a normal suspend
284 291
285 int autosuspend_delay; 292 `int autosuspend_delay;`
286 - the delay time (in milliseconds) to be used for autosuspend 293 - the delay time (in milliseconds) to be used for autosuspend
287 294
288 unsigned long last_busy; 295 `unsigned long last_busy;`
289 - the time (in jiffies) when the pm_runtime_mark_last_busy() helper 296 - the time (in jiffies) when the pm_runtime_mark_last_busy() helper
290 function was last called for this device; used in calculating inactivity 297 function was last called for this device; used in calculating inactivity
291 periods for autosuspend 298 periods for autosuspend
@@ -293,37 +300,38 @@ defined in include/linux/pm.h:
293All of the above fields are members of the 'power' member of 'struct device'. 300All of the above fields are members of the 'power' member of 'struct device'.
294 301
2954. Runtime PM Device Helper Functions 3024. Runtime PM Device Helper Functions
303=====================================
296 304
297The following runtime PM helper functions are defined in 305The following runtime PM helper functions are defined in
298drivers/base/power/runtime.c and include/linux/pm_runtime.h: 306drivers/base/power/runtime.c and include/linux/pm_runtime.h:
299 307
300 void pm_runtime_init(struct device *dev); 308 `void pm_runtime_init(struct device *dev);`
301 - initialize the device runtime PM fields in 'struct dev_pm_info' 309 - initialize the device runtime PM fields in 'struct dev_pm_info'
302 310
303 void pm_runtime_remove(struct device *dev); 311 `void pm_runtime_remove(struct device *dev);`
304 - make sure that the runtime PM of the device will be disabled after 312 - make sure that the runtime PM of the device will be disabled after
305 removing the device from device hierarchy 313 removing the device from device hierarchy
306 314
307 int pm_runtime_idle(struct device *dev); 315 `int pm_runtime_idle(struct device *dev);`
308 - execute the subsystem-level idle callback for the device; returns an 316 - execute the subsystem-level idle callback for the device; returns an
309 error code on failure, where -EINPROGRESS means that ->runtime_idle() is 317 error code on failure, where -EINPROGRESS means that ->runtime_idle() is
310 already being executed; if there is no callback or the callback returns 0 318 already being executed; if there is no callback or the callback returns 0
311 then run pm_runtime_autosuspend(dev) and return its result 319 then run pm_runtime_autosuspend(dev) and return its result
312 320
313 int pm_runtime_suspend(struct device *dev); 321 `int pm_runtime_suspend(struct device *dev);`
314 - execute the subsystem-level suspend callback for the device; returns 0 on 322 - execute the subsystem-level suspend callback for the device; returns 0 on
315 success, 1 if the device's runtime PM status was already 'suspended', or 323 success, 1 if the device's runtime PM status was already 'suspended', or
316 error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt 324 error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt
317 to suspend the device again in future and -EACCES means that 325 to suspend the device again in future and -EACCES means that
318 'power.disable_depth' is different from 0 326 'power.disable_depth' is different from 0
319 327
320 int pm_runtime_autosuspend(struct device *dev); 328 `int pm_runtime_autosuspend(struct device *dev);`
321 - same as pm_runtime_suspend() except that the autosuspend delay is taken 329 - same as pm_runtime_suspend() except that the autosuspend delay is taken
322 into account; if pm_runtime_autosuspend_expiration() says the delay has 330 into account; if pm_runtime_autosuspend_expiration() says the delay has
323 not yet expired then an autosuspend is scheduled for the appropriate time 331 not yet expired then an autosuspend is scheduled for the appropriate time
324 and 0 is returned 332 and 0 is returned
325 333
326 int pm_runtime_resume(struct device *dev); 334 `int pm_runtime_resume(struct device *dev);`
327 - execute the subsystem-level resume callback for the device; returns 0 on 335 - execute the subsystem-level resume callback for the device; returns 0 on
328 success, 1 if the device's runtime PM status was already 'active' or 336 success, 1 if the device's runtime PM status was already 'active' or
329 error code on failure, where -EAGAIN means it may be safe to attempt to 337 error code on failure, where -EAGAIN means it may be safe to attempt to
@@ -331,17 +339,17 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
331 checked additionally, and -EACCES means that 'power.disable_depth' is 339 checked additionally, and -EACCES means that 'power.disable_depth' is
332 different from 0 340 different from 0
333 341
334 int pm_request_idle(struct device *dev); 342 `int pm_request_idle(struct device *dev);`
335 - submit a request to execute the subsystem-level idle callback for the 343 - submit a request to execute the subsystem-level idle callback for the
336 device (the request is represented by a work item in pm_wq); returns 0 on 344 device (the request is represented by a work item in pm_wq); returns 0 on
337 success or error code if the request has not been queued up 345 success or error code if the request has not been queued up
338 346
339 int pm_request_autosuspend(struct device *dev); 347 `int pm_request_autosuspend(struct device *dev);`
340 - schedule the execution of the subsystem-level suspend callback for the 348 - schedule the execution of the subsystem-level suspend callback for the
341 device when the autosuspend delay has expired; if the delay has already 349 device when the autosuspend delay has expired; if the delay has already
342 expired then the work item is queued up immediately 350 expired then the work item is queued up immediately
343 351
344 int pm_schedule_suspend(struct device *dev, unsigned int delay); 352 `int pm_schedule_suspend(struct device *dev, unsigned int delay);`
345 - schedule the execution of the subsystem-level suspend callback for the 353 - schedule the execution of the subsystem-level suspend callback for the
346 device in future, where 'delay' is the time to wait before queuing up a 354 device in future, where 'delay' is the time to wait before queuing up a
347 suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work 355 suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work
@@ -351,58 +359,58 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
351 ->runtime_suspend() is already scheduled and not yet expired, the new 359 ->runtime_suspend() is already scheduled and not yet expired, the new
352 value of 'delay' will be used as the time to wait 360 value of 'delay' will be used as the time to wait
353 361
354 int pm_request_resume(struct device *dev); 362 `int pm_request_resume(struct device *dev);`
355 - submit a request to execute the subsystem-level resume callback for the 363 - submit a request to execute the subsystem-level resume callback for the
356 device (the request is represented by a work item in pm_wq); returns 0 on 364 device (the request is represented by a work item in pm_wq); returns 0 on
357 success, 1 if the device's runtime PM status was already 'active', or 365 success, 1 if the device's runtime PM status was already 'active', or
358 error code if the request hasn't been queued up 366 error code if the request hasn't been queued up
359 367
360 void pm_runtime_get_noresume(struct device *dev); 368 `void pm_runtime_get_noresume(struct device *dev);`
361 - increment the device's usage counter 369 - increment the device's usage counter
362 370
363 int pm_runtime_get(struct device *dev); 371 `int pm_runtime_get(struct device *dev);`
364 - increment the device's usage counter, run pm_request_resume(dev) and 372 - increment the device's usage counter, run pm_request_resume(dev) and
365 return its result 373 return its result
366 374
367 int pm_runtime_get_sync(struct device *dev); 375 `int pm_runtime_get_sync(struct device *dev);`
368 - increment the device's usage counter, run pm_runtime_resume(dev) and 376 - increment the device's usage counter, run pm_runtime_resume(dev) and
369 return its result 377 return its result
370 378
371 int pm_runtime_get_if_in_use(struct device *dev); 379 `int pm_runtime_get_if_in_use(struct device *dev);`
372 - return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the 380 - return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the
373 runtime PM status is RPM_ACTIVE and the runtime PM usage counter is 381 runtime PM status is RPM_ACTIVE and the runtime PM usage counter is
374 nonzero, increment the counter and return 1; otherwise return 0 without 382 nonzero, increment the counter and return 1; otherwise return 0 without
375 changing the counter 383 changing the counter
376 384
377 void pm_runtime_put_noidle(struct device *dev); 385 `void pm_runtime_put_noidle(struct device *dev);`
378 - decrement the device's usage counter 386 - decrement the device's usage counter
379 387
380 int pm_runtime_put(struct device *dev); 388 `int pm_runtime_put(struct device *dev);`
381 - decrement the device's usage counter; if the result is 0 then run 389 - decrement the device's usage counter; if the result is 0 then run
382 pm_request_idle(dev) and return its result 390 pm_request_idle(dev) and return its result
383 391
384 int pm_runtime_put_autosuspend(struct device *dev); 392 `int pm_runtime_put_autosuspend(struct device *dev);`
385 - decrement the device's usage counter; if the result is 0 then run 393 - decrement the device's usage counter; if the result is 0 then run
386 pm_request_autosuspend(dev) and return its result 394 pm_request_autosuspend(dev) and return its result
387 395
388 int pm_runtime_put_sync(struct device *dev); 396 `int pm_runtime_put_sync(struct device *dev);`
389 - decrement the device's usage counter; if the result is 0 then run 397 - decrement the device's usage counter; if the result is 0 then run
390 pm_runtime_idle(dev) and return its result 398 pm_runtime_idle(dev) and return its result
391 399
392 int pm_runtime_put_sync_suspend(struct device *dev); 400 `int pm_runtime_put_sync_suspend(struct device *dev);`
393 - decrement the device's usage counter; if the result is 0 then run 401 - decrement the device's usage counter; if the result is 0 then run
394 pm_runtime_suspend(dev) and return its result 402 pm_runtime_suspend(dev) and return its result
395 403
396 int pm_runtime_put_sync_autosuspend(struct device *dev); 404 `int pm_runtime_put_sync_autosuspend(struct device *dev);`
397 - decrement the device's usage counter; if the result is 0 then run 405 - decrement the device's usage counter; if the result is 0 then run
398 pm_runtime_autosuspend(dev) and return its result 406 pm_runtime_autosuspend(dev) and return its result
399 407
400 void pm_runtime_enable(struct device *dev); 408 `void pm_runtime_enable(struct device *dev);`
401 - decrement the device's 'power.disable_depth' field; if that field is equal 409 - decrement the device's 'power.disable_depth' field; if that field is equal
402 to zero, the runtime PM helper functions can execute subsystem-level 410 to zero, the runtime PM helper functions can execute subsystem-level
403 callbacks described in Section 2 for the device 411 callbacks described in Section 2 for the device
404 412
405 int pm_runtime_disable(struct device *dev); 413 `int pm_runtime_disable(struct device *dev);`
406 - increment the device's 'power.disable_depth' field (if the value of that 414 - increment the device's 'power.disable_depth' field (if the value of that
407 field was previously zero, this prevents subsystem-level runtime PM 415 field was previously zero, this prevents subsystem-level runtime PM
408 callbacks from being run for the device), make sure that all of the 416 callbacks from being run for the device), make sure that all of the
@@ -411,7 +419,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
411 necessary to execute the subsystem-level resume callback for the device 419 necessary to execute the subsystem-level resume callback for the device
412 to satisfy that request, otherwise 0 is returned 420 to satisfy that request, otherwise 0 is returned
413 421
414 int pm_runtime_barrier(struct device *dev); 422 `int pm_runtime_barrier(struct device *dev);`
415 - check if there's a resume request pending for the device and resume it 423 - check if there's a resume request pending for the device and resume it
416 (synchronously) in that case, cancel any other pending runtime PM requests 424 (synchronously) in that case, cancel any other pending runtime PM requests
417 regarding it and wait for all runtime PM operations on it in progress to 425 regarding it and wait for all runtime PM operations on it in progress to
@@ -419,10 +427,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
419 necessary to execute the subsystem-level resume callback for the device to 427 necessary to execute the subsystem-level resume callback for the device to
420 satisfy that request, otherwise 0 is returned 428 satisfy that request, otherwise 0 is returned
421 429
422 void pm_suspend_ignore_children(struct device *dev, bool enable); 430 `void pm_suspend_ignore_children(struct device *dev, bool enable);`
423 - set/unset the power.ignore_children flag of the device 431 - set/unset the power.ignore_children flag of the device
424 432
425 int pm_runtime_set_active(struct device *dev); 433 `int pm_runtime_set_active(struct device *dev);`
426 - clear the device's 'power.runtime_error' flag, set the device's runtime 434 - clear the device's 'power.runtime_error' flag, set the device's runtime
427 PM status to 'active' and update its parent's counter of 'active' 435 PM status to 'active' and update its parent's counter of 'active'
428 children as appropriate (it is only valid to use this function if 436 children as appropriate (it is only valid to use this function if
@@ -430,61 +438,61 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
430 zero); it will fail and return error code if the device has a parent 438 zero); it will fail and return error code if the device has a parent
431 which is not active and the 'power.ignore_children' flag of which is unset 439 which is not active and the 'power.ignore_children' flag of which is unset
432 440
433 void pm_runtime_set_suspended(struct device *dev); 441 `void pm_runtime_set_suspended(struct device *dev);`
434 - clear the device's 'power.runtime_error' flag, set the device's runtime 442 - clear the device's 'power.runtime_error' flag, set the device's runtime
435 PM status to 'suspended' and update its parent's counter of 'active' 443 PM status to 'suspended' and update its parent's counter of 'active'
436 children as appropriate (it is only valid to use this function if 444 children as appropriate (it is only valid to use this function if
437 'power.runtime_error' is set or 'power.disable_depth' is greater than 445 'power.runtime_error' is set or 'power.disable_depth' is greater than
438 zero) 446 zero)
439 447
440 bool pm_runtime_active(struct device *dev); 448 `bool pm_runtime_active(struct device *dev);`
441 - return true if the device's runtime PM status is 'active' or its 449 - return true if the device's runtime PM status is 'active' or its
442 'power.disable_depth' field is not equal to zero, or false otherwise 450 'power.disable_depth' field is not equal to zero, or false otherwise
443 451
444 bool pm_runtime_suspended(struct device *dev); 452 `bool pm_runtime_suspended(struct device *dev);`
445 - return true if the device's runtime PM status is 'suspended' and its 453 - return true if the device's runtime PM status is 'suspended' and its
446 'power.disable_depth' field is equal to zero, or false otherwise 454 'power.disable_depth' field is equal to zero, or false otherwise
447 455
448 bool pm_runtime_status_suspended(struct device *dev); 456 `bool pm_runtime_status_suspended(struct device *dev);`
449 - return true if the device's runtime PM status is 'suspended' 457 - return true if the device's runtime PM status is 'suspended'
450 458
451 void pm_runtime_allow(struct device *dev); 459 `void pm_runtime_allow(struct device *dev);`
452 - set the power.runtime_auto flag for the device and decrease its usage 460 - set the power.runtime_auto flag for the device and decrease its usage
453 counter (used by the /sys/devices/.../power/control interface to 461 counter (used by the /sys/devices/.../power/control interface to
454 effectively allow the device to be power managed at run time) 462 effectively allow the device to be power managed at run time)
455 463
456 void pm_runtime_forbid(struct device *dev); 464 `void pm_runtime_forbid(struct device *dev);`
457 - unset the power.runtime_auto flag for the device and increase its usage 465 - unset the power.runtime_auto flag for the device and increase its usage
458 counter (used by the /sys/devices/.../power/control interface to 466 counter (used by the /sys/devices/.../power/control interface to
459 effectively prevent the device from being power managed at run time) 467 effectively prevent the device from being power managed at run time)
460 468
461 void pm_runtime_no_callbacks(struct device *dev); 469 `void pm_runtime_no_callbacks(struct device *dev);`
462 - set the power.no_callbacks flag for the device and remove the runtime 470 - set the power.no_callbacks flag for the device and remove the runtime
463 PM attributes from /sys/devices/.../power (or prevent them from being 471 PM attributes from /sys/devices/.../power (or prevent them from being
464 added when the device is registered) 472 added when the device is registered)
465 473
466 void pm_runtime_irq_safe(struct device *dev); 474 `void pm_runtime_irq_safe(struct device *dev);`
467 - set the power.irq_safe flag for the device, causing the runtime-PM 475 - set the power.irq_safe flag for the device, causing the runtime-PM
468 callbacks to be invoked with interrupts off 476 callbacks to be invoked with interrupts off
469 477
470 bool pm_runtime_is_irq_safe(struct device *dev); 478 `bool pm_runtime_is_irq_safe(struct device *dev);`
471 - return true if power.irq_safe flag was set for the device, causing 479 - return true if power.irq_safe flag was set for the device, causing
472 the runtime-PM callbacks to be invoked with interrupts off 480 the runtime-PM callbacks to be invoked with interrupts off
473 481
474 void pm_runtime_mark_last_busy(struct device *dev); 482 `void pm_runtime_mark_last_busy(struct device *dev);`
475 - set the power.last_busy field to the current time 483 - set the power.last_busy field to the current time
476 484
477 void pm_runtime_use_autosuspend(struct device *dev); 485 `void pm_runtime_use_autosuspend(struct device *dev);`
478 - set the power.use_autosuspend flag, enabling autosuspend delays; call 486 - set the power.use_autosuspend flag, enabling autosuspend delays; call
479 pm_runtime_get_sync if the flag was previously cleared and 487 pm_runtime_get_sync if the flag was previously cleared and
480 power.autosuspend_delay is negative 488 power.autosuspend_delay is negative
481 489
482 void pm_runtime_dont_use_autosuspend(struct device *dev); 490 `void pm_runtime_dont_use_autosuspend(struct device *dev);`
483 - clear the power.use_autosuspend flag, disabling autosuspend delays; 491 - clear the power.use_autosuspend flag, disabling autosuspend delays;
484 decrement the device's usage counter if the flag was previously set and 492 decrement the device's usage counter if the flag was previously set and
485 power.autosuspend_delay is negative; call pm_runtime_idle 493 power.autosuspend_delay is negative; call pm_runtime_idle
486 494
487 void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); 495 `void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);`
488 - set the power.autosuspend_delay value to 'delay' (expressed in 496 - set the power.autosuspend_delay value to 'delay' (expressed in
489 milliseconds); if 'delay' is negative then runtime suspends are 497 milliseconds); if 'delay' is negative then runtime suspends are
490 prevented; if power.use_autosuspend is set, pm_runtime_get_sync may be 498 prevented; if power.use_autosuspend is set, pm_runtime_get_sync may be
@@ -493,7 +501,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
493 changed to or from a negative value; if power.use_autosuspend is clear, 501 changed to or from a negative value; if power.use_autosuspend is clear,
494 pm_runtime_idle is called 502 pm_runtime_idle is called
495 503
496 unsigned long pm_runtime_autosuspend_expiration(struct device *dev); 504 `unsigned long pm_runtime_autosuspend_expiration(struct device *dev);`
497 - calculate the time when the current autosuspend delay period will expire, 505 - calculate the time when the current autosuspend delay period will expire,
498 based on power.last_busy and power.autosuspend_delay; if the delay time 506 based on power.last_busy and power.autosuspend_delay; if the delay time
499 is 1000 ms or larger then the expiration time is rounded up to the 507 is 1000 ms or larger then the expiration time is rounded up to the
@@ -503,36 +511,37 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
503 511
504It is safe to execute the following helper functions from interrupt context: 512It is safe to execute the following helper functions from interrupt context:
505 513
506pm_request_idle() 514- pm_request_idle()
507pm_request_autosuspend() 515- pm_request_autosuspend()
508pm_schedule_suspend() 516- pm_schedule_suspend()
509pm_request_resume() 517- pm_request_resume()
510pm_runtime_get_noresume() 518- pm_runtime_get_noresume()
511pm_runtime_get() 519- pm_runtime_get()
512pm_runtime_put_noidle() 520- pm_runtime_put_noidle()
513pm_runtime_put() 521- pm_runtime_put()
514pm_runtime_put_autosuspend() 522- pm_runtime_put_autosuspend()
515pm_runtime_enable() 523- pm_runtime_enable()
516pm_suspend_ignore_children() 524- pm_suspend_ignore_children()
517pm_runtime_set_active() 525- pm_runtime_set_active()
518pm_runtime_set_suspended() 526- pm_runtime_set_suspended()
519pm_runtime_suspended() 527- pm_runtime_suspended()
520pm_runtime_mark_last_busy() 528- pm_runtime_mark_last_busy()
521pm_runtime_autosuspend_expiration() 529- pm_runtime_autosuspend_expiration()
522 530
523If pm_runtime_irq_safe() has been called for a device then the following helper 531If pm_runtime_irq_safe() has been called for a device then the following helper
524functions may also be used in interrupt context: 532functions may also be used in interrupt context:
525 533
526pm_runtime_idle() 534- pm_runtime_idle()
527pm_runtime_suspend() 535- pm_runtime_suspend()
528pm_runtime_autosuspend() 536- pm_runtime_autosuspend()
529pm_runtime_resume() 537- pm_runtime_resume()
530pm_runtime_get_sync() 538- pm_runtime_get_sync()
531pm_runtime_put_sync() 539- pm_runtime_put_sync()
532pm_runtime_put_sync_suspend() 540- pm_runtime_put_sync_suspend()
533pm_runtime_put_sync_autosuspend() 541- pm_runtime_put_sync_autosuspend()
534 542
5355. Runtime PM Initialization, Device Probing and Removal 5435. Runtime PM Initialization, Device Probing and Removal
544========================================================
536 545
537Initially, the runtime PM is disabled for all devices, which means that the 546Initially, the runtime PM is disabled for all devices, which means that the
538majority of the runtime PM helper functions described in Section 4 will return 547majority of the runtime PM helper functions described in Section 4 will return
@@ -608,6 +617,7 @@ manage the device at run time, the driver may confuse it by using
608pm_runtime_forbid() this way. 617pm_runtime_forbid() this way.
609 618
6106. Runtime PM and System Sleep 6196. Runtime PM and System Sleep
620==============================
611 621
612Runtime PM and system sleep (i.e., system suspend and hibernation, also known 622Runtime PM and system sleep (i.e., system suspend and hibernation, also known
613as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of 623as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of
@@ -647,9 +657,9 @@ brought back to full power during resume, then its runtime PM status will have
647to be updated to reflect the actual post-system sleep status. The way to do 657to be updated to reflect the actual post-system sleep status. The way to do
648this is: 658this is:
649 659
650 pm_runtime_disable(dev); 660 - pm_runtime_disable(dev);
651 pm_runtime_set_active(dev); 661 - pm_runtime_set_active(dev);
652 pm_runtime_enable(dev); 662 - pm_runtime_enable(dev);
653 663
654The PM core always increments the runtime usage counter before calling the 664The PM core always increments the runtime usage counter before calling the
655->suspend() callback and decrements it after calling the ->resume() callback. 665->suspend() callback and decrements it after calling the ->resume() callback.
@@ -705,66 +715,66 @@ Subsystems may wish to conserve code space by using the set of generic power
705management callbacks provided by the PM core, defined in 715management callbacks provided by the PM core, defined in
706drivers/base/power/generic_ops.c: 716drivers/base/power/generic_ops.c:
707 717
708 int pm_generic_runtime_suspend(struct device *dev); 718 `int pm_generic_runtime_suspend(struct device *dev);`
709 - invoke the ->runtime_suspend() callback provided by the driver of this 719 - invoke the ->runtime_suspend() callback provided by the driver of this
710 device and return its result, or return 0 if not defined 720 device and return its result, or return 0 if not defined
711 721
712 int pm_generic_runtime_resume(struct device *dev); 722 `int pm_generic_runtime_resume(struct device *dev);`
713 - invoke the ->runtime_resume() callback provided by the driver of this 723 - invoke the ->runtime_resume() callback provided by the driver of this
714 device and return its result, or return 0 if not defined 724 device and return its result, or return 0 if not defined
715 725
716 int pm_generic_suspend(struct device *dev); 726 `int pm_generic_suspend(struct device *dev);`
717 - if the device has not been suspended at run time, invoke the ->suspend() 727 - if the device has not been suspended at run time, invoke the ->suspend()
718 callback provided by its driver and return its result, or return 0 if not 728 callback provided by its driver and return its result, or return 0 if not
719 defined 729 defined
720 730
721 int pm_generic_suspend_noirq(struct device *dev); 731 `int pm_generic_suspend_noirq(struct device *dev);`
722 - if pm_runtime_suspended(dev) returns "false", invoke the ->suspend_noirq() 732 - if pm_runtime_suspended(dev) returns "false", invoke the ->suspend_noirq()
723 callback provided by the device's driver and return its result, or return 733 callback provided by the device's driver and return its result, or return
724 0 if not defined 734 0 if not defined
725 735
726 int pm_generic_resume(struct device *dev); 736 `int pm_generic_resume(struct device *dev);`
727 - invoke the ->resume() callback provided by the driver of this device and, 737 - invoke the ->resume() callback provided by the driver of this device and,
728 if successful, change the device's runtime PM status to 'active' 738 if successful, change the device's runtime PM status to 'active'
729 739
730 int pm_generic_resume_noirq(struct device *dev); 740 `int pm_generic_resume_noirq(struct device *dev);`
731 - invoke the ->resume_noirq() callback provided by the driver of this device 741 - invoke the ->resume_noirq() callback provided by the driver of this device
732 742
733 int pm_generic_freeze(struct device *dev); 743 `int pm_generic_freeze(struct device *dev);`
734 - if the device has not been suspended at run time, invoke the ->freeze() 744 - if the device has not been suspended at run time, invoke the ->freeze()
735 callback provided by its driver and return its result, or return 0 if not 745 callback provided by its driver and return its result, or return 0 if not
736 defined 746 defined
737 747
738 int pm_generic_freeze_noirq(struct device *dev); 748 `int pm_generic_freeze_noirq(struct device *dev);`
739 - if pm_runtime_suspended(dev) returns "false", invoke the ->freeze_noirq() 749 - if pm_runtime_suspended(dev) returns "false", invoke the ->freeze_noirq()
740 callback provided by the device's driver and return its result, or return 750 callback provided by the device's driver and return its result, or return
741 0 if not defined 751 0 if not defined
742 752
743 int pm_generic_thaw(struct device *dev); 753 `int pm_generic_thaw(struct device *dev);`
744 - if the device has not been suspended at run time, invoke the ->thaw() 754 - if the device has not been suspended at run time, invoke the ->thaw()
745 callback provided by its driver and return its result, or return 0 if not 755 callback provided by its driver and return its result, or return 0 if not
746 defined 756 defined
747 757
748 int pm_generic_thaw_noirq(struct device *dev); 758 `int pm_generic_thaw_noirq(struct device *dev);`
749 - if pm_runtime_suspended(dev) returns "false", invoke the ->thaw_noirq() 759 - if pm_runtime_suspended(dev) returns "false", invoke the ->thaw_noirq()
750 callback provided by the device's driver and return its result, or return 760 callback provided by the device's driver and return its result, or return
751 0 if not defined 761 0 if not defined
752 762
753 int pm_generic_poweroff(struct device *dev); 763 `int pm_generic_poweroff(struct device *dev);`
754 - if the device has not been suspended at run time, invoke the ->poweroff() 764 - if the device has not been suspended at run time, invoke the ->poweroff()
755 callback provided by its driver and return its result, or return 0 if not 765 callback provided by its driver and return its result, or return 0 if not
756 defined 766 defined
757 767
758 int pm_generic_poweroff_noirq(struct device *dev); 768 `int pm_generic_poweroff_noirq(struct device *dev);`
759 - if pm_runtime_suspended(dev) returns "false", run the ->poweroff_noirq() 769 - if pm_runtime_suspended(dev) returns "false", run the ->poweroff_noirq()
760 callback provided by the device's driver and return its result, or return 770 callback provided by the device's driver and return its result, or return
761 0 if not defined 771 0 if not defined
762 772
763 int pm_generic_restore(struct device *dev); 773 `int pm_generic_restore(struct device *dev);`
764 - invoke the ->restore() callback provided by the driver of this device and, 774 - invoke the ->restore() callback provided by the driver of this device and,
765 if successful, change the device's runtime PM status to 'active' 775 if successful, change the device's runtime PM status to 'active'
766 776
767 int pm_generic_restore_noirq(struct device *dev); 777 `int pm_generic_restore_noirq(struct device *dev);`
768 - invoke the ->restore_noirq() callback provided by the device's driver 778 - invoke the ->restore_noirq() callback provided by the device's driver
769 779
770These functions are the defaults used by the PM core, if a subsystem doesn't 780These functions are the defaults used by the PM core, if a subsystem doesn't
@@ -781,6 +791,7 @@ UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its
781last argument to NULL). 791last argument to NULL).
782 792
7838. "No-Callback" Devices 7938. "No-Callback" Devices
794========================
784 795
785Some "devices" are only logical sub-devices of their parent and cannot be 796Some "devices" are only logical sub-devices of their parent and cannot be
786power-managed on their own. (The prototype example is a USB interface. Entire 797power-managed on their own. (The prototype example is a USB interface. Entire
@@ -807,6 +818,7 @@ parent must take responsibility for telling the device's driver when the
807parent's power state changes. 818parent's power state changes.
808 819
8099. Autosuspend, or automatically-delayed suspends 8209. Autosuspend, or automatically-delayed suspends
821=================================================
810 822
811Changing a device's power state isn't free; it requires both time and energy. 823Changing a device's power state isn't free; it requires both time and energy.
812A device should be put in a low-power state only when there's some reason to 824A device should be put in a low-power state only when there's some reason to
@@ -832,8 +844,8 @@ registration the length should be controlled by user space, using the
832 844
833In order to use autosuspend, subsystems or drivers must call 845In order to use autosuspend, subsystems or drivers must call
834pm_runtime_use_autosuspend() (preferably before registering the device), and 846pm_runtime_use_autosuspend() (preferably before registering the device), and
835thereafter they should use the various *_autosuspend() helper functions instead 847thereafter they should use the various `*_autosuspend()` helper functions
836of the non-autosuspend counterparts: 848instead of the non-autosuspend counterparts::
837 849
838 Instead of: pm_runtime_suspend use: pm_runtime_autosuspend; 850 Instead of: pm_runtime_suspend use: pm_runtime_autosuspend;
839 Instead of: pm_schedule_suspend use: pm_request_autosuspend; 851 Instead of: pm_schedule_suspend use: pm_request_autosuspend;
@@ -858,7 +870,7 @@ The implementation is well suited for asynchronous use in interrupt contexts.
858However such use inevitably involves races, because the PM core can't 870However such use inevitably involves races, because the PM core can't
859synchronize ->runtime_suspend() callbacks with the arrival of I/O requests. 871synchronize ->runtime_suspend() callbacks with the arrival of I/O requests.
860This synchronization must be handled by the driver, using its private lock. 872This synchronization must be handled by the driver, using its private lock.
861Here is a schematic pseudo-code example: 873Here is a schematic pseudo-code example::
862 874
863 foo_read_or_write(struct foo_priv *foo, void *data) 875 foo_read_or_write(struct foo_priv *foo, void *data)
864 { 876 {
diff --git a/Documentation/power/s2ram.txt b/Documentation/power/s2ram.rst
index 4685aee197fd..d739aa7c742c 100644
--- a/Documentation/power/s2ram.txt
+++ b/Documentation/power/s2ram.rst
@@ -1,7 +1,9 @@
1 How to get s2ram working 1========================
2 ~~~~~~~~~~~~~~~~~~~~~~~~ 2How to get s2ram working
3 2006 Linus Torvalds 3========================
4 2006 Pavel Machek 4
52006 Linus Torvalds
62006 Pavel Machek
5 7
61) Check suspend.sf.net, the s2ram program there has a long whitelist of 81) Check suspend.sf.net, the s2ram program there has a long whitelist of
7 "known ok" machines, along with tricks to use on each one. 9 "known ok" machines, along with tricks to use on each one.
@@ -12,8 +14,8 @@
12 14
133) You can use Linus' TRACE_RESUME infrastructure, described below. 153) You can use Linus' TRACE_RESUME infrastructure, described below.
14 16
15 Using TRACE_RESUME 17Using TRACE_RESUME
16 ~~~~~~~~~~~~~~~~~~ 18~~~~~~~~~~~~~~~~~~
17 19
18I've been working at making the machines I have able to STR, and almost 20I've been working at making the machines I have able to STR, and almost
19always it's a driver that is buggy. Thank God for the suspend/resume 21always it's a driver that is buggy. Thank God for the suspend/resume
@@ -27,7 +29,7 @@ machine that doesn't boot) is:
27 29
28 - enable PM_DEBUG, and PM_TRACE 30 - enable PM_DEBUG, and PM_TRACE
29 31
30 - use a script like this: 32 - use a script like this::
31 33
32 #!/bin/sh 34 #!/bin/sh
33 sync 35 sync
@@ -38,7 +40,7 @@ machine that doesn't boot) is:
38 40
39 - if it doesn't come back up (which is usually the problem), reboot by 41 - if it doesn't come back up (which is usually the problem), reboot by
40 holding the power button down, and look at the dmesg output for things 42 holding the power button down, and look at the dmesg output for things
41 like 43 like::
42 44
43 Magic number: 4:156:725 45 Magic number: 4:156:725
44 hash matches drivers/base/power/resume.c:28 46 hash matches drivers/base/power/resume.c:28
@@ -52,7 +54,7 @@ machine that doesn't boot) is:
52 If no device matches the hash (or any matches appear to be false positives), 54 If no device matches the hash (or any matches appear to be false positives),
53 the culprit may be a device from a loadable kernel module that is not loaded 55 the culprit may be a device from a loadable kernel module that is not loaded
54 until after the hash is checked. You can check the hash against the current 56 until after the hash is checked. You can check the hash against the current
55 devices again after more modules are loaded using sysfs: 57 devices again after more modules are loaded using sysfs::
56 58
57 cat /sys/power/pm_trace_dev_match 59 cat /sys/power/pm_trace_dev_match
58 60
diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.rst
index a8751b8df10e..7ac8e1f549f4 100644
--- a/Documentation/power/suspend-and-cpuhotplug.txt
+++ b/Documentation/power/suspend-and-cpuhotplug.rst
@@ -1,10 +1,15 @@
1====================================================================
1Interaction of Suspend code (S3) with the CPU hotplug infrastructure 2Interaction of Suspend code (S3) with the CPU hotplug infrastructure
3====================================================================
2 4
3 (C) 2011 - 2014 Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> 5(C) 2011 - 2014 Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
4 6
5 7
6I. How does the regular CPU hotplug code differ from how the Suspend-to-RAM 8I. Differences between CPU hotplug and Suspend-to-RAM
7 infrastructure uses it internally? And where do they share common code? 9======================================================
10
11How does the regular CPU hotplug code differ from how the Suspend-to-RAM
12infrastructure uses it internally? And where do they share common code?
8 13
9Well, a picture is worth a thousand words... So ASCII art follows :-) 14Well, a picture is worth a thousand words... So ASCII art follows :-)
10 15
@@ -16,13 +21,13 @@ of describing where they take different paths and where they share code.
16What happens when regular CPU hotplug and Suspend-to-RAM race with each other 21What happens when regular CPU hotplug and Suspend-to-RAM race with each other
17is not depicted here.] 22is not depicted here.]
18 23
19On a high level, the suspend-resume cycle goes like this: 24On a high level, the suspend-resume cycle goes like this::
20 25
21|Freeze| -> |Disable nonboot| -> |Do suspend| -> |Enable nonboot| -> |Thaw | 26 |Freeze| -> |Disable nonboot| -> |Do suspend| -> |Enable nonboot| -> |Thaw |
22|tasks | | cpus | | | | cpus | |tasks| 27 |tasks | | cpus | | | | cpus | |tasks|
23 28
24 29
25More details follow: 30More details follow::
26 31
27 Suspend call path 32 Suspend call path
28 ----------------- 33 -----------------
@@ -87,7 +92,9 @@ More details follow:
87 92
88Resuming back is likewise, with the counterparts being (in the order of 93Resuming back is likewise, with the counterparts being (in the order of
89execution during resume): 94execution during resume):
90* enable_nonboot_cpus() which involves: 95
96* enable_nonboot_cpus() which involves::
97
91 | Acquire cpu_add_remove_lock 98 | Acquire cpu_add_remove_lock
92 | Decrease cpu_hotplug_disabled, thereby enabling regular cpu hotplug 99 | Decrease cpu_hotplug_disabled, thereby enabling regular cpu hotplug
93 | Call _cpu_up() [for all those cpus in the frozen_cpus mask, in a loop] 100 | Call _cpu_up() [for all those cpus in the frozen_cpus mask, in a loop]
@@ -103,6 +110,8 @@ It is to be noted here that the system_transition_mutex lock is acquired at the
103beginning, when we are just starting out to suspend, and then released only 110beginning, when we are just starting out to suspend, and then released only
104after the entire cycle is complete (i.e., suspend + resume). 111after the entire cycle is complete (i.e., suspend + resume).
105 112
113::
114
106 115
107 116
108 Regular CPU hotplug call path 117 Regular CPU hotplug call path
@@ -152,16 +161,16 @@ with the 'tasks_frozen' argument set to 1.
152 161
153 162
154Important files and functions/entry points: 163Important files and functions/entry points:
155------------------------------------------ 164-------------------------------------------
156 165
157kernel/power/process.c : freeze_processes(), thaw_processes() 166- kernel/power/process.c : freeze_processes(), thaw_processes()
158kernel/power/suspend.c : suspend_prepare(), suspend_enter(), suspend_finish() 167- kernel/power/suspend.c : suspend_prepare(), suspend_enter(), suspend_finish()
159kernel/cpu.c: cpu_[up|down](), _cpu_[up|down](), [disable|enable]_nonboot_cpus() 168- kernel/cpu.c: cpu_[up|down](), _cpu_[up|down](), [disable|enable]_nonboot_cpus()
160 169
161 170
162 171
163II. What are the issues involved in CPU hotplug? 172II. What are the issues involved in CPU hotplug?
164 ------------------------------------------- 173------------------------------------------------
165 174
166There are some interesting situations involving CPU hotplug and microcode 175There are some interesting situations involving CPU hotplug and microcode
167update on the CPUs, as discussed below: 176update on the CPUs, as discussed below:
@@ -243,8 +252,11 @@ d. Handling microcode update during suspend/hibernate:
243 cycles). 252 cycles).
244 253
245 254
246III. Are there any known problems when regular CPU hotplug and suspend race 255III. Known problems
247 with each other? 256===================
257
258Are there any known problems when regular CPU hotplug and suspend race
259with each other?
248 260
249Yes, they are listed below: 261Yes, they are listed below:
250 262
diff --git a/Documentation/power/suspend-and-interrupts.txt b/Documentation/power/suspend-and-interrupts.rst
index 8afb29a8604a..4cda6617709a 100644
--- a/Documentation/power/suspend-and-interrupts.txt
+++ b/Documentation/power/suspend-and-interrupts.rst
@@ -1,4 +1,6 @@
1====================================
1System Suspend and Device Interrupts 2System Suspend and Device Interrupts
3====================================
2 4
3Copyright (C) 2014 Intel Corp. 5Copyright (C) 2014 Intel Corp.
4Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> 6Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
diff --git a/Documentation/power/swsusp-and-swap-files.txt b/Documentation/power/swsusp-and-swap-files.rst
index f281886de490..a33a2919dbe4 100644
--- a/Documentation/power/swsusp-and-swap-files.txt
+++ b/Documentation/power/swsusp-and-swap-files.rst
@@ -1,4 +1,7 @@
1===============================================
1Using swap files with software suspend (swsusp) 2Using swap files with software suspend (swsusp)
3===============================================
4
2 (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 5 (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
3 6
4The Linux kernel handles swap files almost in the same way as it handles swap 7The Linux kernel handles swap files almost in the same way as it handles swap
@@ -21,20 +24,20 @@ units.
21 24
22In order to use a swap file with swsusp, you need to: 25In order to use a swap file with swsusp, you need to:
23 26
241) Create the swap file and make it active, eg. 271) Create the swap file and make it active, eg.::
25 28
26# dd if=/dev/zero of=<swap_file_path> bs=1024 count=<swap_file_size_in_k> 29 # dd if=/dev/zero of=<swap_file_path> bs=1024 count=<swap_file_size_in_k>
27# mkswap <swap_file_path> 30 # mkswap <swap_file_path>
28# swapon <swap_file_path> 31 # swapon <swap_file_path>
29 32
302) Use an application that will bmap the swap file with the help of the 332) Use an application that will bmap the swap file with the help of the
31FIBMAP ioctl and determine the location of the file's swap header, as the 34FIBMAP ioctl and determine the location of the file's swap header, as the
32offset, in <PAGE_SIZE> units, from the beginning of the partition which 35offset, in <PAGE_SIZE> units, from the beginning of the partition which
33holds the swap file. 36holds the swap file.
34 37
353) Add the following parameters to the kernel command line: 383) Add the following parameters to the kernel command line::
36 39
37resume=<swap_file_partition> resume_offset=<swap_file_offset> 40 resume=<swap_file_partition> resume_offset=<swap_file_offset>
38 41
39where <swap_file_partition> is the partition on which the swap file is located 42where <swap_file_partition> is the partition on which the swap file is located
40and <swap_file_offset> is the offset of the swap header determined by the 43and <swap_file_offset> is the offset of the swap header determined by the
@@ -46,7 +49,7 @@ OR
46 49
47Use a userland suspend application that will set the partition and offset 50Use a userland suspend application that will set the partition and offset
48with the help of the SNAPSHOT_SET_SWAP_AREA ioctl described in 51with the help of the SNAPSHOT_SET_SWAP_AREA ioctl described in
49Documentation/power/userland-swsusp.txt (this is the only method to suspend 52Documentation/power/userland-swsusp.rst (this is the only method to suspend
50to a swap file allowing the resume to be initiated from an initrd or initramfs 53to a swap file allowing the resume to be initiated from an initrd or initramfs
51image). 54image).
52 55
diff --git a/Documentation/power/swsusp-dmcrypt.txt b/Documentation/power/swsusp-dmcrypt.rst
index b802fbfd95ef..426df59172cd 100644
--- a/Documentation/power/swsusp-dmcrypt.txt
+++ b/Documentation/power/swsusp-dmcrypt.rst
@@ -1,13 +1,15 @@
1=======================================
2How to use dm-crypt and swsusp together
3=======================================
4
1Author: Andreas Steinmetz <ast@domdv.de> 5Author: Andreas Steinmetz <ast@domdv.de>
2 6
3 7
4How to use dm-crypt and swsusp together:
5========================================
6 8
7Some prerequisites: 9Some prerequisites:
8You know how dm-crypt works. If not, visit the following web page: 10You know how dm-crypt works. If not, visit the following web page:
9http://www.saout.de/misc/dm-crypt/ 11http://www.saout.de/misc/dm-crypt/
10You have read Documentation/power/swsusp.txt and understand it. 12You have read Documentation/power/swsusp.rst and understand it.
11You did read Documentation/admin-guide/initrd.rst and know how an initrd works. 13You did read Documentation/admin-guide/initrd.rst and know how an initrd works.
12You know how to create or how to modify an initrd. 14You know how to create or how to modify an initrd.
13 15
@@ -29,23 +31,23 @@ a way that the swap device you suspend to/resume from has
29always the same major/minor within the initrd as well as 31always the same major/minor within the initrd as well as
30within your running system. The easiest way to achieve this is 32within your running system. The easiest way to achieve this is
31to always set up this swap device first with dmsetup, so that 33to always set up this swap device first with dmsetup, so that
32it will always look like the following: 34it will always look like the following::
33 35
34brw------- 1 root root 254, 0 Jul 28 13:37 /dev/mapper/swap0 36 brw------- 1 root root 254, 0 Jul 28 13:37 /dev/mapper/swap0
35 37
36Now set up your kernel to use /dev/mapper/swap0 as the default 38Now set up your kernel to use /dev/mapper/swap0 as the default
37resume partition, so your kernel .config contains: 39resume partition, so your kernel .config contains::
38 40
39CONFIG_PM_STD_PARTITION="/dev/mapper/swap0" 41 CONFIG_PM_STD_PARTITION="/dev/mapper/swap0"
40 42
41Prepare your boot loader to use the initrd you will create or 43Prepare your boot loader to use the initrd you will create or
42modify. For lilo the simplest setup looks like the following 44modify. For lilo the simplest setup looks like the following
43lines: 45lines::
44 46
45image=/boot/vmlinuz 47 image=/boot/vmlinuz
46initrd=/boot/initrd.gz 48 initrd=/boot/initrd.gz
47label=linux 49 label=linux
48append="root=/dev/ram0 init=/linuxrc rw" 50 append="root=/dev/ram0 init=/linuxrc rw"
49 51
50Finally you need to create or modify your initrd. Let's assume 52Finally you need to create or modify your initrd. Let's assume
51you create an initrd that reads the required dm-crypt setup 53you create an initrd that reads the required dm-crypt setup
@@ -53,66 +55,66 @@ from a pcmcia flash disk card. The card is formatted with an ext2
53fs which resides on /dev/hde1 when the card is inserted. The 55fs which resides on /dev/hde1 when the card is inserted. The
54card contains at least the encrypted swap setup in a file 56card contains at least the encrypted swap setup in a file
55named "swapkey". /etc/fstab of your initrd contains something 57named "swapkey". /etc/fstab of your initrd contains something
56like the following: 58like the following::
57 59
58/dev/hda1 /mnt ext3 ro 0 0 60 /dev/hda1 /mnt ext3 ro 0 0
59none /proc proc defaults,noatime,nodiratime 0 0 61 none /proc proc defaults,noatime,nodiratime 0 0
60none /sys sysfs defaults,noatime,nodiratime 0 0 62 none /sys sysfs defaults,noatime,nodiratime 0 0
61 63
62/dev/hda1 contains an unencrypted mini system that sets up all 64/dev/hda1 contains an unencrypted mini system that sets up all
63of your crypto devices, again by reading the setup from the 65of your crypto devices, again by reading the setup from the
64pcmcia flash disk. What follows now is a /linuxrc for your 66pcmcia flash disk. What follows now is a /linuxrc for your
65initrd that allows you to resume from encrypted swap and that 67initrd that allows you to resume from encrypted swap and that
66continues boot with your mini system on /dev/hda1 if resume 68continues boot with your mini system on /dev/hda1 if resume
67does not happen: 69does not happen::
68 70
69#!/bin/sh 71 #!/bin/sh
70PATH=/sbin:/bin:/usr/sbin:/usr/bin 72 PATH=/sbin:/bin:/usr/sbin:/usr/bin
71mount /proc 73 mount /proc
72mount /sys 74 mount /sys
73mapped=0 75 mapped=0
74noresume=`grep -c noresume /proc/cmdline` 76 noresume=`grep -c noresume /proc/cmdline`
75if [ "$*" != "" ] 77 if [ "$*" != "" ]
76then
77 noresume=1
78fi
79dmesg -n 1
80/sbin/cardmgr -q
81for i in 1 2 3 4 5 6 7 8 9 0
82do
83 if [ -f /proc/ide/hde/media ]
84 then 78 then
85 usleep 500000 79 noresume=1
86 mount -t ext2 -o ro /dev/hde1 /mnt 80 fi
87 if [ -f /mnt/swapkey ] 81 dmesg -n 1
82 /sbin/cardmgr -q
83 for i in 1 2 3 4 5 6 7 8 9 0
84 do
85 if [ -f /proc/ide/hde/media ]
88 then 86 then
89 dmsetup create swap0 /mnt/swapkey > /dev/null 2>&1 && mapped=1 87 usleep 500000
88 mount -t ext2 -o ro /dev/hde1 /mnt
89 if [ -f /mnt/swapkey ]
90 then
91 dmsetup create swap0 /mnt/swapkey > /dev/null 2>&1 && mapped=1
92 fi
93 umount /mnt
94 break
90 fi 95 fi
91 umount /mnt 96 usleep 500000
92 break 97 done
93 fi 98 killproc /sbin/cardmgr
94 usleep 500000 99 dmesg -n 6
95done 100 if [ $mapped = 1 ]
96killproc /sbin/cardmgr
97dmesg -n 6
98if [ $mapped = 1 ]
99then
100 if [ $noresume != 0 ]
101 then 101 then
102 mkswap /dev/mapper/swap0 > /dev/null 2>&1 102 if [ $noresume != 0 ]
103 then
104 mkswap /dev/mapper/swap0 > /dev/null 2>&1
105 fi
106 echo 254:0 > /sys/power/resume
107 dmsetup remove swap0
103 fi 108 fi
104 echo 254:0 > /sys/power/resume 109 umount /sys
105 dmsetup remove swap0 110 mount /mnt
106fi 111 umount /proc
107umount /sys 112 cd /mnt
108mount /mnt 113 pivot_root . mnt
109umount /proc 114 mount /proc
110cd /mnt 115 umount -l /mnt
111pivot_root . mnt 116 umount /proc
112mount /proc 117 exec chroot . /sbin/init $* < dev/console > dev/console 2>&1
113umount -l /mnt
114umount /proc
115exec chroot . /sbin/init $* < dev/console > dev/console 2>&1
116 118
117Please don't mind the weird loop above, busybox's msh doesn't know 119Please don't mind the weird loop above, busybox's msh doesn't know
118the let statement. Now, what is happening in the script? 120the let statement. Now, what is happening in the script?
diff --git a/Documentation/power/swsusp.rst b/Documentation/power/swsusp.rst
new file mode 100644
index 000000000000..d000312f6965
--- /dev/null
+++ b/Documentation/power/swsusp.rst
@@ -0,0 +1,501 @@
1============
2Swap suspend
3============
4
5Some warnings, first.
6
7.. warning::
8
9 **BIG FAT WARNING**
10
11 If you touch anything on disk between suspend and resume...
12 ...kiss your data goodbye.
13
14 If you do resume from initrd after your filesystems are mounted...
15 ...bye bye root partition.
16
17 [this is actually same case as above]
18
19 If you have unsupported (*) devices using DMA, you may have some
20 problems. If your disk driver does not support suspend... (IDE does),
21 it may cause some problems, too. If you change kernel command line
22 between suspend and resume, it may do something wrong. If you change
23 your hardware while system is suspended... well, it was not a good idea;
24 but it will probably only crash.
25
26 (*) suspend/resume support is needed to make it safe.
27
28 If you have any filesystems on USB devices mounted before software suspend,
29 they won't be accessible after resume and you may lose data, as though
30 you have unplugged the USB devices with mounted filesystems on them;
31 see the FAQ below for details. (This is not true for more traditional
32 power states like "standby", which normally don't turn USB off.)
33
34Swap partition:
35 You need to append resume=/dev/your_swap_partition to kernel command
36 line or specify it using /sys/power/resume.
37
38Swap file:
39 If using a swapfile you can also specify a resume offset using
40 resume_offset=<number> on the kernel command line or specify it
41 in /sys/power/resume_offset.
42
43After preparing then you suspend by::
44
45 echo shutdown > /sys/power/disk; echo disk > /sys/power/state
46
47- If you feel ACPI works pretty well on your system, you might try::
48
49 echo platform > /sys/power/disk; echo disk > /sys/power/state
50
51- If you would like to write hibernation image to swap and then suspend
52 to RAM (provided your platform supports it), you can try::
53
54 echo suspend > /sys/power/disk; echo disk > /sys/power/state
55
56- If you have SATA disks, you'll need recent kernels with SATA suspend
57 support. For suspend and resume to work, make sure your disk drivers
58 are built into kernel -- not modules. [There's a way to make
59 suspend/resume with modular disk drivers, see FAQ, but you probably
60 should not do that.]
61
62If you want to limit the suspend image size to N bytes, do::
63
64 echo N > /sys/power/image_size
65
66before suspend (it is limited to around 2/5 of available RAM by default).
67
68- The resume process checks for the presence of the resume device,
69 if found, it then checks the contents for the hibernation image signature.
70 If both are found, it resumes the hibernation image.
71
72- The resume process may be triggered in two ways:
73
74 1) During lateinit: If resume=/dev/your_swap_partition is specified on
75 the kernel command line, lateinit runs the resume process. If the
76 resume device has not been probed yet, the resume process fails and
77 bootup continues.
78 2) Manually from an initrd or initramfs: May be run from
79 the init script by using the /sys/power/resume file. It is vital
80 that this be done prior to remounting any filesystems (even as
81 read-only) otherwise data may be corrupted.
82
83Article about goals and implementation of Software Suspend for Linux
84====================================================================
85
86Author: Gábor Kuti
87Last revised: 2003-10-20 by Pavel Machek
88
89Idea and goals to achieve
90-------------------------
91
92Nowadays it is common in several laptops that they have a suspend button. It
93saves the state of the machine to a filesystem or to a partition and switches
94to standby mode. Later resuming the machine the saved state is loaded back to
95ram and the machine can continue its work. It has two real benefits. First we
96save ourselves the time machine goes down and later boots up, energy costs
97are real high when running from batteries. The other gain is that we don't have
98to interrupt our programs so processes that are calculating something for a long
99time shouldn't need to be written interruptible.
100
101swsusp saves the state of the machine into active swaps and then reboots or
102powerdowns. You must explicitly specify the swap partition to resume from with
103`resume=` kernel option. If signature is found it loads and restores saved
104state. If the option `noresume` is specified as a boot parameter, it skips
105the resuming. If the option `hibernate=nocompress` is specified as a boot
106parameter, it saves hibernation image without compression.
107
108In the meantime while the system is suspended you should not add/remove any
109of the hardware, write to the filesystems, etc.
110
111Sleep states summary
112====================
113
114There are three different interfaces you can use, /proc/acpi should
115work like this:
116
117In a really perfect world::
118
119 echo 1 > /proc/acpi/sleep # for standby
120 echo 2 > /proc/acpi/sleep # for suspend to ram
121 echo 3 > /proc/acpi/sleep # for suspend to ram, but with more power conservative
122 echo 4 > /proc/acpi/sleep # for suspend to disk
123 echo 5 > /proc/acpi/sleep # for shutdown unfriendly the system
124
125and perhaps::
126
127 echo 4b > /proc/acpi/sleep # for suspend to disk via s4bios
128
129Frequently Asked Questions
130==========================
131
132Q:
133 well, suspending a server is IMHO a really stupid thing,
134 but... (Diego Zuccato):
135
136A:
137 You bought new UPS for your server. How do you install it without
138 bringing machine down? Suspend to disk, rearrange power cables,
139 resume.
140
141 You have your server on UPS. Power died, and UPS is indicating 30
142 seconds to failure. What do you do? Suspend to disk.
143
144
145Q:
146 Maybe I'm missing something, but why don't the regular I/O paths work?
147
148A:
149 We do use the regular I/O paths. However we cannot restore the data
150 to its original location as we load it. That would create an
151 inconsistent kernel state which would certainly result in an oops.
152 Instead, we load the image into unused memory and then atomically copy
153 it back to its original location. This implies, of course, a maximum
154 image size of half the amount of memory.
155
156 There are two solutions to this:
157
158 * require half of memory to be free during suspend. That way you can
159 read "new" data onto free spots, then cli and copy
160
161 * assume we had special "polling" ide driver that only uses memory
162 between 0-640KB. That way, I'd have to make sure that 0-640KB is free
163 during suspending, but otherwise it would work...
164
165 suspend2 shares this fundamental limitation, but does not include user
166 data and disk caches into "used memory" by saving them in
167 advance. That means that the limitation goes away in practice.
168
169Q:
170 Does linux support ACPI S4?
171
172A:
173 Yes. That's what echo platform > /sys/power/disk does.
174
175Q:
176 What is 'suspend2'?
177
178A:
179 suspend2 is 'Software Suspend 2', a forked implementation of
180 suspend-to-disk which is available as separate patches for 2.4 and 2.6
181 kernels from swsusp.sourceforge.net. It includes support for SMP, 4GB
182 highmem and preemption. It also has an extensible architecture that
183 allows for arbitrary transformations on the image (compression,
184 encryption) and arbitrary backends for writing the image (eg to swap
185 or an NFS share[Work In Progress]). Questions regarding suspend2
186 should be sent to the mailing list available through the suspend2
187 website, and not to the Linux Kernel Mailing List. We are working
188 toward merging suspend2 into the mainline kernel.
189
190Q:
191 What is the freezing of tasks and why are we using it?
192
193A:
194 The freezing of tasks is a mechanism by which user space processes and some
195 kernel threads are controlled during hibernation or system-wide suspend (on some
196 architectures). See freezing-of-tasks.txt for details.
197
198Q:
199 What is the difference between "platform" and "shutdown"?
200
201A:
202 shutdown:
203 save state in linux, then tell bios to powerdown
204
205 platform:
206 save state in linux, then tell bios to powerdown and blink
207 "suspended led"
208
209 "platform" is actually right thing to do where supported, but
210 "shutdown" is most reliable (except on ACPI systems).
211
212Q:
213 I do not understand why you have such strong objections to idea of
214 selective suspend.
215
216A:
217 Do selective suspend during runtime power management, that's okay. But
218 it's useless for suspend-to-disk. (And I do not see how you could use
219 it for suspend-to-ram, I hope you do not want that).
220
221 Lets see, so you suggest to
222
223 * SUSPEND all but swap device and parents
224 * Snapshot
225 * Write image to disk
226 * SUSPEND swap device and parents
227 * Powerdown
228
229 Oh no, that does not work, if swap device or its parents uses DMA,
230 you've corrupted data. You'd have to do
231
232 * SUSPEND all but swap device and parents
233 * FREEZE swap device and parents
234 * Snapshot
235 * UNFREEZE swap device and parents
236 * Write
237 * SUSPEND swap device and parents
238
239 Which means that you still need that FREEZE state, and you get more
240 complicated code. (And I have not yet introduced details like system
241 devices).
242
243Q:
244 There don't seem to be any generally useful behavioral
245 distinctions between SUSPEND and FREEZE.
246
247A:
248 Doing SUSPEND when you are asked to do FREEZE is always correct,
249 but it may be unnecessarily slow. If you want your driver to stay simple,
250 slowness may not matter to you. It can always be fixed later.
251
252 For devices like disk it does matter, you do not want to spindown for
253 FREEZE.
254
255Q:
256 After resuming, system is paging heavily, leading to very bad interactivity.
257
258A:
259 Try running::
260
261 cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u | while read file
262 do
263 test -f "$file" && cat "$file" > /dev/null
264 done
265
266 after resume. swapoff -a; swapon -a may also be useful.
267
268Q:
269 What happens to devices during swsusp? They seem to be resumed
270 during system suspend?
271
272A:
273 That's correct. We need to resume them if we want to write image to
274 disk. Whole sequence goes like
275
276 **Suspend part**
277
278 running system, user asks for suspend-to-disk
279
280 user processes are stopped
281
282 suspend(PMSG_FREEZE): devices are frozen so that they don't interfere
283 with state snapshot
284
285 state snapshot: copy of whole used memory is taken with interrupts disabled
286
287 resume(): devices are woken up so that we can write image to swap
288
289 write image to swap
290
291 suspend(PMSG_SUSPEND): suspend devices so that we can power off
292
293 turn the power off
294
295 **Resume part**
296
297 (is actually pretty similar)
298
299 running system, user asks for suspend-to-disk
300
301 user processes are stopped (in common case there are none,
302 but with resume-from-initrd, no one knows)
303
304 read image from disk
305
306 suspend(PMSG_FREEZE): devices are frozen so that they don't interfere
307 with image restoration
308
309 image restoration: rewrite memory with image
310
311 resume(): devices are woken up so that system can continue
312
313 thaw all user processes
314
315Q:
316 What is this 'Encrypt suspend image' for?
317
318A:
319 First of all: it is not a replacement for dm-crypt encrypted swap.
320 It cannot protect your computer while it is suspended. Instead it does
321 protect from leaking sensitive data after resume from suspend.
322
323 Think of the following: you suspend while an application is running
324 that keeps sensitive data in memory. The application itself prevents
325 the data from being swapped out. Suspend, however, must write these
326 data to swap to be able to resume later on. Without suspend encryption
327 your sensitive data are then stored in plaintext on disk. This means
328 that after resume your sensitive data are accessible to all
329 applications having direct access to the swap device which was used
330 for suspend. If you don't need swap after resume these data can remain
331 on disk virtually forever. Thus it can happen that your system gets
332 broken into weeks later and sensitive data which you thought were
333 encrypted and protected are retrieved and stolen from the swap device.
334 To prevent this situation you should use 'Encrypt suspend image'.
335
336 During suspend a temporary key is created and this key is used to
337 encrypt the data written to disk. When, during resume, the data was
338 read back into memory the temporary key is destroyed which simply
339 means that all data written to disk during suspend are then
340 inaccessible so they can't be stolen later on. The only thing that
341 you must then take care of is that you call 'mkswap' for the swap
342 partition used for suspend as early as possible during regular
343 boot. This ensures that any temporary key from an oopsed suspend or
344 from a failed or aborted resume is erased from the swap device.
345
346 As a rule of thumb use encrypted swap to protect your data while your
347 system is shut down or suspended. Additionally use the encrypted
348 suspend image to prevent sensitive data from being stolen after
349 resume.
350
351Q:
352 Can I suspend to a swap file?
353
354A:
355 Generally, yes, you can. However, it requires you to use the "resume=" and
356 "resume_offset=" kernel command line parameters, so the resume from a swap file
357 cannot be initiated from an initrd or initramfs image. See
358 swsusp-and-swap-files.txt for details.
359
360Q:
361 Is there a maximum system RAM size that is supported by swsusp?
362
363A:
364 It should work okay with highmem.
365
366Q:
367 Does swsusp (to disk) use only one swap partition or can it use
368 multiple swap partitions (aggregate them into one logical space)?
369
370A:
371 Only one swap partition, sorry.
372
373Q:
374 If my application(s) causes lots of memory & swap space to be used
375 (over half of the total system RAM), is it correct that it is likely
376 to be useless to try to suspend to disk while that app is running?
377
378A:
379 No, it should work okay, as long as your app does not mlock()
380 it. Just prepare big enough swap partition.
381
382Q:
383 What information is useful for debugging suspend-to-disk problems?
384
385A:
386 Well, last messages on the screen are always useful. If something
387 is broken, it is usually some kernel driver, therefore trying with as
388 little as possible modules loaded helps a lot. I also prefer people to
389 suspend from console, preferably without X running. Booting with
390 init=/bin/bash, then swapon and starting suspend sequence manually
391 usually does the trick. Then it is good idea to try with latest
392 vanilla kernel.
393
394Q:
395 How can distributions ship a swsusp-supporting kernel with modular
396 disk drivers (especially SATA)?
397
398A:
399 Well, it can be done, load the drivers, then do echo into
400 /sys/power/resume file from initrd. Be sure not to mount
401 anything, not even read-only mount, or you are going to lose your
402 data.
403
404Q:
405 How do I make suspend more verbose?
406
407A:
408 If you want to see any non-error kernel messages on the virtual
409 terminal the kernel switches to during suspend, you have to set the
410 kernel console loglevel to at least 4 (KERN_WARNING), for example by
411 doing::
412
413 # save the old loglevel
414 read LOGLEVEL DUMMY < /proc/sys/kernel/printk
415 # set the loglevel so we see the progress bar.
416 # if the level is higher than needed, we leave it alone.
417 if [ $LOGLEVEL -lt 5 ]; then
418 echo 5 > /proc/sys/kernel/printk
419 fi
420
421 IMG_SZ=0
422 read IMG_SZ < /sys/power/image_size
423 echo -n disk > /sys/power/state
424 RET=$?
425 #
426 # the logic here is:
427 # if image_size > 0 (without kernel support, IMG_SZ will be zero),
428 # then try again with image_size set to zero.
429 if [ $RET -ne 0 -a $IMG_SZ -ne 0 ]; then # try again with minimal image size
430 echo 0 > /sys/power/image_size
431 echo -n disk > /sys/power/state
432 RET=$?
433 fi
434
435 # restore previous loglevel
436 echo $LOGLEVEL > /proc/sys/kernel/printk
437 exit $RET
438
439Q:
440 Is it true that if I have a mounted filesystem on a USB device and
441 I suspend to disk, I can lose data unless the filesystem has been mounted
442 with "sync"?
443
444A:
445 That's right ... if you disconnect that device, you may lose data.
446 In fact, even with "-o sync" you can lose data if your programs have
447 information in buffers they haven't written out to a disk you disconnect,
448 or if you disconnect before the device finished saving data you wrote.
449
450 Software suspend normally powers down USB controllers, which is equivalent
451 to disconnecting all USB devices attached to your system.
452
453 Your system might well support low-power modes for its USB controllers
454 while the system is asleep, maintaining the connection, using true sleep
455 modes like "suspend-to-RAM" or "standby". (Don't write "disk" to the
456 /sys/power/state file; write "standby" or "mem".) We've not seen any
457 hardware that can use these modes through software suspend, although in
458 theory some systems might support "platform" modes that won't break the
459 USB connections.
460
461 Remember that it's always a bad idea to unplug a disk drive containing a
462 mounted filesystem. That's true even when your system is asleep! The
463 safest thing is to unmount all filesystems on removable media (such as USB,
464 Firewire, CompactFlash, MMC, external SATA, or even IDE hotplug bays)
465 before suspending; then remount them after resuming.
466
467 There is a work-around for this problem. For more information, see
468 Documentation/driver-api/usb/persist.rst.
469
470Q:
471 Can I suspend-to-disk using a swap partition under LVM?
472
473A:
474 Yes and No. You can suspend successfully, but the kernel will not be able
475 to resume on its own. You need an initramfs that can recognize the resume
476 situation, activate the logical volume containing the swap volume (but not
477 touch any filesystems!), and eventually call::
478
479 echo -n "$major:$minor" > /sys/power/resume
480
481 where $major and $minor are the respective major and minor device numbers of
482 the swap volume.
483
484 uswsusp works with LVM, too. See http://suspend.sourceforge.net/
485
486Q:
487 I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were
488 compiled with the similar configuration files. Anyway I found that
489 suspend to disk (and resume) is much slower on 2.6.16 compared to
490 2.6.15. Any idea for why that might happen or how can I speed it up?
491
492A:
493 This is because the size of the suspend image is now greater than
494 for 2.6.15 (by saving more data we can get more responsive system
495 after resume).
496
497 There's the /sys/power/image_size knob that controls the size of the
498 image. If you set it to 0 (eg. by echo 0 > /sys/power/image_size as
499 root), the 2.6.15 behavior should be restored. If it is still too
500 slow, take a look at suspend.sf.net -- userland suspend is faster and
501 supports LZF compression to speed it up further.
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt
deleted file mode 100644
index 236d1fb13640..000000000000
--- a/Documentation/power/swsusp.txt
+++ /dev/null
@@ -1,446 +0,0 @@
1Some warnings, first.
2
3 * BIG FAT WARNING *********************************************************
4 *
5 * If you touch anything on disk between suspend and resume...
6 * ...kiss your data goodbye.
7 *
8 * If you do resume from initrd after your filesystems are mounted...
9 * ...bye bye root partition.
10 * [this is actually same case as above]
11 *
12 * If you have unsupported (*) devices using DMA, you may have some
13 * problems. If your disk driver does not support suspend... (IDE does),
14 * it may cause some problems, too. If you change kernel command line
15 * between suspend and resume, it may do something wrong. If you change
16 * your hardware while system is suspended... well, it was not good idea;
17 * but it will probably only crash.
18 *
19 * (*) suspend/resume support is needed to make it safe.
20 *
21 * If you have any filesystems on USB devices mounted before software suspend,
22 * they won't be accessible after resume and you may lose data, as though
23 * you have unplugged the USB devices with mounted filesystems on them;
24 * see the FAQ below for details. (This is not true for more traditional
25 * power states like "standby", which normally don't turn USB off.)
26
27Swap partition:
28You need to append resume=/dev/your_swap_partition to kernel command
29line or specify it using /sys/power/resume.
30
31Swap file:
32If using a swapfile you can also specify a resume offset using
33resume_offset=<number> on the kernel command line or specify it
34in /sys/power/resume_offset.
35
36After preparing then you suspend by
37
38echo shutdown > /sys/power/disk; echo disk > /sys/power/state
39
40. If you feel ACPI works pretty well on your system, you might try
41
42echo platform > /sys/power/disk; echo disk > /sys/power/state
43
44. If you would like to write hibernation image to swap and then suspend
45to RAM (provided your platform supports it), you can try
46
47echo suspend > /sys/power/disk; echo disk > /sys/power/state
48
49. If you have SATA disks, you'll need recent kernels with SATA suspend
50support. For suspend and resume to work, make sure your disk drivers
51are built into kernel -- not modules. [There's way to make
52suspend/resume with modular disk drivers, see FAQ, but you probably
53should not do that.]
54
55If you want to limit the suspend image size to N bytes, do
56
57echo N > /sys/power/image_size
58
59before suspend (it is limited to around 2/5 of available RAM by default).
60
61. The resume process checks for the presence of the resume device,
62if found, it then checks the contents for the hibernation image signature.
63If both are found, it resumes the hibernation image.
64
65. The resume process may be triggered in two ways:
66 1) During lateinit: If resume=/dev/your_swap_partition is specified on
67 the kernel command line, lateinit runs the resume process. If the
68 resume device has not been probed yet, the resume process fails and
69 bootup continues.
70 2) Manually from an initrd or initramfs: May be run from
71 the init script by using the /sys/power/resume file. It is vital
72 that this be done prior to remounting any filesystems (even as
73 read-only) otherwise data may be corrupted.
74
75Article about goals and implementation of Software Suspend for Linux
76~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
77Author: Gábor Kuti
78Last revised: 2003-10-20 by Pavel Machek
79
80Idea and goals to achieve
81
82Nowadays it is common in several laptops that they have a suspend button. It
83saves the state of the machine to a filesystem or to a partition and switches
84to standby mode. Later resuming the machine the saved state is loaded back to
85ram and the machine can continue its work. It has two real benefits. First we
86save ourselves the time machine goes down and later boots up, energy costs
87are real high when running from batteries. The other gain is that we don't have to
88interrupt our programs so processes that are calculating something for a long
89time shouldn't need to be written interruptible.
90
91swsusp saves the state of the machine into active swaps and then reboots or
92powerdowns. You must explicitly specify the swap partition to resume from with
93``resume='' kernel option. If signature is found it loads and restores saved
94state. If the option ``noresume'' is specified as a boot parameter, it skips
95the resuming. If the option ``hibernate=nocompress'' is specified as a boot
96parameter, it saves hibernation image without compression.
97
98In the meantime while the system is suspended you should not add/remove any
99of the hardware, write to the filesystems, etc.
100
101Sleep states summary
102====================
103
104There are three different interfaces you can use, /proc/acpi should
105work like this:
106
107In a really perfect world:
108echo 1 > /proc/acpi/sleep # for standby
109echo 2 > /proc/acpi/sleep # for suspend to ram
110echo 3 > /proc/acpi/sleep # for suspend to ram, but with more power conservative
111echo 4 > /proc/acpi/sleep # for suspend to disk
112echo 5 > /proc/acpi/sleep # for shutdown unfriendly the system
113
114and perhaps
115echo 4b > /proc/acpi/sleep # for suspend to disk via s4bios
116
117Frequently Asked Questions
118==========================
119
120Q: well, suspending a server is IMHO a really stupid thing,
121but... (Diego Zuccato):
122
123A: You bought new UPS for your server. How do you install it without
124bringing machine down? Suspend to disk, rearrange power cables,
125resume.
126
127You have your server on UPS. Power died, and UPS is indicating 30
128seconds to failure. What do you do? Suspend to disk.
129
130
131Q: Maybe I'm missing something, but why don't the regular I/O paths work?
132
133A: We do use the regular I/O paths. However we cannot restore the data
134to its original location as we load it. That would create an
135inconsistent kernel state which would certainly result in an oops.
136Instead, we load the image into unused memory and then atomically copy
137it back to it original location. This implies, of course, a maximum
138image size of half the amount of memory.
139
140There are two solutions to this:
141
142* require half of memory to be free during suspend. That way you can
143read "new" data onto free spots, then cli and copy
144
145* assume we had special "polling" ide driver that only uses memory
146between 0-640KB. That way, I'd have to make sure that 0-640KB is free
147during suspending, but otherwise it would work...
148
149suspend2 shares this fundamental limitation, but does not include user
150data and disk caches into "used memory" by saving them in
151advance. That means that the limitation goes away in practice.
152
153Q: Does linux support ACPI S4?
154
155A: Yes. That's what echo platform > /sys/power/disk does.
156
157Q: What is 'suspend2'?
158
159A: suspend2 is 'Software Suspend 2', a forked implementation of
160suspend-to-disk which is available as separate patches for 2.4 and 2.6
161kernels from swsusp.sourceforge.net. It includes support for SMP, 4GB
162highmem and preemption. It also has a extensible architecture that
163allows for arbitrary transformations on the image (compression,
164encryption) and arbitrary backends for writing the image (eg to swap
165or an NFS share[Work In Progress]). Questions regarding suspend2
166should be sent to the mailing list available through the suspend2
167website, and not to the Linux Kernel Mailing List. We are working
168toward merging suspend2 into the mainline kernel.
169
170Q: What is the freezing of tasks and why are we using it?
171
172A: The freezing of tasks is a mechanism by which user space processes and some
173kernel threads are controlled during hibernation or system-wide suspend (on some
174architectures). See freezing-of-tasks.txt for details.
175
176Q: What is the difference between "platform" and "shutdown"?
177
178A:
179
180shutdown: save state in linux, then tell bios to powerdown
181
182platform: save state in linux, then tell bios to powerdown and blink
183 "suspended led"
184
185"platform" is actually right thing to do where supported, but
186"shutdown" is most reliable (except on ACPI systems).
187
188Q: I do not understand why you have such strong objections to idea of
189selective suspend.
190
191A: Do selective suspend during runtime power management, that's okay. But
192it's useless for suspend-to-disk. (And I do not see how you could use
193it for suspend-to-ram, I hope you do not want that).
194
195Lets see, so you suggest to
196
197* SUSPEND all but swap device and parents
198* Snapshot
199* Write image to disk
200* SUSPEND swap device and parents
201* Powerdown
202
203Oh no, that does not work, if swap device or its parents uses DMA,
204you've corrupted data. You'd have to do
205
206* SUSPEND all but swap device and parents
207* FREEZE swap device and parents
208* Snapshot
209* UNFREEZE swap device and parents
210* Write
211* SUSPEND swap device and parents
212
213Which means that you still need that FREEZE state, and you get more
214complicated code. (And I have not yet introduce details like system
215devices).
216
217Q: There don't seem to be any generally useful behavioral
218distinctions between SUSPEND and FREEZE.
219
220A: Doing SUSPEND when you are asked to do FREEZE is always correct,
221but it may be unnecessarily slow. If you want your driver to stay simple,
222slowness may not matter to you. It can always be fixed later.
223
224For devices like disk it does matter, you do not want to spindown for
225FREEZE.
226
227Q: After resuming, system is paging heavily, leading to very bad interactivity.
228
229A: Try running
230
231cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u | while read file
232do
233 test -f "$file" && cat "$file" > /dev/null
234done
235
236after resume. swapoff -a; swapon -a may also be useful.
237
238Q: What happens to devices during swsusp? They seem to be resumed
239during system suspend?
240
241A: That's correct. We need to resume them if we want to write image to
242disk. Whole sequence goes like
243
244 Suspend part
245 ~~~~~~~~~~~~
246 running system, user asks for suspend-to-disk
247
248 user processes are stopped
249
250 suspend(PMSG_FREEZE): devices are frozen so that they don't interfere
251 with state snapshot
252
253 state snapshot: copy of whole used memory is taken with interrupts disabled
254
255 resume(): devices are woken up so that we can write image to swap
256
257 write image to swap
258
259 suspend(PMSG_SUSPEND): suspend devices so that we can power off
260
261 turn the power off
262
263 Resume part
264 ~~~~~~~~~~~
265 (is actually pretty similar)
266
267 running system, user asks for suspend-to-disk
268
269 user processes are stopped (in common case there are none, but with resume-from-initrd, no one knows)
270
271 read image from disk
272
273 suspend(PMSG_FREEZE): devices are frozen so that they don't interfere
274 with image restoration
275
276 image restoration: rewrite memory with image
277
278 resume(): devices are woken up so that system can continue
279
280 thaw all user processes
281
282Q: What is this 'Encrypt suspend image' for?
283
284A: First of all: it is not a replacement for dm-crypt encrypted swap.
285It cannot protect your computer while it is suspended. Instead it does
286protect from leaking sensitive data after resume from suspend.
287
288Think of the following: you suspend while an application is running
289that keeps sensitive data in memory. The application itself prevents
290the data from being swapped out. Suspend, however, must write these
291data to swap to be able to resume later on. Without suspend encryption
292your sensitive data are then stored in plaintext on disk. This means
293that after resume your sensitive data are accessible to all
294applications having direct access to the swap device which was used
295for suspend. If you don't need swap after resume these data can remain
296on disk virtually forever. Thus it can happen that your system gets
297broken in weeks later and sensitive data which you thought were
298encrypted and protected are retrieved and stolen from the swap device.
299To prevent this situation you should use 'Encrypt suspend image'.
300
301During suspend a temporary key is created and this key is used to
302encrypt the data written to disk. When, during resume, the data was
303read back into memory the temporary key is destroyed which simply
304means that all data written to disk during suspend are then
305inaccessible so they can't be stolen later on. The only thing that
306you must then take care of is that you call 'mkswap' for the swap
307partition used for suspend as early as possible during regular
308boot. This ensures that any temporary key from an oopsed suspend or
309from a failed or aborted resume is erased from the swap device.
310
311As a rule of thumb use encrypted swap to protect your data while your
312system is shut down or suspended. Additionally use the encrypted
313suspend image to prevent sensitive data from being stolen after
314resume.
315
316Q: Can I suspend to a swap file?
317
318A: Generally, yes, you can. However, it requires you to use the "resume=" and
319"resume_offset=" kernel command line parameters, so the resume from a swap file
320cannot be initiated from an initrd or initramfs image. See
321swsusp-and-swap-files.txt for details.
322
323Q: Is there a maximum system RAM size that is supported by swsusp?
324
325A: It should work okay with highmem.
326
327Q: Does swsusp (to disk) use only one swap partition or can it use
328multiple swap partitions (aggregate them into one logical space)?
329
330A: Only one swap partition, sorry.
331
332Q: If my application(s) causes lots of memory & swap space to be used
333(over half of the total system RAM), is it correct that it is likely
334to be useless to try to suspend to disk while that app is running?
335
336A: No, it should work okay, as long as your app does not mlock()
337it. Just prepare a big enough swap partition.
338
339Q: What information is useful for debugging suspend-to-disk problems?
340
341A: Well, last messages on the screen are always useful. If something
342is broken, it is usually some kernel driver, therefore trying with as
343few modules loaded as possible helps a lot. I also prefer people to
344suspend from console, preferably without X running. Booting with
345init=/bin/bash, then swapon and starting suspend sequence manually
346usually does the trick. Then it is a good idea to try with the latest
347vanilla kernel.
348
349Q: How can distributions ship a swsusp-supporting kernel with modular
350disk drivers (especially SATA)?
351
352A: Well, it can be done, load the drivers, then do echo into
353/sys/power/resume file from initrd. Be sure not to mount
354anything, not even read-only mount, or you are going to lose your
355data.
356
357Q: How do I make suspend more verbose?
358
359A: If you want to see any non-error kernel messages on the virtual
360terminal the kernel switches to during suspend, you have to set the
361kernel console loglevel to at least 4 (KERN_WARNING), for example by
362doing
363
364 # save the old loglevel
365 read LOGLEVEL DUMMY < /proc/sys/kernel/printk
366 # set the loglevel so we see the progress bar.
367 # if the level is higher than needed, we leave it alone.
368 if [ $LOGLEVEL -lt 5 ]; then
369 echo 5 > /proc/sys/kernel/printk
370 fi
371
372 IMG_SZ=0
373 read IMG_SZ < /sys/power/image_size
374 echo -n disk > /sys/power/state
375 RET=$?
376 #
377 # the logic here is:
378 # if image_size > 0 (without kernel support, IMG_SZ will be zero),
379 # then try again with image_size set to zero.
380 if [ $RET -ne 0 -a $IMG_SZ -ne 0 ]; then # try again with minimal image size
381 echo 0 > /sys/power/image_size
382 echo -n disk > /sys/power/state
383 RET=$?
384 fi
385
386 # restore previous loglevel
387 echo $LOGLEVEL > /proc/sys/kernel/printk
388 exit $RET
389
390Q: Is it true that if I have a mounted filesystem on a USB device and
391I suspend to disk, I can lose data unless the filesystem has been mounted
392with "sync"?
393
394A: That's right ... if you disconnect that device, you may lose data.
395In fact, even with "-o sync" you can lose data if your programs have
396information in buffers they haven't written out to a disk you disconnect,
397or if you disconnect before the device finished saving data you wrote.
398
399Software suspend normally powers down USB controllers, which is equivalent
400to disconnecting all USB devices attached to your system.
401
402Your system might well support low-power modes for its USB controllers
403while the system is asleep, maintaining the connection, using true sleep
404modes like "suspend-to-RAM" or "standby". (Don't write "disk" to the
405/sys/power/state file; write "standby" or "mem".) We've not seen any
406hardware that can use these modes through software suspend, although in
407theory some systems might support "platform" modes that won't break the
408USB connections.
409
410Remember that it's always a bad idea to unplug a disk drive containing a
411mounted filesystem. That's true even when your system is asleep! The
412safest thing is to unmount all filesystems on removable media (such as USB,
413Firewire, CompactFlash, MMC, external SATA, or even IDE hotplug bays)
414before suspending; then remount them after resuming.
415
416There is a work-around for this problem. For more information, see
417Documentation/driver-api/usb/persist.rst.
418
419Q: Can I suspend-to-disk using a swap partition under LVM?
420
421A: Yes and No. You can suspend successfully, but the kernel will not be able
422to resume on its own. You need an initramfs that can recognize the resume
423situation, activate the logical volume containing the swap volume (but not
424touch any filesystems!), and eventually call
425
426echo -n "$major:$minor" > /sys/power/resume
427
428where $major and $minor are the respective major and minor device numbers of
429the swap volume.
430
431uswsusp works with LVM, too. See http://suspend.sourceforge.net/
432
433Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were
434compiled with similar configuration files. Anyway I found that
435suspend to disk (and resume) is much slower on 2.6.16 compared to
4362.6.15. Any idea for why that might happen or how can I speed it up?
437
438A: This is because the size of the suspend image is now greater than
439for 2.6.15 (by saving more data we can get more responsive system
440after resume).
441
442There's the /sys/power/image_size knob that controls the size of the
443image. If you set it to 0 (eg. by echo 0 > /sys/power/image_size as
444root), the 2.6.15 behavior should be restored. If it is still too
445slow, take a look at suspend.sf.net -- userland suspend is faster and
446supports LZF compression to speed it up further.
diff --git a/Documentation/power/tricks.txt b/Documentation/power/tricks.rst
index a1b8f7249f4c..ca787f142c3f 100644
--- a/Documentation/power/tricks.txt
+++ b/Documentation/power/tricks.rst
@@ -1,5 +1,7 @@
1 swsusp/S3 tricks 1================
2 ~~~~~~~~~~~~~~~~ 2swsusp/S3 tricks
3================
4
3Pavel Machek <pavel@ucw.cz> 5Pavel Machek <pavel@ucw.cz>
4 6
5If you want to trick swsusp/S3 into working, you might want to try: 7If you want to trick swsusp/S3 into working, you might want to try:
diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.rst
index bbfcd1bbedc5..a0fa51bb1a4d 100644
--- a/Documentation/power/userland-swsusp.txt
+++ b/Documentation/power/userland-swsusp.rst
@@ -1,4 +1,7 @@
1=====================================================
1Documentation for userland software suspend interface 2Documentation for userland software suspend interface
3=====================================================
4
2 (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 5 (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
3 6
4First, the warnings at the beginning of swsusp.txt still apply. 7First, the warnings at the beginning of swsusp.txt still apply.
@@ -30,13 +33,16 @@ called.
30 33
31The ioctl() commands recognized by the device are: 34The ioctl() commands recognized by the device are:
32 35
33SNAPSHOT_FREEZE - freeze user space processes (the current process is 36SNAPSHOT_FREEZE
37 freeze user space processes (the current process is
34 not frozen); this is required for SNAPSHOT_CREATE_IMAGE 38 not frozen); this is required for SNAPSHOT_CREATE_IMAGE
35 and SNAPSHOT_ATOMIC_RESTORE to succeed 39 and SNAPSHOT_ATOMIC_RESTORE to succeed
36 40
37SNAPSHOT_UNFREEZE - thaw user space processes frozen by SNAPSHOT_FREEZE 41SNAPSHOT_UNFREEZE
42 thaw user space processes frozen by SNAPSHOT_FREEZE
38 43
39SNAPSHOT_CREATE_IMAGE - create a snapshot of the system memory; the 44SNAPSHOT_CREATE_IMAGE
45 create a snapshot of the system memory; the
40 last argument of ioctl() should be a pointer to an int variable, 46 last argument of ioctl() should be a pointer to an int variable,
41 the value of which will indicate whether the call returned after 47 the value of which will indicate whether the call returned after
42 creating the snapshot (1) or after restoring the system memory state 48 creating the snapshot (1) or after restoring the system memory state
@@ -45,48 +51,59 @@ SNAPSHOT_CREATE_IMAGE - create a snapshot of the system memory; the
45 has been created the read() operation can be used to transfer 51 has been created the read() operation can be used to transfer
46 it out of the kernel 52 it out of the kernel
47 53
48SNAPSHOT_ATOMIC_RESTORE - restore the system memory state from the 54SNAPSHOT_ATOMIC_RESTORE
55 restore the system memory state from the
49 uploaded snapshot image; before calling it you should transfer 56 uploaded snapshot image; before calling it you should transfer
50 the system memory snapshot back to the kernel using the write() 57 the system memory snapshot back to the kernel using the write()
51 operation; this call will not succeed if the snapshot 58 operation; this call will not succeed if the snapshot
52 image is not available to the kernel 59 image is not available to the kernel
53 60
54SNAPSHOT_FREE - free memory allocated for the snapshot image 61SNAPSHOT_FREE
62 free memory allocated for the snapshot image
55 63
56SNAPSHOT_PREF_IMAGE_SIZE - set the preferred maximum size of the image 64SNAPSHOT_PREF_IMAGE_SIZE
65 set the preferred maximum size of the image
57 (the kernel will do its best to ensure the image size will not exceed 66 (the kernel will do its best to ensure the image size will not exceed
58 this number, but if it turns out to be impossible, the kernel will 67 this number, but if it turns out to be impossible, the kernel will
59 create the smallest image possible) 68 create the smallest image possible)
60 69
61SNAPSHOT_GET_IMAGE_SIZE - return the actual size of the hibernation image 70SNAPSHOT_GET_IMAGE_SIZE
71 return the actual size of the hibernation image
62 72
63SNAPSHOT_AVAIL_SWAP_SIZE - return the amount of available swap in bytes (the 73SNAPSHOT_AVAIL_SWAP_SIZE
74 return the amount of available swap in bytes (the
64 last argument should be a pointer to an unsigned int variable that will 75 last argument should be a pointer to an unsigned int variable that will
65 contain the result if the call is successful). 76 contain the result if the call is successful).
66 77
67SNAPSHOT_ALLOC_SWAP_PAGE - allocate a swap page from the resume partition 78SNAPSHOT_ALLOC_SWAP_PAGE
79 allocate a swap page from the resume partition
68 (the last argument should be a pointer to a loff_t variable that 80 (the last argument should be a pointer to a loff_t variable that
69 will contain the swap page offset if the call is successful) 81 will contain the swap page offset if the call is successful)
70 82
71SNAPSHOT_FREE_SWAP_PAGES - free all swap pages allocated by 83SNAPSHOT_FREE_SWAP_PAGES
84 free all swap pages allocated by
72 SNAPSHOT_ALLOC_SWAP_PAGE 85 SNAPSHOT_ALLOC_SWAP_PAGE
73 86
74SNAPSHOT_SET_SWAP_AREA - set the resume partition and the offset (in <PAGE_SIZE> 87SNAPSHOT_SET_SWAP_AREA
88 set the resume partition and the offset (in <PAGE_SIZE>
75 units) from the beginning of the partition at which the swap header is 89 units) from the beginning of the partition at which the swap header is
76 located (the last ioctl() argument should point to a struct 90 located (the last ioctl() argument should point to a struct
77 resume_swap_area, as defined in kernel/power/suspend_ioctls.h, 91 resume_swap_area, as defined in kernel/power/suspend_ioctls.h,
78 containing the resume device specification and the offset); for swap 92 containing the resume device specification and the offset); for swap
79 partitions the offset is always 0, but it is different from zero for 93 partitions the offset is always 0, but it is different from zero for
80 swap files (see Documentation/power/swsusp-and-swap-files.txt for 94 swap files (see Documentation/power/swsusp-and-swap-files.rst for
81 details). 95 details).
82 96
83SNAPSHOT_PLATFORM_SUPPORT - enable/disable the hibernation platform support, 97SNAPSHOT_PLATFORM_SUPPORT
98 enable/disable the hibernation platform support,
84 depending on the argument value (enable, if the argument is nonzero) 99 depending on the argument value (enable, if the argument is nonzero)
85 100
86SNAPSHOT_POWER_OFF - make the kernel transition the system to the hibernation 101SNAPSHOT_POWER_OFF
102 make the kernel transition the system to the hibernation
87 state (eg. ACPI S4) using the platform (eg. ACPI) driver 103 state (eg. ACPI S4) using the platform (eg. ACPI) driver
88 104
89SNAPSHOT_S2RAM - suspend to RAM; using this call causes the kernel to 105SNAPSHOT_S2RAM
106 suspend to RAM; using this call causes the kernel to
90 immediately enter the suspend-to-RAM state, so this call must always 107 immediately enter the suspend-to-RAM state, so this call must always
91 be preceded by the SNAPSHOT_FREEZE call and it is also necessary 108 be preceded by the SNAPSHOT_FREEZE call and it is also necessary
92 to use the SNAPSHOT_UNFREEZE call after the system wakes up. This call 109 to use the SNAPSHOT_UNFREEZE call after the system wakes up. This call
@@ -98,10 +115,11 @@ SNAPSHOT_S2RAM - suspend to RAM; using this call causes the kernel to
98 115
99The device's read() operation can be used to transfer the snapshot image from 116The device's read() operation can be used to transfer the snapshot image from
100the kernel. It has the following limitations: 117the kernel. It has the following limitations:
118
101- you cannot read() more than one virtual memory page at a time 119- you cannot read() more than one virtual memory page at a time
102- read()s across page boundaries are impossible (ie. if you read() 1/2 of 120- read()s across page boundaries are impossible (ie. if you read() 1/2 of
103 a page in the previous call, you will only be able to read() 121 a page in the previous call, you will only be able to read()
104 _at_ _most_ 1/2 of the page in the next call) 122 **at most** 1/2 of the page in the next call)
105 123
106The device's write() operation is used for uploading the system memory snapshot 124The device's write() operation is used for uploading the system memory snapshot
107into the kernel. It has the same limitations as the read() operation. 125into the kernel. It has the same limitations as the read() operation.
@@ -143,8 +161,10 @@ preferably using mlockall(), before calling SNAPSHOT_FREEZE.
143The suspending utility MUST check the value stored by SNAPSHOT_CREATE_IMAGE 161The suspending utility MUST check the value stored by SNAPSHOT_CREATE_IMAGE
144in the memory location pointed to by the last argument of ioctl() and proceed 162in the memory location pointed to by the last argument of ioctl() and proceed
145in accordance with it: 163in accordance with it:
164
1461. If the value is 1 (ie. the system memory snapshot has just been 1651. If the value is 1 (ie. the system memory snapshot has just been
147 created and the system is ready for saving it): 166 created and the system is ready for saving it):
167
148 (a) The suspending utility MUST NOT close the snapshot device 168 (a) The suspending utility MUST NOT close the snapshot device
149 _unless_ the whole suspend procedure is to be cancelled, in 169 _unless_ the whole suspend procedure is to be cancelled, in
150 which case, if the snapshot image has already been saved, the 170 which case, if the snapshot image has already been saved, the
@@ -158,6 +178,7 @@ in accordance with it:
158 called. However, it MAY mount a file system that was not 178 called. However, it MAY mount a file system that was not
159 mounted at that time and perform some operations on it (eg. 179 mounted at that time and perform some operations on it (eg.
160 use it for saving the image). 180 use it for saving the image).
181
1612. If the value is 0 (ie. the system state has just been restored from 1822. If the value is 0 (ie. the system state has just been restored from
162 the snapshot image), the suspending utility MUST close the snapshot 183 the snapshot image), the suspending utility MUST close the snapshot
163 device. Afterwards it will be treated as a regular userland process, 184 device. Afterwards it will be treated as a regular userland process,
diff --git a/Documentation/power/video.txt b/Documentation/power/video.rst
index 3e6272bc4472..337a2ba9f32f 100644
--- a/Documentation/power/video.txt
+++ b/Documentation/power/video.rst
@@ -1,7 +1,8 @@
1===========================
2Video issues with S3 resume
3===========================
1 4
2 Video issues with S3 resume 52003-2006, Pavel Machek
3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
4 2003-2006, Pavel Machek
5 6
6During S3 resume, hardware needs to be reinitialized. For most 7During S3 resume, hardware needs to be reinitialized. For most
7devices, this is easy, and kernel driver knows how to do 8devices, this is easy, and kernel driver knows how to do
@@ -41,37 +42,37 @@ There are a few types of systems where video works after S3 resume:
41(1) systems where video state is preserved over S3. 42(1) systems where video state is preserved over S3.
42 43
43(2) systems where it is possible to call the video BIOS during S3 44(2) systems where it is possible to call the video BIOS during S3
44 resume. Unfortunately, it is not correct to call the video BIOS at 45 resume. Unfortunately, it is not correct to call the video BIOS at
45 that point, but it happens to work on some machines. Use 46 that point, but it happens to work on some machines. Use
46 acpi_sleep=s3_bios. 47 acpi_sleep=s3_bios.
47 48
48(3) systems that initialize video card into vga text mode and where 49(3) systems that initialize video card into vga text mode and where
49 the BIOS works well enough to be able to set video mode. Use 50 the BIOS works well enough to be able to set video mode. Use
50 acpi_sleep=s3_mode on these. 51 acpi_sleep=s3_mode on these.
51 52
52(4) on some systems s3_bios kicks video into text mode, and 53(4) on some systems s3_bios kicks video into text mode, and
53 acpi_sleep=s3_bios,s3_mode is needed. 54 acpi_sleep=s3_bios,s3_mode is needed.
54 55
55(5) radeon systems, where X can soft-boot your video card. You'll need 56(5) radeon systems, where X can soft-boot your video card. You'll need
56 a new enough X, and a plain text console (no vesafb or radeonfb). See 57 a new enough X, and a plain text console (no vesafb or radeonfb). See
57 http://www.doesi.gmxhome.de/linux/tm800s3/s3.html for more information. 58 http://www.doesi.gmxhome.de/linux/tm800s3/s3.html for more information.
58 Alternatively, you should use vbetool (6) instead. 59 Alternatively, you should use vbetool (6) instead.
59 60
60(6) other radeon systems, where vbetool is enough to bring system back 61(6) other radeon systems, where vbetool is enough to bring system back
61 to life. It needs text console to be working. Do vbetool vbestate 62 to life. It needs text console to be working. Do vbetool vbestate
62 save > /tmp/delme; echo 3 > /proc/acpi/sleep; vbetool post; vbetool 63 save > /tmp/delme; echo 3 > /proc/acpi/sleep; vbetool post; vbetool
63 vbestate restore < /tmp/delme; setfont <whatever>, and your video 64 vbestate restore < /tmp/delme; setfont <whatever>, and your video
64 should work. 65 should work.
65 66
66(7) on some systems, it is possible to boot most of kernel, and then 67(7) on some systems, it is possible to boot most of kernel, and then
67 POSTing bios works. Ole Rohne has patch to do just that at 68 POSTing bios works. Ole Rohne has patch to do just that at
68 http://dev.gentoo.org/~marineam/patch-radeonfb-2.6.11-rc2-mm2. 69 http://dev.gentoo.org/~marineam/patch-radeonfb-2.6.11-rc2-mm2.
69 70
70(8) on some systems, you can use the video_post utility and or 71(8) on some systems, you can use the video_post utility and or
71 do echo 3 > /sys/power/state && /usr/sbin/video_post - which will 72 do echo 3 > /sys/power/state && /usr/sbin/video_post - which will
72 initialize the display in console mode. If you are in X, you can switch 73 initialize the display in console mode. If you are in X, you can switch
73 to a virtual terminal and back to X using CTRL+ALT+F1 - CTRL+ALT+F7 to get 74 to a virtual terminal and back to X using CTRL+ALT+F1 - CTRL+ALT+F7 to get
74 the display working in graphical mode again. 75 the display working in graphical mode again.
75 76
76Now, if you pass acpi_sleep=something, and it does not work with your 77Now, if you pass acpi_sleep=something, and it does not work with your
77bios, you'll get a hard crash during resume. Be careful. Also it is 78bios, you'll get a hard crash during resume. Be careful. Also it is
@@ -87,99 +88,126 @@ chance of working.
87 88
88Table of known working notebooks: 89Table of known working notebooks:
89 90
91
92=============================== ===============================================
90Model hack (or "how to do it") 93Model hack (or "how to do it")
91------------------------------------------------------------------------------ 94=============================== ===============================================
92Acer Aspire 1406LC ole's late BIOS init (7), turn off DRI 95Acer Aspire 1406LC ole's late BIOS init (7), turn off DRI
93Acer TM 230 s3_bios (2) 96Acer TM 230 s3_bios (2)
94Acer TM 242FX vbetool (6) 97Acer TM 242FX vbetool (6)
95Acer TM C110 video_post (8) 98Acer TM C110 video_post (8)
96Acer TM C300 vga=normal (only suspend on console, not in X), vbetool (6) or video_post (8) 99Acer TM C300 vga=normal (only suspend on console, not in X),
100 vbetool (6) or video_post (8)
97Acer TM 4052LCi s3_bios (2) 101Acer TM 4052LCi s3_bios (2)
98Acer TM 636Lci s3_bios,s3_mode (4) 102Acer TM 636Lci s3_bios,s3_mode (4)
99Acer TM 650 (Radeon M7) vga=normal plus boot-radeon (5) gets text console back 103Acer TM 650 (Radeon M7) vga=normal plus boot-radeon (5) gets text
100Acer TM 660 ??? (*) 104 console back
101Acer TM 800 vga=normal, X patches, see webpage (5) or vbetool (6) 105Acer TM 660 ??? [#f1]_
102Acer TM 803 vga=normal, X patches, see webpage (5) or vbetool (6) 106Acer TM 800 vga=normal, X patches, see webpage (5)
107 or vbetool (6)
108Acer TM 803 vga=normal, X patches, see webpage (5)
109 or vbetool (6)
103Acer TM 803LCi vga=normal, vbetool (6) 110Acer TM 803LCi vga=normal, vbetool (6)
104Arima W730a vbetool needed (6) 111Arima W730a vbetool needed (6)
105Asus L2400D s3_mode (3)(***) (S1 also works OK) 112Asus L2400D s3_mode (3) [#f2]_ (S1 also works OK)
106Asus L3350M (SiS 740) (6) 113Asus L3350M (SiS 740) (6)
107Asus L3800C (Radeon M7) s3_bios (2) (S1 also works OK) 114Asus L3800C (Radeon M7) s3_bios (2) (S1 also works OK)
108Asus M6887Ne vga=normal, s3_bios (2), use radeon driver instead of fglrx in x.org 115Asus M6887Ne vga=normal, s3_bios (2), use radeon driver
116 instead of fglrx in x.org
109Athlon64 desktop prototype s3_bios (2) 117Athlon64 desktop prototype s3_bios (2)
110Compal CL-50 ??? (*) 118Compal CL-50 ??? [#f1]_
111Compaq Armada E500 - P3-700 none (1) (S1 also works OK) 119Compaq Armada E500 - P3-700 none (1) (S1 also works OK)
112Compaq Evo N620c vga=normal, s3_bios (2) 120Compaq Evo N620c vga=normal, s3_bios (2)
113Dell 600m, ATI R250 Lf none (1), but needs xorg-x11-6.8.1.902-1 121Dell 600m, ATI R250 Lf none (1), but needs xorg-x11-6.8.1.902-1
114Dell D600, ATI RV250 vga=normal and X, or try vbestate (6) 122Dell D600, ATI RV250 vga=normal and X, or try vbestate (6)
115Dell D610 vga=normal and X (possibly vbestate (6) too, but not tested) 123Dell D610 vga=normal and X (possibly vbestate (6) too,
116Dell Inspiron 4000 ??? (*) 124 but not tested)
117Dell Inspiron 500m ??? (*) 125Dell Inspiron 4000 ??? [#f1]_
126Dell Inspiron 500m ??? [#f1]_
118Dell Inspiron 510m ??? 127Dell Inspiron 510m ???
119Dell Inspiron 5150 vbetool needed (6) 128Dell Inspiron 5150 vbetool needed (6)
120Dell Inspiron 600m ??? (*) 129Dell Inspiron 600m ??? [#f1]_
121Dell Inspiron 8200 ??? (*) 130Dell Inspiron 8200 ??? [#f1]_
122Dell Inspiron 8500 ??? (*) 131Dell Inspiron 8500 ??? [#f1]_
123Dell Inspiron 8600 ??? (*) 132Dell Inspiron 8600 ??? [#f1]_
124eMachines athlon64 machines vbetool needed (6) (someone please get me model #s) 133eMachines athlon64 machines vbetool needed (6) (someone please get
125HP NC6000 s3_bios, may not use radeonfb (2); or vbetool (6) 134 me model #s)
126HP NX7000 ??? (*) 135HP NC6000 s3_bios, may not use radeonfb (2);
127HP Pavilion ZD7000 vbetool post needed, need open-source nv driver for X 136 or vbetool (6)
137HP NX7000 ??? [#f1]_
138HP Pavilion ZD7000 vbetool post needed, need open-source nv
139 driver for X
128HP Omnibook XE3 athlon version none (1) 140HP Omnibook XE3 athlon version none (1)
129HP Omnibook XE3GC none (1), video is S3 Savage/IX-MV 141HP Omnibook XE3GC none (1), video is S3 Savage/IX-MV
130HP Omnibook XE3L-GF vbetool (6) 142HP Omnibook XE3L-GF vbetool (6)
131HP Omnibook 5150 none (1), (S1 also works OK) 143HP Omnibook 5150 none (1), (S1 also works OK)
132IBM TP T20, model 2647-44G none (1), video is S3 Inc. 86C270-294 Savage/IX-MV, vesafb gets "interesting" but X work. 144IBM TP T20, model 2647-44G none (1), video is S3 Inc. 86C270-294
133IBM TP A31 / Type 2652-M5G s3_mode (3) [works ok with BIOS 1.04 2002-08-23, but not at all with BIOS 1.11 2004-11-05 :-(] 145 Savage/IX-MV, vesafb gets "interesting"
146 but X work.
147IBM TP A31 / Type 2652-M5G s3_mode (3) [works ok with
148 BIOS 1.04 2002-08-23, but not at all with
149 BIOS 1.11 2004-11-05 :-(]
134IBM TP R32 / Type 2658-MMG none (1) 150IBM TP R32 / Type 2658-MMG none (1)
135IBM TP R40 2722B3G ??? (*) 151IBM TP R40 2722B3G ??? [#f1]_
136IBM TP R50p / Type 1832-22U s3_bios (2) 152IBM TP R50p / Type 1832-22U s3_bios (2)
137IBM TP R51 none (1) 153IBM TP R51 none (1)
138IBM TP T30 236681A ??? (*) 154IBM TP T30 236681A ??? [#f1]_
139IBM TP T40 / Type 2373-MU4 none (1) 155IBM TP T40 / Type 2373-MU4 none (1)
140IBM TP T40p none (1) 156IBM TP T40p none (1)
141IBM TP R40p s3_bios (2) 157IBM TP R40p s3_bios (2)
142IBM TP T41p s3_bios (2), switch to X after resume 158IBM TP T41p s3_bios (2), switch to X after resume
143IBM TP T42 s3_bios (2) 159IBM TP T42 s3_bios (2)
144IBM ThinkPad T42p (2373-GTG) s3_bios (2) 160IBM ThinkPad T42p (2373-GTG) s3_bios (2)
145IBM TP X20 ??? (*) 161IBM TP X20 ??? [#f1]_
146IBM TP X30 s3_bios, s3_mode (4) 162IBM TP X30 s3_bios, s3_mode (4)
147IBM TP X31 / Type 2672-XXH none (1), use radeontool (http://fdd.com/software/radeon/) to turn off backlight. 163IBM TP X31 / Type 2672-XXH none (1), use radeontool
148IBM TP X32 none (1), but backlight is on and video is trashed after long suspend. s3_bios,s3_mode (4) works too. Perhaps that gets better results? 164 (http://fdd.com/software/radeon/) to
165 turn off backlight.
166IBM TP X32 none (1), but backlight is on and video is
167 trashed after long suspend. s3_bios,
168 s3_mode (4) works too. Perhaps that gets
169 better results?
149IBM Thinkpad X40 Type 2371-7JG s3_bios,s3_mode (4) 170IBM Thinkpad X40 Type 2371-7JG s3_bios,s3_mode (4)
150IBM TP 600e none(1), but a switch to console and back to X is needed 171IBM TP 600e none(1), but a switch to console and
151Medion MD4220 ??? (*) 172 back to X is needed
173Medion MD4220 ??? [#f1]_
152Samsung P35 vbetool needed (6) 174Samsung P35 vbetool needed (6)
153Sharp PC-AR10 (ATI rage) none (1), backlight does not switch off 175Sharp PC-AR10 (ATI rage) none (1), backlight does not switch off
154Sony Vaio PCG-C1VRX/K s3_bios (2) 176Sony Vaio PCG-C1VRX/K s3_bios (2)
155Sony Vaio PCG-F403 ??? (*) 177Sony Vaio PCG-F403 ??? [#f1]_
156Sony Vaio PCG-GRT995MP none (1), works with 'nv' X driver 178Sony Vaio PCG-GRT995MP none (1), works with 'nv' X driver
157Sony Vaio PCG-GR7/K none (1), but needs radeonfb, use radeontool (http://fdd.com/software/radeon/) to turn off backlight. 179Sony Vaio PCG-GR7/K none (1), but needs radeonfb, use
158Sony Vaio PCG-N505SN ??? (*) 180 radeontool (http://fdd.com/software/radeon/)
181 to turn off backlight.
182Sony Vaio PCG-N505SN ??? [#f1]_
159Sony Vaio vgn-s260 X or boot-radeon can init it (5) 183Sony Vaio vgn-s260 X or boot-radeon can init it (5)
160Sony Vaio vgn-S580BH vga=normal, but suspend from X. Console will be blank unless you return to X. 184Sony Vaio vgn-S580BH vga=normal, but suspend from X. Console will
185 be blank unless you return to X.
161Sony Vaio vgn-FS115B s3_bios (2),s3_mode (4) 186Sony Vaio vgn-FS115B s3_bios (2),s3_mode (4)
162Toshiba Libretto L5 none (1) 187Toshiba Libretto L5 none (1)
163Toshiba Libretto 100CT/110CT vbetool (6) 188Toshiba Libretto 100CT/110CT vbetool (6)
164Toshiba Portege 3020CT s3_mode (3) 189Toshiba Portege 3020CT s3_mode (3)
165Toshiba Satellite 4030CDT s3_mode (3) (S1 also works OK) 190Toshiba Satellite 4030CDT s3_mode (3) (S1 also works OK)
166Toshiba Satellite 4080XCDT s3_mode (3) (S1 also works OK) 191Toshiba Satellite 4080XCDT s3_mode (3) (S1 also works OK)
167Toshiba Satellite 4090XCDT ??? (*) 192Toshiba Satellite 4090XCDT ??? [#f1]_
168Toshiba Satellite P10-554 s3_bios,s3_mode (4)(****) 193Toshiba Satellite P10-554 s3_bios,s3_mode (4) [#f3]_
169Toshiba M30 (2) xor X with nvidia driver using internal AGP 194Toshiba M30 (2) xor X with nvidia driver using internal AGP
170Uniwill 244IIO ??? (*) 195Uniwill 244IIO ??? [#f1]_
196=============================== ===============================================
171 197
172Known working desktop systems 198Known working desktop systems
173~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 199~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
174 200
201=================== ============================= ========================
175Mainboard Graphics card hack (or "how to do it") 202Mainboard Graphics card hack (or "how to do it")
176------------------------------------------------------------------------------ 203=================== ============================= ========================
177Asus A7V8X nVidia RIVA TNT2 model 64 s3_bios,s3_mode (4) 204Asus A7V8X nVidia RIVA TNT2 model 64 s3_bios,s3_mode (4)
205=================== ============================= ========================
178 206
179 207
180(*) from https://wiki.ubuntu.com/HoaryPMResults, not sure 208.. [#f1] from https://wiki.ubuntu.com/HoaryPMResults, not sure
181 which options to use. If you know, please tell me. 209 which options to use. If you know, please tell me.
182 210
183(***) To be tested with a newer kernel. 211.. [#f2] To be tested with a newer kernel.
184 212
185(****) Not with SMP kernel, UP only. 213.. [#f3] Not with SMP kernel, UP only.
diff --git a/Documentation/process/submitting-drivers.rst b/Documentation/process/submitting-drivers.rst
index 58bc047e7b95..1acaa14903d6 100644
--- a/Documentation/process/submitting-drivers.rst
+++ b/Documentation/process/submitting-drivers.rst
@@ -117,7 +117,7 @@ PM support:
117 implemented") error. You should also try to make sure that your 117 implemented") error. You should also try to make sure that your
118 driver uses as little power as possible when it's not doing 118 driver uses as little power as possible when it's not doing
119 anything. For the driver testing instructions see 119 anything. For the driver testing instructions see
120 Documentation/power/drivers-testing.txt and for a relatively 120 Documentation/power/drivers-testing.rst and for a relatively
121 complete overview of the power management issues related to 121 complete overview of the power management issues related to
122 drivers see :ref:`Documentation/driver-api/pm/devices.rst <driverapi_pm_devices>`. 122 drivers see :ref:`Documentation/driver-api/pm/devices.rst <driverapi_pm_devices>`.
123 123
diff --git a/Documentation/scheduler/sched-energy.rst b/Documentation/scheduler/sched-energy.rst
index fce5858c9082..9580c57a52bc 100644
--- a/Documentation/scheduler/sched-energy.rst
+++ b/Documentation/scheduler/sched-energy.rst
@@ -22,7 +22,7 @@ the highest.
22 22
23The actual EM used by EAS is _not_ maintained by the scheduler, but by a 23The actual EM used by EAS is _not_ maintained by the scheduler, but by a
24dedicated framework. For details about this framework and what it provides, 24dedicated framework. For details about this framework and what it provides,
25please refer to its documentation (see Documentation/power/energy-model.txt). 25please refer to its documentation (see Documentation/power/energy-model.rst).
26 26
27 27
282. Background and Terminology 282. Background and Terminology
@@ -81,7 +81,7 @@ through the arch_scale_cpu_capacity() callback.
81 81
82The rest of platform knowledge used by EAS is directly read from the Energy 82The rest of platform knowledge used by EAS is directly read from the Energy
83Model (EM) framework. The EM of a platform is composed of a power cost table 83Model (EM) framework. The EM of a platform is composed of a power cost table
84per 'performance domain' in the system (see Documentation/power/energy-model.txt 84per 'performance domain' in the system (see Documentation/power/energy-model.rst
85for futher details about performance domains). 85for futher details about performance domains).
86 86
87The scheduler manages references to the EM objects in the topology code when the 87The scheduler manages references to the EM objects in the topology code when the
@@ -353,7 +353,7 @@ could be amended in the future if proven otherwise.
353EAS uses the EM of a platform to estimate the impact of scheduling decisions on 353EAS uses the EM of a platform to estimate the impact of scheduling decisions on
354energy. So, your platform must provide power cost tables to the EM framework in 354energy. So, your platform must provide power cost tables to the EM framework in
355order to make EAS start. To do so, please refer to documentation of the 355order to make EAS start. To do so, please refer to documentation of the
356independent EM framework in Documentation/power/energy-model.txt. 356independent EM framework in Documentation/power/energy-model.rst.
357 357
358Please also note that the scheduling domains need to be re-built after the 358Please also note that the scheduling domains need to be re-built after the
359EM has been registered in order to start EAS. 359EM has been registered in order to start EAS.
diff --git a/Documentation/trace/coresight-cpu-debug.txt b/Documentation/trace/coresight-cpu-debug.txt
index f07e38094b40..1a660a39e3c0 100644
--- a/Documentation/trace/coresight-cpu-debug.txt
+++ b/Documentation/trace/coresight-cpu-debug.txt
@@ -151,7 +151,7 @@ At the runtime you can disable idle states with below methods:
151 151
152It is possible to disable CPU idle states by way of the PM QoS 152It is possible to disable CPU idle states by way of the PM QoS
153subsystem, more specifically by using the "/dev/cpu_dma_latency" 153subsystem, more specifically by using the "/dev/cpu_dma_latency"
154interface (see Documentation/power/pm_qos_interface.txt for more 154interface (see Documentation/power/pm_qos_interface.rst for more
155details). As specified in the PM QoS documentation the requested 155details). As specified in the PM QoS documentation the requested
156parameter will stay in effect until the file descriptor is released. 156parameter will stay in effect until the file descriptor is released.
157For example: 157For example:
diff --git a/Documentation/translations/zh_CN/process/submitting-drivers.rst b/Documentation/translations/zh_CN/process/submitting-drivers.rst
index 72f4f45c98de..d99885c27aed 100644
--- a/Documentation/translations/zh_CN/process/submitting-drivers.rst
+++ b/Documentation/translations/zh_CN/process/submitting-drivers.rst
@@ -97,7 +97,7 @@ Linux 2.6:
97 函数定义成返回 -ENOSYS(功能未实现)错误。你还应该尝试确 97 函数定义成返回 -ENOSYS(功能未实现)错误。你还应该尝试确
98 保你的驱动在什么都不干的情况下将耗电降到最低。要获得驱动 98 保你的驱动在什么都不干的情况下将耗电降到最低。要获得驱动
99 程序测试的指导,请参阅 99 程序测试的指导,请参阅
100 Documentation/power/drivers-testing.txt。有关驱动程序电 100 Documentation/power/drivers-testing.rst。有关驱动程序电
101 源管理问题相对全面的概述,请参阅 101 源管理问题相对全面的概述,请参阅
102 Documentation/driver-api/pm/devices.rst。 102 Documentation/driver-api/pm/devices.rst。
103 103
diff --git a/MAINTAINERS b/MAINTAINERS
index 350bb27a1c25..91eed595dc9a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6548,7 +6548,7 @@ M: "Rafael J. Wysocki" <rjw@rjwysocki.net>
6548M: Pavel Machek <pavel@ucw.cz> 6548M: Pavel Machek <pavel@ucw.cz>
6549L: linux-pm@vger.kernel.org 6549L: linux-pm@vger.kernel.org
6550S: Supported 6550S: Supported
6551F: Documentation/power/freezing-of-tasks.txt 6551F: Documentation/power/freezing-of-tasks.rst
6552F: include/linux/freezer.h 6552F: include/linux/freezer.h
6553F: kernel/freezer.c 6553F: kernel/freezer.c
6554 6554
@@ -11942,7 +11942,7 @@ S: Maintained
11942T: git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git 11942T: git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git
11943F: drivers/opp/ 11943F: drivers/opp/
11944F: include/linux/pm_opp.h 11944F: include/linux/pm_opp.h
11945F: Documentation/power/opp.txt 11945F: Documentation/power/opp.rst
11946F: Documentation/devicetree/bindings/opp/ 11946F: Documentation/devicetree/bindings/opp/
11947 11947
11948OPL4 DRIVER 11948OPL4 DRIVER
@@ -12329,7 +12329,7 @@ M: Sam Bobroff <sbobroff@linux.ibm.com>
12329M: Oliver O'Halloran <oohall@gmail.com> 12329M: Oliver O'Halloran <oohall@gmail.com>
12330L: linuxppc-dev@lists.ozlabs.org 12330L: linuxppc-dev@lists.ozlabs.org
12331S: Supported 12331S: Supported
12332F: Documentation/PCI/pci-error-recovery.txt 12332F: Documentation/PCI/pci-error-recovery.rst
12333F: drivers/pci/pcie/aer.c 12333F: drivers/pci/pcie/aer.c
12334F: drivers/pci/pcie/dpc.c 12334F: drivers/pci/pcie/dpc.c
12335F: drivers/pci/pcie/err.c 12335F: drivers/pci/pcie/err.c
@@ -12342,7 +12342,7 @@ PCI ERROR RECOVERY
12342M: Linas Vepstas <linasvepstas@gmail.com> 12342M: Linas Vepstas <linasvepstas@gmail.com>
12343L: linux-pci@vger.kernel.org 12343L: linux-pci@vger.kernel.org
12344S: Supported 12344S: Supported
12345F: Documentation/PCI/pci-error-recovery.txt 12345F: Documentation/PCI/pci-error-recovery.rst
12346 12346
12347PCI MSI DRIVER FOR ALTERA MSI IP 12347PCI MSI DRIVER FOR ALTERA MSI IP
12348M: Ley Foon Tan <lftan@altera.com> 12348M: Ley Foon Tan <lftan@altera.com>
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 575bd5517d21..570988c7a7ff 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -164,6 +164,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
164 struct acpi_pci_generic_root_info *ri; 164 struct acpi_pci_generic_root_info *ri;
165 struct pci_bus *bus, *child; 165 struct pci_bus *bus, *child;
166 struct acpi_pci_root_ops *root_ops; 166 struct acpi_pci_root_ops *root_ops;
167 struct pci_host_bridge *host;
167 168
168 ri = kzalloc(sizeof(*ri), GFP_KERNEL); 169 ri = kzalloc(sizeof(*ri), GFP_KERNEL);
169 if (!ri) 170 if (!ri)
@@ -189,8 +190,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
189 if (!bus) 190 if (!bus)
190 return NULL; 191 return NULL;
191 192
192 pci_bus_size_bridges(bus); 193 /* If we must preserve the resource configuration, claim now */
193 pci_bus_assign_resources(bus); 194 host = pci_find_host_bridge(bus);
195 if (host->preserve_config)
196 pci_bus_claim_resources(bus);
197
198 /*
199 * Assign whatever was left unassigned. If we didn't claim above,
200 * this will reassign everything.
201 */
202 pci_assign_unassigned_root_bus_resources(bus);
194 203
195 list_for_each_entry(child, &bus->children, node) 204 list_for_each_entry(child, &bus->children, node)
196 pcie_bus_configure_settings(child); 205 pcie_bus_configure_settings(child);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d0bbca65e4a4..879741336771 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2482,7 +2482,7 @@ menuconfig APM
2482 machines with more than one CPU. 2482 machines with more than one CPU.
2483 2483
2484 In order to use APM, you will need supporting software. For location 2484 In order to use APM, you will need supporting software. For location
2485 and more information, read <file:Documentation/power/apm-acpi.txt> 2485 and more information, read <file:Documentation/power/apm-acpi.rst>
2486 and the Battery Powered Linux mini-HOWTO, available from 2486 and the Battery Powered Linux mini-HOWTO, available from
2487 <http://www.tldp.org/docs.html#howto>. 2487 <http://www.tldp.org/docs.html#howto>.
2488 2488
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 39f5d172e84f..314a187ed572 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -881,6 +881,7 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
881 int node = acpi_get_node(device->handle); 881 int node = acpi_get_node(device->handle);
882 struct pci_bus *bus; 882 struct pci_bus *bus;
883 struct pci_host_bridge *host_bridge; 883 struct pci_host_bridge *host_bridge;
884 union acpi_object *obj;
884 885
885 info->root = root; 886 info->root = root;
886 info->bridge = device; 887 info->bridge = device;
@@ -917,6 +918,17 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
917 if (!(root->osc_control_set & OSC_PCI_EXPRESS_LTR_CONTROL)) 918 if (!(root->osc_control_set & OSC_PCI_EXPRESS_LTR_CONTROL))
918 host_bridge->native_ltr = 0; 919 host_bridge->native_ltr = 0;
919 920
921 /*
922 * Evaluate the "PCI Boot Configuration" _DSM Function. If it
923 * exists and returns 0, we must preserve any PCI resource
924 * assignments made by firmware for this host bridge.
925 */
926 obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), &pci_acpi_dsm_guid, 1,
927 IGNORE_PCI_BOOT_CONFIG_DSM, NULL);
928 if (obj && obj->type == ACPI_TYPE_INTEGER && obj->integer.value == 0)
929 host_bridge->preserve_config = 1;
930 ACPI_FREE(obj);
931
920 pci_scan_child_bus(bus); 932 pci_scan_child_bus(bus);
921 pci_set_host_bridge_release(host_bridge, acpi_pci_root_release_info, 933 pci_set_host_bridge_release(host_bridge, acpi_pci_root_release_info,
922 info); 934 info);
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h
index 473c4850c01d..2ee8f9522e05 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.h
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.h
@@ -45,7 +45,7 @@ enum i915_drm_suspend_mode {
45 * to be disabled. This shouldn't happen and we'll print some error messages in 45 * to be disabled. This shouldn't happen and we'll print some error messages in
46 * case it happens. 46 * case it happens.
47 * 47 *
48 * For more, read the Documentation/power/runtime_pm.txt. 48 * For more, read the Documentation/power/runtime_pm.rst.
49 */ 49 */
50struct intel_runtime_pm { 50struct intel_runtime_pm {
51 atomic_t wakeref_count; 51 atomic_t wakeref_count;
diff --git a/drivers/opp/Kconfig b/drivers/opp/Kconfig
index fe54d349d2e1..35dfc7e80f92 100644
--- a/drivers/opp/Kconfig
+++ b/drivers/opp/Kconfig
@@ -11,4 +11,4 @@ config PM_OPP
11 OPP layer organizes the data internally using device pointers 11 OPP layer organizes the data internally using device pointers
12 representing individual voltage domains and provides SOC 12 representing individual voltage domains and provides SOC
13 implementations a ready to use framework to manage OPPs. 13 implementations a ready to use framework to manage OPPs.
14 For more information, read <file:Documentation/power/opp.txt> 14 For more information, read <file:Documentation/power/opp.rst>
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 97c08146534a..e18499243f84 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -432,7 +432,7 @@ EXPORT_SYMBOL_GPL(pci_prg_resp_pasid_required);
432 * @pdev: PCI device structure 432 * @pdev: PCI device structure
433 * 433 *
434 * Returns negative value when PASID capability is not present. 434 * Returns negative value when PASID capability is not present.
435 * Otherwise it returns the numer of supported PASIDs. 435 * Otherwise it returns the number of supported PASIDs.
436 */ 436 */
437int pci_max_pasids(struct pci_dev *pdev) 437int pci_max_pasids(struct pci_dev *pdev)
438{ 438{
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index 011c57cae4b0..fe9f9f13ce11 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -174,14 +174,14 @@ config PCIE_IPROC_MSI
174 PCIe controller 174 PCIe controller
175 175
176config PCIE_ALTERA 176config PCIE_ALTERA
177 bool "Altera PCIe controller" 177 tristate "Altera PCIe controller"
178 depends on ARM || NIOS2 || ARM64 || COMPILE_TEST 178 depends on ARM || NIOS2 || ARM64 || COMPILE_TEST
179 help 179 help
180 Say Y here if you want to enable PCIe controller support on Altera 180 Say Y here if you want to enable PCIe controller support on Altera
181 FPGA. 181 FPGA.
182 182
183config PCIE_ALTERA_MSI 183config PCIE_ALTERA_MSI
184 bool "Altera PCIe MSI feature" 184 tristate "Altera PCIe MSI feature"
185 depends on PCIE_ALTERA 185 depends on PCIE_ALTERA
186 depends on PCI_MSI_IRQ_DOMAIN 186 depends on PCI_MSI_IRQ_DOMAIN
187 help 187 help
diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
index a6ce1ee51b4c..6ea778ae4877 100644
--- a/drivers/pci/controller/dwc/Kconfig
+++ b/drivers/pci/controller/dwc/Kconfig
@@ -90,7 +90,7 @@ config PCI_EXYNOS
90 90
91config PCI_IMX6 91config PCI_IMX6
92 bool "Freescale i.MX6/7/8 PCIe controller" 92 bool "Freescale i.MX6/7/8 PCIe controller"
93 depends on SOC_IMX6Q || SOC_IMX7D || (ARM64 && ARCH_MXC) || COMPILE_TEST 93 depends on ARCH_MXC || COMPILE_TEST
94 depends on PCI_MSI_IRQ_DOMAIN 94 depends on PCI_MSI_IRQ_DOMAIN
95 select PCIE_DW_HOST 95 select PCIE_DW_HOST
96 96
diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c
index 419451efd58c..4234ddb4722f 100644
--- a/drivers/pci/controller/dwc/pci-dra7xx.c
+++ b/drivers/pci/controller/dwc/pci-dra7xx.c
@@ -26,6 +26,7 @@
26#include <linux/types.h> 26#include <linux/types.h>
27#include <linux/mfd/syscon.h> 27#include <linux/mfd/syscon.h>
28#include <linux/regmap.h> 28#include <linux/regmap.h>
29#include <linux/gpio/consumer.h>
29 30
30#include "../../pci.h" 31#include "../../pci.h"
31#include "pcie-designware.h" 32#include "pcie-designware.h"
diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c b/drivers/pci/controller/dwc/pcie-armada8k.c
index 0c389a30ef5d..3d55dc78d999 100644
--- a/drivers/pci/controller/dwc/pcie-armada8k.c
+++ b/drivers/pci/controller/dwc/pcie-armada8k.c
@@ -25,10 +25,14 @@
25 25
26#include "pcie-designware.h" 26#include "pcie-designware.h"
27 27
28#define ARMADA8K_PCIE_MAX_LANES PCIE_LNK_X4
29
28struct armada8k_pcie { 30struct armada8k_pcie {
29 struct dw_pcie *pci; 31 struct dw_pcie *pci;
30 struct clk *clk; 32 struct clk *clk;
31 struct clk *clk_reg; 33 struct clk *clk_reg;
34 struct phy *phy[ARMADA8K_PCIE_MAX_LANES];
35 unsigned int phy_count;
32}; 36};
33 37
34#define PCIE_VENDOR_REGS_OFFSET 0x8000 38#define PCIE_VENDOR_REGS_OFFSET 0x8000
@@ -55,7 +59,7 @@ struct armada8k_pcie {
55#define PCIE_ARUSER_REG (PCIE_VENDOR_REGS_OFFSET + 0x5C) 59#define PCIE_ARUSER_REG (PCIE_VENDOR_REGS_OFFSET + 0x5C)
56#define PCIE_AWUSER_REG (PCIE_VENDOR_REGS_OFFSET + 0x60) 60#define PCIE_AWUSER_REG (PCIE_VENDOR_REGS_OFFSET + 0x60)
57/* 61/*
58 * AR/AW Cache defauls: Normal memory, Write-Back, Read / Write 62 * AR/AW Cache defaults: Normal memory, Write-Back, Read / Write
59 * allocate 63 * allocate
60 */ 64 */
61#define ARCACHE_DEFAULT_VALUE 0x3511 65#define ARCACHE_DEFAULT_VALUE 0x3511
@@ -67,6 +71,76 @@ struct armada8k_pcie {
67 71
68#define to_armada8k_pcie(x) dev_get_drvdata((x)->dev) 72#define to_armada8k_pcie(x) dev_get_drvdata((x)->dev)
69 73
74static void armada8k_pcie_disable_phys(struct armada8k_pcie *pcie)
75{
76 int i;
77
78 for (i = 0; i < ARMADA8K_PCIE_MAX_LANES; i++) {
79 phy_power_off(pcie->phy[i]);
80 phy_exit(pcie->phy[i]);
81 }
82}
83
84static int armada8k_pcie_enable_phys(struct armada8k_pcie *pcie)
85{
86 int ret;
87 int i;
88
89 for (i = 0; i < ARMADA8K_PCIE_MAX_LANES; i++) {
90 ret = phy_init(pcie->phy[i]);
91 if (ret)
92 return ret;
93
94 ret = phy_set_mode_ext(pcie->phy[i], PHY_MODE_PCIE,
95 pcie->phy_count);
96 if (ret) {
97 phy_exit(pcie->phy[i]);
98 return ret;
99 }
100
101 ret = phy_power_on(pcie->phy[i]);
102 if (ret) {
103 phy_exit(pcie->phy[i]);
104 return ret;
105 }
106 }
107
108 return 0;
109}
110
111static int armada8k_pcie_setup_phys(struct armada8k_pcie *pcie)
112{
113 struct dw_pcie *pci = pcie->pci;
114 struct device *dev = pci->dev;
115 struct device_node *node = dev->of_node;
116 int ret = 0;
117 int i;
118
119 for (i = 0; i < ARMADA8K_PCIE_MAX_LANES; i++) {
120 pcie->phy[i] = devm_of_phy_get_by_index(dev, node, i);
121 if (IS_ERR(pcie->phy[i]) &&
122 (PTR_ERR(pcie->phy[i]) == -EPROBE_DEFER))
123 return PTR_ERR(pcie->phy[i]);
124
125 if (IS_ERR(pcie->phy[i])) {
126 pcie->phy[i] = NULL;
127 continue;
128 }
129
130 pcie->phy_count++;
131 }
132
133 /* Old bindings miss the PHY handle, so just warn if there is no PHY */
134 if (!pcie->phy_count)
135 dev_warn(dev, "No available PHY\n");
136
137 ret = armada8k_pcie_enable_phys(pcie);
138 if (ret)
139 dev_err(dev, "Failed to initialize PHY(s) (%d)\n", ret);
140
141 return ret;
142}
143
70static int armada8k_pcie_link_up(struct dw_pcie *pci) 144static int armada8k_pcie_link_up(struct dw_pcie *pci)
71{ 145{
72 u32 reg; 146 u32 reg;
@@ -249,14 +323,20 @@ static int armada8k_pcie_probe(struct platform_device *pdev)
249 goto fail_clkreg; 323 goto fail_clkreg;
250 } 324 }
251 325
326 ret = armada8k_pcie_setup_phys(pcie);
327 if (ret)
328 goto fail_clkreg;
329
252 platform_set_drvdata(pdev, pcie); 330 platform_set_drvdata(pdev, pcie);
253 331
254 ret = armada8k_add_pcie_port(pcie, pdev); 332 ret = armada8k_add_pcie_port(pcie, pdev);
255 if (ret) 333 if (ret)
256 goto fail_clkreg; 334 goto disable_phy;
257 335
258 return 0; 336 return 0;
259 337
338disable_phy:
339 armada8k_pcie_disable_phys(pcie);
260fail_clkreg: 340fail_clkreg:
261 clk_disable_unprepare(pcie->clk_reg); 341 clk_disable_unprepare(pcie->clk_reg);
262fail: 342fail:
diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
index 77db32529319..f93252d0da5b 100644
--- a/drivers/pci/controller/dwc/pcie-designware-host.c
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -311,6 +311,7 @@ void dw_pcie_msi_init(struct pcie_port *pp)
311 dw_pcie_wr_own_conf(pp, PCIE_MSI_ADDR_HI, 4, 311 dw_pcie_wr_own_conf(pp, PCIE_MSI_ADDR_HI, 4,
312 upper_32_bits(msi_target)); 312 upper_32_bits(msi_target));
313} 313}
314EXPORT_SYMBOL_GPL(dw_pcie_msi_init);
314 315
315int dw_pcie_host_init(struct pcie_port *pp) 316int dw_pcie_host_init(struct pcie_port *pp)
316{ 317{
@@ -495,6 +496,16 @@ err_free_msi:
495 dw_pcie_free_msi(pp); 496 dw_pcie_free_msi(pp);
496 return ret; 497 return ret;
497} 498}
499EXPORT_SYMBOL_GPL(dw_pcie_host_init);
500
501void dw_pcie_host_deinit(struct pcie_port *pp)
502{
503 pci_stop_root_bus(pp->root_bus);
504 pci_remove_root_bus(pp->root_bus);
505 if (pci_msi_enabled() && !pp->ops->msi_host_init)
506 dw_pcie_free_msi(pp);
507}
508EXPORT_SYMBOL_GPL(dw_pcie_host_deinit);
498 509
499static int dw_pcie_access_other_conf(struct pcie_port *pp, struct pci_bus *bus, 510static int dw_pcie_access_other_conf(struct pcie_port *pp, struct pci_bus *bus,
500 u32 devfn, int where, int size, u32 *val, 511 u32 devfn, int where, int size, u32 *val,
@@ -687,3 +698,4 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
687 val |= PORT_LOGIC_SPEED_CHANGE; 698 val |= PORT_LOGIC_SPEED_CHANGE;
688 dw_pcie_wr_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, val); 699 dw_pcie_wr_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, val);
689} 700}
701EXPORT_SYMBOL_GPL(dw_pcie_setup_rc);
diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index 9d7c51c32b3b..7d25102c304c 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -34,6 +34,7 @@ int dw_pcie_read(void __iomem *addr, int size, u32 *val)
34 34
35 return PCIBIOS_SUCCESSFUL; 35 return PCIBIOS_SUCCESSFUL;
36} 36}
37EXPORT_SYMBOL_GPL(dw_pcie_read);
37 38
38int dw_pcie_write(void __iomem *addr, int size, u32 val) 39int dw_pcie_write(void __iomem *addr, int size, u32 val)
39{ 40{
@@ -51,69 +52,97 @@ int dw_pcie_write(void __iomem *addr, int size, u32 val)
51 52
52 return PCIBIOS_SUCCESSFUL; 53 return PCIBIOS_SUCCESSFUL;
53} 54}
55EXPORT_SYMBOL_GPL(dw_pcie_write);
54 56
55u32 __dw_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg, 57u32 dw_pcie_read_dbi(struct dw_pcie *pci, u32 reg, size_t size)
56 size_t size)
57{ 58{
58 int ret; 59 int ret;
59 u32 val; 60 u32 val;
60 61
61 if (pci->ops->read_dbi) 62 if (pci->ops->read_dbi)
62 return pci->ops->read_dbi(pci, base, reg, size); 63 return pci->ops->read_dbi(pci, pci->dbi_base, reg, size);
63 64
64 ret = dw_pcie_read(base + reg, size, &val); 65 ret = dw_pcie_read(pci->dbi_base + reg, size, &val);
65 if (ret) 66 if (ret)
66 dev_err(pci->dev, "Read DBI address failed\n"); 67 dev_err(pci->dev, "Read DBI address failed\n");
67 68
68 return val; 69 return val;
69} 70}
71EXPORT_SYMBOL_GPL(dw_pcie_read_dbi);
70 72
71void __dw_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg, 73void dw_pcie_write_dbi(struct dw_pcie *pci, u32 reg, size_t size, u32 val)
72 size_t size, u32 val)
73{ 74{
74 int ret; 75 int ret;
75 76
76 if (pci->ops->write_dbi) { 77 if (pci->ops->write_dbi) {
77 pci->ops->write_dbi(pci, base, reg, size, val); 78 pci->ops->write_dbi(pci, pci->dbi_base, reg, size, val);
78 return; 79 return;
79 } 80 }
80 81
81 ret = dw_pcie_write(base + reg, size, val); 82 ret = dw_pcie_write(pci->dbi_base + reg, size, val);
82 if (ret) 83 if (ret)
83 dev_err(pci->dev, "Write DBI address failed\n"); 84 dev_err(pci->dev, "Write DBI address failed\n");
84} 85}
86EXPORT_SYMBOL_GPL(dw_pcie_write_dbi);
85 87
86u32 __dw_pcie_read_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg, 88u32 dw_pcie_read_dbi2(struct dw_pcie *pci, u32 reg, size_t size)
87 size_t size)
88{ 89{
89 int ret; 90 int ret;
90 u32 val; 91 u32 val;
91 92
92 if (pci->ops->read_dbi2) 93 if (pci->ops->read_dbi2)
93 return pci->ops->read_dbi2(pci, base, reg, size); 94 return pci->ops->read_dbi2(pci, pci->dbi_base2, reg, size);
94 95
95 ret = dw_pcie_read(base + reg, size, &val); 96 ret = dw_pcie_read(pci->dbi_base2 + reg, size, &val);
96 if (ret) 97 if (ret)
97 dev_err(pci->dev, "read DBI address failed\n"); 98 dev_err(pci->dev, "read DBI address failed\n");
98 99
99 return val; 100 return val;
100} 101}
101 102
102void __dw_pcie_write_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg, 103void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val)
103 size_t size, u32 val)
104{ 104{
105 int ret; 105 int ret;
106 106
107 if (pci->ops->write_dbi2) { 107 if (pci->ops->write_dbi2) {
108 pci->ops->write_dbi2(pci, base, reg, size, val); 108 pci->ops->write_dbi2(pci, pci->dbi_base2, reg, size, val);
109 return; 109 return;
110 } 110 }
111 111
112 ret = dw_pcie_write(base + reg, size, val); 112 ret = dw_pcie_write(pci->dbi_base2 + reg, size, val);
113 if (ret) 113 if (ret)
114 dev_err(pci->dev, "write DBI address failed\n"); 114 dev_err(pci->dev, "write DBI address failed\n");
115} 115}
116 116
117u32 dw_pcie_read_atu(struct dw_pcie *pci, u32 reg, size_t size)
118{
119 int ret;
120 u32 val;
121
122 if (pci->ops->read_dbi)
123 return pci->ops->read_dbi(pci, pci->atu_base, reg, size);
124
125 ret = dw_pcie_read(pci->atu_base + reg, size, &val);
126 if (ret)
127 dev_err(pci->dev, "Read ATU address failed\n");
128
129 return val;
130}
131
132void dw_pcie_write_atu(struct dw_pcie *pci, u32 reg, size_t size, u32 val)
133{
134 int ret;
135
136 if (pci->ops->write_dbi) {
137 pci->ops->write_dbi(pci, pci->atu_base, reg, size, val);
138 return;
139 }
140
141 ret = dw_pcie_write(pci->atu_base + reg, size, val);
142 if (ret)
143 dev_err(pci->dev, "Write ATU address failed\n");
144}
145
117static u32 dw_pcie_readl_ob_unroll(struct dw_pcie *pci, u32 index, u32 reg) 146static u32 dw_pcie_readl_ob_unroll(struct dw_pcie *pci, u32 index, u32 reg)
118{ 147{
119 u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index); 148 u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index);
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index b8993f2b78df..ffed084a0b4f 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -254,14 +254,12 @@ struct dw_pcie {
254int dw_pcie_read(void __iomem *addr, int size, u32 *val); 254int dw_pcie_read(void __iomem *addr, int size, u32 *val);
255int dw_pcie_write(void __iomem *addr, int size, u32 val); 255int dw_pcie_write(void __iomem *addr, int size, u32 val);
256 256
257u32 __dw_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg, 257u32 dw_pcie_read_dbi(struct dw_pcie *pci, u32 reg, size_t size);
258 size_t size); 258void dw_pcie_write_dbi(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
259void __dw_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg, 259u32 dw_pcie_read_dbi2(struct dw_pcie *pci, u32 reg, size_t size);
260 size_t size, u32 val); 260void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
261u32 __dw_pcie_read_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg, 261u32 dw_pcie_read_atu(struct dw_pcie *pci, u32 reg, size_t size);
262 size_t size); 262void dw_pcie_write_atu(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
263void __dw_pcie_write_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg,
264 size_t size, u32 val);
265int dw_pcie_link_up(struct dw_pcie *pci); 263int dw_pcie_link_up(struct dw_pcie *pci);
266int dw_pcie_wait_for_link(struct dw_pcie *pci); 264int dw_pcie_wait_for_link(struct dw_pcie *pci);
267void dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index, 265void dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index,
@@ -275,52 +273,52 @@ void dw_pcie_setup(struct dw_pcie *pci);
275 273
276static inline void dw_pcie_writel_dbi(struct dw_pcie *pci, u32 reg, u32 val) 274static inline void dw_pcie_writel_dbi(struct dw_pcie *pci, u32 reg, u32 val)
277{ 275{
278 __dw_pcie_write_dbi(pci, pci->dbi_base, reg, 0x4, val); 276 dw_pcie_write_dbi(pci, reg, 0x4, val);
279} 277}
280 278
281static inline u32 dw_pcie_readl_dbi(struct dw_pcie *pci, u32 reg) 279static inline u32 dw_pcie_readl_dbi(struct dw_pcie *pci, u32 reg)
282{ 280{
283 return __dw_pcie_read_dbi(pci, pci->dbi_base, reg, 0x4); 281 return dw_pcie_read_dbi(pci, reg, 0x4);
284} 282}
285 283
286static inline void dw_pcie_writew_dbi(struct dw_pcie *pci, u32 reg, u16 val) 284static inline void dw_pcie_writew_dbi(struct dw_pcie *pci, u32 reg, u16 val)
287{ 285{
288 __dw_pcie_write_dbi(pci, pci->dbi_base, reg, 0x2, val); 286 dw_pcie_write_dbi(pci, reg, 0x2, val);
289} 287}
290 288
291static inline u16 dw_pcie_readw_dbi(struct dw_pcie *pci, u32 reg) 289static inline u16 dw_pcie_readw_dbi(struct dw_pcie *pci, u32 reg)
292{ 290{
293 return __dw_pcie_read_dbi(pci, pci->dbi_base, reg, 0x2); 291 return dw_pcie_read_dbi(pci, reg, 0x2);
294} 292}
295 293
296static inline void dw_pcie_writeb_dbi(struct dw_pcie *pci, u32 reg, u8 val) 294static inline void dw_pcie_writeb_dbi(struct dw_pcie *pci, u32 reg, u8 val)
297{ 295{
298 __dw_pcie_write_dbi(pci, pci->dbi_base, reg, 0x1, val); 296 dw_pcie_write_dbi(pci, reg, 0x1, val);
299} 297}
300 298
301static inline u8 dw_pcie_readb_dbi(struct dw_pcie *pci, u32 reg) 299static inline u8 dw_pcie_readb_dbi(struct dw_pcie *pci, u32 reg)
302{ 300{
303 return __dw_pcie_read_dbi(pci, pci->dbi_base, reg, 0x1); 301 return dw_pcie_read_dbi(pci, reg, 0x1);
304} 302}
305 303
306static inline void dw_pcie_writel_dbi2(struct dw_pcie *pci, u32 reg, u32 val) 304static inline void dw_pcie_writel_dbi2(struct dw_pcie *pci, u32 reg, u32 val)
307{ 305{
308 __dw_pcie_write_dbi2(pci, pci->dbi_base2, reg, 0x4, val); 306 dw_pcie_write_dbi2(pci, reg, 0x4, val);
309} 307}
310 308
311static inline u32 dw_pcie_readl_dbi2(struct dw_pcie *pci, u32 reg) 309static inline u32 dw_pcie_readl_dbi2(struct dw_pcie *pci, u32 reg)
312{ 310{
313 return __dw_pcie_read_dbi2(pci, pci->dbi_base2, reg, 0x4); 311 return dw_pcie_read_dbi2(pci, reg, 0x4);
314} 312}
315 313
316static inline void dw_pcie_writel_atu(struct dw_pcie *pci, u32 reg, u32 val) 314static inline void dw_pcie_writel_atu(struct dw_pcie *pci, u32 reg, u32 val)
317{ 315{
318 __dw_pcie_write_dbi(pci, pci->atu_base, reg, 0x4, val); 316 dw_pcie_write_atu(pci, reg, 0x4, val);
319} 317}
320 318
321static inline u32 dw_pcie_readl_atu(struct dw_pcie *pci, u32 reg) 319static inline u32 dw_pcie_readl_atu(struct dw_pcie *pci, u32 reg)
322{ 320{
323 return __dw_pcie_read_dbi(pci, pci->atu_base, reg, 0x4); 321 return dw_pcie_read_atu(pci, reg, 0x4);
324} 322}
325 323
326static inline void dw_pcie_dbi_ro_wr_en(struct dw_pcie *pci) 324static inline void dw_pcie_dbi_ro_wr_en(struct dw_pcie *pci)
@@ -351,6 +349,7 @@ void dw_pcie_msi_init(struct pcie_port *pp);
351void dw_pcie_free_msi(struct pcie_port *pp); 349void dw_pcie_free_msi(struct pcie_port *pp);
352void dw_pcie_setup_rc(struct pcie_port *pp); 350void dw_pcie_setup_rc(struct pcie_port *pp);
353int dw_pcie_host_init(struct pcie_port *pp); 351int dw_pcie_host_init(struct pcie_port *pp);
352void dw_pcie_host_deinit(struct pcie_port *pp);
354int dw_pcie_allocate_domains(struct pcie_port *pp); 353int dw_pcie_allocate_domains(struct pcie_port *pp);
355#else 354#else
356static inline irqreturn_t dw_handle_msi_irq(struct pcie_port *pp) 355static inline irqreturn_t dw_handle_msi_irq(struct pcie_port *pp)
@@ -375,6 +374,10 @@ static inline int dw_pcie_host_init(struct pcie_port *pp)
375 return 0; 374 return 0;
376} 375}
377 376
377static inline void dw_pcie_host_deinit(struct pcie_port *pp)
378{
379}
380
378static inline int dw_pcie_allocate_domains(struct pcie_port *pp) 381static inline int dw_pcie_allocate_domains(struct pcie_port *pp)
379{ 382{
380 return 0; 383 return 0;
diff --git a/drivers/pci/controller/dwc/pcie-kirin.c b/drivers/pci/controller/dwc/pcie-kirin.c
index 9b599296205d..8df1914226be 100644
--- a/drivers/pci/controller/dwc/pcie-kirin.c
+++ b/drivers/pci/controller/dwc/pcie-kirin.c
@@ -2,7 +2,7 @@
2/* 2/*
3 * PCIe host controller driver for Kirin Phone SoCs 3 * PCIe host controller driver for Kirin Phone SoCs
4 * 4 *
5 * Copyright (C) 2017 Hilisicon Electronics Co., Ltd. 5 * Copyright (C) 2017 HiSilicon Electronics Co., Ltd.
6 * http://www.huawei.com 6 * http://www.huawei.com
7 * 7 *
8 * Author: Xiaowei Song <songxiaowei@huawei.com> 8 * Author: Xiaowei Song <songxiaowei@huawei.com>
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 0ed235d560e3..7e581748ee9f 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -112,10 +112,10 @@ struct qcom_pcie_resources_2_3_2 {
112 struct regulator_bulk_data supplies[QCOM_PCIE_2_3_2_MAX_SUPPLY]; 112 struct regulator_bulk_data supplies[QCOM_PCIE_2_3_2_MAX_SUPPLY];
113}; 113};
114 114
115#define QCOM_PCIE_2_4_0_MAX_CLOCKS 4
115struct qcom_pcie_resources_2_4_0 { 116struct qcom_pcie_resources_2_4_0 {
116 struct clk *aux_clk; 117 struct clk_bulk_data clks[QCOM_PCIE_2_4_0_MAX_CLOCKS];
117 struct clk *master_clk; 118 int num_clks;
118 struct clk *slave_clk;
119 struct reset_control *axi_m_reset; 119 struct reset_control *axi_m_reset;
120 struct reset_control *axi_s_reset; 120 struct reset_control *axi_s_reset;
121 struct reset_control *pipe_reset; 121 struct reset_control *pipe_reset;
@@ -178,6 +178,8 @@ static void qcom_ep_reset_assert(struct qcom_pcie *pcie)
178 178
179static void qcom_ep_reset_deassert(struct qcom_pcie *pcie) 179static void qcom_ep_reset_deassert(struct qcom_pcie *pcie)
180{ 180{
181 /* Ensure that PERST has been asserted for at least 100 ms */
182 msleep(100);
181 gpiod_set_value_cansleep(pcie->reset, 0); 183 gpiod_set_value_cansleep(pcie->reset, 0);
182 usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500); 184 usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500);
183} 185}
@@ -638,18 +640,20 @@ static int qcom_pcie_get_resources_2_4_0(struct qcom_pcie *pcie)
638 struct qcom_pcie_resources_2_4_0 *res = &pcie->res.v2_4_0; 640 struct qcom_pcie_resources_2_4_0 *res = &pcie->res.v2_4_0;
639 struct dw_pcie *pci = pcie->pci; 641 struct dw_pcie *pci = pcie->pci;
640 struct device *dev = pci->dev; 642 struct device *dev = pci->dev;
643 bool is_ipq = of_device_is_compatible(dev->of_node, "qcom,pcie-ipq4019");
644 int ret;
641 645
642 res->aux_clk = devm_clk_get(dev, "aux"); 646 res->clks[0].id = "aux";
643 if (IS_ERR(res->aux_clk)) 647 res->clks[1].id = "master_bus";
644 return PTR_ERR(res->aux_clk); 648 res->clks[2].id = "slave_bus";
649 res->clks[3].id = "iface";
645 650
646 res->master_clk = devm_clk_get(dev, "master_bus"); 651 /* qcom,pcie-ipq4019 is defined without "iface" */
647 if (IS_ERR(res->master_clk)) 652 res->num_clks = is_ipq ? 3 : 4;
648 return PTR_ERR(res->master_clk);
649 653
650 res->slave_clk = devm_clk_get(dev, "slave_bus"); 654 ret = devm_clk_bulk_get(dev, res->num_clks, res->clks);
651 if (IS_ERR(res->slave_clk)) 655 if (ret < 0)
652 return PTR_ERR(res->slave_clk); 656 return ret;
653 657
654 res->axi_m_reset = devm_reset_control_get_exclusive(dev, "axi_m"); 658 res->axi_m_reset = devm_reset_control_get_exclusive(dev, "axi_m");
655 if (IS_ERR(res->axi_m_reset)) 659 if (IS_ERR(res->axi_m_reset))
@@ -659,27 +663,33 @@ static int qcom_pcie_get_resources_2_4_0(struct qcom_pcie *pcie)
659 if (IS_ERR(res->axi_s_reset)) 663 if (IS_ERR(res->axi_s_reset))
660 return PTR_ERR(res->axi_s_reset); 664 return PTR_ERR(res->axi_s_reset);
661 665
662 res->pipe_reset = devm_reset_control_get_exclusive(dev, "pipe"); 666 if (is_ipq) {
663 if (IS_ERR(res->pipe_reset)) 667 /*
664 return PTR_ERR(res->pipe_reset); 668 * These resources relates to the PHY or are secure clocks, but
665 669 * are controlled here for IPQ4019
666 res->axi_m_vmid_reset = devm_reset_control_get_exclusive(dev, 670 */
667 "axi_m_vmid"); 671 res->pipe_reset = devm_reset_control_get_exclusive(dev, "pipe");
668 if (IS_ERR(res->axi_m_vmid_reset)) 672 if (IS_ERR(res->pipe_reset))
669 return PTR_ERR(res->axi_m_vmid_reset); 673 return PTR_ERR(res->pipe_reset);
670 674
671 res->axi_s_xpu_reset = devm_reset_control_get_exclusive(dev, 675 res->axi_m_vmid_reset = devm_reset_control_get_exclusive(dev,
672 "axi_s_xpu"); 676 "axi_m_vmid");
673 if (IS_ERR(res->axi_s_xpu_reset)) 677 if (IS_ERR(res->axi_m_vmid_reset))
674 return PTR_ERR(res->axi_s_xpu_reset); 678 return PTR_ERR(res->axi_m_vmid_reset);
675 679
676 res->parf_reset = devm_reset_control_get_exclusive(dev, "parf"); 680 res->axi_s_xpu_reset = devm_reset_control_get_exclusive(dev,
677 if (IS_ERR(res->parf_reset)) 681 "axi_s_xpu");
678 return PTR_ERR(res->parf_reset); 682 if (IS_ERR(res->axi_s_xpu_reset))
679 683 return PTR_ERR(res->axi_s_xpu_reset);
680 res->phy_reset = devm_reset_control_get_exclusive(dev, "phy"); 684
681 if (IS_ERR(res->phy_reset)) 685 res->parf_reset = devm_reset_control_get_exclusive(dev, "parf");
682 return PTR_ERR(res->phy_reset); 686 if (IS_ERR(res->parf_reset))
687 return PTR_ERR(res->parf_reset);
688
689 res->phy_reset = devm_reset_control_get_exclusive(dev, "phy");
690 if (IS_ERR(res->phy_reset))
691 return PTR_ERR(res->phy_reset);
692 }
683 693
684 res->axi_m_sticky_reset = devm_reset_control_get_exclusive(dev, 694 res->axi_m_sticky_reset = devm_reset_control_get_exclusive(dev,
685 "axi_m_sticky"); 695 "axi_m_sticky");
@@ -699,9 +709,11 @@ static int qcom_pcie_get_resources_2_4_0(struct qcom_pcie *pcie)
699 if (IS_ERR(res->ahb_reset)) 709 if (IS_ERR(res->ahb_reset))
700 return PTR_ERR(res->ahb_reset); 710 return PTR_ERR(res->ahb_reset);
701 711
702 res->phy_ahb_reset = devm_reset_control_get_exclusive(dev, "phy_ahb"); 712 if (is_ipq) {
703 if (IS_ERR(res->phy_ahb_reset)) 713 res->phy_ahb_reset = devm_reset_control_get_exclusive(dev, "phy_ahb");
704 return PTR_ERR(res->phy_ahb_reset); 714 if (IS_ERR(res->phy_ahb_reset))
715 return PTR_ERR(res->phy_ahb_reset);
716 }
705 717
706 return 0; 718 return 0;
707} 719}
@@ -719,9 +731,7 @@ static void qcom_pcie_deinit_2_4_0(struct qcom_pcie *pcie)
719 reset_control_assert(res->axi_m_sticky_reset); 731 reset_control_assert(res->axi_m_sticky_reset);
720 reset_control_assert(res->pwr_reset); 732 reset_control_assert(res->pwr_reset);
721 reset_control_assert(res->ahb_reset); 733 reset_control_assert(res->ahb_reset);
722 clk_disable_unprepare(res->aux_clk); 734 clk_bulk_disable_unprepare(res->num_clks, res->clks);
723 clk_disable_unprepare(res->master_clk);
724 clk_disable_unprepare(res->slave_clk);
725} 735}
726 736
727static int qcom_pcie_init_2_4_0(struct qcom_pcie *pcie) 737static int qcom_pcie_init_2_4_0(struct qcom_pcie *pcie)
@@ -850,23 +860,9 @@ static int qcom_pcie_init_2_4_0(struct qcom_pcie *pcie)
850 860
851 usleep_range(10000, 12000); 861 usleep_range(10000, 12000);
852 862
853 ret = clk_prepare_enable(res->aux_clk); 863 ret = clk_bulk_prepare_enable(res->num_clks, res->clks);
854 if (ret) { 864 if (ret)
855 dev_err(dev, "cannot prepare/enable iface clock\n"); 865 goto err_clks;
856 goto err_clk_aux;
857 }
858
859 ret = clk_prepare_enable(res->master_clk);
860 if (ret) {
861 dev_err(dev, "cannot prepare/enable core clock\n");
862 goto err_clk_axi_m;
863 }
864
865 ret = clk_prepare_enable(res->slave_clk);
866 if (ret) {
867 dev_err(dev, "cannot prepare/enable phy clock\n");
868 goto err_clk_axi_s;
869 }
870 866
871 /* enable PCIe clocks and resets */ 867 /* enable PCIe clocks and resets */
872 val = readl(pcie->parf + PCIE20_PARF_PHY_CTRL); 868 val = readl(pcie->parf + PCIE20_PARF_PHY_CTRL);
@@ -891,11 +887,7 @@ static int qcom_pcie_init_2_4_0(struct qcom_pcie *pcie)
891 887
892 return 0; 888 return 0;
893 889
894err_clk_axi_s: 890err_clks:
895 clk_disable_unprepare(res->master_clk);
896err_clk_axi_m:
897 clk_disable_unprepare(res->aux_clk);
898err_clk_aux:
899 reset_control_assert(res->ahb_reset); 891 reset_control_assert(res->ahb_reset);
900err_rst_ahb: 892err_rst_ahb:
901 reset_control_assert(res->pwr_reset); 893 reset_control_assert(res->pwr_reset);
@@ -1289,6 +1281,7 @@ static const struct of_device_id qcom_pcie_match[] = {
1289 { .compatible = "qcom,pcie-msm8996", .data = &ops_2_3_2 }, 1281 { .compatible = "qcom,pcie-msm8996", .data = &ops_2_3_2 },
1290 { .compatible = "qcom,pcie-ipq8074", .data = &ops_2_3_3 }, 1282 { .compatible = "qcom,pcie-ipq8074", .data = &ops_2_3_3 },
1291 { .compatible = "qcom,pcie-ipq4019", .data = &ops_2_4_0 }, 1283 { .compatible = "qcom,pcie-ipq4019", .data = &ops_2_4_0 },
1284 { .compatible = "qcom,pcie-qcs404", .data = &ops_2_4_0 },
1292 { } 1285 { }
1293}; 1286};
1294 1287
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index 134e0306ff00..fc0fe4d4de49 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -308,7 +308,7 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
308 308
309 advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_MASK_REG); 309 advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_MASK_REG);
310 310
311 /* Unmask all MSI's */ 311 /* Unmask all MSIs */
312 advk_writel(pcie, 0, PCIE_MSI_MASK_REG); 312 advk_writel(pcie, 0, PCIE_MSI_MASK_REG);
313 313
314 /* Enable summary interrupt for GIC SPI source */ 314 /* Enable summary interrupt for GIC SPI source */
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 82acd6155adf..40b625458afa 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -1875,6 +1875,7 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
1875static void hv_eject_device_work(struct work_struct *work) 1875static void hv_eject_device_work(struct work_struct *work)
1876{ 1876{
1877 struct pci_eject_response *ejct_pkt; 1877 struct pci_eject_response *ejct_pkt;
1878 struct hv_pcibus_device *hbus;
1878 struct hv_pci_dev *hpdev; 1879 struct hv_pci_dev *hpdev;
1879 struct pci_dev *pdev; 1880 struct pci_dev *pdev;
1880 unsigned long flags; 1881 unsigned long flags;
@@ -1885,6 +1886,7 @@ static void hv_eject_device_work(struct work_struct *work)
1885 } ctxt; 1886 } ctxt;
1886 1887
1887 hpdev = container_of(work, struct hv_pci_dev, wrk); 1888 hpdev = container_of(work, struct hv_pci_dev, wrk);
1889 hbus = hpdev->hbus;
1888 1890
1889 WARN_ON(hpdev->state != hv_pcichild_ejecting); 1891 WARN_ON(hpdev->state != hv_pcichild_ejecting);
1890 1892
@@ -1895,8 +1897,7 @@ static void hv_eject_device_work(struct work_struct *work)
1895 * because hbus->pci_bus may not exist yet. 1897 * because hbus->pci_bus may not exist yet.
1896 */ 1898 */
1897 wslot = wslot_to_devfn(hpdev->desc.win_slot.slot); 1899 wslot = wslot_to_devfn(hpdev->desc.win_slot.slot);
1898 pdev = pci_get_domain_bus_and_slot(hpdev->hbus->sysdata.domain, 0, 1900 pdev = pci_get_domain_bus_and_slot(hbus->sysdata.domain, 0, wslot);
1899 wslot);
1900 if (pdev) { 1901 if (pdev) {
1901 pci_lock_rescan_remove(); 1902 pci_lock_rescan_remove();
1902 pci_stop_and_remove_bus_device(pdev); 1903 pci_stop_and_remove_bus_device(pdev);
@@ -1904,9 +1905,9 @@ static void hv_eject_device_work(struct work_struct *work)
1904 pci_unlock_rescan_remove(); 1905 pci_unlock_rescan_remove();
1905 } 1906 }
1906 1907
1907 spin_lock_irqsave(&hpdev->hbus->device_list_lock, flags); 1908 spin_lock_irqsave(&hbus->device_list_lock, flags);
1908 list_del(&hpdev->list_entry); 1909 list_del(&hpdev->list_entry);
1909 spin_unlock_irqrestore(&hpdev->hbus->device_list_lock, flags); 1910 spin_unlock_irqrestore(&hbus->device_list_lock, flags);
1910 1911
1911 if (hpdev->pci_slot) 1912 if (hpdev->pci_slot)
1912 pci_destroy_slot(hpdev->pci_slot); 1913 pci_destroy_slot(hpdev->pci_slot);
@@ -1915,7 +1916,7 @@ static void hv_eject_device_work(struct work_struct *work)
1915 ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message; 1916 ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
1916 ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE; 1917 ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE;
1917 ejct_pkt->wslot.slot = hpdev->desc.win_slot.slot; 1918 ejct_pkt->wslot.slot = hpdev->desc.win_slot.slot;
1918 vmbus_sendpacket(hpdev->hbus->hdev->channel, ejct_pkt, 1919 vmbus_sendpacket(hbus->hdev->channel, ejct_pkt,
1919 sizeof(*ejct_pkt), (unsigned long)&ctxt.pkt, 1920 sizeof(*ejct_pkt), (unsigned long)&ctxt.pkt,
1920 VM_PKT_DATA_INBAND, 0); 1921 VM_PKT_DATA_INBAND, 0);
1921 1922
@@ -1924,7 +1925,9 @@ static void hv_eject_device_work(struct work_struct *work)
1924 /* For the two refs got in new_pcichild_device() */ 1925 /* For the two refs got in new_pcichild_device() */
1925 put_pcichild(hpdev); 1926 put_pcichild(hpdev);
1926 put_pcichild(hpdev); 1927 put_pcichild(hpdev);
1927 put_hvpcibus(hpdev->hbus); 1928 /* hpdev has been freed. Do not use it any more. */
1929
1930 put_hvpcibus(hbus);
1928} 1931}
1929 1932
1930/** 1933/**
diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
index 464ba2538d52..9a917b2456f6 100644
--- a/drivers/pci/controller/pci-tegra.c
+++ b/drivers/pci/controller/pci-tegra.c
@@ -17,6 +17,7 @@
17#include <linux/debugfs.h> 17#include <linux/debugfs.h>
18#include <linux/delay.h> 18#include <linux/delay.h>
19#include <linux/export.h> 19#include <linux/export.h>
20#include <linux/gpio/consumer.h>
20#include <linux/interrupt.h> 21#include <linux/interrupt.h>
21#include <linux/iopoll.h> 22#include <linux/iopoll.h>
22#include <linux/irq.h> 23#include <linux/irq.h>
@@ -30,6 +31,7 @@
30#include <linux/of_platform.h> 31#include <linux/of_platform.h>
31#include <linux/pci.h> 32#include <linux/pci.h>
32#include <linux/phy/phy.h> 33#include <linux/phy/phy.h>
34#include <linux/pinctrl/consumer.h>
33#include <linux/platform_device.h> 35#include <linux/platform_device.h>
34#include <linux/reset.h> 36#include <linux/reset.h>
35#include <linux/sizes.h> 37#include <linux/sizes.h>
@@ -95,7 +97,8 @@
95#define AFI_MSI_EN_VEC7 0xa8 97#define AFI_MSI_EN_VEC7 0xa8
96 98
97#define AFI_CONFIGURATION 0xac 99#define AFI_CONFIGURATION 0xac
98#define AFI_CONFIGURATION_EN_FPCI (1 << 0) 100#define AFI_CONFIGURATION_EN_FPCI (1 << 0)
101#define AFI_CONFIGURATION_CLKEN_OVERRIDE (1 << 31)
99 102
100#define AFI_FPCI_ERROR_MASKS 0xb0 103#define AFI_FPCI_ERROR_MASKS 0xb0
101 104
@@ -159,13 +162,14 @@
159#define AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_211 (0x1 << 20) 162#define AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_211 (0x1 << 20)
160#define AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_411 (0x2 << 20) 163#define AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_411 (0x2 << 20)
161#define AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_111 (0x2 << 20) 164#define AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_111 (0x2 << 20)
165#define AFI_PCIE_CONFIG_PCIE_CLKREQ_GPIO(x) (1 << ((x) + 29))
166#define AFI_PCIE_CONFIG_PCIE_CLKREQ_GPIO_ALL (0x7 << 29)
162 167
163#define AFI_FUSE 0x104 168#define AFI_FUSE 0x104
164#define AFI_FUSE_PCIE_T0_GEN2_DIS (1 << 2) 169#define AFI_FUSE_PCIE_T0_GEN2_DIS (1 << 2)
165 170
166#define AFI_PEX0_CTRL 0x110 171#define AFI_PEX0_CTRL 0x110
167#define AFI_PEX1_CTRL 0x118 172#define AFI_PEX1_CTRL 0x118
168#define AFI_PEX2_CTRL 0x128
169#define AFI_PEX_CTRL_RST (1 << 0) 173#define AFI_PEX_CTRL_RST (1 << 0)
170#define AFI_PEX_CTRL_CLKREQ_EN (1 << 1) 174#define AFI_PEX_CTRL_CLKREQ_EN (1 << 1)
171#define AFI_PEX_CTRL_REFCLK_EN (1 << 3) 175#define AFI_PEX_CTRL_REFCLK_EN (1 << 3)
@@ -177,20 +181,74 @@
177 181
178#define AFI_PEXBIAS_CTRL_0 0x168 182#define AFI_PEXBIAS_CTRL_0 0x168
179 183
184#define RP_PRIV_XP_DL 0x00000494
185#define RP_PRIV_XP_DL_GEN2_UPD_FC_TSHOLD (0x1ff << 1)
186
187#define RP_RX_HDR_LIMIT 0x00000e00
188#define RP_RX_HDR_LIMIT_PW_MASK (0xff << 8)
189#define RP_RX_HDR_LIMIT_PW (0x0e << 8)
190
191#define RP_ECTL_2_R1 0x00000e84
192#define RP_ECTL_2_R1_RX_CTLE_1C_MASK 0xffff
193
194#define RP_ECTL_4_R1 0x00000e8c
195#define RP_ECTL_4_R1_RX_CDR_CTRL_1C_MASK (0xffff << 16)
196#define RP_ECTL_4_R1_RX_CDR_CTRL_1C_SHIFT 16
197
198#define RP_ECTL_5_R1 0x00000e90
199#define RP_ECTL_5_R1_RX_EQ_CTRL_L_1C_MASK 0xffffffff
200
201#define RP_ECTL_6_R1 0x00000e94
202#define RP_ECTL_6_R1_RX_EQ_CTRL_H_1C_MASK 0xffffffff
203
204#define RP_ECTL_2_R2 0x00000ea4
205#define RP_ECTL_2_R2_RX_CTLE_1C_MASK 0xffff
206
207#define RP_ECTL_4_R2 0x00000eac
208#define RP_ECTL_4_R2_RX_CDR_CTRL_1C_MASK (0xffff << 16)
209#define RP_ECTL_4_R2_RX_CDR_CTRL_1C_SHIFT 16
210
211#define RP_ECTL_5_R2 0x00000eb0
212#define RP_ECTL_5_R2_RX_EQ_CTRL_L_1C_MASK 0xffffffff
213
214#define RP_ECTL_6_R2 0x00000eb4
215#define RP_ECTL_6_R2_RX_EQ_CTRL_H_1C_MASK 0xffffffff
216
180#define RP_VEND_XP 0x00000f00 217#define RP_VEND_XP 0x00000f00
181#define RP_VEND_XP_DL_UP (1 << 30) 218#define RP_VEND_XP_DL_UP (1 << 30)
219#define RP_VEND_XP_OPPORTUNISTIC_ACK (1 << 27)
220#define RP_VEND_XP_OPPORTUNISTIC_UPDATEFC (1 << 28)
221#define RP_VEND_XP_UPDATE_FC_THRESHOLD_MASK (0xff << 18)
222
223#define RP_VEND_CTL0 0x00000f44
224#define RP_VEND_CTL0_DSK_RST_PULSE_WIDTH_MASK (0xf << 12)
225#define RP_VEND_CTL0_DSK_RST_PULSE_WIDTH (0x9 << 12)
226
227#define RP_VEND_CTL1 0x00000f48
228#define RP_VEND_CTL1_ERPT (1 << 13)
229
230#define RP_VEND_XP_BIST 0x00000f4c
231#define RP_VEND_XP_BIST_GOTO_L1_L2_AFTER_DLLP_DONE (1 << 28)
182 232
183#define RP_VEND_CTL2 0x00000fa8 233#define RP_VEND_CTL2 0x00000fa8
184#define RP_VEND_CTL2_PCA_ENABLE (1 << 7) 234#define RP_VEND_CTL2_PCA_ENABLE (1 << 7)
185 235
186#define RP_PRIV_MISC 0x00000fe0 236#define RP_PRIV_MISC 0x00000fe0
187#define RP_PRIV_MISC_PRSNT_MAP_EP_PRSNT (0xe << 0) 237#define RP_PRIV_MISC_PRSNT_MAP_EP_PRSNT (0xe << 0)
188#define RP_PRIV_MISC_PRSNT_MAP_EP_ABSNT (0xf << 0) 238#define RP_PRIV_MISC_PRSNT_MAP_EP_ABSNT (0xf << 0)
239#define RP_PRIV_MISC_CTLR_CLK_CLAMP_THRESHOLD_MASK (0x7f << 16)
240#define RP_PRIV_MISC_CTLR_CLK_CLAMP_THRESHOLD (0xf << 16)
241#define RP_PRIV_MISC_CTLR_CLK_CLAMP_ENABLE (1 << 23)
242#define RP_PRIV_MISC_TMS_CLK_CLAMP_THRESHOLD_MASK (0x7f << 24)
243#define RP_PRIV_MISC_TMS_CLK_CLAMP_THRESHOLD (0xf << 24)
244#define RP_PRIV_MISC_TMS_CLK_CLAMP_ENABLE (1 << 31)
189 245
190#define RP_LINK_CONTROL_STATUS 0x00000090 246#define RP_LINK_CONTROL_STATUS 0x00000090
191#define RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE 0x20000000 247#define RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE 0x20000000
192#define RP_LINK_CONTROL_STATUS_LINKSTAT_MASK 0x3fff0000 248#define RP_LINK_CONTROL_STATUS_LINKSTAT_MASK 0x3fff0000
193 249
250#define RP_LINK_CONTROL_STATUS_2 0x000000b0
251
194#define PADS_CTL_SEL 0x0000009c 252#define PADS_CTL_SEL 0x0000009c
195 253
196#define PADS_CTL 0x000000a0 254#define PADS_CTL 0x000000a0
@@ -226,6 +284,7 @@
226#define PADS_REFCLK_CFG_DRVI_SHIFT 12 /* 15:12 */ 284#define PADS_REFCLK_CFG_DRVI_SHIFT 12 /* 15:12 */
227 285
228#define PME_ACK_TIMEOUT 10000 286#define PME_ACK_TIMEOUT 10000
287#define LINK_RETRAIN_TIMEOUT 100000 /* in usec */
229 288
230struct tegra_msi { 289struct tegra_msi {
231 struct msi_controller chip; 290 struct msi_controller chip;
@@ -249,10 +308,12 @@ struct tegra_pcie_soc {
249 unsigned int num_ports; 308 unsigned int num_ports;
250 const struct tegra_pcie_port_soc *ports; 309 const struct tegra_pcie_port_soc *ports;
251 unsigned int msi_base_shift; 310 unsigned int msi_base_shift;
311 unsigned long afi_pex2_ctrl;
252 u32 pads_pll_ctl; 312 u32 pads_pll_ctl;
253 u32 tx_ref_sel; 313 u32 tx_ref_sel;
254 u32 pads_refclk_cfg0; 314 u32 pads_refclk_cfg0;
255 u32 pads_refclk_cfg1; 315 u32 pads_refclk_cfg1;
316 u32 update_fc_threshold;
256 bool has_pex_clkreq_en; 317 bool has_pex_clkreq_en;
257 bool has_pex_bias_ctrl; 318 bool has_pex_bias_ctrl;
258 bool has_intr_prsnt_sense; 319 bool has_intr_prsnt_sense;
@@ -260,6 +321,24 @@ struct tegra_pcie_soc {
260 bool has_gen2; 321 bool has_gen2;
261 bool force_pca_enable; 322 bool force_pca_enable;
262 bool program_uphy; 323 bool program_uphy;
324 bool update_clamp_threshold;
325 bool program_deskew_time;
326 bool raw_violation_fixup;
327 bool update_fc_timer;
328 bool has_cache_bars;
329 struct {
330 struct {
331 u32 rp_ectl_2_r1;
332 u32 rp_ectl_4_r1;
333 u32 rp_ectl_5_r1;
334 u32 rp_ectl_6_r1;
335 u32 rp_ectl_2_r2;
336 u32 rp_ectl_4_r2;
337 u32 rp_ectl_5_r2;
338 u32 rp_ectl_6_r2;
339 } regs;
340 bool enable;
341 } ectl;
263}; 342};
264 343
265static inline struct tegra_msi *to_tegra_msi(struct msi_controller *chip) 344static inline struct tegra_msi *to_tegra_msi(struct msi_controller *chip)
@@ -321,6 +400,8 @@ struct tegra_pcie_port {
321 unsigned int lanes; 400 unsigned int lanes;
322 401
323 struct phy **phys; 402 struct phy **phys;
403
404 struct gpio_desc *reset_gpio;
324}; 405};
325 406
326struct tegra_pcie_bus { 407struct tegra_pcie_bus {
@@ -440,6 +521,7 @@ static struct pci_ops tegra_pcie_ops = {
440 521
441static unsigned long tegra_pcie_port_get_pex_ctrl(struct tegra_pcie_port *port) 522static unsigned long tegra_pcie_port_get_pex_ctrl(struct tegra_pcie_port *port)
442{ 523{
524 const struct tegra_pcie_soc *soc = port->pcie->soc;
443 unsigned long ret = 0; 525 unsigned long ret = 0;
444 526
445 switch (port->index) { 527 switch (port->index) {
@@ -452,7 +534,7 @@ static unsigned long tegra_pcie_port_get_pex_ctrl(struct tegra_pcie_port *port)
452 break; 534 break;
453 535
454 case 2: 536 case 2:
455 ret = AFI_PEX2_CTRL; 537 ret = soc->afi_pex2_ctrl;
456 break; 538 break;
457 } 539 }
458 540
@@ -465,15 +547,162 @@ static void tegra_pcie_port_reset(struct tegra_pcie_port *port)
465 unsigned long value; 547 unsigned long value;
466 548
467 /* pulse reset signal */ 549 /* pulse reset signal */
468 value = afi_readl(port->pcie, ctrl); 550 if (port->reset_gpio) {
469 value &= ~AFI_PEX_CTRL_RST; 551 gpiod_set_value(port->reset_gpio, 1);
470 afi_writel(port->pcie, value, ctrl); 552 } else {
553 value = afi_readl(port->pcie, ctrl);
554 value &= ~AFI_PEX_CTRL_RST;
555 afi_writel(port->pcie, value, ctrl);
556 }
471 557
472 usleep_range(1000, 2000); 558 usleep_range(1000, 2000);
473 559
474 value = afi_readl(port->pcie, ctrl); 560 if (port->reset_gpio) {
475 value |= AFI_PEX_CTRL_RST; 561 gpiod_set_value(port->reset_gpio, 0);
476 afi_writel(port->pcie, value, ctrl); 562 } else {
563 value = afi_readl(port->pcie, ctrl);
564 value |= AFI_PEX_CTRL_RST;
565 afi_writel(port->pcie, value, ctrl);
566 }
567}
568
569static void tegra_pcie_enable_rp_features(struct tegra_pcie_port *port)
570{
571 const struct tegra_pcie_soc *soc = port->pcie->soc;
572 u32 value;
573
574 /* Enable AER capability */
575 value = readl(port->base + RP_VEND_CTL1);
576 value |= RP_VEND_CTL1_ERPT;
577 writel(value, port->base + RP_VEND_CTL1);
578
579 /* Optimal settings to enhance bandwidth */
580 value = readl(port->base + RP_VEND_XP);
581 value |= RP_VEND_XP_OPPORTUNISTIC_ACK;
582 value |= RP_VEND_XP_OPPORTUNISTIC_UPDATEFC;
583 writel(value, port->base + RP_VEND_XP);
584
585 /*
586 * LTSSM will wait for DLLP to finish before entering L1 or L2,
587 * to avoid truncation of PM messages which results in receiver errors
588 */
589 value = readl(port->base + RP_VEND_XP_BIST);
590 value |= RP_VEND_XP_BIST_GOTO_L1_L2_AFTER_DLLP_DONE;
591 writel(value, port->base + RP_VEND_XP_BIST);
592
593 value = readl(port->base + RP_PRIV_MISC);
594 value |= RP_PRIV_MISC_CTLR_CLK_CLAMP_ENABLE;
595 value |= RP_PRIV_MISC_TMS_CLK_CLAMP_ENABLE;
596
597 if (soc->update_clamp_threshold) {
598 value &= ~(RP_PRIV_MISC_CTLR_CLK_CLAMP_THRESHOLD_MASK |
599 RP_PRIV_MISC_TMS_CLK_CLAMP_THRESHOLD_MASK);
600 value |= RP_PRIV_MISC_CTLR_CLK_CLAMP_THRESHOLD |
601 RP_PRIV_MISC_TMS_CLK_CLAMP_THRESHOLD;
602 }
603
604 writel(value, port->base + RP_PRIV_MISC);
605}
606
607static void tegra_pcie_program_ectl_settings(struct tegra_pcie_port *port)
608{
609 const struct tegra_pcie_soc *soc = port->pcie->soc;
610 u32 value;
611
612 value = readl(port->base + RP_ECTL_2_R1);
613 value &= ~RP_ECTL_2_R1_RX_CTLE_1C_MASK;
614 value |= soc->ectl.regs.rp_ectl_2_r1;
615 writel(value, port->base + RP_ECTL_2_R1);
616
617 value = readl(port->base + RP_ECTL_4_R1);
618 value &= ~RP_ECTL_4_R1_RX_CDR_CTRL_1C_MASK;
619 value |= soc->ectl.regs.rp_ectl_4_r1 <<
620 RP_ECTL_4_R1_RX_CDR_CTRL_1C_SHIFT;
621 writel(value, port->base + RP_ECTL_4_R1);
622
623 value = readl(port->base + RP_ECTL_5_R1);
624 value &= ~RP_ECTL_5_R1_RX_EQ_CTRL_L_1C_MASK;
625 value |= soc->ectl.regs.rp_ectl_5_r1;
626 writel(value, port->base + RP_ECTL_5_R1);
627
628 value = readl(port->base + RP_ECTL_6_R1);
629 value &= ~RP_ECTL_6_R1_RX_EQ_CTRL_H_1C_MASK;
630 value |= soc->ectl.regs.rp_ectl_6_r1;
631 writel(value, port->base + RP_ECTL_6_R1);
632
633 value = readl(port->base + RP_ECTL_2_R2);
634 value &= ~RP_ECTL_2_R2_RX_CTLE_1C_MASK;
635 value |= soc->ectl.regs.rp_ectl_2_r2;
636 writel(value, port->base + RP_ECTL_2_R2);
637
638 value = readl(port->base + RP_ECTL_4_R2);
639 value &= ~RP_ECTL_4_R2_RX_CDR_CTRL_1C_MASK;
640 value |= soc->ectl.regs.rp_ectl_4_r2 <<
641 RP_ECTL_4_R2_RX_CDR_CTRL_1C_SHIFT;
642 writel(value, port->base + RP_ECTL_4_R2);
643
644 value = readl(port->base + RP_ECTL_5_R2);
645 value &= ~RP_ECTL_5_R2_RX_EQ_CTRL_L_1C_MASK;
646 value |= soc->ectl.regs.rp_ectl_5_r2;
647 writel(value, port->base + RP_ECTL_5_R2);
648
649 value = readl(port->base + RP_ECTL_6_R2);
650 value &= ~RP_ECTL_6_R2_RX_EQ_CTRL_H_1C_MASK;
651 value |= soc->ectl.regs.rp_ectl_6_r2;
652 writel(value, port->base + RP_ECTL_6_R2);
653}
654
655static void tegra_pcie_apply_sw_fixup(struct tegra_pcie_port *port)
656{
657 const struct tegra_pcie_soc *soc = port->pcie->soc;
658 u32 value;
659
660 /*
661 * Sometimes link speed change from Gen2 to Gen1 fails due to
662 * instability in deskew logic on lane-0. Increase the deskew
663 * retry time to resolve this issue.
664 */
665 if (soc->program_deskew_time) {
666 value = readl(port->base + RP_VEND_CTL0);
667 value &= ~RP_VEND_CTL0_DSK_RST_PULSE_WIDTH_MASK;
668 value |= RP_VEND_CTL0_DSK_RST_PULSE_WIDTH;
669 writel(value, port->base + RP_VEND_CTL0);
670 }
671
672 /* Fixup for read after write violation. */
673 if (soc->raw_violation_fixup) {
674 value = readl(port->base + RP_RX_HDR_LIMIT);
675 value &= ~RP_RX_HDR_LIMIT_PW_MASK;
676 value |= RP_RX_HDR_LIMIT_PW;
677 writel(value, port->base + RP_RX_HDR_LIMIT);
678
679 value = readl(port->base + RP_PRIV_XP_DL);
680 value |= RP_PRIV_XP_DL_GEN2_UPD_FC_TSHOLD;
681 writel(value, port->base + RP_PRIV_XP_DL);
682
683 value = readl(port->base + RP_VEND_XP);
684 value &= ~RP_VEND_XP_UPDATE_FC_THRESHOLD_MASK;
685 value |= soc->update_fc_threshold;
686 writel(value, port->base + RP_VEND_XP);
687 }
688
689 if (soc->update_fc_timer) {
690 value = readl(port->base + RP_VEND_XP);
691 value &= ~RP_VEND_XP_UPDATE_FC_THRESHOLD_MASK;
692 value |= soc->update_fc_threshold;
693 writel(value, port->base + RP_VEND_XP);
694 }
695
696 /*
697 * PCIe link doesn't come up with few legacy PCIe endpoints if
698 * root port advertises both Gen-1 and Gen-2 speeds in Tegra.
699 * Hence, the strategy followed here is to initially advertise
700 * only Gen-1 and after link is up, retrain link to Gen-2 speed
701 */
702 value = readl(port->base + RP_LINK_CONTROL_STATUS_2);
703 value &= ~PCI_EXP_LNKSTA_CLS;
704 value |= PCI_EXP_LNKSTA_CLS_2_5GB;
705 writel(value, port->base + RP_LINK_CONTROL_STATUS_2);
477} 706}
478 707
479static void tegra_pcie_port_enable(struct tegra_pcie_port *port) 708static void tegra_pcie_port_enable(struct tegra_pcie_port *port)
@@ -500,6 +729,13 @@ static void tegra_pcie_port_enable(struct tegra_pcie_port *port)
500 value |= RP_VEND_CTL2_PCA_ENABLE; 729 value |= RP_VEND_CTL2_PCA_ENABLE;
501 writel(value, port->base + RP_VEND_CTL2); 730 writel(value, port->base + RP_VEND_CTL2);
502 } 731 }
732
733 tegra_pcie_enable_rp_features(port);
734
735 if (soc->ectl.enable)
736 tegra_pcie_program_ectl_settings(port);
737
738 tegra_pcie_apply_sw_fixup(port);
503} 739}
504 740
505static void tegra_pcie_port_disable(struct tegra_pcie_port *port) 741static void tegra_pcie_port_disable(struct tegra_pcie_port *port)
@@ -521,6 +757,12 @@ static void tegra_pcie_port_disable(struct tegra_pcie_port *port)
521 757
522 value &= ~AFI_PEX_CTRL_REFCLK_EN; 758 value &= ~AFI_PEX_CTRL_REFCLK_EN;
523 afi_writel(port->pcie, value, ctrl); 759 afi_writel(port->pcie, value, ctrl);
760
761 /* disable PCIe port and set CLKREQ# as GPIO to allow PLLE power down */
762 value = afi_readl(port->pcie, AFI_PCIE_CONFIG);
763 value |= AFI_PCIE_CONFIG_PCIE_DISABLE(port->index);
764 value |= AFI_PCIE_CONFIG_PCIE_CLKREQ_GPIO(port->index);
765 afi_writel(port->pcie, value, AFI_PCIE_CONFIG);
524} 766}
525 767
526static void tegra_pcie_port_free(struct tegra_pcie_port *port) 768static void tegra_pcie_port_free(struct tegra_pcie_port *port)
@@ -545,12 +787,15 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf1, tegra_pcie_fixup_class);
545DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1c, tegra_pcie_fixup_class); 787DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1c, tegra_pcie_fixup_class);
546DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1d, tegra_pcie_fixup_class); 788DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1d, tegra_pcie_fixup_class);
547 789
548/* Tegra PCIE requires relaxed ordering */ 790/* Tegra20 and Tegra30 PCIE requires relaxed ordering */
549static void tegra_pcie_relax_enable(struct pci_dev *dev) 791static void tegra_pcie_relax_enable(struct pci_dev *dev)
550{ 792{
551 pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN); 793 pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN);
552} 794}
553DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, tegra_pcie_relax_enable); 795DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0bf0, tegra_pcie_relax_enable);
796DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0bf1, tegra_pcie_relax_enable);
797DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0e1c, tegra_pcie_relax_enable);
798DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0e1d, tegra_pcie_relax_enable);
554 799
555static int tegra_pcie_request_resources(struct tegra_pcie *pcie) 800static int tegra_pcie_request_resources(struct tegra_pcie *pcie)
556{ 801{
@@ -635,7 +880,7 @@ static irqreturn_t tegra_pcie_isr(int irq, void *arg)
635 * do not pollute kernel log with master abort reports since they 880 * do not pollute kernel log with master abort reports since they
636 * happen a lot during enumeration 881 * happen a lot during enumeration
637 */ 882 */
638 if (code == AFI_INTR_MASTER_ABORT) 883 if (code == AFI_INTR_MASTER_ABORT || code == AFI_INTR_PE_PRSNT_SENSE)
639 dev_dbg(dev, "%s, signature: %08x\n", err_msg[code], signature); 884 dev_dbg(dev, "%s, signature: %08x\n", err_msg[code], signature);
640 else 885 else
641 dev_err(dev, "%s, signature: %08x\n", err_msg[code], signature); 886 dev_err(dev, "%s, signature: %08x\n", err_msg[code], signature);
@@ -704,11 +949,13 @@ static void tegra_pcie_setup_translations(struct tegra_pcie *pcie)
704 afi_writel(pcie, 0, AFI_AXI_BAR5_SZ); 949 afi_writel(pcie, 0, AFI_AXI_BAR5_SZ);
705 afi_writel(pcie, 0, AFI_FPCI_BAR5); 950 afi_writel(pcie, 0, AFI_FPCI_BAR5);
706 951
707 /* map all upstream transactions as uncached */ 952 if (pcie->soc->has_cache_bars) {
708 afi_writel(pcie, 0, AFI_CACHE_BAR0_ST); 953 /* map all upstream transactions as uncached */
709 afi_writel(pcie, 0, AFI_CACHE_BAR0_SZ); 954 afi_writel(pcie, 0, AFI_CACHE_BAR0_ST);
710 afi_writel(pcie, 0, AFI_CACHE_BAR1_ST); 955 afi_writel(pcie, 0, AFI_CACHE_BAR0_SZ);
711 afi_writel(pcie, 0, AFI_CACHE_BAR1_SZ); 956 afi_writel(pcie, 0, AFI_CACHE_BAR1_ST);
957 afi_writel(pcie, 0, AFI_CACHE_BAR1_SZ);
958 }
712 959
713 /* MSI translations are setup only when needed */ 960 /* MSI translations are setup only when needed */
714 afi_writel(pcie, 0, AFI_MSI_FPCI_BAR_ST); 961 afi_writel(pcie, 0, AFI_MSI_FPCI_BAR_ST);
@@ -852,7 +1099,6 @@ static int tegra_pcie_port_phy_power_off(struct tegra_pcie_port *port)
852static int tegra_pcie_phy_power_on(struct tegra_pcie *pcie) 1099static int tegra_pcie_phy_power_on(struct tegra_pcie *pcie)
853{ 1100{
854 struct device *dev = pcie->dev; 1101 struct device *dev = pcie->dev;
855 const struct tegra_pcie_soc *soc = pcie->soc;
856 struct tegra_pcie_port *port; 1102 struct tegra_pcie_port *port;
857 int err; 1103 int err;
858 1104
@@ -878,12 +1124,6 @@ static int tegra_pcie_phy_power_on(struct tegra_pcie *pcie)
878 } 1124 }
879 } 1125 }
880 1126
881 /* Configure the reference clock driver */
882 pads_writel(pcie, soc->pads_refclk_cfg0, PADS_REFCLK_CFG0);
883
884 if (soc->num_ports > 2)
885 pads_writel(pcie, soc->pads_refclk_cfg1, PADS_REFCLK_CFG1);
886
887 return 0; 1127 return 0;
888} 1128}
889 1129
@@ -918,13 +1158,11 @@ static int tegra_pcie_phy_power_off(struct tegra_pcie *pcie)
918 return 0; 1158 return 0;
919} 1159}
920 1160
921static int tegra_pcie_enable_controller(struct tegra_pcie *pcie) 1161static void tegra_pcie_enable_controller(struct tegra_pcie *pcie)
922{ 1162{
923 struct device *dev = pcie->dev;
924 const struct tegra_pcie_soc *soc = pcie->soc; 1163 const struct tegra_pcie_soc *soc = pcie->soc;
925 struct tegra_pcie_port *port; 1164 struct tegra_pcie_port *port;
926 unsigned long value; 1165 unsigned long value;
927 int err;
928 1166
929 /* enable PLL power down */ 1167 /* enable PLL power down */
930 if (pcie->phy) { 1168 if (pcie->phy) {
@@ -942,9 +1180,12 @@ static int tegra_pcie_enable_controller(struct tegra_pcie *pcie)
942 value = afi_readl(pcie, AFI_PCIE_CONFIG); 1180 value = afi_readl(pcie, AFI_PCIE_CONFIG);
943 value &= ~AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_MASK; 1181 value &= ~AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_MASK;
944 value |= AFI_PCIE_CONFIG_PCIE_DISABLE_ALL | pcie->xbar_config; 1182 value |= AFI_PCIE_CONFIG_PCIE_DISABLE_ALL | pcie->xbar_config;
1183 value |= AFI_PCIE_CONFIG_PCIE_CLKREQ_GPIO_ALL;
945 1184
946 list_for_each_entry(port, &pcie->ports, list) 1185 list_for_each_entry(port, &pcie->ports, list) {
947 value &= ~AFI_PCIE_CONFIG_PCIE_DISABLE(port->index); 1186 value &= ~AFI_PCIE_CONFIG_PCIE_DISABLE(port->index);
1187 value &= ~AFI_PCIE_CONFIG_PCIE_CLKREQ_GPIO(port->index);
1188 }
948 1189
949 afi_writel(pcie, value, AFI_PCIE_CONFIG); 1190 afi_writel(pcie, value, AFI_PCIE_CONFIG);
950 1191
@@ -958,20 +1199,10 @@ static int tegra_pcie_enable_controller(struct tegra_pcie *pcie)
958 afi_writel(pcie, value, AFI_FUSE); 1199 afi_writel(pcie, value, AFI_FUSE);
959 } 1200 }
960 1201
961 if (soc->program_uphy) { 1202 /* Disable AFI dynamic clock gating and enable PCIe */
962 err = tegra_pcie_phy_power_on(pcie);
963 if (err < 0) {
964 dev_err(dev, "failed to power on PHY(s): %d\n", err);
965 return err;
966 }
967 }
968
969 /* take the PCIe interface module out of reset */
970 reset_control_deassert(pcie->pcie_xrst);
971
972 /* finally enable PCIe */
973 value = afi_readl(pcie, AFI_CONFIGURATION); 1203 value = afi_readl(pcie, AFI_CONFIGURATION);
974 value |= AFI_CONFIGURATION_EN_FPCI; 1204 value |= AFI_CONFIGURATION_EN_FPCI;
1205 value |= AFI_CONFIGURATION_CLKEN_OVERRIDE;
975 afi_writel(pcie, value, AFI_CONFIGURATION); 1206 afi_writel(pcie, value, AFI_CONFIGURATION);
976 1207
977 value = AFI_INTR_EN_INI_SLVERR | AFI_INTR_EN_INI_DECERR | 1208 value = AFI_INTR_EN_INI_SLVERR | AFI_INTR_EN_INI_DECERR |
@@ -989,22 +1220,6 @@ static int tegra_pcie_enable_controller(struct tegra_pcie *pcie)
989 1220
990 /* disable all exceptions */ 1221 /* disable all exceptions */
991 afi_writel(pcie, 0, AFI_FPCI_ERROR_MASKS); 1222 afi_writel(pcie, 0, AFI_FPCI_ERROR_MASKS);
992
993 return 0;
994}
995
996static void tegra_pcie_disable_controller(struct tegra_pcie *pcie)
997{
998 int err;
999
1000 reset_control_assert(pcie->pcie_xrst);
1001
1002 if (pcie->soc->program_uphy) {
1003 err = tegra_pcie_phy_power_off(pcie);
1004 if (err < 0)
1005 dev_err(pcie->dev, "failed to power off PHY(s): %d\n",
1006 err);
1007 }
1008} 1223}
1009 1224
1010static void tegra_pcie_power_off(struct tegra_pcie *pcie) 1225static void tegra_pcie_power_off(struct tegra_pcie *pcie)
@@ -1014,13 +1229,11 @@ static void tegra_pcie_power_off(struct tegra_pcie *pcie)
1014 int err; 1229 int err;
1015 1230
1016 reset_control_assert(pcie->afi_rst); 1231 reset_control_assert(pcie->afi_rst);
1017 reset_control_assert(pcie->pex_rst);
1018 1232
1019 clk_disable_unprepare(pcie->pll_e); 1233 clk_disable_unprepare(pcie->pll_e);
1020 if (soc->has_cml_clk) 1234 if (soc->has_cml_clk)
1021 clk_disable_unprepare(pcie->cml_clk); 1235 clk_disable_unprepare(pcie->cml_clk);
1022 clk_disable_unprepare(pcie->afi_clk); 1236 clk_disable_unprepare(pcie->afi_clk);
1023 clk_disable_unprepare(pcie->pex_clk);
1024 1237
1025 if (!dev->pm_domain) 1238 if (!dev->pm_domain)
1026 tegra_powergate_power_off(TEGRA_POWERGATE_PCIE); 1239 tegra_powergate_power_off(TEGRA_POWERGATE_PCIE);
@@ -1048,46 +1261,66 @@ static int tegra_pcie_power_on(struct tegra_pcie *pcie)
1048 if (err < 0) 1261 if (err < 0)
1049 dev_err(dev, "failed to enable regulators: %d\n", err); 1262 dev_err(dev, "failed to enable regulators: %d\n", err);
1050 1263
1051 if (dev->pm_domain) { 1264 if (!dev->pm_domain) {
1052 err = clk_prepare_enable(pcie->pex_clk); 1265 err = tegra_powergate_power_on(TEGRA_POWERGATE_PCIE);
1053 if (err) { 1266 if (err) {
1054 dev_err(dev, "failed to enable PEX clock: %d\n", err); 1267 dev_err(dev, "failed to power ungate: %d\n", err);
1055 return err; 1268 goto regulator_disable;
1056 } 1269 }
1057 reset_control_deassert(pcie->pex_rst); 1270 err = tegra_powergate_remove_clamping(TEGRA_POWERGATE_PCIE);
1058 } else {
1059 err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_PCIE,
1060 pcie->pex_clk,
1061 pcie->pex_rst);
1062 if (err) { 1271 if (err) {
1063 dev_err(dev, "powerup sequence failed: %d\n", err); 1272 dev_err(dev, "failed to remove clamp: %d\n", err);
1064 return err; 1273 goto powergate;
1065 } 1274 }
1066 } 1275 }
1067 1276
1068 reset_control_deassert(pcie->afi_rst);
1069
1070 err = clk_prepare_enable(pcie->afi_clk); 1277 err = clk_prepare_enable(pcie->afi_clk);
1071 if (err < 0) { 1278 if (err < 0) {
1072 dev_err(dev, "failed to enable AFI clock: %d\n", err); 1279 dev_err(dev, "failed to enable AFI clock: %d\n", err);
1073 return err; 1280 goto powergate;
1074 } 1281 }
1075 1282
1076 if (soc->has_cml_clk) { 1283 if (soc->has_cml_clk) {
1077 err = clk_prepare_enable(pcie->cml_clk); 1284 err = clk_prepare_enable(pcie->cml_clk);
1078 if (err < 0) { 1285 if (err < 0) {
1079 dev_err(dev, "failed to enable CML clock: %d\n", err); 1286 dev_err(dev, "failed to enable CML clock: %d\n", err);
1080 return err; 1287 goto disable_afi_clk;
1081 } 1288 }
1082 } 1289 }
1083 1290
1084 err = clk_prepare_enable(pcie->pll_e); 1291 err = clk_prepare_enable(pcie->pll_e);
1085 if (err < 0) { 1292 if (err < 0) {
1086 dev_err(dev, "failed to enable PLLE clock: %d\n", err); 1293 dev_err(dev, "failed to enable PLLE clock: %d\n", err);
1087 return err; 1294 goto disable_cml_clk;
1088 } 1295 }
1089 1296
1297 reset_control_deassert(pcie->afi_rst);
1298
1090 return 0; 1299 return 0;
1300
1301disable_cml_clk:
1302 if (soc->has_cml_clk)
1303 clk_disable_unprepare(pcie->cml_clk);
1304disable_afi_clk:
1305 clk_disable_unprepare(pcie->afi_clk);
1306powergate:
1307 if (!dev->pm_domain)
1308 tegra_powergate_power_off(TEGRA_POWERGATE_PCIE);
1309regulator_disable:
1310 regulator_bulk_disable(pcie->num_supplies, pcie->supplies);
1311
1312 return err;
1313}
1314
1315static void tegra_pcie_apply_pad_settings(struct tegra_pcie *pcie)
1316{
1317 const struct tegra_pcie_soc *soc = pcie->soc;
1318
1319 /* Configure the reference clock driver */
1320 pads_writel(pcie, soc->pads_refclk_cfg0, PADS_REFCLK_CFG0);
1321
1322 if (soc->num_ports > 2)
1323 pads_writel(pcie, soc->pads_refclk_cfg1, PADS_REFCLK_CFG1);
1091} 1324}
1092 1325
1093static int tegra_pcie_clocks_get(struct tegra_pcie *pcie) 1326static int tegra_pcie_clocks_get(struct tegra_pcie *pcie)
@@ -1647,6 +1880,15 @@ static int tegra_pcie_disable_msi(struct tegra_pcie *pcie)
1647 return 0; 1880 return 0;
1648} 1881}
1649 1882
1883static void tegra_pcie_disable_interrupts(struct tegra_pcie *pcie)
1884{
1885 u32 value;
1886
1887 value = afi_readl(pcie, AFI_INTR_MASK);
1888 value &= ~AFI_INTR_MASK_INT_MASK;
1889 afi_writel(pcie, value, AFI_INTR_MASK);
1890}
1891
1650static int tegra_pcie_get_xbar_config(struct tegra_pcie *pcie, u32 lanes, 1892static int tegra_pcie_get_xbar_config(struct tegra_pcie *pcie, u32 lanes,
1651 u32 *xbar) 1893 u32 *xbar)
1652{ 1894{
@@ -1990,6 +2232,7 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
1990 struct tegra_pcie_port *rp; 2232 struct tegra_pcie_port *rp;
1991 unsigned int index; 2233 unsigned int index;
1992 u32 value; 2234 u32 value;
2235 char *label;
1993 2236
1994 err = of_pci_get_devfn(port); 2237 err = of_pci_get_devfn(port);
1995 if (err < 0) { 2238 if (err < 0) {
@@ -2048,6 +2291,31 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
2048 if (IS_ERR(rp->base)) 2291 if (IS_ERR(rp->base))
2049 return PTR_ERR(rp->base); 2292 return PTR_ERR(rp->base);
2050 2293
2294 label = devm_kasprintf(dev, GFP_KERNEL, "pex-reset-%u", index);
2295 if (!label) {
2296 dev_err(dev, "failed to create reset GPIO label\n");
2297 return -ENOMEM;
2298 }
2299
2300 /*
2301 * Returns -ENOENT if reset-gpios property is not populated
2302 * and in this case fall back to using AFI per port register
2303 * to toggle PERST# SFIO line.
2304 */
2305 rp->reset_gpio = devm_gpiod_get_from_of_node(dev, port,
2306 "reset-gpios", 0,
2307 GPIOD_OUT_LOW,
2308 label);
2309 if (IS_ERR(rp->reset_gpio)) {
2310 if (PTR_ERR(rp->reset_gpio) == -ENOENT) {
2311 rp->reset_gpio = NULL;
2312 } else {
2313 dev_err(dev, "failed to get reset GPIO: %d\n",
2314 err);
2315 return PTR_ERR(rp->reset_gpio);
2316 }
2317 }
2318
2051 list_add_tail(&rp->list, &pcie->ports); 2319 list_add_tail(&rp->list, &pcie->ports);
2052 } 2320 }
2053 2321
@@ -2095,7 +2363,7 @@ static bool tegra_pcie_port_check_link(struct tegra_pcie_port *port)
2095 } while (--timeout); 2363 } while (--timeout);
2096 2364
2097 if (!timeout) { 2365 if (!timeout) {
2098 dev_err(dev, "link %u down, retrying\n", port->index); 2366 dev_dbg(dev, "link %u down, retrying\n", port->index);
2099 goto retry; 2367 goto retry;
2100 } 2368 }
2101 2369
@@ -2117,6 +2385,64 @@ retry:
2117 return false; 2385 return false;
2118} 2386}
2119 2387
2388static void tegra_pcie_change_link_speed(struct tegra_pcie *pcie)
2389{
2390 struct device *dev = pcie->dev;
2391 struct tegra_pcie_port *port;
2392 ktime_t deadline;
2393 u32 value;
2394
2395 list_for_each_entry(port, &pcie->ports, list) {
2396 /*
2397 * "Supported Link Speeds Vector" in "Link Capabilities 2"
2398 * is not supported by Tegra. tegra_pcie_change_link_speed()
2399 * is called only for Tegra chips which support Gen2.
2400 * So there no harm if supported link speed is not verified.
2401 */
2402 value = readl(port->base + RP_LINK_CONTROL_STATUS_2);
2403 value &= ~PCI_EXP_LNKSTA_CLS;
2404 value |= PCI_EXP_LNKSTA_CLS_5_0GB;
2405 writel(value, port->base + RP_LINK_CONTROL_STATUS_2);
2406
2407 /*
2408 * Poll until link comes back from recovery to avoid race
2409 * condition.
2410 */
2411 deadline = ktime_add_us(ktime_get(), LINK_RETRAIN_TIMEOUT);
2412
2413 while (ktime_before(ktime_get(), deadline)) {
2414 value = readl(port->base + RP_LINK_CONTROL_STATUS);
2415 if ((value & PCI_EXP_LNKSTA_LT) == 0)
2416 break;
2417
2418 usleep_range(2000, 3000);
2419 }
2420
2421 if (value & PCI_EXP_LNKSTA_LT)
2422 dev_warn(dev, "PCIe port %u link is in recovery\n",
2423 port->index);
2424
2425 /* Retrain the link */
2426 value = readl(port->base + RP_LINK_CONTROL_STATUS);
2427 value |= PCI_EXP_LNKCTL_RL;
2428 writel(value, port->base + RP_LINK_CONTROL_STATUS);
2429
2430 deadline = ktime_add_us(ktime_get(), LINK_RETRAIN_TIMEOUT);
2431
2432 while (ktime_before(ktime_get(), deadline)) {
2433 value = readl(port->base + RP_LINK_CONTROL_STATUS);
2434 if ((value & PCI_EXP_LNKSTA_LT) == 0)
2435 break;
2436
2437 usleep_range(2000, 3000);
2438 }
2439
2440 if (value & PCI_EXP_LNKSTA_LT)
2441 dev_err(dev, "failed to retrain link of port %u\n",
2442 port->index);
2443 }
2444}
2445
2120static void tegra_pcie_enable_ports(struct tegra_pcie *pcie) 2446static void tegra_pcie_enable_ports(struct tegra_pcie *pcie)
2121{ 2447{
2122 struct device *dev = pcie->dev; 2448 struct device *dev = pcie->dev;
@@ -2127,7 +2453,12 @@ static void tegra_pcie_enable_ports(struct tegra_pcie *pcie)
2127 port->index, port->lanes); 2453 port->index, port->lanes);
2128 2454
2129 tegra_pcie_port_enable(port); 2455 tegra_pcie_port_enable(port);
2456 }
2130 2457
2458 /* Start LTSSM from Tegra side */
2459 reset_control_deassert(pcie->pcie_xrst);
2460
2461 list_for_each_entry_safe(port, tmp, &pcie->ports, list) {
2131 if (tegra_pcie_port_check_link(port)) 2462 if (tegra_pcie_port_check_link(port))
2132 continue; 2463 continue;
2133 2464
@@ -2136,12 +2467,17 @@ static void tegra_pcie_enable_ports(struct tegra_pcie *pcie)
2136 tegra_pcie_port_disable(port); 2467 tegra_pcie_port_disable(port);
2137 tegra_pcie_port_free(port); 2468 tegra_pcie_port_free(port);
2138 } 2469 }
2470
2471 if (pcie->soc->has_gen2)
2472 tegra_pcie_change_link_speed(pcie);
2139} 2473}
2140 2474
2141static void tegra_pcie_disable_ports(struct tegra_pcie *pcie) 2475static void tegra_pcie_disable_ports(struct tegra_pcie *pcie)
2142{ 2476{
2143 struct tegra_pcie_port *port, *tmp; 2477 struct tegra_pcie_port *port, *tmp;
2144 2478
2479 reset_control_assert(pcie->pcie_xrst);
2480
2145 list_for_each_entry_safe(port, tmp, &pcie->ports, list) 2481 list_for_each_entry_safe(port, tmp, &pcie->ports, list)
2146 tegra_pcie_port_disable(port); 2482 tegra_pcie_port_disable(port);
2147} 2483}
@@ -2155,6 +2491,7 @@ static const struct tegra_pcie_soc tegra20_pcie = {
2155 .num_ports = 2, 2491 .num_ports = 2,
2156 .ports = tegra20_pcie_ports, 2492 .ports = tegra20_pcie_ports,
2157 .msi_base_shift = 0, 2493 .msi_base_shift = 0,
2494 .afi_pex2_ctrl = 0x128,
2158 .pads_pll_ctl = PADS_PLL_CTL_TEGRA20, 2495 .pads_pll_ctl = PADS_PLL_CTL_TEGRA20,
2159 .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_DIV10, 2496 .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_DIV10,
2160 .pads_refclk_cfg0 = 0xfa5cfa5c, 2497 .pads_refclk_cfg0 = 0xfa5cfa5c,
@@ -2165,6 +2502,12 @@ static const struct tegra_pcie_soc tegra20_pcie = {
2165 .has_gen2 = false, 2502 .has_gen2 = false,
2166 .force_pca_enable = false, 2503 .force_pca_enable = false,
2167 .program_uphy = true, 2504 .program_uphy = true,
2505 .update_clamp_threshold = false,
2506 .program_deskew_time = false,
2507 .raw_violation_fixup = false,
2508 .update_fc_timer = false,
2509 .has_cache_bars = true,
2510 .ectl.enable = false,
2168}; 2511};
2169 2512
2170static const struct tegra_pcie_port_soc tegra30_pcie_ports[] = { 2513static const struct tegra_pcie_port_soc tegra30_pcie_ports[] = {
@@ -2188,6 +2531,12 @@ static const struct tegra_pcie_soc tegra30_pcie = {
2188 .has_gen2 = false, 2531 .has_gen2 = false,
2189 .force_pca_enable = false, 2532 .force_pca_enable = false,
2190 .program_uphy = true, 2533 .program_uphy = true,
2534 .update_clamp_threshold = false,
2535 .program_deskew_time = false,
2536 .raw_violation_fixup = false,
2537 .update_fc_timer = false,
2538 .has_cache_bars = false,
2539 .ectl.enable = false,
2191}; 2540};
2192 2541
2193static const struct tegra_pcie_soc tegra124_pcie = { 2542static const struct tegra_pcie_soc tegra124_pcie = {
@@ -2197,6 +2546,8 @@ static const struct tegra_pcie_soc tegra124_pcie = {
2197 .pads_pll_ctl = PADS_PLL_CTL_TEGRA30, 2546 .pads_pll_ctl = PADS_PLL_CTL_TEGRA30,
2198 .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN, 2547 .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN,
2199 .pads_refclk_cfg0 = 0x44ac44ac, 2548 .pads_refclk_cfg0 = 0x44ac44ac,
2549 /* FC threshold is bit[25:18] */
2550 .update_fc_threshold = 0x03fc0000,
2200 .has_pex_clkreq_en = true, 2551 .has_pex_clkreq_en = true,
2201 .has_pex_bias_ctrl = true, 2552 .has_pex_bias_ctrl = true,
2202 .has_intr_prsnt_sense = true, 2553 .has_intr_prsnt_sense = true,
@@ -2204,6 +2555,12 @@ static const struct tegra_pcie_soc tegra124_pcie = {
2204 .has_gen2 = true, 2555 .has_gen2 = true,
2205 .force_pca_enable = false, 2556 .force_pca_enable = false,
2206 .program_uphy = true, 2557 .program_uphy = true,
2558 .update_clamp_threshold = true,
2559 .program_deskew_time = false,
2560 .raw_violation_fixup = true,
2561 .update_fc_timer = false,
2562 .has_cache_bars = false,
2563 .ectl.enable = false,
2207}; 2564};
2208 2565
2209static const struct tegra_pcie_soc tegra210_pcie = { 2566static const struct tegra_pcie_soc tegra210_pcie = {
@@ -2213,6 +2570,8 @@ static const struct tegra_pcie_soc tegra210_pcie = {
2213 .pads_pll_ctl = PADS_PLL_CTL_TEGRA30, 2570 .pads_pll_ctl = PADS_PLL_CTL_TEGRA30,
2214 .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN, 2571 .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN,
2215 .pads_refclk_cfg0 = 0x90b890b8, 2572 .pads_refclk_cfg0 = 0x90b890b8,
2573 /* FC threshold is bit[25:18] */
2574 .update_fc_threshold = 0x01800000,
2216 .has_pex_clkreq_en = true, 2575 .has_pex_clkreq_en = true,
2217 .has_pex_bias_ctrl = true, 2576 .has_pex_bias_ctrl = true,
2218 .has_intr_prsnt_sense = true, 2577 .has_intr_prsnt_sense = true,
@@ -2220,6 +2579,24 @@ static const struct tegra_pcie_soc tegra210_pcie = {
2220 .has_gen2 = true, 2579 .has_gen2 = true,
2221 .force_pca_enable = true, 2580 .force_pca_enable = true,
2222 .program_uphy = true, 2581 .program_uphy = true,
2582 .update_clamp_threshold = true,
2583 .program_deskew_time = true,
2584 .raw_violation_fixup = false,
2585 .update_fc_timer = true,
2586 .has_cache_bars = false,
2587 .ectl = {
2588 .regs = {
2589 .rp_ectl_2_r1 = 0x0000000f,
2590 .rp_ectl_4_r1 = 0x00000067,
2591 .rp_ectl_5_r1 = 0x55010000,
2592 .rp_ectl_6_r1 = 0x00000001,
2593 .rp_ectl_2_r2 = 0x0000008f,
2594 .rp_ectl_4_r2 = 0x000000c7,
2595 .rp_ectl_5_r2 = 0x55010000,
2596 .rp_ectl_6_r2 = 0x00000001,
2597 },
2598 .enable = true,
2599 },
2223}; 2600};
2224 2601
2225static const struct tegra_pcie_port_soc tegra186_pcie_ports[] = { 2602static const struct tegra_pcie_port_soc tegra186_pcie_ports[] = {
@@ -2232,6 +2609,7 @@ static const struct tegra_pcie_soc tegra186_pcie = {
2232 .num_ports = 3, 2609 .num_ports = 3,
2233 .ports = tegra186_pcie_ports, 2610 .ports = tegra186_pcie_ports,
2234 .msi_base_shift = 8, 2611 .msi_base_shift = 8,
2612 .afi_pex2_ctrl = 0x19c,
2235 .pads_pll_ctl = PADS_PLL_CTL_TEGRA30, 2613 .pads_pll_ctl = PADS_PLL_CTL_TEGRA30,
2236 .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN, 2614 .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN,
2237 .pads_refclk_cfg0 = 0x80b880b8, 2615 .pads_refclk_cfg0 = 0x80b880b8,
@@ -2243,6 +2621,12 @@ static const struct tegra_pcie_soc tegra186_pcie = {
2243 .has_gen2 = true, 2621 .has_gen2 = true,
2244 .force_pca_enable = false, 2622 .force_pca_enable = false,
2245 .program_uphy = false, 2623 .program_uphy = false,
2624 .update_clamp_threshold = false,
2625 .program_deskew_time = false,
2626 .raw_violation_fixup = false,
2627 .update_fc_timer = false,
2628 .has_cache_bars = false,
2629 .ectl.enable = false,
2246}; 2630};
2247 2631
2248static const struct of_device_id tegra_pcie_of_match[] = { 2632static const struct of_device_id tegra_pcie_of_match[] = {
@@ -2485,16 +2869,32 @@ static int __maybe_unused tegra_pcie_pm_suspend(struct device *dev)
2485{ 2869{
2486 struct tegra_pcie *pcie = dev_get_drvdata(dev); 2870 struct tegra_pcie *pcie = dev_get_drvdata(dev);
2487 struct tegra_pcie_port *port; 2871 struct tegra_pcie_port *port;
2872 int err;
2488 2873
2489 list_for_each_entry(port, &pcie->ports, list) 2874 list_for_each_entry(port, &pcie->ports, list)
2490 tegra_pcie_pme_turnoff(port); 2875 tegra_pcie_pme_turnoff(port);
2491 2876
2492 tegra_pcie_disable_ports(pcie); 2877 tegra_pcie_disable_ports(pcie);
2493 2878
2879 /*
2880 * AFI_INTR is unmasked in tegra_pcie_enable_controller(), mask it to
2881 * avoid unwanted interrupts raised by AFI after pex_rst is asserted.
2882 */
2883 tegra_pcie_disable_interrupts(pcie);
2884
2885 if (pcie->soc->program_uphy) {
2886 err = tegra_pcie_phy_power_off(pcie);
2887 if (err < 0)
2888 dev_err(dev, "failed to power off PHY(s): %d\n", err);
2889 }
2890
2891 reset_control_assert(pcie->pex_rst);
2892 clk_disable_unprepare(pcie->pex_clk);
2893
2494 if (IS_ENABLED(CONFIG_PCI_MSI)) 2894 if (IS_ENABLED(CONFIG_PCI_MSI))
2495 tegra_pcie_disable_msi(pcie); 2895 tegra_pcie_disable_msi(pcie);
2496 2896
2497 tegra_pcie_disable_controller(pcie); 2897 pinctrl_pm_select_idle_state(dev);
2498 tegra_pcie_power_off(pcie); 2898 tegra_pcie_power_off(pcie);
2499 2899
2500 return 0; 2900 return 0;
@@ -2510,20 +2910,45 @@ static int __maybe_unused tegra_pcie_pm_resume(struct device *dev)
2510 dev_err(dev, "tegra pcie power on fail: %d\n", err); 2910 dev_err(dev, "tegra pcie power on fail: %d\n", err);
2511 return err; 2911 return err;
2512 } 2912 }
2513 err = tegra_pcie_enable_controller(pcie); 2913
2514 if (err) { 2914 err = pinctrl_pm_select_default_state(dev);
2515 dev_err(dev, "tegra pcie controller enable fail: %d\n", err); 2915 if (err < 0) {
2916 dev_err(dev, "failed to disable PCIe IO DPD: %d\n", err);
2516 goto poweroff; 2917 goto poweroff;
2517 } 2918 }
2919
2920 tegra_pcie_enable_controller(pcie);
2518 tegra_pcie_setup_translations(pcie); 2921 tegra_pcie_setup_translations(pcie);
2519 2922
2520 if (IS_ENABLED(CONFIG_PCI_MSI)) 2923 if (IS_ENABLED(CONFIG_PCI_MSI))
2521 tegra_pcie_enable_msi(pcie); 2924 tegra_pcie_enable_msi(pcie);
2522 2925
2926 err = clk_prepare_enable(pcie->pex_clk);
2927 if (err) {
2928 dev_err(dev, "failed to enable PEX clock: %d\n", err);
2929 goto pex_dpd_enable;
2930 }
2931
2932 reset_control_deassert(pcie->pex_rst);
2933
2934 if (pcie->soc->program_uphy) {
2935 err = tegra_pcie_phy_power_on(pcie);
2936 if (err < 0) {
2937 dev_err(dev, "failed to power on PHY(s): %d\n", err);
2938 goto disable_pex_clk;
2939 }
2940 }
2941
2942 tegra_pcie_apply_pad_settings(pcie);
2523 tegra_pcie_enable_ports(pcie); 2943 tegra_pcie_enable_ports(pcie);
2524 2944
2525 return 0; 2945 return 0;
2526 2946
2947disable_pex_clk:
2948 reset_control_assert(pcie->pex_rst);
2949 clk_disable_unprepare(pcie->pex_clk);
2950pex_dpd_enable:
2951 pinctrl_pm_select_idle_state(dev);
2527poweroff: 2952poweroff:
2528 tegra_pcie_power_off(pcie); 2953 tegra_pcie_power_off(pcie);
2529 2954
diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c
index 025ef7d9a046..16d938920ca5 100644
--- a/drivers/pci/controller/pcie-altera-msi.c
+++ b/drivers/pci/controller/pcie-altera-msi.c
@@ -10,6 +10,7 @@
10#include <linux/interrupt.h> 10#include <linux/interrupt.h>
11#include <linux/irqchip/chained_irq.h> 11#include <linux/irqchip/chained_irq.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/module.h>
13#include <linux/msi.h> 14#include <linux/msi.h>
14#include <linux/of_address.h> 15#include <linux/of_address.h>
15#include <linux/of_irq.h> 16#include <linux/of_irq.h>
@@ -288,4 +289,13 @@ static int __init altera_msi_init(void)
288{ 289{
289 return platform_driver_register(&altera_msi_driver); 290 return platform_driver_register(&altera_msi_driver);
290} 291}
292
293static void __exit altera_msi_exit(void)
294{
295 platform_driver_unregister(&altera_msi_driver);
296}
297
291subsys_initcall(altera_msi_init); 298subsys_initcall(altera_msi_init);
299MODULE_DEVICE_TABLE(of, altera_msi_of_match);
300module_exit(altera_msi_exit);
301MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-altera.c b/drivers/pci/controller/pcie-altera.c
index 27edcebd1726..d2497ca43828 100644
--- a/drivers/pci/controller/pcie-altera.c
+++ b/drivers/pci/controller/pcie-altera.c
@@ -10,6 +10,7 @@
10#include <linux/interrupt.h> 10#include <linux/interrupt.h>
11#include <linux/irqchip/chained_irq.h> 11#include <linux/irqchip/chained_irq.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/module.h>
13#include <linux/of_address.h> 14#include <linux/of_address.h>
14#include <linux/of_device.h> 15#include <linux/of_device.h>
15#include <linux/of_irq.h> 16#include <linux/of_irq.h>
@@ -43,6 +44,8 @@
43#define S10_RP_RXCPL_STATUS 0x200C 44#define S10_RP_RXCPL_STATUS 0x200C
44#define S10_RP_CFG_ADDR(pcie, reg) \ 45#define S10_RP_CFG_ADDR(pcie, reg) \
45 (((pcie)->hip_base) + (reg) + (1 << 20)) 46 (((pcie)->hip_base) + (reg) + (1 << 20))
47#define S10_RP_SECONDARY(pcie) \
48 readb(S10_RP_CFG_ADDR(pcie, PCI_SECONDARY_BUS))
46 49
47/* TLP configuration type 0 and 1 */ 50/* TLP configuration type 0 and 1 */
48#define TLP_FMTTYPE_CFGRD0 0x04 /* Configuration Read Type 0 */ 51#define TLP_FMTTYPE_CFGRD0 0x04 /* Configuration Read Type 0 */
@@ -54,14 +57,9 @@
54#define TLP_WRITE_TAG 0x10 57#define TLP_WRITE_TAG 0x10
55#define RP_DEVFN 0 58#define RP_DEVFN 0
56#define TLP_REQ_ID(bus, devfn) (((bus) << 8) | (devfn)) 59#define TLP_REQ_ID(bus, devfn) (((bus) << 8) | (devfn))
57#define TLP_CFGRD_DW0(pcie, bus) \ 60#define TLP_CFG_DW0(pcie, cfg) \
58 ((((bus == pcie->root_bus_nr) ? pcie->pcie_data->cfgrd0 \ 61 (((cfg) << 24) | \
59 : pcie->pcie_data->cfgrd1) << 24) | \ 62 TLP_PAYLOAD_SIZE)
60 TLP_PAYLOAD_SIZE)
61#define TLP_CFGWR_DW0(pcie, bus) \
62 ((((bus == pcie->root_bus_nr) ? pcie->pcie_data->cfgwr0 \
63 : pcie->pcie_data->cfgwr1) << 24) | \
64 TLP_PAYLOAD_SIZE)
65#define TLP_CFG_DW1(pcie, tag, be) \ 63#define TLP_CFG_DW1(pcie, tag, be) \
66 (((TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN)) << 16) | (tag << 8) | (be)) 64 (((TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN)) << 16) | (tag << 8) | (be))
67#define TLP_CFG_DW2(bus, devfn, offset) \ 65#define TLP_CFG_DW2(bus, devfn, offset) \
@@ -321,14 +319,31 @@ static void s10_tlp_write_packet(struct altera_pcie *pcie, u32 *headers,
321 s10_tlp_write_tx(pcie, data, RP_TX_EOP); 319 s10_tlp_write_tx(pcie, data, RP_TX_EOP);
322} 320}
323 321
322static void get_tlp_header(struct altera_pcie *pcie, u8 bus, u32 devfn,
323 int where, u8 byte_en, bool read, u32 *headers)
324{
325 u8 cfg;
326 u8 cfg0 = read ? pcie->pcie_data->cfgrd0 : pcie->pcie_data->cfgwr0;
327 u8 cfg1 = read ? pcie->pcie_data->cfgrd1 : pcie->pcie_data->cfgwr1;
328 u8 tag = read ? TLP_READ_TAG : TLP_WRITE_TAG;
329
330 if (pcie->pcie_data->version == ALTERA_PCIE_V1)
331 cfg = (bus == pcie->root_bus_nr) ? cfg0 : cfg1;
332 else
333 cfg = (bus > S10_RP_SECONDARY(pcie)) ? cfg0 : cfg1;
334
335 headers[0] = TLP_CFG_DW0(pcie, cfg);
336 headers[1] = TLP_CFG_DW1(pcie, tag, byte_en);
337 headers[2] = TLP_CFG_DW2(bus, devfn, where);
338}
339
324static int tlp_cfg_dword_read(struct altera_pcie *pcie, u8 bus, u32 devfn, 340static int tlp_cfg_dword_read(struct altera_pcie *pcie, u8 bus, u32 devfn,
325 int where, u8 byte_en, u32 *value) 341 int where, u8 byte_en, u32 *value)
326{ 342{
327 u32 headers[TLP_HDR_SIZE]; 343 u32 headers[TLP_HDR_SIZE];
328 344
329 headers[0] = TLP_CFGRD_DW0(pcie, bus); 345 get_tlp_header(pcie, bus, devfn, where, byte_en, true,
330 headers[1] = TLP_CFG_DW1(pcie, TLP_READ_TAG, byte_en); 346 headers);
331 headers[2] = TLP_CFG_DW2(bus, devfn, where);
332 347
333 pcie->pcie_data->ops->tlp_write_pkt(pcie, headers, 0, false); 348 pcie->pcie_data->ops->tlp_write_pkt(pcie, headers, 0, false);
334 349
@@ -341,9 +356,8 @@ static int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus, u32 devfn,
341 u32 headers[TLP_HDR_SIZE]; 356 u32 headers[TLP_HDR_SIZE];
342 int ret; 357 int ret;
343 358
344 headers[0] = TLP_CFGWR_DW0(pcie, bus); 359 get_tlp_header(pcie, bus, devfn, where, byte_en, false,
345 headers[1] = TLP_CFG_DW1(pcie, TLP_WRITE_TAG, byte_en); 360 headers);
346 headers[2] = TLP_CFG_DW2(bus, devfn, where);
347 361
348 /* check alignment to Qword */ 362 /* check alignment to Qword */
349 if ((where & 0x7) == 0) 363 if ((where & 0x7) == 0)
@@ -705,6 +719,13 @@ static int altera_pcie_init_irq_domain(struct altera_pcie *pcie)
705 return 0; 719 return 0;
706} 720}
707 721
722static void altera_pcie_irq_teardown(struct altera_pcie *pcie)
723{
724 irq_set_chained_handler_and_data(pcie->irq, NULL, NULL);
725 irq_domain_remove(pcie->irq_domain);
726 irq_dispose_mapping(pcie->irq);
727}
728
708static int altera_pcie_parse_dt(struct altera_pcie *pcie) 729static int altera_pcie_parse_dt(struct altera_pcie *pcie)
709{ 730{
710 struct device *dev = &pcie->pdev->dev; 731 struct device *dev = &pcie->pdev->dev;
@@ -798,6 +819,7 @@ static int altera_pcie_probe(struct platform_device *pdev)
798 819
799 pcie = pci_host_bridge_priv(bridge); 820 pcie = pci_host_bridge_priv(bridge);
800 pcie->pdev = pdev; 821 pcie->pdev = pdev;
822 platform_set_drvdata(pdev, pcie);
801 823
802 match = of_match_device(altera_pcie_of_match, &pdev->dev); 824 match = of_match_device(altera_pcie_of_match, &pdev->dev);
803 if (!match) 825 if (!match)
@@ -855,13 +877,28 @@ static int altera_pcie_probe(struct platform_device *pdev)
855 return ret; 877 return ret;
856} 878}
857 879
880static int altera_pcie_remove(struct platform_device *pdev)
881{
882 struct altera_pcie *pcie = platform_get_drvdata(pdev);
883 struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
884
885 pci_stop_root_bus(bridge->bus);
886 pci_remove_root_bus(bridge->bus);
887 pci_free_resource_list(&pcie->resources);
888 altera_pcie_irq_teardown(pcie);
889
890 return 0;
891}
892
858static struct platform_driver altera_pcie_driver = { 893static struct platform_driver altera_pcie_driver = {
859 .probe = altera_pcie_probe, 894 .probe = altera_pcie_probe,
895 .remove = altera_pcie_remove,
860 .driver = { 896 .driver = {
861 .name = "altera-pcie", 897 .name = "altera-pcie",
862 .of_match_table = altera_pcie_of_match, 898 .of_match_table = altera_pcie_of_match,
863 .suppress_bind_attrs = true,
864 }, 899 },
865}; 900};
866 901
867builtin_platform_driver(altera_pcie_driver); 902MODULE_DEVICE_TABLE(of, altera_pcie_of_match);
903module_platform_driver(altera_pcie_driver);
904MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-iproc-platform.c b/drivers/pci/controller/pcie-iproc-platform.c
index f30f5f3fb5c1..5a3550b6bb29 100644
--- a/drivers/pci/controller/pcie-iproc-platform.c
+++ b/drivers/pci/controller/pcie-iproc-platform.c
@@ -87,7 +87,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
87 87
88 /* 88 /*
89 * DT nodes are not used by all platforms that use the iProc PCIe 89 * DT nodes are not used by all platforms that use the iProc PCIe
90 * core driver. For platforms that require explict inbound mapping 90 * core driver. For platforms that require explicit inbound mapping
91 * configuration, "dma-ranges" would have been present in DT 91 * configuration, "dma-ranges" would have been present in DT
92 */ 92 */
93 pcie->need_ib_cfg = of_property_read_bool(np, "dma-ranges"); 93 pcie->need_ib_cfg = of_property_read_bool(np, "dma-ranges");
diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c
index e3ca46497470..2d457bfdaf66 100644
--- a/drivers/pci/controller/pcie-iproc.c
+++ b/drivers/pci/controller/pcie-iproc.c
@@ -163,7 +163,7 @@ enum iproc_pcie_ib_map_type {
163 * @size_unit: inbound mapping region size unit, could be SZ_1K, SZ_1M, or 163 * @size_unit: inbound mapping region size unit, could be SZ_1K, SZ_1M, or
164 * SZ_1G 164 * SZ_1G
165 * @region_sizes: list of supported inbound mapping region sizes in KB, MB, or 165 * @region_sizes: list of supported inbound mapping region sizes in KB, MB, or
166 * GB, depedning on the size unit 166 * GB, depending on the size unit
167 * @nr_sizes: number of supported inbound mapping region sizes 167 * @nr_sizes: number of supported inbound mapping region sizes
168 * @nr_windows: number of supported inbound mapping windows for the region 168 * @nr_windows: number of supported inbound mapping windows for the region
169 * @imap_addr_offset: register offset between the upper and lower 32-bit 169 * @imap_addr_offset: register offset between the upper and lower 32-bit
diff --git a/drivers/pci/controller/pcie-mobiveil.c b/drivers/pci/controller/pcie-mobiveil.c
index 77052a0712d0..672e633601c7 100644
--- a/drivers/pci/controller/pcie-mobiveil.c
+++ b/drivers/pci/controller/pcie-mobiveil.c
@@ -31,56 +31,61 @@
31 * translation tables are grouped into windows, each window registers are 31 * translation tables are grouped into windows, each window registers are
32 * grouped into blocks of 4 or 16 registers each 32 * grouped into blocks of 4 or 16 registers each
33 */ 33 */
34#define PAB_REG_BLOCK_SIZE 16 34#define PAB_REG_BLOCK_SIZE 16
35#define PAB_EXT_REG_BLOCK_SIZE 4 35#define PAB_EXT_REG_BLOCK_SIZE 4
36 36
37#define PAB_REG_ADDR(offset, win) (offset + (win * PAB_REG_BLOCK_SIZE)) 37#define PAB_REG_ADDR(offset, win) \
38#define PAB_EXT_REG_ADDR(offset, win) (offset + (win * PAB_EXT_REG_BLOCK_SIZE)) 38 (offset + (win * PAB_REG_BLOCK_SIZE))
39#define PAB_EXT_REG_ADDR(offset, win) \
40 (offset + (win * PAB_EXT_REG_BLOCK_SIZE))
39 41
40#define LTSSM_STATUS 0x0404 42#define LTSSM_STATUS 0x0404
41#define LTSSM_STATUS_L0_MASK 0x3f 43#define LTSSM_STATUS_L0_MASK 0x3f
42#define LTSSM_STATUS_L0 0x2d 44#define LTSSM_STATUS_L0 0x2d
43 45
44#define PAB_CTRL 0x0808 46#define PAB_CTRL 0x0808
45#define AMBA_PIO_ENABLE_SHIFT 0 47#define AMBA_PIO_ENABLE_SHIFT 0
46#define PEX_PIO_ENABLE_SHIFT 1 48#define PEX_PIO_ENABLE_SHIFT 1
47#define PAGE_SEL_SHIFT 13 49#define PAGE_SEL_SHIFT 13
48#define PAGE_SEL_MASK 0x3f 50#define PAGE_SEL_MASK 0x3f
49#define PAGE_LO_MASK 0x3ff 51#define PAGE_LO_MASK 0x3ff
50#define PAGE_SEL_EN 0xc00 52#define PAGE_SEL_OFFSET_SHIFT 10
51#define PAGE_SEL_OFFSET_SHIFT 10
52 53
53#define PAB_AXI_PIO_CTRL 0x0840 54#define PAB_AXI_PIO_CTRL 0x0840
54#define APIO_EN_MASK 0xf 55#define APIO_EN_MASK 0xf
55 56
56#define PAB_PEX_PIO_CTRL 0x08c0 57#define PAB_PEX_PIO_CTRL 0x08c0
57#define PIO_ENABLE_SHIFT 0 58#define PIO_ENABLE_SHIFT 0
58 59
59#define PAB_INTP_AMBA_MISC_ENB 0x0b0c 60#define PAB_INTP_AMBA_MISC_ENB 0x0b0c
60#define PAB_INTP_AMBA_MISC_STAT 0x0b1c 61#define PAB_INTP_AMBA_MISC_STAT 0x0b1c
61#define PAB_INTP_INTX_MASK 0x01e0 62#define PAB_INTP_INTX_MASK 0x01e0
62#define PAB_INTP_MSI_MASK 0x8 63#define PAB_INTP_MSI_MASK 0x8
63 64
64#define PAB_AXI_AMAP_CTRL(win) PAB_REG_ADDR(0x0ba0, win) 65#define PAB_AXI_AMAP_CTRL(win) PAB_REG_ADDR(0x0ba0, win)
65#define WIN_ENABLE_SHIFT 0 66#define WIN_ENABLE_SHIFT 0
66#define WIN_TYPE_SHIFT 1 67#define WIN_TYPE_SHIFT 1
68#define WIN_TYPE_MASK 0x3
69#define WIN_SIZE_MASK 0xfffffc00
67 70
68#define PAB_EXT_AXI_AMAP_SIZE(win) PAB_EXT_REG_ADDR(0xbaf0, win) 71#define PAB_EXT_AXI_AMAP_SIZE(win) PAB_EXT_REG_ADDR(0xbaf0, win)
69 72
73#define PAB_EXT_AXI_AMAP_AXI_WIN(win) PAB_EXT_REG_ADDR(0x80a0, win)
70#define PAB_AXI_AMAP_AXI_WIN(win) PAB_REG_ADDR(0x0ba4, win) 74#define PAB_AXI_AMAP_AXI_WIN(win) PAB_REG_ADDR(0x0ba4, win)
71#define AXI_WINDOW_ALIGN_MASK 3 75#define AXI_WINDOW_ALIGN_MASK 3
72 76
73#define PAB_AXI_AMAP_PEX_WIN_L(win) PAB_REG_ADDR(0x0ba8, win) 77#define PAB_AXI_AMAP_PEX_WIN_L(win) PAB_REG_ADDR(0x0ba8, win)
74#define PAB_BUS_SHIFT 24 78#define PAB_BUS_SHIFT 24
75#define PAB_DEVICE_SHIFT 19 79#define PAB_DEVICE_SHIFT 19
76#define PAB_FUNCTION_SHIFT 16 80#define PAB_FUNCTION_SHIFT 16
77 81
78#define PAB_AXI_AMAP_PEX_WIN_H(win) PAB_REG_ADDR(0x0bac, win) 82#define PAB_AXI_AMAP_PEX_WIN_H(win) PAB_REG_ADDR(0x0bac, win)
79#define PAB_INTP_AXI_PIO_CLASS 0x474 83#define PAB_INTP_AXI_PIO_CLASS 0x474
80 84
81#define PAB_PEX_AMAP_CTRL(win) PAB_REG_ADDR(0x4ba0, win) 85#define PAB_PEX_AMAP_CTRL(win) PAB_REG_ADDR(0x4ba0, win)
82#define AMAP_CTRL_EN_SHIFT 0 86#define AMAP_CTRL_EN_SHIFT 0
83#define AMAP_CTRL_TYPE_SHIFT 1 87#define AMAP_CTRL_TYPE_SHIFT 1
88#define AMAP_CTRL_TYPE_MASK 3
84 89
85#define PAB_EXT_PEX_AMAP_SIZEN(win) PAB_EXT_REG_ADDR(0xbef0, win) 90#define PAB_EXT_PEX_AMAP_SIZEN(win) PAB_EXT_REG_ADDR(0xbef0, win)
86#define PAB_PEX_AMAP_AXI_WIN(win) PAB_REG_ADDR(0x4ba4, win) 91#define PAB_PEX_AMAP_AXI_WIN(win) PAB_REG_ADDR(0x4ba4, win)
@@ -88,34 +93,40 @@
88#define PAB_PEX_AMAP_PEX_WIN_H(win) PAB_REG_ADDR(0x4bac, win) 93#define PAB_PEX_AMAP_PEX_WIN_H(win) PAB_REG_ADDR(0x4bac, win)
89 94
90/* starting offset of INTX bits in status register */ 95/* starting offset of INTX bits in status register */
91#define PAB_INTX_START 5 96#define PAB_INTX_START 5
92 97
93/* supported number of MSI interrupts */ 98/* supported number of MSI interrupts */
94#define PCI_NUM_MSI 16 99#define PCI_NUM_MSI 16
95 100
96/* MSI registers */ 101/* MSI registers */
97#define MSI_BASE_LO_OFFSET 0x04 102#define MSI_BASE_LO_OFFSET 0x04
98#define MSI_BASE_HI_OFFSET 0x08 103#define MSI_BASE_HI_OFFSET 0x08
99#define MSI_SIZE_OFFSET 0x0c 104#define MSI_SIZE_OFFSET 0x0c
100#define MSI_ENABLE_OFFSET 0x14 105#define MSI_ENABLE_OFFSET 0x14
101#define MSI_STATUS_OFFSET 0x18 106#define MSI_STATUS_OFFSET 0x18
102#define MSI_DATA_OFFSET 0x20 107#define MSI_DATA_OFFSET 0x20
103#define MSI_ADDR_L_OFFSET 0x24 108#define MSI_ADDR_L_OFFSET 0x24
104#define MSI_ADDR_H_OFFSET 0x28 109#define MSI_ADDR_H_OFFSET 0x28
105 110
106/* outbound and inbound window definitions */ 111/* outbound and inbound window definitions */
107#define WIN_NUM_0 0 112#define WIN_NUM_0 0
108#define WIN_NUM_1 1 113#define WIN_NUM_1 1
109#define CFG_WINDOW_TYPE 0 114#define CFG_WINDOW_TYPE 0
110#define IO_WINDOW_TYPE 1 115#define IO_WINDOW_TYPE 1
111#define MEM_WINDOW_TYPE 2 116#define MEM_WINDOW_TYPE 2
112#define IB_WIN_SIZE ((u64)256 * 1024 * 1024 * 1024) 117#define IB_WIN_SIZE ((u64)256 * 1024 * 1024 * 1024)
113#define MAX_PIO_WINDOWS 8 118#define MAX_PIO_WINDOWS 8
114 119
115/* Parameters for the waiting for link up routine */ 120/* Parameters for the waiting for link up routine */
116#define LINK_WAIT_MAX_RETRIES 10 121#define LINK_WAIT_MAX_RETRIES 10
117#define LINK_WAIT_MIN 90000 122#define LINK_WAIT_MIN 90000
118#define LINK_WAIT_MAX 100000 123#define LINK_WAIT_MAX 100000
124
125#define PAGED_ADDR_BNDRY 0xc00
126#define OFFSET_TO_PAGE_ADDR(off) \
127 ((off & PAGE_LO_MASK) | PAGED_ADDR_BNDRY)
128#define OFFSET_TO_PAGE_IDX(off) \
129 ((off >> PAGE_SEL_OFFSET_SHIFT) & PAGE_SEL_MASK)
119 130
120struct mobiveil_msi { /* MSI information */ 131struct mobiveil_msi { /* MSI information */
121 struct mutex lock; /* protect bitmap variable */ 132 struct mutex lock; /* protect bitmap variable */
@@ -145,15 +156,119 @@ struct mobiveil_pcie {
145 struct mobiveil_msi msi; 156 struct mobiveil_msi msi;
146}; 157};
147 158
148static inline void csr_writel(struct mobiveil_pcie *pcie, const u32 value, 159/*
149 const u32 reg) 160 * mobiveil_pcie_sel_page - routine to access paged register
161 *
162 * Registers whose address greater than PAGED_ADDR_BNDRY (0xc00) are paged,
163 * for this scheme to work extracted higher 6 bits of the offset will be
164 * written to pg_sel field of PAB_CTRL register and rest of the lower 10
165 * bits enabled with PAGED_ADDR_BNDRY are used as offset of the register.
166 */
167static void mobiveil_pcie_sel_page(struct mobiveil_pcie *pcie, u8 pg_idx)
168{
169 u32 val;
170
171 val = readl(pcie->csr_axi_slave_base + PAB_CTRL);
172 val &= ~(PAGE_SEL_MASK << PAGE_SEL_SHIFT);
173 val |= (pg_idx & PAGE_SEL_MASK) << PAGE_SEL_SHIFT;
174
175 writel(val, pcie->csr_axi_slave_base + PAB_CTRL);
176}
177
178static void *mobiveil_pcie_comp_addr(struct mobiveil_pcie *pcie, u32 off)
179{
180 if (off < PAGED_ADDR_BNDRY) {
181 /* For directly accessed registers, clear the pg_sel field */
182 mobiveil_pcie_sel_page(pcie, 0);
183 return pcie->csr_axi_slave_base + off;
184 }
185
186 mobiveil_pcie_sel_page(pcie, OFFSET_TO_PAGE_IDX(off));
187 return pcie->csr_axi_slave_base + OFFSET_TO_PAGE_ADDR(off);
188}
189
190static int mobiveil_pcie_read(void __iomem *addr, int size, u32 *val)
150{ 191{
151 writel_relaxed(value, pcie->csr_axi_slave_base + reg); 192 if ((uintptr_t)addr & (size - 1)) {
193 *val = 0;
194 return PCIBIOS_BAD_REGISTER_NUMBER;
195 }
196
197 switch (size) {
198 case 4:
199 *val = readl(addr);
200 break;
201 case 2:
202 *val = readw(addr);
203 break;
204 case 1:
205 *val = readb(addr);
206 break;
207 default:
208 *val = 0;
209 return PCIBIOS_BAD_REGISTER_NUMBER;
210 }
211
212 return PCIBIOS_SUCCESSFUL;
152} 213}
153 214
154static inline u32 csr_readl(struct mobiveil_pcie *pcie, const u32 reg) 215static int mobiveil_pcie_write(void __iomem *addr, int size, u32 val)
155{ 216{
156 return readl_relaxed(pcie->csr_axi_slave_base + reg); 217 if ((uintptr_t)addr & (size - 1))
218 return PCIBIOS_BAD_REGISTER_NUMBER;
219
220 switch (size) {
221 case 4:
222 writel(val, addr);
223 break;
224 case 2:
225 writew(val, addr);
226 break;
227 case 1:
228 writeb(val, addr);
229 break;
230 default:
231 return PCIBIOS_BAD_REGISTER_NUMBER;
232 }
233
234 return PCIBIOS_SUCCESSFUL;
235}
236
237static u32 csr_read(struct mobiveil_pcie *pcie, u32 off, size_t size)
238{
239 void *addr;
240 u32 val;
241 int ret;
242
243 addr = mobiveil_pcie_comp_addr(pcie, off);
244
245 ret = mobiveil_pcie_read(addr, size, &val);
246 if (ret)
247 dev_err(&pcie->pdev->dev, "read CSR address failed\n");
248
249 return val;
250}
251
252static void csr_write(struct mobiveil_pcie *pcie, u32 val, u32 off, size_t size)
253{
254 void *addr;
255 int ret;
256
257 addr = mobiveil_pcie_comp_addr(pcie, off);
258
259 ret = mobiveil_pcie_write(addr, size, val);
260 if (ret)
261 dev_err(&pcie->pdev->dev, "write CSR address failed\n");
262}
263
264static u32 csr_readl(struct mobiveil_pcie *pcie, u32 off)
265{
266 return csr_read(pcie, off, 0x4);
267}
268
269static void csr_writel(struct mobiveil_pcie *pcie, u32 val, u32 off)
270{
271 csr_write(pcie, val, off, 0x4);
157} 272}
158 273
159static bool mobiveil_pcie_link_up(struct mobiveil_pcie *pcie) 274static bool mobiveil_pcie_link_up(struct mobiveil_pcie *pcie)
@@ -174,7 +289,7 @@ static bool mobiveil_pcie_valid_device(struct pci_bus *bus, unsigned int devfn)
174 * Do not read more than one device on the bus directly 289 * Do not read more than one device on the bus directly
175 * attached to RC 290 * attached to RC
176 */ 291 */
177 if ((bus->primary == pcie->root_bus_nr) && (devfn > 0)) 292 if ((bus->primary == pcie->root_bus_nr) && (PCI_SLOT(devfn) > 0))
178 return false; 293 return false;
179 294
180 return true; 295 return true;
@@ -185,17 +300,17 @@ static bool mobiveil_pcie_valid_device(struct pci_bus *bus, unsigned int devfn)
185 * root port or endpoint 300 * root port or endpoint
186 */ 301 */
187static void __iomem *mobiveil_pcie_map_bus(struct pci_bus *bus, 302static void __iomem *mobiveil_pcie_map_bus(struct pci_bus *bus,
188 unsigned int devfn, int where) 303 unsigned int devfn, int where)
189{ 304{
190 struct mobiveil_pcie *pcie = bus->sysdata; 305 struct mobiveil_pcie *pcie = bus->sysdata;
306 u32 value;
191 307
192 if (!mobiveil_pcie_valid_device(bus, devfn)) 308 if (!mobiveil_pcie_valid_device(bus, devfn))
193 return NULL; 309 return NULL;
194 310
195 if (bus->number == pcie->root_bus_nr) { 311 /* RC config access */
196 /* RC config access */ 312 if (bus->number == pcie->root_bus_nr)
197 return pcie->csr_axi_slave_base + where; 313 return pcie->csr_axi_slave_base + where;
198 }
199 314
200 /* 315 /*
201 * EP config access (in Config/APIO space) 316 * EP config access (in Config/APIO space)
@@ -203,10 +318,12 @@ static void __iomem *mobiveil_pcie_map_bus(struct pci_bus *bus,
203 * (BDF) in PAB_AXI_AMAP_PEX_WIN_L0 Register. 318 * (BDF) in PAB_AXI_AMAP_PEX_WIN_L0 Register.
204 * Relies on pci_lock serialization 319 * Relies on pci_lock serialization
205 */ 320 */
206 csr_writel(pcie, bus->number << PAB_BUS_SHIFT | 321 value = bus->number << PAB_BUS_SHIFT |
207 PCI_SLOT(devfn) << PAB_DEVICE_SHIFT | 322 PCI_SLOT(devfn) << PAB_DEVICE_SHIFT |
208 PCI_FUNC(devfn) << PAB_FUNCTION_SHIFT, 323 PCI_FUNC(devfn) << PAB_FUNCTION_SHIFT;
209 PAB_AXI_AMAP_PEX_WIN_L(WIN_NUM_0)); 324
325 csr_writel(pcie, value, PAB_AXI_AMAP_PEX_WIN_L(WIN_NUM_0));
326
210 return pcie->config_axi_slave_base + where; 327 return pcie->config_axi_slave_base + where;
211} 328}
212 329
@@ -241,24 +358,29 @@ static void mobiveil_pcie_isr(struct irq_desc *desc)
241 358
242 /* Handle INTx */ 359 /* Handle INTx */
243 if (intr_status & PAB_INTP_INTX_MASK) { 360 if (intr_status & PAB_INTP_INTX_MASK) {
244 shifted_status = csr_readl(pcie, PAB_INTP_AMBA_MISC_STAT) >> 361 shifted_status = csr_readl(pcie, PAB_INTP_AMBA_MISC_STAT);
245 PAB_INTX_START; 362 shifted_status &= PAB_INTP_INTX_MASK;
363 shifted_status >>= PAB_INTX_START;
246 do { 364 do {
247 for_each_set_bit(bit, &shifted_status, PCI_NUM_INTX) { 365 for_each_set_bit(bit, &shifted_status, PCI_NUM_INTX) {
248 virq = irq_find_mapping(pcie->intx_domain, 366 virq = irq_find_mapping(pcie->intx_domain,
249 bit + 1); 367 bit + 1);
250 if (virq) 368 if (virq)
251 generic_handle_irq(virq); 369 generic_handle_irq(virq);
252 else 370 else
253 dev_err_ratelimited(dev, 371 dev_err_ratelimited(dev, "unexpected IRQ, INT%d\n",
254 "unexpected IRQ, INT%d\n", bit); 372 bit);
255 373
256 /* clear interrupt */ 374 /* clear interrupt handled */
257 csr_writel(pcie, 375 csr_writel(pcie, 1 << (PAB_INTX_START + bit),
258 shifted_status << PAB_INTX_START, 376 PAB_INTP_AMBA_MISC_STAT);
259 PAB_INTP_AMBA_MISC_STAT);
260 } 377 }
261 } while ((shifted_status >> PAB_INTX_START) != 0); 378
379 shifted_status = csr_readl(pcie,
380 PAB_INTP_AMBA_MISC_STAT);
381 shifted_status &= PAB_INTP_INTX_MASK;
382 shifted_status >>= PAB_INTX_START;
383 } while (shifted_status != 0);
262 } 384 }
263 385
264 /* read extra MSI status register */ 386 /* read extra MSI status register */
@@ -266,8 +388,7 @@ static void mobiveil_pcie_isr(struct irq_desc *desc)
266 388
267 /* handle MSI interrupts */ 389 /* handle MSI interrupts */
268 while (msi_status & 1) { 390 while (msi_status & 1) {
269 msi_data = readl_relaxed(pcie->apb_csr_base 391 msi_data = readl_relaxed(pcie->apb_csr_base + MSI_DATA_OFFSET);
270 + MSI_DATA_OFFSET);
271 392
272 /* 393 /*
273 * MSI_STATUS_OFFSET register gets updated to zero 394 * MSI_STATUS_OFFSET register gets updated to zero
@@ -276,18 +397,18 @@ static void mobiveil_pcie_isr(struct irq_desc *desc)
276 * two dummy reads. 397 * two dummy reads.
277 */ 398 */
278 msi_addr_lo = readl_relaxed(pcie->apb_csr_base + 399 msi_addr_lo = readl_relaxed(pcie->apb_csr_base +
279 MSI_ADDR_L_OFFSET); 400 MSI_ADDR_L_OFFSET);
280 msi_addr_hi = readl_relaxed(pcie->apb_csr_base + 401 msi_addr_hi = readl_relaxed(pcie->apb_csr_base +
281 MSI_ADDR_H_OFFSET); 402 MSI_ADDR_H_OFFSET);
282 dev_dbg(dev, "MSI registers, data: %08x, addr: %08x:%08x\n", 403 dev_dbg(dev, "MSI registers, data: %08x, addr: %08x:%08x\n",
283 msi_data, msi_addr_hi, msi_addr_lo); 404 msi_data, msi_addr_hi, msi_addr_lo);
284 405
285 virq = irq_find_mapping(msi->dev_domain, msi_data); 406 virq = irq_find_mapping(msi->dev_domain, msi_data);
286 if (virq) 407 if (virq)
287 generic_handle_irq(virq); 408 generic_handle_irq(virq);
288 409
289 msi_status = readl_relaxed(pcie->apb_csr_base + 410 msi_status = readl_relaxed(pcie->apb_csr_base +
290 MSI_STATUS_OFFSET); 411 MSI_STATUS_OFFSET);
291 } 412 }
292 413
293 /* Clear the interrupt status */ 414 /* Clear the interrupt status */
@@ -304,7 +425,7 @@ static int mobiveil_pcie_parse_dt(struct mobiveil_pcie *pcie)
304 425
305 /* map config resource */ 426 /* map config resource */
306 res = platform_get_resource_byname(pdev, IORESOURCE_MEM, 427 res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
307 "config_axi_slave"); 428 "config_axi_slave");
308 pcie->config_axi_slave_base = devm_pci_remap_cfg_resource(dev, res); 429 pcie->config_axi_slave_base = devm_pci_remap_cfg_resource(dev, res);
309 if (IS_ERR(pcie->config_axi_slave_base)) 430 if (IS_ERR(pcie->config_axi_slave_base))
310 return PTR_ERR(pcie->config_axi_slave_base); 431 return PTR_ERR(pcie->config_axi_slave_base);
@@ -312,7 +433,7 @@ static int mobiveil_pcie_parse_dt(struct mobiveil_pcie *pcie)
312 433
313 /* map csr resource */ 434 /* map csr resource */
314 res = platform_get_resource_byname(pdev, IORESOURCE_MEM, 435 res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
315 "csr_axi_slave"); 436 "csr_axi_slave");
316 pcie->csr_axi_slave_base = devm_pci_remap_cfg_resource(dev, res); 437 pcie->csr_axi_slave_base = devm_pci_remap_cfg_resource(dev, res);
317 if (IS_ERR(pcie->csr_axi_slave_base)) 438 if (IS_ERR(pcie->csr_axi_slave_base))
318 return PTR_ERR(pcie->csr_axi_slave_base); 439 return PTR_ERR(pcie->csr_axi_slave_base);
@@ -337,92 +458,50 @@ static int mobiveil_pcie_parse_dt(struct mobiveil_pcie *pcie)
337 return -ENODEV; 458 return -ENODEV;
338 } 459 }
339 460
340 irq_set_chained_handler_and_data(pcie->irq, mobiveil_pcie_isr, pcie);
341
342 return 0; 461 return 0;
343} 462}
344 463
345/*
346 * select_paged_register - routine to access paged register of root complex
347 *
348 * registers of RC are paged, for this scheme to work
349 * extracted higher 6 bits of the offset will be written to pg_sel
350 * field of PAB_CTRL register and rest of the lower 10 bits enabled with
351 * PAGE_SEL_EN are used as offset of the register.
352 */
353static void select_paged_register(struct mobiveil_pcie *pcie, u32 offset)
354{
355 int pab_ctrl_dw, pg_sel;
356
357 /* clear pg_sel field */
358 pab_ctrl_dw = csr_readl(pcie, PAB_CTRL);
359 pab_ctrl_dw = (pab_ctrl_dw & ~(PAGE_SEL_MASK << PAGE_SEL_SHIFT));
360
361 /* set pg_sel field */
362 pg_sel = (offset >> PAGE_SEL_OFFSET_SHIFT) & PAGE_SEL_MASK;
363 pab_ctrl_dw |= ((pg_sel << PAGE_SEL_SHIFT));
364 csr_writel(pcie, pab_ctrl_dw, PAB_CTRL);
365}
366
367static void write_paged_register(struct mobiveil_pcie *pcie,
368 u32 val, u32 offset)
369{
370 u32 off = (offset & PAGE_LO_MASK) | PAGE_SEL_EN;
371
372 select_paged_register(pcie, offset);
373 csr_writel(pcie, val, off);
374}
375
376static u32 read_paged_register(struct mobiveil_pcie *pcie, u32 offset)
377{
378 u32 off = (offset & PAGE_LO_MASK) | PAGE_SEL_EN;
379
380 select_paged_register(pcie, offset);
381 return csr_readl(pcie, off);
382}
383
384static void program_ib_windows(struct mobiveil_pcie *pcie, int win_num, 464static void program_ib_windows(struct mobiveil_pcie *pcie, int win_num,
385 int pci_addr, u32 type, u64 size) 465 u64 pci_addr, u32 type, u64 size)
386{ 466{
387 int pio_ctrl_val; 467 u32 value;
388 int amap_ctrl_dw;
389 u64 size64 = ~(size - 1); 468 u64 size64 = ~(size - 1);
390 469
391 if ((pcie->ib_wins_configured + 1) > pcie->ppio_wins) { 470 if (win_num >= pcie->ppio_wins) {
392 dev_err(&pcie->pdev->dev, 471 dev_err(&pcie->pdev->dev,
393 "ERROR: max inbound windows reached !\n"); 472 "ERROR: max inbound windows reached !\n");
394 return; 473 return;
395 } 474 }
396 475
397 pio_ctrl_val = csr_readl(pcie, PAB_PEX_PIO_CTRL); 476 value = csr_readl(pcie, PAB_PEX_AMAP_CTRL(win_num));
398 csr_writel(pcie, 477 value &= ~(AMAP_CTRL_TYPE_MASK << AMAP_CTRL_TYPE_SHIFT | WIN_SIZE_MASK);
399 pio_ctrl_val | (1 << PIO_ENABLE_SHIFT), PAB_PEX_PIO_CTRL); 478 value |= type << AMAP_CTRL_TYPE_SHIFT | 1 << AMAP_CTRL_EN_SHIFT |
400 amap_ctrl_dw = read_paged_register(pcie, PAB_PEX_AMAP_CTRL(win_num)); 479 (lower_32_bits(size64) & WIN_SIZE_MASK);
401 amap_ctrl_dw = (amap_ctrl_dw | (type << AMAP_CTRL_TYPE_SHIFT)); 480 csr_writel(pcie, value, PAB_PEX_AMAP_CTRL(win_num));
402 amap_ctrl_dw = (amap_ctrl_dw | (1 << AMAP_CTRL_EN_SHIFT)); 481
482 csr_writel(pcie, upper_32_bits(size64),
483 PAB_EXT_PEX_AMAP_SIZEN(win_num));
403 484
404 write_paged_register(pcie, amap_ctrl_dw | lower_32_bits(size64), 485 csr_writel(pcie, pci_addr, PAB_PEX_AMAP_AXI_WIN(win_num));
405 PAB_PEX_AMAP_CTRL(win_num));
406 486
407 write_paged_register(pcie, upper_32_bits(size64), 487 csr_writel(pcie, lower_32_bits(pci_addr),
408 PAB_EXT_PEX_AMAP_SIZEN(win_num)); 488 PAB_PEX_AMAP_PEX_WIN_L(win_num));
489 csr_writel(pcie, upper_32_bits(pci_addr),
490 PAB_PEX_AMAP_PEX_WIN_H(win_num));
409 491
410 write_paged_register(pcie, pci_addr, PAB_PEX_AMAP_AXI_WIN(win_num)); 492 pcie->ib_wins_configured++;
411 write_paged_register(pcie, pci_addr, PAB_PEX_AMAP_PEX_WIN_L(win_num));
412 write_paged_register(pcie, 0, PAB_PEX_AMAP_PEX_WIN_H(win_num));
413} 493}
414 494
415/* 495/*
416 * routine to program the outbound windows 496 * routine to program the outbound windows
417 */ 497 */
418static void program_ob_windows(struct mobiveil_pcie *pcie, int win_num, 498static void program_ob_windows(struct mobiveil_pcie *pcie, int win_num,
419 u64 cpu_addr, u64 pci_addr, u32 config_io_bit, u64 size) 499 u64 cpu_addr, u64 pci_addr, u32 type, u64 size)
420{ 500{
421 501 u32 value;
422 u32 value, type;
423 u64 size64 = ~(size - 1); 502 u64 size64 = ~(size - 1);
424 503
425 if ((pcie->ob_wins_configured + 1) > pcie->apio_wins) { 504 if (win_num >= pcie->apio_wins) {
426 dev_err(&pcie->pdev->dev, 505 dev_err(&pcie->pdev->dev,
427 "ERROR: max outbound windows reached !\n"); 506 "ERROR: max outbound windows reached !\n");
428 return; 507 return;
@@ -432,28 +511,27 @@ static void program_ob_windows(struct mobiveil_pcie *pcie, int win_num,
432 * program Enable Bit to 1, Type Bit to (00) base 2, AXI Window Size Bit 511 * program Enable Bit to 1, Type Bit to (00) base 2, AXI Window Size Bit
433 * to 4 KB in PAB_AXI_AMAP_CTRL register 512 * to 4 KB in PAB_AXI_AMAP_CTRL register
434 */ 513 */
435 type = config_io_bit;
436 value = csr_readl(pcie, PAB_AXI_AMAP_CTRL(win_num)); 514 value = csr_readl(pcie, PAB_AXI_AMAP_CTRL(win_num));
437 csr_writel(pcie, 1 << WIN_ENABLE_SHIFT | type << WIN_TYPE_SHIFT | 515 value &= ~(WIN_TYPE_MASK << WIN_TYPE_SHIFT | WIN_SIZE_MASK);
438 lower_32_bits(size64), PAB_AXI_AMAP_CTRL(win_num)); 516 value |= 1 << WIN_ENABLE_SHIFT | type << WIN_TYPE_SHIFT |
517 (lower_32_bits(size64) & WIN_SIZE_MASK);
518 csr_writel(pcie, value, PAB_AXI_AMAP_CTRL(win_num));
439 519
440 write_paged_register(pcie, upper_32_bits(size64), 520 csr_writel(pcie, upper_32_bits(size64), PAB_EXT_AXI_AMAP_SIZE(win_num));
441 PAB_EXT_AXI_AMAP_SIZE(win_num));
442 521
443 /* 522 /*
444 * program AXI window base with appropriate value in 523 * program AXI window base with appropriate value in
445 * PAB_AXI_AMAP_AXI_WIN0 register 524 * PAB_AXI_AMAP_AXI_WIN0 register
446 */ 525 */
447 value = csr_readl(pcie, PAB_AXI_AMAP_AXI_WIN(win_num)); 526 csr_writel(pcie, lower_32_bits(cpu_addr) & (~AXI_WINDOW_ALIGN_MASK),
448 csr_writel(pcie, cpu_addr & (~AXI_WINDOW_ALIGN_MASK), 527 PAB_AXI_AMAP_AXI_WIN(win_num));
449 PAB_AXI_AMAP_AXI_WIN(win_num)); 528 csr_writel(pcie, upper_32_bits(cpu_addr),
450 529 PAB_EXT_AXI_AMAP_AXI_WIN(win_num));
451 value = csr_readl(pcie, PAB_AXI_AMAP_PEX_WIN_H(win_num));
452 530
453 csr_writel(pcie, lower_32_bits(pci_addr), 531 csr_writel(pcie, lower_32_bits(pci_addr),
454 PAB_AXI_AMAP_PEX_WIN_L(win_num)); 532 PAB_AXI_AMAP_PEX_WIN_L(win_num));
455 csr_writel(pcie, upper_32_bits(pci_addr), 533 csr_writel(pcie, upper_32_bits(pci_addr),
456 PAB_AXI_AMAP_PEX_WIN_H(win_num)); 534 PAB_AXI_AMAP_PEX_WIN_H(win_num));
457 535
458 pcie->ob_wins_configured++; 536 pcie->ob_wins_configured++;
459} 537}
@@ -469,7 +547,9 @@ static int mobiveil_bringup_link(struct mobiveil_pcie *pcie)
469 547
470 usleep_range(LINK_WAIT_MIN, LINK_WAIT_MAX); 548 usleep_range(LINK_WAIT_MIN, LINK_WAIT_MAX);
471 } 549 }
550
472 dev_err(&pcie->pdev->dev, "link never came up\n"); 551 dev_err(&pcie->pdev->dev, "link never came up\n");
552
473 return -ETIMEDOUT; 553 return -ETIMEDOUT;
474} 554}
475 555
@@ -482,50 +562,55 @@ static void mobiveil_pcie_enable_msi(struct mobiveil_pcie *pcie)
482 msi->msi_pages_phys = (phys_addr_t)msg_addr; 562 msi->msi_pages_phys = (phys_addr_t)msg_addr;
483 563
484 writel_relaxed(lower_32_bits(msg_addr), 564 writel_relaxed(lower_32_bits(msg_addr),
485 pcie->apb_csr_base + MSI_BASE_LO_OFFSET); 565 pcie->apb_csr_base + MSI_BASE_LO_OFFSET);
486 writel_relaxed(upper_32_bits(msg_addr), 566 writel_relaxed(upper_32_bits(msg_addr),
487 pcie->apb_csr_base + MSI_BASE_HI_OFFSET); 567 pcie->apb_csr_base + MSI_BASE_HI_OFFSET);
488 writel_relaxed(4096, pcie->apb_csr_base + MSI_SIZE_OFFSET); 568 writel_relaxed(4096, pcie->apb_csr_base + MSI_SIZE_OFFSET);
489 writel_relaxed(1, pcie->apb_csr_base + MSI_ENABLE_OFFSET); 569 writel_relaxed(1, pcie->apb_csr_base + MSI_ENABLE_OFFSET);
490} 570}
491 571
492static int mobiveil_host_init(struct mobiveil_pcie *pcie) 572static int mobiveil_host_init(struct mobiveil_pcie *pcie)
493{ 573{
494 u32 value, pab_ctrl, type = 0; 574 u32 value, pab_ctrl, type;
495 int err; 575 struct resource_entry *win;
496 struct resource_entry *win, *tmp; 576
497 577 /* setup bus numbers */
498 err = mobiveil_bringup_link(pcie); 578 value = csr_readl(pcie, PCI_PRIMARY_BUS);
499 if (err) { 579 value &= 0xff000000;
500 dev_info(&pcie->pdev->dev, "link bring-up failed\n"); 580 value |= 0x00ff0100;
501 return err; 581 csr_writel(pcie, value, PCI_PRIMARY_BUS);
502 }
503 582
504 /* 583 /*
505 * program Bus Master Enable Bit in Command Register in PAB Config 584 * program Bus Master Enable Bit in Command Register in PAB Config
506 * Space 585 * Space
507 */ 586 */
508 value = csr_readl(pcie, PCI_COMMAND); 587 value = csr_readl(pcie, PCI_COMMAND);
509 csr_writel(pcie, value | PCI_COMMAND_IO | PCI_COMMAND_MEMORY | 588 value |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
510 PCI_COMMAND_MASTER, PCI_COMMAND); 589 csr_writel(pcie, value, PCI_COMMAND);
511 590
512 /* 591 /*
513 * program PIO Enable Bit to 1 (and PEX PIO Enable to 1) in PAB_CTRL 592 * program PIO Enable Bit to 1 (and PEX PIO Enable to 1) in PAB_CTRL
514 * register 593 * register
515 */ 594 */
516 pab_ctrl = csr_readl(pcie, PAB_CTRL); 595 pab_ctrl = csr_readl(pcie, PAB_CTRL);
517 csr_writel(pcie, pab_ctrl | (1 << AMBA_PIO_ENABLE_SHIFT) | 596 pab_ctrl |= (1 << AMBA_PIO_ENABLE_SHIFT) | (1 << PEX_PIO_ENABLE_SHIFT);
518 (1 << PEX_PIO_ENABLE_SHIFT), PAB_CTRL); 597 csr_writel(pcie, pab_ctrl, PAB_CTRL);
519 598
520 csr_writel(pcie, (PAB_INTP_INTX_MASK | PAB_INTP_MSI_MASK), 599 csr_writel(pcie, (PAB_INTP_INTX_MASK | PAB_INTP_MSI_MASK),
521 PAB_INTP_AMBA_MISC_ENB); 600 PAB_INTP_AMBA_MISC_ENB);
522 601
523 /* 602 /*
524 * program PIO Enable Bit to 1 and Config Window Enable Bit to 1 in 603 * program PIO Enable Bit to 1 and Config Window Enable Bit to 1 in
525 * PAB_AXI_PIO_CTRL Register 604 * PAB_AXI_PIO_CTRL Register
526 */ 605 */
527 value = csr_readl(pcie, PAB_AXI_PIO_CTRL); 606 value = csr_readl(pcie, PAB_AXI_PIO_CTRL);
528 csr_writel(pcie, value | APIO_EN_MASK, PAB_AXI_PIO_CTRL); 607 value |= APIO_EN_MASK;
608 csr_writel(pcie, value, PAB_AXI_PIO_CTRL);
609
610 /* Enable PCIe PIO master */
611 value = csr_readl(pcie, PAB_PEX_PIO_CTRL);
612 value |= 1 << PIO_ENABLE_SHIFT;
613 csr_writel(pcie, value, PAB_PEX_PIO_CTRL);
529 614
530 /* 615 /*
531 * we'll program one outbound window for config reads and 616 * we'll program one outbound window for config reads and
@@ -535,32 +620,38 @@ static int mobiveil_host_init(struct mobiveil_pcie *pcie)
535 */ 620 */
536 621
537 /* config outbound translation window */ 622 /* config outbound translation window */
538 program_ob_windows(pcie, pcie->ob_wins_configured, 623 program_ob_windows(pcie, WIN_NUM_0, pcie->ob_io_res->start, 0,
539 pcie->ob_io_res->start, 0, CFG_WINDOW_TYPE, 624 CFG_WINDOW_TYPE, resource_size(pcie->ob_io_res));
540 resource_size(pcie->ob_io_res));
541 625
542 /* memory inbound translation window */ 626 /* memory inbound translation window */
543 program_ib_windows(pcie, WIN_NUM_1, 0, MEM_WINDOW_TYPE, IB_WIN_SIZE); 627 program_ib_windows(pcie, WIN_NUM_0, 0, MEM_WINDOW_TYPE, IB_WIN_SIZE);
544 628
545 /* Get the I/O and memory ranges from DT */ 629 /* Get the I/O and memory ranges from DT */
546 resource_list_for_each_entry_safe(win, tmp, &pcie->resources) { 630 resource_list_for_each_entry(win, &pcie->resources) {
547 type = 0;
548 if (resource_type(win->res) == IORESOURCE_MEM) 631 if (resource_type(win->res) == IORESOURCE_MEM)
549 type = MEM_WINDOW_TYPE; 632 type = MEM_WINDOW_TYPE;
550 if (resource_type(win->res) == IORESOURCE_IO) 633 else if (resource_type(win->res) == IORESOURCE_IO)
551 type = IO_WINDOW_TYPE; 634 type = IO_WINDOW_TYPE;
552 if (type) { 635 else
553 /* configure outbound translation window */ 636 continue;
554 program_ob_windows(pcie, pcie->ob_wins_configured, 637
555 win->res->start, 0, type, 638 /* configure outbound translation window */
556 resource_size(win->res)); 639 program_ob_windows(pcie, pcie->ob_wins_configured,
557 } 640 win->res->start,
641 win->res->start - win->offset,
642 type, resource_size(win->res));
558 } 643 }
559 644
645 /* fixup for PCIe class register */
646 value = csr_readl(pcie, PAB_INTP_AXI_PIO_CLASS);
647 value &= 0xff;
648 value |= (PCI_CLASS_BRIDGE_PCI << 16);
649 csr_writel(pcie, value, PAB_INTP_AXI_PIO_CLASS);
650
560 /* setup MSI hardware registers */ 651 /* setup MSI hardware registers */
561 mobiveil_pcie_enable_msi(pcie); 652 mobiveil_pcie_enable_msi(pcie);
562 653
563 return err; 654 return 0;
564} 655}
565 656
566static void mobiveil_mask_intx_irq(struct irq_data *data) 657static void mobiveil_mask_intx_irq(struct irq_data *data)
@@ -574,7 +665,8 @@ static void mobiveil_mask_intx_irq(struct irq_data *data)
574 mask = 1 << ((data->hwirq + PAB_INTX_START) - 1); 665 mask = 1 << ((data->hwirq + PAB_INTX_START) - 1);
575 raw_spin_lock_irqsave(&pcie->intx_mask_lock, flags); 666 raw_spin_lock_irqsave(&pcie->intx_mask_lock, flags);
576 shifted_val = csr_readl(pcie, PAB_INTP_AMBA_MISC_ENB); 667 shifted_val = csr_readl(pcie, PAB_INTP_AMBA_MISC_ENB);
577 csr_writel(pcie, (shifted_val & (~mask)), PAB_INTP_AMBA_MISC_ENB); 668 shifted_val &= ~mask;
669 csr_writel(pcie, shifted_val, PAB_INTP_AMBA_MISC_ENB);
578 raw_spin_unlock_irqrestore(&pcie->intx_mask_lock, flags); 670 raw_spin_unlock_irqrestore(&pcie->intx_mask_lock, flags);
579} 671}
580 672
@@ -589,7 +681,8 @@ static void mobiveil_unmask_intx_irq(struct irq_data *data)
589 mask = 1 << ((data->hwirq + PAB_INTX_START) - 1); 681 mask = 1 << ((data->hwirq + PAB_INTX_START) - 1);
590 raw_spin_lock_irqsave(&pcie->intx_mask_lock, flags); 682 raw_spin_lock_irqsave(&pcie->intx_mask_lock, flags);
591 shifted_val = csr_readl(pcie, PAB_INTP_AMBA_MISC_ENB); 683 shifted_val = csr_readl(pcie, PAB_INTP_AMBA_MISC_ENB);
592 csr_writel(pcie, (shifted_val | mask), PAB_INTP_AMBA_MISC_ENB); 684 shifted_val |= mask;
685 csr_writel(pcie, shifted_val, PAB_INTP_AMBA_MISC_ENB);
593 raw_spin_unlock_irqrestore(&pcie->intx_mask_lock, flags); 686 raw_spin_unlock_irqrestore(&pcie->intx_mask_lock, flags);
594} 687}
595 688
@@ -603,10 +696,11 @@ static struct irq_chip intx_irq_chip = {
603 696
604/* routine to setup the INTx related data */ 697/* routine to setup the INTx related data */
605static int mobiveil_pcie_intx_map(struct irq_domain *domain, unsigned int irq, 698static int mobiveil_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
606 irq_hw_number_t hwirq) 699 irq_hw_number_t hwirq)
607{ 700{
608 irq_set_chip_and_handler(irq, &intx_irq_chip, handle_level_irq); 701 irq_set_chip_and_handler(irq, &intx_irq_chip, handle_level_irq);
609 irq_set_chip_data(irq, domain->host_data); 702 irq_set_chip_data(irq, domain->host_data);
703
610 return 0; 704 return 0;
611} 705}
612 706
@@ -623,7 +717,7 @@ static struct irq_chip mobiveil_msi_irq_chip = {
623 717
624static struct msi_domain_info mobiveil_msi_domain_info = { 718static struct msi_domain_info mobiveil_msi_domain_info = {
625 .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | 719 .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
626 MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX), 720 MSI_FLAG_PCI_MSIX),
627 .chip = &mobiveil_msi_irq_chip, 721 .chip = &mobiveil_msi_irq_chip,
628}; 722};
629 723
@@ -641,7 +735,7 @@ static void mobiveil_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
641} 735}
642 736
643static int mobiveil_msi_set_affinity(struct irq_data *irq_data, 737static int mobiveil_msi_set_affinity(struct irq_data *irq_data,
644 const struct cpumask *mask, bool force) 738 const struct cpumask *mask, bool force)
645{ 739{
646 return -EINVAL; 740 return -EINVAL;
647} 741}
@@ -653,7 +747,8 @@ static struct irq_chip mobiveil_msi_bottom_irq_chip = {
653}; 747};
654 748
655static int mobiveil_irq_msi_domain_alloc(struct irq_domain *domain, 749static int mobiveil_irq_msi_domain_alloc(struct irq_domain *domain,
656 unsigned int virq, unsigned int nr_irqs, void *args) 750 unsigned int virq,
751 unsigned int nr_irqs, void *args)
657{ 752{
658 struct mobiveil_pcie *pcie = domain->host_data; 753 struct mobiveil_pcie *pcie = domain->host_data;
659 struct mobiveil_msi *msi = &pcie->msi; 754 struct mobiveil_msi *msi = &pcie->msi;
@@ -673,13 +768,13 @@ static int mobiveil_irq_msi_domain_alloc(struct irq_domain *domain,
673 mutex_unlock(&msi->lock); 768 mutex_unlock(&msi->lock);
674 769
675 irq_domain_set_info(domain, virq, bit, &mobiveil_msi_bottom_irq_chip, 770 irq_domain_set_info(domain, virq, bit, &mobiveil_msi_bottom_irq_chip,
676 domain->host_data, handle_level_irq, 771 domain->host_data, handle_level_irq, NULL, NULL);
677 NULL, NULL);
678 return 0; 772 return 0;
679} 773}
680 774
681static void mobiveil_irq_msi_domain_free(struct irq_domain *domain, 775static void mobiveil_irq_msi_domain_free(struct irq_domain *domain,
682 unsigned int virq, unsigned int nr_irqs) 776 unsigned int virq,
777 unsigned int nr_irqs)
683{ 778{
684 struct irq_data *d = irq_domain_get_irq_data(domain, virq); 779 struct irq_data *d = irq_domain_get_irq_data(domain, virq);
685 struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(d); 780 struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(d);
@@ -687,12 +782,11 @@ static void mobiveil_irq_msi_domain_free(struct irq_domain *domain,
687 782
688 mutex_lock(&msi->lock); 783 mutex_lock(&msi->lock);
689 784
690 if (!test_bit(d->hwirq, msi->msi_irq_in_use)) { 785 if (!test_bit(d->hwirq, msi->msi_irq_in_use))
691 dev_err(&pcie->pdev->dev, "trying to free unused MSI#%lu\n", 786 dev_err(&pcie->pdev->dev, "trying to free unused MSI#%lu\n",
692 d->hwirq); 787 d->hwirq);
693 } else { 788 else
694 __clear_bit(d->hwirq, msi->msi_irq_in_use); 789 __clear_bit(d->hwirq, msi->msi_irq_in_use);
695 }
696 790
697 mutex_unlock(&msi->lock); 791 mutex_unlock(&msi->lock);
698} 792}
@@ -716,12 +810,14 @@ static int mobiveil_allocate_msi_domains(struct mobiveil_pcie *pcie)
716 } 810 }
717 811
718 msi->msi_domain = pci_msi_create_irq_domain(fwnode, 812 msi->msi_domain = pci_msi_create_irq_domain(fwnode,
719 &mobiveil_msi_domain_info, msi->dev_domain); 813 &mobiveil_msi_domain_info,
814 msi->dev_domain);
720 if (!msi->msi_domain) { 815 if (!msi->msi_domain) {
721 dev_err(dev, "failed to create MSI domain\n"); 816 dev_err(dev, "failed to create MSI domain\n");
722 irq_domain_remove(msi->dev_domain); 817 irq_domain_remove(msi->dev_domain);
723 return -ENOMEM; 818 return -ENOMEM;
724 } 819 }
820
725 return 0; 821 return 0;
726} 822}
727 823
@@ -732,12 +828,12 @@ static int mobiveil_pcie_init_irq_domain(struct mobiveil_pcie *pcie)
732 int ret; 828 int ret;
733 829
734 /* setup INTx */ 830 /* setup INTx */
735 pcie->intx_domain = irq_domain_add_linear(node, 831 pcie->intx_domain = irq_domain_add_linear(node, PCI_NUM_INTX,
736 PCI_NUM_INTX, &intx_domain_ops, pcie); 832 &intx_domain_ops, pcie);
737 833
738 if (!pcie->intx_domain) { 834 if (!pcie->intx_domain) {
739 dev_err(dev, "Failed to get a INTx IRQ domain\n"); 835 dev_err(dev, "Failed to get a INTx IRQ domain\n");
740 return -ENODEV; 836 return -ENOMEM;
741 } 837 }
742 838
743 raw_spin_lock_init(&pcie->intx_mask_lock); 839 raw_spin_lock_init(&pcie->intx_mask_lock);
@@ -763,11 +859,9 @@ static int mobiveil_pcie_probe(struct platform_device *pdev)
763 /* allocate the PCIe port */ 859 /* allocate the PCIe port */
764 bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie)); 860 bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
765 if (!bridge) 861 if (!bridge)
766 return -ENODEV; 862 return -ENOMEM;
767 863
768 pcie = pci_host_bridge_priv(bridge); 864 pcie = pci_host_bridge_priv(bridge);
769 if (!pcie)
770 return -ENOMEM;
771 865
772 pcie->pdev = pdev; 866 pcie->pdev = pdev;
773 867
@@ -784,7 +878,7 @@ static int mobiveil_pcie_probe(struct platform_device *pdev)
784 &pcie->resources, &iobase); 878 &pcie->resources, &iobase);
785 if (ret) { 879 if (ret) {
786 dev_err(dev, "Getting bridge resources failed\n"); 880 dev_err(dev, "Getting bridge resources failed\n");
787 return -ENOMEM; 881 return ret;
788 } 882 }
789 883
790 /* 884 /*
@@ -797,9 +891,6 @@ static int mobiveil_pcie_probe(struct platform_device *pdev)
797 goto error; 891 goto error;
798 } 892 }
799 893
800 /* fixup for PCIe class register */
801 csr_writel(pcie, 0x060402ab, PAB_INTP_AXI_PIO_CLASS);
802
803 /* initialize the IRQ domains */ 894 /* initialize the IRQ domains */
804 ret = mobiveil_pcie_init_irq_domain(pcie); 895 ret = mobiveil_pcie_init_irq_domain(pcie);
805 if (ret) { 896 if (ret) {
@@ -807,6 +898,8 @@ static int mobiveil_pcie_probe(struct platform_device *pdev)
807 goto error; 898 goto error;
808 } 899 }
809 900
901 irq_set_chained_handler_and_data(pcie->irq, mobiveil_pcie_isr, pcie);
902
810 ret = devm_request_pci_bus_resources(dev, &pcie->resources); 903 ret = devm_request_pci_bus_resources(dev, &pcie->resources);
811 if (ret) 904 if (ret)
812 goto error; 905 goto error;
@@ -820,6 +913,12 @@ static int mobiveil_pcie_probe(struct platform_device *pdev)
820 bridge->map_irq = of_irq_parse_and_map_pci; 913 bridge->map_irq = of_irq_parse_and_map_pci;
821 bridge->swizzle_irq = pci_common_swizzle; 914 bridge->swizzle_irq = pci_common_swizzle;
822 915
916 ret = mobiveil_bringup_link(pcie);
917 if (ret) {
918 dev_info(dev, "link bring-up failed\n");
919 goto error;
920 }
921
823 /* setup the kernel resources for the newly added PCIe root bus */ 922 /* setup the kernel resources for the newly added PCIe root bus */
824 ret = pci_scan_root_bus_bridge(bridge); 923 ret = pci_scan_root_bus_bridge(bridge);
825 if (ret) 924 if (ret)
@@ -848,10 +947,10 @@ MODULE_DEVICE_TABLE(of, mobiveil_pcie_of_match);
848static struct platform_driver mobiveil_pcie_driver = { 947static struct platform_driver mobiveil_pcie_driver = {
849 .probe = mobiveil_pcie_probe, 948 .probe = mobiveil_pcie_probe,
850 .driver = { 949 .driver = {
851 .name = "mobiveil-pcie", 950 .name = "mobiveil-pcie",
852 .of_match_table = mobiveil_pcie_of_match, 951 .of_match_table = mobiveil_pcie_of_match,
853 .suppress_bind_attrs = true, 952 .suppress_bind_attrs = true,
854 }, 953 },
855}; 954};
856 955
857builtin_platform_driver(mobiveil_pcie_driver); 956builtin_platform_driver(mobiveil_pcie_driver);
diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c
index 3b031f00a94a..45c0f344ccd1 100644
--- a/drivers/pci/controller/pcie-xilinx-nwl.c
+++ b/drivers/pci/controller/pcie-xilinx-nwl.c
@@ -482,15 +482,13 @@ static int nwl_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
482 int i; 482 int i;
483 483
484 mutex_lock(&msi->lock); 484 mutex_lock(&msi->lock);
485 bit = bitmap_find_next_zero_area(msi->bitmap, INT_PCI_MSI_NR, 0, 485 bit = bitmap_find_free_region(msi->bitmap, INT_PCI_MSI_NR,
486 nr_irqs, 0); 486 get_count_order(nr_irqs));
487 if (bit >= INT_PCI_MSI_NR) { 487 if (bit < 0) {
488 mutex_unlock(&msi->lock); 488 mutex_unlock(&msi->lock);
489 return -ENOSPC; 489 return -ENOSPC;
490 } 490 }
491 491
492 bitmap_set(msi->bitmap, bit, nr_irqs);
493
494 for (i = 0; i < nr_irqs; i++) { 492 for (i = 0; i < nr_irqs; i++) {
495 irq_domain_set_info(domain, virq + i, bit + i, &nwl_irq_chip, 493 irq_domain_set_info(domain, virq + i, bit + i, &nwl_irq_chip,
496 domain->host_data, handle_simple_irq, 494 domain->host_data, handle_simple_irq,
@@ -508,7 +506,8 @@ static void nwl_irq_domain_free(struct irq_domain *domain, unsigned int virq,
508 struct nwl_msi *msi = &pcie->msi; 506 struct nwl_msi *msi = &pcie->msi;
509 507
510 mutex_lock(&msi->lock); 508 mutex_lock(&msi->lock);
511 bitmap_clear(msi->bitmap, data->hwirq, nr_irqs); 509 bitmap_release_region(msi->bitmap, data->hwirq,
510 get_count_order(nr_irqs));
512 mutex_unlock(&msi->lock); 511 mutex_unlock(&msi->lock);
513} 512}
514 513
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index 999a5509e57e..4575e0c6dc4b 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -627,7 +627,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
627 * 32-bit resources. __pci_assign_resource() enforces that 627 * 32-bit resources. __pci_assign_resource() enforces that
628 * artificial restriction to make sure everything will fit. 628 * artificial restriction to make sure everything will fit.
629 * 629 *
630 * The only way we could use a 64-bit non-prefechable MEMBAR is 630 * The only way we could use a 64-bit non-prefetchable MEMBAR is
631 * if its address is <4GB so that we can convert it to a 32-bit 631 * if its address is <4GB so that we can convert it to a 32-bit
632 * resource. To be visible to the host OS, all VMD endpoints must 632 * resource. To be visible to the host OS, all VMD endpoints must
633 * be initially configured by platform BIOS, which includes setting 633 * be initially configured by platform BIOS, which includes setting
diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index 27806987e93b..1cfe3687a211 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -381,15 +381,15 @@ static void pci_epf_test_unbind(struct pci_epf *epf)
381 epf_bar = &epf->bar[bar]; 381 epf_bar = &epf->bar[bar];
382 382
383 if (epf_test->reg[bar]) { 383 if (epf_test->reg[bar]) {
384 pci_epf_free_space(epf, epf_test->reg[bar], bar);
385 pci_epc_clear_bar(epc, epf->func_no, epf_bar); 384 pci_epc_clear_bar(epc, epf->func_no, epf_bar);
385 pci_epf_free_space(epf, epf_test->reg[bar], bar);
386 } 386 }
387 } 387 }
388} 388}
389 389
390static int pci_epf_test_set_bar(struct pci_epf *epf) 390static int pci_epf_test_set_bar(struct pci_epf *epf)
391{ 391{
392 int bar; 392 int bar, add;
393 int ret; 393 int ret;
394 struct pci_epf_bar *epf_bar; 394 struct pci_epf_bar *epf_bar;
395 struct pci_epc *epc = epf->epc; 395 struct pci_epc *epc = epf->epc;
@@ -400,8 +400,14 @@ static int pci_epf_test_set_bar(struct pci_epf *epf)
400 400
401 epc_features = epf_test->epc_features; 401 epc_features = epf_test->epc_features;
402 402
403 for (bar = BAR_0; bar <= BAR_5; bar++) { 403 for (bar = BAR_0; bar <= BAR_5; bar += add) {
404 epf_bar = &epf->bar[bar]; 404 epf_bar = &epf->bar[bar];
405 /*
406 * pci_epc_set_bar() sets PCI_BASE_ADDRESS_MEM_TYPE_64
407 * if the specific implementation required a 64-bit BAR,
408 * even if we only requested a 32-bit BAR.
409 */
410 add = (epf_bar->flags & PCI_BASE_ADDRESS_MEM_TYPE_64) ? 2 : 1;
405 411
406 if (!!(epc_features->reserved_bar & (1 << bar))) 412 if (!!(epc_features->reserved_bar & (1 << bar)))
407 continue; 413 continue;
@@ -413,13 +419,6 @@ static int pci_epf_test_set_bar(struct pci_epf *epf)
413 if (bar == test_reg_bar) 419 if (bar == test_reg_bar)
414 return ret; 420 return ret;
415 } 421 }
416 /*
417 * pci_epc_set_bar() sets PCI_BASE_ADDRESS_MEM_TYPE_64
418 * if the specific implementation required a 64-bit BAR,
419 * even if we only requested a 32-bit BAR.
420 */
421 if (epf_bar->flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
422 bar++;
423 } 422 }
424 423
425 return 0; 424 return 0;
@@ -431,13 +430,19 @@ static int pci_epf_test_alloc_space(struct pci_epf *epf)
431 struct device *dev = &epf->dev; 430 struct device *dev = &epf->dev;
432 struct pci_epf_bar *epf_bar; 431 struct pci_epf_bar *epf_bar;
433 void *base; 432 void *base;
434 int bar; 433 int bar, add;
435 enum pci_barno test_reg_bar = epf_test->test_reg_bar; 434 enum pci_barno test_reg_bar = epf_test->test_reg_bar;
436 const struct pci_epc_features *epc_features; 435 const struct pci_epc_features *epc_features;
436 size_t test_reg_size;
437 437
438 epc_features = epf_test->epc_features; 438 epc_features = epf_test->epc_features;
439 439
440 base = pci_epf_alloc_space(epf, sizeof(struct pci_epf_test_reg), 440 if (epc_features->bar_fixed_size[test_reg_bar])
441 test_reg_size = bar_size[test_reg_bar];
442 else
443 test_reg_size = sizeof(struct pci_epf_test_reg);
444
445 base = pci_epf_alloc_space(epf, test_reg_size,
441 test_reg_bar, epc_features->align); 446 test_reg_bar, epc_features->align);
442 if (!base) { 447 if (!base) {
443 dev_err(dev, "Failed to allocated register space\n"); 448 dev_err(dev, "Failed to allocated register space\n");
@@ -445,8 +450,10 @@ static int pci_epf_test_alloc_space(struct pci_epf *epf)
445 } 450 }
446 epf_test->reg[test_reg_bar] = base; 451 epf_test->reg[test_reg_bar] = base;
447 452
448 for (bar = BAR_0; bar <= BAR_5; bar++) { 453 for (bar = BAR_0; bar <= BAR_5; bar += add) {
449 epf_bar = &epf->bar[bar]; 454 epf_bar = &epf->bar[bar];
455 add = (epf_bar->flags & PCI_BASE_ADDRESS_MEM_TYPE_64) ? 2 : 1;
456
450 if (bar == test_reg_bar) 457 if (bar == test_reg_bar)
451 continue; 458 continue;
452 459
@@ -459,8 +466,6 @@ static int pci_epf_test_alloc_space(struct pci_epf *epf)
459 dev_err(dev, "Failed to allocate space for BAR%d\n", 466 dev_err(dev, "Failed to allocate space for BAR%d\n",
460 bar); 467 bar);
461 epf_test->reg[bar] = base; 468 epf_test->reg[bar] = base;
462 if (epf_bar->flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
463 bar++;
464 } 469 }
465 470
466 return 0; 471 return 0;
diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index e4712a0f249c..2091508c1620 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -519,11 +519,12 @@ void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf)
519{ 519{
520 unsigned long flags; 520 unsigned long flags;
521 521
522 if (!epc || IS_ERR(epc)) 522 if (!epc || IS_ERR(epc) || !epf)
523 return; 523 return;
524 524
525 spin_lock_irqsave(&epc->lock, flags); 525 spin_lock_irqsave(&epc->lock, flags);
526 list_del(&epf->list); 526 list_del(&epf->list);
527 epf->epc = NULL;
527 spin_unlock_irqrestore(&epc->lock, flags); 528 spin_unlock_irqrestore(&epc->lock, flags);
528} 529}
529EXPORT_SYMBOL_GPL(pci_epc_remove_epf); 530EXPORT_SYMBOL_GPL(pci_epc_remove_epf);
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 3aa115ed3a65..525fd3f272b3 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -132,8 +132,6 @@ static void pci_read_vf_config_common(struct pci_dev *virtfn)
132 &physfn->sriov->subsystem_vendor); 132 &physfn->sriov->subsystem_vendor);
133 pci_read_config_word(virtfn, PCI_SUBSYSTEM_ID, 133 pci_read_config_word(virtfn, PCI_SUBSYSTEM_ID,
134 &physfn->sriov->subsystem_device); 134 &physfn->sriov->subsystem_device);
135
136 physfn->sriov->cfg_size = pci_cfg_space_size(virtfn);
137} 135}
138 136
139int pci_iov_add_virtfn(struct pci_dev *dev, int id) 137int pci_iov_add_virtfn(struct pci_dev *dev, int id)
diff --git a/drivers/pci/mmap.c b/drivers/pci/mmap.c
index 24505b08de40..b8c9011987f4 100644
--- a/drivers/pci/mmap.c
+++ b/drivers/pci/mmap.c
@@ -73,7 +73,7 @@ int pci_mmap_resource_range(struct pci_dev *pdev, int bar,
73#elif defined(HAVE_PCI_MMAP) /* && !ARCH_GENERIC_PCI_MMAP_RESOURCE */ 73#elif defined(HAVE_PCI_MMAP) /* && !ARCH_GENERIC_PCI_MMAP_RESOURCE */
74 74
75/* 75/*
76 * Legacy setup: Impement pci_mmap_resource_range() as a wrapper around 76 * Legacy setup: Implement pci_mmap_resource_range() as a wrapper around
77 * the architecture's pci_mmap_page_range(), converting to "user visible" 77 * the architecture's pci_mmap_page_range(), converting to "user visible"
78 * addresses as necessary. 78 * addresses as necessary.
79 */ 79 */
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index e039b740fe74..59a6d232f77a 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -237,7 +237,7 @@ static void msi_set_mask_bit(struct irq_data *data, u32 flag)
237} 237}
238 238
239/** 239/**
240 * pci_msi_mask_irq - Generic irq chip callback to mask PCI/MSI interrupts 240 * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts
241 * @data: pointer to irqdata associated to that interrupt 241 * @data: pointer to irqdata associated to that interrupt
242 */ 242 */
243void pci_msi_mask_irq(struct irq_data *data) 243void pci_msi_mask_irq(struct irq_data *data)
@@ -247,7 +247,7 @@ void pci_msi_mask_irq(struct irq_data *data)
247EXPORT_SYMBOL_GPL(pci_msi_mask_irq); 247EXPORT_SYMBOL_GPL(pci_msi_mask_irq);
248 248
249/** 249/**
250 * pci_msi_unmask_irq - Generic irq chip callback to unmask PCI/MSI interrupts 250 * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts
251 * @data: pointer to irqdata associated to that interrupt 251 * @data: pointer to irqdata associated to that interrupt
252 */ 252 */
253void pci_msi_unmask_irq(struct irq_data *data) 253void pci_msi_unmask_irq(struct irq_data *data)
@@ -588,11 +588,11 @@ static int msi_verify_entries(struct pci_dev *dev)
588 * msi_capability_init - configure device's MSI capability structure 588 * msi_capability_init - configure device's MSI capability structure
589 * @dev: pointer to the pci_dev data structure of MSI device function 589 * @dev: pointer to the pci_dev data structure of MSI device function
590 * @nvec: number of interrupts to allocate 590 * @nvec: number of interrupts to allocate
591 * @affd: description of automatic irq affinity assignments (may be %NULL) 591 * @affd: description of automatic IRQ affinity assignments (may be %NULL)
592 * 592 *
593 * Setup the MSI capability structure of the device with the requested 593 * Setup the MSI capability structure of the device with the requested
594 * number of interrupts. A return value of zero indicates the successful 594 * number of interrupts. A return value of zero indicates the successful
595 * setup of an entry with the new MSI irq. A negative return value indicates 595 * setup of an entry with the new MSI IRQ. A negative return value indicates
596 * an error, and a positive return value indicates the number of interrupts 596 * an error, and a positive return value indicates the number of interrupts
597 * which could have been allocated. 597 * which could have been allocated.
598 */ 598 */
@@ -609,7 +609,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
609 if (!entry) 609 if (!entry)
610 return -ENOMEM; 610 return -ENOMEM;
611 611
612 /* All MSIs are unmasked by default, Mask them all */ 612 /* All MSIs are unmasked by default; mask them all */
613 mask = msi_mask(entry->msi_attrib.multi_cap); 613 mask = msi_mask(entry->msi_attrib.multi_cap);
614 msi_mask_irq(entry, mask, mask); 614 msi_mask_irq(entry, mask, mask);
615 615
@@ -637,7 +637,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
637 return ret; 637 return ret;
638 } 638 }
639 639
640 /* Set MSI enabled bits */ 640 /* Set MSI enabled bits */
641 pci_intx_for_msi(dev, 0); 641 pci_intx_for_msi(dev, 0);
642 pci_msi_set_enable(dev, 1); 642 pci_msi_set_enable(dev, 1);
643 dev->msi_enabled = 1; 643 dev->msi_enabled = 1;
@@ -729,11 +729,11 @@ static void msix_program_entries(struct pci_dev *dev,
729 * @dev: pointer to the pci_dev data structure of MSI-X device function 729 * @dev: pointer to the pci_dev data structure of MSI-X device function
730 * @entries: pointer to an array of struct msix_entry entries 730 * @entries: pointer to an array of struct msix_entry entries
731 * @nvec: number of @entries 731 * @nvec: number of @entries
732 * @affd: Optional pointer to enable automatic affinity assignement 732 * @affd: Optional pointer to enable automatic affinity assignment
733 * 733 *
734 * Setup the MSI-X capability structure of device function with a 734 * Setup the MSI-X capability structure of device function with a
735 * single MSI-X irq. A return of zero indicates the successful setup of 735 * single MSI-X IRQ. A return of zero indicates the successful setup of
736 * requested MSI-X entries with allocated irqs or non-zero for otherwise. 736 * requested MSI-X entries with allocated IRQs or non-zero for otherwise.
737 **/ 737 **/
738static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, 738static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
739 int nvec, struct irq_affinity *affd) 739 int nvec, struct irq_affinity *affd)
@@ -789,7 +789,7 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
789out_avail: 789out_avail:
790 if (ret < 0) { 790 if (ret < 0) {
791 /* 791 /*
792 * If we had some success, report the number of irqs 792 * If we had some success, report the number of IRQs
793 * we succeeded in setting up. 793 * we succeeded in setting up.
794 */ 794 */
795 struct msi_desc *entry; 795 struct msi_desc *entry;
@@ -812,7 +812,7 @@ out_free:
812/** 812/**
813 * pci_msi_supported - check whether MSI may be enabled on a device 813 * pci_msi_supported - check whether MSI may be enabled on a device
814 * @dev: pointer to the pci_dev data structure of MSI device function 814 * @dev: pointer to the pci_dev data structure of MSI device function
815 * @nvec: how many MSIs have been requested ? 815 * @nvec: how many MSIs have been requested?
816 * 816 *
817 * Look at global flags, the device itself, and its parent buses 817 * Look at global flags, the device itself, and its parent buses
818 * to determine if MSI/-X are supported for the device. If MSI/-X is 818 * to determine if MSI/-X are supported for the device. If MSI/-X is
@@ -896,7 +896,7 @@ static void pci_msi_shutdown(struct pci_dev *dev)
896 /* Keep cached state to be restored */ 896 /* Keep cached state to be restored */
897 __pci_msi_desc_mask_irq(desc, mask, ~mask); 897 __pci_msi_desc_mask_irq(desc, mask, ~mask);
898 898
899 /* Restore dev->irq to its default pin-assertion irq */ 899 /* Restore dev->irq to its default pin-assertion IRQ */
900 dev->irq = desc->msi_attrib.default_irq; 900 dev->irq = desc->msi_attrib.default_irq;
901 pcibios_alloc_irq(dev); 901 pcibios_alloc_irq(dev);
902} 902}
@@ -958,7 +958,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
958 } 958 }
959 } 959 }
960 960
961 /* Check whether driver already requested for MSI irq */ 961 /* Check whether driver already requested for MSI IRQ */
962 if (dev->msi_enabled) { 962 if (dev->msi_enabled) {
963 pci_info(dev, "can't enable MSI-X (MSI IRQ already assigned)\n"); 963 pci_info(dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
964 return -EINVAL; 964 return -EINVAL;
@@ -1026,7 +1026,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
1026 if (!pci_msi_supported(dev, minvec)) 1026 if (!pci_msi_supported(dev, minvec))
1027 return -EINVAL; 1027 return -EINVAL;
1028 1028
1029 /* Check whether driver already requested MSI-X irqs */ 1029 /* Check whether driver already requested MSI-X IRQs */
1030 if (dev->msix_enabled) { 1030 if (dev->msix_enabled) {
1031 pci_info(dev, "can't enable MSI (MSI-X already enabled)\n"); 1031 pci_info(dev, "can't enable MSI (MSI-X already enabled)\n");
1032 return -EINVAL; 1032 return -EINVAL;
@@ -1113,8 +1113,8 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
1113 * pci_enable_msix_range - configure device's MSI-X capability structure 1113 * pci_enable_msix_range - configure device's MSI-X capability structure
1114 * @dev: pointer to the pci_dev data structure of MSI-X device function 1114 * @dev: pointer to the pci_dev data structure of MSI-X device function
1115 * @entries: pointer to an array of MSI-X entries 1115 * @entries: pointer to an array of MSI-X entries
1116 * @minvec: minimum number of MSI-X irqs requested 1116 * @minvec: minimum number of MSI-X IRQs requested
1117 * @maxvec: maximum number of MSI-X irqs requested 1117 * @maxvec: maximum number of MSI-X IRQs requested
1118 * 1118 *
1119 * Setup the MSI-X capability structure of device function with a maximum 1119 * Setup the MSI-X capability structure of device function with a maximum
1120 * possible number of interrupts in the range between @minvec and @maxvec 1120 * possible number of interrupts in the range between @minvec and @maxvec
@@ -1179,7 +1179,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
1179 return msi_vecs; 1179 return msi_vecs;
1180 } 1180 }
1181 1181
1182 /* use legacy irq if allowed */ 1182 /* use legacy IRQ if allowed */
1183 if (flags & PCI_IRQ_LEGACY) { 1183 if (flags & PCI_IRQ_LEGACY) {
1184 if (min_vecs == 1 && dev->irq) { 1184 if (min_vecs == 1 && dev->irq) {
1185 /* 1185 /*
@@ -1248,7 +1248,7 @@ int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
1248EXPORT_SYMBOL(pci_irq_vector); 1248EXPORT_SYMBOL(pci_irq_vector);
1249 1249
1250/** 1250/**
1251 * pci_irq_get_affinity - return the affinity of a particular msi vector 1251 * pci_irq_get_affinity - return the affinity of a particular MSI vector
1252 * @dev: PCI device to operate on 1252 * @dev: PCI device to operate on
1253 * @nr: device-relative interrupt vector index (0-based). 1253 * @nr: device-relative interrupt vector index (0-based).
1254 */ 1254 */
@@ -1280,7 +1280,7 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
1280EXPORT_SYMBOL(pci_irq_get_affinity); 1280EXPORT_SYMBOL(pci_irq_get_affinity);
1281 1281
1282/** 1282/**
1283 * pci_irq_get_node - return the numa node of a particular msi vector 1283 * pci_irq_get_node - return the NUMA node of a particular MSI vector
1284 * @pdev: PCI device to operate on 1284 * @pdev: PCI device to operate on
1285 * @vec: device-relative interrupt vector index (0-based). 1285 * @vec: device-relative interrupt vector index (0-based).
1286 */ 1286 */
@@ -1330,7 +1330,7 @@ void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg)
1330/** 1330/**
1331 * pci_msi_domain_calc_hwirq - Generate a unique ID for an MSI source 1331 * pci_msi_domain_calc_hwirq - Generate a unique ID for an MSI source
1332 * @dev: Pointer to the PCI device 1332 * @dev: Pointer to the PCI device
1333 * @desc: Pointer to the msi descriptor 1333 * @desc: Pointer to the MSI descriptor
1334 * 1334 *
1335 * The ID number is only used within the irqdomain. 1335 * The ID number is only used within the irqdomain.
1336 */ 1336 */
@@ -1348,7 +1348,8 @@ static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc)
1348} 1348}
1349 1349
1350/** 1350/**
1351 * pci_msi_domain_check_cap - Verify that @domain supports the capabilities for @dev 1351 * pci_msi_domain_check_cap - Verify that @domain supports the capabilities
1352 * for @dev
1352 * @domain: The interrupt domain to check 1353 * @domain: The interrupt domain to check
1353 * @info: The domain info for verification 1354 * @info: The domain info for verification
1354 * @dev: The device to check 1355 * @dev: The device to check
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index a3073ce16520..234476226529 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -195,7 +195,7 @@ EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
195 195
196/* 196/*
197 * Note this function returns the parent PCI device with a 197 * Note this function returns the parent PCI device with a
198 * reference taken. It is the caller's responsibily to drop 198 * reference taken. It is the caller's responsibility to drop
199 * the reference. 199 * the reference.
200 */ 200 */
201static struct pci_dev *find_parent_pci_dev(struct device *dev) 201static struct pci_dev *find_parent_pci_dev(struct device *dev)
@@ -355,7 +355,7 @@ static int upstream_bridge_distance(struct pci_dev *provider,
355 355
356 /* 356 /*
357 * Allow the connection if both devices are on a whitelisted root 357 * Allow the connection if both devices are on a whitelisted root
358 * complex, but add an arbitary large value to the distance. 358 * complex, but add an arbitrary large value to the distance.
359 */ 359 */
360 if (root_complex_whitelist(provider) && 360 if (root_complex_whitelist(provider) &&
361 root_complex_whitelist(client)) 361 root_complex_whitelist(client))
@@ -414,7 +414,7 @@ static int upstream_bridge_distance_warn(struct pci_dev *provider,
414} 414}
415 415
416/** 416/**
417 * pci_p2pdma_distance_many - Determive the cumulative distance between 417 * pci_p2pdma_distance_many - Determine the cumulative distance between
418 * a p2pdma provider and the clients in use. 418 * a p2pdma provider and the clients in use.
419 * @provider: p2pdma provider to check against the client list 419 * @provider: p2pdma provider to check against the client list
420 * @clients: array of devices to check (NULL-terminated) 420 * @clients: array of devices to check (NULL-terminated)
@@ -443,6 +443,14 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
443 return -1; 443 return -1;
444 444
445 for (i = 0; i < num_clients; i++) { 445 for (i = 0; i < num_clients; i++) {
446 if (IS_ENABLED(CONFIG_DMA_VIRT_OPS) &&
447 clients[i]->dma_ops == &dma_virt_ops) {
448 if (verbose)
449 dev_warn(clients[i],
450 "cannot be used for peer-to-peer DMA because the driver makes use of dma_virt_ops\n");
451 return -1;
452 }
453
446 pci_client = find_parent_pci_dev(clients[i]); 454 pci_client = find_parent_pci_dev(clients[i]);
447 if (!pci_client) { 455 if (!pci_client) {
448 if (verbose) 456 if (verbose)
@@ -721,7 +729,7 @@ int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
721 * p2pdma mappings are not compatible with devices that use 729 * p2pdma mappings are not compatible with devices that use
722 * dma_virt_ops. If the upper layers do the right thing 730 * dma_virt_ops. If the upper layers do the right thing
723 * this should never happen because it will be prevented 731 * this should never happen because it will be prevented
724 * by the check in pci_p2pdma_add_client() 732 * by the check in pci_p2pdma_distance_many()
725 */ 733 */
726 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_DMA_VIRT_OPS) && 734 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_DMA_VIRT_OPS) &&
727 dev->dma_ops == &dma_virt_ops)) 735 dev->dma_ops == &dma_virt_ops))
diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c
index 83fb077d0b41..06083b86d4f4 100644
--- a/drivers/pci/pci-bridge-emul.c
+++ b/drivers/pci/pci-bridge-emul.c
@@ -305,7 +305,7 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
305} 305}
306 306
307/* 307/*
308 * Cleanup a pci_bridge_emul structure that was previously initilized 308 * Cleanup a pci_bridge_emul structure that was previously initialized
309 * using pci_bridge_emul_init(). 309 * using pci_bridge_emul_init().
310 */ 310 */
311void pci_bridge_emul_cleanup(struct pci_bridge_emul *bridge) 311void pci_bridge_emul_cleanup(struct pci_bridge_emul *bridge)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 36dbe960306b..a8124e47bf6e 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -399,7 +399,8 @@ void __weak pcibios_free_irq(struct pci_dev *dev)
399#ifdef CONFIG_PCI_IOV 399#ifdef CONFIG_PCI_IOV
400static inline bool pci_device_can_probe(struct pci_dev *pdev) 400static inline bool pci_device_can_probe(struct pci_dev *pdev)
401{ 401{
402 return (!pdev->is_virtfn || pdev->physfn->sriov->drivers_autoprobe); 402 return (!pdev->is_virtfn || pdev->physfn->sriov->drivers_autoprobe ||
403 pdev->driver_override);
403} 404}
404#else 405#else
405static inline bool pci_device_can_probe(struct pci_dev *pdev) 406static inline bool pci_device_can_probe(struct pci_dev *pdev)
@@ -414,6 +415,9 @@ static int pci_device_probe(struct device *dev)
414 struct pci_dev *pci_dev = to_pci_dev(dev); 415 struct pci_dev *pci_dev = to_pci_dev(dev);
415 struct pci_driver *drv = to_pci_driver(dev->driver); 416 struct pci_driver *drv = to_pci_driver(dev->driver);
416 417
418 if (!pci_device_can_probe(pci_dev))
419 return -ENODEV;
420
417 pci_assign_irq(pci_dev); 421 pci_assign_irq(pci_dev);
418 422
419 error = pcibios_alloc_irq(pci_dev); 423 error = pcibios_alloc_irq(pci_dev);
@@ -421,12 +425,10 @@ static int pci_device_probe(struct device *dev)
421 return error; 425 return error;
422 426
423 pci_dev_get(pci_dev); 427 pci_dev_get(pci_dev);
424 if (pci_device_can_probe(pci_dev)) { 428 error = __pci_device_probe(drv, pci_dev);
425 error = __pci_device_probe(drv, pci_dev); 429 if (error) {
426 if (error) { 430 pcibios_free_irq(pci_dev);
427 pcibios_free_irq(pci_dev); 431 pci_dev_put(pci_dev);
428 pci_dev_put(pci_dev);
429 }
430 } 432 }
431 433
432 return error; 434 return error;
diff --git a/drivers/pci/pci-pf-stub.c b/drivers/pci/pci-pf-stub.c
index 9795649fc6f9..ef293e735c55 100644
--- a/drivers/pci/pci-pf-stub.c
+++ b/drivers/pci/pci-pf-stub.c
@@ -1,7 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/* pci-pf-stub - simple stub driver for PCI SR-IOV PF device 2/* pci-pf-stub - simple stub driver for PCI SR-IOV PF device
3 * 3 *
4 * This driver is meant to act as a "whitelist" for devices that provde 4 * This driver is meant to act as a "whitelist" for devices that provide
5 * SR-IOV functionality while at the same time not actually needing a 5 * SR-IOV functionality while at the same time not actually needing a
6 * driver of their own. 6 * driver of their own.
7 */ 7 */
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 6d27475e39b2..965c72104150 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -182,6 +182,9 @@ static ssize_t current_link_speed_show(struct device *dev,
182 return -EINVAL; 182 return -EINVAL;
183 183
184 switch (linkstat & PCI_EXP_LNKSTA_CLS) { 184 switch (linkstat & PCI_EXP_LNKSTA_CLS) {
185 case PCI_EXP_LNKSTA_CLS_32_0GB:
186 speed = "32 GT/s";
187 break;
185 case PCI_EXP_LNKSTA_CLS_16_0GB: 188 case PCI_EXP_LNKSTA_CLS_16_0GB:
186 speed = "16 GT/s"; 189 speed = "16 GT/s";
187 break; 190 break;
@@ -477,7 +480,7 @@ static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
477 pci_stop_and_remove_bus_device_locked(to_pci_dev(dev)); 480 pci_stop_and_remove_bus_device_locked(to_pci_dev(dev));
478 return count; 481 return count;
479} 482}
480static struct device_attribute dev_remove_attr = __ATTR(remove, 483static struct device_attribute dev_remove_attr = __ATTR_IGNORE_LOCKDEP(remove,
481 (S_IWUSR|S_IWGRP), 484 (S_IWUSR|S_IWGRP),
482 NULL, remove_store); 485 NULL, remove_store);
483 486
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b1f563916036..29ed5ec1ac27 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4535,7 +4535,7 @@ static int pci_af_flr(struct pci_dev *dev, int probe)
4535 4535
4536 /* 4536 /*
4537 * Wait for Transaction Pending bit to clear. A word-aligned test 4537 * Wait for Transaction Pending bit to clear. A word-aligned test
4538 * is used, so we use the conrol offset rather than status and shift 4538 * is used, so we use the control offset rather than status and shift
4539 * the test bit to match. 4539 * the test bit to match.
4540 */ 4540 */
4541 if (!pci_wait_for_pending(dev, pos + PCI_AF_CTRL, 4541 if (!pci_wait_for_pending(dev, pos + PCI_AF_CTRL,
@@ -5669,7 +5669,9 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev)
5669 */ 5669 */
5670 pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2); 5670 pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2);
5671 if (lnkcap2) { /* PCIe r3.0-compliant */ 5671 if (lnkcap2) { /* PCIe r3.0-compliant */
5672 if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_16_0GB) 5672 if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_32_0GB)
5673 return PCIE_SPEED_32_0GT;
5674 else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_16_0GB)
5673 return PCIE_SPEED_16_0GT; 5675 return PCIE_SPEED_16_0GT;
5674 else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB) 5676 else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
5675 return PCIE_SPEED_8_0GT; 5677 return PCIE_SPEED_8_0GT;
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 5db6f985f16d..1be03a97cb92 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -298,7 +298,6 @@ struct pci_sriov {
298 u16 driver_max_VFs; /* Max num VFs driver supports */ 298 u16 driver_max_VFs; /* Max num VFs driver supports */
299 struct pci_dev *dev; /* Lowest numbered PF */ 299 struct pci_dev *dev; /* Lowest numbered PF */
300 struct pci_dev *self; /* This PF */ 300 struct pci_dev *self; /* This PF */
301 u32 cfg_size; /* VF config space size */
302 u32 class; /* VF device */ 301 u32 class; /* VF device */
303 u8 hdr_type; /* VF header type */ 302 u8 hdr_type; /* VF header type */
304 u16 subsystem_vendor; /* VF subsystem vendor */ 303 u16 subsystem_vendor; /* VF subsystem vendor */
diff --git a/drivers/pci/pcie/aer_inject.c b/drivers/pci/pcie/aer_inject.c
index 043b8b0cfcc5..6988fe7389b9 100644
--- a/drivers/pci/pcie/aer_inject.c
+++ b/drivers/pci/pcie/aer_inject.c
@@ -2,7 +2,7 @@
2/* 2/*
3 * PCIe AER software error injection support. 3 * PCIe AER software error injection support.
4 * 4 *
5 * Debuging PCIe AER code is quite difficult because it is hard to 5 * Debugging PCIe AER code is quite difficult because it is hard to
6 * trigger various real hardware errors. Software based error 6 * trigger various real hardware errors. Software based error
7 * injection can fake almost all kinds of errors with the help of a 7 * injection can fake almost all kinds of errors with the help of a
8 * user space helper tool aer-inject, which can be gotten from: 8 * user space helper tool aer-inject, which can be gotten from:
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index f9ef7ad3f75d..a3c7338fad86 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -668,7 +668,7 @@ const unsigned char pcie_link_speed[] = {
668 PCIE_SPEED_5_0GT, /* 2 */ 668 PCIE_SPEED_5_0GT, /* 2 */
669 PCIE_SPEED_8_0GT, /* 3 */ 669 PCIE_SPEED_8_0GT, /* 3 */
670 PCIE_SPEED_16_0GT, /* 4 */ 670 PCIE_SPEED_16_0GT, /* 4 */
671 PCI_SPEED_UNKNOWN, /* 5 */ 671 PCIE_SPEED_32_0GT, /* 5 */
672 PCI_SPEED_UNKNOWN, /* 6 */ 672 PCI_SPEED_UNKNOWN, /* 6 */
673 PCI_SPEED_UNKNOWN, /* 7 */ 673 PCI_SPEED_UNKNOWN, /* 7 */
674 PCI_SPEED_UNKNOWN, /* 8 */ 674 PCI_SPEED_UNKNOWN, /* 8 */
@@ -1555,17 +1555,6 @@ static int pci_cfg_space_size_ext(struct pci_dev *dev)
1555 return PCI_CFG_SPACE_EXP_SIZE; 1555 return PCI_CFG_SPACE_EXP_SIZE;
1556} 1556}
1557 1557
1558#ifdef CONFIG_PCI_IOV
1559static bool is_vf0(struct pci_dev *dev)
1560{
1561 if (pci_iov_virtfn_devfn(dev->physfn, 0) == dev->devfn &&
1562 pci_iov_virtfn_bus(dev->physfn, 0) == dev->bus->number)
1563 return true;
1564
1565 return false;
1566}
1567#endif
1568
1569int pci_cfg_space_size(struct pci_dev *dev) 1558int pci_cfg_space_size(struct pci_dev *dev)
1570{ 1559{
1571 int pos; 1560 int pos;
@@ -1573,9 +1562,18 @@ int pci_cfg_space_size(struct pci_dev *dev)
1573 u16 class; 1562 u16 class;
1574 1563
1575#ifdef CONFIG_PCI_IOV 1564#ifdef CONFIG_PCI_IOV
1576 /* Read cached value for all VFs except for VF0 */ 1565 /*
1577 if (dev->is_virtfn && !is_vf0(dev)) 1566 * Per the SR-IOV specification (rev 1.1, sec 3.5), VFs are required to
1578 return dev->physfn->sriov->cfg_size; 1567 * implement a PCIe capability and therefore must implement extended
1568 * config space. We can skip the NO_EXTCFG test below and the
1569 * reachability/aliasing test in pci_cfg_space_size_ext() by virtue of
1570 * the fact that the SR-IOV capability on the PF resides in extended
1571 * config space and must be accessible and non-aliased to have enabled
1572 * support for this VF. This is a micro performance optimization for
1573 * systems supporting many VFs.
1574 */
1575 if (dev->is_virtfn)
1576 return PCI_CFG_SPACE_EXP_SIZE;
1579#endif 1577#endif
1580 1578
1581 if (dev->bus->bus_flags & PCI_BUS_FLAGS_NO_EXTCFG) 1579 if (dev->bus->bus_flags & PCI_BUS_FLAGS_NO_EXTCFG)
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index 445b51db75b0..fe7fe678965b 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -377,7 +377,7 @@ static int show_device(struct seq_file *m, void *v)
377 } 377 }
378 seq_putc(m, '\t'); 378 seq_putc(m, '\t');
379 if (drv) 379 if (drv)
380 seq_printf(m, "%s", drv->name); 380 seq_puts(m, drv->name);
381 seq_putc(m, '\n'); 381 seq_putc(m, '\n');
382 return 0; 382 return 0;
383} 383}
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 0f16acc323c6..208aacf39329 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4934,35 +4934,49 @@ static void quirk_fsl_no_msi(struct pci_dev *pdev)
4934DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, quirk_fsl_no_msi); 4934DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, quirk_fsl_no_msi);
4935 4935
4936/* 4936/*
4937 * GPUs with integrated HDA controller for streaming audio to attached displays 4937 * Although not allowed by the spec, some multi-function devices have
4938 * need a device link from the HDA controller (consumer) to the GPU (supplier) 4938 * dependencies of one function (consumer) on another (supplier). For the
4939 * so that the GPU is powered up whenever the HDA controller is accessed. 4939 * consumer to work in D0, the supplier must also be in D0. Create a
4940 * The GPU and HDA controller are functions 0 and 1 of the same PCI device. 4940 * device link from the consumer to the supplier to enforce this
4941 * The device link stays in place until shutdown (or removal of the PCI device 4941 * dependency. Runtime PM is allowed by default on the consumer to prevent
4942 * if it's hotplugged). Runtime PM is allowed by default on the HDA controller 4942 * it from permanently keeping the supplier awake.
4943 * to prevent it from permanently keeping the GPU awake.
4944 */ 4943 */
4945static void quirk_gpu_hda(struct pci_dev *hda) 4944static void pci_create_device_link(struct pci_dev *pdev, unsigned int consumer,
4945 unsigned int supplier, unsigned int class,
4946 unsigned int class_shift)
4946{ 4947{
4947 struct pci_dev *gpu; 4948 struct pci_dev *supplier_pdev;
4948 4949
4949 if (PCI_FUNC(hda->devfn) != 1) 4950 if (PCI_FUNC(pdev->devfn) != consumer)
4950 return; 4951 return;
4951 4952
4952 gpu = pci_get_domain_bus_and_slot(pci_domain_nr(hda->bus), 4953 supplier_pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus),
4953 hda->bus->number, 4954 pdev->bus->number,
4954 PCI_DEVFN(PCI_SLOT(hda->devfn), 0)); 4955 PCI_DEVFN(PCI_SLOT(pdev->devfn), supplier));
4955 if (!gpu || (gpu->class >> 16) != PCI_BASE_CLASS_DISPLAY) { 4956 if (!supplier_pdev || (supplier_pdev->class >> class_shift) != class) {
4956 pci_dev_put(gpu); 4957 pci_dev_put(supplier_pdev);
4957 return; 4958 return;
4958 } 4959 }
4959 4960
4960 if (!device_link_add(&hda->dev, &gpu->dev, 4961 if (device_link_add(&pdev->dev, &supplier_pdev->dev,
4961 DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) 4962 DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME))
4962 pci_err(hda, "cannot link HDA to GPU %s\n", pci_name(gpu)); 4963 pci_info(pdev, "D0 power state depends on %s\n",
4964 pci_name(supplier_pdev));
4965 else
4966 pci_err(pdev, "Cannot enforce power dependency on %s\n",
4967 pci_name(supplier_pdev));
4968
4969 pm_runtime_allow(&pdev->dev);
4970 pci_dev_put(supplier_pdev);
4971}
4963 4972
4964 pm_runtime_allow(&hda->dev); 4973/*
4965 pci_dev_put(gpu); 4974 * Create device link for GPUs with integrated HDA controller for streaming
4975 * audio to attached displays.
4976 */
4977static void quirk_gpu_hda(struct pci_dev *hda)
4978{
4979 pci_create_device_link(hda, 1, 0, PCI_BASE_CLASS_DISPLAY, 16);
4966} 4980}
4967DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID, 4981DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
4968 PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda); 4982 PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda);
@@ -4972,6 +4986,62 @@ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
4972 PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda); 4986 PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda);
4973 4987
4974/* 4988/*
4989 * Create device link for NVIDIA GPU with integrated USB xHCI Host
4990 * controller to VGA.
4991 */
4992static void quirk_gpu_usb(struct pci_dev *usb)
4993{
4994 pci_create_device_link(usb, 2, 0, PCI_BASE_CLASS_DISPLAY, 16);
4995}
4996DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
4997 PCI_CLASS_SERIAL_USB, 8, quirk_gpu_usb);
4998
4999/*
5000 * Create device link for NVIDIA GPU with integrated Type-C UCSI controller
5001 * to VGA. Currently there is no class code defined for UCSI device over PCI
5002 * so using UNKNOWN class for now and it will be updated when UCSI
5003 * over PCI gets a class code.
5004 */
5005#define PCI_CLASS_SERIAL_UNKNOWN 0x0c80
5006static void quirk_gpu_usb_typec_ucsi(struct pci_dev *ucsi)
5007{
5008 pci_create_device_link(ucsi, 3, 0, PCI_BASE_CLASS_DISPLAY, 16);
5009}
5010DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
5011 PCI_CLASS_SERIAL_UNKNOWN, 8,
5012 quirk_gpu_usb_typec_ucsi);
5013
5014/*
5015 * Enable the NVIDIA GPU integrated HDA controller if the BIOS left it
5016 * disabled. https://devtalk.nvidia.com/default/topic/1024022
5017 */
5018static void quirk_nvidia_hda(struct pci_dev *gpu)
5019{
5020 u8 hdr_type;
5021 u32 val;
5022
5023 /* There was no integrated HDA controller before MCP89 */
5024 if (gpu->device < PCI_DEVICE_ID_NVIDIA_GEFORCE_320M)
5025 return;
5026
5027 /* Bit 25 at offset 0x488 enables the HDA controller */
5028 pci_read_config_dword(gpu, 0x488, &val);
5029 if (val & BIT(25))
5030 return;
5031
5032 pci_info(gpu, "Enabling HDA controller\n");
5033 pci_write_config_dword(gpu, 0x488, val | BIT(25));
5034
5035 /* The GPU becomes a multi-function device when the HDA is enabled */
5036 pci_read_config_byte(gpu, PCI_HEADER_TYPE, &hdr_type);
5037 gpu->multifunction = !!(hdr_type & 0x80);
5038}
5039DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
5040 PCI_BASE_CLASS_DISPLAY, 16, quirk_nvidia_hda);
5041DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
5042 PCI_BASE_CLASS_DISPLAY, 16, quirk_nvidia_hda);
5043
5044/*
4975 * Some IDT switches incorrectly flag an ACS Source Validation error on 5045 * Some IDT switches incorrectly flag an ACS Source Validation error on
4976 * completions for config read requests even though PCIe r4.0, sec 5046 * completions for config read requests even though PCIe r4.0, sec
4977 * 6.12.1.1, says that completions are never affected by ACS Source 5047 * 6.12.1.1, says that completions are never affected by ACS Source
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 0cdd5ff389de..79b1fa6519be 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1684,10 +1684,15 @@ static enum enable_type pci_realloc_detect(struct pci_bus *bus,
1684 enum enable_type enable_local) 1684 enum enable_type enable_local)
1685{ 1685{
1686 bool unassigned = false; 1686 bool unassigned = false;
1687 struct pci_host_bridge *host;
1687 1688
1688 if (enable_local != undefined) 1689 if (enable_local != undefined)
1689 return enable_local; 1690 return enable_local;
1690 1691
1692 host = pci_find_host_bridge(bus);
1693 if (host->preserve_config)
1694 return auto_disabled;
1695
1691 pci_walk_bus(bus, iov_resources_unassigned, &unassigned); 1696 pci_walk_bus(bus, iov_resources_unassigned, &unassigned);
1692 if (unassigned) 1697 if (unassigned)
1693 return auto_enabled; 1698 return auto_enabled;
@@ -1861,16 +1866,6 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
1861 available_mmio_pref); 1866 available_mmio_pref);
1862 1867
1863 /* 1868 /*
1864 * Calculate the total amount of extra resource space we can
1865 * pass to bridges below this one. This is basically the
1866 * extra space reduced by the minimal required space for the
1867 * non-hotplug bridges.
1868 */
1869 remaining_io = available_io;
1870 remaining_mmio = available_mmio;
1871 remaining_mmio_pref = available_mmio_pref;
1872
1873 /*
1874 * Calculate how many hotplug bridges and normal bridges there 1869 * Calculate how many hotplug bridges and normal bridges there
1875 * are on this bus. We will distribute the additional available 1870 * are on this bus. We will distribute the additional available
1876 * resources between hotplug bridges. 1871 * resources between hotplug bridges.
@@ -1882,6 +1877,34 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
1882 normal_bridges++; 1877 normal_bridges++;
1883 } 1878 }
1884 1879
1880 /*
1881 * There is only one bridge on the bus so it gets all available
1882 * resources which it can then distribute to the possible hotplug
1883 * bridges below.
1884 */
1885 if (hotplug_bridges + normal_bridges == 1) {
1886 dev = list_first_entry(&bus->devices, struct pci_dev, bus_list);
1887 if (dev->subordinate) {
1888 pci_bus_distribute_available_resources(dev->subordinate,
1889 add_list, available_io, available_mmio,
1890 available_mmio_pref);
1891 }
1892 return;
1893 }
1894
1895 if (hotplug_bridges == 0)
1896 return;
1897
1898 /*
1899 * Calculate the total amount of extra resource space we can
1900 * pass to bridges below this one. This is basically the
1901 * extra space reduced by the minimal required space for the
1902 * non-hotplug bridges.
1903 */
1904 remaining_io = available_io;
1905 remaining_mmio = available_mmio;
1906 remaining_mmio_pref = available_mmio_pref;
1907
1885 for_each_pci_bridge(dev, bus) { 1908 for_each_pci_bridge(dev, bus) {
1886 const struct resource *res; 1909 const struct resource *res;
1887 1910
@@ -1906,21 +1929,6 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
1906 } 1929 }
1907 1930
1908 /* 1931 /*
1909 * There is only one bridge on the bus so it gets all available
1910 * resources which it can then distribute to the possible hotplug
1911 * bridges below.
1912 */
1913 if (hotplug_bridges + normal_bridges == 1) {
1914 dev = list_first_entry(&bus->devices, struct pci_dev, bus_list);
1915 if (dev->subordinate) {
1916 pci_bus_distribute_available_resources(dev->subordinate,
1917 add_list, available_io, available_mmio,
1918 available_mmio_pref);
1919 }
1920 return;
1921 }
1922
1923 /*
1924 * Go over devices on this bus and distribute the remaining 1932 * Go over devices on this bus and distribute the remaining
1925 * resource space between hotplug bridges. 1933 * resource space between hotplug bridges.
1926 */ 1934 */
@@ -1936,8 +1944,6 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
1936 * Distribute available extra resources equally between 1944 * Distribute available extra resources equally between
1937 * hotplug-capable downstream ports taking alignment into 1945 * hotplug-capable downstream ports taking alignment into
1938 * account. 1946 * account.
1939 *
1940 * Here hotplug_bridges is always != 0.
1941 */ 1947 */
1942 align = pci_resource_alignment(bridge, io_res); 1948 align = pci_resource_alignment(bridge, io_res);
1943 io = div64_ul(available_io, hotplug_bridges); 1949 io = div64_ul(available_io, hotplug_bridges);
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index f4d92b1afe7b..ae4aa0e1f2f4 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -75,6 +75,7 @@ static const char *pci_bus_speed_strings[] = {
75 "5.0 GT/s PCIe", /* 0x15 */ 75 "5.0 GT/s PCIe", /* 0x15 */
76 "8.0 GT/s PCIe", /* 0x16 */ 76 "8.0 GT/s PCIe", /* 0x16 */
77 "16.0 GT/s PCIe", /* 0x17 */ 77 "16.0 GT/s PCIe", /* 0x17 */
78 "32.0 GT/s PCIe", /* 0x18 */
78}; 79};
79 80
80static ssize_t bus_speed_read(enum pci_bus_speed speed, char *buf) 81static ssize_t bus_speed_read(enum pci_bus_speed speed, char *buf)
diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index 136e8f64848a..b55cdfe22a2e 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -606,7 +606,7 @@ int power_supply_get_battery_info(struct power_supply *psy,
606 606
607 /* The property and field names below must correspond to elements 607 /* The property and field names below must correspond to elements
608 * in enum power_supply_property. For reasoning, see 608 * in enum power_supply_property. For reasoning, see
609 * Documentation/power/power_supply_class.txt. 609 * Documentation/power/power_supply_class.rst.
610 */ 610 */
611 611
612 of_property_read_u32(battery_np, "energy-full-design-microwatt-hours", 612 of_property_read_u32(battery_np, "energy-full-design-microwatt-hours",
diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c
index 8878720dd779..17e7796a832b 100644
--- a/drivers/soc/tegra/pmc.c
+++ b/drivers/soc/tegra/pmc.c
@@ -700,6 +700,7 @@ int tegra_powergate_power_on(unsigned int id)
700 700
701 return tegra_powergate_set(pmc, id, true); 701 return tegra_powergate_set(pmc, id, true);
702} 702}
703EXPORT_SYMBOL(tegra_powergate_power_on);
703 704
704/** 705/**
705 * tegra_powergate_power_off() - power off partition 706 * tegra_powergate_power_off() - power off partition
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c7eef32e7739..5b8328a99b2a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -52,7 +52,7 @@
52 * irq line disabled until the threaded handler has been run. 52 * irq line disabled until the threaded handler has been run.
53 * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend. Does not guarantee 53 * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend. Does not guarantee
54 * that this interrupt will wake the system from a suspended 54 * that this interrupt will wake the system from a suspended
55 * state. See Documentation/power/suspend-and-interrupts.txt 55 * state. See Documentation/power/suspend-and-interrupts.rst
56 * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set 56 * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
57 * IRQF_NO_THREAD - Interrupt cannot be threaded 57 * IRQF_NO_THREAD - Interrupt cannot be threaded
58 * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device 58 * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 09366859aac2..b2c1648f7e5d 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -16,6 +16,25 @@ typedef unsigned long kernel_ulong_t;
16 16
17#define PCI_ANY_ID (~0) 17#define PCI_ANY_ID (~0)
18 18
19/**
20 * struct pci_device_id - PCI device ID structure
21 * @vendor: Vendor ID to match (or PCI_ANY_ID)
22 * @device: Device ID to match (or PCI_ANY_ID)
23 * @subvendor: Subsystem vendor ID to match (or PCI_ANY_ID)
24 * @subdevice: Subsystem device ID to match (or PCI_ANY_ID)
25 * @class: Device class, subclass, and "interface" to match.
26 * See Appendix D of the PCI Local Bus Spec or
27 * include/linux/pci_ids.h for a full list of classes.
28 * Most drivers do not need to specify class/class_mask
29 * as vendor/device is normally sufficient.
30 * @class_mask: Limit which sub-fields of the class field are compared.
31 * See drivers/scsi/sym53c8xx_2/ for example of usage.
32 * @driver_data: Data private to the driver.
33 * Most drivers don't need to use driver_data field.
34 * Best practice is to use driver_data as an index
35 * into a static list of equivalent device types,
36 * instead of using it as a pointer.
37 */
19struct pci_device_id { 38struct pci_device_id {
20 __u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/ 39 __u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/
21 __u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */ 40 __u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */
@@ -257,17 +276,17 @@ struct pcmcia_device_id {
257 __u16 match_flags; 276 __u16 match_flags;
258 277
259 __u16 manf_id; 278 __u16 manf_id;
260 __u16 card_id; 279 __u16 card_id;
261 280
262 __u8 func_id; 281 __u8 func_id;
263 282
264 /* for real multi-function devices */ 283 /* for real multi-function devices */
265 __u8 function; 284 __u8 function;
266 285
267 /* for pseudo multi-function devices */ 286 /* for pseudo multi-function devices */
268 __u8 device_no; 287 __u8 device_no;
269 288
270 __u32 prod_id_hash[4]; 289 __u32 prod_id_hash[4];
271 290
272 /* not matched against in kernelspace */ 291 /* not matched against in kernelspace */
273 const char * prod_id[4]; 292 const char * prod_id[4];
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 8082b612f561..62b7fdcc661c 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -107,9 +107,10 @@ static inline void acpiphp_check_host_bridge(struct acpi_device *adev) { }
107#endif 107#endif
108 108
109extern const guid_t pci_acpi_dsm_guid; 109extern const guid_t pci_acpi_dsm_guid;
110#define DEVICE_LABEL_DSM 0x07 110#define IGNORE_PCI_BOOT_CONFIG_DSM 0x05
111#define RESET_DELAY_DSM 0x08 111#define DEVICE_LABEL_DSM 0x07
112#define FUNCTION_DELAY_DSM 0x09 112#define RESET_DELAY_DSM 0x08
113#define FUNCTION_DELAY_DSM 0x09
113 114
114#else /* CONFIG_ACPI */ 115#else /* CONFIG_ACPI */
115static inline void acpi_pci_add_bus(struct pci_bus *bus) { } 116static inline void acpi_pci_add_bus(struct pci_bus *bus) { }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index dd436da7eccc..2972793e3028 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -151,6 +151,8 @@ static inline const char *pci_power_name(pci_power_t state)
151#define PCI_PM_BUS_WAIT 50 151#define PCI_PM_BUS_WAIT 50
152 152
153/** 153/**
154 * typedef pci_channel_state_t
155 *
154 * The pci_channel state describes connectivity between the CPU and 156 * The pci_channel state describes connectivity between the CPU and
155 * the PCI device. If some PCI bus between here and the PCI device 157 * the PCI device. If some PCI bus between here and the PCI device
156 * has crashed or locked up, this info is reflected here. 158 * has crashed or locked up, this info is reflected here.
@@ -258,6 +260,7 @@ enum pci_bus_speed {
258 PCIE_SPEED_5_0GT = 0x15, 260 PCIE_SPEED_5_0GT = 0x15,
259 PCIE_SPEED_8_0GT = 0x16, 261 PCIE_SPEED_8_0GT = 0x16,
260 PCIE_SPEED_16_0GT = 0x17, 262 PCIE_SPEED_16_0GT = 0x17,
263 PCIE_SPEED_32_0GT = 0x18,
261 PCI_SPEED_UNKNOWN = 0xff, 264 PCI_SPEED_UNKNOWN = 0xff,
262}; 265};
263 266
@@ -383,7 +386,7 @@ struct pci_dev {
383 386
384 unsigned int is_busmaster:1; /* Is busmaster */ 387 unsigned int is_busmaster:1; /* Is busmaster */
385 unsigned int no_msi:1; /* May not use MSI */ 388 unsigned int no_msi:1; /* May not use MSI */
386 unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */ 389 unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */
387 unsigned int block_cfg_access:1; /* Config space access blocked */ 390 unsigned int block_cfg_access:1; /* Config space access blocked */
388 unsigned int broken_parity_status:1; /* Generates false positive parity */ 391 unsigned int broken_parity_status:1; /* Generates false positive parity */
389 unsigned int irq_reroute_variant:2; /* Needs IRQ rerouting variant */ 392 unsigned int irq_reroute_variant:2; /* Needs IRQ rerouting variant */
@@ -506,6 +509,8 @@ struct pci_host_bridge {
506 unsigned int native_shpc_hotplug:1; /* OS may use SHPC hotplug */ 509 unsigned int native_shpc_hotplug:1; /* OS may use SHPC hotplug */
507 unsigned int native_pme:1; /* OS may use PCIe PME */ 510 unsigned int native_pme:1; /* OS may use PCIe PME */
508 unsigned int native_ltr:1; /* OS may use PCIe LTR */ 511 unsigned int native_ltr:1; /* OS may use PCIe LTR */
512 unsigned int preserve_config:1; /* Preserve FW resource setup */
513
509 /* Resource alignment requirements */ 514 /* Resource alignment requirements */
510 resource_size_t (*align_resource)(struct pci_dev *dev, 515 resource_size_t (*align_resource)(struct pci_dev *dev,
511 const struct resource *res, 516 const struct resource *res,
@@ -776,6 +781,50 @@ struct pci_error_handlers {
776 781
777 782
778struct module; 783struct module;
784
785/**
786 * struct pci_driver - PCI driver structure
787 * @node: List of driver structures.
788 * @name: Driver name.
789 * @id_table: Pointer to table of device IDs the driver is
790 * interested in. Most drivers should export this
791 * table using MODULE_DEVICE_TABLE(pci,...).
792 * @probe: This probing function gets called (during execution
793 * of pci_register_driver() for already existing
794 * devices or later if a new device gets inserted) for
795 * all PCI devices which match the ID table and are not
796 * "owned" by the other drivers yet. This function gets
797 * passed a "struct pci_dev \*" for each device whose
798 * entry in the ID table matches the device. The probe
799 * function returns zero when the driver chooses to
800 * take "ownership" of the device or an error code
801 * (negative number) otherwise.
802 * The probe function always gets called from process
803 * context, so it can sleep.
804 * @remove: The remove() function gets called whenever a device
805 * being handled by this driver is removed (either during
806 * deregistration of the driver or when it's manually
807 * pulled out of a hot-pluggable slot).
808 * The remove function always gets called from process
809 * context, so it can sleep.
810 * @suspend: Put device into low power state.
811 * @suspend_late: Put device into low power state.
812 * @resume_early: Wake device from low power state.
813 * @resume: Wake device from low power state.
814 * (Please see Documentation/power/pci.rst for descriptions
815 * of PCI Power Management and the related functions.)
816 * @shutdown: Hook into reboot_notifier_list (kernel/sys.c).
817 * Intended to stop any idling DMA operations.
818 * Useful for enabling wake-on-lan (NIC) or changing
819 * the power state of a device before reboot.
820 * e.g. drivers/net/e100.c.
821 * @sriov_configure: Optional driver callback to allow configuration of
822 * number of VFs to enable via sysfs "sriov_numvfs" file.
823 * @err_handler: See Documentation/PCI/pci-error-recovery.rst
824 * @groups: Sysfs attribute groups.
825 * @driver: Driver model structure.
826 * @dynids: List of dynamically added device IDs.
827 */
779struct pci_driver { 828struct pci_driver {
780 struct list_head node; 829 struct list_head node;
781 const char *name; 830 const char *name;
@@ -2207,7 +2256,7 @@ static inline u8 pci_vpd_srdt_tag(const u8 *srdt)
2207 2256
2208/** 2257/**
2209 * pci_vpd_info_field_size - Extracts the information field length 2258 * pci_vpd_info_field_size - Extracts the information field length
2210 * @lrdt: Pointer to the beginning of an information field header 2259 * @info_field: Pointer to the beginning of an information field header
2211 * 2260 *
2212 * Returns the extracted information field length. 2261 * Returns the extracted information field length.
2213 */ 2262 */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 70e86148cb1e..40015609c4b5 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1112,7 +1112,7 @@
1112 1112
1113#define PCI_VENDOR_ID_AL 0x10b9 1113#define PCI_VENDOR_ID_AL 0x10b9
1114#define PCI_DEVICE_ID_AL_M1533 0x1533 1114#define PCI_DEVICE_ID_AL_M1533 0x1533
1115#define PCI_DEVICE_ID_AL_M1535 0x1535 1115#define PCI_DEVICE_ID_AL_M1535 0x1535
1116#define PCI_DEVICE_ID_AL_M1541 0x1541 1116#define PCI_DEVICE_ID_AL_M1541 0x1541
1117#define PCI_DEVICE_ID_AL_M1563 0x1563 1117#define PCI_DEVICE_ID_AL_M1563 0x1563
1118#define PCI_DEVICE_ID_AL_M1621 0x1621 1118#define PCI_DEVICE_ID_AL_M1621 0x1621
@@ -1336,6 +1336,7 @@
1336#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP78S_SMBUS 0x0752 1336#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP78S_SMBUS 0x0752
1337#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE 0x0759 1337#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE 0x0759
1338#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_SMBUS 0x07D8 1338#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_SMBUS 0x07D8
1339#define PCI_DEVICE_ID_NVIDIA_GEFORCE_320M 0x08A0
1339#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP79_SMBUS 0x0AA2 1340#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP79_SMBUS 0x0AA2
1340#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP89_SATA 0x0D85 1341#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP89_SATA 0x0D85
1341 1342
@@ -1752,7 +1753,7 @@
1752#define PCI_VENDOR_ID_STALLION 0x124d 1753#define PCI_VENDOR_ID_STALLION 0x124d
1753 1754
1754/* Allied Telesyn */ 1755/* Allied Telesyn */
1755#define PCI_VENDOR_ID_AT 0x1259 1756#define PCI_VENDOR_ID_AT 0x1259
1756#define PCI_SUBDEVICE_ID_AT_2700FX 0x2701 1757#define PCI_SUBDEVICE_ID_AT_2700FX 0x2701
1757#define PCI_SUBDEVICE_ID_AT_2701FX 0x2703 1758#define PCI_SUBDEVICE_ID_AT_2701FX 0x2703
1758 1759
@@ -2550,7 +2551,7 @@
2550#define PCI_DEVICE_ID_KORENIX_JETCARDF2 0x1700 2551#define PCI_DEVICE_ID_KORENIX_JETCARDF2 0x1700
2551#define PCI_DEVICE_ID_KORENIX_JETCARDF3 0x17ff 2552#define PCI_DEVICE_ID_KORENIX_JETCARDF3 0x17ff
2552 2553
2553#define PCI_VENDOR_ID_HUAWEI 0x19e5 2554#define PCI_VENDOR_ID_HUAWEI 0x19e5
2554 2555
2555#define PCI_VENDOR_ID_NETRONOME 0x19ee 2556#define PCI_VENDOR_ID_NETRONOME 0x19ee
2556#define PCI_DEVICE_ID_NETRONOME_NFP4000 0x4000 2557#define PCI_DEVICE_ID_NETRONOME_NFP4000 0x4000
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 283fb3defe56..3619a870eaa4 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -271,7 +271,7 @@ typedef struct pm_message {
271 * actions to be performed by a device driver's callbacks generally depend on 271 * actions to be performed by a device driver's callbacks generally depend on
272 * the platform and subsystem the device belongs to. 272 * the platform and subsystem the device belongs to.
273 * 273 *
274 * Refer to Documentation/power/runtime_pm.txt for more information about the 274 * Refer to Documentation/power/runtime_pm.rst for more information about the
275 * role of the @runtime_suspend(), @runtime_resume() and @runtime_idle() 275 * role of the @runtime_suspend(), @runtime_resume() and @runtime_idle()
276 * callbacks in device runtime power management. 276 * callbacks in device runtime power management.
277 */ 277 */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 27164769d184..f28e562d7ca8 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -528,6 +528,7 @@
528#define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */ 528#define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */
529#define PCI_EXP_LNKCAP_SLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector bit 2 */ 529#define PCI_EXP_LNKCAP_SLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector bit 2 */
530#define PCI_EXP_LNKCAP_SLS_16_0GB 0x00000004 /* LNKCAP2 SLS Vector bit 3 */ 530#define PCI_EXP_LNKCAP_SLS_16_0GB 0x00000004 /* LNKCAP2 SLS Vector bit 3 */
531#define PCI_EXP_LNKCAP_SLS_32_0GB 0x00000005 /* LNKCAP2 SLS Vector bit 4 */
531#define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ 532#define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */
532#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ 533#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */
533#define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ 534#define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */
@@ -556,6 +557,7 @@
556#define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ 557#define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */
557#define PCI_EXP_LNKSTA_CLS_8_0GB 0x0003 /* Current Link Speed 8.0GT/s */ 558#define PCI_EXP_LNKSTA_CLS_8_0GB 0x0003 /* Current Link Speed 8.0GT/s */
558#define PCI_EXP_LNKSTA_CLS_16_0GB 0x0004 /* Current Link Speed 16.0GT/s */ 559#define PCI_EXP_LNKSTA_CLS_16_0GB 0x0004 /* Current Link Speed 16.0GT/s */
560#define PCI_EXP_LNKSTA_CLS_32_0GB 0x0005 /* Current Link Speed 32.0GT/s */
559#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */ 561#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */
560#define PCI_EXP_LNKSTA_NLW_X1 0x0010 /* Current Link Width x1 */ 562#define PCI_EXP_LNKSTA_NLW_X1 0x0010 /* Current Link Width x1 */
561#define PCI_EXP_LNKSTA_NLW_X2 0x0020 /* Current Link Width x2 */ 563#define PCI_EXP_LNKSTA_NLW_X2 0x0020 /* Current Link Width x2 */
@@ -661,6 +663,7 @@
661#define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */ 663#define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */
662#define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */ 664#define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */
663#define PCI_EXP_LNKCAP2_SLS_16_0GB 0x00000010 /* Supported Speed 16GT/s */ 665#define PCI_EXP_LNKCAP2_SLS_16_0GB 0x00000010 /* Supported Speed 16GT/s */
666#define PCI_EXP_LNKCAP2_SLS_32_0GB 0x00000020 /* Supported Speed 32GT/s */
664#define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ 667#define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */
665#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ 668#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */
666#define PCI_EXP_LNKCTL2_TLS 0x000f 669#define PCI_EXP_LNKCTL2_TLS 0x000f
@@ -668,6 +671,7 @@
668#define PCI_EXP_LNKCTL2_TLS_5_0GT 0x0002 /* Supported Speed 5GT/s */ 671#define PCI_EXP_LNKCTL2_TLS_5_0GT 0x0002 /* Supported Speed 5GT/s */
669#define PCI_EXP_LNKCTL2_TLS_8_0GT 0x0003 /* Supported Speed 8GT/s */ 672#define PCI_EXP_LNKCTL2_TLS_8_0GT 0x0003 /* Supported Speed 8GT/s */
670#define PCI_EXP_LNKCTL2_TLS_16_0GT 0x0004 /* Supported Speed 16GT/s */ 673#define PCI_EXP_LNKCTL2_TLS_16_0GT 0x0004 /* Supported Speed 16GT/s */
674#define PCI_EXP_LNKCTL2_TLS_32_0GT 0x0005 /* Supported Speed 32GT/s */
671#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ 675#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */
672#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ 676#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */
673#define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ 677#define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index ff8592ddedee..d3667b4075c1 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -66,7 +66,7 @@ config HIBERNATION
66 need to run mkswap against the swap partition used for the suspend. 66 need to run mkswap against the swap partition used for the suspend.
67 67
68 It also works with swap files to a limited extent (for details see 68 It also works with swap files to a limited extent (for details see
69 <file:Documentation/power/swsusp-and-swap-files.txt>). 69 <file:Documentation/power/swsusp-and-swap-files.rst>).
70 70
71 Right now you may boot without resuming and resume later but in the 71 Right now you may boot without resuming and resume later but in the
72 meantime you cannot use the swap partition(s)/file(s) involved in 72 meantime you cannot use the swap partition(s)/file(s) involved in
@@ -75,7 +75,7 @@ config HIBERNATION
75 MOUNT any journaled filesystems mounted before the suspend or they 75 MOUNT any journaled filesystems mounted before the suspend or they
76 will get corrupted in a nasty way. 76 will get corrupted in a nasty way.
77 77
78 For more information take a look at <file:Documentation/power/swsusp.txt>. 78 For more information take a look at <file:Documentation/power/swsusp.rst>.
79 79
80config ARCH_SAVE_PAGE_KEYS 80config ARCH_SAVE_PAGE_KEYS
81 bool 81 bool
@@ -256,7 +256,7 @@ config APM_EMULATION
256 notification of APM "events" (e.g. battery status change). 256 notification of APM "events" (e.g. battery status change).
257 257
258 In order to use APM, you will need supporting software. For location 258 In order to use APM, you will need supporting software. For location
259 and more information, read <file:Documentation/power/apm-acpi.txt> 259 and more information, read <file:Documentation/power/apm-acpi.rst>
260 and the Battery Powered Linux mini-HOWTO, available from 260 and the Battery Powered Linux mini-HOWTO, available from
261 <http://www.tldp.org/docs.html#howto>. 261 <http://www.tldp.org/docs.html#howto>.
262 262
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 578cce4fbe6c..67f8360dfcee 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -166,7 +166,7 @@ config CFG80211_DEFAULT_PS
166 166
167 If this causes your applications to misbehave you should fix your 167 If this causes your applications to misbehave you should fix your
168 applications instead -- they need to register their network 168 applications instead -- they need to register their network
169 latency requirement, see Documentation/power/pm_qos_interface.txt. 169 latency requirement, see Documentation/power/pm_qos_interface.rst.
170 170
171config CFG80211_DEBUGFS 171config CFG80211_DEBUGFS
172 bool "cfg80211 DebugFS entries" 172 bool "cfg80211 DebugFS entries"
diff --git a/tools/pci/Makefile b/tools/pci/Makefile
index 6876ee4bd78c..4b95a5176355 100644
--- a/tools/pci/Makefile
+++ b/tools/pci/Makefile
@@ -18,7 +18,6 @@ ALL_TARGETS := pcitest
18ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) 18ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
19 19
20SCRIPTS := pcitest.sh 20SCRIPTS := pcitest.sh
21ALL_SCRIPTS := $(patsubst %,$(OUTPUT)%,$(SCRIPTS))
22 21
23all: $(ALL_PROGRAMS) 22all: $(ALL_PROGRAMS)
24 23
@@ -47,10 +46,10 @@ clean:
47 46
48install: $(ALL_PROGRAMS) 47install: $(ALL_PROGRAMS)
49 install -d -m 755 $(DESTDIR)$(bindir); \ 48 install -d -m 755 $(DESTDIR)$(bindir); \
50 for program in $(ALL_PROGRAMS) pcitest.sh; do \ 49 for program in $(ALL_PROGRAMS); do \
51 install $$program $(DESTDIR)$(bindir); \ 50 install $$program $(DESTDIR)$(bindir); \
52 done; \ 51 done; \
53 for script in $(ALL_SCRIPTS); do \ 52 for script in $(SCRIPTS); do \
54 install $$script $(DESTDIR)$(bindir); \ 53 install $$script $(DESTDIR)$(bindir); \
55 done 54 done
56 55
diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c
index cb7a47dfd8b6..cb1e51fcc84e 100644
--- a/tools/pci/pcitest.c
+++ b/tools/pci/pcitest.c
@@ -36,15 +36,15 @@ struct pci_test {
36 unsigned long size; 36 unsigned long size;
37}; 37};
38 38
39static void run_test(struct pci_test *test) 39static int run_test(struct pci_test *test)
40{ 40{
41 long ret; 41 int ret = -EINVAL;
42 int fd; 42 int fd;
43 43
44 fd = open(test->device, O_RDWR); 44 fd = open(test->device, O_RDWR);
45 if (fd < 0) { 45 if (fd < 0) {
46 perror("can't open PCI Endpoint Test device"); 46 perror("can't open PCI Endpoint Test device");
47 return; 47 return -ENODEV;
48 } 48 }
49 49
50 if (test->barnum >= 0 && test->barnum <= 5) { 50 if (test->barnum >= 0 && test->barnum <= 5) {
@@ -212,7 +212,7 @@ usage:
212 "\t-r Read buffer test\n" 212 "\t-r Read buffer test\n"
213 "\t-w Write buffer test\n" 213 "\t-w Write buffer test\n"
214 "\t-c Copy buffer test\n" 214 "\t-c Copy buffer test\n"
215 "\t-s <size> Size of buffer {default: 100KB}\n", 215 "\t-s <size> Size of buffer {default: 100KB}\n"
216 "\t-h Print this help message\n", 216 "\t-h Print this help message\n",
217 argv[0]); 217 argv[0]);
218 return -EINVAL; 218 return -EINVAL;