aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShaohua Li <shaohua.li@intel.com>2008-01-23 21:21:57 -0500
committerGreg Kroah-Hartman <gregkh@suse.de>2008-02-01 18:04:30 -0500
commit6c723d5bd89f03fc3ef627d50f89ade054d2ee3b (patch)
tree45fcf8a380b48ddf686456ff65a2234c23c05504
parent5c796ae7a7ebe56967ed9b9963d7c16d733635ff (diff)
PCI: PCIE ASPM support
PCI Express ASPM defines a protocol for PCI Express components in the D0 state to reduce Link power by placing their Links into a low power state and instructing the other end of the Link to do likewise. This capability allows hardware-autonomous, dynamic Link power reduction beyond what is achievable by software-only controlled power management. However, the device must be configured appropriately by software. Enabling ASPM saves power, but introduces device latency. This patch adds ASPM support to Linux. It introduces a global policy for ASPM, which can be controlled through the sysfs file /sys/module/pcie_aspm/parameters/policy. The interface can be used as a boot option too. Currently the following settings are available: - default: BIOS default setting; - powersave: highest power saving mode, enable all available ASPM states and clock power management; - performance: highest performance, disable ASPM and clock power management. By default, the 'default' policy is used. In my test, the power difference between powersave mode and performance mode is about 1.3 W in a system with 3 PCIE links. Signed-off-by: Shaohua Li <shaohua.li@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r--drivers/pci/pci-sysfs.c5
-rw-r--r--drivers/pci/pci.c4
-rw-r--r--drivers/pci/pcie/Kconfig20
-rw-r--r--drivers/pci/pcie/Makefile3
-rw-r--r--drivers/pci/pcie/aspm.c802
-rw-r--r--drivers/pci/probe.c5
-rw-r--r--drivers/pci/remove.c4
-rw-r--r--include/linux/aspm.h44
-rw-r--r--include/linux/pci.h5
-rw-r--r--include/linux/pci_regs.h8
10 files changed, 900 insertions, 0 deletions
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 7d1877341aad..d05c1b252386 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -21,6 +21,7 @@
21#include <linux/topology.h> 21#include <linux/topology.h>
22#include <linux/mm.h> 22#include <linux/mm.h>
23#include <linux/capability.h> 23#include <linux/capability.h>
24#include <linux/aspm.h>
24#include "pci.h" 25#include "pci.h"
25 26
26static int sysfs_initialized; /* = 0 */ 27static int sysfs_initialized; /* = 0 */
@@ -650,6 +651,8 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
650 if (pcibios_add_platform_entries(pdev)) 651 if (pcibios_add_platform_entries(pdev))
651 goto err_rom_file; 652 goto err_rom_file;
652 653
654 pcie_aspm_create_sysfs_dev_files(pdev);
655
653 return 0; 656 return 0;
654 657
655err_rom_file: 658err_rom_file:
@@ -679,6 +682,8 @@ void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
679 if (!sysfs_initialized) 682 if (!sysfs_initialized)
680 return; 683 return;
681 684
685 pcie_aspm_remove_sysfs_dev_files(pdev);
686
682 if (pdev->cfg_size < 4096) 687 if (pdev->cfg_size < 4096)
683 sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr); 688 sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
684 else 689 else
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 35f78f1628fc..1f1693161956 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -18,6 +18,7 @@
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/log2.h> 20#include <linux/log2.h>
21#include <linux/aspm.h>
21#include <asm/dma.h> /* isa_dma_bridge_buggy */ 22#include <asm/dma.h> /* isa_dma_bridge_buggy */
22#include "pci.h" 23#include "pci.h"
23 24
@@ -519,6 +520,9 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state)
519 if (need_restore) 520 if (need_restore)
520 pci_restore_bars(dev); 521 pci_restore_bars(dev);
521 522
523 if (dev->bus->self)
524 pcie_aspm_pm_state_change(dev->bus->self);
525
522 return 0; 526 return 0;
523} 527}
524 528
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 287a9311716c..60104cf98796 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -26,3 +26,23 @@ config HOTPLUG_PCI_PCIE
26 When in doubt, say N. 26 When in doubt, say N.
27 27
28source "drivers/pci/pcie/aer/Kconfig" 28source "drivers/pci/pcie/aer/Kconfig"
29
30#
31# PCI Express ASPM
32#
33config PCIEASPM
34 bool "PCI Express ASPM support(Experimental)"
35 depends on PCI && EXPERIMENTAL
36 default y
37 help
38 This enables PCI Express ASPM (Active State Power Management) and
39 Clock Power Management. ASPM supports state L0/L0s/L1.
40
41 When in doubt, say N.
42config PCIEASPM_DEBUG
43 bool "Debug PCI Express ASPM"
44 depends on PCIEASPM
45 default n
46 help
47 This enables PCI Express ASPM debug support. It will add per-device
48 interface to control ASPM.
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index e00fb99acf44..11f6bb1eae24 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -2,6 +2,9 @@
2# Makefile for PCI-Express PORT Driver 2# Makefile for PCI-Express PORT Driver
3# 3#
4 4
5# Build PCI Express ASPM if needed
6obj-$(CONFIG_PCIEASPM) += aspm.o
7
5pcieportdrv-y := portdrv_core.o portdrv_pci.o portdrv_bus.o 8pcieportdrv-y := portdrv_core.o portdrv_pci.o portdrv_bus.o
6 9
7obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o 10obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
new file mode 100644
index 000000000000..1a5adeb10c95
--- /dev/null
+++ b/drivers/pci/pcie/aspm.c
@@ -0,0 +1,802 @@
1/*
2 * File: drivers/pci/pcie/aspm.c
3 * Enabling PCIE link L0s/L1 state and Clock Power Management
4 *
5 * Copyright (C) 2007 Intel
6 * Copyright (C) Zhang Yanmin (yanmin.zhang@intel.com)
7 * Copyright (C) Shaohua Li (shaohua.li@intel.com)
8 */
9
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
#include <linux/pci_regs.h>
#include <linux/errno.h>
#include <linux/pm.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/aspm.h>
#include <acpi/acpi_bus.h>
#include <linux/pci-acpi.h>
#include "../pci.h"
23
24#ifdef MODULE_PARAM_PREFIX
25#undef MODULE_PARAM_PREFIX
26#endif
27#define MODULE_PARAM_PREFIX "pcie_aspm."
28
/*
 * Per-function latency budget of an endpoint, in nanoseconds: the longest
 * exit latency the function accepts for each ASPM link state.
 */
struct endpoint_state {
	unsigned int l0s_acceptable_latency;	/* budget for leaving L0s */
	unsigned int l1_acceptable_latency;	/* budget for leaving L1 */
};
33
34struct pcie_link_state {
35 struct list_head sibiling;
36 struct pci_dev *pdev;
37
38 /* ASPM state */
39 unsigned int support_state;
40 unsigned int enabled_state;
41 unsigned int bios_aspm_state;
42 /* upstream component */
43 unsigned int l0s_upper_latency;
44 unsigned int l1_upper_latency;
45 /* downstream component */
46 unsigned int l0s_down_latency;
47 unsigned int l1_down_latency;
48 /* Clock PM state*/
49 unsigned int clk_pm_capable;
50 unsigned int clk_pm_enabled;
51 unsigned int bios_clk_state;
52
53 /*
54 * A pcie downstream port only has one slot under it, so at most there
55 * are 8 functions
56 */
57 struct endpoint_state endpoints[8];
58};
59
/* Global ASPM policies selectable through the "policy" module parameter */
#define POLICY_DEFAULT 0	/* BIOS default setting */
#define POLICY_PERFORMANCE 1	/* high performance */
#define POLICY_POWERSAVE 2	/* high power saving */

/* User-visible name of each policy, indexed by the POLICY_* value */
static const char *policy_str[] = {
	[POLICY_DEFAULT] = "default",
	[POLICY_PERFORMANCE] = "performance",
	[POLICY_POWERSAVE] = "powersave"
};

/* Currently selected policy; starts out as POLICY_DEFAULT (0) */
static int aspm_policy;

/* Non-zero once "pcie_noaspm" was given on the kernel command line */
static int aspm_disabled;
/* Serialises changes to link states and to link_list */
static DEFINE_MUTEX(aspm_lock);
/* Every link that has a pcie_link_state, chained through ->sibiling */
static LIST_HEAD(link_list);
73
74static int policy_to_aspm_state(struct pci_dev *pdev)
75{
76 struct pcie_link_state *link_state = pdev->link_state;
77
78 switch (aspm_policy) {
79 case POLICY_PERFORMANCE:
80 /* Disable ASPM and Clock PM */
81 return 0;
82 case POLICY_POWERSAVE:
83 /* Enable ASPM L0s/L1 */
84 return PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1;
85 case POLICY_DEFAULT:
86 return link_state->bios_aspm_state;
87 }
88 return 0;
89}
90
91static int policy_to_clkpm_state(struct pci_dev *pdev)
92{
93 struct pcie_link_state *link_state = pdev->link_state;
94
95 switch (aspm_policy) {
96 case POLICY_PERFORMANCE:
97 /* Disable ASPM and Clock PM */
98 return 0;
99 case POLICY_POWERSAVE:
100 /* Disable Clock PM */
101 return 1;
102 case POLICY_DEFAULT:
103 return link_state->bios_clk_state;
104 }
105 return 0;
106}
107
108static void pcie_set_clock_pm(struct pci_dev *pdev, int enable)
109{
110 struct pci_dev *child_dev;
111 int pos;
112 u16 reg16;
113 struct pcie_link_state *link_state = pdev->link_state;
114
115 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
116 pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
117 if (!pos)
118 return;
119 pci_read_config_word(child_dev, pos + PCI_EXP_LNKCTL, &reg16);
120 if (enable)
121 reg16 |= PCI_EXP_LNKCTL_CLKREQ_EN;
122 else
123 reg16 &= ~PCI_EXP_LNKCTL_CLKREQ_EN;
124 pci_write_config_word(child_dev, pos + PCI_EXP_LNKCTL, reg16);
125 }
126 link_state->clk_pm_enabled = !!enable;
127}
128
129static void pcie_check_clock_pm(struct pci_dev *pdev)
130{
131 int pos;
132 u32 reg32;
133 u16 reg16;
134 int capable = 1, enabled = 1;
135 struct pci_dev *child_dev;
136 struct pcie_link_state *link_state = pdev->link_state;
137
138 /* All functions should have the same cap and state, take the worst */
139 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
140 pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
141 if (!pos)
142 return;
143 pci_read_config_dword(child_dev, pos + PCI_EXP_LNKCAP, &reg32);
144 if (!(reg32 & PCI_EXP_LNKCAP_CLKPM)) {
145 capable = 0;
146 enabled = 0;
147 break;
148 }
149 pci_read_config_word(child_dev, pos + PCI_EXP_LNKCTL, &reg16);
150 if (!(reg16 & PCI_EXP_LNKCTL_CLKREQ_EN))
151 enabled = 0;
152 }
153 link_state->clk_pm_capable = capable;
154 link_state->clk_pm_enabled = enabled;
155 link_state->bios_clk_state = enabled;
156 pcie_set_clock_pm(pdev, policy_to_clkpm_state(pdev));
157}
158
159/*
160 * pcie_aspm_configure_common_clock: check if the 2 ends of a link
161 * could use common clock. If they are, configure them to use the
162 * common clock. That will reduce the ASPM state exit latency.
163 */
164static void pcie_aspm_configure_common_clock(struct pci_dev *pdev)
165{
166 int pos, child_pos;
167 u16 reg16 = 0;
168 struct pci_dev *child_dev;
169 int same_clock = 1;
170
171 /*
172 * all functions of a slot should have the same Slot Clock
173 * Configuration, so just check one function
174 * */
175 child_dev = list_entry(pdev->subordinate->devices.next, struct pci_dev,
176 bus_list);
177 BUG_ON(!child_dev->is_pcie);
178
179 /* Check downstream component if bit Slot Clock Configuration is 1 */
180 child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
181 pci_read_config_word(child_dev, child_pos + PCI_EXP_LNKSTA, &reg16);
182 if (!(reg16 & PCI_EXP_LNKSTA_SLC))
183 same_clock = 0;
184
185 /* Check upstream component if bit Slot Clock Configuration is 1 */
186 pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
187 pci_read_config_word(pdev, pos + PCI_EXP_LNKSTA, &reg16);
188 if (!(reg16 & PCI_EXP_LNKSTA_SLC))
189 same_clock = 0;
190
191 /* Configure downstream component, all functions */
192 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
193 child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
194 pci_read_config_word(child_dev, child_pos + PCI_EXP_LNKCTL,
195 &reg16);
196 if (same_clock)
197 reg16 |= PCI_EXP_LNKCTL_CCC;
198 else
199 reg16 &= ~PCI_EXP_LNKCTL_CCC;
200 pci_write_config_word(child_dev, child_pos + PCI_EXP_LNKCTL,
201 reg16);
202 }
203
204 /* Configure upstream component */
205 pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
206 if (same_clock)
207 reg16 |= PCI_EXP_LNKCTL_CCC;
208 else
209 reg16 &= ~PCI_EXP_LNKCTL_CCC;
210 pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16);
211
212 /* retrain link */
213 reg16 |= PCI_EXP_LNKCTL_RL;
214 pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16);
215
216 /* Wait for link training end */
217 while (1) {
218 pci_read_config_word(pdev, pos + PCI_EXP_LNKSTA, &reg16);
219 if (!(reg16 & PCI_EXP_LNKSTA_LT))
220 break;
221 cpu_relax();
222 }
223}
224
/*
 * calc_L0S_latency: translate a 3-bit L0s latency encoding into nanoseconds
 * @latency_encoding: the encoded field value
 * @ac: non-zero when the field is an endpoint's *acceptable* latency,
 *	zero when it is an exit latency
 *
 * Encodings below 7 map to 64ns << encoding.  Encoding 7 means "more than
 * 4us" for an exit latency (reported as 5us) and "no limit" for an
 * acceptable latency (reported as -1U).
 */
static unsigned int calc_L0S_latency(unsigned int latency_encoding, int ac)
{
	if (latency_encoding != 0x7)
		return 64U * (1 << latency_encoding);

	if (ac)
		return -1U;
	return 5*1000; /* > 4us */
}
241
/*
 * calc_L1_latency: translate a 3-bit L1 latency encoding into nanoseconds
 * @latency_encoding: the encoded field value
 * @ac: non-zero when the field is an endpoint's *acceptable* latency,
 *	zero when it is an exit latency
 *
 * Encodings below 7 map to 1us << encoding.  Encoding 7 means "more than
 * 64us" for an exit latency (reported as 65us) and "no limit" for an
 * acceptable latency (reported as -1U).
 */
static unsigned int calc_L1_latency(unsigned int latency_encoding, int ac)
{
	if (latency_encoding != 0x7)
		return 1000U * (1 << latency_encoding);

	if (ac)
		return -1U;
	return 65*1000; /* > 64us */
}
258
259static void pcie_aspm_get_cap_device(struct pci_dev *pdev, u32 *state,
260 unsigned int *l0s, unsigned int *l1, unsigned int *enabled)
261{
262 int pos;
263 u16 reg16;
264 u32 reg32;
265 unsigned int latency;
266
267 pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
268 pci_read_config_dword(pdev, pos + PCI_EXP_LNKCAP, &reg32);
269 *state = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10;
270 if (*state != PCIE_LINK_STATE_L0S &&
271 *state != (PCIE_LINK_STATE_L1|PCIE_LINK_STATE_L0S))
272 * state = 0;
273 if (*state == 0)
274 return;
275
276 latency = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12;
277 *l0s = calc_L0S_latency(latency, 0);
278 if (*state & PCIE_LINK_STATE_L1) {
279 latency = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15;
280 *l1 = calc_L1_latency(latency, 0);
281 }
282 pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
283 *enabled = reg16 & (PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1);
284}
285
286static void pcie_aspm_cap_init(struct pci_dev *pdev)
287{
288 struct pci_dev *child_dev;
289 u32 state, tmp;
290 struct pcie_link_state *link_state = pdev->link_state;
291
292 /* upstream component states */
293 pcie_aspm_get_cap_device(pdev, &link_state->support_state,
294 &link_state->l0s_upper_latency,
295 &link_state->l1_upper_latency,
296 &link_state->enabled_state);
297 /* downstream component states, all functions have the same setting */
298 child_dev = list_entry(pdev->subordinate->devices.next, struct pci_dev,
299 bus_list);
300 pcie_aspm_get_cap_device(child_dev, &state,
301 &link_state->l0s_down_latency,
302 &link_state->l1_down_latency,
303 &tmp);
304 link_state->support_state &= state;
305 if (!link_state->support_state)
306 return;
307 link_state->enabled_state &= link_state->support_state;
308 link_state->bios_aspm_state = link_state->enabled_state;
309
310 /* ENDPOINT states*/
311 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
312 int pos;
313 u32 reg32;
314 unsigned int latency;
315 struct endpoint_state *ep_state =
316 &link_state->endpoints[PCI_FUNC(child_dev->devfn)];
317
318 if (child_dev->pcie_type != PCI_EXP_TYPE_ENDPOINT &&
319 child_dev->pcie_type != PCI_EXP_TYPE_LEG_END)
320 continue;
321
322 pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
323 pci_read_config_dword(child_dev, pos + PCI_EXP_DEVCAP, &reg32);
324 latency = (reg32 & PCI_EXP_DEVCAP_L0S) >> 6;
325 latency = calc_L0S_latency(latency, 1);
326 ep_state->l0s_acceptable_latency = latency;
327 if (link_state->support_state & PCIE_LINK_STATE_L1) {
328 latency = (reg32 & PCI_EXP_DEVCAP_L1) >> 9;
329 latency = calc_L1_latency(latency, 1);
330 ep_state->l1_acceptable_latency = latency;
331 }
332 }
333}
334
335static unsigned int __pcie_aspm_check_state_one(struct pci_dev *pdev,
336 unsigned int state)
337{
338 struct pci_dev *parent_dev, *tmp_dev;
339 unsigned int latency, l1_latency = 0;
340 struct pcie_link_state *link_state;
341 struct endpoint_state *ep_state;
342
343 parent_dev = pdev->bus->self;
344 link_state = parent_dev->link_state;
345 state &= link_state->support_state;
346 if (state == 0)
347 return 0;
348 ep_state = &link_state->endpoints[PCI_FUNC(pdev->devfn)];
349
350 /*
351 * Check latency for endpoint device.
352 * TBD: The latency from the endpoint to root complex vary per
353 * switch's upstream link state above the device. Here we just do a
354 * simple check which assumes all links above the device can be in L1
355 * state, that is we just consider the worst case. If switch's upstream
356 * link can't be put into L0S/L1, then our check is too strictly.
357 */
358 tmp_dev = pdev;
359 while (state & (PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)) {
360 parent_dev = tmp_dev->bus->self;
361 link_state = parent_dev->link_state;
362 if (state & PCIE_LINK_STATE_L0S) {
363 latency = max_t(unsigned int,
364 link_state->l0s_upper_latency,
365 link_state->l0s_down_latency);
366 if (latency > ep_state->l0s_acceptable_latency)
367 state &= ~PCIE_LINK_STATE_L0S;
368 }
369 if (state & PCIE_LINK_STATE_L1) {
370 latency = max_t(unsigned int,
371 link_state->l1_upper_latency,
372 link_state->l1_down_latency);
373 if (latency + l1_latency >
374 ep_state->l1_acceptable_latency)
375 state &= ~PCIE_LINK_STATE_L1;
376 }
377 if (!parent_dev->bus->self) /* parent_dev is a root port */
378 break;
379 else {
380 /*
381 * parent_dev is the downstream port of a switch, make
382 * tmp_dev the upstream port of the switch
383 */
384 tmp_dev = parent_dev->bus->self;
385 /*
386 * every switch on the path to root complex need 1 more
387 * microsecond for L1. Spec doesn't mention L0S.
388 */
389 if (state & PCIE_LINK_STATE_L1)
390 l1_latency += 1000;
391 }
392 }
393 return state;
394}
395
396static unsigned int pcie_aspm_check_state(struct pci_dev *pdev,
397 unsigned int state)
398{
399 struct pci_dev *child_dev;
400
401 /* If no child, disable the link */
402 if (list_empty(&pdev->subordinate->devices))
403 return 0;
404 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
405 if (child_dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) {
406 /*
407 * If downstream component of a link is pci bridge, we
408 * disable ASPM for now for the link
409 * */
410 state = 0;
411 break;
412 }
413 if ((child_dev->pcie_type != PCI_EXP_TYPE_ENDPOINT &&
414 child_dev->pcie_type != PCI_EXP_TYPE_LEG_END))
415 continue;
416 /* Device not in D0 doesn't need check latency */
417 if (child_dev->current_state == PCI_D1 ||
418 child_dev->current_state == PCI_D2 ||
419 child_dev->current_state == PCI_D3hot ||
420 child_dev->current_state == PCI_D3cold)
421 continue;
422 state = __pcie_aspm_check_state_one(child_dev, state);
423 }
424 return state;
425}
426
427static void __pcie_aspm_config_one_dev(struct pci_dev *pdev, unsigned int state)
428{
429 u16 reg16;
430 int pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
431
432 pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
433 reg16 &= ~0x3;
434 reg16 |= state;
435 pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16);
436}
437
438static void __pcie_aspm_config_link(struct pci_dev *pdev, unsigned int state)
439{
440 struct pci_dev *child_dev;
441 int valid = 1;
442 struct pcie_link_state *link_state = pdev->link_state;
443
444 /*
445 * if the downstream component has pci bridge function, don't do ASPM
446 * now
447 */
448 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
449 if (child_dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) {
450 valid = 0;
451 break;
452 }
453 }
454 if (!valid)
455 return;
456
457 /*
458 * spec 2.0 suggests all functions should be configured the same
459 * setting for ASPM. Enabling ASPM L1 should be done in upstream
460 * component first and then downstream, and vice versa for disabling
461 * ASPM L1. Spec doesn't mention L0S.
462 */
463 if (state & PCIE_LINK_STATE_L1)
464 __pcie_aspm_config_one_dev(pdev, state);
465
466 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list)
467 __pcie_aspm_config_one_dev(child_dev, state);
468
469 if (!(state & PCIE_LINK_STATE_L1))
470 __pcie_aspm_config_one_dev(pdev, state);
471
472 link_state->enabled_state = state;
473}
474
475static void __pcie_aspm_configure_link_state(struct pci_dev *pdev,
476 unsigned int state)
477{
478 struct pcie_link_state *link_state = pdev->link_state;
479
480 if (link_state->support_state == 0)
481 return;
482 state &= PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1;
483
484 /* state 0 means disabling aspm */
485 state = pcie_aspm_check_state(pdev, state);
486 if (link_state->enabled_state == state)
487 return;
488 __pcie_aspm_config_link(pdev, state);
489}
490
491/*
492 * pcie_aspm_configure_link_state: enable/disable PCI express link state
493 * @pdev: the root port or switch downstream port
494 */
495static void pcie_aspm_configure_link_state(struct pci_dev *pdev,
496 unsigned int state)
497{
498 down_read(&pci_bus_sem);
499 mutex_lock(&aspm_lock);
500 __pcie_aspm_configure_link_state(pdev, state);
501 mutex_unlock(&aspm_lock);
502 up_read(&pci_bus_sem);
503}
504
505static void free_link_state(struct pci_dev *pdev)
506{
507 kfree(pdev->link_state);
508 pdev->link_state = NULL;
509}
510
511/*
512 * pcie_aspm_init_link_state: Initiate PCI express link state.
513 * It is called after the pcie and its children devices are scaned.
514 * @pdev: the root port or switch downstream port
515 */
516void pcie_aspm_init_link_state(struct pci_dev *pdev)
517{
518 unsigned int state;
519 struct pcie_link_state *link_state;
520 int error = 0;
521
522 if (aspm_disabled || !pdev->is_pcie || pdev->link_state)
523 return;
524 if (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
525 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
526 return;
527 down_read(&pci_bus_sem);
528 if (list_empty(&pdev->subordinate->devices))
529 goto out;
530
531 mutex_lock(&aspm_lock);
532
533 link_state = kzalloc(sizeof(*link_state), GFP_KERNEL);
534 if (!link_state)
535 goto unlock_out;
536 pdev->link_state = link_state;
537
538 pcie_aspm_configure_common_clock(pdev);
539
540 pcie_aspm_cap_init(pdev);
541
542 /* config link state to avoid BIOS error */
543 state = pcie_aspm_check_state(pdev, policy_to_aspm_state(pdev));
544 __pcie_aspm_config_link(pdev, state);
545
546 pcie_check_clock_pm(pdev);
547
548 link_state->pdev = pdev;
549 list_add(&link_state->sibiling, &link_list);
550
551unlock_out:
552 if (error)
553 free_link_state(pdev);
554 mutex_unlock(&aspm_lock);
555out:
556 up_read(&pci_bus_sem);
557}
558
559/* @pdev: the endpoint device */
560void pcie_aspm_exit_link_state(struct pci_dev *pdev)
561{
562 struct pci_dev *parent = pdev->bus->self;
563 struct pcie_link_state *link_state = parent->link_state;
564
565 if (aspm_disabled || !pdev->is_pcie || !parent || !link_state)
566 return;
567 if (parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
568 parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
569 return;
570 down_read(&pci_bus_sem);
571 mutex_lock(&aspm_lock);
572
573 /*
574 * All PCIe functions are in one slot, remove one function will remove
575 * the the whole slot, so just wait
576 */
577 if (!list_empty(&parent->subordinate->devices))
578 goto out;
579
580 /* All functions are removed, so just disable ASPM for the link */
581 __pcie_aspm_config_one_dev(parent, 0);
582 list_del(&link_state->sibiling);
583 /* Clock PM is for endpoint device */
584
585 free_link_state(parent);
586out:
587 mutex_unlock(&aspm_lock);
588 up_read(&pci_bus_sem);
589}
590
591/* @pdev: the root port or switch downstream port */
592void pcie_aspm_pm_state_change(struct pci_dev *pdev)
593{
594 struct pcie_link_state *link_state = pdev->link_state;
595
596 if (aspm_disabled || !pdev->is_pcie || !pdev->link_state)
597 return;
598 if (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
599 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
600 return;
601 /*
602 * devices changed PM state, we should recheck if latency meets all
603 * functions' requirement
604 */
605 pcie_aspm_configure_link_state(pdev, link_state->enabled_state);
606}
607
608/*
609 * pci_disable_link_state - disable pci device's link state, so the link will
610 * never enter specific states
611 */
612void pci_disable_link_state(struct pci_dev *pdev, int state)
613{
614 struct pci_dev *parent = pdev->bus->self;
615 struct pcie_link_state *link_state;
616
617 if (aspm_disabled || !pdev->is_pcie)
618 return;
619 if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
620 pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
621 parent = pdev;
622 if (!parent)
623 return;
624
625 down_read(&pci_bus_sem);
626 mutex_lock(&aspm_lock);
627 link_state = parent->link_state;
628 link_state->support_state &=
629 ~(state & (PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1));
630 if (state & PCIE_LINK_STATE_CLKPM)
631 link_state->clk_pm_capable = 0;
632
633 __pcie_aspm_configure_link_state(parent, link_state->enabled_state);
634 if (!link_state->clk_pm_capable && link_state->clk_pm_enabled)
635 pcie_set_clock_pm(parent, 0);
636 mutex_unlock(&aspm_lock);
637 up_read(&pci_bus_sem);
638}
639EXPORT_SYMBOL(pci_disable_link_state);
640
641static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
642{
643 int i;
644 struct pci_dev *pdev;
645 struct pcie_link_state *link_state;
646
647 for (i = 0; i < ARRAY_SIZE(policy_str); i++)
648 if (!strncmp(val, policy_str[i], strlen(policy_str[i])))
649 break;
650 if (i >= ARRAY_SIZE(policy_str))
651 return -EINVAL;
652 if (i == aspm_policy)
653 return 0;
654
655 down_read(&pci_bus_sem);
656 mutex_lock(&aspm_lock);
657 aspm_policy = i;
658 list_for_each_entry(link_state, &link_list, sibiling) {
659 pdev = link_state->pdev;
660 __pcie_aspm_configure_link_state(pdev,
661 policy_to_aspm_state(pdev));
662 if (link_state->clk_pm_capable &&
663 link_state->clk_pm_enabled != policy_to_clkpm_state(pdev))
664 pcie_set_clock_pm(pdev, policy_to_clkpm_state(pdev));
665
666 }
667 mutex_unlock(&aspm_lock);
668 up_read(&pci_bus_sem);
669 return 0;
670}
671
672static int pcie_aspm_get_policy(char *buffer, struct kernel_param *kp)
673{
674 int i, cnt = 0;
675 for (i = 0; i < ARRAY_SIZE(policy_str); i++)
676 if (i == aspm_policy)
677 cnt += sprintf(buffer + cnt, "[%s] ", policy_str[i]);
678 else
679 cnt += sprintf(buffer + cnt, "%s ", policy_str[i]);
680 return cnt;
681}
682
683module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
684 NULL, 0644);
685
686#ifdef CONFIG_PCIEASPM_DEBUG
687static ssize_t link_state_show(struct device *dev,
688 struct device_attribute *attr,
689 char *buf)
690{
691 struct pci_dev *pci_device = to_pci_dev(dev);
692 struct pcie_link_state *link_state = pci_device->link_state;
693
694 return sprintf(buf, "%d\n", link_state->enabled_state);
695}
696
697static ssize_t link_state_store(struct device *dev,
698 struct device_attribute *attr,
699 const char *buf,
700 size_t n)
701{
702 struct pci_dev *pci_device = to_pci_dev(dev);
703 int state;
704
705 if (n < 1)
706 return -EINVAL;
707 state = buf[0]-'0';
708 if (state >= 0 && state <= 3) {
709 /* setup link aspm state */
710 pcie_aspm_configure_link_state(pci_device, state);
711 return n;
712 }
713
714 return -EINVAL;
715}
716
717static ssize_t clk_ctl_show(struct device *dev,
718 struct device_attribute *attr,
719 char *buf)
720{
721 struct pci_dev *pci_device = to_pci_dev(dev);
722 struct pcie_link_state *link_state = pci_device->link_state;
723
724 return sprintf(buf, "%d\n", link_state->clk_pm_enabled);
725}
726
727static ssize_t clk_ctl_store(struct device *dev,
728 struct device_attribute *attr,
729 const char *buf,
730 size_t n)
731{
732 struct pci_dev *pci_device = to_pci_dev(dev);
733 int state;
734
735 if (n < 1)
736 return -EINVAL;
737 state = buf[0]-'0';
738
739 down_read(&pci_bus_sem);
740 mutex_lock(&aspm_lock);
741 pcie_set_clock_pm(pci_device, !!state);
742 mutex_unlock(&aspm_lock);
743 up_read(&pci_bus_sem);
744
745 return n;
746}
747
748static DEVICE_ATTR(link_state, 0644, link_state_show, link_state_store);
749static DEVICE_ATTR(clk_ctl, 0644, clk_ctl_show, clk_ctl_store);
750
751static char power_group[] = "power";
752void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev)
753{
754 struct pcie_link_state *link_state = pdev->link_state;
755
756 if (!pdev->is_pcie || (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
757 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
758 return;
759
760 if (link_state->support_state)
761 sysfs_add_file_to_group(&pdev->dev.kobj,
762 &dev_attr_link_state.attr, power_group);
763 if (link_state->clk_pm_capable)
764 sysfs_add_file_to_group(&pdev->dev.kobj,
765 &dev_attr_clk_ctl.attr, power_group);
766}
767
768void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
769{
770 struct pcie_link_state *link_state = pdev->link_state;
771
772 if (!pdev->is_pcie || (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
773 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
774 return;
775
776 if (link_state->support_state)
777 sysfs_remove_file_from_group(&pdev->dev.kobj,
778 &dev_attr_link_state.attr, power_group);
779 if (link_state->clk_pm_capable)
780 sysfs_remove_file_from_group(&pdev->dev.kobj,
781 &dev_attr_clk_ctl.attr, power_group);
782}
783#endif
784
785static int __init pcie_aspm_disable(char *str)
786{
787 aspm_disabled = 1;
788 return 1;
789}
790
791__setup("pcie_noaspm", pcie_aspm_disable);
792
793static int __init pcie_aspm_init(void)
794{
795 if (aspm_disabled)
796 return 0;
797 pci_osc_support_set(OSC_ACTIVE_STATE_PWR_SUPPORT|
798 OSC_CLOCK_PWR_CAPABILITY_SUPPORT);
799 return 0;
800}
801
802fs_initcall(pcie_aspm_init);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 4262dfec5676..9b4673df27e7 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -9,6 +9,7 @@
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/cpumask.h> 11#include <linux/cpumask.h>
12#include <linux/aspm.h>
12#include "pci.h" 13#include "pci.h"
13 14
14#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ 15#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */
@@ -1010,6 +1011,10 @@ int pci_scan_slot(struct pci_bus *bus, int devfn)
1010 break; 1011 break;
1011 } 1012 }
1012 } 1013 }
1014
1015 if (bus->self)
1016 pcie_aspm_init_link_state(bus->self);
1017
1013 return nr; 1018 return nr;
1014} 1019}
1015 1020
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 430281b2e921..05c9ad2a7f8b 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -1,5 +1,6 @@
1#include <linux/pci.h> 1#include <linux/pci.h>
2#include <linux/module.h> 2#include <linux/module.h>
3#include <linux/aspm.h>
3#include "pci.h" 4#include "pci.h"
4 5
5static void pci_free_resources(struct pci_dev *dev) 6static void pci_free_resources(struct pci_dev *dev)
@@ -30,6 +31,9 @@ static void pci_stop_dev(struct pci_dev *dev)
30 dev->global_list.next = dev->global_list.prev = NULL; 31 dev->global_list.next = dev->global_list.prev = NULL;
31 up_write(&pci_bus_sem); 32 up_write(&pci_bus_sem);
32 } 33 }
34
35 if (dev->bus->self)
36 pcie_aspm_exit_link_state(dev);
33} 37}
34 38
35static void pci_destroy_dev(struct pci_dev *dev) 39static void pci_destroy_dev(struct pci_dev *dev)
diff --git a/include/linux/aspm.h b/include/linux/aspm.h
new file mode 100644
index 000000000000..f41a69895485
--- /dev/null
+++ b/include/linux/aspm.h
@@ -0,0 +1,44 @@
1/*
2 * aspm.h
3 *
4 * PCI Express ASPM defines and function prototypes
5 *
6 * Copyright (C) 2007 Intel Corp.
7 * Zhang Yanmin (yanmin.zhang@intel.com)
8 * Shaohua Li (shaohua.li@intel.com)
9 *
10 * For more information, please consult the following manuals (look at
11 * http://www.pcisig.com/ for how to get them):
12 *
13 * PCI Express Specification
14 */
15
#ifndef LINUX_ASPM_H
#define LINUX_ASPM_H

#include <linux/pci.h>

/* Link state bits accepted by pci_disable_link_state() */
#define PCIE_LINK_STATE_L0S	1
#define PCIE_LINK_STATE_L1	2
#define PCIE_LINK_STATE_CLKPM	4

#ifdef CONFIG_PCIEASPM
extern void pcie_aspm_init_link_state(struct pci_dev *pdev);
extern void pcie_aspm_exit_link_state(struct pci_dev *pdev);
extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
extern void pci_disable_link_state(struct pci_dev *pdev, int state);
#else
/*
 * Empty inline stubs instead of macros: arguments stay type-checked and
 * count as used when ASPM support is compiled out.
 */
static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
{
}
static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev)
{
}
static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev)
{
}
static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
{
}
#endif

#ifdef CONFIG_PCIEASPM_DEBUG /* this depends on CONFIG_PCIEASPM */
extern void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev);
extern void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev);
#else
static inline void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev)
{
}
static inline void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
{
}
#endif
#endif /* LINUX_ASPM_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 13813b0592fc..163b45241729 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -128,6 +128,7 @@ struct pci_cap_saved_state {
128 u32 data[0]; 128 u32 data[0];
129}; 129};
130 130
131struct pcie_link_state;
131/* 132/*
132 * The pci_dev structure is used to describe PCI devices. 133 * The pci_dev structure is used to describe PCI devices.
133 */ 134 */
@@ -163,6 +164,10 @@ struct pci_dev {
163 this is D0-D3, D0 being fully functional, 164 this is D0-D3, D0 being fully functional,
164 and D3 being off. */ 165 and D3 being off. */
165 166
167#ifdef CONFIG_PCIEASPM
168 struct pcie_link_state *link_state; /* ASPM link state. */
169#endif
170
166 pci_channel_state_t error_state; /* current connectivity state */ 171 pci_channel_state_t error_state; /* current connectivity state */
167 struct device dev; /* Generic device interface */ 172 struct device dev; /* Generic device interface */
168 173
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index c1914a8b94a9..c0c1223c9194 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -395,9 +395,17 @@
395#define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ 395#define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */
396#define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ 396#define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */
397#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ 397#define PCI_EXP_LNKCAP 12 /* Link Capabilities */
398#define PCI_EXP_LNKCAP_ASPMS 0xc00 /* ASPM Support */
399#define PCI_EXP_LNKCAP_L0SEL 0x7000 /* L0s Exit Latency */
400#define PCI_EXP_LNKCAP_L1EL 0x38000 /* L1 Exit Latency */
401#define PCI_EXP_LNKCAP_CLKPM 0x40000 /* L1 Clock Power Management */
398#define PCI_EXP_LNKCTL 16 /* Link Control */ 402#define PCI_EXP_LNKCTL 16 /* Link Control */
403#define PCI_EXP_LNKCTL_RL 0x20 /* Retrain Link */
404#define PCI_EXP_LNKCTL_CCC 0x40 /* Common Clock COnfiguration */
399#define PCI_EXP_LNKCTL_CLKREQ_EN 0x100 /* Enable clkreq */ 405#define PCI_EXP_LNKCTL_CLKREQ_EN 0x100 /* Enable clkreq */
400#define PCI_EXP_LNKSTA 18 /* Link Status */ 406#define PCI_EXP_LNKSTA 18 /* Link Status */
407#define PCI_EXP_LNKSTA_LT 0x800 /* Link Training */
408#define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */
401#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ 409#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */
402#define PCI_EXP_SLTCTL 24 /* Slot Control */ 410#define PCI_EXP_SLTCTL 24 /* Slot Control */
403#define PCI_EXP_SLTSTA 26 /* Slot Status */ 411#define PCI_EXP_SLTSTA 26 /* Slot Status */