aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShaohua Li <shaohua.li@intel.com>2008-02-24 20:46:41 -0500
committerGreg Kroah-Hartman <gregkh@suse.de>2008-04-21 00:47:03 -0400
commit7d715a6c1ae5785d00fb9a876b5abdfc43abc44b (patch)
tree58ec6d1969739a590e0c6c976bfebf04c8e9f31e
parent657472e9ccd9fccb82b775eb691c4b25b27451da (diff)
PCI: add PCI Express ASPM support
PCI Express ASPM defines a protocol for PCI Express components in the D0 state to reduce Link power by placing their Links into a low power state and instructing the other end of the Link to do likewise. This capability allows hardware-autonomous, dynamic Link power reduction beyond what is achievable by software-only controlled power management. However, the device must be configured appropriately by software. Enabling ASPM will save power, but will introduce device latency. This patch adds ASPM support to Linux. It introduces a global policy for ASPM, which can be controlled through the sysfs file /sys/module/pcie_aspm/parameters/policy. The interface can be used as a boot option too. Currently the following settings are available: -default, BIOS default setting -powersave, highest power saving mode, enable all available ASPM states and clock power management -performance, highest performance, disable ASPM and clock power management By default, the 'default' policy is used. In my test, the power difference between powersave mode and performance mode is about 1.3 W in a system with 3 PCIe links. Note: some devices might not work well with ASPM, because of either a chipset issue or a device issue. The patch provides an API (pci_disable_link_state) that a driver can use to disable ASPM for a specific device. Signed-off-by: Shaohua Li <shaohua.li@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r--drivers/pci/pci-sysfs.c5
-rw-r--r--drivers/pci/pci.c4
-rw-r--r--drivers/pci/pcie/Kconfig20
-rw-r--r--drivers/pci/pcie/Makefile3
-rw-r--r--drivers/pci/pcie/aspm.c811
-rw-r--r--drivers/pci/probe.c5
-rw-r--r--drivers/pci/remove.c4
-rw-r--r--include/linux/pci-aspm.h56
-rw-r--r--include/linux/pci.h5
-rw-r--r--include/linux/pci_regs.h8
10 files changed, 921 insertions, 0 deletions
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 8dcf1458aa2f..f5b0b622c189 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -21,6 +21,7 @@
21#include <linux/topology.h> 21#include <linux/topology.h>
22#include <linux/mm.h> 22#include <linux/mm.h>
23#include <linux/capability.h> 23#include <linux/capability.h>
24#include <linux/pci-aspm.h>
24#include "pci.h" 25#include "pci.h"
25 26
26static int sysfs_initialized; /* = 0 */ 27static int sysfs_initialized; /* = 0 */
@@ -650,6 +651,8 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
650 if (pcibios_add_platform_entries(pdev)) 651 if (pcibios_add_platform_entries(pdev))
651 goto err_rom_file; 652 goto err_rom_file;
652 653
654 pcie_aspm_create_sysfs_dev_files(pdev);
655
653 return 0; 656 return 0;
654 657
655err_rom_file: 658err_rom_file:
@@ -679,6 +682,8 @@ void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
679 if (!sysfs_initialized) 682 if (!sysfs_initialized)
680 return; 683 return;
681 684
685 pcie_aspm_remove_sysfs_dev_files(pdev);
686
682 if (pdev->cfg_size < 4096) 687 if (pdev->cfg_size < 4096)
683 sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr); 688 sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
684 else 689 else
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index a4445b7210bf..f331feb4eb8d 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -18,6 +18,7 @@
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/log2.h> 20#include <linux/log2.h>
21#include <linux/pci-aspm.h>
21#include <asm/dma.h> /* isa_dma_bridge_buggy */ 22#include <asm/dma.h> /* isa_dma_bridge_buggy */
22#include "pci.h" 23#include "pci.h"
23 24
@@ -501,6 +502,9 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state)
501 if (need_restore) 502 if (need_restore)
502 pci_restore_bars(dev); 503 pci_restore_bars(dev);
503 504
505 if (dev->bus->self)
506 pcie_aspm_pm_state_change(dev->bus->self);
507
504 return 0; 508 return 0;
505} 509}
506 510
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 287a9311716c..25b04fb2517d 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -26,3 +26,23 @@ config HOTPLUG_PCI_PCIE
26 When in doubt, say N. 26 When in doubt, say N.
27 27
28source "drivers/pci/pcie/aer/Kconfig" 28source "drivers/pci/pcie/aer/Kconfig"
29
30#
31# PCI Express ASPM
32#
33config PCIEASPM
34 bool "PCI Express ASPM support(Experimental)"
35 depends on PCI && EXPERIMENTAL && PCIEPORTBUS
36 default y
37 help
38 This enables PCI Express ASPM (Active State Power Management) and
39 Clock Power Management. ASPM supports state L0/L0s/L1.
40
41 When in doubt, say N.
42config PCIEASPM_DEBUG
43 bool "Debug PCI Express ASPM"
44 depends on PCIEASPM
45 default n
46 help
47 This enables PCI Express ASPM debug support. It will add per-device
48 interface to control ASPM.
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index e00fb99acf44..11f6bb1eae24 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -2,6 +2,9 @@
2# Makefile for PCI-Express PORT Driver 2# Makefile for PCI-Express PORT Driver
3# 3#
4 4
5# Build PCI Express ASPM if needed
6obj-$(CONFIG_PCIEASPM) += aspm.o
7
5pcieportdrv-y := portdrv_core.o portdrv_pci.o portdrv_bus.o 8pcieportdrv-y := portdrv_core.o portdrv_pci.o portdrv_bus.o
6 9
7obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o 10obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
new file mode 100644
index 000000000000..61fedb2448b6
--- /dev/null
+++ b/drivers/pci/pcie/aspm.c
@@ -0,0 +1,811 @@
1/*
2 * File: drivers/pci/pcie/aspm.c
3 * Enabling PCIE link L0s/L1 state and Clock Power Management
4 *
5 * Copyright (C) 2007 Intel
6 * Copyright (C) Zhang Yanmin (yanmin.zhang@intel.com)
7 * Copyright (C) Shaohua Li (shaohua.li@intel.com)
8 */
9
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
#include <linux/pci_regs.h>
#include <linux/errno.h>
#include <linux/pm.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/pci-aspm.h>
#include "../pci.h"
21
22#ifdef MODULE_PARAM_PREFIX
23#undef MODULE_PARAM_PREFIX
24#endif
25#define MODULE_PARAM_PREFIX "pcie_aspm."
26
/*
 * Latency limits of one function behind a link, in ns. Filled in by
 * pcie_aspm_cap_init() from the function's Device Capabilities register.
 */
struct endpoint_state {
	unsigned int l0s_acceptable_latency;	/* limit for L0s */
	unsigned int l1_acceptable_latency;	/* limit for L1 */
};
31
32struct pcie_link_state {
33 struct list_head sibiling;
34 struct pci_dev *pdev;
35
36 /* ASPM state */
37 unsigned int support_state;
38 unsigned int enabled_state;
39 unsigned int bios_aspm_state;
40 /* upstream component */
41 unsigned int l0s_upper_latency;
42 unsigned int l1_upper_latency;
43 /* downstream component */
44 unsigned int l0s_down_latency;
45 unsigned int l1_down_latency;
46 /* Clock PM state*/
47 unsigned int clk_pm_capable;
48 unsigned int clk_pm_enabled;
49 unsigned int bios_clk_state;
50
51 /*
52 * A pcie downstream port only has one slot under it, so at most there
53 * are 8 functions
54 */
55 struct endpoint_state endpoints[8];
56};
57
/* Non-zero once the "pcie_noaspm" boot option has been seen */
static int aspm_disabled;
/* Held around link (re)configuration and changes to link_list */
static DEFINE_MUTEX(aspm_lock);
/* All links that currently have a pcie_link_state */
static LIST_HEAD(link_list);

/* Values of aspm_policy; they double as indexes into policy_str[] */
#define POLICY_DEFAULT 0	/* BIOS default setting */
#define POLICY_PERFORMANCE 1	/* high performance */
#define POLICY_POWERSAVE 2	/* high power saving */

/* Global policy, changed through the "policy" module parameter */
static int aspm_policy;

static const char *policy_str[] = {
	[POLICY_DEFAULT] = "default",
	[POLICY_PERFORMANCE] = "performance",
	[POLICY_POWERSAVE] = "powersave"
};
71
72static int policy_to_aspm_state(struct pci_dev *pdev)
73{
74 struct pcie_link_state *link_state = pdev->link_state;
75
76 switch (aspm_policy) {
77 case POLICY_PERFORMANCE:
78 /* Disable ASPM and Clock PM */
79 return 0;
80 case POLICY_POWERSAVE:
81 /* Enable ASPM L0s/L1 */
82 return PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1;
83 case POLICY_DEFAULT:
84 return link_state->bios_aspm_state;
85 }
86 return 0;
87}
88
89static int policy_to_clkpm_state(struct pci_dev *pdev)
90{
91 struct pcie_link_state *link_state = pdev->link_state;
92
93 switch (aspm_policy) {
94 case POLICY_PERFORMANCE:
95 /* Disable ASPM and Clock PM */
96 return 0;
97 case POLICY_POWERSAVE:
98 /* Disable Clock PM */
99 return 1;
100 case POLICY_DEFAULT:
101 return link_state->bios_clk_state;
102 }
103 return 0;
104}
105
106static void pcie_set_clock_pm(struct pci_dev *pdev, int enable)
107{
108 struct pci_dev *child_dev;
109 int pos;
110 u16 reg16;
111 struct pcie_link_state *link_state = pdev->link_state;
112
113 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
114 pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
115 if (!pos)
116 return;
117 pci_read_config_word(child_dev, pos + PCI_EXP_LNKCTL, &reg16);
118 if (enable)
119 reg16 |= PCI_EXP_LNKCTL_CLKREQ_EN;
120 else
121 reg16 &= ~PCI_EXP_LNKCTL_CLKREQ_EN;
122 pci_write_config_word(child_dev, pos + PCI_EXP_LNKCTL, reg16);
123 }
124 link_state->clk_pm_enabled = !!enable;
125}
126
127static void pcie_check_clock_pm(struct pci_dev *pdev)
128{
129 int pos;
130 u32 reg32;
131 u16 reg16;
132 int capable = 1, enabled = 1;
133 struct pci_dev *child_dev;
134 struct pcie_link_state *link_state = pdev->link_state;
135
136 /* All functions should have the same cap and state, take the worst */
137 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
138 pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
139 if (!pos)
140 return;
141 pci_read_config_dword(child_dev, pos + PCI_EXP_LNKCAP, &reg32);
142 if (!(reg32 & PCI_EXP_LNKCAP_CLKPM)) {
143 capable = 0;
144 enabled = 0;
145 break;
146 }
147 pci_read_config_word(child_dev, pos + PCI_EXP_LNKCTL, &reg16);
148 if (!(reg16 & PCI_EXP_LNKCTL_CLKREQ_EN))
149 enabled = 0;
150 }
151 link_state->clk_pm_capable = capable;
152 link_state->clk_pm_enabled = enabled;
153 link_state->bios_clk_state = enabled;
154 pcie_set_clock_pm(pdev, policy_to_clkpm_state(pdev));
155}
156
157/*
158 * pcie_aspm_configure_common_clock: check if the 2 ends of a link
159 * could use common clock. If they are, configure them to use the
160 * common clock. That will reduce the ASPM state exit latency.
161 */
162static void pcie_aspm_configure_common_clock(struct pci_dev *pdev)
163{
164 int pos, child_pos;
165 u16 reg16 = 0;
166 struct pci_dev *child_dev;
167 int same_clock = 1;
168
169 /*
170 * all functions of a slot should have the same Slot Clock
171 * Configuration, so just check one function
172 * */
173 child_dev = list_entry(pdev->subordinate->devices.next, struct pci_dev,
174 bus_list);
175 BUG_ON(!child_dev->is_pcie);
176
177 /* Check downstream component if bit Slot Clock Configuration is 1 */
178 child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
179 pci_read_config_word(child_dev, child_pos + PCI_EXP_LNKSTA, &reg16);
180 if (!(reg16 & PCI_EXP_LNKSTA_SLC))
181 same_clock = 0;
182
183 /* Check upstream component if bit Slot Clock Configuration is 1 */
184 pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
185 pci_read_config_word(pdev, pos + PCI_EXP_LNKSTA, &reg16);
186 if (!(reg16 & PCI_EXP_LNKSTA_SLC))
187 same_clock = 0;
188
189 /* Configure downstream component, all functions */
190 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
191 child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
192 pci_read_config_word(child_dev, child_pos + PCI_EXP_LNKCTL,
193 &reg16);
194 if (same_clock)
195 reg16 |= PCI_EXP_LNKCTL_CCC;
196 else
197 reg16 &= ~PCI_EXP_LNKCTL_CCC;
198 pci_write_config_word(child_dev, child_pos + PCI_EXP_LNKCTL,
199 reg16);
200 }
201
202 /* Configure upstream component */
203 pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
204 if (same_clock)
205 reg16 |= PCI_EXP_LNKCTL_CCC;
206 else
207 reg16 &= ~PCI_EXP_LNKCTL_CCC;
208 pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16);
209
210 /* retrain link */
211 reg16 |= PCI_EXP_LNKCTL_RL;
212 pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16);
213
214 /* Wait for link training end */
215 while (1) {
216 pci_read_config_word(pdev, pos + PCI_EXP_LNKSTA, &reg16);
217 if (!(reg16 & PCI_EXP_LNKSTA_LT))
218 break;
219 cpu_relax();
220 }
221}
222
/*
 * calc_L0S_latency: Convert L0s latency encoding to ns
 *
 * Encodings other than 0x7 map to 64ns << encoding. Encoding 0x7 yields
 * -1U when @ac is non-zero and 5000 (more than 4us) when @ac is zero.
 */
static unsigned int calc_L0S_latency(unsigned int latency_encoding, int ac)
{
	const unsigned int base_ns = 64;

	if (latency_encoding != 0x7)
		return base_ns * (1 << latency_encoding);

	if (ac)
		return -1U;
	return 5 * 1000;	/* > 4us */
}
239
/*
 * calc_L1_latency: Convert L1 latency encoding to ns
 *
 * Encodings other than 0x7 map to 1us << encoding. Encoding 0x7 yields
 * -1U when @ac is non-zero and 65000 (more than 64us) when @ac is zero.
 */
static unsigned int calc_L1_latency(unsigned int latency_encoding, int ac)
{
	const unsigned int base_ns = 1000;

	if (latency_encoding != 0x7)
		return base_ns * (1 << latency_encoding);

	if (ac)
		return -1U;
	return 65 * 1000;	/* > 64us */
}
256
257static void pcie_aspm_get_cap_device(struct pci_dev *pdev, u32 *state,
258 unsigned int *l0s, unsigned int *l1, unsigned int *enabled)
259{
260 int pos;
261 u16 reg16;
262 u32 reg32;
263 unsigned int latency;
264
265 pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
266 pci_read_config_dword(pdev, pos + PCI_EXP_LNKCAP, &reg32);
267 *state = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10;
268 if (*state != PCIE_LINK_STATE_L0S &&
269 *state != (PCIE_LINK_STATE_L1|PCIE_LINK_STATE_L0S))
270 *state = 0;
271 if (*state == 0)
272 return;
273
274 latency = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12;
275 *l0s = calc_L0S_latency(latency, 0);
276 if (*state & PCIE_LINK_STATE_L1) {
277 latency = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15;
278 *l1 = calc_L1_latency(latency, 0);
279 }
280 pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
281 *enabled = reg16 & (PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1);
282}
283
284static void pcie_aspm_cap_init(struct pci_dev *pdev)
285{
286 struct pci_dev *child_dev;
287 u32 state, tmp;
288 struct pcie_link_state *link_state = pdev->link_state;
289
290 /* upstream component states */
291 pcie_aspm_get_cap_device(pdev, &link_state->support_state,
292 &link_state->l0s_upper_latency,
293 &link_state->l1_upper_latency,
294 &link_state->enabled_state);
295 /* downstream component states, all functions have the same setting */
296 child_dev = list_entry(pdev->subordinate->devices.next, struct pci_dev,
297 bus_list);
298 pcie_aspm_get_cap_device(child_dev, &state,
299 &link_state->l0s_down_latency,
300 &link_state->l1_down_latency,
301 &tmp);
302 link_state->support_state &= state;
303 if (!link_state->support_state)
304 return;
305 link_state->enabled_state &= link_state->support_state;
306 link_state->bios_aspm_state = link_state->enabled_state;
307
308 /* ENDPOINT states*/
309 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
310 int pos;
311 u32 reg32;
312 unsigned int latency;
313 struct endpoint_state *ep_state =
314 &link_state->endpoints[PCI_FUNC(child_dev->devfn)];
315
316 if (child_dev->pcie_type != PCI_EXP_TYPE_ENDPOINT &&
317 child_dev->pcie_type != PCI_EXP_TYPE_LEG_END)
318 continue;
319
320 pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
321 pci_read_config_dword(child_dev, pos + PCI_EXP_DEVCAP, &reg32);
322 latency = (reg32 & PCI_EXP_DEVCAP_L0S) >> 6;
323 latency = calc_L0S_latency(latency, 1);
324 ep_state->l0s_acceptable_latency = latency;
325 if (link_state->support_state & PCIE_LINK_STATE_L1) {
326 latency = (reg32 & PCI_EXP_DEVCAP_L1) >> 9;
327 latency = calc_L1_latency(latency, 1);
328 ep_state->l1_acceptable_latency = latency;
329 }
330 }
331}
332
333static unsigned int __pcie_aspm_check_state_one(struct pci_dev *pdev,
334 unsigned int state)
335{
336 struct pci_dev *parent_dev, *tmp_dev;
337 unsigned int latency, l1_latency = 0;
338 struct pcie_link_state *link_state;
339 struct endpoint_state *ep_state;
340
341 parent_dev = pdev->bus->self;
342 link_state = parent_dev->link_state;
343 state &= link_state->support_state;
344 if (state == 0)
345 return 0;
346 ep_state = &link_state->endpoints[PCI_FUNC(pdev->devfn)];
347
348 /*
349 * Check latency for endpoint device.
350 * TBD: The latency from the endpoint to root complex vary per
351 * switch's upstream link state above the device. Here we just do a
352 * simple check which assumes all links above the device can be in L1
353 * state, that is we just consider the worst case. If switch's upstream
354 * link can't be put into L0S/L1, then our check is too strictly.
355 */
356 tmp_dev = pdev;
357 while (state & (PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)) {
358 parent_dev = tmp_dev->bus->self;
359 link_state = parent_dev->link_state;
360 if (state & PCIE_LINK_STATE_L0S) {
361 latency = max_t(unsigned int,
362 link_state->l0s_upper_latency,
363 link_state->l0s_down_latency);
364 if (latency > ep_state->l0s_acceptable_latency)
365 state &= ~PCIE_LINK_STATE_L0S;
366 }
367 if (state & PCIE_LINK_STATE_L1) {
368 latency = max_t(unsigned int,
369 link_state->l1_upper_latency,
370 link_state->l1_down_latency);
371 if (latency + l1_latency >
372 ep_state->l1_acceptable_latency)
373 state &= ~PCIE_LINK_STATE_L1;
374 }
375 if (!parent_dev->bus->self) /* parent_dev is a root port */
376 break;
377 else {
378 /*
379 * parent_dev is the downstream port of a switch, make
380 * tmp_dev the upstream port of the switch
381 */
382 tmp_dev = parent_dev->bus->self;
383 /*
384 * every switch on the path to root complex need 1 more
385 * microsecond for L1. Spec doesn't mention L0S.
386 */
387 if (state & PCIE_LINK_STATE_L1)
388 l1_latency += 1000;
389 }
390 }
391 return state;
392}
393
394static unsigned int pcie_aspm_check_state(struct pci_dev *pdev,
395 unsigned int state)
396{
397 struct pci_dev *child_dev;
398
399 /* If no child, disable the link */
400 if (list_empty(&pdev->subordinate->devices))
401 return 0;
402 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
403 if (child_dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) {
404 /*
405 * If downstream component of a link is pci bridge, we
406 * disable ASPM for now for the link
407 * */
408 state = 0;
409 break;
410 }
411 if ((child_dev->pcie_type != PCI_EXP_TYPE_ENDPOINT &&
412 child_dev->pcie_type != PCI_EXP_TYPE_LEG_END))
413 continue;
414 /* Device not in D0 doesn't need check latency */
415 if (child_dev->current_state == PCI_D1 ||
416 child_dev->current_state == PCI_D2 ||
417 child_dev->current_state == PCI_D3hot ||
418 child_dev->current_state == PCI_D3cold)
419 continue;
420 state = __pcie_aspm_check_state_one(child_dev, state);
421 }
422 return state;
423}
424
425static void __pcie_aspm_config_one_dev(struct pci_dev *pdev, unsigned int state)
426{
427 u16 reg16;
428 int pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
429
430 pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
431 reg16 &= ~0x3;
432 reg16 |= state;
433 pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16);
434}
435
436static void __pcie_aspm_config_link(struct pci_dev *pdev, unsigned int state)
437{
438 struct pci_dev *child_dev;
439 int valid = 1;
440 struct pcie_link_state *link_state = pdev->link_state;
441
442 /*
443 * if the downstream component has pci bridge function, don't do ASPM
444 * now
445 */
446 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
447 if (child_dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) {
448 valid = 0;
449 break;
450 }
451 }
452 if (!valid)
453 return;
454
455 /*
456 * spec 2.0 suggests all functions should be configured the same
457 * setting for ASPM. Enabling ASPM L1 should be done in upstream
458 * component first and then downstream, and vice versa for disabling
459 * ASPM L1. Spec doesn't mention L0S.
460 */
461 if (state & PCIE_LINK_STATE_L1)
462 __pcie_aspm_config_one_dev(pdev, state);
463
464 list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list)
465 __pcie_aspm_config_one_dev(child_dev, state);
466
467 if (!(state & PCIE_LINK_STATE_L1))
468 __pcie_aspm_config_one_dev(pdev, state);
469
470 link_state->enabled_state = state;
471}
472
473static void __pcie_aspm_configure_link_state(struct pci_dev *pdev,
474 unsigned int state)
475{
476 struct pcie_link_state *link_state = pdev->link_state;
477
478 if (link_state->support_state == 0)
479 return;
480 state &= PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1;
481
482 /* state 0 means disabling aspm */
483 state = pcie_aspm_check_state(pdev, state);
484 if (link_state->enabled_state == state)
485 return;
486 __pcie_aspm_config_link(pdev, state);
487}
488
489/*
490 * pcie_aspm_configure_link_state: enable/disable PCI express link state
491 * @pdev: the root port or switch downstream port
492 */
493static void pcie_aspm_configure_link_state(struct pci_dev *pdev,
494 unsigned int state)
495{
496 down_read(&pci_bus_sem);
497 mutex_lock(&aspm_lock);
498 __pcie_aspm_configure_link_state(pdev, state);
499 mutex_unlock(&aspm_lock);
500 up_read(&pci_bus_sem);
501}
502
503static void free_link_state(struct pci_dev *pdev)
504{
505 kfree(pdev->link_state);
506 pdev->link_state = NULL;
507}
508
509/*
510 * pcie_aspm_init_link_state: Initiate PCI express link state.
511 * It is called after the pcie and its children devices are scaned.
512 * @pdev: the root port or switch downstream port
513 */
514void pcie_aspm_init_link_state(struct pci_dev *pdev)
515{
516 unsigned int state;
517 struct pcie_link_state *link_state;
518 int error = 0;
519
520 if (aspm_disabled || !pdev->is_pcie || pdev->link_state)
521 return;
522 if (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
523 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
524 return;
525 down_read(&pci_bus_sem);
526 if (list_empty(&pdev->subordinate->devices))
527 goto out;
528
529 mutex_lock(&aspm_lock);
530
531 link_state = kzalloc(sizeof(*link_state), GFP_KERNEL);
532 if (!link_state)
533 goto unlock_out;
534 pdev->link_state = link_state;
535
536 pcie_aspm_configure_common_clock(pdev);
537
538 pcie_aspm_cap_init(pdev);
539
540 /* config link state to avoid BIOS error */
541 state = pcie_aspm_check_state(pdev, policy_to_aspm_state(pdev));
542 __pcie_aspm_config_link(pdev, state);
543
544 pcie_check_clock_pm(pdev);
545
546 link_state->pdev = pdev;
547 list_add(&link_state->sibiling, &link_list);
548
549unlock_out:
550 if (error)
551 free_link_state(pdev);
552 mutex_unlock(&aspm_lock);
553out:
554 up_read(&pci_bus_sem);
555}
556
557/* @pdev: the endpoint device */
558void pcie_aspm_exit_link_state(struct pci_dev *pdev)
559{
560 struct pci_dev *parent = pdev->bus->self;
561 struct pcie_link_state *link_state = parent->link_state;
562
563 if (aspm_disabled || !pdev->is_pcie || !parent || !link_state)
564 return;
565 if (parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
566 parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
567 return;
568 down_read(&pci_bus_sem);
569 mutex_lock(&aspm_lock);
570
571 /*
572 * All PCIe functions are in one slot, remove one function will remove
573 * the the whole slot, so just wait
574 */
575 if (!list_empty(&parent->subordinate->devices))
576 goto out;
577
578 /* All functions are removed, so just disable ASPM for the link */
579 __pcie_aspm_config_one_dev(parent, 0);
580 list_del(&link_state->sibiling);
581 /* Clock PM is for endpoint device */
582
583 free_link_state(parent);
584out:
585 mutex_unlock(&aspm_lock);
586 up_read(&pci_bus_sem);
587}
588
589/* @pdev: the root port or switch downstream port */
590void pcie_aspm_pm_state_change(struct pci_dev *pdev)
591{
592 struct pcie_link_state *link_state = pdev->link_state;
593
594 if (aspm_disabled || !pdev->is_pcie || !pdev->link_state)
595 return;
596 if (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
597 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
598 return;
599 /*
600 * devices changed PM state, we should recheck if latency meets all
601 * functions' requirement
602 */
603 pcie_aspm_configure_link_state(pdev, link_state->enabled_state);
604}
605
606/*
607 * pci_disable_link_state - disable pci device's link state, so the link will
608 * never enter specific states
609 */
610void pci_disable_link_state(struct pci_dev *pdev, int state)
611{
612 struct pci_dev *parent = pdev->bus->self;
613 struct pcie_link_state *link_state;
614
615 if (aspm_disabled || !pdev->is_pcie)
616 return;
617 if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
618 pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
619 parent = pdev;
620 if (!parent || !parent->link_state)
621 return;
622
623 down_read(&pci_bus_sem);
624 mutex_lock(&aspm_lock);
625 link_state = parent->link_state;
626 link_state->support_state &=
627 ~(state & (PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1));
628 if (state & PCIE_LINK_STATE_CLKPM)
629 link_state->clk_pm_capable = 0;
630
631 __pcie_aspm_configure_link_state(parent, link_state->enabled_state);
632 if (!link_state->clk_pm_capable && link_state->clk_pm_enabled)
633 pcie_set_clock_pm(parent, 0);
634 mutex_unlock(&aspm_lock);
635 up_read(&pci_bus_sem);
636}
637EXPORT_SYMBOL(pci_disable_link_state);
638
639static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
640{
641 int i;
642 struct pci_dev *pdev;
643 struct pcie_link_state *link_state;
644
645 for (i = 0; i < ARRAY_SIZE(policy_str); i++)
646 if (!strncmp(val, policy_str[i], strlen(policy_str[i])))
647 break;
648 if (i >= ARRAY_SIZE(policy_str))
649 return -EINVAL;
650 if (i == aspm_policy)
651 return 0;
652
653 down_read(&pci_bus_sem);
654 mutex_lock(&aspm_lock);
655 aspm_policy = i;
656 list_for_each_entry(link_state, &link_list, sibiling) {
657 pdev = link_state->pdev;
658 __pcie_aspm_configure_link_state(pdev,
659 policy_to_aspm_state(pdev));
660 if (link_state->clk_pm_capable &&
661 link_state->clk_pm_enabled != policy_to_clkpm_state(pdev))
662 pcie_set_clock_pm(pdev, policy_to_clkpm_state(pdev));
663
664 }
665 mutex_unlock(&aspm_lock);
666 up_read(&pci_bus_sem);
667 return 0;
668}
669
670static int pcie_aspm_get_policy(char *buffer, struct kernel_param *kp)
671{
672 int i, cnt = 0;
673 for (i = 0; i < ARRAY_SIZE(policy_str); i++)
674 if (i == aspm_policy)
675 cnt += sprintf(buffer + cnt, "[%s] ", policy_str[i]);
676 else
677 cnt += sprintf(buffer + cnt, "%s ", policy_str[i]);
678 return cnt;
679}
680
681module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
682 NULL, 0644);
683
684#ifdef CONFIG_PCIEASPM_DEBUG
685static ssize_t link_state_show(struct device *dev,
686 struct device_attribute *attr,
687 char *buf)
688{
689 struct pci_dev *pci_device = to_pci_dev(dev);
690 struct pcie_link_state *link_state = pci_device->link_state;
691
692 return sprintf(buf, "%d\n", link_state->enabled_state);
693}
694
695static ssize_t link_state_store(struct device *dev,
696 struct device_attribute *attr,
697 const char *buf,
698 size_t n)
699{
700 struct pci_dev *pci_device = to_pci_dev(dev);
701 int state;
702
703 if (n < 1)
704 return -EINVAL;
705 state = buf[0]-'0';
706 if (state >= 0 && state <= 3) {
707 /* setup link aspm state */
708 pcie_aspm_configure_link_state(pci_device, state);
709 return n;
710 }
711
712 return -EINVAL;
713}
714
715static ssize_t clk_ctl_show(struct device *dev,
716 struct device_attribute *attr,
717 char *buf)
718{
719 struct pci_dev *pci_device = to_pci_dev(dev);
720 struct pcie_link_state *link_state = pci_device->link_state;
721
722 return sprintf(buf, "%d\n", link_state->clk_pm_enabled);
723}
724
725static ssize_t clk_ctl_store(struct device *dev,
726 struct device_attribute *attr,
727 const char *buf,
728 size_t n)
729{
730 struct pci_dev *pci_device = to_pci_dev(dev);
731 int state;
732
733 if (n < 1)
734 return -EINVAL;
735 state = buf[0]-'0';
736
737 down_read(&pci_bus_sem);
738 mutex_lock(&aspm_lock);
739 pcie_set_clock_pm(pci_device, !!state);
740 mutex_unlock(&aspm_lock);
741 up_read(&pci_bus_sem);
742
743 return n;
744}
745
746static DEVICE_ATTR(link_state, 0644, link_state_show, link_state_store);
747static DEVICE_ATTR(clk_ctl, 0644, clk_ctl_show, clk_ctl_store);
748
749static char power_group[] = "power";
750void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev)
751{
752 struct pcie_link_state *link_state = pdev->link_state;
753
754 if (!pdev->is_pcie || (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
755 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM) || !link_state)
756 return;
757
758 if (link_state->support_state)
759 sysfs_add_file_to_group(&pdev->dev.kobj,
760 &dev_attr_link_state.attr, power_group);
761 if (link_state->clk_pm_capable)
762 sysfs_add_file_to_group(&pdev->dev.kobj,
763 &dev_attr_clk_ctl.attr, power_group);
764}
765
766void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
767{
768 struct pcie_link_state *link_state = pdev->link_state;
769
770 if (!pdev->is_pcie || (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
771 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM) || !link_state)
772 return;
773
774 if (link_state->support_state)
775 sysfs_remove_file_from_group(&pdev->dev.kobj,
776 &dev_attr_link_state.attr, power_group);
777 if (link_state->clk_pm_capable)
778 sysfs_remove_file_from_group(&pdev->dev.kobj,
779 &dev_attr_clk_ctl.attr, power_group);
780}
781#endif
782
783static int __init pcie_aspm_disable(char *str)
784{
785 aspm_disabled = 1;
786 return 1;
787}
788
789__setup("pcie_noaspm", pcie_aspm_disable);
790
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#include <linux/pci-acpi.h>
/* Set the ASPM and Clock PM support flags for the platform's _OSC */
static void pcie_aspm_platform_init(void)
{
	pcie_osc_support_set(OSC_ACTIVE_STATE_PWR_SUPPORT |
		OSC_CLOCK_PWR_CAPABILITY_SUPPORT);
}
#else
/* Without ACPI there is nothing to set up */
static inline void pcie_aspm_platform_init(void) { }
#endif
802
803static int __init pcie_aspm_init(void)
804{
805 if (aspm_disabled)
806 return 0;
807 pcie_aspm_platform_init();
808 return 0;
809}
810
811fs_initcall(pcie_aspm_init);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 07d5c7424b01..284ef392c3ea 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -9,6 +9,7 @@
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/cpumask.h> 11#include <linux/cpumask.h>
12#include <linux/pci-aspm.h>
12#include "pci.h" 13#include "pci.h"
13 14
14#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ 15#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */
@@ -1014,6 +1015,10 @@ int pci_scan_slot(struct pci_bus *bus, int devfn)
1014 break; 1015 break;
1015 } 1016 }
1016 } 1017 }
1018
1019 if (bus->self)
1020 pcie_aspm_init_link_state(bus->self);
1021
1017 return nr; 1022 return nr;
1018} 1023}
1019 1024
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index b6824833343f..bdc2a44d68e1 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -1,5 +1,6 @@
1#include <linux/pci.h> 1#include <linux/pci.h>
2#include <linux/module.h> 2#include <linux/module.h>
3#include <linux/pci-aspm.h>
3#include "pci.h" 4#include "pci.h"
4 5
5static void pci_free_resources(struct pci_dev *dev) 6static void pci_free_resources(struct pci_dev *dev)
@@ -24,6 +25,9 @@ static void pci_stop_dev(struct pci_dev *dev)
24 device_unregister(&dev->dev); 25 device_unregister(&dev->dev);
25 dev->is_added = 0; 26 dev->is_added = 0;
26 } 27 }
28
29 if (dev->bus->self)
30 pcie_aspm_exit_link_state(dev);
27} 31}
28 32
29static void pci_destroy_dev(struct pci_dev *dev) 33static void pci_destroy_dev(struct pci_dev *dev)
diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
new file mode 100644
index 000000000000..a1a1e618e996
--- /dev/null
+++ b/include/linux/pci-aspm.h
@@ -0,0 +1,56 @@
1/*
2 * aspm.h
3 *
4 * PCI Express ASPM defines and function prototypes
5 *
6 * Copyright (C) 2007 Intel Corp.
7 * Zhang Yanmin (yanmin.zhang@intel.com)
8 * Shaohua Li (shaohua.li@intel.com)
9 *
10 * For more information, please consult the following manuals (look at
11 * http://www.pcisig.com/ for how to get them):
12 *
13 * PCI Express Specification
14 */
15
16#ifndef LINUX_ASPM_H
17#define LINUX_ASPM_H
18
19#include <linux/pci.h>
20
21#define PCIE_LINK_STATE_L0S 1
22#define PCIE_LINK_STATE_L1 2
23#define PCIE_LINK_STATE_CLKPM 4
24
#ifdef CONFIG_PCIEASPM
extern void pcie_aspm_init_link_state(struct pci_dev *pdev);
extern void pcie_aspm_exit_link_state(struct pci_dev *pdev);
extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
extern void pci_disable_link_state(struct pci_dev *pdev, int state);
#else
/* ASPM support not built in: every hook is a no-op */
static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { }
static inline void pci_disable_link_state(struct pci_dev *pdev, int state) { }
#endif
44
#ifdef CONFIG_PCIEASPM_DEBUG /* this depends on CONFIG_PCIEASPM */
extern void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev);
extern void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev);
#else
/* No ASPM debug support: no per-device sysfs files to manage */
static inline void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev) { }
static inline void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev) { }
#endif
56#endif /* LINUX_ASPM_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 3b8a4e17052f..14bf3d236d19 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -128,6 +128,7 @@ struct pci_cap_saved_state {
128 u32 data[0]; 128 u32 data[0];
129}; 129};
130 130
131struct pcie_link_state;
131/* 132/*
132 * The pci_dev structure is used to describe PCI devices. 133 * The pci_dev structure is used to describe PCI devices.
133 */ 134 */
@@ -164,6 +165,10 @@ struct pci_dev {
164 this is D0-D3, D0 being fully functional, 165 this is D0-D3, D0 being fully functional,
165 and D3 being off. */ 166 and D3 being off. */
166 167
168#ifdef CONFIG_PCIEASPM
169 struct pcie_link_state *link_state; /* ASPM link state. */
170#endif
171
167 pci_channel_state_t error_state; /* current connectivity state */ 172 pci_channel_state_t error_state; /* current connectivity state */
168 struct device dev; /* Generic device interface */ 173 struct device dev; /* Generic device interface */
169 174
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index c1914a8b94a9..c0c1223c9194 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -395,9 +395,17 @@
395#define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ 395#define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */
396#define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ 396#define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */
397#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ 397#define PCI_EXP_LNKCAP 12 /* Link Capabilities */
398#define PCI_EXP_LNKCAP_ASPMS 0xc00 /* ASPM Support */
399#define PCI_EXP_LNKCAP_L0SEL 0x7000 /* L0s Exit Latency */
400#define PCI_EXP_LNKCAP_L1EL 0x38000 /* L1 Exit Latency */
401#define PCI_EXP_LNKCAP_CLKPM 0x40000 /* L1 Clock Power Management */
398#define PCI_EXP_LNKCTL 16 /* Link Control */ 402#define PCI_EXP_LNKCTL 16 /* Link Control */
403#define PCI_EXP_LNKCTL_RL 0x20 /* Retrain Link */
 404#define PCI_EXP_LNKCTL_CCC 0x40 /* Common Clock Configuration */
399#define PCI_EXP_LNKCTL_CLKREQ_EN 0x100 /* Enable clkreq */ 405#define PCI_EXP_LNKCTL_CLKREQ_EN 0x100 /* Enable clkreq */
400#define PCI_EXP_LNKSTA 18 /* Link Status */ 406#define PCI_EXP_LNKSTA 18 /* Link Status */
407#define PCI_EXP_LNKSTA_LT 0x800 /* Link Training */
408#define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */
401#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ 409#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */
402#define PCI_EXP_SLTCTL 24 /* Slot Control */ 410#define PCI_EXP_SLTCTL 24 /* Slot Control */
403#define PCI_EXP_SLTSTA 26 /* Slot Status */ 411#define PCI_EXP_SLTSTA 26 /* Slot Status */