aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorJon Mason <mason@myri.com>2011-07-20 16:20:54 -0400
committerJesse Barnes <jbarnes@virtuousgeek.org>2011-08-01 14:49:16 -0400
commitb03e7495a862b028294f59fc87286d6d78ee7fa1 (patch)
tree836fbfc2b0e34f034cb273c4d065baba3a65178c /include
parent5f66d2b58ca879e70740c82422354144845d6dd3 (diff)
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a different PCI-E maximum payload size. There is a sizable performance boost for having the largest possible maximum payload size on each PCI-E device. However, if the fabric is improperly configured, fatal bus errors can occur. Thus, it is important to ensure that PCI-E payloads sent by a device are never larger than the MPS setting of any device on the way to the destination. This can be achieved in two ways: - A conservative approach is to use the smallest common denominator of the entire tree below a root complex for every device on that fabric. This means, for example, that having a 128-byte MPS USB controller on one leg of a switch will dramatically reduce the performance of a video card or 10GE adapter on another leg of that same switch. It also means that any hierarchy supporting hotplug slots (presumably including ExpressCard and Thunderbolt; this should be double-checked) will have to be entirely clamped to 128 bytes, since we cannot predict what will be plugged into those slots, and we cannot change the MPS on a "live" system. - A more optimal way is possible, if it falls within a few constraints: * The top-level host bridge will never generate packets larger than the smallest TLP (or, at least, its packet size can be controlled independently of its MPS) * The device will never generate packets larger than MPS (which can be configured via MRRS) * No support for direct PCI-E <-> PCI-E transfers between devices without some additional code to specifically deal with that case Then we can use an approach that basically ignores downstream requests and focuses exclusively on upstream requests. In that case, all we need to care about is that a device MPS is no larger than its parent MPS, which allows us to keep all switches/bridges to the max MPS supported by their parent and eventually the PHB. In this case, your USB controller would no longer "starve" your 10GE Ethernet and your hotplug slots won't affect your global MPS. 
Additionally, the hotplugged devices themselves can be configured to a larger MPS up to the value configured in the hotplug bridge. To choose between the two available options, two PCI kernel boot args have been added to the PCI calls. "pcie_bus_safe" will provide the former behavior, while "pcie_bus_perf" will perform the latter behavior. By default, the latter behavior is used. NOTE: due to the location of the enablement, each arch will need to add calls to this function. This patch only enables x86. This patch includes a number of changes recommended by Benjamin Herrenschmidt. Tested-by: Jordan_Hargrave@dell.com Signed-off-by: Jon Mason <mason@myri.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/pci.h15
1 files changed, 14 insertions, 1 deletions
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f27893b3b724..1ff9bbafd932 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -251,7 +251,8 @@ struct pci_dev {
251 u8 revision; /* PCI revision, low byte of class word */ 251 u8 revision; /* PCI revision, low byte of class word */
252 u8 hdr_type; /* PCI header type (`multi' flag masked out) */ 252 u8 hdr_type; /* PCI header type (`multi' flag masked out) */
253 u8 pcie_cap; /* PCI-E capability offset */ 253 u8 pcie_cap; /* PCI-E capability offset */
254 u8 pcie_type; /* PCI-E device/port type */ 254 u8 pcie_type:4; /* PCI-E device/port type */
255 u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */
255 u8 rom_base_reg; /* which config register controls the ROM */ 256 u8 rom_base_reg; /* which config register controls the ROM */
256 u8 pin; /* which interrupt pin this device uses */ 257 u8 pin; /* which interrupt pin this device uses */
257 258
@@ -617,6 +618,16 @@ struct pci_driver {
617/* these external functions are only available when PCI support is enabled */ 618/* these external functions are only available when PCI support is enabled */
618#ifdef CONFIG_PCI 619#ifdef CONFIG_PCI
619 620
621extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss);
622
623enum pcie_bus_config_types {
624 PCIE_BUS_PERFORMANCE,
625 PCIE_BUS_SAFE,
626 PCIE_BUS_PEER2PEER,
627};
628
629extern enum pcie_bus_config_types pcie_bus_config;
630
620extern struct bus_type pci_bus_type; 631extern struct bus_type pci_bus_type;
621 632
622/* Do NOT directly access these two variables, unless you are arch specific pci 633/* Do NOT directly access these two variables, unless you are arch specific pci
@@ -796,6 +807,8 @@ int pcix_get_mmrbc(struct pci_dev *dev);
796int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); 807int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc);
797int pcie_get_readrq(struct pci_dev *dev); 808int pcie_get_readrq(struct pci_dev *dev);
798int pcie_set_readrq(struct pci_dev *dev, int rq); 809int pcie_set_readrq(struct pci_dev *dev, int rq);
810int pcie_get_mps(struct pci_dev *dev);
811int pcie_set_mps(struct pci_dev *dev, int mps);
799int __pci_reset_function(struct pci_dev *dev); 812int __pci_reset_function(struct pci_dev *dev);
800int pci_reset_function(struct pci_dev *dev); 813int pci_reset_function(struct pci_dev *dev);
801void pci_update_resource(struct pci_dev *dev, int resno); 814void pci_update_resource(struct pci_dev *dev, int resno);