diff options
author | Jon Mason <mason@myri.com> | 2011-07-20 16:20:54 -0400 |
---|---|---|
committer | Jesse Barnes <jbarnes@virtuousgeek.org> | 2011-08-01 14:49:16 -0400 |
commit | b03e7495a862b028294f59fc87286d6d78ee7fa1 (patch) | |
tree | 836fbfc2b0e34f034cb273c4d065baba3a65178c /include | |
parent | 5f66d2b58ca879e70740c82422354144845d6dd3 (diff) |
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a
different PCI-E maximum payload size. There is a sizable performance
boost for having the largest possible maximum payload size on each PCI-E
device. However, if improperly configured, fatal bus errors can occur.
Thus, it is important to ensure that PCI-E payloads sends by a device
are never larger than the MPS setting of all devices on the way to the
destination.
This can be achieved two ways:
- A conservative approach is to use the smallest common denominator of
the entire tree below a root complex for every device on that fabric.
This means for example that having a 128 bytes MPS USB controller on one
leg of a switch will dramatically reduce performances of a video card or
10GE adapter on another leg of that same switch.
It also means that any hierarchy supporting hotplug slots (including
expresscard or thunderbolt I suppose, dbl check that) will have to be
entirely clamped to 128 bytes since we cannot predict what will be
plugged into those slots, and we cannot change the MPS on a "live"
system.
- A more optimal way is possible, if it falls within a couple of
constraints:
* The top-level host bridge will never generate packets larger than the
smallest TLP (or if it can be controlled independently from its MPS at
least)
* The device will never generate packets larger than MPS (which can be
configured via MRRS)
* No support of direct PCI-E <-> PCI-E transfers between devices without
some additional code to specifically deal with that case
Then we can use an approach that basically ignores downstream requests
and focuses exclusively on upstream requests. In that case, all we need
to care about is that a device MPS is no larger than its parent MPS,
which allows us to keep all switches/bridges to the max MPS supported by
their parent and eventually the PHB.
In this case, your USB controller would no longer "starve" your 10GE
Ethernet and your hotplug slots won't affect your global MPS.
Additionally, the hotplugged devices themselves can be configured to a
larger MPS up to the value configured in the hotplug bridge.
To choose between the two available options, two PCI kernel boot args
have been added to the PCI calls. "pcie_bus_safe" will provide the
former behavior, while "pcie_bus_perf" will perform the latter behavior.
By default, the latter behavior is used.
NOTE: due to the location of the enablement, each arch will need to add
calls to this function. This patch only enables x86.
This patch includes a number of changes recommended by Benjamin
Herrenschmidt.
Tested-by: Jordan_Hargrave@dell.com
Signed-off-by: Jon Mason <mason@myri.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/pci.h | 15 |
1 files changed, 14 insertions, 1 deletions
diff --git a/include/linux/pci.h b/include/linux/pci.h index f27893b3b724..1ff9bbafd932 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h | |||
@@ -251,7 +251,8 @@ struct pci_dev { | |||
251 | u8 revision; /* PCI revision, low byte of class word */ | 251 | u8 revision; /* PCI revision, low byte of class word */ |
252 | u8 hdr_type; /* PCI header type (`multi' flag masked out) */ | 252 | u8 hdr_type; /* PCI header type (`multi' flag masked out) */ |
253 | u8 pcie_cap; /* PCI-E capability offset */ | 253 | u8 pcie_cap; /* PCI-E capability offset */ |
254 | u8 pcie_type; /* PCI-E device/port type */ | 254 | u8 pcie_type:4; /* PCI-E device/port type */ |
255 | u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */ | ||
255 | u8 rom_base_reg; /* which config register controls the ROM */ | 256 | u8 rom_base_reg; /* which config register controls the ROM */ |
256 | u8 pin; /* which interrupt pin this device uses */ | 257 | u8 pin; /* which interrupt pin this device uses */ |
257 | 258 | ||
@@ -617,6 +618,16 @@ struct pci_driver { | |||
617 | /* these external functions are only available when PCI support is enabled */ | 618 | /* these external functions are only available when PCI support is enabled */ |
618 | #ifdef CONFIG_PCI | 619 | #ifdef CONFIG_PCI |
619 | 620 | ||
621 | extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss); | ||
622 | |||
623 | enum pcie_bus_config_types { | ||
624 | PCIE_BUS_PERFORMANCE, | ||
625 | PCIE_BUS_SAFE, | ||
626 | PCIE_BUS_PEER2PEER, | ||
627 | }; | ||
628 | |||
629 | extern enum pcie_bus_config_types pcie_bus_config; | ||
630 | |||
620 | extern struct bus_type pci_bus_type; | 631 | extern struct bus_type pci_bus_type; |
621 | 632 | ||
622 | /* Do NOT directly access these two variables, unless you are arch specific pci | 633 | /* Do NOT directly access these two variables, unless you are arch specific pci |
@@ -796,6 +807,8 @@ int pcix_get_mmrbc(struct pci_dev *dev); | |||
796 | int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); | 807 | int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); |
797 | int pcie_get_readrq(struct pci_dev *dev); | 808 | int pcie_get_readrq(struct pci_dev *dev); |
798 | int pcie_set_readrq(struct pci_dev *dev, int rq); | 809 | int pcie_set_readrq(struct pci_dev *dev, int rq); |
810 | int pcie_get_mps(struct pci_dev *dev); | ||
811 | int pcie_set_mps(struct pci_dev *dev, int mps); | ||
799 | int __pci_reset_function(struct pci_dev *dev); | 812 | int __pci_reset_function(struct pci_dev *dev); |
800 | int pci_reset_function(struct pci_dev *dev); | 813 | int pci_reset_function(struct pci_dev *dev); |
801 | void pci_update_resource(struct pci_dev *dev, int resno); | 814 | void pci_update_resource(struct pci_dev *dev, int resno); |