aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/pci/pci.c
diff options
context:
space:
mode:
authorJon Mason <mason@myri.com>2011-07-20 16:20:54 -0400
committerJesse Barnes <jbarnes@virtuousgeek.org>2011-08-01 14:49:16 -0400
commitb03e7495a862b028294f59fc87286d6d78ee7fa1 (patch)
tree836fbfc2b0e34f034cb273c4d065baba3a65178c /drivers/pci/pci.c
parent5f66d2b58ca879e70740c82422354144845d6dd3 (diff)
PCI: Set PCI-E Max Payload Size on fabric
On a given PCI-E fabric, each device, bridge, and root port can have a different PCI-E maximum payload size. There is a sizable performance boost for having the largest possible maximum payload size on each PCI-E device. However, if improperly configured, fatal bus errors can occur. Thus, it is important to ensure that PCI-E payloads sends by a device are never larger than the MPS setting of all devices on the way to the destination. This can be achieved two ways: - A conservative approach is to use the smallest common denominator of the entire tree below a root complex for every device on that fabric. This means for example that having a 128 bytes MPS USB controller on one leg of a switch will dramatically reduce performances of a video card or 10GE adapter on another leg of that same switch. It also means that any hierarchy supporting hotplug slots (including expresscard or thunderbolt I suppose, dbl check that) will have to be entirely clamped to 128 bytes since we cannot predict what will be plugged into those slots, and we cannot change the MPS on a "live" system. - A more optimal way is possible, if it falls within a couple of constraints: * The top-level host bridge will never generate packets larger than the smallest TLP (or if it can be controlled independently from its MPS at least) * The device will never generate packets larger than MPS (which can be configured via MRRS) * No support of direct PCI-E <-> PCI-E transfers between devices without some additional code to specifically deal with that case Then we can use an approach that basically ignores downstream requests and focuses exclusively on upstream requests. In that case, all we need to care about is that a device MPS is no larger than its parent MPS, which allows us to keep all switches/bridges to the max MPS supported by their parent and eventually the PHB. In this case, your USB controller would no longer "starve" your 10GE Ethernet and your hotplug slots won't affect your global MPS. Additionally, the hotplugged devices themselves can be configured to a larger MPS up to the value configured in the hotplug bridge. To choose between the two available options, two PCI kernel boot args have been added to the PCI calls. "pcie_bus_safe" will provide the former behavior, while "pcie_bus_perf" will perform the latter behavior. By default, the latter behavior is used. NOTE: due to the location of the enablement, each arch will need to add calls to this function. This patch only enables x86. This patch includes a number of changes recommended by Benjamin Herrenschmidt. Tested-by: Jordan_Hargrave@dell.com Signed-off-by: Jon Mason <mason@myri.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Diffstat (limited to 'drivers/pci/pci.c')
-rw-r--r--drivers/pci/pci.c67
1 files changed, 67 insertions, 0 deletions
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 08a95b369d85..466fad6e6ee2 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -77,6 +77,8 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE;
77unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; 77unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE;
78unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; 78unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE;
79 79
80enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE;
81
80/* 82/*
81 * The default CLS is used if arch didn't set CLS explicitly and not 83 * The default CLS is used if arch didn't set CLS explicitly and not
82 * all pci devices agree on the same value. Arch can override either 84 * all pci devices agree on the same value. Arch can override either
@@ -3223,6 +3225,67 @@ out:
3223EXPORT_SYMBOL(pcie_set_readrq); 3225EXPORT_SYMBOL(pcie_set_readrq);
3224 3226
3225/** 3227/**
3228 * pcie_get_mps - get PCI Express maximum payload size
3229 * @dev: PCI device to query
3230 *
3231 * Returns maximum payload size in bytes
3232 * or appropriate error value.
3233 */
3234int pcie_get_mps(struct pci_dev *dev)
3235{
3236 int ret, cap;
3237 u16 ctl;
3238
3239 cap = pci_pcie_cap(dev);
3240 if (!cap)
3241 return -EINVAL;
3242
3243 ret = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl);
3244 if (!ret)
3245 ret = 128 << ((ctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5);
3246
3247 return ret;
3248}
3249
3250/**
3251 * pcie_set_mps - set PCI Express maximum payload size
3252 * @dev: PCI device to query
3253 * @rq: maximum payload size in bytes
3254 * valid values are 128, 256, 512, 1024, 2048, 4096
3255 *
3256 * If possible sets maximum payload size
3257 */
3258int pcie_set_mps(struct pci_dev *dev, int mps)
3259{
3260 int cap, err = -EINVAL;
3261 u16 ctl, v;
3262
3263 if (mps < 128 || mps > 4096 || !is_power_of_2(mps))
3264 goto out;
3265
3266 v = ffs(mps) - 8;
3267 if (v > dev->pcie_mpss)
3268 goto out;
3269 v <<= 5;
3270
3271 cap = pci_pcie_cap(dev);
3272 if (!cap)
3273 goto out;
3274
3275 err = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl);
3276 if (err)
3277 goto out;
3278
3279 if ((ctl & PCI_EXP_DEVCTL_PAYLOAD) != v) {
3280 ctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
3281 ctl |= v;
3282 err = pci_write_config_word(dev, cap + PCI_EXP_DEVCTL, ctl);
3283 }
3284out:
3285 return err;
3286}
3287
3288/**
3226 * pci_select_bars - Make BAR mask from the type of resource 3289 * pci_select_bars - Make BAR mask from the type of resource
3227 * @dev: the PCI device for which BAR mask is made 3290 * @dev: the PCI device for which BAR mask is made
3228 * @flags: resource type mask to be selected 3291 * @flags: resource type mask to be selected
@@ -3505,6 +3568,10 @@ static int __init pci_setup(char *str)
3505 pci_hotplug_io_size = memparse(str + 9, &str); 3568 pci_hotplug_io_size = memparse(str + 9, &str);
3506 } else if (!strncmp(str, "hpmemsize=", 10)) { 3569 } else if (!strncmp(str, "hpmemsize=", 10)) {
3507 pci_hotplug_mem_size = memparse(str + 10, &str); 3570 pci_hotplug_mem_size = memparse(str + 10, &str);
3571 } else if (!strncmp(str, "pcie_bus_safe", 13)) {
3572 pcie_bus_config = PCIE_BUS_SAFE;
3573 } else if (!strncmp(str, "pcie_bus_perf", 13)) {
3574 pcie_bus_config = PCIE_BUS_PERFORMANCE;
3508 } else { 3575 } else {
3509 printk(KERN_ERR "PCI: Unknown option `%s'\n", 3576 printk(KERN_ERR "PCI: Unknown option `%s'\n",
3510 str); 3577 str);