author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /drivers/pci
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)

Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'drivers/pci')
-rw-r--r-- | drivers/pci/Kconfig                  |   26
-rw-r--r-- | drivers/pci/Makefile                 |   12
-rw-r--r-- | drivers/pci/access.c                 |   18
-rw-r--r-- | drivers/pci/bus.c                    |    7
-rw-r--r-- | drivers/pci/dmar.c                   |   34
-rw-r--r-- | drivers/pci/hotplug/acpi_pcihp.c     |    2
-rw-r--r-- | drivers/pci/hotplug/acpiphp.h        |    1
-rw-r--r-- | drivers/pci/hotplug/acpiphp_glue.c   |   10
-rw-r--r-- | drivers/pci/hotplug/cpqphp_sysfs.c   |   13
-rw-r--r-- | drivers/pci/hotplug/ibmphp_ebda.c    |    6
-rw-r--r-- | drivers/pci/hotplug/ibmphp_hpc.c     |    4
-rw-r--r-- | drivers/pci/hotplug/pciehp.h         |    2
-rw-r--r-- | drivers/pci/hotplug/pciehp_acpi.c    |    3
-rw-r--r-- | drivers/pci/hotplug/pciehp_core.c    |   18
-rw-r--r-- | drivers/pci/hotplug/pciehp_ctrl.c    |    9
-rw-r--r-- | drivers/pci/hotplug/pciehp_hpc.c     |   20
-rw-r--r-- | drivers/pci/hotplug/pcihp_slot.c     |   45
-rw-r--r-- | drivers/pci/hotplug/rpaphp_core.c    |    2
-rw-r--r-- | drivers/pci/hotplug/rpaphp_slot.c    |    1
-rw-r--r-- | drivers/pci/hotplug/shpchp.h         |    2
-rw-r--r-- | drivers/pci/hotplug/shpchp_core.c    |   20
-rw-r--r-- | drivers/pci/hotplug/shpchp_ctrl.c    |    7
-rw-r--r-- | drivers/pci/hotplug/shpchp_hpc.c     |   26
-rw-r--r-- | drivers/pci/htirq.c                  |   34
-rw-r--r-- | drivers/pci/intel-iommu.c            |  324
-rw-r--r-- | drivers/pci/intr_remapping.c         |  214
-rw-r--r-- | drivers/pci/iov.c                    |    1
-rw-r--r-- | drivers/pci/iova.c                   |   14
-rw-r--r-- | drivers/pci/msi.c                    |   61
-rw-r--r-- | drivers/pci/msi.h                    |   10
-rw-r--r-- | drivers/pci/pci-acpi.c               |   21
-rw-r--r-- | drivers/pci/pci-driver.c             |   19
-rw-r--r-- | drivers/pci/pci-label.c              |  252
-rw-r--r-- | drivers/pci/pci-stub.c               |    7
-rw-r--r-- | drivers/pci/pci-sysfs.c              |   94
-rw-r--r-- | drivers/pci/pci.c                    |  568
-rw-r--r-- | drivers/pci/pci.h                    |   66
-rw-r--r-- | drivers/pci/pcie/Kconfig             |    2
-rw-r--r-- | drivers/pci/pcie/aer/aer_inject.c    |   32
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv.c        |    3
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv.h        |   18
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv_acpi.c   |   34
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv_core.c   |    2
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv_errprint.c |  182
-rw-r--r-- | drivers/pci/pcie/aspm.c              |   75
-rw-r--r-- | drivers/pci/pcie/pme.c               |   31
-rw-r--r-- | drivers/pci/pcie/portdrv.h           |    5
-rw-r--r-- | drivers/pci/pcie/portdrv_acpi.c      |   23
-rw-r--r-- | drivers/pci/pcie/portdrv_core.c      |   24
-rw-r--r-- | drivers/pci/pcie/portdrv_pci.c       |   37
-rw-r--r-- | drivers/pci/probe.c                  |   53
-rw-r--r-- | drivers/pci/proc.c                   |    7
-rw-r--r-- | drivers/pci/quirks.c                 |  216
-rw-r--r-- | drivers/pci/remove.c                 |    2
-rw-r--r-- | drivers/pci/setup-bus.c              |  455
-rw-r--r-- | drivers/pci/setup-res.c              |    2
-rw-r--r-- | drivers/pci/xen-pcifront.c           | 1159
57 files changed, 3415 insertions(+), 920 deletions(-)
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 34ef70d562b2..0fa466a91bf4 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -40,6 +40,28 @@ config PCI_STUB
 
 	  When in doubt, say N.
 
+config XEN_PCIDEV_FRONTEND
+	tristate "Xen PCI Frontend"
+	depends on PCI && X86 && XEN
+	select HOTPLUG
+	select PCI_XEN
+	select XEN_XENBUS_FRONTEND
+	default y
+	help
+	  The PCI device frontend driver allows the kernel to import arbitrary
+	  PCI devices from a PCI backend to support PCI driver domains.
+
+config XEN_PCIDEV_FE_DEBUG
+	bool "Xen PCI Frontend debugging"
+	depends on XEN_PCIDEV_FRONTEND && PCI_DEBUG
+	help
+	  Say Y here if you want the Xen PCI frontend to produce a bunch of debug
+	  messages to the system log.  Select this if you are having a
+	  problem with Xen PCI frontend support and want to see more of what is
+	  going on.
+
+	  When in doubt, say N.
+
 config HT_IRQ
 	bool "Interrupts on hypertransport devices"
 	default y
@@ -65,3 +87,7 @@ config PCI_IOAPIC
 	depends on ACPI
 	depends on HOTPLUG
 	default y
+
+config PCI_LABEL
+	def_bool y if (DMI || ACPI)
+	select NLS
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index dc1aa0922868..094308e41be5 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_PCI_IOV) += iov.o
 obj-$(CONFIG_X86) += setup-bus.o
 obj-$(CONFIG_ALPHA) += setup-bus.o setup-irq.o
 obj-$(CONFIG_ARM) += setup-bus.o setup-irq.o
+obj-$(CONFIG_UNICORE32) += setup-bus.o setup-irq.o
 obj-$(CONFIG_PARISC) += setup-bus.o
 obj-$(CONFIG_SUPERH) += setup-bus.o setup-irq.o
 obj-$(CONFIG_PPC) += setup-bus.o
@@ -49,14 +50,17 @@ obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o
 obj-$(CONFIG_X86_VISWS) += setup-irq.o
 obj-$(CONFIG_MN10300) += setup-bus.o
 obj-$(CONFIG_MICROBLAZE) += setup-bus.o
+obj-$(CONFIG_TILE) += setup-bus.o setup-irq.o
+obj-$(CONFIG_SPARC_LEON) += setup-bus.o setup-irq.o
 
 #
 # ACPI Related PCI FW Functions
+# ACPI _DSM provided firmware instance and string name
 #
 obj-$(CONFIG_ACPI) += pci-acpi.o
 
 # SMBIOS provided firmware instance and labels
-obj-$(CONFIG_DMI) += pci-label.o
+obj-$(CONFIG_PCI_LABEL) += pci-label.o
 
 # Cardbus & CompactPCI use setup-bus
 obj-$(CONFIG_HOTPLUG) += setup-bus.o
@@ -65,6 +69,6 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o
 
 obj-$(CONFIG_PCI_STUB) += pci-stub.o
 
-ifeq ($(CONFIG_PCI_DEBUG),y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
+
+ccflags-$(CONFIG_PCI_DEBUG) := -DDEBUG
diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 531bc697d800..fdaa42aac7c6 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -143,33 +143,41 @@ static noinline void pci_wait_ucfg(struct pci_dev *dev)
 	__remove_wait_queue(&pci_ucfg_wait, &wait);
 }
 
+/* Returns 0 on success, negative values indicate error. */
 #define PCI_USER_READ_CONFIG(size,type)					\
 int pci_user_read_config_##size						\
 	(struct pci_dev *dev, int pos, type *val)			\
 {									\
 	int ret = 0;							\
 	u32 data = -1;							\
-	if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER;	\
+	if (PCI_##size##_BAD)						\
+		return -EINVAL;						\
 	raw_spin_lock_irq(&pci_lock);					\
 	if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev);	\
 	ret = dev->bus->ops->read(dev->bus, dev->devfn,			\
 					pos, sizeof(type), &data);	\
 	raw_spin_unlock_irq(&pci_lock);					\
 	*val = (type)data;						\
+	if (ret > 0)							\
+		ret = -EINVAL;						\
 	return ret;							\
 }
 
+/* Returns 0 on success, negative values indicate error. */
 #define PCI_USER_WRITE_CONFIG(size,type)				\
 int pci_user_write_config_##size					\
 	(struct pci_dev *dev, int pos, type val)			\
 {									\
 	int ret = -EIO;							\
-	if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER;	\
+	if (PCI_##size##_BAD)						\
+		return -EINVAL;						\
 	raw_spin_lock_irq(&pci_lock);					\
 	if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev);	\
 	ret = dev->bus->ops->write(dev->bus, dev->devfn,		\
 					pos, sizeof(type), val);	\
 	raw_spin_unlock_irq(&pci_lock);					\
+	if (ret > 0)							\
+		ret = -EINVAL;						\
 	return ret;							\
 }
 
@@ -197,6 +205,8 @@ struct pci_vpd_pci22 {
  * This code has to spin since there is no other notification from the PCI
  * hardware. Since the VPD is often implemented by serial attachment to an
  * EEPROM, it may take many milliseconds to complete.
+ *
+ * Returns 0 on success, negative values indicate error.
  */
 static int pci_vpd_pci22_wait(struct pci_dev *dev)
 {
@@ -212,7 +222,7 @@ static int pci_vpd_pci22_wait(struct pci_dev *dev)
 	for (;;) {
 		ret = pci_user_read_config_word(dev, vpd->cap + PCI_VPD_ADDR,
 						&status);
-		if (ret)
+		if (ret < 0)
 			return ret;
 
 		if ((status & PCI_VPD_ADDR_F) == vpd->flag) {
@@ -324,6 +334,8 @@ static ssize_t pci_vpd_pci22_write(struct pci_dev *dev, loff_t pos, size_t count
 		vpd->busy = true;
 		vpd->flag = 0;
 		ret = pci_vpd_pci22_wait(dev);
+		if (ret < 0)
+			break;
 
 		pos += sizeof(u32);
 	}
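
The access.c hunks above switch the user-space config accessors from positive PCIBIOS_* error codes to plain negative errno values, folding any positive return from the bus op into -EINVAL. A minimal user-space sketch of that normalization, with a hypothetical bus_op() standing in for dev->bus->ops->read (not kernel code):

#include <errno.h>
#include <stdio.h>

/* Hypothetical stand-in for a bus op that can fail with a positive
 * PCIBIOS_*-style code (0x87 mimics PCIBIOS_BAD_REGISTER_NUMBER). */
static int bus_op(int fail) { return fail ? 0x87 : 0; }

/* Same convention as the new macros: 0 on success, negative errno
 * on error; positive codes are collapsed to -EINVAL. */
static int user_read(int fail)
{
	int ret = bus_op(fail);
	if (ret > 0)
		ret = -EINVAL;
	return ret;
}

int main(void)
{
	printf("ok=%d fail=%d\n", user_read(0), user_read(1)); /* ok=0 fail=-22 */
	return 0;
}

This is also why pci_vpd_pci22_wait() now checks ret < 0 instead of ret: success is exactly 0 and every error is negative.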
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 7f0af0e9b826..1e2ad92a4752 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -163,12 +163,6 @@ int pci_bus_add_child(struct pci_bus *bus)
 
 	bus->is_added = 1;
 
-	retval = device_create_file(&bus->dev, &dev_attr_cpuaffinity);
-	if (retval)
-		return retval;
-
-	retval = device_create_file(&bus->dev, &dev_attr_cpulistaffinity);
-
 	/* Create legacy_io and legacy_mem files for this bus */
 	pci_create_legacy_files(bus);
 
@@ -299,6 +293,7 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
 	}
 	up_read(&pci_bus_sem);
 }
+EXPORT_SYMBOL_GPL(pci_walk_bus);
 
 EXPORT_SYMBOL(pci_bus_alloc_resource);
 EXPORT_SYMBOL_GPL(pci_bus_add_device);
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 0a19708074c2..3dc9befa5aec 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -36,6 +36,7 @@
 #include <linux/tboot.h>
 #include <linux/dmi.h>
 #include <linux/slab.h>
+#include <asm/iommu_table.h>
 
 #define PREFIX "DMAR: "
 
@@ -687,7 +688,7 @@ failed:
 	return 0;
 }
 
-void __init detect_intel_iommu(void)
+int __init detect_intel_iommu(void)
 {
 	int ret;
 
@@ -697,12 +698,7 @@ void __init detect_intel_iommu(void)
 	{
 #ifdef CONFIG_INTR_REMAP
 		struct acpi_table_dmar *dmar;
-		/*
-		 * for now we will disable dma-remapping when interrupt
-		 * remapping is enabled.
-		 * When support for queued invalidation for IOTLB invalidation
-		 * is added, we will not need this any more.
-		 */
+
 		dmar = (struct acpi_table_dmar *) dmar_tbl;
 		if (ret && cpu_has_x2apic && dmar->flags & 0x1)
 			printk(KERN_INFO
@@ -723,6 +719,8 @@ void __init detect_intel_iommu(void)
 	}
 	early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
 	dmar_tbl = NULL;
+
+	return ret ? 1 : -ENODEV;
 }
 
 
@@ -1221,9 +1219,9 @@ const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
 	}
 }
 
-void dmar_msi_unmask(unsigned int irq)
+void dmar_msi_unmask(struct irq_data *data)
 {
-	struct intel_iommu *iommu = get_irq_data(irq);
+	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
 	unsigned long flag;
 
 	/* unmask it */
@@ -1234,10 +1232,10 @@ void dmar_msi_unmask(unsigned int irq)
 	spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
 
-void dmar_msi_mask(unsigned int irq)
+void dmar_msi_mask(struct irq_data *data)
 {
 	unsigned long flag;
-	struct intel_iommu *iommu = get_irq_data(irq);
+	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
 
 	/* mask it */
 	spin_lock_irqsave(&iommu->register_lock, flag);
@@ -1249,7 +1247,7 @@ void dmar_msi_mask(unsigned int irq)
 
 void dmar_msi_write(int irq, struct msi_msg *msg)
 {
-	struct intel_iommu *iommu = get_irq_data(irq);
+	struct intel_iommu *iommu = irq_get_handler_data(irq);
 	unsigned long flag;
 
 	spin_lock_irqsave(&iommu->register_lock, flag);
@@ -1261,7 +1259,7 @@ void dmar_msi_write(int irq, struct msi_msg *msg)
 
 void dmar_msi_read(int irq, struct msi_msg *msg)
 {
-	struct intel_iommu *iommu = get_irq_data(irq);
+	struct intel_iommu *iommu = irq_get_handler_data(irq);
 	unsigned long flag;
 
 	spin_lock_irqsave(&iommu->register_lock, flag);
@@ -1379,12 +1377,12 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
 		return -EINVAL;
 	}
 
-	set_irq_data(irq, iommu);
+	irq_set_handler_data(irq, iommu);
 	iommu->irq = irq;
 
 	ret = arch_setup_dmar_msi(irq);
 	if (ret) {
-		set_irq_data(irq, NULL);
+		irq_set_handler_data(irq, NULL);
 		iommu->irq = 0;
 		destroy_irq(irq);
 		return ret;
@@ -1414,6 +1412,11 @@ int __init enable_drhd_fault_handling(void)
 			       (unsigned long long)drhd->reg_base_addr, ret);
 			return -1;
 		}
+
+		/*
+		 * Clear any previous faults.
+		 */
+		dmar_fault(iommu->irq, iommu);
 	}
 
 	return 0;
@@ -1455,3 +1458,4 @@ int __init dmar_ir_support(void)
 		return 0;
 	return dmar->flags & 0x1;
 }
+IOMMU_INIT_POST(detect_intel_iommu);
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index 3bc72d18b121..8f3faf343f75 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -351,7 +351,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
 	 * To handle different BIOS behavior, we look for _OSC on a root
 	 * bridge preferentially (according to PCI fw spec). Later for
 	 * OSHP within the scope of the hotplug controller and its parents,
-	 * upto the host bridge under which this controller exists.
+	 * up to the host bridge under which this controller exists.
 	 */
 	handle = acpi_find_root_bridge_handle(pdev);
 	if (handle) {
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index bab52047baa8..7722108e78df 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -36,7 +36,6 @@
 #define _ACPIPHP_H
 
 #include <linux/acpi.h>
-#include <linux/kobject.h>
 #include <linux/mutex.h>
 #include <linux/pci_hotplug.h>
 
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index cb23aa2ebf96..a70fa89f76fd 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -212,6 +212,7 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 
 	pdev = pci_get_slot(pbus, PCI_DEVFN(device, function));
 	if (pdev) {
+		pdev->current_state = PCI_D0;
 		slot->flags |= (SLOT_ENABLED | SLOT_POWEREDON);
 		pci_dev_put(pdev);
 	}
@@ -584,7 +585,7 @@ static void remove_bridge(acpi_handle handle)
 
 	/*
 	 * On root bridges with hotplug slots directly underneath (ie,
-	 * no p2p bridge inbetween), we call cleanup_bridge().
+	 * no p2p bridge between), we call cleanup_bridge().
 	 *
 	 * The else clause cleans up root bridges that either had no
 	 * hotplug slots at all, or had a p2p bridge underneath.
@@ -826,6 +827,13 @@ static int __ref enable_device(struct acpiphp_slot *slot)
 	acpiphp_set_hpp_values(bus);
 	acpiphp_set_acpi_region(slot);
 	pci_enable_bridges(bus);
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		/* Assume that newly added devices are powered on already. */
+		if (!dev->is_added)
+			dev->current_state = PCI_D0;
+	}
+
 	pci_bus_add_devices(bus);
 
 	list_for_each_entry(func, &slot->funcs, sibling) {
diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c
index 56215322930a..4cb30447a486 100644
--- a/drivers/pci/hotplug/cpqphp_sysfs.c
+++ b/drivers/pci/hotplug/cpqphp_sysfs.c
@@ -34,10 +34,11 @@
 #include <linux/workqueue.h>
 #include <linux/pci.h>
 #include <linux/pci_hotplug.h>
-#include <linux/smp_lock.h>
+#include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include "cpqphp.h"
 
+static DEFINE_MUTEX(cpqphp_mutex);
 static int show_ctrl (struct controller *ctrl, char *buf)
 {
 	char *out = buf;
@@ -147,7 +148,7 @@ static int open(struct inode *inode, struct file *file)
 	struct ctrl_dbg *dbg;
 	int retval = -ENOMEM;
 
-	lock_kernel();
+	mutex_lock(&cpqphp_mutex);
 	dbg = kmalloc(sizeof(*dbg), GFP_KERNEL);
 	if (!dbg)
 		goto exit;
@@ -160,7 +161,7 @@ static int open(struct inode *inode, struct file *file)
 	file->private_data = dbg;
 	retval = 0;
 exit:
-	unlock_kernel();
+	mutex_unlock(&cpqphp_mutex);
 	return retval;
 }
 
@@ -169,7 +170,7 @@ static loff_t lseek(struct file *file, loff_t off, int whence)
 	struct ctrl_dbg *dbg;
 	loff_t new = -1;
 
-	lock_kernel();
+	mutex_lock(&cpqphp_mutex);
 	dbg = file->private_data;
 
 	switch (whence) {
@@ -181,10 +182,10 @@ static loff_t lseek(struct file *file, loff_t off, int whence)
 		break;
 	}
 	if (new < 0 || new > dbg->size) {
-		unlock_kernel();
+		mutex_unlock(&cpqphp_mutex);
 		return -EINVAL;
 	}
-	unlock_kernel();
+	mutex_unlock(&cpqphp_mutex);
 	return (file->f_pos = new);
 }
 
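The cpqphp conversion above is the standard big-kernel-lock removal pattern: a driver-local mutex replaces every lock_kernel()/unlock_kernel() pair, with every exit path unlocking before returning. A user-space sketch of the same shape using POSIX threads (names hypothetical):

#include <pthread.h>
#include <stdio.h>

/* Stand-in for DEFINE_MUTEX(cpqphp_mutex). */
static pthread_mutex_t cpqphp_lock = PTHREAD_MUTEX_INITIALIZER;
static long pos; /* state the BKL used to protect */

static long lseek_locked(long off)
{
	long new;
	pthread_mutex_lock(&cpqphp_lock);		/* was lock_kernel() */
	new = pos + off;
	if (new < 0) {
		pthread_mutex_unlock(&cpqphp_lock);	/* unlock on error path too */
		return -22;				/* -EINVAL in the driver */
	}
	pos = new;
	pthread_mutex_unlock(&cpqphp_lock);		/* was unlock_kernel() */
	return new;
}

int main(void)
{
	printf("%ld %ld\n", lseek_locked(10), lseek_locked(-100)); /* 10 -22 */
	return 0;
}
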
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 5becbdee4027..2850e64dedae 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -276,6 +276,12 @@ int __init ibmphp_access_ebda (void)
 
 	for (;;) {
 		offset = next_offset;
+
+		/* Make sure what we read is still in the mapped section */
+		if (WARN(offset > (ebda_sz * 1024 - 4),
+			 "ibmphp_ebda: next read is beyond ebda_sz\n"))
+			break;
+
 		next_offset = readw (io_mem + offset);	/* offset of next blk */
 
 		offset += 2;
diff --git a/drivers/pci/hotplug/ibmphp_hpc.c b/drivers/pci/hotplug/ibmphp_hpc.c
index 1aaf3f32d3cd..f59ed30512b5 100644
--- a/drivers/pci/hotplug/ibmphp_hpc.c
+++ b/drivers/pci/hotplug/ibmphp_hpc.c
@@ -133,8 +133,8 @@ void __init ibmphp_hpc_initvars (void)
 	debug ("%s - Entry\n", __func__);
 
 	mutex_init(&sem_hpcaccess);
-	init_MUTEX (&semOperations);
-	init_MUTEX_LOCKED (&sem_exit);
+	sema_init(&semOperations, 1);
+	sema_init(&sem_exit, 0);
 	to_debug = 0;
 
 	debug ("%s - Exit\n", __func__);
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 73d513989263..838f571027b7 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -36,6 +36,7 @@
 #include <linux/sched.h>		/* signal_pending() */
 #include <linux/pcieport_if.h>
 #include <linux/mutex.h>
+#include <linux/workqueue.h>
 
 #define MY_NAME	"pciehp"
 
@@ -44,6 +45,7 @@ extern int pciehp_poll_time;
 extern int pciehp_debug;
 extern int pciehp_force;
 extern struct workqueue_struct *pciehp_wq;
+extern struct workqueue_struct *pciehp_ordered_wq;
 
 #define dbg(format, arg...)						\
 do {									\
diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c
index 2574700db461..5f7226223a62 100644
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c
@@ -115,7 +115,8 @@ static struct pcie_port_service_driver __initdata dummy_driver = {
 static int __init select_detection_mode(void)
 {
 	struct dummy_slot *slot, *tmp;
-	pcie_port_service_register(&dummy_driver);
+	if (pcie_port_service_register(&dummy_driver))
+		return PCIEHP_DETECT_ACPI;
 	pcie_port_service_unregister(&dummy_driver);
 	list_for_each_entry_safe(slot, tmp, &dummy_slots, list) {
 		list_del(&slot->list);
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index aa5f3ff629ff..7ac8358df8fd 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -43,6 +43,7 @@ int pciehp_poll_mode;
 int pciehp_poll_time;
 int pciehp_force;
 struct workqueue_struct *pciehp_wq;
+struct workqueue_struct *pciehp_ordered_wq;
 
 #define DRIVER_VERSION	"0.4"
 #define DRIVER_AUTHOR	"Dan Zink <dan.zink@compaq.com>, Greg Kroah-Hartman <greg@kroah.com>, Dely Sy <dely.l.sy@intel.com>"
@@ -340,18 +341,33 @@ static int __init pcied_init(void)
 {
 	int retval = 0;
 
+	pciehp_wq = alloc_workqueue("pciehp", 0, 0);
+	if (!pciehp_wq)
+		return -ENOMEM;
+
+	pciehp_ordered_wq = alloc_ordered_workqueue("pciehp_ordered", 0);
+	if (!pciehp_ordered_wq) {
+		destroy_workqueue(pciehp_wq);
+		return -ENOMEM;
+	}
+
 	pciehp_firmware_init();
 	retval = pcie_port_service_register(&hpdriver_portdrv);
 	dbg("pcie_port_service_register = %d\n", retval);
 	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
-	if (retval)
+	if (retval) {
+		destroy_workqueue(pciehp_ordered_wq);
+		destroy_workqueue(pciehp_wq);
 		dbg("Failure to register service\n");
+	}
 	return retval;
 }
 
 static void __exit pcied_cleanup(void)
 {
 	dbg("unload_pciehpd()\n");
+	destroy_workqueue(pciehp_ordered_wq);
+	destroy_workqueue(pciehp_wq);
 	pcie_port_service_unregister(&hpdriver_portdrv);
 	info(DRIVER_DESC " version: " DRIVER_VERSION " unloaded\n");
 }
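
pcied_init() above acquires both workqueues before registering the port service and releases them newest-first on every failure path; pcied_cleanup() tears down in the same reverse order. A generic user-space sketch of that acquire/unwind ordering (all names hypothetical, not the kernel API):

#include <stdio.h>
#include <stdlib.h>

struct wq { const char *name; };

static struct wq *alloc_wq(const char *name)
{
	struct wq *w = malloc(sizeof(*w));
	if (w)
		w->name = name;
	return w;
}
static void destroy_wq(struct wq *w) { free(w); }
static int register_service(void) { return 0; /* 0 = success */ }

static int init(struct wq **wq, struct wq **ordered)
{
	*wq = alloc_wq("pciehp");
	if (!*wq)
		return -1;
	*ordered = alloc_wq("pciehp_ordered");
	if (!*ordered) {
		destroy_wq(*wq);	/* unwind the one resource held */
		return -1;
	}
	if (register_service()) {
		destroy_wq(*ordered);	/* unwind newest first */
		destroy_wq(*wq);
		return -1;
	}
	return 0;
}

int main(void)
{
	struct wq *a, *b;
	if (init(&a, &b) == 0) {
		puts("initialized");
		destroy_wq(b);		/* cleanup mirrors init, reversed */
		destroy_wq(a);
	}
	return 0;
}
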
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index 8f58148be044..085dbb5fc168 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -32,7 +32,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
-#include <linux/workqueue.h>
 #include "../pci.h"
 #include "pciehp.h"
 
@@ -50,7 +49,7 @@ static int queue_interrupt_event(struct slot *p_slot, u32 event_type)
 	info->p_slot = p_slot;
 	INIT_WORK(&info->work, interrupt_event_handler);
 
-	schedule_work(&info->work);
+	queue_work(pciehp_wq, &info->work);
 
 	return 0;
 }
@@ -345,7 +344,7 @@ void pciehp_queue_pushbutton_work(struct work_struct *work)
 		kfree(info);
 		goto out;
 	}
-	queue_work(pciehp_wq, &info->work);
+	queue_work(pciehp_ordered_wq, &info->work);
 out:
 	mutex_unlock(&p_slot->lock);
 }
@@ -378,7 +377,7 @@ static void handle_button_press_event(struct slot *p_slot)
 		if (ATTN_LED(ctrl))
 			pciehp_set_attention_status(p_slot, 0);
 
-		schedule_delayed_work(&p_slot->work, 5*HZ);
+		queue_delayed_work(pciehp_wq, &p_slot->work, 5*HZ);
 		break;
 	case BLINKINGOFF_STATE:
 	case BLINKINGON_STATE:
@@ -440,7 +439,7 @@ static void handle_surprise_event(struct slot *p_slot)
 	else
 		p_slot->state = POWERON_STATE;
 
-	queue_work(pciehp_wq, &info->work);
+	queue_work(pciehp_ordered_wq, &info->work);
 }
 
 static void interrupt_event_handler(struct work_struct *work)
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 0cd42047d89b..50a23da5d24d 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -41,8 +41,6 @@
 #include "../pci.h"
 #include "pciehp.h"
 
-static atomic_t pciehp_num_controllers = ATOMIC_INIT(0);
-
 static inline int pciehp_readw(struct controller *ctrl, int reg, u16 *value)
 {
 	struct pci_dev *dev = ctrl->pcie->port;
@@ -805,8 +803,8 @@ static void pcie_cleanup_slot(struct controller *ctrl)
 {
 	struct slot *slot = ctrl->slot;
 	cancel_delayed_work(&slot->work);
-	flush_scheduled_work();
 	flush_workqueue(pciehp_wq);
+	flush_workqueue(pciehp_ordered_wq);
 	kfree(slot);
 }
 
@@ -912,16 +910,6 @@ struct controller *pcie_init(struct pcie_device *dev)
 	/* Disable sotfware notification */
 	pcie_disable_notification(ctrl);
 
-	/*
-	 * If this is the first controller to be initialized,
-	 * initialize the pciehp work queue
-	 */
-	if (atomic_add_return(1, &pciehp_num_controllers) == 1) {
-		pciehp_wq = create_singlethread_workqueue("pciehpd");
-		if (!pciehp_wq)
-			goto abort_ctrl;
-	}
-
 	ctrl_info(ctrl, "HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
 		  pdev->vendor, pdev->device, pdev->subsystem_vendor,
 		  pdev->subsystem_device);
@@ -941,11 +929,5 @@ void pciehp_release_ctrl(struct controller *ctrl)
 {
 	pcie_shutdown_notification(ctrl);
 	pcie_cleanup_slot(ctrl);
-	/*
-	 * If this is the last controller to be released, destroy the
-	 * pciehp work queue
-	 */
-	if (atomic_dec_and_test(&pciehp_num_controllers))
-		destroy_workqueue(pciehp_wq);
 	kfree(ctrl);
 }
diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c
index 80b461c98557..749fdf070319 100644
--- a/drivers/pci/hotplug/pcihp_slot.c
+++ b/drivers/pci/hotplug/pcihp_slot.c
@@ -158,6 +158,47 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
 	 */
 }
 
+/* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */
+static int pci_set_payload(struct pci_dev *dev)
+{
+	int pos, ppos;
+	u16 pctl, psz;
+	u16 dctl, dsz, dcap, dmax;
+	struct pci_dev *parent;
+
+	parent = dev->bus->self;
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (!pos)
+		return 0;
+
+	/* Read Device MaxPayload capability and setting */
+	pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl);
+	pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap);
+	dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5;
+	dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD);
+
+	/* Read Parent MaxPayload setting */
+	ppos = pci_find_capability(parent, PCI_CAP_ID_EXP);
+	if (!ppos)
+		return 0;
+	pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl);
+	psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5;
+
+	/* If parent payload > device max payload -> error
+	 * If parent payload > device payload -> set speed
+	 * If parent payload <= device payload -> do nothing
+	 */
+	if (psz > dmax)
+		return -1;
+	else if (psz > dsz) {
+		dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz);
+		pci_write_config_word(dev, pos + PCI_EXP_DEVCTL,
+				      (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) +
+				      (psz << 5));
+	}
+	return 0;
+}
+
 void pci_configure_slot(struct pci_dev *dev)
 {
 	struct pci_dev *cdev;
@@ -169,6 +210,10 @@ void pci_configure_slot(struct pci_dev *dev)
 	       (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI)))
 		return;
 
+	ret = pci_set_payload(dev);
+	if (ret)
+		dev_warn(&dev->dev, "could not set device max payload\n");
+
 	memset(&hpp, 0, sizeof(hpp));
 	ret = pci_get_hp_params(dev, &hpp);
 	if (ret)
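
pci_set_payload() above compares the 3-bit MaxPayload fields of parent and device; the field encodes a payload of 128 << value bytes. A small user-space check of that bit arithmetic, with hypothetical register values (the two masks match the kernel's pci_regs.h definitions):

#include <stdio.h>
#include <stdint.h>

#define PCI_EXP_DEVCTL_PAYLOAD	0x00e0	/* DEVCTL bits 7:5 */
#define PCI_EXP_DEVCAP_PAYLOAD	0x0007	/* DEVCAP bits 2:0 */

int main(void)
{
	uint16_t pctl = 0x0040;	/* hypothetical parent DEVCTL, field = 2 */
	uint16_t dctl = 0x0000;	/* hypothetical device DEVCTL, field = 0 */
	uint16_t dcap = 0x0002;	/* device supports up to 512 bytes */

	unsigned psz  = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5;	/* 2 */
	unsigned dsz  = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5;	/* 0 */
	unsigned dmax =  dcap & PCI_EXP_DEVCAP_PAYLOAD;		/* 2 */

	if (psz > dmax)
		puts("error: parent payload exceeds device capability");
	else if (psz > dsz)
		printf("raise device MaxPayload to %u bytes\n", 128u << psz);
	return 0;	/* prints: raise device MaxPayload to 512 bytes */
}
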
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index ef7411c660b9..758adb5f47fd 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -290,7 +290,7 @@ static int is_php_dn(struct device_node *dn, const int **indexes,
  * @dn: device node of slot
  *
  * This subroutine will register a hotplugable slot with the
- * PCI hotplug infrastructure. This routine is typicaly called
+ * PCI hotplug infrastructure. This routine is typically called
  * during boot time, if the hotplug slots are present at boot time,
  * or is called later, by the dlpar add code, if the slot is
  * being dynamically added during runtime.
diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c
index 2ea9cf1a8d02..b283bbea6d24 100644
--- a/drivers/pci/hotplug/rpaphp_slot.c
+++ b/drivers/pci/hotplug/rpaphp_slot.c
@@ -24,7 +24,6 @@
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/kobject.h>
 #include <linux/sysfs.h>
 #include <linux/pci.h>
 #include <linux/string.h>
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index d2627e1c3ac1..e0c90e643b5f 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -35,6 +35,7 @@
 #include <linux/delay.h>
 #include <linux/sched.h>		/* signal_pending(), struct timer_list */
 #include <linux/mutex.h>
+#include <linux/workqueue.h>
 
 #if !defined(MODULE)
 	#define MY_NAME	"shpchp"
@@ -46,6 +47,7 @@ extern int shpchp_poll_mode;
 extern int shpchp_poll_time;
 extern int shpchp_debug;
 extern struct workqueue_struct *shpchp_wq;
+extern struct workqueue_struct *shpchp_ordered_wq;
 
 #define dbg(format, arg...)						\
 do {									\
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index a7bd5048396e..aca972bbfb4c 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -33,7 +33,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
-#include <linux/workqueue.h>
 #include "shpchp.h"
 
 /* Global variables */
@@ -41,6 +40,7 @@ int shpchp_debug;
 int shpchp_poll_mode;
 int shpchp_poll_time;
 struct workqueue_struct *shpchp_wq;
+struct workqueue_struct *shpchp_ordered_wq;
 
 #define DRIVER_VERSION	"0.4"
 #define DRIVER_AUTHOR	"Dan Zink <dan.zink@compaq.com>, Greg Kroah-Hartman <greg@kroah.com>, Dely Sy <dely.l.sy@intel.com>"
@@ -174,8 +174,8 @@ void cleanup_slots(struct controller *ctrl)
 		slot = list_entry(tmp, struct slot, slot_list);
 		list_del(&slot->slot_list);
 		cancel_delayed_work(&slot->work);
-		flush_scheduled_work();
 		flush_workqueue(shpchp_wq);
+		flush_workqueue(shpchp_ordered_wq);
 		pci_hp_deregister(slot->hotplug_slot);
 	}
 }
@@ -360,9 +360,23 @@ static int __init shpcd_init(void)
 {
 	int retval = 0;
 
+	shpchp_wq = alloc_ordered_workqueue("shpchp", 0);
+	if (!shpchp_wq)
+		return -ENOMEM;
+
+	shpchp_ordered_wq = alloc_ordered_workqueue("shpchp_ordered", 0);
+	if (!shpchp_ordered_wq) {
+		destroy_workqueue(shpchp_wq);
+		return -ENOMEM;
+	}
+
 	retval = pci_register_driver(&shpc_driver);
 	dbg("%s: pci_register_driver = %d\n", __func__, retval);
 	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
+	if (retval) {
+		destroy_workqueue(shpchp_ordered_wq);
+		destroy_workqueue(shpchp_wq);
+	}
 	return retval;
 }
 
@@ -370,6 +384,8 @@ static void __exit shpcd_cleanup(void)
 {
 	dbg("unload_shpchpd()\n");
 	pci_unregister_driver(&shpc_driver);
+	destroy_workqueue(shpchp_ordered_wq);
+	destroy_workqueue(shpchp_wq);
 	info(DRIVER_DESC " version: " DRIVER_VERSION " unloaded\n");
 }
 
diff --git a/drivers/pci/hotplug/shpchp_ctrl.c b/drivers/pci/hotplug/shpchp_ctrl.c
index 3387fbfb0c54..b00b09bdd38a 100644
--- a/drivers/pci/hotplug/shpchp_ctrl.c
+++ b/drivers/pci/hotplug/shpchp_ctrl.c
@@ -32,7 +32,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
-#include <linux/workqueue.h>
 #include "../pci.h"
 #include "shpchp.h"
 
@@ -52,7 +51,7 @@ static int queue_interrupt_event(struct slot *p_slot, u32 event_type)
 	info->p_slot = p_slot;
 	INIT_WORK(&info->work, interrupt_event_handler);
 
-	schedule_work(&info->work);
+	queue_work(shpchp_wq, &info->work);
 
 	return 0;
 }
@@ -457,7 +456,7 @@ void shpchp_queue_pushbutton_work(struct work_struct *work)
 		kfree(info);
 		goto out;
 	}
-	queue_work(shpchp_wq, &info->work);
+	queue_work(shpchp_ordered_wq, &info->work);
 out:
 	mutex_unlock(&p_slot->lock);
 }
@@ -505,7 +504,7 @@ static void handle_button_press_event(struct slot *p_slot)
 		p_slot->hpc_ops->green_led_blink(p_slot);
 		p_slot->hpc_ops->set_attention_status(p_slot, 0);
 
-		schedule_delayed_work(&p_slot->work, 5*HZ);
+		queue_delayed_work(shpchp_wq, &p_slot->work, 5*HZ);
 		break;
 	case BLINKINGOFF_STATE:
 	case BLINKINGON_STATE:
diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c
index d3985e7deab7..36547f0ce305 100644
--- a/drivers/pci/hotplug/shpchp_hpc.c
+++ b/drivers/pci/hotplug/shpchp_hpc.c
@@ -179,8 +179,6 @@
 #define SLOT_EVENT_LATCH	0x2
 #define SLOT_SERR_INT_MASK	0x3
 
-static atomic_t shpchp_num_controllers = ATOMIC_INIT(0);
-
 static irqreturn_t shpc_isr(int irq, void *dev_id);
 static void start_int_poll_timer(struct controller *ctrl, int sec);
 static int hpc_check_cmd_status(struct controller *ctrl);
@@ -614,13 +612,6 @@ static void hpc_release_ctlr(struct controller *ctrl)
 
 	iounmap(ctrl->creg);
 	release_mem_region(ctrl->mmio_base, ctrl->mmio_size);
-
-	/*
-	 * If this is the last controller to be released, destroy the
-	 * shpchpd work queue
-	 */
-	if (atomic_dec_and_test(&shpchp_num_controllers))
-		destroy_workqueue(shpchp_wq);
 }
 
 static int hpc_power_on_slot(struct slot * slot)
@@ -1077,9 +1068,8 @@ int shpc_init(struct controller *ctrl, struct pci_dev *pdev)
 
 	rc = request_irq(ctrl->pci_dev->irq, shpc_isr, IRQF_SHARED,
 			 MY_NAME, (void *)ctrl);
-	ctrl_dbg(ctrl, "request_irq %d for hpc%d (returns %d)\n",
-		 ctrl->pci_dev->irq,
-		 atomic_read(&shpchp_num_controllers), rc);
+	ctrl_dbg(ctrl, "request_irq %d (returns %d)\n",
+		 ctrl->pci_dev->irq, rc);
 	if (rc) {
 		ctrl_err(ctrl, "Can't get irq %d for the hotplug "
			 "controller\n", ctrl->pci_dev->irq);
@@ -1092,18 +1082,6 @@ int shpc_init(struct controller *ctrl, struct pci_dev *pdev)
 	shpc_get_cur_bus_speed(ctrl);
 
 	/*
-	 * If this is the first controller to be initialized,
-	 * initialize the shpchpd work queue
-	 */
-	if (atomic_add_return(1, &shpchp_num_controllers) == 1) {
-		shpchp_wq = create_singlethread_workqueue("shpchpd");
-		if (!shpchp_wq) {
-			rc = -ENOMEM;
-			goto abort_iounmap;
-		}
-	}
-
-	/*
 	 * Unmask all event interrupts of all slots
 	 */
 	for (hp_slot = 0; hp_slot < ctrl->num_slots; hp_slot++) {
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 98abf8b91294..db057b6fe0c8 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -34,7 +34,7 @@ struct ht_irq_cfg {
 
 void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
 {
-	struct ht_irq_cfg *cfg = get_irq_data(irq);
+	struct ht_irq_cfg *cfg = irq_get_handler_data(irq);
 	unsigned long flags;
 	spin_lock_irqsave(&ht_irq_lock, flags);
 	if (cfg->msg.address_lo != msg->address_lo) {
@@ -53,32 +53,26 @@ void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
 
 void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
 {
-	struct ht_irq_cfg *cfg = get_irq_data(irq);
+	struct ht_irq_cfg *cfg = irq_get_handler_data(irq);
 	*msg = cfg->msg;
 }
 
-void mask_ht_irq(unsigned int irq)
+void mask_ht_irq(struct irq_data *data)
 {
-	struct ht_irq_cfg *cfg;
-	struct ht_irq_msg msg;
-
-	cfg = get_irq_data(irq);
+	struct ht_irq_cfg *cfg = irq_data_get_irq_handler_data(data);
+	struct ht_irq_msg msg = cfg->msg;
 
-	msg = cfg->msg;
 	msg.address_lo |= 1;
-	write_ht_irq_msg(irq, &msg);
+	write_ht_irq_msg(data->irq, &msg);
 }
 
-void unmask_ht_irq(unsigned int irq)
+void unmask_ht_irq(struct irq_data *data)
 {
-	struct ht_irq_cfg *cfg;
-	struct ht_irq_msg msg;
-
-	cfg = get_irq_data(irq);
+	struct ht_irq_cfg *cfg = irq_data_get_irq_handler_data(data);
+	struct ht_irq_msg msg = cfg->msg;
 
-	msg = cfg->msg;
 	msg.address_lo &= ~1;
-	write_ht_irq_msg(irq, &msg);
+	write_ht_irq_msg(data->irq, &msg);
 }
 
 /**
@@ -132,7 +126,7 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 		kfree(cfg);
 		return -EBUSY;
 	}
-	set_irq_data(irq, cfg);
+	irq_set_handler_data(irq, cfg);
 
 	if (arch_setup_ht_irq(irq, dev) < 0) {
 		ht_destroy_irq(irq);
@@ -168,9 +162,9 @@ void ht_destroy_irq(unsigned int irq)
 {
 	struct ht_irq_cfg *cfg;
 
-	cfg = get_irq_data(irq);
-	set_irq_chip(irq, NULL);
-	set_irq_data(irq, NULL);
+	cfg = irq_get_handler_data(irq);
+	irq_set_chip(irq, NULL);
+	irq_set_handler_data(irq, NULL);
 	destroy_irq(irq);
 
 	kfree(cfg);
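
The htirq conversion above is part of the genirq migration in which chip callbacks receive a struct irq_data pointer instead of a bare irq number, so the handler data travels with the descriptor and no per-call lookup is needed. A user-space analogue of that shape (the struct layouts here are illustrative, not the kernel's definitions):

#include <stdio.h>

struct irq_data {
	unsigned int irq;
	void *handler_data;	/* set once, e.g. via irq_set_handler_data() */
};

struct ht_cfg { unsigned int address_lo; };

/* Was mask_ht_irq(unsigned int irq) plus a get_irq_data(irq) lookup;
 * now the config arrives with the callback argument. */
static void mask_irq(struct irq_data *data)
{
	struct ht_cfg *cfg = data->handler_data;
	cfg->address_lo |= 1;	/* set the HT mask bit, as in mask_ht_irq() */
}

int main(void)
{
	struct ht_cfg cfg = { 0 };
	struct irq_data d = { 16, &cfg };
	mask_irq(&d);
	printf("address_lo = %#x\n", cfg.address_lo);	/* 0x1 */
	return 0;
}
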
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 4789f8e8bf7a..f02c34d26d1b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -36,9 +36,10 @@
 #include <linux/iova.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/tboot.h>
 #include <linux/dmi.h>
+#include <linux/pci-ats.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
 #include "pci.h"
@@ -46,6 +47,8 @@
 #define ROOT_SIZE		VTD_PAGE_SIZE
 #define CONTEXT_SIZE		VTD_PAGE_SIZE
 
+#define IS_BRIDGE_HOST_DEVICE(pdev) \
+			    ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
@@ -115,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level)
 	return (pfn + level_size(level) - 1) & level_mask(level);
 }
 
+static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
+{
+	return 1 << ((lvl - 1) * LEVEL_STRIDE);
+}
+
 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
    are never going to work. */
 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
@@ -142,6 +150,12 @@ static void __init check_tylersburg_isoch(void);
 static int rwbf_quirk;
 
 /*
+ * set to 1 to panic kernel if can't successfully enable VT-d
+ * (used when kernel is launched w/ TXT)
+ */
+static int force_on = 0;
+
+/*
  * 0: Present
  * 1-11: Reserved
  * 12-63: Context Ptr (12 - (haw-1))
@@ -337,6 +351,9 @@ struct dmar_domain {
 	int		iommu_coherency;/* indicate coherency of iommu access */
 	int		iommu_snooping; /* indicate snooping control feature*/
 	int		iommu_count;	/* reference count of iommu */
+	int		iommu_superpage;/* Level of superpages supported:
+					   0 == 4KiB (no superpages), 1 == 2MiB,
+					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
 	spinlock_t	iommu_lock;	/* protect iommu set in domain */
 	u64		max_addr;	/* maximum mapped address */
 };
@@ -386,6 +403,7 @@ int dmar_disabled = 1;
 static int dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
+static int intel_iommu_superpage = 1;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -416,6 +434,10 @@ static int __init intel_iommu_setup(char *str)
 			printk(KERN_INFO
 				"Intel-IOMMU: disable batched IOTLB flush\n");
 			intel_iommu_strict = 1;
+		} else if (!strncmp(str, "sp_off", 6)) {
+			printk(KERN_INFO
+				"Intel-IOMMU: disable supported super page\n");
+			intel_iommu_superpage = 0;
 		}
 
 		str += strcspn(str, ",");
@@ -554,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain)
 	}
 }
 
+static void domain_update_iommu_superpage(struct dmar_domain *domain)
+{
+	int i, mask = 0xf;
+
+	if (!intel_iommu_superpage) {
+		domain->iommu_superpage = 0;
+		return;
+	}
+
+	domain->iommu_superpage = 4; /* 1TiB */
+
+	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
+		mask |= cap_super_page_val(g_iommus[i]->cap);
+		if (!mask) {
+			break;
+		}
+	}
+	domain->iommu_superpage = fls(mask);
+}
+
 /* Some capabilities may be different across iommus */
 static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
 	domain_update_iommu_coherency(domain);
 	domain_update_iommu_snooping(domain);
+	domain_update_iommu_superpage(domain);
 }
 
 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
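
domain_update_iommu_superpage() above combines the per-IOMMU super-page capability bits into one mask and takes fls() of the result to pick the page-size level the domain will use (bit 0 = 2MiB ... bit 3 = 1TiB, so fls() yields a level of 0-4). A user-space check of the fls() arithmetic, with a hypothetical capability value:

#include <stdio.h>

/* find-last-set, 1-based like the kernel's fls(); 0 when no bit is set. */
static int fls_(unsigned v)
{
	int n = 0;
	while (v) { n++; v >>= 1; }
	return n;
}

int main(void)
{
	unsigned cap = 0x3;	/* hypothetical: 2MiB and 1GiB pages supported */
	printf("iommu_superpage = %d\n", fls_(cap));	/* 2 -> up to 1GiB */
	return 0;
}
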
@@ -688,23 +731,31 @@ out:
688} 731}
689 732
690static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, 733static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
691 unsigned long pfn) 734 unsigned long pfn, int large_level)
692{ 735{
693 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; 736 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
694 struct dma_pte *parent, *pte = NULL; 737 struct dma_pte *parent, *pte = NULL;
695 int level = agaw_to_level(domain->agaw); 738 int level = agaw_to_level(domain->agaw);
696 int offset; 739 int offset, target_level;
697 740
698 BUG_ON(!domain->pgd); 741 BUG_ON(!domain->pgd);
699 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); 742 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
700 parent = domain->pgd; 743 parent = domain->pgd;
701 744
745 /* Search pte */
746 if (!large_level)
747 target_level = 1;
748 else
749 target_level = large_level;
750
702 while (level > 0) { 751 while (level > 0) {
703 void *tmp_page; 752 void *tmp_page;
704 753
705 offset = pfn_level_offset(pfn, level); 754 offset = pfn_level_offset(pfn, level);
706 pte = &parent[offset]; 755 pte = &parent[offset];
707 if (level == 1) 756 if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
757 break;
758 if (level == target_level)
708 break; 759 break;
709 760
710 if (!dma_pte_present(pte)) { 761 if (!dma_pte_present(pte)) {
@@ -732,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
732 return pte; 783 return pte;
733} 784}
734 785
786
735/* return address's pte at specific level */ 787/* return address's pte at specific level */
736static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, 788static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
737 unsigned long pfn, 789 unsigned long pfn,
738 int level) 790 int level, int *large_page)
739{ 791{
740 struct dma_pte *parent, *pte = NULL; 792 struct dma_pte *parent, *pte = NULL;
741 int total = agaw_to_level(domain->agaw); 793 int total = agaw_to_level(domain->agaw);
@@ -748,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
748 if (level == total) 800 if (level == total)
749 return pte; 801 return pte;
750 802
751 if (!dma_pte_present(pte)) 803 if (!dma_pte_present(pte)) {
804 *large_page = total;
752 break; 805 break;
806 }
807
808 if (pte->val & DMA_PTE_LARGE_PAGE) {
809 *large_page = total;
810 return pte;
811 }
812
753 parent = phys_to_virt(dma_pte_addr(pte)); 813 parent = phys_to_virt(dma_pte_addr(pte));
754 total--; 814 total--;
755 } 815 }
@@ -762,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
762 unsigned long last_pfn) 822 unsigned long last_pfn)
763{ 823{
764 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; 824 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
825 unsigned int large_page = 1;
765 struct dma_pte *first_pte, *pte; 826 struct dma_pte *first_pte, *pte;
766 827
767 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); 828 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
@@ -770,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
770 831
771 /* we don't need lock here; nobody else touches the iova range */ 832 /* we don't need lock here; nobody else touches the iova range */
772 do { 833 do {
773 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); 834 large_page = 1;
835 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
774 if (!pte) { 836 if (!pte) {
775 start_pfn = align_to_level(start_pfn + 1, 2); 837 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
776 continue; 838 continue;
777 } 839 }
778 do { 840 do {
779 dma_clear_pte(pte); 841 dma_clear_pte(pte);
780 start_pfn++; 842 start_pfn += lvl_to_nr_pages(large_page);
781 pte++; 843 pte++;
782 } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); 844 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
783 845
@@ -797,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
797 int total = agaw_to_level(domain->agaw); 859 int total = agaw_to_level(domain->agaw);
798 int level; 860 int level;
799 unsigned long tmp; 861 unsigned long tmp;
862 int large_page = 2;
800 863
801 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); 864 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
802 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); 865 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
@@ -812,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
812 return; 875 return;
813 876
814 do { 877 do {
815 first_pte = pte = dma_pfn_level_pte(domain, tmp, level); 878 large_page = level;
879 first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
880 if (large_page > level)
881 level = large_page + 1;
816 if (!pte) { 882 if (!pte) {
817 tmp = align_to_level(tmp + 1, level + 1); 883 tmp = align_to_level(tmp + 1, level + 1);
818 continue; 884 continue;
@@ -1206,7 +1272,7 @@ void free_dmar_iommu(struct intel_iommu *iommu)
1206 iommu_disable_translation(iommu); 1272 iommu_disable_translation(iommu);
1207 1273
1208 if (iommu->irq) { 1274 if (iommu->irq) {
1209 set_irq_data(iommu->irq, NULL); 1275 irq_set_handler_data(iommu->irq, NULL);
1210 /* This will mask the irq */ 1276 /* This will mask the irq */
1211 free_irq(iommu->irq, iommu); 1277 free_irq(iommu->irq, iommu);
1212 destroy_irq(iommu->irq); 1278 destroy_irq(iommu->irq);
@@ -1299,7 +1365,7 @@ static void iommu_detach_domain(struct dmar_domain *domain,
1299static struct iova_domain reserved_iova_list; 1365static struct iova_domain reserved_iova_list;
1300static struct lock_class_key reserved_rbtree_key; 1366static struct lock_class_key reserved_rbtree_key;
1301 1367
1302static void dmar_init_reserved_ranges(void) 1368static int dmar_init_reserved_ranges(void)
1303{ 1369{
1304 struct pci_dev *pdev = NULL; 1370 struct pci_dev *pdev = NULL;
1305 struct iova *iova; 1371 struct iova *iova;
@@ -1313,8 +1379,10 @@ static void dmar_init_reserved_ranges(void)
1313 /* IOAPIC ranges shouldn't be accessed by DMA */ 1379 /* IOAPIC ranges shouldn't be accessed by DMA */
1314 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START), 1380 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1315 IOVA_PFN(IOAPIC_RANGE_END)); 1381 IOVA_PFN(IOAPIC_RANGE_END));
1316 if (!iova) 1382 if (!iova) {
1317 printk(KERN_ERR "Reserve IOAPIC range failed\n"); 1383 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1384 return -ENODEV;
1385 }
1318 1386
1319 /* Reserve all PCI MMIO to avoid peer-to-peer access */ 1387 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1320 for_each_pci_dev(pdev) { 1388 for_each_pci_dev(pdev) {
@@ -1327,11 +1395,13 @@ static void dmar_init_reserved_ranges(void)
1327 iova = reserve_iova(&reserved_iova_list, 1395 iova = reserve_iova(&reserved_iova_list,
1328 IOVA_PFN(r->start), 1396 IOVA_PFN(r->start),
1329 IOVA_PFN(r->end)); 1397 IOVA_PFN(r->end));
1330 if (!iova) 1398 if (!iova) {
1331 printk(KERN_ERR "Reserve iova failed\n"); 1399 printk(KERN_ERR "Reserve iova failed\n");
1400 return -ENODEV;
1401 }
1332 } 1402 }
1333 } 1403 }
1334 1404 return 0;
1335} 1405}
1336 1406
1337static void domain_reserve_special_ranges(struct dmar_domain *domain) 1407static void domain_reserve_special_ranges(struct dmar_domain *domain)
@@ -1392,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1392 else 1462 else
1393 domain->iommu_snooping = 0; 1463 domain->iommu_snooping = 0;
1394 1464
1465 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1395 domain->iommu_count = 1; 1466 domain->iommu_count = 1;
1396 domain->nid = iommu->node; 1467 domain->nid = iommu->node;
1397 1468
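
domain_init() now records how many superpage levels the hardware offers. A hedged sketch of the fls() arithmetic, assuming the capability SPS field encodes 2MiB support in bit 0 and 1GiB support in bit 1, as in the VT-d specification:

#include <stdio.h>

/* fls(): 1-based index of the highest set bit; 0 if no bits are set. */
static int fls_sketch(unsigned int v)
{
        int i = 0;
        while (v) { i++; v >>= 1; }
        return i;
}

int main(void)
{
        printf("%d\n", fls_sketch(0x0));  /* 0: no superpages */
        printf("%d\n", fls_sketch(0x1));  /* 1: up to 2MiB    */
        printf("%d\n", fls_sketch(0x3));  /* 2: up to 1GiB    */
        return 0;
}
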
@@ -1412,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain)
1412 if (!domain) 1483 if (!domain)
1413 return; 1484 return;
1414 1485
1486 /* Flush any lazy unmaps that may reference this domain */
1487 if (!intel_iommu_strict)
1488 flush_unmaps_timeout(0);
1489
1415 domain_remove_dev_info(domain); 1490 domain_remove_dev_info(domain);
1416 /* destroy iovas */ 1491 /* destroy iovas */
1417 put_iova_domain(&domain->iovad); 1492 put_iova_domain(&domain->iovad);
@@ -1643,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr,
1643 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; 1718 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1644} 1719}
1645 1720
1721/* Return largest possible superpage level for a given mapping */
1722static inline int hardware_largepage_caps(struct dmar_domain *domain,
1723 unsigned long iov_pfn,
1724 unsigned long phy_pfn,
1725 unsigned long pages)
1726{
1727 int support, level = 1;
1728 unsigned long pfnmerge;
1729
1730 support = domain->iommu_superpage;
1731
1732 /* To use a large page, the virtual *and* physical addresses
1733 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1734 of them will mean we have to use smaller pages. So just
1735 merge them and check both at once. */
1736 pfnmerge = iov_pfn | phy_pfn;
1737
1738 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1739 pages >>= VTD_STRIDE_SHIFT;
1740 if (!pages)
1741 break;
1742 pfnmerge >>= VTD_STRIDE_SHIFT;
1743 level++;
1744 support--;
1745 }
1746 return level;
1747}
1748
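
A worked example of the merge trick in hardware_largepage_caps(), using hypothetical pfns and showing only the first (2MiB) step of the loop; it assumes ~VTD_STRIDE_MASK selects the low 9 bits of a pfn:

#include <stdio.h>

#define VTD_STRIDE_SHIFT 9
#define STRIDE_LOW_BITS ((1UL << VTD_STRIDE_SHIFT) - 1)  /* ~VTD_STRIDE_MASK */

int main(void)
{
        /* Hypothetical mapping: both pfns are 2MiB-aligned (low 9 bits
         * clear) and the range spans at least 512 pages, so a level-2
         * superpage can be used. */
        unsigned long iov_pfn = 0x40200, phy_pfn = 0x81400, pages = 1024;
        unsigned long pfnmerge = iov_pfn | phy_pfn;

        if (!(pfnmerge & STRIDE_LOW_BITS) && (pages >> VTD_STRIDE_SHIFT))
                printf("2MiB superpage candidate\n");
        else
                printf("must fall back to 4KiB pages\n");
        return 0;
}
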
1646static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 1749static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1647 struct scatterlist *sg, unsigned long phys_pfn, 1750 struct scatterlist *sg, unsigned long phys_pfn,
1648 unsigned long nr_pages, int prot) 1751 unsigned long nr_pages, int prot)
@@ -1651,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1651 phys_addr_t uninitialized_var(pteval); 1754 phys_addr_t uninitialized_var(pteval);
1652 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; 1755 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1653 unsigned long sg_res; 1756 unsigned long sg_res;
1757 unsigned int largepage_lvl = 0;
1758 unsigned long lvl_pages = 0;
1654 1759
1655 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); 1760 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1656 1761
@@ -1666,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1666 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; 1771 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1667 } 1772 }
1668 1773
1669 while (nr_pages--) { 1774 while (nr_pages > 0) {
1670 uint64_t tmp; 1775 uint64_t tmp;
1671 1776
1672 if (!sg_res) { 1777 if (!sg_res) {
@@ -1674,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1674 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; 1779 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1675 sg->dma_length = sg->length; 1780 sg->dma_length = sg->length;
1676 pteval = page_to_phys(sg_page(sg)) | prot; 1781 pteval = page_to_phys(sg_page(sg)) | prot;
1782 phys_pfn = pteval >> VTD_PAGE_SHIFT;
1677 } 1783 }
1784
1678 if (!pte) { 1785 if (!pte) {
1679 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); 1786 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1787
1788 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1680 if (!pte) 1789 if (!pte)
1681 return -ENOMEM; 1790 return -ENOMEM;
1791 /* It is a large page */

1792 if (largepage_lvl > 1)
1793 pteval |= DMA_PTE_LARGE_PAGE;
1794 else
1795 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1796
1682 } 1797 }
1683 /* We don't need lock here, nobody else 1798 /* We don't need lock here, nobody else
1684 * touches the iova range 1799 * touches the iova range
@@ -1694,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1694 } 1809 }
1695 WARN_ON(1); 1810 WARN_ON(1);
1696 } 1811 }
1812
1813 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1814
1815 BUG_ON(nr_pages < lvl_pages);
1816 BUG_ON(sg_res < lvl_pages);
1817
1818 nr_pages -= lvl_pages;
1819 iov_pfn += lvl_pages;
1820 phys_pfn += lvl_pages;
1821 pteval += lvl_pages * VTD_PAGE_SIZE;
1822 sg_res -= lvl_pages;
1823
1824 /* If the next PTE would be the first in a new page, then we
1825 need to flush the cache on the entries we've just written.
1826 And then we'll need to recalculate 'pte', so clear it and
1827 let it get set again in the if (!pte) block above.
1828
1829 If we're done (!nr_pages) we need to flush the cache too.
1830
1831 Also if we've been setting superpages, we may need to
1832 recalculate 'pte' and switch back to smaller pages for the
1833 end of the mapping, if the trailing size is not enough to
1834 use another superpage (i.e. sg_res < lvl_pages). */
1697 pte++; 1835 pte++;
1698 if (!nr_pages || first_pte_in_page(pte)) { 1836 if (!nr_pages || first_pte_in_page(pte) ||
1837 (largepage_lvl > 1 && sg_res < lvl_pages)) {
1699 domain_flush_cache(domain, first_pte, 1838 domain_flush_cache(domain, first_pte,
1700 (void *)pte - (void *)first_pte); 1839 (void *)pte - (void *)first_pte);
1701 pte = NULL; 1840 pte = NULL;
1702 } 1841 }
1703 iov_pfn++; 1842
1704 pteval += VTD_PAGE_SIZE; 1843 if (!sg_res && nr_pages)
1705 sg_res--;
1706 if (!sg_res)
1707 sg = sg_next(sg); 1844 sg = sg_next(sg);
1708 } 1845 }
1709 return 0; 1846 return 0;
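
The flush condition above relies on first_pte_in_page() to detect that the previous entry filled up a table page. A simplified, self-contained model of that test, assuming 4KiB table pages holding 512 eight-byte entries:

#include <stdio.h>

#define VTD_PAGE_SIZE 4096UL

struct dma_pte { unsigned long long val; };  /* one 8-byte table entry */

/* Sketch of first_pte_in_page(): true when the PTE sits at the start
 * of a 4KiB table page, i.e. the previous page of entries is full. */
static int first_pte_in_page(struct dma_pte *pte)
{
        return ((unsigned long)pte & (VTD_PAGE_SIZE - 1)) == 0;
}

int main(void)
{
        struct dma_pte *table = (struct dma_pte *)0x10000;  /* hypothetical */
        printf("%d\n", first_pte_in_page(table));        /* 1: page start */
        printf("%d\n", first_pte_in_page(table + 1));    /* 0: mid-page   */
        printf("%d\n", first_pte_in_page(table + 512));  /* 1: next page  */
        return 0;
}
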
@@ -1835,7 +1972,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1835 1972
1836 ret = iommu_attach_domain(domain, iommu); 1973 ret = iommu_attach_domain(domain, iommu);
1837 if (ret) { 1974 if (ret) {
1838 domain_exit(domain); 1975 free_domain_mem(domain);
1839 goto error; 1976 goto error;
1840 } 1977 }
1841 1978
@@ -2011,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2011 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2148 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2012 return 0; 2149 return 0;
2013 return iommu_prepare_identity_map(pdev, rmrr->base_address, 2150 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2014 rmrr->end_address + 1); 2151 rmrr->end_address);
2015} 2152}
2016 2153
2017#ifdef CONFIG_DMAR_FLOPPY_WA 2154#ifdef CONFIG_DMAR_FLOPPY_WA
@@ -2025,7 +2162,7 @@ static inline void iommu_prepare_isa(void)
2025 return; 2162 return;
2026 2163
2027 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); 2164 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2028 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); 2165 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2029 2166
2030 if (ret) 2167 if (ret)
2031 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " 2168 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
@@ -2101,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev)
2101 if (likely(!iommu_identity_mapping)) 2238 if (likely(!iommu_identity_mapping))
2102 return 0; 2239 return 0;
2103 2240
2241 info = pdev->dev.archdata.iommu;
2242 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2243 return (info->domain == si_domain);
2104 2244
2105 list_for_each_entry(info, &si_domain->devices, link)
2106 if (info->dev == pdev)
2107 return 1;
2108 return 0; 2245 return 0;
2109} 2246}
2110 2247
@@ -2182,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2182 * Assume that they will -- if they turn out not to be, then we can 2319 * Assume that they will -- if they turn out not to be, then we can
2183 * take them out of the 1:1 domain later. 2320 * take them out of the 1:1 domain later.
2184 */ 2321 */
2185 if (!startup) 2322 if (!startup) {
2186 return pdev->dma_mask > DMA_BIT_MASK(32); 2323 /*
2324 * If the device's dma_mask is less than the system's memory
2325 * size then this is not a candidate for identity mapping.
2326 */
2327 u64 dma_mask = pdev->dma_mask;
2328
2329 if (pdev->dev.coherent_dma_mask &&
2330 pdev->dev.coherent_dma_mask < dma_mask)
2331 dma_mask = pdev->dev.coherent_dma_mask;
2332
2333 return dma_mask >= dma_get_required_mask(&pdev->dev);
2334 }
2187 2335
2188 return 1; 2336 return 1;
2189} 2337}
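
A worked example of the new late-stage check, on a hypothetical machine with 8GiB of populated RAM: dma_get_required_mask() would report roughly 33 bits, so a 32-bit-only device is moved out of the identity domain instead of being left to DMA above its reach:

#include <stdio.h>

#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

int main(void)
{
        /* Hypothetical system with 8GiB of RAM: reaching the highest
         * page needs 33 address bits. */
        unsigned long long required = DMA_BIT_MASK(33);
        unsigned long long dev_mask = DMA_BIT_MASK(32);  /* 32-bit device */

        printf("identity map? %s\n",
               dev_mask >= required ? "yes" : "no (needs IOMMU remapping)");
        return 0;
}
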
@@ -2198,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
2198 return -EFAULT; 2346 return -EFAULT;
2199 2347
2200 for_each_pci_dev(pdev) { 2348 for_each_pci_dev(pdev) {
2349 /* Skip Host/PCI Bridge devices */
2350 if (IS_BRIDGE_HOST_DEVICE(pdev))
2351 continue;
2201 if (iommu_should_identity_map(pdev, 1)) { 2352 if (iommu_should_identity_map(pdev, 1)) {
2202 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", 2353 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2203 hw ? "hardware" : "software", pci_name(pdev)); 2354 hw ? "hardware" : "software", pci_name(pdev));
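
IS_BRIDGE_HOST_DEVICE() is introduced elsewhere in this series; a plausible shape of the helper (an assumption, not the verbatim definition) compares the upper 16 bits of the class word against the host-bridge class code:

#include <stdio.h>

#define PCI_CLASS_BRIDGE_HOST 0x0600  /* base class 0x06, sub-class 0x00 */

struct pci_dev { unsigned int class; };  /* 24-bit class/subclass/prog-if */

/* Assumed shape of the helper added by this series. */
#define IS_BRIDGE_HOST_DEVICE(pdev) \
        (((pdev)->class >> 8) == PCI_CLASS_BRIDGE_HOST)

int main(void)
{
        struct pci_dev host = { .class = 0x060000 };  /* host bridge   */
        struct pci_dev nic  = { .class = 0x020000 };  /* ethernet ctrl */

        printf("%d %d\n", IS_BRIDGE_HOST_DEVICE(&host),
                          IS_BRIDGE_HOST_DEVICE(&nic));  /* 1 0 */
        return 0;
}
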
@@ -2213,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
2213 return 0; 2364 return 0;
2214} 2365}
2215 2366
2216int __init init_dmars(void) 2367static int __init init_dmars(void)
2217{ 2368{
2218 struct dmar_drhd_unit *drhd; 2369 struct dmar_drhd_unit *drhd;
2219 struct dmar_rmrr_unit *rmrr; 2370 struct dmar_rmrr_unit *rmrr;
@@ -2265,7 +2416,7 @@ int __init init_dmars(void)
2265 /* 2416 /*
2266 * TBD: 2417 * TBD:
2267 * we could share the same root & context tables 2418 * we could share the same root & context tables
2268 * amoung all IOMMU's. Need to Split it later. 2419 * among all IOMMU's. Need to Split it later.
2269 */ 2420 */
2270 ret = iommu_alloc_root_entry(iommu); 2421 ret = iommu_alloc_root_entry(iommu);
2271 if (ret) { 2422 if (ret) {
@@ -2393,8 +2544,15 @@ int __init init_dmars(void)
2393 * enable translation 2544 * enable translation
2394 */ 2545 */
2395 for_each_drhd_unit(drhd) { 2546 for_each_drhd_unit(drhd) {
2396 if (drhd->ignored) 2547 if (drhd->ignored) {
2548 /*
2549 * we always have to disable PMRs or DMA may fail on
2550 * this device
2551 */
2552 if (force_on)
2553 iommu_disable_protect_mem_regions(drhd->iommu);
2397 continue; 2554 continue;
2555 }
2398 iommu = drhd->iommu; 2556 iommu = drhd->iommu;
2399 2557
2400 iommu_flush_write_buffer(iommu); 2558 iommu_flush_write_buffer(iommu);
@@ -2580,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2580 iommu = domain_get_iommu(domain); 2738 iommu = domain_get_iommu(domain);
2581 size = aligned_nrpages(paddr, size); 2739 size = aligned_nrpages(paddr, size);
2582 2740
2583 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), 2741 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2584 pdev->dma_mask);
2585 if (!iova) 2742 if (!iova)
2586 goto error; 2743 goto error;
2587 2744
@@ -3106,7 +3263,17 @@ static int init_iommu_hw(void)
3106 if (iommu->qi) 3263 if (iommu->qi)
3107 dmar_reenable_qi(iommu); 3264 dmar_reenable_qi(iommu);
3108 3265
3109 for_each_active_iommu(iommu, drhd) { 3266 for_each_iommu(iommu, drhd) {
3267 if (drhd->ignored) {
3268 /*
3269 * we always have to disable PMRs or DMA may fail on
3270 * this device
3271 */
3272 if (force_on)
3273 iommu_disable_protect_mem_regions(iommu);
3274 continue;
3275 }
3276
3110 iommu_flush_write_buffer(iommu); 3277 iommu_flush_write_buffer(iommu);
3111 3278
3112 iommu_set_root_entry(iommu); 3279 iommu_set_root_entry(iommu);
@@ -3115,7 +3282,8 @@ static int init_iommu_hw(void)
3115 DMA_CCMD_GLOBAL_INVL); 3282 DMA_CCMD_GLOBAL_INVL);
3116 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 3283 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3117 DMA_TLB_GLOBAL_FLUSH); 3284 DMA_TLB_GLOBAL_FLUSH);
3118 iommu_enable_translation(iommu); 3285 if (iommu_enable_translation(iommu))
3286 return 1;
3119 iommu_disable_protect_mem_regions(iommu); 3287 iommu_disable_protect_mem_regions(iommu);
3120 } 3288 }
3121 3289
@@ -3135,7 +3303,7 @@ static void iommu_flush_all(void)
3135 } 3303 }
3136} 3304}
3137 3305
3138static int iommu_suspend(struct sys_device *dev, pm_message_t state) 3306static int iommu_suspend(void)
3139{ 3307{
3140 struct dmar_drhd_unit *drhd; 3308 struct dmar_drhd_unit *drhd;
3141 struct intel_iommu *iommu = NULL; 3309 struct intel_iommu *iommu = NULL;
@@ -3175,15 +3343,18 @@ nomem:
3175 return -ENOMEM; 3343 return -ENOMEM;
3176} 3344}
3177 3345
3178static int iommu_resume(struct sys_device *dev) 3346static void iommu_resume(void)
3179{ 3347{
3180 struct dmar_drhd_unit *drhd; 3348 struct dmar_drhd_unit *drhd;
3181 struct intel_iommu *iommu = NULL; 3349 struct intel_iommu *iommu = NULL;
3182 unsigned long flag; 3350 unsigned long flag;
3183 3351
3184 if (init_iommu_hw()) { 3352 if (init_iommu_hw()) {
3185 WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); 3353 if (force_on)
3186 return -EIO; 3354 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3355 else
3356 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3357 return;
3187 } 3358 }
3188 3359
3189 for_each_active_iommu(iommu, drhd) { 3360 for_each_active_iommu(iommu, drhd) {
@@ -3204,40 +3375,20 @@ static int iommu_resume(struct sys_device *dev)
3204 3375
3205 for_each_active_iommu(iommu, drhd) 3376 for_each_active_iommu(iommu, drhd)
3206 kfree(iommu->iommu_state); 3377 kfree(iommu->iommu_state);
3207
3208 return 0;
3209} 3378}
3210 3379
3211static struct sysdev_class iommu_sysclass = { 3380static struct syscore_ops iommu_syscore_ops = {
3212 .name = "iommu",
3213 .resume = iommu_resume, 3381 .resume = iommu_resume,
3214 .suspend = iommu_suspend, 3382 .suspend = iommu_suspend,
3215}; 3383};
3216 3384
3217static struct sys_device device_iommu = { 3385static void __init init_iommu_pm_ops(void)
3218 .cls = &iommu_sysclass,
3219};
3220
3221static int __init init_iommu_sysfs(void)
3222{ 3386{
3223 int error; 3387 register_syscore_ops(&iommu_syscore_ops);
3224
3225 error = sysdev_class_register(&iommu_sysclass);
3226 if (error)
3227 return error;
3228
3229 error = sysdev_register(&device_iommu);
3230 if (error)
3231 sysdev_class_unregister(&iommu_sysclass);
3232
3233 return error;
3234} 3388}
3235 3389
3236#else 3390#else
3237static int __init init_iommu_sysfs(void) 3391static inline void init_iommu_pm_ops(void) {}
3238{
3239 return 0;
3240}
3241#endif /* CONFIG_PM */ 3392#endif /* CONFIG_PM */
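
The sysdev-to-syscore conversion also explains the signature changes above: syscore callbacks run once, late in suspend, on a single CPU with interrupts disabled, so suspend() takes no device argument and resume() cannot usefully return an error.
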
3242 3393
3243/* 3394/*
@@ -3260,9 +3411,15 @@ static int device_notifier(struct notifier_block *nb,
3260 if (!domain) 3411 if (!domain)
3261 return 0; 3412 return 0;
3262 3413
3263 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) 3414 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3264 domain_remove_one_dev_info(domain, pdev); 3415 domain_remove_one_dev_info(domain, pdev);
3265 3416
3417 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3418 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3419 list_empty(&domain->devices))
3420 domain_exit(domain);
3421 }
3422
3266 return 0; 3423 return 0;
3267} 3424}
3268 3425
@@ -3273,7 +3430,6 @@ static struct notifier_block device_nb = {
3273int __init intel_iommu_init(void) 3430int __init intel_iommu_init(void)
3274{ 3431{
3275 int ret = 0; 3432 int ret = 0;
3276 int force_on = 0;
3277 3433
3278 /* VT-d is required for a TXT/tboot launch, so enforce that */ 3434 /* VT-d is required for a TXT/tboot launch, so enforce that */
3279 force_on = tboot_force_iommu(); 3435 force_on = tboot_force_iommu();
@@ -3297,8 +3453,17 @@ int __init intel_iommu_init(void)
3297 if (no_iommu || dmar_disabled) 3453 if (no_iommu || dmar_disabled)
3298 return -ENODEV; 3454 return -ENODEV;
3299 3455
3300 iommu_init_mempool(); 3456 if (iommu_init_mempool()) {
3301 dmar_init_reserved_ranges(); 3457 if (force_on)
3458 panic("tboot: Failed to initialize iommu memory\n");
3459 return -ENODEV;
3460 }
3461
3462 if (dmar_init_reserved_ranges()) {
3463 if (force_on)
3464 panic("tboot: Failed to reserve iommu ranges\n");
3465 return -ENODEV;
3466 }
3302 3467
3303 init_no_remapping_devices(); 3468 init_no_remapping_devices();
3304 3469
@@ -3320,7 +3485,7 @@ int __init intel_iommu_init(void)
3320#endif 3485#endif
3321 dma_ops = &intel_dma_ops; 3486 dma_ops = &intel_dma_ops;
3322 3487
3323 init_iommu_sysfs(); 3488 init_iommu_pm_ops();
3324 3489
3325 register_iommu(&intel_iommu_ops); 3490 register_iommu(&intel_iommu_ops);
3326 3491
@@ -3373,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
3373 spin_lock_irqsave(&device_domain_lock, flags); 3538 spin_lock_irqsave(&device_domain_lock, flags);
3374 list_for_each_safe(entry, tmp, &domain->devices) { 3539 list_for_each_safe(entry, tmp, &domain->devices) {
3375 info = list_entry(entry, struct device_domain_info, link); 3540 info = list_entry(entry, struct device_domain_info, link);
3376 /* No need to compare PCI domain; it has to be the same */ 3541 if (info->segment == pci_domain_nr(pdev->bus) &&
3377 if (info->bus == pdev->bus->number && 3542 info->bus == pdev->bus->number &&
3378 info->devfn == pdev->devfn) { 3543 info->devfn == pdev->devfn) {
3379 list_del(&info->link); 3544 list_del(&info->link);
3380 list_del(&info->global); 3545 list_del(&info->global);
@@ -3411,6 +3576,14 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
3411 domain->iommu_count--; 3576 domain->iommu_count--;
3412 domain_update_iommu_cap(domain); 3577 domain_update_iommu_cap(domain);
3413 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); 3578 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3579
3580 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3581 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3582 spin_lock_irqsave(&iommu->lock, tmp_flags);
3583 clear_bit(domain->id, iommu->domain_ids);
3584 iommu->domains[domain->id] = NULL;
3585 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3586 }
3414 } 3587 }
3415 3588
3416 spin_unlock_irqrestore(&device_domain_lock, flags); 3589 spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -3493,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
3493 domain->iommu_count = 0; 3666 domain->iommu_count = 0;
3494 domain->iommu_coherency = 0; 3667 domain->iommu_coherency = 0;
3495 domain->iommu_snooping = 0; 3668 domain->iommu_snooping = 0;
3669 domain->iommu_superpage = 0;
3496 domain->max_addr = 0; 3670 domain->max_addr = 0;
3497 domain->nid = -1; 3671 domain->nid = -1;
3498 3672
@@ -3627,9 +3801,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
3627 3801
3628 pte = dmar_domain->pgd; 3802 pte = dmar_domain->pgd;
3629 if (dma_pte_present(pte)) { 3803 if (dma_pte_present(pte)) {
3630 free_pgtable_page(dmar_domain->pgd);
3631 dmar_domain->pgd = (struct dma_pte *) 3804 dmar_domain->pgd = (struct dma_pte *)
3632 phys_to_virt(dma_pte_addr(pte)); 3805 phys_to_virt(dma_pte_addr(pte));
3806 free_pgtable_page(pte);
3633 } 3807 }
3634 dmar_domain->agaw--; 3808 dmar_domain->agaw--;
3635 } 3809 }
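
The reordering in this hunk closes a use-after-free: the old code freed the top-level table page first and then called dma_pte_addr() on the entry inside the page it had just released; now the next-level pointer is read out before free_pgtable_page() runs.
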
@@ -3708,7 +3882,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3708 struct dma_pte *pte; 3882 struct dma_pte *pte;
3709 u64 phys = 0; 3883 u64 phys = 0;
3710 3884
3711 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); 3885 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
3712 if (pte) 3886 if (pte)
3713 phys = dma_pte_addr(pte); 3887 phys = dma_pte_addr(pte);
3714 3888
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index fd1d2867cdcc..3607faf28a4d 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -46,109 +46,24 @@ static __init int setup_intremap(char *str)
46} 46}
47early_param("intremap", setup_intremap); 47early_param("intremap", setup_intremap);
48 48
49struct irq_2_iommu {
50 struct intel_iommu *iommu;
51 u16 irte_index;
52 u16 sub_handle;
53 u8 irte_mask;
54};
55
56#ifdef CONFIG_GENERIC_HARDIRQS
57static struct irq_2_iommu *get_one_free_irq_2_iommu(int node)
58{
59 struct irq_2_iommu *iommu;
60
61 iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node);
62 printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node);
63
64 return iommu;
65}
66
67static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
68{
69 struct irq_desc *desc;
70
71 desc = irq_to_desc(irq);
72
73 if (WARN_ON_ONCE(!desc))
74 return NULL;
75
76 return desc->irq_2_iommu;
77}
78
79static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
80{
81 struct irq_desc *desc;
82 struct irq_2_iommu *irq_iommu;
83
84 desc = irq_to_desc(irq);
85 if (!desc) {
86 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
87 return NULL;
88 }
89
90 irq_iommu = desc->irq_2_iommu;
91
92 if (!irq_iommu)
93 desc->irq_2_iommu = get_one_free_irq_2_iommu(irq_node(irq));
94
95 return desc->irq_2_iommu;
96}
97
98#else /* !CONFIG_SPARSE_IRQ */
99
100static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
101
102static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
103{
104 if (irq < nr_irqs)
105 return &irq_2_iommuX[irq];
106
107 return NULL;
108}
109static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
110{
111 return irq_2_iommu(irq);
112}
113#endif
114
115static DEFINE_SPINLOCK(irq_2_ir_lock); 49static DEFINE_SPINLOCK(irq_2_ir_lock);
116 50
117static struct irq_2_iommu *valid_irq_2_iommu(unsigned int irq) 51static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
118{
119 struct irq_2_iommu *irq_iommu;
120
121 irq_iommu = irq_2_iommu(irq);
122
123 if (!irq_iommu)
124 return NULL;
125
126 if (!irq_iommu->iommu)
127 return NULL;
128
129 return irq_iommu;
130}
131
132int irq_remapped(int irq)
133{ 52{
134 return valid_irq_2_iommu(irq) != NULL; 53 struct irq_cfg *cfg = irq_get_chip_data(irq);
54 return cfg ? &cfg->irq_2_iommu : NULL;
135} 55}
136 56
137int get_irte(int irq, struct irte *entry) 57int get_irte(int irq, struct irte *entry)
138{ 58{
139 int index; 59 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
140 struct irq_2_iommu *irq_iommu;
141 unsigned long flags; 60 unsigned long flags;
61 int index;
142 62
143 if (!entry) 63 if (!entry || !irq_iommu)
144 return -1; 64 return -1;
145 65
146 spin_lock_irqsave(&irq_2_ir_lock, flags); 66 spin_lock_irqsave(&irq_2_ir_lock, flags);
147 irq_iommu = valid_irq_2_iommu(irq);
148 if (!irq_iommu) {
149 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
150 return -1;
151 }
152 67
153 index = irq_iommu->irte_index + irq_iommu->sub_handle; 68 index = irq_iommu->irte_index + irq_iommu->sub_handle;
154 *entry = *(irq_iommu->iommu->ir_table->base + index); 69 *entry = *(irq_iommu->iommu->ir_table->base + index);
@@ -160,20 +75,14 @@ int get_irte(int irq, struct irte *entry)
160int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) 75int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
161{ 76{
162 struct ir_table *table = iommu->ir_table; 77 struct ir_table *table = iommu->ir_table;
163 struct irq_2_iommu *irq_iommu; 78 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
164 u16 index, start_index; 79 u16 index, start_index;
165 unsigned int mask = 0; 80 unsigned int mask = 0;
166 unsigned long flags; 81 unsigned long flags;
167 int i; 82 int i;
168 83
169 if (!count) 84 if (!count || !irq_iommu)
170 return -1;
171
172#ifndef CONFIG_SPARSE_IRQ
173 /* protect irq_2_iommu_alloc later */
174 if (irq >= nr_irqs)
175 return -1; 85 return -1;
176#endif
177 86
178 /* 87 /*
179 * start the IRTE search from index 0. 88 * start the IRTE search from index 0.
@@ -214,13 +123,6 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
214 for (i = index; i < index + count; i++) 123 for (i = index; i < index + count; i++)
215 table->base[i].present = 1; 124 table->base[i].present = 1;
216 125
217 irq_iommu = irq_2_iommu_alloc(irq);
218 if (!irq_iommu) {
219 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
220 printk(KERN_ERR "can't allocate irq_2_iommu\n");
221 return -1;
222 }
223
224 irq_iommu->iommu = iommu; 126 irq_iommu->iommu = iommu;
225 irq_iommu->irte_index = index; 127 irq_iommu->irte_index = index;
226 irq_iommu->sub_handle = 0; 128 irq_iommu->sub_handle = 0;
@@ -244,17 +146,14 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
244 146
245int map_irq_to_irte_handle(int irq, u16 *sub_handle) 147int map_irq_to_irte_handle(int irq, u16 *sub_handle)
246{ 148{
247 int index; 149 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
248 struct irq_2_iommu *irq_iommu;
249 unsigned long flags; 150 unsigned long flags;
151 int index;
250 152
251 spin_lock_irqsave(&irq_2_ir_lock, flags); 153 if (!irq_iommu)
252 irq_iommu = valid_irq_2_iommu(irq);
253 if (!irq_iommu) {
254 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
255 return -1; 154 return -1;
256 }
257 155
156 spin_lock_irqsave(&irq_2_ir_lock, flags);
258 *sub_handle = irq_iommu->sub_handle; 157 *sub_handle = irq_iommu->sub_handle;
259 index = irq_iommu->irte_index; 158 index = irq_iommu->irte_index;
260 spin_unlock_irqrestore(&irq_2_ir_lock, flags); 159 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
@@ -263,18 +162,13 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle)
263 162
264int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) 163int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
265{ 164{
266 struct irq_2_iommu *irq_iommu; 165 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
267 unsigned long flags; 166 unsigned long flags;
268 167
269 spin_lock_irqsave(&irq_2_ir_lock, flags); 168 if (!irq_iommu)
270
271 irq_iommu = irq_2_iommu_alloc(irq);
272
273 if (!irq_iommu) {
274 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
275 printk(KERN_ERR "can't allocate irq_2_iommu\n");
276 return -1; 169 return -1;
277 } 170
171 spin_lock_irqsave(&irq_2_ir_lock, flags);
278 172
279 irq_iommu->iommu = iommu; 173 irq_iommu->iommu = iommu;
280 irq_iommu->irte_index = index; 174 irq_iommu->irte_index = index;
@@ -286,43 +180,18 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
286 return 0; 180 return 0;
287} 181}
288 182
289int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
290{
291 struct irq_2_iommu *irq_iommu;
292 unsigned long flags;
293
294 spin_lock_irqsave(&irq_2_ir_lock, flags);
295 irq_iommu = valid_irq_2_iommu(irq);
296 if (!irq_iommu) {
297 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
298 return -1;
299 }
300
301 irq_iommu->iommu = NULL;
302 irq_iommu->irte_index = 0;
303 irq_iommu->sub_handle = 0;
304 irq_2_iommu(irq)->irte_mask = 0;
305
306 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
307
308 return 0;
309}
310
311int modify_irte(int irq, struct irte *irte_modified) 183int modify_irte(int irq, struct irte *irte_modified)
312{ 184{
313 int rc; 185 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
314 int index;
315 struct irte *irte;
316 struct intel_iommu *iommu; 186 struct intel_iommu *iommu;
317 struct irq_2_iommu *irq_iommu;
318 unsigned long flags; 187 unsigned long flags;
188 struct irte *irte;
189 int rc, index;
319 190
320 spin_lock_irqsave(&irq_2_ir_lock, flags); 191 if (!irq_iommu)
321 irq_iommu = valid_irq_2_iommu(irq);
322 if (!irq_iommu) {
323 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
324 return -1; 192 return -1;
325 } 193
194 spin_lock_irqsave(&irq_2_ir_lock, flags);
326 195
327 iommu = irq_iommu->iommu; 196 iommu = irq_iommu->iommu;
328 197
@@ -339,31 +208,6 @@ int modify_irte(int irq, struct irte *irte_modified)
339 return rc; 208 return rc;
340} 209}
341 210
342int flush_irte(int irq)
343{
344 int rc;
345 int index;
346 struct intel_iommu *iommu;
347 struct irq_2_iommu *irq_iommu;
348 unsigned long flags;
349
350 spin_lock_irqsave(&irq_2_ir_lock, flags);
351 irq_iommu = valid_irq_2_iommu(irq);
352 if (!irq_iommu) {
353 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
354 return -1;
355 }
356
357 iommu = irq_iommu->iommu;
358
359 index = irq_iommu->irte_index + irq_iommu->sub_handle;
360
361 rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask);
362 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
363
364 return rc;
365}
366
367struct intel_iommu *map_hpet_to_ir(u8 hpet_id) 211struct intel_iommu *map_hpet_to_ir(u8 hpet_id)
368{ 212{
369 int i; 213 int i;
@@ -420,16 +264,14 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)
420 264
421int free_irte(int irq) 265int free_irte(int irq)
422{ 266{
423 int rc = 0; 267 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
424 struct irq_2_iommu *irq_iommu;
425 unsigned long flags; 268 unsigned long flags;
269 int rc;
426 270
427 spin_lock_irqsave(&irq_2_ir_lock, flags); 271 if (!irq_iommu)
428 irq_iommu = valid_irq_2_iommu(irq);
429 if (!irq_iommu) {
430 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
431 return -1; 272 return -1;
432 } 273
274 spin_lock_irqsave(&irq_2_ir_lock, flags);
433 275
434 rc = clear_entries(irq_iommu); 276 rc = clear_entries(irq_iommu);
435 277
@@ -447,7 +289,7 @@ int free_irte(int irq)
447 * source validation type 289 * source validation type
448 */ 290 */
449#define SVT_NO_VERIFY 0x0 /* no verification is required */ 291#define SVT_NO_VERIFY 0x0 /* no verification is required */
450#define SVT_VERIFY_SID_SQ 0x1 /* verify using SID and SQ fiels */ 292#define SVT_VERIFY_SID_SQ 0x1 /* verify using SID and SQ fields */
451#define SVT_VERIFY_BUS 0x2 /* verify bus of request-id */ 293#define SVT_VERIFY_BUS 0x2 /* verify bus of request-id */
452 294
453/* 295/*
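
The whole irq_2_iommu side-allocation machinery removed above is replaced by a field embedded in the per-IRQ chip data, making the lookup an O(1) fetch with no allocation failure path. A schematic, self-contained model of the new lookup (struct irq_cfg here stands in for the x86 structure this series extends, and irq_get_chip_data is stubbed):

#include <stdio.h>
#include <stddef.h>

struct intel_iommu;

struct irq_2_iommu {
        struct intel_iommu *iommu;
        unsigned short irte_index;
        unsigned short sub_handle;
        unsigned char irte_mask;
};

struct irq_cfg {                         /* stand-in for x86 irq_cfg */
        struct irq_2_iommu irq_2_iommu;  /* embedded, always present */
        /* ... vector, domain, ... */
};

static struct irq_cfg *irq_get_chip_data_stub(unsigned int irq)
{
        static struct irq_cfg cfg;       /* pretend only irq 5 exists */
        return irq == 5 ? &cfg : NULL;
}

static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
{
        struct irq_cfg *cfg = irq_get_chip_data_stub(irq);
        return cfg ? &cfg->irq_2_iommu : NULL;
}

int main(void)
{
        printf("irq 5 -> %p\n", (void *)irq_2_iommu(5));  /* valid state  */
        printf("irq 9 -> %p\n", (void *)irq_2_iommu(9));  /* NULL: no cfg */
        return 0;
}
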
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 553d8ee55c1c..42fae4776515 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -13,6 +13,7 @@
13#include <linux/mutex.h> 13#include <linux/mutex.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/delay.h> 15#include <linux/delay.h>
16#include <linux/pci-ats.h>
16#include "pci.h" 17#include "pci.h"
17 18
18#define VIRTFN_ID_LEN 16 19#define VIRTFN_ID_LEN 16
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 7914951ef29a..c5c274ab5c5a 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
63 curr = iovad->cached32_node; 63 curr = iovad->cached32_node;
64 cached_iova = container_of(curr, struct iova, node); 64 cached_iova = container_of(curr, struct iova, node);
65 65
66 if (free->pfn_lo >= cached_iova->pfn_lo) 66 if (free->pfn_lo >= cached_iova->pfn_lo) {
67 iovad->cached32_node = rb_next(&free->node); 67 struct rb_node *node = rb_next(&free->node);
68 struct iova *iova = container_of(node, struct iova, node);
69
70 /* only cache if it's below 32bit pfn */
71 if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
72 iovad->cached32_node = node;
73 else
74 iovad->cached32_node = NULL;
75 }
68} 76}
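
A hedged example of what this guards against: cached32_node exists to speed up allocations below dma_32bit_pfn, and rb_next() of the node being freed can land on a reservation far above the 32-bit boundary. Caching such a node would make later 32-bit allocations start their search in the wrong part of the tree; dropping the cache to NULL instead forces a clean rescan of the 32-bit range.
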
69 77
70/* Computes the padding size required, to make the 78/* Computes the padding size required, to make the
@@ -391,7 +399,7 @@ reserve_iova(struct iova_domain *iovad,
391 break; 399 break;
392 } 400 }
393 401
394 /* We are here either becasue this is the first reserver node 402 /* We are here either because this is the first reserver node
395 * or need to insert remaining non overlap addr range 403 * or need to insert remaining non overlap addr range
396 */ 404 */
397 iova = __insert_new_range(iovad, pfn_lo, pfn_hi); 405 iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 69b7be33b3a2..2f10328bf661 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -35,7 +35,12 @@ int arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
35#endif 35#endif
36 36
37#ifndef arch_setup_msi_irqs 37#ifndef arch_setup_msi_irqs
38int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 38# define arch_setup_msi_irqs default_setup_msi_irqs
39# define HAVE_DEFAULT_MSI_SETUP_IRQS
40#endif
41
42#ifdef HAVE_DEFAULT_MSI_SETUP_IRQS
43int default_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
39{ 44{
40 struct msi_desc *entry; 45 struct msi_desc *entry;
41 int ret; 46 int ret;
@@ -60,7 +65,12 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
60#endif 65#endif
61 66
62#ifndef arch_teardown_msi_irqs 67#ifndef arch_teardown_msi_irqs
63void arch_teardown_msi_irqs(struct pci_dev *dev) 68# define arch_teardown_msi_irqs default_teardown_msi_irqs
69# define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
70#endif
71
72#ifdef HAVE_DEFAULT_MSI_TEARDOWN_IRQS
73void default_teardown_msi_irqs(struct pci_dev *dev)
64{ 74{
65 struct msi_desc *entry; 75 struct msi_desc *entry;
66 76
@@ -158,8 +168,9 @@ static u32 __msix_mask_irq(struct msi_desc *desc, u32 flag)
158 u32 mask_bits = desc->masked; 168 u32 mask_bits = desc->masked;
159 unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + 169 unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
160 PCI_MSIX_ENTRY_VECTOR_CTRL; 170 PCI_MSIX_ENTRY_VECTOR_CTRL;
161 mask_bits &= ~1; 171 mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
162 mask_bits |= flag; 172 if (flag)
173 mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
163 writel(mask_bits, desc->mask_base + offset); 174 writel(mask_bits, desc->mask_base + offset);
164 175
165 return mask_bits; 176 return mask_bits;
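
A small before/after illustration of the mask update; with the old open-coded version, a caller passing any flag value other than 0 or 1 would set the wrong bit in the vector-control word:

#include <stdio.h>

#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1  /* bit 0 of the vector-control word */

/* Old update: mask_bits &= ~1; mask_bits |= flag;
 * New update: treat any non-zero flag as "set the mask bit". */
static unsigned int mask_update(unsigned int mask_bits, unsigned int flag)
{
        mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
        if (flag)
                mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
        return mask_bits;
}

int main(void)
{
        /* With the old code, flag == 4 would leave bit 0 clear and set
         * bit 2 instead, corrupting the control word. */
        printf("0x%x\n", mask_update(0x0, 4));  /* 0x1: masked   */
        printf("0x%x\n", mask_update(0x1, 0));  /* 0x0: unmasked */
        return 0;
}
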
@@ -170,33 +181,31 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag)
170 desc->masked = __msix_mask_irq(desc, flag); 181 desc->masked = __msix_mask_irq(desc, flag);
171} 182}
172 183
173static void msi_set_mask_bit(unsigned irq, u32 flag) 184static void msi_set_mask_bit(struct irq_data *data, u32 flag)
174{ 185{
175 struct msi_desc *desc = get_irq_msi(irq); 186 struct msi_desc *desc = irq_data_get_msi(data);
176 187
177 if (desc->msi_attrib.is_msix) { 188 if (desc->msi_attrib.is_msix) {
178 msix_mask_irq(desc, flag); 189 msix_mask_irq(desc, flag);
179 readl(desc->mask_base); /* Flush write to device */ 190 readl(desc->mask_base); /* Flush write to device */
180 } else { 191 } else {
181 unsigned offset = irq - desc->dev->irq; 192 unsigned offset = data->irq - desc->dev->irq;
182 msi_mask_irq(desc, 1 << offset, flag << offset); 193 msi_mask_irq(desc, 1 << offset, flag << offset);
183 } 194 }
184} 195}
185 196
186void mask_msi_irq(unsigned int irq) 197void mask_msi_irq(struct irq_data *data)
187{ 198{
188 msi_set_mask_bit(irq, 1); 199 msi_set_mask_bit(data, 1);
189} 200}
190 201
191void unmask_msi_irq(unsigned int irq) 202void unmask_msi_irq(struct irq_data *data)
192{ 203{
193 msi_set_mask_bit(irq, 0); 204 msi_set_mask_bit(data, 0);
194} 205}
195 206
196void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) 207void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
197{ 208{
198 struct msi_desc *entry = get_irq_desc_msi(desc);
199
200 BUG_ON(entry->dev->current_state != PCI_D0); 209 BUG_ON(entry->dev->current_state != PCI_D0);
201 210
202 if (entry->msi_attrib.is_msix) { 211 if (entry->msi_attrib.is_msix) {
@@ -227,15 +236,13 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
227 236
228void read_msi_msg(unsigned int irq, struct msi_msg *msg) 237void read_msi_msg(unsigned int irq, struct msi_msg *msg)
229{ 238{
230 struct irq_desc *desc = irq_to_desc(irq); 239 struct msi_desc *entry = irq_get_msi_desc(irq);
231 240
232 read_msi_msg_desc(desc, msg); 241 __read_msi_msg(entry, msg);
233} 242}
234 243
235void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) 244void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
236{ 245{
237 struct msi_desc *entry = get_irq_desc_msi(desc);
238
239 /* Assert that the cache is valid, assuming that 246 /* Assert that the cache is valid, assuming that
240 * valid messages are not all-zeroes. */ 247 * valid messages are not all-zeroes. */
241 BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo | 248 BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
@@ -246,15 +253,13 @@ void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
246 253
247void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) 254void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
248{ 255{
249 struct irq_desc *desc = irq_to_desc(irq); 256 struct msi_desc *entry = irq_get_msi_desc(irq);
250 257
251 get_cached_msi_msg_desc(desc, msg); 258 __get_cached_msi_msg(entry, msg);
252} 259}
253 260
254void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) 261void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
255{ 262{
256 struct msi_desc *entry = get_irq_desc_msi(desc);
257
258 if (entry->dev->current_state != PCI_D0) { 263 if (entry->dev->current_state != PCI_D0) {
259 /* Don't touch the hardware now */ 264 /* Don't touch the hardware now */
260 } else if (entry->msi_attrib.is_msix) { 265 } else if (entry->msi_attrib.is_msix) {
@@ -292,9 +297,9 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
292 297
293void write_msi_msg(unsigned int irq, struct msi_msg *msg) 298void write_msi_msg(unsigned int irq, struct msi_msg *msg)
294{ 299{
295 struct irq_desc *desc = irq_to_desc(irq); 300 struct msi_desc *entry = irq_get_msi_desc(irq);
296 301
297 write_msi_msg_desc(desc, msg); 302 __write_msi_msg(entry, msg);
298} 303}
299 304
300static void free_msi_irqs(struct pci_dev *dev) 305static void free_msi_irqs(struct pci_dev *dev)
@@ -349,7 +354,7 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
349 if (!dev->msi_enabled) 354 if (!dev->msi_enabled)
350 return; 355 return;
351 356
352 entry = get_irq_msi(dev->irq); 357 entry = irq_get_msi_desc(dev->irq);
353 pos = entry->msi_attrib.pos; 358 pos = entry->msi_attrib.pos;
354 359
355 pci_intx_for_msi(dev, 0); 360 pci_intx_for_msi(dev, 0);
@@ -514,7 +519,7 @@ static void msix_program_entries(struct pci_dev *dev,
514 PCI_MSIX_ENTRY_VECTOR_CTRL; 519 PCI_MSIX_ENTRY_VECTOR_CTRL;
515 520
516 entries[i].vector = entry->irq; 521 entries[i].vector = entry->irq;
517 set_irq_msi(entry->irq, entry); 522 irq_set_msi_desc(entry->irq, entry);
518 entry->masked = readl(entry->mask_base + offset); 523 entry->masked = readl(entry->mask_base + offset);
519 msix_mask_irq(entry, 1); 524 msix_mask_irq(entry, 1);
520 i++; 525 i++;
diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h
index de27c1cb5a2b..65c42f80f23e 100644
--- a/drivers/pci/msi.h
+++ b/drivers/pci/msi.h
@@ -6,12 +6,6 @@
6#ifndef MSI_H 6#ifndef MSI_H
7#define MSI_H 7#define MSI_H
8 8
9#define PCI_MSIX_ENTRY_SIZE 16
10#define PCI_MSIX_ENTRY_LOWER_ADDR 0
11#define PCI_MSIX_ENTRY_UPPER_ADDR 4
12#define PCI_MSIX_ENTRY_DATA 8
13#define PCI_MSIX_ENTRY_VECTOR_CTRL 12
14
15#define msi_control_reg(base) (base + PCI_MSI_FLAGS) 9#define msi_control_reg(base) (base + PCI_MSI_FLAGS)
16#define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO) 10#define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO)
17#define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI) 11#define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI)
@@ -22,8 +16,8 @@
22#define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT)) 16#define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT))
23#define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT)) 17#define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT))
24 18
25#define msix_table_offset_reg(base) (base + 0x04) 19#define msix_table_offset_reg(base) (base + PCI_MSIX_TABLE)
26#define msix_pba_offset_reg(base) (base + 0x08) 20#define msix_pba_offset_reg(base) (base + PCI_MSIX_PBA)
27#define msix_table_size(control) ((control & PCI_MSIX_FLAGS_QSIZE)+1) 21#define msix_table_size(control) ((control & PCI_MSIX_FLAGS_QSIZE)+1)
28#define multi_msix_capable(control) msix_table_size((control)) 22#define multi_msix_capable(control) msix_table_size((control))
29 23
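
The table-size macro decodes the N-1 encoded QSIZE field of the MSI-X message control word. A few worked values; the control words are hypothetical and PCI_MSIX_FLAGS_QSIZE is taken to be the low 11 bits:

#include <stdio.h>

#define PCI_MSIX_FLAGS_QSIZE 0x7ff  /* table-size field, N-1 encoded */
#define msix_table_size(control) (((control) & PCI_MSIX_FLAGS_QSIZE) + 1)

int main(void)
{
        printf("%d\n", msix_table_size(0x0000));  /* 1 vector   */
        printf("%d\n", msix_table_size(0x001f));  /* 32 vectors */
        printf("%d\n", msix_table_size(0x87ff));  /* 2048 (max) */
        return 0;
}
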
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 24e19c594e57..d36f41ea8cbf 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -46,9 +46,9 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context)
46 struct pci_dev *pci_dev = context; 46 struct pci_dev *pci_dev = context;
47 47
48 if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) { 48 if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) {
49 pci_wakeup_event(pci_dev);
49 pci_check_pme_status(pci_dev); 50 pci_check_pme_status(pci_dev);
50 pm_runtime_resume(&pci_dev->dev); 51 pm_runtime_resume(&pci_dev->dev);
51 pci_wakeup_event(pci_dev);
52 if (pci_dev->subordinate) 52 if (pci_dev->subordinate)
53 pci_pme_wakeup_bus(pci_dev->subordinate); 53 pci_pme_wakeup_bus(pci_dev->subordinate);
54 } 54 }
@@ -195,6 +195,8 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev)
195 return PCI_D2; 195 return PCI_D2;
196 case ACPI_STATE_D3: 196 case ACPI_STATE_D3:
197 return PCI_D3hot; 197 return PCI_D3hot;
198 case ACPI_STATE_D3_COLD:
199 return PCI_D3cold;
198 } 200 }
199 return PCI_POWER_ERROR; 201 return PCI_POWER_ERROR;
200} 202}
@@ -293,19 +295,11 @@ static int acpi_dev_run_wake(struct device *phys_dev, bool enable)
293 } 295 }
294 296
295 if (enable) { 297 if (enable) {
296 if (!dev->wakeup.run_wake_count++) { 298 acpi_enable_wakeup_device_power(dev, ACPI_STATE_S0);
297 acpi_enable_wakeup_device_power(dev, ACPI_STATE_S0); 299 acpi_enable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number);
298 acpi_enable_gpe(dev->wakeup.gpe_device,
299 dev->wakeup.gpe_number);
300 }
301 } else if (dev->wakeup.run_wake_count > 0) {
302 if (!--dev->wakeup.run_wake_count) {
303 acpi_disable_gpe(dev->wakeup.gpe_device,
304 dev->wakeup.gpe_number);
305 acpi_disable_wakeup_device_power(dev);
306 }
307 } else { 300 } else {
308 error = -EALREADY; 301 acpi_disable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number);
302 acpi_disable_wakeup_device_power(dev);
309 } 303 }
310 304
311 return error; 305 return error;
@@ -399,6 +393,7 @@ static int __init acpi_pci_init(void)
399 393
400 if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) { 394 if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
401 printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n"); 395 printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
396 pcie_clear_aspm();
402 pcie_no_aspm(); 397 pcie_no_aspm();
403 } 398 }
404 399
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 8a6f797de8e5..46767c53917a 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -338,7 +338,7 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
338} 338}
339 339
340/** 340/**
341 * __pci_device_probe() 341 * __pci_device_probe - check if a driver wants to claim a specific PCI device
342 * @drv: driver to call to check if it wants the PCI device 342 * @drv: driver to call to check if it wants the PCI device
343 * @pci_dev: PCI device being probed 343 * @pci_dev: PCI device being probed
344 * 344 *
@@ -431,7 +431,7 @@ static void pci_device_shutdown(struct device *dev)
431 pci_msix_shutdown(pci_dev); 431 pci_msix_shutdown(pci_dev);
432} 432}
433 433
434#ifdef CONFIG_PM_OPS 434#ifdef CONFIG_PM
435 435
436/* Auxiliary functions used for system resume and run-time resume. */ 436/* Auxiliary functions used for system resume and run-time resume. */
437 437
@@ -449,7 +449,8 @@ static int pci_restore_standard_config(struct pci_dev *pci_dev)
449 return error; 449 return error;
450 } 450 }
451 451
452 return pci_restore_state(pci_dev); 452 pci_restore_state(pci_dev);
453 return 0;
453} 454}
454 455
455static void pci_pm_default_resume_early(struct pci_dev *pci_dev) 456static void pci_pm_default_resume_early(struct pci_dev *pci_dev)
@@ -623,7 +624,7 @@ static int pci_pm_prepare(struct device *dev)
623 * system from the sleep state, we'll have to prevent it from signaling 624 * system from the sleep state, we'll have to prevent it from signaling
624 * wake-up. 625 * wake-up.
625 */ 626 */
626 pm_runtime_resume(dev); 627 pm_runtime_get_sync(dev);
627 628
628 if (drv && drv->pm && drv->pm->prepare) 629 if (drv && drv->pm && drv->pm->prepare)
629 error = drv->pm->prepare(dev); 630 error = drv->pm->prepare(dev);
@@ -637,6 +638,8 @@ static void pci_pm_complete(struct device *dev)
637 638
638 if (drv && drv->pm && drv->pm->complete) 639 if (drv && drv->pm && drv->pm->complete)
639 drv->pm->complete(dev); 640 drv->pm->complete(dev);
641
642 pm_runtime_put_sync(dev);
640} 643}
641 644
642#else /* !CONFIG_PM_SLEEP */ 645#else /* !CONFIG_PM_SLEEP */
@@ -780,7 +783,7 @@ static int pci_pm_resume(struct device *dev)
780 783
781#endif /* !CONFIG_SUSPEND */ 784#endif /* !CONFIG_SUSPEND */
782 785
783#ifdef CONFIG_HIBERNATION 786#ifdef CONFIG_HIBERNATE_CALLBACKS
784 787
785static int pci_pm_freeze(struct device *dev) 788static int pci_pm_freeze(struct device *dev)
786{ 789{
@@ -969,7 +972,7 @@ static int pci_pm_restore(struct device *dev)
969 return error; 972 return error;
970} 973}
971 974
972#else /* !CONFIG_HIBERNATION */ 975#else /* !CONFIG_HIBERNATE_CALLBACKS */
973 976
974#define pci_pm_freeze NULL 977#define pci_pm_freeze NULL
975#define pci_pm_freeze_noirq NULL 978#define pci_pm_freeze_noirq NULL
@@ -980,7 +983,7 @@ static int pci_pm_restore(struct device *dev)
980#define pci_pm_restore NULL 983#define pci_pm_restore NULL
981#define pci_pm_restore_noirq NULL 984#define pci_pm_restore_noirq NULL
982 985
983#endif /* !CONFIG_HIBERNATION */ 986#endif /* !CONFIG_HIBERNATE_CALLBACKS */
984 987
985#ifdef CONFIG_PM_RUNTIME 988#ifdef CONFIG_PM_RUNTIME
986 989
@@ -1058,7 +1061,7 @@ static int pci_pm_runtime_idle(struct device *dev)
1058 1061
1059#endif /* !CONFIG_PM_RUNTIME */ 1062#endif /* !CONFIG_PM_RUNTIME */
1060 1063
1061#ifdef CONFIG_PM_OPS 1064#ifdef CONFIG_PM
1062 1065
1063const struct dev_pm_ops pci_dev_pm_ops = { 1066const struct dev_pm_ops pci_dev_pm_ops = {
1064 .prepare = pci_pm_prepare, 1067 .prepare = pci_pm_prepare,
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index 90c0a729cd3a..77cb2a14c896 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -5,6 +5,13 @@
5 * by Narendra K <Narendra_K@dell.com>, 5 * by Narendra K <Narendra_K@dell.com>,
6 * Jordan Hargrave <Jordan_Hargrave@dell.com> 6 * Jordan Hargrave <Jordan_Hargrave@dell.com>
7 * 7 *
8 * PCI Firmware Specification Revision 3.1 section 4.6.7 (DSM for Naming a
9 * PCI or PCI Express Device Under Operating Systems) defines an instance
10 * number and string name. This code retrieves them and exports them to sysfs.
11 * If the system firmware does not provide the ACPI _DSM (Device Specific
12 * Method), then the SMBIOS type 41 instance number and string is exported to
13 * sysfs.
14 *
8 * SMBIOS defines type 41 for onboard pci devices. This code retrieves 15 * SMBIOS defines type 41 for onboard pci devices. This code retrieves
9 * the instance number and string from the type 41 record and exports 16 * the instance number and string from the type 41 record and exports
10 * it to sysfs. 17 * it to sysfs.
@@ -19,8 +26,29 @@
19#include <linux/pci_ids.h> 26#include <linux/pci_ids.h>
20#include <linux/module.h> 27#include <linux/module.h>
21#include <linux/device.h> 28#include <linux/device.h>
29#include <linux/nls.h>
30#include <linux/acpi.h>
31#include <linux/pci-acpi.h>
32#include <acpi/acpi_bus.h>
22#include "pci.h" 33#include "pci.h"
23 34
35#define DEVICE_LABEL_DSM 0x07
36
37#ifndef CONFIG_DMI
38
39static inline int
40pci_create_smbiosname_file(struct pci_dev *pdev)
41{
42 return -1;
43}
44
45static inline void
46pci_remove_smbiosname_file(struct pci_dev *pdev)
47{
48}
49
50#else
51
24enum smbios_attr_enum { 52enum smbios_attr_enum {
25 SMBIOS_ATTR_NONE = 0, 53 SMBIOS_ATTR_NONE = 0,
26 SMBIOS_ATTR_LABEL_SHOW, 54 SMBIOS_ATTR_LABEL_SHOW,
@@ -120,9 +148,7 @@ static struct attribute_group smbios_attr_group = {
120static int 148static int
121pci_create_smbiosname_file(struct pci_dev *pdev) 149pci_create_smbiosname_file(struct pci_dev *pdev)
122{ 150{
123 if (!sysfs_create_group(&pdev->dev.kobj, &smbios_attr_group)) 151 return sysfs_create_group(&pdev->dev.kobj, &smbios_attr_group);
124 return 0;
125 return -ENODEV;
126} 152}
127 153
128static void 154static void
@@ -131,13 +157,227 @@ pci_remove_smbiosname_file(struct pci_dev *pdev)
131 sysfs_remove_group(&pdev->dev.kobj, &smbios_attr_group); 157 sysfs_remove_group(&pdev->dev.kobj, &smbios_attr_group);
132} 158}
133 159
160#endif
161
162#ifndef CONFIG_ACPI
163
164static inline int
165pci_create_acpi_index_label_files(struct pci_dev *pdev)
166{
167 return -1;
168}
169
170static inline int
171pci_remove_acpi_index_label_files(struct pci_dev *pdev)
172{
173 return -1;
174}
175
176static inline bool
177device_has_dsm(struct device *dev)
178{
179 return false;
180}
181
182#else
183
184static const char device_label_dsm_uuid[] = {
185 0xD0, 0x37, 0xC9, 0xE5, 0x53, 0x35, 0x7A, 0x4D,
186 0x91, 0x17, 0xEA, 0x4D, 0x19, 0xC3, 0x43, 0x4D
187};
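
Read in ACPI's mixed-endian buffer layout (first three GUID groups little-endian, last two as plain bytes), these sixteen bytes appear to spell out the device-labeling _DSM GUID from the PCI Firmware Specification. A small decoder demonstrating the byte order:

#include <stdio.h>

int main(void)
{
        unsigned char u[16] = {
                0xD0, 0x37, 0xC9, 0xE5, 0x53, 0x35, 0x7A, 0x4D,
                0x91, 0x17, 0xEA, 0x4D, 0x19, 0xC3, 0x43, 0x4D
        };
        /* First three groups are stored little-endian in the buffer. */
        printf("%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-"
               "%02X%02X%02X%02X%02X%02X\n",
               u[3], u[2], u[1], u[0], u[5], u[4], u[7], u[6],
               u[8], u[9], u[10], u[11], u[12], u[13], u[14], u[15]);
        return 0;   /* prints E5C937D0-3553-4D7A-9117-EA4D19C3434D */
}
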
188
189enum acpi_attr_enum {
190 ACPI_ATTR_NONE = 0,
191 ACPI_ATTR_LABEL_SHOW,
192 ACPI_ATTR_INDEX_SHOW,
193};
194
195static void dsm_label_utf16s_to_utf8s(union acpi_object *obj, char *buf)
196{
197 int len;
198 len = utf16s_to_utf8s((const wchar_t *)obj->
199 package.elements[1].string.pointer,
200 obj->package.elements[1].string.length,
201 UTF16_LITTLE_ENDIAN,
202 buf, PAGE_SIZE);
203 buf[len] = '\n';
204}
205
206static int
207dsm_get_label(acpi_handle handle, int func,
208 struct acpi_buffer *output,
209 char *buf, enum acpi_attr_enum attribute)
210{
211 struct acpi_object_list input;
212 union acpi_object params[4];
213 union acpi_object *obj;
214 int len = 0;
215
216 int err;
217
218 input.count = 4;
219 input.pointer = params;
220 params[0].type = ACPI_TYPE_BUFFER;
221 params[0].buffer.length = sizeof(device_label_dsm_uuid);
222 params[0].buffer.pointer = (char *)device_label_dsm_uuid;
223 params[1].type = ACPI_TYPE_INTEGER;
224 params[1].integer.value = 0x02;
225 params[2].type = ACPI_TYPE_INTEGER;
226 params[2].integer.value = func;
227 params[3].type = ACPI_TYPE_PACKAGE;
228 params[3].package.count = 0;
229 params[3].package.elements = NULL;
230
231 err = acpi_evaluate_object(handle, "_DSM", &input, output);
232 if (err)
233 return -1;
234
235 obj = (union acpi_object *)output->pointer;
236
237 switch (obj->type) {
238 case ACPI_TYPE_PACKAGE:
239 if (obj->package.count != 2)
240 break;
241 len = obj->package.elements[0].integer.value;
242 if (buf) {
243 if (attribute == ACPI_ATTR_INDEX_SHOW)
244 scnprintf(buf, PAGE_SIZE, "%llu\n",
245 obj->package.elements[0].integer.value);
246 else if (attribute == ACPI_ATTR_LABEL_SHOW)
247 dsm_label_utf16s_to_utf8s(obj, buf);
248 kfree(output->pointer);
249 return strlen(buf);
250 }
251 kfree(output->pointer);
252 return len;
253 break;
254 default:
255 kfree(output->pointer);
256 }
257 return -1;
258}
259
260static bool
261device_has_dsm(struct device *dev)
262{
263 acpi_handle handle;
264 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
265
266 handle = DEVICE_ACPI_HANDLE(dev);
267
268 if (!handle)
269 return FALSE;
270
271 if (dsm_get_label(handle, DEVICE_LABEL_DSM, &output, NULL,
272 ACPI_ATTR_NONE) > 0)
273 return TRUE;
274
275 return FALSE;
276}
277
278static mode_t
279acpi_index_string_exist(struct kobject *kobj, struct attribute *attr, int n)
280{
281 struct device *dev;
282
283 dev = container_of(kobj, struct device, kobj);
284
285 if (device_has_dsm(dev))
286 return S_IRUGO;
287
288 return 0;
289}
290
291static ssize_t
292acpilabel_show(struct device *dev, struct device_attribute *attr, char *buf)
293{
294 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
295 acpi_handle handle;
296 int length;
297
298 handle = DEVICE_ACPI_HANDLE(dev);
299
300 if (!handle)
301 return -1;
302
303 length = dsm_get_label(handle, DEVICE_LABEL_DSM,
304 &output, buf, ACPI_ATTR_LABEL_SHOW);
305
306 if (length < 1)
307 return -1;
308
309 return length;
310}
311
312static ssize_t
313acpiindex_show(struct device *dev, struct device_attribute *attr, char *buf)
314{
315 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
316 acpi_handle handle;
317 int length;
318
319 handle = DEVICE_ACPI_HANDLE(dev);
320
321 if (!handle)
322 return -1;
323
324 length = dsm_get_label(handle, DEVICE_LABEL_DSM,
325 &output, buf, ACPI_ATTR_INDEX_SHOW);
326
327 if (length < 0)
328 return -1;
329
330 return length;
331
332}
333
334static struct device_attribute acpi_attr_label = {
335 .attr = {.name = "label", .mode = 0444},
336 .show = acpilabel_show,
337};
338
339static struct device_attribute acpi_attr_index = {
340 .attr = {.name = "acpi_index", .mode = 0444},
341 .show = acpiindex_show,
342};
343
344static struct attribute *acpi_attributes[] = {
345 &acpi_attr_label.attr,
346 &acpi_attr_index.attr,
347 NULL,
348};
349
350static struct attribute_group acpi_attr_group = {
351 .attrs = acpi_attributes,
352 .is_visible = acpi_index_string_exist,
353};
354
355static int
356pci_create_acpi_index_label_files(struct pci_dev *pdev)
357{
358 return sysfs_create_group(&pdev->dev.kobj, &acpi_attr_group);
359}
360
361static int
362pci_remove_acpi_index_label_files(struct pci_dev *pdev)
363{
364 sysfs_remove_group(&pdev->dev.kobj, &acpi_attr_group);
365 return 0;
366}
367#endif
368
134void pci_create_firmware_label_files(struct pci_dev *pdev) 369void pci_create_firmware_label_files(struct pci_dev *pdev)
135{ 370{
136 if (!pci_create_smbiosname_file(pdev)) 371 if (device_has_dsm(&pdev->dev))
137 ; 372 pci_create_acpi_index_label_files(pdev);
373 else
374 pci_create_smbiosname_file(pdev);
138} 375}
139 376
140void pci_remove_firmware_label_files(struct pci_dev *pdev) 377void pci_remove_firmware_label_files(struct pci_dev *pdev)
141{ 378{
142 pci_remove_smbiosname_file(pdev); 379 if (device_has_dsm(&pdev->dev))
380 pci_remove_acpi_index_label_files(pdev);
381 else
382 pci_remove_smbiosname_file(pdev);
143} 383}
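
Net effect of the fallback chain: on firmware that implements the naming _DSM the device gets acpi_index and label attributes in sysfs, and only otherwise are the SMBIOS type 41 attributes created, so a device never exports both flavors at once.
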
diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c
index f7b68ca6cc98..775e933c2225 100644
--- a/drivers/pci/pci-stub.c
+++ b/drivers/pci/pci-stub.c
@@ -47,6 +47,10 @@ static int __init pci_stub_init(void)
47 if (rc) 47 if (rc)
48 return rc; 48 return rc;
49 49
50 /* no IDs were actually passed */
51 if (ids[0] == '\0')
52 return 0;
53
50 /* add ids specified in the module parameter */ 54 /* add ids specified in the module parameter */
51 p = ids; 55 p = ids;
52 while ((id = strsep(&p, ","))) { 56 while ((id = strsep(&p, ","))) {
@@ -54,6 +58,9 @@ static int __init pci_stub_init(void)
54 subdevice = PCI_ANY_ID, class=0, class_mask=0; 58 subdevice = PCI_ANY_ID, class=0, class_mask=0;
55 int fields; 59 int fields;
56 60
61 if (!strlen(id))
62 continue;
63
57 fields = sscanf(id, "%x:%x:%x:%x:%x:%x", 64 fields = sscanf(id, "%x:%x:%x:%x:%x:%x",
58 &vendor, &device, &subvendor, &subdevice, 65 &vendor, &device, &subvendor, &subdevice,
59 &class, &class_mask); 66 &class, &class_mask);
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index b5a7d9bfcb24..7bcf12adced7 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -23,6 +23,7 @@
23#include <linux/mm.h> 23#include <linux/mm.h>
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/capability.h> 25#include <linux/capability.h>
26#include <linux/security.h>
26#include <linux/pci-aspm.h> 27#include <linux/pci-aspm.h>
27#include <linux/slab.h> 28#include <linux/slab.h>
28#include "pci.h" 29#include "pci.h"
@@ -107,6 +108,40 @@ static ssize_t local_cpulist_show(struct device *dev,
107 return len; 108 return len;
108} 109}
109 110
111/*
112 * PCI Bus Class Devices
113 */
114static ssize_t pci_bus_show_cpuaffinity(struct device *dev,
115 int type,
116 struct device_attribute *attr,
117 char *buf)
118{
119 int ret;
120 const struct cpumask *cpumask;
121
122 cpumask = cpumask_of_pcibus(to_pci_bus(dev));
123 ret = type ?
124 cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask) :
125 cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask);
126 buf[ret++] = '\n';
127 buf[ret] = '\0';
128 return ret;
129}
130
131static inline ssize_t pci_bus_show_cpumaskaffinity(struct device *dev,
132 struct device_attribute *attr,
133 char *buf)
134{
135 return pci_bus_show_cpuaffinity(dev, 0, attr, buf);
136}
137
138static inline ssize_t pci_bus_show_cpulistaffinity(struct device *dev,
139 struct device_attribute *attr,
140 char *buf)
141{
142 return pci_bus_show_cpuaffinity(dev, 1, attr, buf);
143}
144
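The two inline wrappers differ only in rendering: cpumask_scnprintf() emits a hex mask, cpulist_scnprintf() a range list. For a hypothetical bus whose local CPUs are 0-3, the two attributes would read:

	/sys/class/pci_bus/0000:00/cpuaffinity       f
	/sys/class/pci_bus/0000:00/cpulistaffinity   0-3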
110/* show resources */ 145/* show resources */
111static ssize_t 146static ssize_t
112resource_show(struct device * dev, struct device_attribute *attr, char * buf) 147resource_show(struct device * dev, struct device_attribute *attr, char * buf)
@@ -317,6 +352,25 @@ remove_store(struct device *dev, struct device_attribute *dummy,
317 count = ret; 352 count = ret;
318 return count; 353 return count;
319} 354}
355
356static ssize_t
357dev_bus_rescan_store(struct device *dev, struct device_attribute *attr,
358 const char *buf, size_t count)
359{
360 unsigned long val;
361 struct pci_bus *bus = to_pci_bus(dev);
362
363 if (strict_strtoul(buf, 0, &val) < 0)
364 return -EINVAL;
365
366 if (val) {
367 mutex_lock(&pci_remove_rescan_mutex);
368 pci_rescan_bus(bus);
369 mutex_unlock(&pci_remove_rescan_mutex);
370 }
371 return count;
372}
373
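Any value that strict_strtoul() parses as non-zero triggers pci_rescan_bus(), serialized by pci_remove_rescan_mutex against the per-device remove attribute above. A hedged illustration from userspace (the bus address is hypothetical; "echo 1 > .../rescan" from a shell is equivalent):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/class/pci_bus/0000:00/rescan", O_WRONLY);

	if (fd < 0)
		return 1;
	write(fd, "1", 1);	/* any non-zero value starts the rescan */
	close(fd);
	return 0;
}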
320#endif 374#endif
321 375
322struct device_attribute pci_dev_attrs[] = { 376struct device_attribute pci_dev_attrs[] = {
@@ -346,6 +400,15 @@ struct device_attribute pci_dev_attrs[] = {
346 __ATTR_NULL, 400 __ATTR_NULL,
347}; 401};
348 402
403struct device_attribute pcibus_dev_attrs[] = {
404#ifdef CONFIG_HOTPLUG
405 __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_bus_rescan_store),
406#endif
407 __ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpumaskaffinity, NULL),
408 __ATTR(cpulistaffinity, S_IRUGO, pci_bus_show_cpulistaffinity, NULL),
409 __ATTR_NULL,
410};
411
349static ssize_t 412static ssize_t
350boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf) 413boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf)
351{ 414{
@@ -368,7 +431,7 @@ pci_read_config(struct file *filp, struct kobject *kobj,
368 u8 *data = (u8*) buf; 431 u8 *data = (u8*) buf;
369 432
370 /* Several chips lock up trying to read undefined config space */ 433 /* Several chips lock up trying to read undefined config space */
371 if (cap_raised(filp->f_cred->cap_effective, CAP_SYS_ADMIN)) { 434 if (security_capable(&init_user_ns, filp->f_cred, CAP_SYS_ADMIN) == 0) {
372 size = dev->cfg_size; 435 size = dev->cfg_size;
373 } else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) { 436 } else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
374 size = 128; 437 size = 128;
@@ -644,7 +707,7 @@ pci_adjust_legacy_attr(struct pci_bus *b, enum pci_mmap_state mmap_type)
644 * a per-bus basis. This routine creates the files and ties them into 707 * a per-bus basis. This routine creates the files and ties them into
645 * their associated read, write and mmap files from pci-sysfs.c 708 * their associated read, write and mmap files from pci-sysfs.c
646 * 709 *
647 * On error unwind, but don't propogate the error to the caller 710 * On error unwind, but don't propagate the error to the caller
648 * as it is ok to set up the PCI bus without these files. 711 * as it is ok to set up the PCI bus without these files.
649 */ 712 */
650void pci_create_legacy_files(struct pci_bus *b) 713void pci_create_legacy_files(struct pci_bus *b)
@@ -705,17 +768,21 @@ void pci_remove_legacy_files(struct pci_bus *b)
705 768
706#ifdef HAVE_PCI_MMAP 769#ifdef HAVE_PCI_MMAP
707 770
708int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma) 771int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma,
772 enum pci_mmap_api mmap_api)
709{ 773{
710 unsigned long nr, start, size; 774 unsigned long nr, start, size, pci_start;
711 775
776 if (pci_resource_len(pdev, resno) == 0)
777 return 0;
712 nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 778 nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
713 start = vma->vm_pgoff; 779 start = vma->vm_pgoff;
714 size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1; 780 size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1;
715 if (start < size && size - start >= nr) 781 pci_start = (mmap_api == PCI_MMAP_PROCFS) ?
782 pci_resource_start(pdev, resno) >> PAGE_SHIFT : 0;
783 if (start >= pci_start && start < pci_start + size &&
784 start + nr <= pci_start + size)
716 return 1; 785 return 1;
717 WARN(1, "process \"%s\" tried to map 0x%08lx-0x%08lx on %s BAR %d (size 0x%08lx)\n",
718 current->comm, start, start+nr, pci_name(pdev), resno, size);
719 return 0; 786 return 0;
720} 787}
721 788
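The new mmap_api argument exists because the two user interfaces pass different offsets: sysfs resource files take vm_pgoff relative to the BAR, so pci_start is 0, while /proc/bus/pci passes the "user visible" absolute address, so pci_start is the resource start in pages. For a hypothetical 16-page BAR at physical page 0xf0000, a sysfs mapping must request pages within [0, 16) and a procfs mapping within [0xf0000, 0xf0010); in both cases start + nr may not run past the end of the BAR. The added zero-length check additionally rejects mappings of BARs the device does not implement.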
@@ -745,8 +812,15 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
745 if (i >= PCI_ROM_RESOURCE) 812 if (i >= PCI_ROM_RESOURCE)
746 return -ENODEV; 813 return -ENODEV;
747 814
748 if (!pci_mmap_fits(pdev, i, vma)) 815 if (!pci_mmap_fits(pdev, i, vma, PCI_MMAP_SYSFS)) {
816 WARN(1, "process \"%s\" tried to map 0x%08lx bytes "
817 "at page 0x%08lx on %s BAR %d (start 0x%16Lx, size 0x%16Lx)\n",
818 current->comm, vma->vm_end-vma->vm_start, vma->vm_pgoff,
819 pci_name(pdev), i,
820 (u64)pci_resource_start(pdev, i),
821 (u64)pci_resource_len(pdev, i));
749 return -EINVAL; 822 return -EINVAL;
823 }
750 824
751 /* pci_mmap_page_range() expects the same kind of entry as coming 825 /* pci_mmap_page_range() expects the same kind of entry as coming
752 * from /proc/bus/pci/ which is a "user visible" value. If this is 826 * from /proc/bus/pci/ which is a "user visible" value. If this is
@@ -1076,7 +1150,7 @@ static int pci_create_capabilities_sysfs(struct pci_dev *dev)
1076 attr->write = write_vpd_attr; 1150 attr->write = write_vpd_attr;
1077 retval = sysfs_create_bin_file(&dev->dev.kobj, attr); 1151 retval = sysfs_create_bin_file(&dev->dev.kobj, attr);
1078 if (retval) { 1152 if (retval) {
1079 kfree(dev->vpd->attr); 1153 kfree(attr);
1080 return retval; 1154 return retval;
1081 } 1155 }
1082 dev->vpd->attr = attr; 1156 dev->vpd->attr = attr;
@@ -1138,7 +1212,7 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
1138 sysfs_bin_attr_init(attr); 1212 sysfs_bin_attr_init(attr);
1139 attr->size = rom_size; 1213 attr->size = rom_size;
1140 attr->attr.name = "rom"; 1214 attr->attr.name = "rom";
1141 attr->attr.mode = S_IRUSR; 1215 attr->attr.mode = S_IRUSR | S_IWUSR;
1142 attr->read = pci_read_rom; 1216 attr->read = pci_read_rom;
1143 attr->write = pci_write_rom; 1217 attr->write = pci_write_rom;
1144 retval = sysfs_create_bin_file(&pdev->dev.kobj, attr); 1218 retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 7fa3cbd742c5..692671b11667 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -38,6 +38,19 @@ EXPORT_SYMBOL(pci_pci_problems);
38 38
39unsigned int pci_pm_d3_delay; 39unsigned int pci_pm_d3_delay;
40 40
41static void pci_pme_list_scan(struct work_struct *work);
42
43static LIST_HEAD(pci_pme_list);
44static DEFINE_MUTEX(pci_pme_list_mutex);
45static DECLARE_DELAYED_WORK(pci_pme_work, pci_pme_list_scan);
46
47struct pci_pme_device {
48 struct list_head list;
49 struct pci_dev *dev;
50};
51
52#define PME_TIMEOUT 1000 /* How long between PME checks */
53
41static void pci_dev_d3_sleep(struct pci_dev *dev) 54static void pci_dev_d3_sleep(struct pci_dev *dev)
42{ 55{
43 unsigned int delay = dev->d3_delay; 56 unsigned int delay = dev->d3_delay;
@@ -727,6 +740,12 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
727 740
728 if (!__pci_complete_power_transition(dev, state)) 741 if (!__pci_complete_power_transition(dev, state))
729 error = 0; 742 error = 0;
743 /*
744 * When aspm_policy is "powersave" this call ensures
745 * that ASPM is configured.
746 */
747 if (!error && dev->bus->self)
748 pcie_aspm_powersave_config_link(dev->bus->self);
730 749
731 return error; 750 return error;
732} 751}
@@ -811,7 +830,7 @@ static int pci_save_pcie_state(struct pci_dev *dev)
811 dev_err(&dev->dev, "buffer not found in %s\n", __func__); 830 dev_err(&dev->dev, "buffer not found in %s\n", __func__);
812 return -ENOMEM; 831 return -ENOMEM;
813 } 832 }
814 cap = (u16 *)&save_state->data[0]; 833 cap = (u16 *)&save_state->cap.data[0];
815 834
816 pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); 835 pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags);
817 836
@@ -844,7 +863,7 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
844 pos = pci_find_capability(dev, PCI_CAP_ID_EXP); 863 pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
845 if (!save_state || pos <= 0) 864 if (!save_state || pos <= 0)
846 return; 865 return;
847 cap = (u16 *)&save_state->data[0]; 866 cap = (u16 *)&save_state->cap.data[0];
848 867
849 pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); 868 pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags);
850 869
@@ -880,7 +899,8 @@ static int pci_save_pcix_state(struct pci_dev *dev)
880 return -ENOMEM; 899 return -ENOMEM;
881 } 900 }
882 901
883 pci_read_config_word(dev, pos + PCI_X_CMD, (u16 *)save_state->data); 902 pci_read_config_word(dev, pos + PCI_X_CMD,
903 (u16 *)save_state->cap.data);
884 904
885 return 0; 905 return 0;
886} 906}
@@ -895,7 +915,7 @@ static void pci_restore_pcix_state(struct pci_dev *dev)
895 pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); 915 pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
896 if (!save_state || pos <= 0) 916 if (!save_state || pos <= 0)
897 return; 917 return;
898 cap = (u16 *)&save_state->data[0]; 918 cap = (u16 *)&save_state->cap.data[0];
899 919
900 pci_write_config_word(dev, pos + PCI_X_CMD, cap[i++]); 920 pci_write_config_word(dev, pos + PCI_X_CMD, cap[i++]);
901} 921}
@@ -924,14 +944,13 @@ pci_save_state(struct pci_dev *dev)
924 * pci_restore_state - Restore the saved state of a PCI device 944 * pci_restore_state - Restore the saved state of a PCI device
925 * @dev: - PCI device that we're dealing with 945 * @dev: - PCI device that we're dealing with
926 */ 946 */
927int 947void pci_restore_state(struct pci_dev *dev)
928pci_restore_state(struct pci_dev *dev)
929{ 948{
930 int i; 949 int i;
931 u32 val; 950 u32 val;
932 951
933 if (!dev->state_saved) 952 if (!dev->state_saved)
934 return 0; 953 return;
935 954
936 /* PCI Express register must be restored first */ 955 /* PCI Express register must be restored first */
937 pci_restore_pcie_state(dev); 956 pci_restore_pcie_state(dev);
@@ -955,9 +974,105 @@ pci_restore_state(struct pci_dev *dev)
955 pci_restore_iov_state(dev); 974 pci_restore_iov_state(dev);
956 975
957 dev->state_saved = false; 976 dev->state_saved = false;
977}
978
979struct pci_saved_state {
980 u32 config_space[16];
981 struct pci_cap_saved_data cap[0];
982};
983
984/**
985 * pci_store_saved_state - Allocate and return an opaque struct containing
986 * the device saved state.
987 * @dev: PCI device that we're dealing with
988 *
 989 * Return NULL if no state or error.
990 */
991struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev)
992{
993 struct pci_saved_state *state;
994 struct pci_cap_saved_state *tmp;
995 struct pci_cap_saved_data *cap;
996 struct hlist_node *pos;
997 size_t size;
998
999 if (!dev->state_saved)
1000 return NULL;
1001
1002 size = sizeof(*state) + sizeof(struct pci_cap_saved_data);
1003
1004 hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next)
1005 size += sizeof(struct pci_cap_saved_data) + tmp->cap.size;
1006
1007 state = kzalloc(size, GFP_KERNEL);
1008 if (!state)
1009 return NULL;
1010
1011 memcpy(state->config_space, dev->saved_config_space,
1012 sizeof(state->config_space));
1013
1014 cap = state->cap;
1015 hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) {
1016 size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size;
1017 memcpy(cap, &tmp->cap, len);
1018 cap = (struct pci_cap_saved_data *)((u8 *)cap + len);
1019 }
1020 /* Empty cap_save terminates list */
1021
1022 return state;
1023}
1024EXPORT_SYMBOL_GPL(pci_store_saved_state);
1025
1026/**
1027 * pci_load_saved_state - Reload the provided save state into struct pci_dev.
1028 * @dev: PCI device that we're dealing with
1029 * @state: Saved state returned from pci_store_saved_state()
1030 */
1031int pci_load_saved_state(struct pci_dev *dev, struct pci_saved_state *state)
1032{
1033 struct pci_cap_saved_data *cap;
1034
1035 dev->state_saved = false;
1036
1037 if (!state)
1038 return 0;
1039
1040 memcpy(dev->saved_config_space, state->config_space,
1041 sizeof(state->config_space));
1042
1043 cap = state->cap;
1044 while (cap->size) {
1045 struct pci_cap_saved_state *tmp;
1046
1047 tmp = pci_find_saved_cap(dev, cap->cap_nr);
1048 if (!tmp || tmp->cap.size != cap->size)
1049 return -EINVAL;
958 1050
1051 memcpy(tmp->cap.data, cap->data, tmp->cap.size);
1052 cap = (struct pci_cap_saved_data *)((u8 *)cap +
1053 sizeof(struct pci_cap_saved_data) + cap->size);
1054 }
1055
1056 dev->state_saved = true;
959 return 0; 1057 return 0;
960} 1058}
1059EXPORT_SYMBOL_GPL(pci_load_saved_state);
1060
1061/**
1062 * pci_load_and_free_saved_state - Reload the save state pointed to by state,
1063 * and free the memory allocated for it.
1064 * @dev: PCI device that we're dealing with
1065 * @state: Pointer to saved state returned from pci_store_saved_state()
1066 */
1067int pci_load_and_free_saved_state(struct pci_dev *dev,
1068 struct pci_saved_state **state)
1069{
1070 int ret = pci_load_saved_state(dev, *state);
1071 kfree(*state);
1072 *state = NULL;
1073 return ret;
1074}
1075EXPORT_SYMBOL_GPL(pci_load_and_free_saved_state);
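Together these exports let a caller snapshot a device's saved state and reapply it later, for instance around a reset the driver performs itself. A minimal sketch under that assumption (the function name and the reset step are hypothetical; error handling trimmed):

static int example_reset_with_snapshot(struct pci_dev *pdev)
{
	struct pci_saved_state *state;

	pci_save_state(pdev);			/* fill dev->saved_* buffers */
	state = pci_store_saved_state(pdev);	/* opaque, kmalloc'd snapshot */
	if (!state)
		return -ENOMEM;

	/* ... driver-specific device reset goes here ... */

	pci_load_and_free_saved_state(pdev, &state);	/* state becomes NULL */
	pci_restore_state(pdev);	/* write the snapshot back to hardware */
	return 0;
}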
961 1076
962static int do_pci_enable_device(struct pci_dev *dev, int bars) 1077static int do_pci_enable_device(struct pci_dev *dev, int bars)
963{ 1078{
@@ -994,6 +1109,18 @@ static int __pci_enable_device_flags(struct pci_dev *dev,
994 int err; 1109 int err;
995 int i, bars = 0; 1110 int i, bars = 0;
996 1111
1112 /*
1113 * Power state could be unknown at this point, either due to a fresh
1114 * boot or a device removal call. So get the current power state
1115 * so that things like MSI message writing will behave as expected
1116 * (e.g. if the device really is in D0 at enable time).
1117 */
1118 if (dev->pm_cap) {
1119 u16 pmcsr;
1120 pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
1121 dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
1122 }
1123
997 if (atomic_add_return(1, &dev->enable_cnt) > 1) 1124 if (atomic_add_return(1, &dev->enable_cnt) > 1)
998 return 0; /* already enabled */ 1125 return 0; /* already enabled */
999 1126
@@ -1275,22 +1402,6 @@ bool pci_check_pme_status(struct pci_dev *dev)
1275 return ret; 1402 return ret;
1276} 1403}
1277 1404
1278/*
1279 * Time to wait before the system can be put into a sleep state after reporting
1280 * a wakeup event signaled by a PCI device.
1281 */
1282#define PCI_WAKEUP_COOLDOWN 100
1283
1284/**
1285 * pci_wakeup_event - Report a wakeup event related to a given PCI device.
1286 * @dev: Device to report the wakeup event for.
1287 */
1288void pci_wakeup_event(struct pci_dev *dev)
1289{
1290 if (device_may_wakeup(&dev->dev))
1291 pm_wakeup_event(&dev->dev, PCI_WAKEUP_COOLDOWN);
1292}
1293
1294/** 1405/**
1295 * pci_pme_wakeup - Wake up a PCI device if its PME Status bit is set. 1406 * pci_pme_wakeup - Wake up a PCI device if its PME Status bit is set.
1296 * @dev: Device to handle. 1407 * @dev: Device to handle.
@@ -1302,8 +1413,8 @@ void pci_wakeup_event(struct pci_dev *dev)
1302static int pci_pme_wakeup(struct pci_dev *dev, void *ign) 1413static int pci_pme_wakeup(struct pci_dev *dev, void *ign)
1303{ 1414{
1304 if (pci_check_pme_status(dev)) { 1415 if (pci_check_pme_status(dev)) {
1305 pm_request_resume(&dev->dev);
1306 pci_wakeup_event(dev); 1416 pci_wakeup_event(dev);
1417 pm_request_resume(&dev->dev);
1307 } 1418 }
1308 return 0; 1419 return 0;
1309} 1420}
@@ -1331,6 +1442,32 @@ bool pci_pme_capable(struct pci_dev *dev, pci_power_t state)
1331 return !!(dev->pme_support & (1 << state)); 1442 return !!(dev->pme_support & (1 << state));
1332} 1443}
1333 1444
1445static void pci_pme_list_scan(struct work_struct *work)
1446{
1447 struct pci_pme_device *pme_dev;
1448
1449 mutex_lock(&pci_pme_list_mutex);
1450 if (!list_empty(&pci_pme_list)) {
1451 list_for_each_entry(pme_dev, &pci_pme_list, list)
1452 pci_pme_wakeup(pme_dev->dev, NULL);
1453 schedule_delayed_work(&pci_pme_work, msecs_to_jiffies(PME_TIMEOUT));
1454 }
1455 mutex_unlock(&pci_pme_list_mutex);
1456}
1457
1458/**
1459 * pci_external_pme - is a device an external PCI PME source?
1460 * @dev: PCI device to check
 1461 * Returns true for conventional-PCI devices that are not on the root bus.
1462 */
1463
1464static bool pci_external_pme(struct pci_dev *dev)
1465{
1466 if (pci_is_pcie(dev) || dev->bus->number == 0)
1467 return false;
1468 return true;
1469}
1470
1334/** 1471/**
1335 * pci_pme_active - enable or disable PCI device's PME# function 1472 * pci_pme_active - enable or disable PCI device's PME# function
1336 * @dev: PCI device to handle. 1473 * @dev: PCI device to handle.
@@ -1354,6 +1491,44 @@ void pci_pme_active(struct pci_dev *dev, bool enable)
1354 1491
1355 pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); 1492 pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr);
1356 1493
1494 /* PCI (as opposed to PCIe) PME requires that the device have
1495 its PME# line hooked up correctly. Not all hardware vendors
1496 do this, so the PME never gets delivered and the device
1497 remains asleep. The easiest way around this is to
1498 periodically walk the list of suspended devices and check
1499 whether any have their PME flag set. The assumption is that
1500 we'll wake up often enough anyway that this won't be a huge
1501 hit, and the power savings from the devices will still be a
1502 win. */
1503
1504 if (pci_external_pme(dev)) {
1505 struct pci_pme_device *pme_dev;
1506 if (enable) {
1507 pme_dev = kmalloc(sizeof(struct pci_pme_device),
1508 GFP_KERNEL);
1509 if (!pme_dev)
1510 goto out;
1511 pme_dev->dev = dev;
1512 mutex_lock(&pci_pme_list_mutex);
1513 list_add(&pme_dev->list, &pci_pme_list);
1514 if (list_is_singular(&pci_pme_list))
1515 schedule_delayed_work(&pci_pme_work,
1516 msecs_to_jiffies(PME_TIMEOUT));
1517 mutex_unlock(&pci_pme_list_mutex);
1518 } else {
1519 mutex_lock(&pci_pme_list_mutex);
1520 list_for_each_entry(pme_dev, &pci_pme_list, list) {
1521 if (pme_dev->dev == dev) {
1522 list_del(&pme_dev->list);
1523 kfree(pme_dev);
1524 break;
1525 }
1526 }
1527 mutex_unlock(&pci_pme_list_mutex);
1528 }
1529 }
1530
1531out:
1357 dev_printk(KERN_DEBUG, &dev->dev, "PME# %s\n", 1532 dev_printk(KERN_DEBUG, &dev->dev, "PME# %s\n",
1358 enable ? "enabled" : "disabled"); 1533 enable ? "enabled" : "disabled");
1359} 1534}
@@ -1695,7 +1870,8 @@ static int pci_add_cap_save_buffer(
1695 if (!save_state) 1870 if (!save_state)
1696 return -ENOMEM; 1871 return -ENOMEM;
1697 1872
1698 save_state->cap_nr = cap; 1873 save_state->cap.cap_nr = cap;
1874 save_state->cap.size = size;
1699 pci_add_saved_cap(dev, save_state); 1875 pci_add_saved_cap(dev, save_state);
1700 1876
1701 return 0; 1877 return 0;
@@ -1758,6 +1934,300 @@ void pci_enable_ari(struct pci_dev *dev)
1758 bridge->ari_enabled = 1; 1934 bridge->ari_enabled = 1;
1759} 1935}
1760 1936
1937/**
1938 * pci_enable_ido - enable ID-based ordering on a device
1939 * @dev: the PCI device
1940 * @type: which types of IDO to enable
1941 *
1942 * Enable ID-based ordering on @dev. @type can contain the bits
1943 * %PCI_EXP_IDO_REQUEST and/or %PCI_EXP_IDO_COMPLETION to indicate
1944 * which types of transactions are allowed to be re-ordered.
1945 */
1946void pci_enable_ido(struct pci_dev *dev, unsigned long type)
1947{
1948 int pos;
1949 u16 ctrl;
1950
1951 pos = pci_pcie_cap(dev);
1952 if (!pos)
1953 return;
1954
1955 pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
1956 if (type & PCI_EXP_IDO_REQUEST)
1957 ctrl |= PCI_EXP_IDO_REQ_EN;
1958 if (type & PCI_EXP_IDO_COMPLETION)
1959 ctrl |= PCI_EXP_IDO_CMP_EN;
1960 pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
1961}
1962EXPORT_SYMBOL(pci_enable_ido);
1963
1964/**
1965 * pci_disable_ido - disable ID-based ordering on a device
1966 * @dev: the PCI device
1967 * @type: which types of IDO to disable
1968 */
1969void pci_disable_ido(struct pci_dev *dev, unsigned long type)
1970{
1971 int pos;
1972 u16 ctrl;
1973
1974 if (!pci_is_pcie(dev))
1975 return;
1976
1977 pos = pci_pcie_cap(dev);
1978 if (!pos)
1979 return;
1980
1981 pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
1982 if (type & PCI_EXP_IDO_REQUEST)
1983 ctrl &= ~PCI_EXP_IDO_REQ_EN;
1984 if (type & PCI_EXP_IDO_COMPLETION)
1985 ctrl &= ~PCI_EXP_IDO_CMP_EN;
1986 pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
1987}
1988EXPORT_SYMBOL(pci_disable_ido);
1989
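Because both helpers are exported, a driver whose DMA streams are truly independent can opt in from probe() and back out in remove(). Only the two calls below come from this patch; the surrounding driver context is assumed:

	/* in probe(), after the usual pci_enable_device(): */
	pci_enable_ido(pdev, PCI_EXP_IDO_REQUEST | PCI_EXP_IDO_COMPLETION);

	/* and symmetrically in remove(): */
	pci_disable_ido(pdev, PCI_EXP_IDO_REQUEST | PCI_EXP_IDO_COMPLETION);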
1990/**
1991 * pci_enable_obff - enable optimized buffer flush/fill
1992 * @dev: PCI device
1993 * @type: type of signaling to use
1994 *
1995 * Try to enable @type OBFF signaling on @dev. It will try using WAKE#
1996 * signaling if possible, falling back to message signaling only if
 1997 * WAKE# isn't supported. @type should indicate whether the PCIe link
 1998 * should be brought out of L0s or L1 to send the message. It should be
 1999 * either %PCI_EXP_OBFF_SIGNAL_ALWAYS or %PCI_EXP_OBFF_SIGNAL_L0.
2000 *
2001 * If your device can benefit from receiving all messages, even at the
2002 * power cost of bringing the link back up from a low power state, use
 2003 * %PCI_EXP_OBFF_SIGNAL_ALWAYS. Otherwise, use %PCI_EXP_OBFF_SIGNAL_L0 (the
2004 * preferred type).
2005 *
2006 * RETURNS:
2007 * Zero on success, appropriate error number on failure.
2008 */
2009int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type type)
2010{
2011 int pos;
2012 u32 cap;
2013 u16 ctrl;
2014 int ret;
2015
2016 if (!pci_is_pcie(dev))
2017 return -ENOTSUPP;
2018
2019 pos = pci_pcie_cap(dev);
2020 if (!pos)
2021 return -ENOTSUPP;
2022
2023 pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap);
2024 if (!(cap & PCI_EXP_OBFF_MASK))
2025 return -ENOTSUPP; /* no OBFF support at all */
2026
2027 /* Make sure the topology supports OBFF as well */
 2028	if (dev->bus && dev->bus->self) {
2029 ret = pci_enable_obff(dev->bus->self, type);
2030 if (ret)
2031 return ret;
2032 }
2033
2034 pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
2035 if (cap & PCI_EXP_OBFF_WAKE)
2036 ctrl |= PCI_EXP_OBFF_WAKE_EN;
2037 else {
2038 switch (type) {
2039 case PCI_EXP_OBFF_SIGNAL_L0:
2040 if (!(ctrl & PCI_EXP_OBFF_WAKE_EN))
2041 ctrl |= PCI_EXP_OBFF_MSGA_EN;
2042 break;
2043 case PCI_EXP_OBFF_SIGNAL_ALWAYS:
2044 ctrl &= ~PCI_EXP_OBFF_WAKE_EN;
2045 ctrl |= PCI_EXP_OBFF_MSGB_EN;
2046 break;
2047 default:
2048 WARN(1, "bad OBFF signal type\n");
2049 return -ENOTSUPP;
2050 }
2051 }
2052 pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
2053
2054 return 0;
2055}
2056EXPORT_SYMBOL(pci_enable_obff);
2057
2058/**
2059 * pci_disable_obff - disable optimized buffer flush/fill
2060 * @dev: PCI device
2061 *
2062 * Disable OBFF on @dev.
2063 */
2064void pci_disable_obff(struct pci_dev *dev)
2065{
2066 int pos;
2067 u16 ctrl;
2068
2069 if (!pci_is_pcie(dev))
2070 return;
2071
2072 pos = pci_pcie_cap(dev);
2073 if (!pos)
2074 return;
2075
2076 pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
2077 ctrl &= ~PCI_EXP_OBFF_WAKE_EN;
2078 pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
2079}
2080EXPORT_SYMBOL(pci_disable_obff);
2081
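A caller chooses the signal type by how badly it needs messages delivered from low-power link states; -ENOTSUPP only means there is no OBFF support somewhere along the path and is normally not fatal. A hypothetical call site:

	int err = pci_enable_obff(pdev, PCI_EXP_OBFF_SIGNAL_L0);

	if (err && err != -ENOTSUPP)
		dev_warn(&pdev->dev, "OBFF enable failed: %d\n", err);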
2082/**
2083 * pci_ltr_supported - check whether a device supports LTR
2084 * @dev: PCI device
2085 *
2086 * RETURNS:
2087 * True if @dev supports latency tolerance reporting, false otherwise.
2088 */
2089bool pci_ltr_supported(struct pci_dev *dev)
2090{
2091 int pos;
2092 u32 cap;
2093
2094 if (!pci_is_pcie(dev))
2095 return false;
2096
2097 pos = pci_pcie_cap(dev);
2098 if (!pos)
2099 return false;
2100
2101 pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap);
2102
2103 return cap & PCI_EXP_DEVCAP2_LTR;
2104}
2105EXPORT_SYMBOL(pci_ltr_supported);
2106
2107/**
2108 * pci_enable_ltr - enable latency tolerance reporting
2109 * @dev: PCI device
2110 *
2111 * Enable LTR on @dev if possible, which means enabling it first on
2112 * upstream ports.
2113 *
2114 * RETURNS:
2115 * Zero on success, errno on failure.
2116 */
2117int pci_enable_ltr(struct pci_dev *dev)
2118{
2119 int pos;
2120 u16 ctrl;
2121 int ret;
2122
2123 if (!pci_ltr_supported(dev))
2124 return -ENOTSUPP;
2125
2126 pos = pci_pcie_cap(dev);
2127 if (!pos)
2128 return -ENOTSUPP;
2129
2130 /* Only primary function can enable/disable LTR */
2131 if (PCI_FUNC(dev->devfn) != 0)
2132 return -EINVAL;
2133
2134 /* Enable upstream ports first */
 2135	if (dev->bus && dev->bus->self) {
2136 ret = pci_enable_ltr(dev->bus->self);
2137 if (ret)
2138 return ret;
2139 }
2140
2141 pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
2142 ctrl |= PCI_EXP_LTR_EN;
2143 pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
2144
2145 return 0;
2146}
2147EXPORT_SYMBOL(pci_enable_ltr);
2148
2149/**
2150 * pci_disable_ltr - disable latency tolerance reporting
2151 * @dev: PCI device
2152 */
2153void pci_disable_ltr(struct pci_dev *dev)
2154{
2155 int pos;
2156 u16 ctrl;
2157
2158 if (!pci_ltr_supported(dev))
2159 return;
2160
2161 pos = pci_pcie_cap(dev);
2162 if (!pos)
2163 return;
2164
2165 /* Only primary function can enable/disable LTR */
2166 if (PCI_FUNC(dev->devfn) != 0)
2167 return;
2168
2169 pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
2170 ctrl &= ~PCI_EXP_LTR_EN;
2171 pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
2172}
2173EXPORT_SYMBOL(pci_disable_ltr);
2174
2175static int __pci_ltr_scale(int *val)
2176{
2177 int scale = 0;
2178
2179 while (*val > 1023) {
2180 *val = (*val + 31) / 32;
2181 scale++;
2182 }
2183 return scale;
2184}
2185
2186/**
2187 * pci_set_ltr - set LTR latency values
2188 * @dev: PCI device
2189 * @snoop_lat_ns: snoop latency in nanoseconds
2190 * @nosnoop_lat_ns: nosnoop latency in nanoseconds
2191 *
2192 * Figure out the scale and set the LTR values accordingly.
2193 */
2194int pci_set_ltr(struct pci_dev *dev, int snoop_lat_ns, int nosnoop_lat_ns)
2195{
2196 int pos, ret, snoop_scale, nosnoop_scale;
2197 u16 val;
2198
2199 if (!pci_ltr_supported(dev))
2200 return -ENOTSUPP;
2201
2202 snoop_scale = __pci_ltr_scale(&snoop_lat_ns);
2203 nosnoop_scale = __pci_ltr_scale(&nosnoop_lat_ns);
2204
2205 if (snoop_lat_ns > PCI_LTR_VALUE_MASK ||
2206 nosnoop_lat_ns > PCI_LTR_VALUE_MASK)
2207 return -EINVAL;
2208
2209 if ((snoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT)) ||
2210 (nosnoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT)))
2211 return -EINVAL;
2212
2213 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR);
2214 if (!pos)
2215 return -ENOTSUPP;
2216
2217 val = (snoop_scale << PCI_LTR_SCALE_SHIFT) | snoop_lat_ns;
2218 ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_SNOOP_LAT, val);
 2219	if (ret)
2220 return -EIO;
2221
2222 val = (nosnoop_scale << PCI_LTR_SCALE_SHIFT) | nosnoop_lat_ns;
2223 ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_NOSNOOP_LAT, val);
 2224	if (ret)
2225 return -EIO;
2226
2227 return 0;
2228}
2229EXPORT_SYMBOL(pci_set_ltr);
2230
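A worked example of the encoding: a 5000 ns tolerance does not fit the 10-bit value field, so __pci_ltr_scale() rounds it up to value 157 with scale 1, which decodes as 157 * 32 ns = 5024 ns. A hypothetical caller, once LTR is enabled along the upstream path:

	int err = pci_enable_ltr(pdev);

	if (!err)
		/* 5 us snoop and no-snoop: value 157, scale 1 (5024 ns) */
		err = pci_set_ltr(pdev, 5000, 5000);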
1761static int pci_acs_enable; 2231static int pci_acs_enable;
1762 2232
1763/** 2233/**
@@ -2403,6 +2873,21 @@ clear:
2403 return 0; 2873 return 0;
2404} 2874}
2405 2875
2876/**
2877 * pci_pm_reset - Put device into PCI_D3 and back into PCI_D0.
2878 * @dev: Device to reset.
2879 * @probe: If set, only check if the device can be reset this way.
2880 *
2881 * If @dev supports native PCI PM and its PCI_PM_CTRL_NO_SOFT_RESET flag is
2882 * unset, it will be reinitialized internally when going from PCI_D3hot to
2883 * PCI_D0. If that's the case and the device is not in a low-power state
2884 * already, force it into PCI_D3hot and back to PCI_D0, causing it to be reset.
2885 *
2886 * NOTE: This causes the caller to sleep for twice the device power transition
2887 * cooldown period, which for the D0->D3hot and D3hot->D0 transitions is 10 ms
 2888 * by default (i.e. unless the @dev's d3_delay field has a different value).
2889 * Moreover, only devices in D0 can be reset by this function.
2890 */
2406static int pci_pm_reset(struct pci_dev *dev, int probe) 2891static int pci_pm_reset(struct pci_dev *dev, int probe)
2407{ 2892{
2408 u16 csr; 2893 u16 csr;
@@ -2689,7 +3174,7 @@ int pcie_get_readrq(struct pci_dev *dev)
2689 3174
2690 ret = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); 3175 ret = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl);
2691 if (!ret) 3176 if (!ret)
2692 ret = 128 << ((ctl & PCI_EXP_DEVCTL_READRQ) >> 12); 3177 ret = 128 << ((ctl & PCI_EXP_DEVCTL_READRQ) >> 12);
2693 3178
2694 return ret; 3179 return ret;
2695} 3180}
@@ -2786,11 +3271,11 @@ void __init pci_register_set_vga_state(arch_set_vga_state_t func)
2786} 3271}
2787 3272
2788static int pci_set_vga_state_arch(struct pci_dev *dev, bool decode, 3273static int pci_set_vga_state_arch(struct pci_dev *dev, bool decode,
2789 unsigned int command_bits, bool change_bridge) 3274 unsigned int command_bits, u32 flags)
2790{ 3275{
2791 if (arch_set_vga_state) 3276 if (arch_set_vga_state)
2792 return arch_set_vga_state(dev, decode, command_bits, 3277 return arch_set_vga_state(dev, decode, command_bits,
2793 change_bridge); 3278 flags);
2794 return 0; 3279 return 0;
2795} 3280}
2796 3281
@@ -2799,31 +3284,34 @@ static int pci_set_vga_state_arch(struct pci_dev *dev, bool decode,
2799 * @dev: the PCI device 3284 * @dev: the PCI device
2800 * @decode: true = enable decoding, false = disable decoding 3285 * @decode: true = enable decoding, false = disable decoding
2801 * @command_bits: PCI_COMMAND_IO and/or PCI_COMMAND_MEMORY 3286 * @command_bits: PCI_COMMAND_IO and/or PCI_COMMAND_MEMORY
2802 * @change_bridge: traverse ancestors and change bridges 3287 * @flags: traverse ancestors and change bridges
3288 * CHANGE_BRIDGE_ONLY / CHANGE_BRIDGE
2803 */ 3289 */
2804int pci_set_vga_state(struct pci_dev *dev, bool decode, 3290int pci_set_vga_state(struct pci_dev *dev, bool decode,
2805 unsigned int command_bits, bool change_bridge) 3291 unsigned int command_bits, u32 flags)
2806{ 3292{
2807 struct pci_bus *bus; 3293 struct pci_bus *bus;
2808 struct pci_dev *bridge; 3294 struct pci_dev *bridge;
2809 u16 cmd; 3295 u16 cmd;
2810 int rc; 3296 int rc;
2811 3297
 2812	WARN_ON(command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY)); 3298	WARN_ON((flags & PCI_VGA_STATE_CHANGE_DECODES) && (command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY)));
2813 3299
2814 /* ARCH specific VGA enables */ 3300 /* ARCH specific VGA enables */
2815 rc = pci_set_vga_state_arch(dev, decode, command_bits, change_bridge); 3301 rc = pci_set_vga_state_arch(dev, decode, command_bits, flags);
2816 if (rc) 3302 if (rc)
2817 return rc; 3303 return rc;
2818 3304
2819 pci_read_config_word(dev, PCI_COMMAND, &cmd); 3305 if (flags & PCI_VGA_STATE_CHANGE_DECODES) {
2820 if (decode == true) 3306 pci_read_config_word(dev, PCI_COMMAND, &cmd);
2821 cmd |= command_bits; 3307 if (decode == true)
2822 else 3308 cmd |= command_bits;
2823 cmd &= ~command_bits; 3309 else
2824 pci_write_config_word(dev, PCI_COMMAND, cmd); 3310 cmd &= ~command_bits;
3311 pci_write_config_word(dev, PCI_COMMAND, cmd);
3312 }
2825 3313
2826 if (change_bridge == false) 3314 if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE))
2827 return 0; 3315 return 0;
2828 3316
2829 bus = dev->bus; 3317 bus = dev->bus;
@@ -2995,6 +3483,8 @@ static int __init pci_setup(char *str)
2995 pci_no_msi(); 3483 pci_no_msi();
2996 } else if (!strcmp(str, "noaer")) { 3484 } else if (!strcmp(str, "noaer")) {
2997 pci_no_aer(); 3485 pci_no_aer();
3486 } else if (!strncmp(str, "realloc", 7)) {
3487 pci_realloc();
2998 } else if (!strcmp(str, "nodomains")) { 3488 } else if (!strcmp(str, "nodomains")) {
2999 pci_no_domains(); 3489 pci_no_domains();
3000 } else if (!strncmp(str, "cbiosize=", 9)) { 3490 } else if (!strncmp(str, "cbiosize=", 9)) {
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 6beb11b617a9..3a39bf1f1e2c 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -11,7 +11,7 @@
11extern int pci_uevent(struct device *dev, struct kobj_uevent_env *env); 11extern int pci_uevent(struct device *dev, struct kobj_uevent_env *env);
12extern int pci_create_sysfs_dev_files(struct pci_dev *pdev); 12extern int pci_create_sysfs_dev_files(struct pci_dev *pdev);
13extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev); 13extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev);
14#ifndef CONFIG_DMI 14#if !defined(CONFIG_DMI) && !defined(CONFIG_ACPI)
15static inline void pci_create_firmware_label_files(struct pci_dev *pdev) 15static inline void pci_create_firmware_label_files(struct pci_dev *pdev)
16{ return; } 16{ return; }
17static inline void pci_remove_firmware_label_files(struct pci_dev *pdev) 17static inline void pci_remove_firmware_label_files(struct pci_dev *pdev)
@@ -22,8 +22,13 @@ extern void pci_remove_firmware_label_files(struct pci_dev *pdev);
22#endif 22#endif
23extern void pci_cleanup_rom(struct pci_dev *dev); 23extern void pci_cleanup_rom(struct pci_dev *dev);
24#ifdef HAVE_PCI_MMAP 24#ifdef HAVE_PCI_MMAP
25enum pci_mmap_api {
26 PCI_MMAP_SYSFS, /* mmap on /sys/bus/pci/devices/<BDF>/resource<N> */
27 PCI_MMAP_PROCFS /* mmap on /proc/bus/pci/<BDF> */
28};
25extern int pci_mmap_fits(struct pci_dev *pdev, int resno, 29extern int pci_mmap_fits(struct pci_dev *pdev, int resno,
26 struct vm_area_struct *vma); 30			 struct vm_area_struct *vma,
31 enum pci_mmap_api mmap_api);
27#endif 32#endif
28int pci_probe_reset_function(struct pci_dev *dev); 33int pci_probe_reset_function(struct pci_dev *dev);
29 34
@@ -63,15 +68,18 @@ struct pci_platform_pm_ops {
63extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops); 68extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops);
64extern void pci_update_current_state(struct pci_dev *dev, pci_power_t state); 69extern void pci_update_current_state(struct pci_dev *dev, pci_power_t state);
65extern void pci_disable_enabled_device(struct pci_dev *dev); 70extern void pci_disable_enabled_device(struct pci_dev *dev);
66extern bool pci_check_pme_status(struct pci_dev *dev);
67extern int pci_finish_runtime_suspend(struct pci_dev *dev); 71extern int pci_finish_runtime_suspend(struct pci_dev *dev);
68extern void pci_wakeup_event(struct pci_dev *dev);
69extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign); 72extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
70extern void pci_pme_wakeup_bus(struct pci_bus *bus);
71extern void pci_pm_init(struct pci_dev *dev); 73extern void pci_pm_init(struct pci_dev *dev);
72extern void platform_pci_wakeup_init(struct pci_dev *dev); 74extern void platform_pci_wakeup_init(struct pci_dev *dev);
73extern void pci_allocate_cap_save_buffers(struct pci_dev *dev); 75extern void pci_allocate_cap_save_buffers(struct pci_dev *dev);
74 76
77static inline void pci_wakeup_event(struct pci_dev *dev)
78{
79 /* Wait 100 ms before the system can be put into a sleep state. */
80 pm_wakeup_event(&dev->dev, 100);
81}
82
75static inline bool pci_is_bridge(struct pci_dev *pci_dev) 83static inline bool pci_is_bridge(struct pci_dev *pci_dev)
76{ 84{
77 return !!(pci_dev->subordinate); 85 return !!(pci_dev->subordinate);
@@ -138,13 +146,7 @@ static inline void pci_no_msi(void) { }
138static inline void pci_msi_init_pci_dev(struct pci_dev *dev) { } 146static inline void pci_msi_init_pci_dev(struct pci_dev *dev) { }
139#endif 147#endif
140 148
141#ifdef CONFIG_PCIEAER 149extern void pci_realloc(void);
142void pci_no_aer(void);
143bool pci_aer_available(void);
144#else
145static inline void pci_no_aer(void) { }
146static inline bool pci_aer_available(void) { return false; }
147#endif
148 150
149static inline int pci_no_d1d2(struct pci_dev *dev) 151static inline int pci_no_d1d2(struct pci_dev *dev)
150{ 152{
@@ -156,8 +158,7 @@ static inline int pci_no_d1d2(struct pci_dev *dev)
156 158
157} 159}
158extern struct device_attribute pci_dev_attrs[]; 160extern struct device_attribute pci_dev_attrs[];
159extern struct device_attribute dev_attr_cpuaffinity; 161extern struct device_attribute pcibus_dev_attrs[];
160extern struct device_attribute dev_attr_cpulistaffinity;
161#ifdef CONFIG_HOTPLUG 162#ifdef CONFIG_HOTPLUG
162extern struct bus_attribute pci_bus_attrs[]; 163extern struct bus_attribute pci_bus_attrs[];
163#else 164#else
@@ -250,15 +251,6 @@ struct pci_sriov {
250 u8 __iomem *mstate; /* VF Migration State Array */ 251 u8 __iomem *mstate; /* VF Migration State Array */
251}; 252};
252 253
253/* Address Translation Service */
254struct pci_ats {
255 int pos; /* capability position */
256 int stu; /* Smallest Translation Unit */
257 int qdep; /* Invalidate Queue Depth */
258 int ref_cnt; /* Physical Function reference count */
259 unsigned int is_enabled:1; /* Enable bit is set */
260};
261
262#ifdef CONFIG_PCI_IOV 254#ifdef CONFIG_PCI_IOV
263extern int pci_iov_init(struct pci_dev *dev); 255extern int pci_iov_init(struct pci_dev *dev);
264extern void pci_iov_release(struct pci_dev *dev); 256extern void pci_iov_release(struct pci_dev *dev);
@@ -269,19 +261,6 @@ extern resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev,
269extern void pci_restore_iov_state(struct pci_dev *dev); 261extern void pci_restore_iov_state(struct pci_dev *dev);
270extern int pci_iov_bus_range(struct pci_bus *bus); 262extern int pci_iov_bus_range(struct pci_bus *bus);
271 263
272extern int pci_enable_ats(struct pci_dev *dev, int ps);
273extern void pci_disable_ats(struct pci_dev *dev);
274extern int pci_ats_queue_depth(struct pci_dev *dev);
275/**
276 * pci_ats_enabled - query the ATS status
277 * @dev: the PCI device
278 *
279 * Returns 1 if ATS capability is enabled, or 0 if not.
280 */
281static inline int pci_ats_enabled(struct pci_dev *dev)
282{
283 return dev->ats && dev->ats->is_enabled;
284}
285#else 264#else
286static inline int pci_iov_init(struct pci_dev *dev) 265static inline int pci_iov_init(struct pci_dev *dev)
287{ 266{
@@ -304,21 +283,6 @@ static inline int pci_iov_bus_range(struct pci_bus *bus)
304 return 0; 283 return 0;
305} 284}
306 285
307static inline int pci_enable_ats(struct pci_dev *dev, int ps)
308{
309 return -ENODEV;
310}
311static inline void pci_disable_ats(struct pci_dev *dev)
312{
313}
314static inline int pci_ats_queue_depth(struct pci_dev *dev)
315{
316 return -ENODEV;
317}
318static inline int pci_ats_enabled(struct pci_dev *dev)
319{
320 return 0;
321}
322#endif /* CONFIG_PCI_IOV */ 286#endif /* CONFIG_PCI_IOV */
323 287
324static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, 288static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index dda70981b7a6..dc29348264c6 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -31,7 +31,7 @@ source "drivers/pci/pcie/aer/Kconfig"
31# PCI Express ASPM 31# PCI Express ASPM
32# 32#
33config PCIEASPM 33config PCIEASPM
34 bool "PCI Express ASPM control" if EMBEDDED 34 bool "PCI Express ASPM control" if EXPERT
35 depends on PCI && PCIEPORTBUS 35 depends on PCI && PCIEPORTBUS
36 default y 36 default y
37 help 37 help
diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer/aer_inject.c
index 909924692b8a..95489cd9a555 100644
--- a/drivers/pci/pcie/aer/aer_inject.c
+++ b/drivers/pci/pcie/aer/aer_inject.c
@@ -27,6 +27,10 @@
27#include <linux/stddef.h> 27#include <linux/stddef.h>
28#include "aerdrv.h" 28#include "aerdrv.h"
29 29
30/* Override the existing corrected and uncorrected error masks */
31static int aer_mask_override;
32module_param(aer_mask_override, bool, 0);
33
30struct aer_error_inj { 34struct aer_error_inj {
31 u8 bus; 35 u8 bus;
32 u8 dev; 36 u8 dev;
@@ -322,7 +326,7 @@ static int aer_inject(struct aer_error_inj *einj)
322 unsigned long flags; 326 unsigned long flags;
323 unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn); 327 unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn);
324 int pos_cap_err, rp_pos_cap_err; 328 int pos_cap_err, rp_pos_cap_err;
325 u32 sever, cor_mask, uncor_mask; 329 u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0;
326 int ret = 0; 330 int ret = 0;
327 331
328 dev = pci_get_domain_bus_and_slot((int)einj->domain, einj->bus, devfn); 332 dev = pci_get_domain_bus_and_slot((int)einj->domain, einj->bus, devfn);
@@ -361,6 +365,18 @@ static int aer_inject(struct aer_error_inj *einj)
361 goto out_put; 365 goto out_put;
362 } 366 }
363 367
368 if (aer_mask_override) {
369 cor_mask_orig = cor_mask;
 370		cor_mask &= ~(einj->cor_status);
371 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
372 cor_mask);
373
374 uncor_mask_orig = uncor_mask;
 375		uncor_mask &= ~(einj->uncor_status);
376 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
377 uncor_mask);
378 }
379
364 spin_lock_irqsave(&inject_lock, flags); 380 spin_lock_irqsave(&inject_lock, flags);
365 381
366 err = __find_aer_error_by_dev(dev); 382 err = __find_aer_error_by_dev(dev);
@@ -378,14 +394,16 @@ static int aer_inject(struct aer_error_inj *einj)
378 err->header_log2 = einj->header_log2; 394 err->header_log2 = einj->header_log2;
379 err->header_log3 = einj->header_log3; 395 err->header_log3 = einj->header_log3;
380 396
381 if (einj->cor_status && !(einj->cor_status & ~cor_mask)) { 397 if (!aer_mask_override && einj->cor_status &&
398 !(einj->cor_status & ~cor_mask)) {
382 ret = -EINVAL; 399 ret = -EINVAL;
383 printk(KERN_WARNING "The correctable error(s) is masked " 400 printk(KERN_WARNING "The correctable error(s) is masked "
384 "by device\n"); 401 "by device\n");
385 spin_unlock_irqrestore(&inject_lock, flags); 402 spin_unlock_irqrestore(&inject_lock, flags);
386 goto out_put; 403 goto out_put;
387 } 404 }
388 if (einj->uncor_status && !(einj->uncor_status & ~uncor_mask)) { 405 if (!aer_mask_override && einj->uncor_status &&
406 !(einj->uncor_status & ~uncor_mask)) {
389 ret = -EINVAL; 407 ret = -EINVAL;
390 printk(KERN_WARNING "The uncorrectable error(s) is masked " 408 printk(KERN_WARNING "The uncorrectable error(s) is masked "
391 "by device\n"); 409 "by device\n");
@@ -425,6 +443,13 @@ static int aer_inject(struct aer_error_inj *einj)
425 } 443 }
426 spin_unlock_irqrestore(&inject_lock, flags); 444 spin_unlock_irqrestore(&inject_lock, flags);
427 445
446 if (aer_mask_override) {
447 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
448 cor_mask_orig);
449 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
450 uncor_mask_orig);
451 }
452
428 ret = pci_bus_set_aer_ops(dev->bus); 453 ret = pci_bus_set_aer_ops(dev->bus);
429 if (ret) 454 if (ret)
430 goto out_put; 455 goto out_put;
@@ -472,6 +497,7 @@ static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf,
472static const struct file_operations aer_inject_fops = { 497static const struct file_operations aer_inject_fops = {
473 .write = aer_inject_write, 498 .write = aer_inject_write,
474 .owner = THIS_MODULE, 499 .owner = THIS_MODULE,
500 .llseek = noop_llseek,
475}; 501};
476 502
477static struct miscdevice aer_inject_device = { 503static struct miscdevice aer_inject_device = {
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index f409948e1a9b..58ad7917553c 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -17,6 +17,7 @@
17 17
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/pci.h> 19#include <linux/pci.h>
20#include <linux/pci-acpi.h>
20#include <linux/sched.h> 21#include <linux/sched.h>
21#include <linux/kernel.h> 22#include <linux/kernel.h>
22#include <linux/errno.h> 23#include <linux/errno.h>
@@ -416,7 +417,7 @@ static void aer_error_resume(struct pci_dev *dev)
416 */ 417 */
417static int __init aer_service_init(void) 418static int __init aer_service_init(void)
418{ 419{
419 if (!pci_aer_available()) 420 if (!pci_aer_available() || aer_acpi_firmware_first())
420 return -ENXIO; 421 return -ENXIO;
421 return pcie_port_service_register(&aerdriver); 422 return pcie_port_service_register(&aerdriver);
422} 423}
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index 80c11d131499..94a7598eb262 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -35,13 +35,6 @@
35 PCI_ERR_UNC_UNX_COMP| \ 35 PCI_ERR_UNC_UNX_COMP| \
36 PCI_ERR_UNC_MALF_TLP) 36 PCI_ERR_UNC_MALF_TLP)
37 37
38struct header_log_regs {
39 unsigned int dw0;
40 unsigned int dw1;
41 unsigned int dw2;
42 unsigned int dw3;
43};
44
45#define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */ 38#define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */
46struct aer_err_info { 39struct aer_err_info {
47 struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; 40 struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
@@ -59,7 +52,7 @@ struct aer_err_info {
59 52
60 unsigned int status; /* COR/UNCOR Error Status */ 53 unsigned int status; /* COR/UNCOR Error Status */
61 unsigned int mask; /* COR/UNCOR Error Mask */ 54 unsigned int mask; /* COR/UNCOR Error Mask */
62 struct header_log_regs tlp; /* TLP Header */ 55 struct aer_header_log_regs tlp; /* TLP Header */
63}; 56};
64 57
65struct aer_err_source { 58struct aer_err_source {
@@ -121,15 +114,6 @@ extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
121extern void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info); 114extern void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info);
122extern irqreturn_t aer_irq(int irq, void *context); 115extern irqreturn_t aer_irq(int irq, void *context);
123 116
124#ifdef CONFIG_ACPI
125extern int aer_osc_setup(struct pcie_device *pciedev);
126#else
127static inline int aer_osc_setup(struct pcie_device *pciedev)
128{
129 return 0;
130}
131#endif
132
133#ifdef CONFIG_ACPI_APEI 117#ifdef CONFIG_ACPI_APEI
134extern int pcie_aer_get_firmware_first(struct pci_dev *pci_dev); 118extern int pcie_aer_get_firmware_first(struct pci_dev *pci_dev);
135#else 119#else
diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c
index 2bb9b8972211..275bf158ffa7 100644
--- a/drivers/pci/pcie/aer/aerdrv_acpi.c
+++ b/drivers/pci/pcie/aer/aerdrv_acpi.c
@@ -93,4 +93,38 @@ int pcie_aer_get_firmware_first(struct pci_dev *dev)
93 aer_set_firmware_first(dev); 93 aer_set_firmware_first(dev);
94 return dev->__aer_firmware_first; 94 return dev->__aer_firmware_first;
95} 95}
96
97static bool aer_firmware_first;
98
99static int aer_hest_parse_aff(struct acpi_hest_header *hest_hdr, void *data)
100{
101 struct acpi_hest_aer_common *p;
102
103 if (aer_firmware_first)
104 return 0;
105
106 switch (hest_hdr->type) {
107 case ACPI_HEST_TYPE_AER_ROOT_PORT:
108 case ACPI_HEST_TYPE_AER_ENDPOINT:
109 case ACPI_HEST_TYPE_AER_BRIDGE:
110 p = (struct acpi_hest_aer_common *)(hest_hdr + 1);
111 aer_firmware_first = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
112 default:
113 return 0;
114 }
115}
116
117/**
118 * aer_acpi_firmware_first - Check if APEI should control AER.
119 */
120bool aer_acpi_firmware_first(void)
121{
122 static bool parsed = false;
123
124 if (!parsed) {
125 apei_hest_parse(aer_hest_parse_aff, NULL);
126 parsed = true;
127 }
128 return aer_firmware_first;
129}
96#endif 130#endif
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 29e268fadf14..43421fbe080a 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -754,7 +754,7 @@ void aer_isr(struct work_struct *work)
754{ 754{
755 struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler); 755 struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler);
756 struct pcie_device *p_device = rpc->rpd; 756 struct pcie_device *p_device = rpc->rpd;
757 struct aer_err_source e_src; 757 struct aer_err_source uninitialized_var(e_src);
758 758
759 mutex_lock(&rpc->rpc_mutex); 759 mutex_lock(&rpc->rpc_mutex);
760 while (get_e_source(rpc, &e_src)) 760 while (get_e_source(rpc, &e_src))
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 9d3e4c8d0184..b07a42e0b350 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -19,6 +19,7 @@
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/pm.h> 20#include <linux/pm.h>
21#include <linux/suspend.h> 21#include <linux/suspend.h>
22#include <linux/cper.h>
22 23
23#include "aerdrv.h" 24#include "aerdrv.h"
24 25
@@ -57,86 +58,44 @@
57 (e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \ 58 (e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
58 AER_TRANSACTION_LAYER_ERROR) 59 AER_TRANSACTION_LAYER_ERROR)
59 60
60#define AER_PR(info, pdev, fmt, args...) \
61 printk("%s%s %s: " fmt, (info->severity == AER_CORRECTABLE) ? \
62 KERN_WARNING : KERN_ERR, dev_driver_string(&pdev->dev), \
63 dev_name(&pdev->dev), ## args)
64
65/* 61/*
66 * AER error strings 62 * AER error strings
67 */ 63 */
68static char *aer_error_severity_string[] = { 64static const char *aer_error_severity_string[] = {
69 "Uncorrected (Non-Fatal)", 65 "Uncorrected (Non-Fatal)",
70 "Uncorrected (Fatal)", 66 "Uncorrected (Fatal)",
71 "Corrected" 67 "Corrected"
72}; 68};
73 69
74static char *aer_error_layer[] = { 70static const char *aer_error_layer[] = {
75 "Physical Layer", 71 "Physical Layer",
76 "Data Link Layer", 72 "Data Link Layer",
77 "Transaction Layer" 73 "Transaction Layer"
78}; 74};
79static char *aer_correctable_error_string[] = { 75
80 "Receiver Error ", /* Bit Position 0 */ 76static const char *aer_correctable_error_string[] = {
81 NULL, 77 "Receiver Error", /* Bit Position 0 */
82 NULL,
83 NULL,
84 NULL,
85 NULL,
86 "Bad TLP ", /* Bit Position 6 */
87 "Bad DLLP ", /* Bit Position 7 */
88 "RELAY_NUM Rollover ", /* Bit Position 8 */
89 NULL,
90 NULL,
91 NULL,
92 "Replay Timer Timeout ", /* Bit Position 12 */
93 "Advisory Non-Fatal ", /* Bit Position 13 */
94 NULL,
95 NULL,
96 NULL,
97 NULL,
98 NULL,
99 NULL,
100 NULL,
101 NULL,
102 NULL,
103 NULL,
104 NULL, 78 NULL,
105 NULL, 79 NULL,
106 NULL, 80 NULL,
107 NULL, 81 NULL,
108 NULL, 82 NULL,
83 "Bad TLP", /* Bit Position 6 */
84 "Bad DLLP", /* Bit Position 7 */
85 "RELAY_NUM Rollover", /* Bit Position 8 */
109 NULL, 86 NULL,
110 NULL, 87 NULL,
111 NULL, 88 NULL,
89 "Replay Timer Timeout", /* Bit Position 12 */
90 "Advisory Non-Fatal", /* Bit Position 13 */
112}; 91};
113 92
114static char *aer_uncorrectable_error_string[] = { 93static const char *aer_uncorrectable_error_string[] = {
115 NULL,
116 NULL,
117 NULL,
118 NULL,
119 "Data Link Protocol ", /* Bit Position 4 */
120 NULL,
121 NULL,
122 NULL,
123 NULL,
124 NULL,
125 NULL,
126 NULL,
127 "Poisoned TLP ", /* Bit Position 12 */
128 "Flow Control Protocol ", /* Bit Position 13 */
129 "Completion Timeout ", /* Bit Position 14 */
130 "Completer Abort ", /* Bit Position 15 */
131 "Unexpected Completion ", /* Bit Position 16 */
132 "Receiver Overflow ", /* Bit Position 17 */
133 "Malformed TLP ", /* Bit Position 18 */
134 "ECRC ", /* Bit Position 19 */
135 "Unsupported Request ", /* Bit Position 20 */
136 NULL, 94 NULL,
137 NULL, 95 NULL,
138 NULL, 96 NULL,
139 NULL, 97 NULL,
98 "Data Link Protocol", /* Bit Position 4 */
140 NULL, 99 NULL,
141 NULL, 100 NULL,
142 NULL, 101 NULL,
@@ -144,19 +103,29 @@ static char *aer_uncorrectable_error_string[] = {
144 NULL, 103 NULL,
145 NULL, 104 NULL,
146 NULL, 105 NULL,
106 "Poisoned TLP", /* Bit Position 12 */
107 "Flow Control Protocol", /* Bit Position 13 */
108 "Completion Timeout", /* Bit Position 14 */
109 "Completer Abort", /* Bit Position 15 */
110 "Unexpected Completion", /* Bit Position 16 */
111 "Receiver Overflow", /* Bit Position 17 */
112 "Malformed TLP", /* Bit Position 18 */
113 "ECRC", /* Bit Position 19 */
114 "Unsupported Request", /* Bit Position 20 */
147}; 115};
148 116
149static char *aer_agent_string[] = { 117static const char *aer_agent_string[] = {
150 "Receiver ID", 118 "Receiver ID",
151 "Requester ID", 119 "Requester ID",
152 "Completer ID", 120 "Completer ID",
153 "Transmitter ID" 121 "Transmitter ID"
154}; 122};
155 123
156static void __aer_print_error(struct aer_err_info *info, struct pci_dev *dev) 124static void __aer_print_error(const char *prefix,
125 struct aer_err_info *info)
157{ 126{
158 int i, status; 127 int i, status;
159 char *errmsg = NULL; 128 const char *errmsg = NULL;
160 129
161 status = (info->status & ~info->mask); 130 status = (info->status & ~info->mask);
162 131
@@ -165,15 +134,17 @@ static void __aer_print_error(struct aer_err_info *info, struct pci_dev *dev)
165 continue; 134 continue;
166 135
167 if (info->severity == AER_CORRECTABLE) 136 if (info->severity == AER_CORRECTABLE)
168 errmsg = aer_correctable_error_string[i]; 137 errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
138 aer_correctable_error_string[i] : NULL;
169 else 139 else
170 errmsg = aer_uncorrectable_error_string[i]; 140 errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
141 aer_uncorrectable_error_string[i] : NULL;
171 142
172 if (errmsg) 143 if (errmsg)
173 AER_PR(info, dev, " [%2d] %s%s\n", i, errmsg, 144 printk("%s"" [%2d] %-22s%s\n", prefix, i, errmsg,
174 info->first_error == i ? " (First)" : ""); 145 info->first_error == i ? " (First)" : "");
175 else 146 else
176 AER_PR(info, dev, " [%2d] Unknown Error Bit%s\n", i, 147 printk("%s"" [%2d] Unknown Error Bit%s\n", prefix, i,
177 info->first_error == i ? " (First)" : ""); 148 info->first_error == i ? " (First)" : "");
178 } 149 }
179} 150}
@@ -181,11 +152,15 @@ static void __aer_print_error(struct aer_err_info *info, struct pci_dev *dev)
181void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) 152void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
182{ 153{
183 int id = ((dev->bus->number << 8) | dev->devfn); 154 int id = ((dev->bus->number << 8) | dev->devfn);
155 char prefix[44];
156
157 snprintf(prefix, sizeof(prefix), "%s%s %s: ",
158 (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR,
159 dev_driver_string(&dev->dev), dev_name(&dev->dev));
184 160
185 if (info->status == 0) { 161 if (info->status == 0) {
186 AER_PR(info, dev, 162 printk("%s""PCIe Bus Error: severity=%s, type=Unaccessible, "
187 "PCIe Bus Error: severity=%s, type=Unaccessible, " 163 "id=%04x(Unregistered Agent ID)\n", prefix,
188 "id=%04x(Unregistered Agent ID)\n",
189 aer_error_severity_string[info->severity], id); 164 aer_error_severity_string[info->severity], id);
190 } else { 165 } else {
191 int layer, agent; 166 int layer, agent;
@@ -193,23 +168,22 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
193 layer = AER_GET_LAYER_ERROR(info->severity, info->status); 168 layer = AER_GET_LAYER_ERROR(info->severity, info->status);
194 agent = AER_GET_AGENT(info->severity, info->status); 169 agent = AER_GET_AGENT(info->severity, info->status);
195 170
196 AER_PR(info, dev, 171 printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
197 "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n", 172 prefix, aer_error_severity_string[info->severity],
198 aer_error_severity_string[info->severity],
199 aer_error_layer[layer], id, aer_agent_string[agent]); 173 aer_error_layer[layer], id, aer_agent_string[agent]);
200 174
201 AER_PR(info, dev, 175 printk("%s"" device [%04x:%04x] error status/mask=%08x/%08x\n",
202 " device [%04x:%04x] error status/mask=%08x/%08x\n", 176 prefix, dev->vendor, dev->device,
203 dev->vendor, dev->device, info->status, info->mask); 177 info->status, info->mask);
204 178
205 __aer_print_error(info, dev); 179 __aer_print_error(prefix, info);
206 180
207 if (info->tlp_header_valid) { 181 if (info->tlp_header_valid) {
208 unsigned char *tlp = (unsigned char *) &info->tlp; 182 unsigned char *tlp = (unsigned char *) &info->tlp;
209 AER_PR(info, dev, " TLP Header:" 183 printk("%s"" TLP Header:"
210 " %02x%02x%02x%02x %02x%02x%02x%02x" 184 " %02x%02x%02x%02x %02x%02x%02x%02x"
211 " %02x%02x%02x%02x %02x%02x%02x%02x\n", 185 " %02x%02x%02x%02x %02x%02x%02x%02x\n",
212 *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, 186 prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
213 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), 187 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
214 *(tlp + 11), *(tlp + 10), *(tlp + 9), 188 *(tlp + 11), *(tlp + 10), *(tlp + 9),
215 *(tlp + 8), *(tlp + 15), *(tlp + 14), 189 *(tlp + 8), *(tlp + 15), *(tlp + 14),
@@ -218,8 +192,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
218 } 192 }
219 193
220 if (info->id && info->error_dev_num > 1 && info->id == id) 194 if (info->id && info->error_dev_num > 1 && info->id == id)
221 AER_PR(info, dev, 195 printk("%s"" Error of this Agent(%04x) is reported first\n",
222 " Error of this Agent(%04x) is reported first\n", id); 196 prefix, id);
223} 197}
224 198
225void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) 199void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
@@ -228,3 +202,61 @@ void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
228 info->multi_error_valid ? "Multiple " : "", 202 info->multi_error_valid ? "Multiple " : "",
229 aer_error_severity_string[info->severity], info->id); 203 aer_error_severity_string[info->severity], info->id);
230} 204}
205
206#ifdef CONFIG_ACPI_APEI_PCIEAER
207static int cper_severity_to_aer(int cper_severity)
208{
209 switch (cper_severity) {
210 case CPER_SEV_RECOVERABLE:
211 return AER_NONFATAL;
212 case CPER_SEV_FATAL:
213 return AER_FATAL;
214 default:
215 return AER_CORRECTABLE;
216 }
217}
218
219void cper_print_aer(const char *prefix, int cper_severity,
220 struct aer_capability_regs *aer)
221{
222 int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
223 u32 status, mask;
224 const char **status_strs;
225
226 aer_severity = cper_severity_to_aer(cper_severity);
227 if (aer_severity == AER_CORRECTABLE) {
228 status = aer->cor_status;
229 mask = aer->cor_mask;
230 status_strs = aer_correctable_error_string;
231 status_strs_size = ARRAY_SIZE(aer_correctable_error_string);
232 } else {
233 status = aer->uncor_status;
234 mask = aer->uncor_mask;
235 status_strs = aer_uncorrectable_error_string;
236 status_strs_size = ARRAY_SIZE(aer_uncorrectable_error_string);
237 tlp_header_valid = status & AER_LOG_TLP_MASKS;
238 }
239 layer = AER_GET_LAYER_ERROR(aer_severity, status);
240 agent = AER_GET_AGENT(aer_severity, status);
241 printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n",
242 prefix, status, mask);
243 cper_print_bits(prefix, status, status_strs, status_strs_size);
244 printk("%s""aer_layer=%s, aer_agent=%s\n", prefix,
245 aer_error_layer[layer], aer_agent_string[agent]);
246 if (aer_severity != AER_CORRECTABLE)
247 printk("%s""aer_uncor_severity: 0x%08x\n",
248 prefix, aer->uncor_severity);
249 if (tlp_header_valid) {
250 const unsigned char *tlp;
251 tlp = (const unsigned char *)&aer->header_log;
252 printk("%s""aer_tlp_header:"
253 " %02x%02x%02x%02x %02x%02x%02x%02x"
254 " %02x%02x%02x%02x %02x%02x%02x%02x\n",
255 prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
256 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
257 *(tlp + 11), *(tlp + 10), *(tlp + 9),
258 *(tlp + 8), *(tlp + 15), *(tlp + 14),
259 *(tlp + 13), *(tlp + 12));
260 }
261}
262#endif
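
Note: the new cper_print_aer() helper above is intended for APEI/GHES code that receives firmware-first PCIe error records. A minimal sketch of such a caller, assuming the standard struct cper_sec_pcie layout from <linux/cper.h>; the function name and surrounding handling are illustrative, not part of this patch:

#ifdef CONFIG_ACPI_APEI_PCIEAER
/* Hypothetical GHES-side consumer: forward the AER register block
 * embedded in a CPER PCIe error section to cper_print_aer(). */
static void example_report_cper_pcie(const char *pfx, int cper_severity,
				     struct cper_sec_pcie *pcie)
{
	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO)
		cper_print_aer(pfx, cper_severity,
			       (struct aer_capability_regs *)pcie->aer_info);
}
#endif
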
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 71222814c1ec..6892601fc76f 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -68,7 +68,8 @@ struct pcie_link_state {
68 struct aspm_latency acceptable[8]; 68 struct aspm_latency acceptable[8];
69}; 69};
70 70
71static int aspm_disabled, aspm_force; 71static int aspm_disabled, aspm_force, aspm_clear_state;
72static bool aspm_support_enabled = true;
72static DEFINE_MUTEX(aspm_lock); 73static DEFINE_MUTEX(aspm_lock);
73static LIST_HEAD(link_list); 74static LIST_HEAD(link_list);
74 75
@@ -139,7 +140,7 @@ static void pcie_set_clkpm(struct pcie_link_state *link, int enable)
139{ 140{
140 /* Don't enable Clock PM if the link is not Clock PM capable */ 141 /* Don't enable Clock PM if the link is not Clock PM capable */
141 if (!link->clkpm_capable && enable) 142 if (!link->clkpm_capable && enable)
142 return; 143 enable = 0;
143 /* Need nothing if the specified equals to current state */ 144 /* Need nothing if the specified equals to current state */
144 if (link->clkpm_enabled == enable) 145 if (link->clkpm_enabled == enable)
145 return; 146 return;
@@ -498,6 +499,10 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
498 struct pci_dev *child; 499 struct pci_dev *child;
499 int pos; 500 int pos;
500 u32 reg32; 501 u32 reg32;
502
503 if (aspm_clear_state)
504 return -EINVAL;
505
501 /* 506 /*
502 * Some functions in a slot might not all be PCIe functions, 507 * Some functions in a slot might not all be PCIe functions,
503 * very strange. Disable ASPM for the whole slot 508 * very strange. Disable ASPM for the whole slot
@@ -563,12 +568,15 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
563 struct pcie_link_state *link; 568 struct pcie_link_state *link;
564 int blacklist = !!pcie_aspm_sanity_check(pdev); 569 int blacklist = !!pcie_aspm_sanity_check(pdev);
565 570
566 if (aspm_disabled || !pci_is_pcie(pdev) || pdev->link_state) 571 if (!pci_is_pcie(pdev) || pdev->link_state)
567 return; 572 return;
568 if (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT && 573 if (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
569 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM) 574 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
570 return; 575 return;
571 576
577 if (aspm_disabled && !aspm_clear_state)
578 return;
579
572 /* VIA has a strange chipset, root port is under a bridge */ 580 /* VIA has a strange chipset, root port is under a bridge */
573 if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT && 581 if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT &&
574 pdev->bus->self) 582 pdev->bus->self)
@@ -600,7 +608,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
600 * the BIOS's expectation, we'll do so once pci_enable_device() is 608 * the BIOS's expectation, we'll do so once pci_enable_device() is
601 * called. 609 * called.
602 */ 610 */
603 if (aspm_policy != POLICY_POWERSAVE) { 611 if (aspm_policy != POLICY_POWERSAVE || aspm_clear_state) {
604 pcie_config_aspm_path(link); 612 pcie_config_aspm_path(link);
605 pcie_set_clkpm(link, policy_to_clkpm_state(link)); 613 pcie_set_clkpm(link, policy_to_clkpm_state(link));
606 } 614 }
@@ -641,7 +649,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
641 struct pci_dev *parent = pdev->bus->self; 649 struct pci_dev *parent = pdev->bus->self;
642 struct pcie_link_state *link, *root, *parent_link; 650 struct pcie_link_state *link, *root, *parent_link;
643 651
644 if (aspm_disabled || !pci_is_pcie(pdev) || 652 if ((aspm_disabled && !aspm_clear_state) || !pci_is_pcie(pdev) ||
645 !parent || !parent->link_state) 653 !parent || !parent->link_state)
646 return; 654 return;
647 if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) && 655 if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
@@ -700,11 +708,33 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev)
700 up_read(&pci_bus_sem); 708 up_read(&pci_bus_sem);
701} 709}
702 710
711void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
712{
713 struct pcie_link_state *link = pdev->link_state;
714
715 if (aspm_disabled || !pci_is_pcie(pdev) || !link)
716 return;
717
718 if (aspm_policy != POLICY_POWERSAVE)
719 return;
720
721 if ((pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
722 (pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
723 return;
724
725 down_read(&pci_bus_sem);
726 mutex_lock(&aspm_lock);
727 pcie_config_aspm_path(link);
728 pcie_set_clkpm(link, policy_to_clkpm_state(link));
729 mutex_unlock(&aspm_lock);
730 up_read(&pci_bus_sem);
731}
732
703/* 733/*
704 * pci_disable_link_state - disable pci device's link state, so the link will 734 * pci_disable_link_state - disable pci device's link state, so the link will
705 * never enter specific states 735 * never enter specific states
706 */ 736 */
707void pci_disable_link_state(struct pci_dev *pdev, int state) 737static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
708{ 738{
709 struct pci_dev *parent = pdev->bus->self; 739 struct pci_dev *parent = pdev->bus->self;
710 struct pcie_link_state *link; 740 struct pcie_link_state *link;
@@ -717,7 +747,8 @@ void pci_disable_link_state(struct pci_dev *pdev, int state)
717 if (!parent || !parent->link_state) 747 if (!parent || !parent->link_state)
718 return; 748 return;
719 749
720 down_read(&pci_bus_sem); 750 if (sem)
751 down_read(&pci_bus_sem);
721 mutex_lock(&aspm_lock); 752 mutex_lock(&aspm_lock);
722 link = parent->link_state; 753 link = parent->link_state;
723 if (state & PCIE_LINK_STATE_L0S) 754 if (state & PCIE_LINK_STATE_L0S)
@@ -731,7 +762,19 @@ void pci_disable_link_state(struct pci_dev *pdev, int state)
731 pcie_set_clkpm(link, 0); 762 pcie_set_clkpm(link, 0);
732 } 763 }
733 mutex_unlock(&aspm_lock); 764 mutex_unlock(&aspm_lock);
734 up_read(&pci_bus_sem); 765 if (sem)
766 up_read(&pci_bus_sem);
767}
768
769void pci_disable_link_state_locked(struct pci_dev *pdev, int state)
770{
771 __pci_disable_link_state(pdev, state, false);
772}
773EXPORT_SYMBOL(pci_disable_link_state_locked);
774
775void pci_disable_link_state(struct pci_dev *pdev, int state)
776{
777 __pci_disable_link_state(pdev, state, true);
735} 778}
736EXPORT_SYMBOL(pci_disable_link_state); 779EXPORT_SYMBOL(pci_disable_link_state);
737 780
@@ -740,6 +783,8 @@ static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
740 int i; 783 int i;
741 struct pcie_link_state *link; 784 struct pcie_link_state *link;
742 785
786 if (aspm_disabled)
787 return -EPERM;
743 for (i = 0; i < ARRAY_SIZE(policy_str); i++) 788 for (i = 0; i < ARRAY_SIZE(policy_str); i++)
744 if (!strncmp(val, policy_str[i], strlen(policy_str[i]))) 789 if (!strncmp(val, policy_str[i], strlen(policy_str[i])))
745 break; 790 break;
@@ -794,6 +839,8 @@ static ssize_t link_state_store(struct device *dev,
794 struct pcie_link_state *link, *root = pdev->link_state->root; 839 struct pcie_link_state *link, *root = pdev->link_state->root;
795 u32 val = buf[0] - '0', state = 0; 840 u32 val = buf[0] - '0', state = 0;
796 841
842 if (aspm_disabled)
843 return -EPERM;
797 if (n < 1 || val > 3) 844 if (n < 1 || val > 3)
798 return -EINVAL; 845 return -EINVAL;
799 846
@@ -889,6 +936,7 @@ static int __init pcie_aspm_disable(char *str)
889{ 936{
890 if (!strcmp(str, "off")) { 937 if (!strcmp(str, "off")) {
891 aspm_disabled = 1; 938 aspm_disabled = 1;
939 aspm_support_enabled = false;
892 printk(KERN_INFO "PCIe ASPM is disabled\n"); 940 printk(KERN_INFO "PCIe ASPM is disabled\n");
893 } else if (!strcmp(str, "force")) { 941 } else if (!strcmp(str, "force")) {
894 aspm_force = 1; 942 aspm_force = 1;
@@ -899,6 +947,12 @@ static int __init pcie_aspm_disable(char *str)
899 947
900__setup("pcie_aspm=", pcie_aspm_disable); 948__setup("pcie_aspm=", pcie_aspm_disable);
901 949
950void pcie_clear_aspm(void)
951{
952 if (!aspm_force)
953 aspm_clear_state = 1;
954}
955
902void pcie_no_aspm(void) 956void pcie_no_aspm(void)
903{ 957{
904 if (!aspm_force) 958 if (!aspm_force)
@@ -917,3 +971,8 @@ int pcie_aspm_enabled(void)
917} 971}
918EXPORT_SYMBOL(pcie_aspm_enabled); 972EXPORT_SYMBOL(pcie_aspm_enabled);
919 973
974bool pcie_aspm_support_enabled(void)
975{
976 return aspm_support_enabled;
977}
978EXPORT_SYMBOL(pcie_aspm_support_enabled);
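
Note: pci_disable_link_state_locked() exists for callers that already hold pci_bus_sem; the plain variant takes the semaphore itself. A short usage sketch (the surrounding driver context is hypothetical):

#include <linux/pci-aspm.h>

/* Normal driver context: pci_disable_link_state() acquires
 * pci_bus_sem (read) internally. */
pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1);

/* A caller that already holds pci_bus_sem (e.g. inside a bus walk)
 * must use the _locked variant to avoid recursive locking. */
pci_disable_link_state_locked(pdev, PCIE_LINK_STATE_L0S);
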
diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index 2f3c90407227..0057344a3fcb 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c
@@ -26,9 +26,6 @@
26#include "../pci.h" 26#include "../pci.h"
27#include "portdrv.h" 27#include "portdrv.h"
28 28
29#define PCI_EXP_RTSTA_PME 0x10000 /* PME status */
30#define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */
31
32/* 29/*
33 * If this switch is set, MSI will not be used for PCIe PME signaling. This 30 * If this switch is set, MSI will not be used for PCIe PME signaling. This
34 * causes the PCIe port driver to use INTx interrupts only, but it turns out 31 * causes the PCIe port driver to use INTx interrupts only, but it turns out
@@ -74,22 +71,6 @@ void pcie_pme_interrupt_enable(struct pci_dev *dev, bool enable)
74} 71}
75 72
76/** 73/**
77 * pcie_pme_clear_status - Clear root port PME interrupt status.
78 * @dev: PCIe root port or event collector.
79 */
80static void pcie_pme_clear_status(struct pci_dev *dev)
81{
82 int rtsta_pos;
83 u32 rtsta;
84
85 rtsta_pos = pci_pcie_cap(dev) + PCI_EXP_RTSTA;
86
87 pci_read_config_dword(dev, rtsta_pos, &rtsta);
88 rtsta |= PCI_EXP_RTSTA_PME;
89 pci_write_config_dword(dev, rtsta_pos, rtsta);
90}
91
92/**
93 * pcie_pme_walk_bus - Scan a PCI bus for devices asserting PME#. 74 * pcie_pme_walk_bus - Scan a PCI bus for devices asserting PME#.
94 * @bus: PCI bus to scan. 75 * @bus: PCI bus to scan.
95 * 76 *
@@ -103,8 +84,8 @@ static bool pcie_pme_walk_bus(struct pci_bus *bus)
103 list_for_each_entry(dev, &bus->devices, bus_list) { 84 list_for_each_entry(dev, &bus->devices, bus_list) {
104 /* Skip PCIe devices in case we started from a root port. */ 85 /* Skip PCIe devices in case we started from a root port. */
105 if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) { 86 if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) {
106 pm_request_resume(&dev->dev);
107 pci_wakeup_event(dev); 87 pci_wakeup_event(dev);
88 pm_request_resume(&dev->dev);
108 ret = true; 89 ret = true;
109 } 90 }
110 91
@@ -206,8 +187,8 @@ static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id)
206 /* The device is there, but we have to check its PME status. */ 187 /* The device is there, but we have to check its PME status. */
207 found = pci_check_pme_status(dev); 188 found = pci_check_pme_status(dev);
208 if (found) { 189 if (found) {
209 pm_request_resume(&dev->dev);
210 pci_wakeup_event(dev); 190 pci_wakeup_event(dev);
191 pm_request_resume(&dev->dev);
211 } 192 }
212 pci_dev_put(dev); 193 pci_dev_put(dev);
213 } else if (devfn) { 194 } else if (devfn) {
@@ -253,7 +234,7 @@ static void pcie_pme_work_fn(struct work_struct *work)
253 * Clear PME status of the port. If there are other 234 * Clear PME status of the port. If there are other
254 * pending PMEs, the status will be set again. 235 * pending PMEs, the status will be set again.
255 */ 236 */
256 pcie_pme_clear_status(port); 237 pcie_clear_root_pme_status(port);
257 238
258 spin_unlock_irq(&data->lock); 239 spin_unlock_irq(&data->lock);
259 pcie_pme_handle_request(port, rtsta & 0xffff); 240 pcie_pme_handle_request(port, rtsta & 0xffff);
@@ -378,7 +359,7 @@ static int pcie_pme_probe(struct pcie_device *srv)
378 359
379 port = srv->port; 360 port = srv->port;
380 pcie_pme_interrupt_enable(port, false); 361 pcie_pme_interrupt_enable(port, false);
381 pcie_pme_clear_status(port); 362 pcie_clear_root_pme_status(port);
382 363
383 ret = request_irq(srv->irq, pcie_pme_irq, IRQF_SHARED, "PCIe PME", srv); 364 ret = request_irq(srv->irq, pcie_pme_irq, IRQF_SHARED, "PCIe PME", srv);
384 if (ret) { 365 if (ret) {
@@ -402,7 +383,7 @@ static int pcie_pme_suspend(struct pcie_device *srv)
402 383
403 spin_lock_irq(&data->lock); 384 spin_lock_irq(&data->lock);
404 pcie_pme_interrupt_enable(port, false); 385 pcie_pme_interrupt_enable(port, false);
405 pcie_pme_clear_status(port); 386 pcie_clear_root_pme_status(port);
406 data->noirq = true; 387 data->noirq = true;
407 spin_unlock_irq(&data->lock); 388 spin_unlock_irq(&data->lock);
408 389
@@ -422,7 +403,7 @@ static int pcie_pme_resume(struct pcie_device *srv)
422 403
423 spin_lock_irq(&data->lock); 404 spin_lock_irq(&data->lock);
424 data->noirq = false; 405 data->noirq = false;
425 pcie_pme_clear_status(port); 406 pcie_clear_root_pme_status(port);
426 pcie_pme_interrupt_enable(port, true); 407 pcie_pme_interrupt_enable(port, true);
427 spin_unlock_irq(&data->lock); 408 spin_unlock_irq(&data->lock);
428 409
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index 7b5aba0a3291..bd00a01aef14 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -20,9 +20,6 @@
20 20
21#define get_descriptor_id(type, service) (((type - 4) << 4) | service) 21#define get_descriptor_id(type, service) (((type - 4) << 4) | service)
22 22
23extern bool pcie_ports_disabled;
24extern bool pcie_ports_auto;
25
26extern struct bus_type pcie_port_bus_type; 23extern struct bus_type pcie_port_bus_type;
27extern int pcie_port_device_register(struct pci_dev *dev); 24extern int pcie_port_device_register(struct pci_dev *dev);
28#ifdef CONFIG_PM 25#ifdef CONFIG_PM
@@ -35,6 +32,8 @@ extern void pcie_port_bus_unregister(void);
35 32
36struct pci_dev; 33struct pci_dev;
37 34
35extern void pcie_clear_root_pme_status(struct pci_dev *dev);
36
38#ifdef CONFIG_PCIE_PME 37#ifdef CONFIG_PCIE_PME
39extern bool pcie_pme_msi_disabled; 38extern bool pcie_pme_msi_disabled;
40 39
diff --git a/drivers/pci/pcie/portdrv_acpi.c b/drivers/pci/pcie/portdrv_acpi.c
index b7c4cb1ccb23..a86b56e5f2f2 100644
--- a/drivers/pci/pcie/portdrv_acpi.c
+++ b/drivers/pci/pcie/portdrv_acpi.c
@@ -33,7 +33,7 @@
33 */ 33 */
34int pcie_port_acpi_setup(struct pci_dev *port, int *srv_mask) 34int pcie_port_acpi_setup(struct pci_dev *port, int *srv_mask)
35{ 35{
36 acpi_status status; 36 struct acpi_pci_root *root;
37 acpi_handle handle; 37 acpi_handle handle;
38 u32 flags; 38 u32 flags;
39 39
@@ -44,26 +44,11 @@ int pcie_port_acpi_setup(struct pci_dev *port, int *srv_mask)
44 if (!handle) 44 if (!handle)
45 return -EINVAL; 45 return -EINVAL;
46 46
47 flags = OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 47 root = acpi_pci_find_root(handle);
48 | OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 48 if (!root)
49 | OSC_PCI_EXPRESS_PME_CONTROL;
50
51 if (pci_aer_available()) {
52 if (pcie_aer_get_firmware_first(port))
53 dev_dbg(&port->dev, "PCIe errors handled by BIOS.\n");
54 else
55 flags |= OSC_PCI_EXPRESS_AER_CONTROL;
56 }
57
58 status = acpi_pci_osc_control_set(handle, &flags,
59 OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
60 if (ACPI_FAILURE(status)) {
61 dev_dbg(&port->dev, "ACPI _OSC request failed (code %d)\n",
62 status);
63 return -ENODEV; 49 return -ENODEV;
64 }
65 50
66 dev_info(&port->dev, "ACPI _OSC control granted for 0x%02x\n", flags); 51 flags = root->osc_control_set;
67 52
68 *srv_mask = PCIE_PORT_SERVICE_VC; 53 *srv_mask = PCIE_PORT_SERVICE_VC;
69 if (flags & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL) 54 if (flags & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL)
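
Note: with this change the port driver derives its service mask from the _OSC control bits the ACPI core negotiated at root-bridge enumeration, instead of issuing its own _OSC request. A sketch of the resulting mapping; the PME/AER branches are assumed to follow the visible native-hotplug check and are not shown in the hunk above:

	/* Sketch: translate cached _OSC control bits into port services. */
	*srv_mask = PCIE_PORT_SERVICE_VC;
	if (flags & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL)
		*srv_mask |= PCIE_PORT_SERVICE_HP;
	if (flags & OSC_PCI_EXPRESS_PME_CONTROL)
		*srv_mask |= PCIE_PORT_SERVICE_PME;
	if (flags & OSC_PCI_EXPRESS_AER_CONTROL)
		*srv_mask |= PCIE_PORT_SERVICE_AER;
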
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index a9c222d79ebc..595654a1a6a6 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -15,7 +15,6 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/pcieport_if.h> 16#include <linux/pcieport_if.h>
17#include <linux/aer.h> 17#include <linux/aer.h>
18#include <linux/pci-aspm.h>
19 18
20#include "../pci.h" 19#include "../pci.h"
21#include "portdrv.h" 20#include "portdrv.h"
@@ -241,17 +240,17 @@ static int get_port_device_capability(struct pci_dev *dev)
241 int cap_mask; 240 int cap_mask;
242 int err; 241 int err;
243 242
243 if (pcie_ports_disabled)
244 return 0;
245
244 err = pcie_port_platform_notify(dev, &cap_mask); 246 err = pcie_port_platform_notify(dev, &cap_mask);
245 if (pcie_ports_auto) { 247 if (!pcie_ports_auto) {
246 if (err) {
247 pcie_no_aspm();
248 return 0;
249 }
250 } else {
251 cap_mask = PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP 248 cap_mask = PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP
252 | PCIE_PORT_SERVICE_VC; 249 | PCIE_PORT_SERVICE_VC;
253 if (pci_aer_available()) 250 if (pci_aer_available())
254 cap_mask |= PCIE_PORT_SERVICE_AER; 251 cap_mask |= PCIE_PORT_SERVICE_AER;
252 } else if (err) {
253 return 0;
255 } 254 }
256 255
257 pos = pci_pcie_cap(dev); 256 pos = pci_pcie_cap(dev);
@@ -349,15 +348,16 @@ int pcie_port_device_register(struct pci_dev *dev)
349 int status, capabilities, i, nr_service; 348 int status, capabilities, i, nr_service;
350 int irqs[PCIE_PORT_DEVICE_MAXSERVICES]; 349 int irqs[PCIE_PORT_DEVICE_MAXSERVICES];
351 350
352 /* Get and check PCI Express port services */
353 capabilities = get_port_device_capability(dev);
354 if (!capabilities)
355 return -ENODEV;
356
357 /* Enable PCI Express port device */ 351 /* Enable PCI Express port device */
358 status = pci_enable_device(dev); 352 status = pci_enable_device(dev);
359 if (status) 353 if (status)
360 return status; 354 return status;
355
356 /* Get and check PCI Express port services */
357 capabilities = get_port_device_capability(dev);
358 if (!capabilities)
359 return 0;
360
361 pci_set_master(dev); 361 pci_set_master(dev);
362 /* 362 /*
363 * Initialize service irqs. Don't use service devices that 363 * Initialize service irqs. Don't use service devices that
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index f9033e190fb6..e0610bda1dea 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -57,6 +57,22 @@ __setup("pcie_ports=", pcie_port_setup);
57 57
58/* global data */ 58/* global data */
59 59
60/**
61 * pcie_clear_root_pme_status - Clear root port PME interrupt status.
62 * @dev: PCIe root port or event collector.
63 */
64void pcie_clear_root_pme_status(struct pci_dev *dev)
65{
66 int rtsta_pos;
67 u32 rtsta;
68
69 rtsta_pos = pci_pcie_cap(dev) + PCI_EXP_RTSTA;
70
71 pci_read_config_dword(dev, rtsta_pos, &rtsta);
72 rtsta |= PCI_EXP_RTSTA_PME;
73 pci_write_config_dword(dev, rtsta_pos, rtsta);
74}
75
60static int pcie_portdrv_restore_config(struct pci_dev *dev) 76static int pcie_portdrv_restore_config(struct pci_dev *dev)
61{ 77{
62 int retval; 78 int retval;
@@ -69,6 +85,20 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev)
69} 85}
70 86
71#ifdef CONFIG_PM 87#ifdef CONFIG_PM
88static int pcie_port_resume_noirq(struct device *dev)
89{
90 struct pci_dev *pdev = to_pci_dev(dev);
91
92 /*
 93 * Some BIOSes forget to clear Root PME Status bits after system wakeup,
 94 * which breaks ACPI-based runtime wakeup on PCI Express, so clear those
95 * bits now just in case (shouldn't hurt).
96 */
 97 if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT)
98 pcie_clear_root_pme_status(pdev);
99 return 0;
100}
101
72static const struct dev_pm_ops pcie_portdrv_pm_ops = { 102static const struct dev_pm_ops pcie_portdrv_pm_ops = {
73 .suspend = pcie_port_device_suspend, 103 .suspend = pcie_port_device_suspend,
74 .resume = pcie_port_device_resume, 104 .resume = pcie_port_device_resume,
@@ -76,6 +106,7 @@ static const struct dev_pm_ops pcie_portdrv_pm_ops = {
76 .thaw = pcie_port_device_resume, 106 .thaw = pcie_port_device_resume,
77 .poweroff = pcie_port_device_suspend, 107 .poweroff = pcie_port_device_suspend,
78 .restore = pcie_port_device_resume, 108 .restore = pcie_port_device_resume,
109 .resume_noirq = pcie_port_resume_noirq,
79}; 110};
80 111
81#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) 112#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops)
@@ -327,10 +358,8 @@ static int __init pcie_portdrv_init(void)
327{ 358{
328 int retval; 359 int retval;
329 360
330 if (pcie_ports_disabled) { 361 if (pcie_ports_disabled)
331 pcie_no_aspm(); 362 return pci_register_driver(&pcie_portdriver);
332 return -EACCES;
333 }
334 363
335 dmi_check_system(pcie_portdrv_dmi_table); 364 dmi_check_system(pcie_portdrv_dmi_table);
336 365
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 12625d90f8b5..bafb3c3d4a89 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -43,43 +43,6 @@ int no_pci_devices(void)
43EXPORT_SYMBOL(no_pci_devices); 43EXPORT_SYMBOL(no_pci_devices);
44 44
45/* 45/*
46 * PCI Bus Class Devices
47 */
48static ssize_t pci_bus_show_cpuaffinity(struct device *dev,
49 int type,
50 struct device_attribute *attr,
51 char *buf)
52{
53 int ret;
54 const struct cpumask *cpumask;
55
56 cpumask = cpumask_of_pcibus(to_pci_bus(dev));
57 ret = type?
58 cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask) :
59 cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask);
60 buf[ret++] = '\n';
61 buf[ret] = '\0';
62 return ret;
63}
64
65static ssize_t inline pci_bus_show_cpumaskaffinity(struct device *dev,
66 struct device_attribute *attr,
67 char *buf)
68{
69 return pci_bus_show_cpuaffinity(dev, 0, attr, buf);
70}
71
72static ssize_t inline pci_bus_show_cpulistaffinity(struct device *dev,
73 struct device_attribute *attr,
74 char *buf)
75{
76 return pci_bus_show_cpuaffinity(dev, 1, attr, buf);
77}
78
79DEVICE_ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpumaskaffinity, NULL);
80DEVICE_ATTR(cpulistaffinity, S_IRUGO, pci_bus_show_cpulistaffinity, NULL);
81
82/*
83 * PCI Bus Class 46 * PCI Bus Class
84 */ 47 */
85static void release_pcibus_dev(struct device *dev) 48static void release_pcibus_dev(struct device *dev)
@@ -95,6 +58,7 @@ static void release_pcibus_dev(struct device *dev)
95static struct class pcibus_class = { 58static struct class pcibus_class = {
96 .name = "pci_bus", 59 .name = "pci_bus",
97 .dev_release = &release_pcibus_dev, 60 .dev_release = &release_pcibus_dev,
61 .dev_attrs = pcibus_dev_attrs,
98}; 62};
99 63
100static int __init pcibus_class_init(void) 64static int __init pcibus_class_init(void)
@@ -204,7 +168,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
204 res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN; 168 res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN;
205 if (type == pci_bar_io) { 169 if (type == pci_bar_io) {
206 l &= PCI_BASE_ADDRESS_IO_MASK; 170 l &= PCI_BASE_ADDRESS_IO_MASK;
207 mask = PCI_BASE_ADDRESS_IO_MASK & IO_SPACE_LIMIT; 171 mask = PCI_BASE_ADDRESS_IO_MASK & (u32) IO_SPACE_LIMIT;
208 } else { 172 } else {
209 l &= PCI_BASE_ADDRESS_MEM_MASK; 173 l &= PCI_BASE_ADDRESS_MEM_MASK;
210 mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; 174 mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
@@ -764,6 +728,8 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
764 if (pci_find_bus(pci_domain_nr(bus), max+1)) 728 if (pci_find_bus(pci_domain_nr(bus), max+1))
765 goto out; 729 goto out;
766 child = pci_add_new_bus(bus, dev, ++max); 730 child = pci_add_new_bus(bus, dev, ++max);
731 if (!child)
732 goto out;
767 buses = (buses & 0xff000000) 733 buses = (buses & 0xff000000)
768 | ((unsigned int)(child->primary) << 0) 734 | ((unsigned int)(child->primary) << 0)
769 | ((unsigned int)(child->secondary) << 8) 735 | ((unsigned int)(child->secondary) << 8)
@@ -777,7 +743,7 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
777 buses &= ~0xff000000; 743 buses &= ~0xff000000;
778 buses |= CARDBUS_LATENCY_TIMER << 24; 744 buses |= CARDBUS_LATENCY_TIMER << 24;
779 } 745 }
780 746
781 /* 747 /*
782 * We need to blast all three values with a single write. 748 * We need to blast all three values with a single write.
783 */ 749 */
@@ -961,8 +927,8 @@ int pci_setup_device(struct pci_dev *dev)
961 dev->class = class; 927 dev->class = class;
962 class >>= 8; 928 class >>= 8;
963 929
964 dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n", 930 dev_printk(KERN_DEBUG, &dev->dev, "[%04x:%04x] type %d class %#08x\n",
965 dev->vendor, dev->device, class, dev->hdr_type); 931 dev->vendor, dev->device, dev->hdr_type, class);
966 932
967 /* need to have dev->class ready */ 933 /* need to have dev->class ready */
968 dev->cfg_size = pci_cfg_space_size(dev); 934 dev->cfg_size = pci_cfg_space_size(dev);
@@ -1453,9 +1419,6 @@ struct pci_bus * pci_create_bus(struct device *parent,
1453 error = device_register(&b->dev); 1419 error = device_register(&b->dev);
1454 if (error) 1420 if (error)
1455 goto class_dev_reg_err; 1421 goto class_dev_reg_err;
1456 error = device_create_file(&b->dev, &dev_attr_cpuaffinity);
1457 if (error)
1458 goto dev_create_file_err;
1459 1422
1460 /* Create legacy_io and legacy_mem files for this bus */ 1423 /* Create legacy_io and legacy_mem files for this bus */
1461 pci_create_legacy_files(b); 1424 pci_create_legacy_files(b);
@@ -1466,8 +1429,6 @@ struct pci_bus * pci_create_bus(struct device *parent,
1466 1429
1467 return b; 1430 return b;
1468 1431
1469dev_create_file_err:
1470 device_unregister(&b->dev);
1471class_dev_reg_err: 1432class_dev_reg_err:
1472 device_unregister(dev); 1433 device_unregister(dev);
1473dev_reg_err: 1434dev_reg_err:
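
Note: the device_create_file()/device_remove_file() call sites removed above are replaced by registering the attributes once on the pci_bus class, so the driver core creates them for every bus device automatically. The pcibus_dev_attrs array itself is defined outside the hunks shown here (in pci-sysfs.c in this series); its assumed shape:

/* Assumed definition: a __ATTR_NULL-terminated attribute array
 * hooked up via pcibus_class.dev_attrs. */
static struct device_attribute pcibus_dev_attrs[] = {
	__ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpumaskaffinity, NULL),
	__ATTR(cpulistaffinity, S_IRUGO, pci_bus_show_cpulistaffinity, NULL),
	__ATTR_NULL,
};
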
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index 01f0306525a5..27911b55c2a5 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -10,7 +10,6 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/proc_fs.h> 11#include <linux/proc_fs.h>
12#include <linux/seq_file.h> 12#include <linux/seq_file.h>
13#include <linux/smp_lock.h>
14#include <linux/capability.h> 13#include <linux/capability.h>
15#include <asm/uaccess.h> 14#include <asm/uaccess.h>
16#include <asm/byteorder.h> 15#include <asm/byteorder.h>
@@ -212,8 +211,6 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
212#endif /* HAVE_PCI_MMAP */ 211#endif /* HAVE_PCI_MMAP */
213 int ret = 0; 212 int ret = 0;
214 213
215 lock_kernel();
216
217 switch (cmd) { 214 switch (cmd) {
218 case PCIIOC_CONTROLLER: 215 case PCIIOC_CONTROLLER:
219 ret = pci_domain_nr(dev->bus); 216 ret = pci_domain_nr(dev->bus);
@@ -242,7 +239,6 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
242 break; 239 break;
243 }; 240 };
244 241
245 unlock_kernel();
246 return ret; 242 return ret;
247} 243}
248 244
@@ -260,7 +256,7 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
260 256
261 /* Make sure the caller is mapping a real resource for this device */ 257 /* Make sure the caller is mapping a real resource for this device */
262 for (i = 0; i < PCI_ROM_RESOURCE; i++) { 258 for (i = 0; i < PCI_ROM_RESOURCE; i++) {
263 if (pci_mmap_fits(dev, i, vma)) 259 if (pci_mmap_fits(dev, i, vma, PCI_MMAP_PROCFS))
264 break; 260 break;
265 } 261 }
266 262
@@ -306,6 +302,7 @@ static const struct file_operations proc_bus_pci_operations = {
306 .read = proc_bus_pci_read, 302 .read = proc_bus_pci_read,
307 .write = proc_bus_pci_write, 303 .write = proc_bus_pci_write,
308 .unlocked_ioctl = proc_bus_pci_ioctl, 304 .unlocked_ioctl = proc_bus_pci_ioctl,
305 .compat_ioctl = proc_bus_pci_ioctl,
309#ifdef HAVE_PCI_MMAP 306#ifdef HAVE_PCI_MMAP
310 .open = proc_bus_pci_open, 307 .open = proc_bus_pci_open,
311 .release = proc_bus_pci_release, 308 .release = proc_bus_pci_release,
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 857ae01734a6..02145e9697a9 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -226,6 +226,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439TX, quir
226 * VIA Apollo KT133 needs PCI latency patch 226 * VIA Apollo KT133 needs PCI latency patch
227 * Made according to a windows driver based patch by George E. Breese 227 * Made according to a windows driver based patch by George E. Breese
228 * see PCI Latency Adjust on http://www.viahardware.com/download/viatweak.shtm 228 * see PCI Latency Adjust on http://www.viahardware.com/download/viatweak.shtm
229 * and http://www.georgebreese.com/net/software/#PCI
229 * Also see http://www.au-ja.org/review-kt133a-1-en.phtml for 230 * Also see http://www.au-ja.org/review-kt133a-1-en.phtml for
230 * the info on which Mr Breese based his work. 231 * the info on which Mr Breese based his work.
231 * 232 *
@@ -262,7 +263,7 @@ static void quirk_vialatency(struct pci_dev *dev)
262 * This happens to include the IDE controllers.... 263 * This happens to include the IDE controllers....
263 * 264 *
264 * VIA only apply this fix when an SB Live! is present but under 265 * VIA only apply this fix when an SB Live! is present but under
265 * both Linux and Windows this isnt enough, and we have seen 266 * both Linux and Windows this isn't enough, and we have seen
266 * corruption without SB Live! but with things like 3 UDMA IDE 267 * corruption without SB Live! but with things like 3 UDMA IDE
267 * controllers. So we ignore that bit of the VIA recommendation.. 268 * controllers. So we ignore that bit of the VIA recommendation..
268 */ 269 */
@@ -532,6 +533,17 @@ static void __devinit quirk_piix4_acpi(struct pci_dev *dev)
532DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, quirk_piix4_acpi); 533DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, quirk_piix4_acpi);
533DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443MX_3, quirk_piix4_acpi); 534DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443MX_3, quirk_piix4_acpi);
534 535
536#define ICH_PMBASE 0x40
537#define ICH_ACPI_CNTL 0x44
538#define ICH4_ACPI_EN 0x10
539#define ICH6_ACPI_EN 0x80
540#define ICH4_GPIOBASE 0x58
541#define ICH4_GPIO_CNTL 0x5c
542#define ICH4_GPIO_EN 0x10
543#define ICH6_GPIOBASE 0x48
544#define ICH6_GPIO_CNTL 0x4c
545#define ICH6_GPIO_EN 0x10
546
535/* 547/*
536 * ICH4, ICH4-M, ICH5, ICH5-M ACPI: Three IO regions pointed to by longwords at 548 * ICH4, ICH4-M, ICH5, ICH5-M ACPI: Three IO regions pointed to by longwords at
537 * 0x40 (128 bytes of ACPI, GPIO & TCO registers) 549 * 0x40 (128 bytes of ACPI, GPIO & TCO registers)
@@ -540,12 +552,33 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443MX_3, qui
540static void __devinit quirk_ich4_lpc_acpi(struct pci_dev *dev) 552static void __devinit quirk_ich4_lpc_acpi(struct pci_dev *dev)
541{ 553{
542 u32 region; 554 u32 region;
555 u8 enable;
543 556
544 pci_read_config_dword(dev, 0x40, &region); 557 /*
545 quirk_io_region(dev, region, 128, PCI_BRIDGE_RESOURCES, "ICH4 ACPI/GPIO/TCO"); 558 * The check for PCIBIOS_MIN_IO is to ensure we won't create a conflict
559 * with low legacy (and fixed) ports. We don't know the decoding
560 * priority and can't tell whether the legacy device or the one created
561 * here is really at that address. This happens on boards with broken
562 * BIOSes.
563 */
564
565 pci_read_config_byte(dev, ICH_ACPI_CNTL, &enable);
566 if (enable & ICH4_ACPI_EN) {
567 pci_read_config_dword(dev, ICH_PMBASE, &region);
568 region &= PCI_BASE_ADDRESS_IO_MASK;
569 if (region >= PCIBIOS_MIN_IO)
570 quirk_io_region(dev, region, 128, PCI_BRIDGE_RESOURCES,
571 "ICH4 ACPI/GPIO/TCO");
572 }
546 573
547 pci_read_config_dword(dev, 0x58, &region); 574 pci_read_config_byte(dev, ICH4_GPIO_CNTL, &enable);
548 quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES+1, "ICH4 GPIO"); 575 if (enable & ICH4_GPIO_EN) {
576 pci_read_config_dword(dev, ICH4_GPIOBASE, &region);
577 region &= PCI_BASE_ADDRESS_IO_MASK;
578 if (region >= PCIBIOS_MIN_IO)
579 quirk_io_region(dev, region, 64,
580 PCI_BRIDGE_RESOURCES + 1, "ICH4 GPIO");
581 }
549} 582}
550DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_0, quirk_ich4_lpc_acpi); 583DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_0, quirk_ich4_lpc_acpi);
551DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_0, quirk_ich4_lpc_acpi); 584DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_0, quirk_ich4_lpc_acpi);
@@ -561,12 +594,25 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1, qui
561static void __devinit ich6_lpc_acpi_gpio(struct pci_dev *dev) 594static void __devinit ich6_lpc_acpi_gpio(struct pci_dev *dev)
562{ 595{
563 u32 region; 596 u32 region;
597 u8 enable;
564 598
565 pci_read_config_dword(dev, 0x40, &region); 599 pci_read_config_byte(dev, ICH_ACPI_CNTL, &enable);
566 quirk_io_region(dev, region, 128, PCI_BRIDGE_RESOURCES, "ICH6 ACPI/GPIO/TCO"); 600 if (enable & ICH6_ACPI_EN) {
601 pci_read_config_dword(dev, ICH_PMBASE, &region);
602 region &= PCI_BASE_ADDRESS_IO_MASK;
603 if (region >= PCIBIOS_MIN_IO)
604 quirk_io_region(dev, region, 128, PCI_BRIDGE_RESOURCES,
605 "ICH6 ACPI/GPIO/TCO");
606 }
567 607
568 pci_read_config_dword(dev, 0x48, &region); 608 pci_read_config_byte(dev, ICH6_GPIO_CNTL, &enable);
569 quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES+1, "ICH6 GPIO"); 609 if (enable & ICH6_GPIO_EN) {
610 pci_read_config_dword(dev, ICH6_GPIOBASE, &region);
611 region &= PCI_BASE_ADDRESS_IO_MASK;
612 if (region >= PCIBIOS_MIN_IO)
613 quirk_io_region(dev, region, 64,
614 PCI_BRIDGE_RESOURCES + 1, "ICH6 GPIO");
615 }
570} 616}
571 617
572static void __devinit ich6_lpc_generic_decode(struct pci_dev *dev, unsigned reg, const char *name, int dynsize) 618static void __devinit ich6_lpc_generic_decode(struct pci_dev *dev, unsigned reg, const char *name, int dynsize)
@@ -635,7 +681,7 @@ static void __devinit ich7_lpc_generic_decode(struct pci_dev *dev, unsigned reg,
635/* ICH7-10 has the same common LPC generic IO decode registers */ 681/* ICH7-10 has the same common LPC generic IO decode registers */
636static void __devinit quirk_ich7_lpc(struct pci_dev *dev) 682static void __devinit quirk_ich7_lpc(struct pci_dev *dev)
637{ 683{
638 /* We share the common ACPI/DPIO decode with ICH6 */ 684 /* We share the common ACPI/GPIO decode with ICH6 */
639 ich6_lpc_acpi_gpio(dev); 685 ich6_lpc_acpi_gpio(dev);
640 686
641 /* And have 4 ICH7+ generic decodes */ 687 /* And have 4 ICH7+ generic decodes */
@@ -1016,7 +1062,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TOSHIBA, 0x605, quirk_transparent_bridge)
1016/* 1062/*
1017 * Common misconfiguration of the MediaGX/Geode PCI master that will 1063 * Common misconfiguration of the MediaGX/Geode PCI master that will
1018 * reduce PCI bandwidth from 70MB/s to 25MB/s. See the GXM/GXLV/GX1 1064 * reduce PCI bandwidth from 70MB/s to 25MB/s. See the GXM/GXLV/GX1
1019 * datasheets found at http://www.national.com/ds/GX for info on what 1065 * datasheets found at http://www.national.com/analog for info on what
1020 * these bits do. <christer@weinigel.se> 1066 * these bits do. <christer@weinigel.se>
1021 */ 1067 */
1022static void quirk_mediagx_master(struct pci_dev *dev) 1068static void quirk_mediagx_master(struct pci_dev *dev)
@@ -2135,6 +2181,24 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82865_HB,
2135DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB, 2181DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB,
2136 quirk_unhide_mch_dev6); 2182 quirk_unhide_mch_dev6);
2137 2183
2184#ifdef CONFIG_TILE
2185/*
2186 * The Tilera TILEmpower platform needs to set the link speed
2187 * to 2.5GT(Giga-Transfers)/s (Gen 1). The default link speed
2188 * setting is 5GT/s (Gen 2). 0x98 is the Link Control2 PCIe
2189 * capability register of the PEX8624 PCIe switch. The switch
2190 * supports link speed auto negotiation, but falsely sets
2191 * the link speed to 5GT/s.
2192 */
2193static void __devinit quirk_tile_plx_gen1(struct pci_dev *dev)
2194{
2195 if (tile_plx_gen1) {
2196 pci_write_config_dword(dev, 0x98, 0x1);
2197 mdelay(50);
2198 }
2199}
2200DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PLX, 0x8624, quirk_tile_plx_gen1);
2201#endif /* CONFIG_TILE */
2138 2202
2139#ifdef CONFIG_PCI_MSI 2203#ifdef CONFIG_PCI_MSI
2140/* Some chipsets do not support MSI. We cannot easily rely on setting 2204/* Some chipsets do not support MSI. We cannot easily rely on setting
@@ -2285,8 +2349,11 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE,
2285 */ 2349 */
2286static void __devinit nvenet_msi_disable(struct pci_dev *dev) 2350static void __devinit nvenet_msi_disable(struct pci_dev *dev)
2287{ 2351{
2288 if (dmi_name_in_vendors("P5N32-SLI PREMIUM") || 2352 const char *board_name = dmi_get_system_info(DMI_BOARD_NAME);
2289 dmi_name_in_vendors("P5N32-E SLI")) { 2353
2354 if (board_name &&
2355 (strstr(board_name, "P5N32-SLI PREMIUM") ||
2356 strstr(board_name, "P5N32-E SLI"))) {
2290 dev_info(&dev->dev, 2357 dev_info(&dev->dev,
2291 "Disabling msi for MCP55 NIC on P5N32-SLI\n"); 2358 "Disabling msi for MCP55 NIC on P5N32-SLI\n");
2292 dev->no_msi = 1; 2359 dev->no_msi = 1;
@@ -2296,6 +2363,40 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
2296 PCI_DEVICE_ID_NVIDIA_NVENET_15, 2363 PCI_DEVICE_ID_NVIDIA_NVENET_15,
2297 nvenet_msi_disable); 2364 nvenet_msi_disable);
2298 2365
2366/*
2367 * Some versions of the MCP55 bridge from nvidia have a legacy irq routing
2368 * config register. This register controls the routing of legacy interrupts
2369 * from devices that route through the MCP55. If this register is misprogramed
2370 * interrupts are only sent to the bsp, unlike conventional systems where the
2371 * irq is broadxast to all online cpus. Not having this register set
2372 * properly prevents kdump from booting up properly, so lets make sure that
2373 * we have it set correctly.
2374 * Note this is an undocumented register.
2375 */
2376static void __devinit nvbridge_check_legacy_irq_routing(struct pci_dev *dev)
2377{
2378 u32 cfg;
2379
2380 if (!pci_find_capability(dev, PCI_CAP_ID_HT))
2381 return;
2382
2383 pci_read_config_dword(dev, 0x74, &cfg);
2384
2385 if (cfg & ((1 << 2) | (1 << 15))) {
2386 printk(KERN_INFO "Rewriting irq routing register on MCP55\n");
2387 cfg &= ~((1 << 2) | (1 << 15));
2388 pci_write_config_dword(dev, 0x74, cfg);
2389 }
2390}
2391
2392DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
2393 PCI_DEVICE_ID_NVIDIA_MCP55_BRIDGE_V0,
2394 nvbridge_check_legacy_irq_routing);
2395
2396DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
2397 PCI_DEVICE_ID_NVIDIA_MCP55_BRIDGE_V4,
2398 nvbridge_check_legacy_irq_routing);
2399
2299static int __devinit ht_check_msi_mapping(struct pci_dev *dev) 2400static int __devinit ht_check_msi_mapping(struct pci_dev *dev)
2300{ 2401{
2301 int pos, ttl = 48; 2402 int pos, ttl = 48;
@@ -2565,58 +2666,6 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4375,
2565 2666
2566#endif /* CONFIG_PCI_MSI */ 2667#endif /* CONFIG_PCI_MSI */
2567 2668
2568#ifdef CONFIG_PCI_IOV
2569
2570/*
2571 * For Intel 82576 SR-IOV NIC, if BIOS doesn't allocate resources for the
2572 * SR-IOV BARs, zero the Flash BAR and program the SR-IOV BARs to use the
2573 * old Flash Memory Space.
2574 */
2575static void __devinit quirk_i82576_sriov(struct pci_dev *dev)
2576{
2577 int pos, flags;
2578 u32 bar, start, size;
2579
2580 if (PAGE_SIZE > 0x10000)
2581 return;
2582
2583 flags = pci_resource_flags(dev, 0);
2584 if ((flags & PCI_BASE_ADDRESS_SPACE) !=
2585 PCI_BASE_ADDRESS_SPACE_MEMORY ||
2586 (flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK) !=
2587 PCI_BASE_ADDRESS_MEM_TYPE_32)
2588 return;
2589
2590 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
2591 if (!pos)
2592 return;
2593
2594 pci_read_config_dword(dev, pos + PCI_SRIOV_BAR, &bar);
2595 if (bar & PCI_BASE_ADDRESS_MEM_MASK)
2596 return;
2597
2598 start = pci_resource_start(dev, 1);
2599 size = pci_resource_len(dev, 1);
2600 if (!start || size != 0x400000 || start & (size - 1))
2601 return;
2602
2603 pci_resource_flags(dev, 1) = 0;
2604 pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, 0);
2605 pci_write_config_dword(dev, pos + PCI_SRIOV_BAR, start);
2606 pci_write_config_dword(dev, pos + PCI_SRIOV_BAR + 12, start + size / 2);
2607
2608 dev_info(&dev->dev, "use Flash Memory Space for SR-IOV BARs\n");
2609}
2610DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10c9, quirk_i82576_sriov);
2611DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10e6, quirk_i82576_sriov);
2612DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10e7, quirk_i82576_sriov);
2613DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10e8, quirk_i82576_sriov);
2614DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x150a, quirk_i82576_sriov);
2615DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x150d, quirk_i82576_sriov);
2616DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1518, quirk_i82576_sriov);
2617
2618#endif /* CONFIG_PCI_IOV */
2619
2620/* Allow manual resource allocation for PCI hotplug bridges 2669/* Allow manual resource allocation for PCI hotplug bridges
2621 * via pci=hpmemsize=nnM and pci=hpiosize=nnM parameters. For 2670 * via pci=hpmemsize=nnM and pci=hpiosize=nnM parameters. For
2622 * some PCI-PCI hotplug bridges, like PLX 6254 (former HINT HB6), 2671 * some PCI-PCI hotplug bridges, like PLX 6254 (former HINT HB6),
@@ -2634,7 +2683,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HINT, 0x0020, quirk_hotplug_bridge);
2634 * This is a quirk for the Ricoh MMC controller found as a part of 2683 * This is a quirk for the Ricoh MMC controller found as a part of
2635 * some multifunction chips. 2684
2636 2685
2637 * This is very similiar and based on the ricoh_mmc driver written by 2686 * This is very similar and based on the ricoh_mmc driver written by
2638 * Philip Langdale. Thank you for these magic sequences. 2687 * Philip Langdale. Thank you for these magic sequences.
2639 * 2688 *
2640 * These chips implement the four main memory card controllers (SD, MMC, MS, xD) 2689 * These chips implement the four main memory card controllers (SD, MMC, MS, xD)
@@ -2712,8 +2761,43 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
2712} 2761}
2713DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); 2762DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
2714DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); 2763DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
2764DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832);
2765DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832);
2715#endif /*CONFIG_MMC_RICOH_MMC*/ 2766#endif /*CONFIG_MMC_RICOH_MMC*/
2716 2767
2768#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
2769#define VTUNCERRMSK_REG 0x1ac
2770#define VTD_MSK_SPEC_ERRORS (1 << 31)
2771/*
2772 * This is a quirk for masking vt-d spec defined errors to platform error
2773 * handling logic. Without this, platforms using Intel 7500, 5500 chipsets
2774 * (and derivative chipsets like X58 etc.) seem to generate NMI/SMI (based
2775 * on the RAS config settings of the platform) when a vt-d fault happens.
2776 * The resulting SMI can cause the system to hang.
2777 *
2778 * VT-d spec related errors are already handled by the VT-d OS code, so no
2779 * need to report the same error through other channels.
2780 */
2781static void vtd_mask_spec_errors(struct pci_dev *dev)
2782{
2783 u32 word;
2784
2785 pci_read_config_dword(dev, VTUNCERRMSK_REG, &word);
2786 pci_write_config_dword(dev, VTUNCERRMSK_REG, word | VTD_MSK_SPEC_ERRORS);
2787}
2788DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, vtd_mask_spec_errors);
2789DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors);
2790#endif
2791
2792static void __devinit fixup_ti816x_class(struct pci_dev* dev)
2793{
2794 /* TI 816x devices do not have class code set when in PCIe boot mode */
2795 if (dev->class == PCI_CLASS_NOT_DEFINED) {
2796 dev_info(&dev->dev, "Setting PCI class for 816x PCIe device\n");
2797 dev->class = PCI_CLASS_MULTIMEDIA_VIDEO;
2798 }
2799}
2800DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_TI, 0xb800, fixup_ti816x_class);
2717 2801
2718static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, 2802static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
2719 struct pci_fixup *end) 2803 struct pci_fixup *end)
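
Note: for reference, the DECLARE_PCI_FIXUP_* macros used throughout this file drop a struct pci_fixup entry into a dedicated linker section that pci_do_fixups() walks at the corresponding pass (early, header, final, resume, ...). A minimal sketch with placeholder vendor/device IDs:

/* Placeholder IDs; __devinit matches this file's era. */
static void __devinit example_quirk(struct pci_dev *dev)
{
	dev_info(&dev->dev, "example quirk applied\n");
}
DECLARE_PCI_FIXUP_EARLY(0x1234, 0x5678, example_quirk);
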
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 176615e7231f..7f87beed35ac 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -73,8 +73,6 @@ void pci_remove_bus(struct pci_bus *pci_bus)
73 return; 73 return;
74 74
75 pci_remove_legacy_files(pci_bus); 75 pci_remove_legacy_files(pci_bus);
76 device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
77 device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
78 device_unregister(&pci_bus->dev); 76 device_unregister(&pci_bus->dev);
79} 77}
80EXPORT_SYMBOL(pci_remove_bus); 78EXPORT_SYMBOL(pci_remove_bus);
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 66cb8f4cc5f4..9995842e45b5 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -33,11 +33,39 @@ struct resource_list_x {
33 struct pci_dev *dev; 33 struct pci_dev *dev;
34 resource_size_t start; 34 resource_size_t start;
35 resource_size_t end; 35 resource_size_t end;
36 resource_size_t add_size;
36 unsigned long flags; 37 unsigned long flags;
37}; 38};
38 39
39static void add_to_failed_list(struct resource_list_x *head, 40#define free_list(type, head) do { \
40 struct pci_dev *dev, struct resource *res) 41 struct type *list, *tmp; \
42 for (list = (head)->next; list;) { \
43 tmp = list; \
44 list = list->next; \
45 kfree(tmp); \
46 } \
47 (head)->next = NULL; \
48} while (0)
49
50int pci_realloc_enable = 0;
51#define pci_realloc_enabled() pci_realloc_enable
52void pci_realloc(void)
53{
54 pci_realloc_enable = 1;
55}
56
57/**
58 * add_to_list() - add a new resource tracker to the list
59 * @head: Head of the list
 60 * @dev: device to which the resource
 61 * belongs
62 * @res: The resource to be tracked
63 * @add_size: additional size to be optionally added
64 * to the resource
65 */
66static void add_to_list(struct resource_list_x *head,
67 struct pci_dev *dev, struct resource *res,
68 resource_size_t add_size)
41{ 69{
42 struct resource_list_x *list = head; 70 struct resource_list_x *list = head;
43 struct resource_list_x *ln = list->next; 71 struct resource_list_x *ln = list->next;
@@ -45,7 +73,7 @@ static void add_to_failed_list(struct resource_list_x *head,
45 73
46 tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); 74 tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
47 if (!tmp) { 75 if (!tmp) {
48 pr_warning("add_to_failed_list: kmalloc() failed!\n"); 76 pr_warning("add_to_list: kmalloc() failed!\n");
49 return; 77 return;
50 } 78 }
51 79
@@ -55,20 +83,14 @@ static void add_to_failed_list(struct resource_list_x *head,
55 tmp->start = res->start; 83 tmp->start = res->start;
56 tmp->end = res->end; 84 tmp->end = res->end;
57 tmp->flags = res->flags; 85 tmp->flags = res->flags;
86 tmp->add_size = add_size;
58 list->next = tmp; 87 list->next = tmp;
59} 88}
60 89
61static void free_failed_list(struct resource_list_x *head) 90static void add_to_failed_list(struct resource_list_x *head,
91 struct pci_dev *dev, struct resource *res)
62{ 92{
63 struct resource_list_x *list, *tmp; 93 add_to_list(head, dev, res, 0);
64
65 for (list = head->next; list;) {
66 tmp = list;
67 list = list->next;
68 kfree(tmp);
69 }
70
71 head->next = NULL;
72} 94}
73 95
74static void __dev_sort_resources(struct pci_dev *dev, 96static void __dev_sort_resources(struct pci_dev *dev,
@@ -91,18 +113,88 @@ static void __dev_sort_resources(struct pci_dev *dev,
91 pdev_sort_resources(dev, head); 113 pdev_sort_resources(dev, head);
92} 114}
93 115
94static void __assign_resources_sorted(struct resource_list *head, 116static inline void reset_resource(struct resource *res)
95 struct resource_list_x *fail_head) 117{
118 res->start = 0;
119 res->end = 0;
120 res->flags = 0;
121}
122
123/**
124 * adjust_resources_sorted() - satisfy any additional resource requests
125 *
126 * @add_head : head of the list tracking requests requiring additional
127 * resources
128 * @head : head of the list tracking requests with allocated
129 * resources
130 *
131 * Walk through each element of the add_head and try to procure
132 * additional resources for the element, provided the element
133 * is in the head list.
134 */
135static void adjust_resources_sorted(struct resource_list_x *add_head,
136 struct resource_list *head)
96{ 137{
97 struct resource *res; 138 struct resource *res;
98 struct resource_list *list, *tmp; 139 struct resource_list_x *list, *tmp, *prev;
140 struct resource_list *hlist;
141 resource_size_t add_size;
99 int idx; 142 int idx;
100 143
101 for (list = head->next; list;) { 144 prev = add_head;
145 for (list = add_head->next; list;) {
102 res = list->res; 146 res = list->res;
147 /* skip resource that has been reset */
148 if (!res->flags)
149 goto out;
150
151 /* skip this resource if not found in head list */
152 for (hlist = head->next; hlist && hlist->res != res;
153 hlist = hlist->next);
154 if (!hlist) { /* just skip */
155 prev = list;
156 list = list->next;
157 continue;
158 }
159
103 idx = res - &list->dev->resource[0]; 160 idx = res - &list->dev->resource[0];
161 add_size = list->add_size;
162 if (!resource_size(res) && add_size) {
163 res->end = res->start + add_size - 1;
164 if (pci_assign_resource(list->dev, idx))
165 reset_resource(res);
166 } else if (add_size) {
167 adjust_resource(res, res->start,
168 resource_size(res) + add_size);
169 }
170out:
171 tmp = list;
172 prev->next = list = list->next;
173 kfree(tmp);
174 }
175}
104 176
105 if (pci_assign_resource(list->dev, idx)) { 177/**
178 * assign_requested_resources_sorted() - satisfy resource requests
179 *
180 * @head : head of the list tracking requests for resources
181 * @failed_list : head of the list tracking requests that could
182 * not be allocated
183 *
184 * Satisfy resource requests of each element in the list. Add
185 * requests that could not be satisfied to the failed_list.
186 */
187static void assign_requested_resources_sorted(struct resource_list *head,
188 struct resource_list_x *fail_head)
189{
190 struct resource *res;
191 struct resource_list *list;
192 int idx;
193
194 for (list = head->next; list; list = list->next) {
195 res = list->res;
196 idx = res - &list->dev->resource[0];
197 if (resource_size(res) && pci_assign_resource(list->dev, idx)) {
106 if (fail_head && !pci_is_root_bus(list->dev->bus)) { 198 if (fail_head && !pci_is_root_bus(list->dev->bus)) {
107 /* 199 /*
108 * if the failed res is for ROM BAR, and it will 200 * if the failed res is for ROM BAR, and it will
@@ -112,16 +204,25 @@ static void __assign_resources_sorted(struct resource_list *head,
112 (!(res->flags & IORESOURCE_ROM_ENABLE)))) 204 (!(res->flags & IORESOURCE_ROM_ENABLE))))
113 add_to_failed_list(fail_head, list->dev, res); 205 add_to_failed_list(fail_head, list->dev, res);
114 } 206 }
115 res->start = 0; 207 reset_resource(res);
116 res->end = 0;
117 res->flags = 0;
118 } 208 }
119 tmp = list;
120 list = list->next;
121 kfree(tmp);
122 } 209 }
123} 210}
124 211
212static void __assign_resources_sorted(struct resource_list *head,
213 struct resource_list_x *add_head,
214 struct resource_list_x *fail_head)
215{
216 /* Satisfy the must-have resource requests */
217 assign_requested_resources_sorted(head, fail_head);
218
219 /* Try to satisfy any additional nice-to-have resource
220 requests */
221 if (add_head)
222 adjust_resources_sorted(add_head, head);
223 free_list(resource_list, head);
224}
225
125static void pdev_assign_resources_sorted(struct pci_dev *dev, 226static void pdev_assign_resources_sorted(struct pci_dev *dev,
126 struct resource_list_x *fail_head) 227 struct resource_list_x *fail_head)
127{ 228{
@@ -129,11 +230,12 @@ static void pdev_assign_resources_sorted(struct pci_dev *dev,
129 230
130 head.next = NULL; 231 head.next = NULL;
131 __dev_sort_resources(dev, &head); 232 __dev_sort_resources(dev, &head);
132 __assign_resources_sorted(&head, fail_head); 233 __assign_resources_sorted(&head, NULL, fail_head);
133 234
134} 235}
135 236
136static void pbus_assign_resources_sorted(const struct pci_bus *bus, 237static void pbus_assign_resources_sorted(const struct pci_bus *bus,
238 struct resource_list_x *add_head,
137 struct resource_list_x *fail_head) 239 struct resource_list_x *fail_head)
138{ 240{
139 struct pci_dev *dev; 241 struct pci_dev *dev;
@@ -143,7 +245,7 @@ static void pbus_assign_resources_sorted(const struct pci_bus *bus,
143 list_for_each_entry(dev, &bus->devices, bus_list) 245 list_for_each_entry(dev, &bus->devices, bus_list)
144 __dev_sort_resources(dev, &head); 246 __dev_sort_resources(dev, &head);
145 247
146 __assign_resources_sorted(&head, fail_head); 248 __assign_resources_sorted(&head, add_head, fail_head);
147} 249}
148 250
149void pci_setup_cardbus(struct pci_bus *bus) 251void pci_setup_cardbus(struct pci_bus *bus)
@@ -404,15 +506,62 @@ static struct resource *find_free_bus_resource(struct pci_bus *bus, unsigned lon
404 return NULL; 506 return NULL;
405} 507}
406 508
407/* Sizing the IO windows of the PCI-PCI bridge is trivial, 509static resource_size_t calculate_iosize(resource_size_t size,
408 since these windows have 4K granularity and the IO ranges 510 resource_size_t min_size,
409 of non-bridge PCI devices are limited to 256 bytes. 511 resource_size_t size1,
410 We must be careful with the ISA aliasing though. */ 512 resource_size_t old_size,
411static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size) 513 resource_size_t align)
514{
515 if (size < min_size)
516 size = min_size;
517	if (old_size == 1)
518 old_size = 0;
519	/* To be fixed in 2.5: we should have some sort of HAVE_ISA
520	   flag in the struct pci_bus. */
521#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
522 size = (size & 0xff) + ((size & ~0xffUL) << 2);
523#endif
524 size = ALIGN(size + size1, align);
525 if (size < old_size)
526 size = old_size;
527 return size;
528}
529
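A brief aside on the ISA-aliasing adjustment above: with 10-bit ISA decoding, only the first 256 bytes of every 1 KiB block of I/O space are safely usable, so the helper keeps the low 256 bytes in place and spreads the remainder across 1 KiB blocks. A standalone sketch of the arithmetic (the helper name here is ours, not the kernel's):

#include <stdio.h>

/* Same transform as in calculate_iosize(): each 256-byte chunk above
 * the low 256 bytes ends up in its own 1 KiB block, since only 256 of
 * every 1024 I/O ports are usable under ISA aliasing. */
static unsigned long isa_alias_expand(unsigned long size)
{
	return (size & 0xff) + ((size & ~0xffUL) << 2);
}

int main(void)
{
	/* 0x200 bytes of I/O expand to 0x800: two 0x100 chunks,
	 * each now occupying a 1 KiB block. */
	printf("%#lx -> %#lx\n", 0x200UL, isa_alias_expand(0x200UL));
	return 0;
}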
530static resource_size_t calculate_memsize(resource_size_t size,
531 resource_size_t min_size,
532 resource_size_t size1,
533 resource_size_t old_size,
534 resource_size_t align)
535{
536 if (size < min_size)
537 size = min_size;
538	if (old_size == 1)
539 old_size = 0;
540 if (size < old_size)
541 size = old_size;
542 size = ALIGN(size + size1, align);
543 return size;
544}
545
546/**
547 * pbus_size_io() - size the io window of a given bus
548 *
549 * @bus : the bus
550 * @min_size : the minimum I/O window that must be allocated
551 * @add_size : additional optional io window
552 * @add_head : track the additional io window on this list
553 *
554 * Sizing the IO windows of the PCI-PCI bridge is trivial,
555 * since these windows have 4K granularity and the IO ranges
556 * of non-bridge PCI devices are limited to 256 bytes.
557 * We must be careful with the ISA aliasing though.
558 */
559static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
560 resource_size_t add_size, struct resource_list_x *add_head)
412{ 561{
413 struct pci_dev *dev; 562 struct pci_dev *dev;
414 struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO); 563 struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
415 unsigned long size = 0, size1 = 0, old_size; 564 unsigned long size = 0, size0 = 0, size1 = 0;
416 565
417 if (!b_res) 566 if (!b_res)
418 return; 567 return;
@@ -435,20 +584,12 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size)
435 size1 += r_size; 584 size1 += r_size;
436 } 585 }
437 } 586 }
438 if (size < min_size) 587 size0 = calculate_iosize(size, min_size, size1,
439 size = min_size; 588 resource_size(b_res), 4096);
440 old_size = resource_size(b_res); 589 size1 = (!add_head || (add_head && !add_size)) ? size0 :
441 if (old_size == 1) 590 calculate_iosize(size, min_size+add_size, size1,
442 old_size = 0; 591 resource_size(b_res), 4096);
443/* To be fixed in 2.5: we should have sort of HAVE_ISA 592 if (!size0 && !size1) {
444 flag in the struct pci_bus. */
445#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
446 size = (size & 0xff) + ((size & ~0xffUL) << 2);
447#endif
448 size = ALIGN(size + size1, 4096);
449 if (size < old_size)
450 size = old_size;
451 if (!size) {
452 if (b_res->start || b_res->end) 593 if (b_res->start || b_res->end)
453 dev_info(&bus->self->dev, "disabling bridge window " 594 dev_info(&bus->self->dev, "disabling bridge window "
454 "%pR to [bus %02x-%02x] (unused)\n", b_res, 595 "%pR to [bus %02x-%02x] (unused)\n", b_res,
@@ -458,17 +599,30 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size)
458 } 599 }
459 /* Alignment of the IO window is always 4K */ 600 /* Alignment of the IO window is always 4K */
460 b_res->start = 4096; 601 b_res->start = 4096;
461 b_res->end = b_res->start + size - 1; 602 b_res->end = b_res->start + size0 - 1;
462 b_res->flags |= IORESOURCE_STARTALIGN; 603 b_res->flags |= IORESOURCE_STARTALIGN;
604 if (size1 > size0 && add_head)
605 add_to_list(add_head, bus->self, b_res, size1-size0);
463} 606}
464 607
465/* Calculate the size of the bus and minimal alignment which 608/**
466 guarantees that all child resources fit in this size. */ 609 * pbus_size_mem() - size the memory window of a given bus
610 *
611 * @bus : the bus
612 * @min_size : the minimum memory window that must be allocated
613 * @add_size : additional optional memory window
614 * @add_head : track the additional memory window on this list
615 *
616 * Calculate the size of the bus and minimal alignment which
617 * guarantees that all child resources fit in this size.
618 */
467static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, 619static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
468 unsigned long type, resource_size_t min_size) 620 unsigned long type, resource_size_t min_size,
621 resource_size_t add_size,
622 struct resource_list_x *add_head)
469{ 623{
470 struct pci_dev *dev; 624 struct pci_dev *dev;
471 resource_size_t min_align, align, size, old_size; 625 resource_size_t min_align, align, size, size0, size1;
472 resource_size_t aligns[12]; /* Alignments from 1Mb to 2Gb */ 626 resource_size_t aligns[12]; /* Alignments from 1Mb to 2Gb */
473 int order, max_order; 627 int order, max_order;
474 struct resource *b_res = find_free_bus_resource(bus, type); 628 struct resource *b_res = find_free_bus_resource(bus, type);
@@ -516,14 +670,6 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
516 mem64_mask &= r->flags & IORESOURCE_MEM_64; 670 mem64_mask &= r->flags & IORESOURCE_MEM_64;
517 } 671 }
518 } 672 }
519 if (size < min_size)
520 size = min_size;
521 old_size = resource_size(b_res);
522 if (old_size == 1)
523 old_size = 0;
524 if (size < old_size)
525 size = old_size;
526
527 align = 0; 673 align = 0;
528 min_align = 0; 674 min_align = 0;
529 for (order = 0; order <= max_order; order++) { 675 for (order = 0; order <= max_order; order++) {
@@ -537,8 +683,11 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
537 min_align = align1 >> 1; 683 min_align = align1 >> 1;
538 align += aligns[order]; 684 align += aligns[order];
539 } 685 }
540 size = ALIGN(size, min_align); 686 size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align);
541 if (!size) { 687 size1 = (!add_head || (add_head && !add_size)) ? size0 :
688 calculate_memsize(size, min_size+add_size, 0,
689 resource_size(b_res), min_align);
690 if (!size0 && !size1) {
542 if (b_res->start || b_res->end) 691 if (b_res->start || b_res->end)
543 dev_info(&bus->self->dev, "disabling bridge window " 692 dev_info(&bus->self->dev, "disabling bridge window "
544 "%pR to [bus %02x-%02x] (unused)\n", b_res, 693 "%pR to [bus %02x-%02x] (unused)\n", b_res,
@@ -547,9 +696,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
547 return 1; 696 return 1;
548 } 697 }
549 b_res->start = min_align; 698 b_res->start = min_align;
550 b_res->end = size + min_align - 1; 699 b_res->end = size0 + min_align - 1;
551 b_res->flags |= IORESOURCE_STARTALIGN; 700 b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask;
552 b_res->flags |= mem64_mask; 701 if (size1 > size0 && add_head)
702 add_to_list(add_head, bus->self, b_res, size1-size0);
553 return 1; 703 return 1;
554} 704}
555 705
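The optional sizes recorded on add_head above are consumed by adjust_resources_sorted() (called from __assign_resources_sorted() earlier; its definition is outside the hunks shown here). Roughly, for each recorded entry it grows the already-assigned window by the optional amount. A simplified sketch, assuming the extra size is stored in an add_size member of struct resource_list_x (field name is an assumption):

/* Simplified, kernel-context sketch of consuming the add list;
 * the real work is done by adjust_resources_sorted(). */
static void consume_add_list(struct resource_list_x *add_head)
{
	struct resource_list_x *list;

	for (list = add_head->next; list; list = list->next) {
		struct resource *res = list->res;

		/* skip resources whose must-have assignment failed
		 * (those were reset, so flags == 0) */
		if (!res->flags)
			continue;
		if (list->add_size)
			res->end += list->add_size;	/* grow the window */
	}
}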
@@ -602,11 +752,12 @@ static void pci_bus_size_cardbus(struct pci_bus *bus)
602 } 752 }
603} 753}
604 754
605void __ref pci_bus_size_bridges(struct pci_bus *bus) 755void __ref __pci_bus_size_bridges(struct pci_bus *bus,
756 struct resource_list_x *add_head)
606{ 757{
607 struct pci_dev *dev; 758 struct pci_dev *dev;
608 unsigned long mask, prefmask; 759 unsigned long mask, prefmask;
609 resource_size_t min_mem_size = 0, min_io_size = 0; 760 resource_size_t additional_mem_size = 0, additional_io_size = 0;
610 761
611 list_for_each_entry(dev, &bus->devices, bus_list) { 762 list_for_each_entry(dev, &bus->devices, bus_list) {
612 struct pci_bus *b = dev->subordinate; 763 struct pci_bus *b = dev->subordinate;
@@ -620,7 +771,7 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus)
620 771
621 case PCI_CLASS_BRIDGE_PCI: 772 case PCI_CLASS_BRIDGE_PCI:
622 default: 773 default:
623 pci_bus_size_bridges(b); 774 __pci_bus_size_bridges(b, add_head);
624 break; 775 break;
625 } 776 }
626 } 777 }
@@ -637,11 +788,14 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus)
637 case PCI_CLASS_BRIDGE_PCI: 788 case PCI_CLASS_BRIDGE_PCI:
638 pci_bridge_check_ranges(bus); 789 pci_bridge_check_ranges(bus);
639 if (bus->self->is_hotplug_bridge) { 790 if (bus->self->is_hotplug_bridge) {
640 min_io_size = pci_hotplug_io_size; 791 additional_io_size = pci_hotplug_io_size;
641 min_mem_size = pci_hotplug_mem_size; 792 additional_mem_size = pci_hotplug_mem_size;
642 } 793 }
794 /*
795	 * Fall through
796 */
643 default: 797 default:
644 pbus_size_io(bus, min_io_size); 798 pbus_size_io(bus, 0, additional_io_size, add_head);
645 /* If the bridge supports prefetchable range, size it 799 /* If the bridge supports prefetchable range, size it
646 separately. If it doesn't, or its prefetchable window 800 separately. If it doesn't, or its prefetchable window
647 has already been allocated by arch code, try 801 has already been allocated by arch code, try
@@ -649,30 +803,36 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus)
649 resources. */ 803 resources. */
650 mask = IORESOURCE_MEM; 804 mask = IORESOURCE_MEM;
651 prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH; 805 prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
652 if (pbus_size_mem(bus, prefmask, prefmask, min_mem_size)) 806 if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, add_head))
653 mask = prefmask; /* Success, size non-prefetch only. */ 807 mask = prefmask; /* Success, size non-prefetch only. */
654 else 808 else
655 min_mem_size += min_mem_size; 809 additional_mem_size += additional_mem_size;
656 pbus_size_mem(bus, mask, IORESOURCE_MEM, min_mem_size); 810 pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, add_head);
657 break; 811 break;
658 } 812 }
659} 813}
814
815void __ref pci_bus_size_bridges(struct pci_bus *bus)
816{
817 __pci_bus_size_bridges(bus, NULL);
818}
660EXPORT_SYMBOL(pci_bus_size_bridges); 819EXPORT_SYMBOL(pci_bus_size_bridges);
661 820
662static void __ref __pci_bus_assign_resources(const struct pci_bus *bus, 821static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
822 struct resource_list_x *add_head,
663 struct resource_list_x *fail_head) 823 struct resource_list_x *fail_head)
664{ 824{
665 struct pci_bus *b; 825 struct pci_bus *b;
666 struct pci_dev *dev; 826 struct pci_dev *dev;
667 827
668 pbus_assign_resources_sorted(bus, fail_head); 828 pbus_assign_resources_sorted(bus, add_head, fail_head);
669 829
670 list_for_each_entry(dev, &bus->devices, bus_list) { 830 list_for_each_entry(dev, &bus->devices, bus_list) {
671 b = dev->subordinate; 831 b = dev->subordinate;
672 if (!b) 832 if (!b)
673 continue; 833 continue;
674 834
675 __pci_bus_assign_resources(b, fail_head); 835 __pci_bus_assign_resources(b, add_head, fail_head);
676 836
677 switch (dev->class >> 8) { 837 switch (dev->class >> 8) {
678 case PCI_CLASS_BRIDGE_PCI: 838 case PCI_CLASS_BRIDGE_PCI:
@@ -694,7 +854,7 @@ static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
694 854
695void __ref pci_bus_assign_resources(const struct pci_bus *bus) 855void __ref pci_bus_assign_resources(const struct pci_bus *bus)
696{ 856{
697 __pci_bus_assign_resources(bus, NULL); 857 __pci_bus_assign_resources(bus, NULL, NULL);
698} 858}
699EXPORT_SYMBOL(pci_bus_assign_resources); 859EXPORT_SYMBOL(pci_bus_assign_resources);
700 860
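For reference, the exported pair above is meant to be used together, in the same order as the boot-time path later in this file: size the bridge windows first, then assign resources, then enable the bridges. A minimal caller sketch, given a struct pci_bus *bus obtained elsewhere:

/* Typical bring-up sequence for a newly added bus (sketch only). */
static void bring_up_bus(struct pci_bus *bus)
{
	pci_bus_size_bridges(bus);	/* depth-first: size bridge windows */
	pci_bus_assign_resources(bus);	/* allocate and program resources */
	pci_enable_bridges(bus);	/* finally enable the bridges */
}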
@@ -709,7 +869,7 @@ static void __ref __pci_bridge_assign_resources(const struct pci_dev *bridge,
709 if (!b) 869 if (!b)
710 return; 870 return;
711 871
712 __pci_bus_assign_resources(b, fail_head); 872 __pci_bus_assign_resources(b, NULL, fail_head);
713 873
714 switch (bridge->class >> 8) { 874 switch (bridge->class >> 8) {
715 case PCI_CLASS_BRIDGE_PCI: 875 case PCI_CLASS_BRIDGE_PCI:
@@ -838,26 +998,147 @@ static void pci_bus_dump_resources(struct pci_bus *bus)
838 } 998 }
839} 999}
840 1000
1001static int __init pci_bus_get_depth(struct pci_bus *bus)
1002{
1003 int depth = 0;
1004 struct pci_dev *dev;
1005
1006 list_for_each_entry(dev, &bus->devices, bus_list) {
1007 int ret;
1008 struct pci_bus *b = dev->subordinate;
1009 if (!b)
1010 continue;
1011
1012 ret = pci_bus_get_depth(b);
1013 if (ret + 1 > depth)
1014 depth = ret + 1;
1015 }
1016
1017 return depth;
1018}
1019static int __init pci_get_max_depth(void)
1020{
1021 int depth = 0;
1022 struct pci_bus *bus;
1023
1024 list_for_each_entry(bus, &pci_root_buses, node) {
1025 int ret;
1026
1027 ret = pci_bus_get_depth(bus);
1028 if (ret > depth)
1029 depth = ret;
1030 }
1031
1032 return depth;
1033}
1034
1035
1036/*
1037 * The first try will not touch PCI bridge resources;
1038 * second and later tries will release small leaf bridge resources,
1039 * stopping at the maximum depth if no good assignment is found.
1040 */
841void __init 1041void __init
842pci_assign_unassigned_resources(void) 1042pci_assign_unassigned_resources(void)
843{ 1043{
844 struct pci_bus *bus; 1044 struct pci_bus *bus;
1045 struct resource_list_x add_list; /* list of resources that
1046 want additional resources */
1047 int tried_times = 0;
1048 enum release_type rel_type = leaf_only;
1049 struct resource_list_x head, *list;
1050 unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
1051 IORESOURCE_PREFETCH;
1052 unsigned long failed_type;
1053 int max_depth = pci_get_max_depth();
1054 int pci_try_num;
1055
1056
1057 head.next = NULL;
1058 add_list.next = NULL;
845 1059
1060 pci_try_num = max_depth + 1;
1061 printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n",
1062 max_depth, pci_try_num);
1063
1064again:
846 /* Depth first, calculate sizes and alignments of all 1065 /* Depth first, calculate sizes and alignments of all
847 subordinate buses. */ 1066 subordinate buses. */
848 list_for_each_entry(bus, &pci_root_buses, node) { 1067 list_for_each_entry(bus, &pci_root_buses, node)
849 pci_bus_size_bridges(bus); 1068 __pci_bus_size_bridges(bus, &add_list);
850 } 1069
851 /* Depth last, allocate resources and update the hardware. */ 1070 /* Depth last, allocate resources and update the hardware. */
852 list_for_each_entry(bus, &pci_root_buses, node) { 1071 list_for_each_entry(bus, &pci_root_buses, node)
853 pci_bus_assign_resources(bus); 1072 __pci_bus_assign_resources(bus, &add_list, &head);
854 pci_enable_bridges(bus); 1073 BUG_ON(add_list.next);
1074 tried_times++;
1075
1076	/* did any device complain? */
1077 if (!head.next)
1078 goto enable_and_dump;
1079
1080	/* don't try to reallocate unless we were asked to */
1081 if (!pci_realloc_enabled()) {
1082 free_list(resource_list_x, &head);
1083 goto enable_and_dump;
1084 }
1085
1086 failed_type = 0;
1087 for (list = head.next; list;) {
1088 failed_type |= list->flags;
1089 list = list->next;
1090 }
1091 /*
1092	 * I/O ports are tight, so don't retry for them alone;
1093	 * also stop once the retry limit has been reached.
1094 */
1095 failed_type &= type_mask;
1096 if ((failed_type == IORESOURCE_IO) || (tried_times >= pci_try_num)) {
1097 free_list(resource_list_x, &head);
1098 goto enable_and_dump;
855 } 1099 }
856 1100
1101	printk(KERN_DEBUG "PCI: try %d to assign unassigned resources\n",
1102 tried_times + 1);
1103
1104	/* from the third try on, release whole subtrees, not just leaves */
1105 if ((tried_times + 1) > 2)
1106 rel_type = whole_subtree;
1107
1108 /*
1109	 * Try to release the resources of leaf bridges that do not fit
1110	 * the resources of the child devices under those bridges
1111 */
1112 for (list = head.next; list;) {
1113 bus = list->dev->bus;
1114 pci_bus_release_bridge_resources(bus, list->flags & type_mask,
1115 rel_type);
1116 list = list->next;
1117 }
1118 /* restore size and flags */
1119 for (list = head.next; list;) {
1120 struct resource *res = list->res;
1121
1122 res->start = list->start;
1123 res->end = list->end;
1124 res->flags = list->flags;
1125 if (list->dev->subordinate)
1126 res->flags = 0;
1127
1128 list = list->next;
1129 }
1130 free_list(resource_list_x, &head);
1131
1132 goto again;
1133
1134enable_and_dump:
1135 /* Depth last, update the hardware. */
1136 list_for_each_entry(bus, &pci_root_buses, node)
1137 pci_enable_bridges(bus);
1138
857 /* dump the resource on buses */ 1139 /* dump the resource on buses */
858 list_for_each_entry(bus, &pci_root_buses, node) { 1140 list_for_each_entry(bus, &pci_root_buses, node)
859 pci_bus_dump_resources(bus); 1141 pci_bus_dump_resources(bus);
860 }
861} 1142}
862 1143
863void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge) 1144void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
@@ -882,7 +1163,7 @@ again:
882 1163
883 if (tried_times >= 2) { 1164 if (tried_times >= 2) {
884 /* still fail, don't need to try more */ 1165 /* still fail, don't need to try more */
885 free_failed_list(&head); 1166 free_list(resource_list_x, &head);
886 goto enable_all; 1167 goto enable_all;
887 } 1168 }
888 1169
@@ -913,7 +1194,7 @@ again:
913 1194
914 list = list->next; 1195 list = list->next;
915 } 1196 }
916 free_failed_list(&head); 1197 free_list(resource_list_x, &head);
917 1198
918 goto again; 1199 goto again;
919 1200
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 2aaa13150de3..bc0e6eea0fff 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -85,7 +85,7 @@ void pci_update_resource(struct pci_dev *dev, int resno)
85 } 85 }
86 } 86 }
87 res->flags &= ~IORESOURCE_UNSET; 87 res->flags &= ~IORESOURCE_UNSET;
88 dev_info(&dev->dev, "BAR %d: set to %pR (PCI address [%#llx-%#llx]\n", 88 dev_info(&dev->dev, "BAR %d: set to %pR (PCI address [%#llx-%#llx])\n",
89 resno, res, (unsigned long long)region.start, 89 resno, res, (unsigned long long)region.start,
90 (unsigned long long)region.end); 90 (unsigned long long)region.end);
91} 91}
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
new file mode 100644
index 000000000000..492b7d807fe8
--- /dev/null
+++ b/drivers/pci/xen-pcifront.c
@@ -0,0 +1,1159 @@
1/*
2 * Xen PCI Frontend.
3 *
4 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
5 */
6#include <linux/module.h>
7#include <linux/init.h>
8#include <linux/mm.h>
9#include <xen/xenbus.h>
10#include <xen/events.h>
11#include <xen/grant_table.h>
12#include <xen/page.h>
13#include <linux/spinlock.h>
14#include <linux/pci.h>
15#include <linux/msi.h>
16#include <xen/interface/io/pciif.h>
17#include <asm/xen/pci.h>
18#include <linux/interrupt.h>
19#include <asm/atomic.h>
20#include <linux/workqueue.h>
21#include <linux/bitops.h>
22#include <linux/time.h>
23
24#define INVALID_GRANT_REF (0)
25#define INVALID_EVTCHN (-1)
26
27struct pci_bus_entry {
28 struct list_head list;
29 struct pci_bus *bus;
30};
31
32#define _PDEVB_op_active (0)
33#define PDEVB_op_active (1 << (_PDEVB_op_active))
34
35struct pcifront_device {
36 struct xenbus_device *xdev;
37 struct list_head root_buses;
38
39 int evtchn;
40 int gnt_ref;
41
42 int irq;
43
44 /* Lock this when doing any operations in sh_info */
45 spinlock_t sh_info_lock;
46 struct xen_pci_sharedinfo *sh_info;
47 struct work_struct op_work;
48 unsigned long flags;
49
50};
51
52struct pcifront_sd {
53 int domain;
54 struct pcifront_device *pdev;
55};
56
57static inline struct pcifront_device *
58pcifront_get_pdev(struct pcifront_sd *sd)
59{
60 return sd->pdev;
61}
62
63static inline void pcifront_init_sd(struct pcifront_sd *sd,
64 unsigned int domain, unsigned int bus,
65 struct pcifront_device *pdev)
66{
67 sd->domain = domain;
68 sd->pdev = pdev;
69}
70
71static DEFINE_SPINLOCK(pcifront_dev_lock);
72static struct pcifront_device *pcifront_dev;
73
74static int verbose_request;
75module_param(verbose_request, int, 0644);
76
77static int errno_to_pcibios_err(int errno)
78{
79 switch (errno) {
80 case XEN_PCI_ERR_success:
81 return PCIBIOS_SUCCESSFUL;
82
83 case XEN_PCI_ERR_dev_not_found:
84 return PCIBIOS_DEVICE_NOT_FOUND;
85
86 case XEN_PCI_ERR_invalid_offset:
87 case XEN_PCI_ERR_op_failed:
88 return PCIBIOS_BAD_REGISTER_NUMBER;
89
90 case XEN_PCI_ERR_not_implemented:
91 return PCIBIOS_FUNC_NOT_SUPPORTED;
92
93 case XEN_PCI_ERR_access_denied:
94 return PCIBIOS_SET_FAILED;
95 }
96 return errno;
97}
98
99static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev)
100{
101 if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
102 && !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) {
103 dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n");
104 schedule_work(&pdev->op_work);
105 }
106}
107
108static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
109{
110 int err = 0;
111 struct xen_pci_op *active_op = &pdev->sh_info->op;
112 unsigned long irq_flags;
113 evtchn_port_t port = pdev->evtchn;
114 unsigned irq = pdev->irq;
115 s64 ns, ns_timeout;
116 struct timeval tv;
117
118 spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
119
120 memcpy(active_op, op, sizeof(struct xen_pci_op));
121
122 /* Go */
123 wmb();
124 set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
125 notify_remote_via_evtchn(port);
126
127 /*
128 * We set a poll timeout of 3 seconds but give up on return after
129 * 2 seconds. It is better to time out too late rather than too early
130 * (in the latter case we end up continually re-executing poll() with a
131 * timeout in the past). 1s difference gives plenty of slack for error.
132 */
133 do_gettimeofday(&tv);
134 ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
135
136 xen_clear_irq_pending(irq);
137
138 while (test_bit(_XEN_PCIF_active,
139 (unsigned long *)&pdev->sh_info->flags)) {
140 xen_poll_irq_timeout(irq, jiffies + 3*HZ);
141 xen_clear_irq_pending(irq);
142 do_gettimeofday(&tv);
143 ns = timeval_to_ns(&tv);
144 if (ns > ns_timeout) {
145 dev_err(&pdev->xdev->dev,
146 "pciback not responding!!!\n");
147 clear_bit(_XEN_PCIF_active,
148 (unsigned long *)&pdev->sh_info->flags);
149 err = XEN_PCI_ERR_dev_not_found;
150 goto out;
151 }
152 }
153
154 /*
155	 * We might lose a backend service request, since we reuse the
156	 * same event channel for the pci_conf backend response. So
157	 * reschedule the AER pcifront service.
158 */
159 if (test_bit(_XEN_PCIB_active,
160 (unsigned long *)&pdev->sh_info->flags)) {
161 dev_err(&pdev->xdev->dev,
162 "schedule aer pcifront service\n");
163 schedule_pcifront_aer_op(pdev);
164 }
165
166 memcpy(op, active_op, sizeof(struct xen_pci_op));
167
168 err = op->err;
169out:
170 spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
171 return err;
172}
173
174/* Access to this function is spinlocked in drivers/pci/access.c */
175static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
176 int where, int size, u32 *val)
177{
178 int err = 0;
179 struct xen_pci_op op = {
180 .cmd = XEN_PCI_OP_conf_read,
181 .domain = pci_domain_nr(bus),
182 .bus = bus->number,
183 .devfn = devfn,
184 .offset = where,
185 .size = size,
186 };
187 struct pcifront_sd *sd = bus->sysdata;
188 struct pcifront_device *pdev = pcifront_get_pdev(sd);
189
190 if (verbose_request)
191 dev_info(&pdev->xdev->dev,
192 "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n",
193 pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
194 PCI_FUNC(devfn), where, size);
195
196 err = do_pci_op(pdev, &op);
197
198 if (likely(!err)) {
199 if (verbose_request)
200 dev_info(&pdev->xdev->dev, "read got back value %x\n",
201 op.value);
202
203 *val = op.value;
204 } else if (err == -ENODEV) {
205 /* No device here, pretend that it just returned 0 */
206 err = 0;
207 *val = 0;
208 }
209
210 return errno_to_pcibios_err(err);
211}
212
213/* Access to this function is spinlocked in drivers/pci/access.c */
214static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
215 int where, int size, u32 val)
216{
217 struct xen_pci_op op = {
218 .cmd = XEN_PCI_OP_conf_write,
219 .domain = pci_domain_nr(bus),
220 .bus = bus->number,
221 .devfn = devfn,
222 .offset = where,
223 .size = size,
224 .value = val,
225 };
226 struct pcifront_sd *sd = bus->sysdata;
227 struct pcifront_device *pdev = pcifront_get_pdev(sd);
228
229 if (verbose_request)
230 dev_info(&pdev->xdev->dev,
231 "write dev=%04x:%02x:%02x.%01x - "
232 "offset %x size %d val %x\n",
233 pci_domain_nr(bus), bus->number,
234 PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
235
236 return errno_to_pcibios_err(do_pci_op(pdev, &op));
237}
238
239struct pci_ops pcifront_bus_ops = {
240 .read = pcifront_bus_read,
241 .write = pcifront_bus_write,
242};
243
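Once a bus is scanned with these ops (see pci_scan_bus_parented() further down), the PCI core's config accessors dispatch through them, so a config read from core or driver code ends up in pcifront_bus_read(). A small sketch of such a caller (helper name is ours):

/* Sketch: a config-space read routed through pcifront_bus_ops. */
static u16 read_vendor_id(struct pci_bus *bus, unsigned int devfn)
{
	u32 val;

	pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &val);
	return val & 0xffff;	/* low word is the vendor ID */
}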
244#ifdef CONFIG_PCI_MSI
245static int pci_frontend_enable_msix(struct pci_dev *dev,
246 int vector[], int nvec)
247{
248 int err;
249 int i;
250 struct xen_pci_op op = {
251 .cmd = XEN_PCI_OP_enable_msix,
252 .domain = pci_domain_nr(dev->bus),
253 .bus = dev->bus->number,
254 .devfn = dev->devfn,
255 .value = nvec,
256 };
257 struct pcifront_sd *sd = dev->bus->sysdata;
258 struct pcifront_device *pdev = pcifront_get_pdev(sd);
259 struct msi_desc *entry;
260
261 if (nvec > SH_INFO_MAX_VEC) {
262		dev_err(&dev->dev, "too many vectors for pci frontend: %x."
263 " Increase SH_INFO_MAX_VEC.\n", nvec);
264 return -EINVAL;
265 }
266
267 i = 0;
268 list_for_each_entry(entry, &dev->msi_list, list) {
269 op.msix_entries[i].entry = entry->msi_attrib.entry_nr;
270 /* Vector is useless at this point. */
271 op.msix_entries[i].vector = -1;
272 i++;
273 }
274
275 err = do_pci_op(pdev, &op);
276
277 if (likely(!err)) {
278 if (likely(!op.value)) {
279 /* we get the result */
280 for (i = 0; i < nvec; i++) {
281 if (op.msix_entries[i].vector <= 0) {
282 dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n",
283 i, op.msix_entries[i].vector);
284 err = -EINVAL;
285 vector[i] = -1;
286 continue;
287 }
288 vector[i] = op.msix_entries[i].vector;
289 }
290 } else {
291 printk(KERN_DEBUG "enable msix get value %x\n",
292 op.value);
293 }
294 } else {
295 dev_err(&dev->dev, "enable msix get err %x\n", err);
296 }
297 return err;
298}
299
300static void pci_frontend_disable_msix(struct pci_dev *dev)
301{
302 int err;
303 struct xen_pci_op op = {
304 .cmd = XEN_PCI_OP_disable_msix,
305 .domain = pci_domain_nr(dev->bus),
306 .bus = dev->bus->number,
307 .devfn = dev->devfn,
308 };
309 struct pcifront_sd *sd = dev->bus->sysdata;
310 struct pcifront_device *pdev = pcifront_get_pdev(sd);
311
312 err = do_pci_op(pdev, &op);
313
314	/* What should we do on error? */
315 if (err)
316 dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
317}
318
319static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
320{
321 int err;
322 struct xen_pci_op op = {
323 .cmd = XEN_PCI_OP_enable_msi,
324 .domain = pci_domain_nr(dev->bus),
325 .bus = dev->bus->number,
326 .devfn = dev->devfn,
327 };
328 struct pcifront_sd *sd = dev->bus->sysdata;
329 struct pcifront_device *pdev = pcifront_get_pdev(sd);
330
331 err = do_pci_op(pdev, &op);
332 if (likely(!err)) {
333 vector[0] = op.value;
334 if (op.value <= 0) {
335 dev_warn(&dev->dev, "MSI entry is invalid: %d!\n",
336 op.value);
337 err = -EINVAL;
338 vector[0] = -1;
339 }
340 } else {
341 dev_err(&dev->dev, "pci frontend enable msi failed for dev "
342 "%x:%x\n", op.bus, op.devfn);
343 err = -EINVAL;
344 }
345 return err;
346}
347
348static void pci_frontend_disable_msi(struct pci_dev *dev)
349{
350 int err;
351 struct xen_pci_op op = {
352 .cmd = XEN_PCI_OP_disable_msi,
353 .domain = pci_domain_nr(dev->bus),
354 .bus = dev->bus->number,
355 .devfn = dev->devfn,
356 };
357 struct pcifront_sd *sd = dev->bus->sysdata;
358 struct pcifront_device *pdev = pcifront_get_pdev(sd);
359
360 err = do_pci_op(pdev, &op);
361 if (err == XEN_PCI_ERR_dev_not_found) {
362 /* XXX No response from backend, what shall we do? */
363 printk(KERN_DEBUG "get no response from backend for disable MSI\n");
364 return;
365 }
366 if (err)
367		/* how can pciback notify us of a failure? */
368		printk(KERN_DEBUG "got fake response from backend\n");
369}
370
371static struct xen_pci_frontend_ops pci_frontend_ops = {
372 .enable_msi = pci_frontend_enable_msi,
373 .disable_msi = pci_frontend_disable_msi,
374 .enable_msix = pci_frontend_enable_msix,
375 .disable_msix = pci_frontend_disable_msix,
376};
377
378static void pci_frontend_registrar(int enable)
379{
380 if (enable)
381 xen_pci_frontend = &pci_frontend_ops;
382 else
383 xen_pci_frontend = NULL;
384};
385#else
386static inline void pci_frontend_registrar(int enable) { };
387#endif /* CONFIG_PCI_MSI */
388
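The registrar above publishes this ops table through the xen_pci_frontend pointer (made visible via asm/xen/pci.h); the architecture's Xen MSI setup code is then expected to call through it when a driver enables MSI. A hypothetical call site might look like:

/* Sketch of a consumer; the real hook lives in the x86 Xen MSI code. */
static int try_frontend_enable_msi(struct pci_dev *dev)
{
	int vector[1];
	int err = -ENODEV;

	if (xen_pci_frontend && xen_pci_frontend->enable_msi) {
		err = xen_pci_frontend->enable_msi(dev, vector);
		if (!err)
			/* vector[0] now carries the backend-assigned vector */
			dev_info(&dev->dev, "MSI vector %d\n", vector[0]);
	}
	return err;
}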
389/* Claim resources for the PCI frontend as-is, backend won't allow changes */
390static int pcifront_claim_resource(struct pci_dev *dev, void *data)
391{
392 struct pcifront_device *pdev = data;
393 int i;
394 struct resource *r;
395
396 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
397 r = &dev->resource[i];
398
399 if (!r->parent && r->start && r->flags) {
400 dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
401 pci_name(dev), i);
402 if (pci_claim_resource(dev, i)) {
403 dev_err(&pdev->xdev->dev, "Could not claim "
404 "resource %s/%d! Device offline. Try "
405 "giving less than 4GB to domain.\n",
406 pci_name(dev), i);
407 }
408 }
409 }
410
411 return 0;
412}
413
414static int __devinit pcifront_scan_bus(struct pcifront_device *pdev,
415 unsigned int domain, unsigned int bus,
416 struct pci_bus *b)
417{
418 struct pci_dev *d;
419 unsigned int devfn;
420
421	/* Scan the bus for functions and add them.
422 * We omit handling of PCI bridge attachment because pciback prevents
423 * bridges from being exported.
424 */
425 for (devfn = 0; devfn < 0x100; devfn++) {
426 d = pci_get_slot(b, devfn);
427 if (d) {
428 /* Device is already known. */
429 pci_dev_put(d);
430 continue;
431 }
432
433 d = pci_scan_single_device(b, devfn);
434 if (d)
435 dev_info(&pdev->xdev->dev, "New device on "
436 "%04x:%02x:%02x.%02x found.\n", domain, bus,
437 PCI_SLOT(devfn), PCI_FUNC(devfn));
438 }
439
440 return 0;
441}
442
443static int __devinit pcifront_scan_root(struct pcifront_device *pdev,
444 unsigned int domain, unsigned int bus)
445{
446 struct pci_bus *b;
447 struct pcifront_sd *sd = NULL;
448 struct pci_bus_entry *bus_entry = NULL;
449 int err = 0;
450
451#ifndef CONFIG_PCI_DOMAINS
452 if (domain != 0) {
453 dev_err(&pdev->xdev->dev,
454 "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
455 dev_err(&pdev->xdev->dev,
456 "Please compile with CONFIG_PCI_DOMAINS\n");
457 err = -EINVAL;
458 goto err_out;
459 }
460#endif
461
462 dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
463 domain, bus);
464
465 bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
466 sd = kmalloc(sizeof(*sd), GFP_KERNEL);
467 if (!bus_entry || !sd) {
468 err = -ENOMEM;
469 goto err_out;
470 }
471 pcifront_init_sd(sd, domain, bus, pdev);
472
473 b = pci_scan_bus_parented(&pdev->xdev->dev, bus,
474 &pcifront_bus_ops, sd);
475 if (!b) {
476 dev_err(&pdev->xdev->dev,
477 "Error creating PCI Frontend Bus!\n");
478 err = -ENOMEM;
479 goto err_out;
480 }
481
482 bus_entry->bus = b;
483
484 list_add(&bus_entry->list, &pdev->root_buses);
485
486	/* pci_scan_bus_parented skips devices which do not have
487	 * devfn == 0. pcifront_scan_bus enumerates all devfns. */
488 err = pcifront_scan_bus(pdev, domain, bus, b);
489
490 /* Claim resources before going "live" with our devices */
491 pci_walk_bus(b, pcifront_claim_resource, pdev);
492
493 /* Create SysFS and notify udev of the devices. Aka: "going live" */
494 pci_bus_add_devices(b);
495
496 return err;
497
498err_out:
499 kfree(bus_entry);
500 kfree(sd);
501
502 return err;
503}
504
505static int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
506 unsigned int domain, unsigned int bus)
507{
508 int err;
509 struct pci_bus *b;
510
511#ifndef CONFIG_PCI_DOMAINS
512 if (domain != 0) {
513 dev_err(&pdev->xdev->dev,
514 "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
515 dev_err(&pdev->xdev->dev,
516 "Please compile with CONFIG_PCI_DOMAINS\n");
517 return -EINVAL;
518 }
519#endif
520
521 dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
522 domain, bus);
523
524 b = pci_find_bus(domain, bus);
525 if (!b)
526 /* If the bus is unknown, create it. */
527 return pcifront_scan_root(pdev, domain, bus);
528
529 err = pcifront_scan_bus(pdev, domain, bus, b);
530
531 /* Claim resources before going "live" with our devices */
532 pci_walk_bus(b, pcifront_claim_resource, pdev);
533
534 /* Create SysFS and notify udev of the devices. Aka: "going live" */
535 pci_bus_add_devices(b);
536
537 return err;
538}
539
540static void free_root_bus_devs(struct pci_bus *bus)
541{
542 struct pci_dev *dev;
543
544 while (!list_empty(&bus->devices)) {
545 dev = container_of(bus->devices.next, struct pci_dev,
546 bus_list);
547 dev_dbg(&dev->dev, "removing device\n");
548 pci_remove_bus_device(dev);
549 }
550}
551
552static void pcifront_free_roots(struct pcifront_device *pdev)
553{
554 struct pci_bus_entry *bus_entry, *t;
555
556 dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");
557
558 list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) {
559 list_del(&bus_entry->list);
560
561 free_root_bus_devs(bus_entry->bus);
562
563 kfree(bus_entry->bus->sysdata);
564
565 device_unregister(bus_entry->bus->bridge);
566 pci_remove_bus(bus_entry->bus);
567
568 kfree(bus_entry);
569 }
570}
571
572static pci_ers_result_t pcifront_common_process(int cmd,
573 struct pcifront_device *pdev,
574 pci_channel_state_t state)
575{
576 pci_ers_result_t result;
577 struct pci_driver *pdrv;
578 int bus = pdev->sh_info->aer_op.bus;
579 int devfn = pdev->sh_info->aer_op.devfn;
580 struct pci_dev *pcidev;
581 int flag = 0;
582
583 dev_dbg(&pdev->xdev->dev,
584 "pcifront AER process: cmd %x (bus:%x, devfn%x)",
585 cmd, bus, devfn);
586 result = PCI_ERS_RESULT_NONE;
587
588 pcidev = pci_get_bus_and_slot(bus, devfn);
589 if (!pcidev || !pcidev->driver) {
590 dev_err(&pdev->xdev->dev, "device or AER driver is NULL\n");
591 if (pcidev)
592 pci_dev_put(pcidev);
593 return result;
594 }
595 pdrv = pcidev->driver;
596
597 if (get_driver(&pdrv->driver)) {
598 if (pdrv->err_handler && pdrv->err_handler->error_detected) {
599 dev_dbg(&pcidev->dev,
600 "trying to call AER service\n");
601 if (pcidev) {
602 flag = 1;
603 switch (cmd) {
604 case XEN_PCI_OP_aer_detected:
605 result = pdrv->err_handler->
606 error_detected(pcidev, state);
607 break;
608 case XEN_PCI_OP_aer_mmio:
609 result = pdrv->err_handler->
610 mmio_enabled(pcidev);
611 break;
612 case XEN_PCI_OP_aer_slotreset:
613 result = pdrv->err_handler->
614 slot_reset(pcidev);
615 break;
616 case XEN_PCI_OP_aer_resume:
617 pdrv->err_handler->resume(pcidev);
618 break;
619 default:
620 dev_err(&pdev->xdev->dev,
621 "bad request in aer recovery "
622 "operation!\n");
623
624 }
625 }
626 }
627 put_driver(&pdrv->driver);
628 }
629 if (!flag)
630 result = PCI_ERS_RESULT_NONE;
631
632 return result;
633}
634
635
636static void pcifront_do_aer(struct work_struct *data)
637{
638 struct pcifront_device *pdev =
639 container_of(data, struct pcifront_device, op_work);
640 int cmd = pdev->sh_info->aer_op.cmd;
641 pci_channel_state_t state =
642 (pci_channel_state_t)pdev->sh_info->aer_op.err;
643
644	/* If a pci_conf op is in progress, we have to wait until
645	 * it is done before servicing the AER op. */
646 dev_dbg(&pdev->xdev->dev,
647 "pcifront service aer bus %x devfn %x\n",
648 pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn);
649
650 pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state);
651
652	/* Post the result back to the backend. */
653 wmb();
654 clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags);
655 notify_remote_via_evtchn(pdev->evtchn);
656
657	/* in case we lost an AER request in the small window above */
658 smp_mb__before_clear_bit();
659 clear_bit(_PDEVB_op_active, &pdev->flags);
660 smp_mb__after_clear_bit();
661
662 schedule_pcifront_aer_op(pdev);
663
664}
665
666static irqreturn_t pcifront_handler_aer(int irq, void *dev)
667{
668 struct pcifront_device *pdev = dev;
669 schedule_pcifront_aer_op(pdev);
670 return IRQ_HANDLED;
671}
672static int pcifront_connect(struct pcifront_device *pdev)
673{
674 int err = 0;
675
676 spin_lock(&pcifront_dev_lock);
677
678 if (!pcifront_dev) {
679 dev_info(&pdev->xdev->dev, "Installing PCI frontend\n");
680 pcifront_dev = pdev;
681 } else {
682 dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n");
683 err = -EEXIST;
684 }
685
686 spin_unlock(&pcifront_dev_lock);
687
688 return err;
689}
690
691static void pcifront_disconnect(struct pcifront_device *pdev)
692{
693 spin_lock(&pcifront_dev_lock);
694
695 if (pdev == pcifront_dev) {
696 dev_info(&pdev->xdev->dev,
697 "Disconnecting PCI Frontend Buses\n");
698 pcifront_dev = NULL;
699 }
700
701 spin_unlock(&pcifront_dev_lock);
702}
703static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
704{
705 struct pcifront_device *pdev;
706
707 pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL);
708 if (pdev == NULL)
709 goto out;
710
711 pdev->sh_info =
712 (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL);
713 if (pdev->sh_info == NULL) {
714 kfree(pdev);
715 pdev = NULL;
716 goto out;
717 }
718 pdev->sh_info->flags = 0;
719
720	/* Flag for registering the PV AER handler */
721 set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags);
722
723 dev_set_drvdata(&xdev->dev, pdev);
724 pdev->xdev = xdev;
725
726 INIT_LIST_HEAD(&pdev->root_buses);
727
728 spin_lock_init(&pdev->sh_info_lock);
729
730 pdev->evtchn = INVALID_EVTCHN;
731 pdev->gnt_ref = INVALID_GRANT_REF;
732 pdev->irq = -1;
733
734 INIT_WORK(&pdev->op_work, pcifront_do_aer);
735
736 dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
737 pdev, pdev->sh_info);
738out:
739 return pdev;
740}
741
742static void free_pdev(struct pcifront_device *pdev)
743{
744 dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev);
745
746 pcifront_free_roots(pdev);
747
748 cancel_work_sync(&pdev->op_work);
749
750 if (pdev->irq >= 0)
751 unbind_from_irqhandler(pdev->irq, pdev);
752
753 if (pdev->evtchn != INVALID_EVTCHN)
754 xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
755
756 if (pdev->gnt_ref != INVALID_GRANT_REF)
757 gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */,
758 (unsigned long)pdev->sh_info);
759 else
760 free_page((unsigned long)pdev->sh_info);
761
762 dev_set_drvdata(&pdev->xdev->dev, NULL);
763
764 kfree(pdev);
765}
766
767static int pcifront_publish_info(struct pcifront_device *pdev)
768{
769 int err = 0;
770 struct xenbus_transaction trans;
771
772 err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
773 if (err < 0)
774 goto out;
775
776 pdev->gnt_ref = err;
777
778 err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
779 if (err)
780 goto out;
781
782 err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer,
783 0, "pcifront", pdev);
784
785 if (err < 0)
786 return err;
787
788 pdev->irq = err;
789
790do_publish:
791 err = xenbus_transaction_start(&trans);
792 if (err) {
793 xenbus_dev_fatal(pdev->xdev, err,
794 "Error writing configuration for backend "
795 "(start transaction)");
796 goto out;
797 }
798
799 err = xenbus_printf(trans, pdev->xdev->nodename,
800 "pci-op-ref", "%u", pdev->gnt_ref);
801 if (!err)
802 err = xenbus_printf(trans, pdev->xdev->nodename,
803 "event-channel", "%u", pdev->evtchn);
804 if (!err)
805 err = xenbus_printf(trans, pdev->xdev->nodename,
806 "magic", XEN_PCI_MAGIC);
807
808 if (err) {
809 xenbus_transaction_end(trans, 1);
810 xenbus_dev_fatal(pdev->xdev, err,
811 "Error writing configuration for backend");
812 goto out;
813 } else {
814 err = xenbus_transaction_end(trans, 0);
815 if (err == -EAGAIN)
816 goto do_publish;
817 else if (err) {
818 xenbus_dev_fatal(pdev->xdev, err,
819 "Error completing transaction "
820 "for backend");
821 goto out;
822 }
823 }
824
825 xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
826
827 dev_dbg(&pdev->xdev->dev, "publishing successful!\n");
828
829out:
830 return err;
831}
832
833static int __devinit pcifront_try_connect(struct pcifront_device *pdev)
834{
835 int err = -EFAULT;
836 int i, num_roots, len;
837 char str[64];
838 unsigned int domain, bus;
839
840
841 /* Only connect once */
842 if (xenbus_read_driver_state(pdev->xdev->nodename) !=
843 XenbusStateInitialised)
844 goto out;
845
846 err = pcifront_connect(pdev);
847 if (err) {
848 xenbus_dev_fatal(pdev->xdev, err,
849 "Error connecting PCI Frontend");
850 goto out;
851 }
852
853 err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
854 "root_num", "%d", &num_roots);
855 if (err == -ENOENT) {
856 xenbus_dev_error(pdev->xdev, err,
857 "No PCI Roots found, trying 0000:00");
858 err = pcifront_scan_root(pdev, 0, 0);
859 num_roots = 0;
860 } else if (err != 1) {
861 if (err == 0)
862 err = -EINVAL;
863 xenbus_dev_fatal(pdev->xdev, err,
864 "Error reading number of PCI roots");
865 goto out;
866 }
867
868 for (i = 0; i < num_roots; i++) {
869 len = snprintf(str, sizeof(str), "root-%d", i);
870 if (unlikely(len >= (sizeof(str) - 1))) {
871 err = -ENOMEM;
872 goto out;
873 }
874
875 err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
876 "%x:%x", &domain, &bus);
877 if (err != 2) {
878 if (err >= 0)
879 err = -EINVAL;
880 xenbus_dev_fatal(pdev->xdev, err,
881 "Error reading PCI root %d", i);
882 goto out;
883 }
884
885 err = pcifront_scan_root(pdev, domain, bus);
886 if (err) {
887 xenbus_dev_fatal(pdev->xdev, err,
888 "Error scanning PCI root %04x:%02x",
889 domain, bus);
890 goto out;
891 }
892 }
893
894 err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
895
896out:
897 return err;
898}
899
900static int pcifront_try_disconnect(struct pcifront_device *pdev)
901{
902 int err = 0;
903 enum xenbus_state prev_state;
904
905
906 prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
907
908 if (prev_state >= XenbusStateClosing)
909 goto out;
910
911 if (prev_state == XenbusStateConnected) {
912 pcifront_free_roots(pdev);
913 pcifront_disconnect(pdev);
914 }
915
916 err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
917
918out:
919
920 return err;
921}
922
923static int __devinit pcifront_attach_devices(struct pcifront_device *pdev)
924{
925 int err = -EFAULT;
926 int i, num_roots, len;
927 unsigned int domain, bus;
928 char str[64];
929
930 if (xenbus_read_driver_state(pdev->xdev->nodename) !=
931 XenbusStateReconfiguring)
932 goto out;
933
934 err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
935 "root_num", "%d", &num_roots);
936 if (err == -ENOENT) {
937 xenbus_dev_error(pdev->xdev, err,
938 "No PCI Roots found, trying 0000:00");
939 err = pcifront_rescan_root(pdev, 0, 0);
940 num_roots = 0;
941 } else if (err != 1) {
942 if (err == 0)
943 err = -EINVAL;
944 xenbus_dev_fatal(pdev->xdev, err,
945 "Error reading number of PCI roots");
946 goto out;
947 }
948
949 for (i = 0; i < num_roots; i++) {
950 len = snprintf(str, sizeof(str), "root-%d", i);
951 if (unlikely(len >= (sizeof(str) - 1))) {
952 err = -ENOMEM;
953 goto out;
954 }
955
956 err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
957 "%x:%x", &domain, &bus);
958 if (err != 2) {
959 if (err >= 0)
960 err = -EINVAL;
961 xenbus_dev_fatal(pdev->xdev, err,
962 "Error reading PCI root %d", i);
963 goto out;
964 }
965
966 err = pcifront_rescan_root(pdev, domain, bus);
967 if (err) {
968 xenbus_dev_fatal(pdev->xdev, err,
969 "Error scanning PCI root %04x:%02x",
970 domain, bus);
971 goto out;
972 }
973 }
974
975 xenbus_switch_state(pdev->xdev, XenbusStateConnected);
976
977out:
978 return err;
979}
980
981static int pcifront_detach_devices(struct pcifront_device *pdev)
982{
983 int err = 0;
984 int i, num_devs;
985 unsigned int domain, bus, slot, func;
986 struct pci_bus *pci_bus;
987 struct pci_dev *pci_dev;
988 char str[64];
989
990 if (xenbus_read_driver_state(pdev->xdev->nodename) !=
991 XenbusStateConnected)
992 goto out;
993
994 err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d",
995 &num_devs);
996 if (err != 1) {
997 if (err >= 0)
998 err = -EINVAL;
999 xenbus_dev_fatal(pdev->xdev, err,
1000 "Error reading number of PCI devices");
1001 goto out;
1002 }
1003
1004 /* Find devices being detached and remove them. */
1005 for (i = 0; i < num_devs; i++) {
1006 int l, state;
1007 l = snprintf(str, sizeof(str), "state-%d", i);
1008 if (unlikely(l >= (sizeof(str) - 1))) {
1009 err = -ENOMEM;
1010 goto out;
1011 }
1012 err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d",
1013 &state);
1014 if (err != 1)
1015 state = XenbusStateUnknown;
1016
1017 if (state != XenbusStateClosing)
1018 continue;
1019
1020 /* Remove device. */
1021 l = snprintf(str, sizeof(str), "vdev-%d", i);
1022 if (unlikely(l >= (sizeof(str) - 1))) {
1023 err = -ENOMEM;
1024 goto out;
1025 }
1026 err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
1027 "%x:%x:%x.%x", &domain, &bus, &slot, &func);
1028 if (err != 4) {
1029 if (err >= 0)
1030 err = -EINVAL;
1031 xenbus_dev_fatal(pdev->xdev, err,
1032 "Error reading PCI device %d", i);
1033 goto out;
1034 }
1035
1036 pci_bus = pci_find_bus(domain, bus);
1037 if (!pci_bus) {
1038 dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n",
1039 domain, bus);
1040 continue;
1041 }
1042 pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func));
1043 if (!pci_dev) {
1044 dev_dbg(&pdev->xdev->dev,
1045 "Cannot get PCI device %04x:%02x:%02x.%02x\n",
1046 domain, bus, slot, func);
1047 continue;
1048 }
1049 pci_remove_bus_device(pci_dev);
1050 pci_dev_put(pci_dev);
1051
1052 dev_dbg(&pdev->xdev->dev,
1053 "PCI device %04x:%02x:%02x.%02x removed.\n",
1054 domain, bus, slot, func);
1055 }
1056
1057 err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
1058
1059out:
1060 return err;
1061}
1062
1063static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev,
1064 enum xenbus_state be_state)
1065{
1066 struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
1067
1068 switch (be_state) {
1069 case XenbusStateUnknown:
1070 case XenbusStateInitialising:
1071 case XenbusStateInitWait:
1072 case XenbusStateInitialised:
1073 case XenbusStateClosed:
1074 break;
1075
1076 case XenbusStateConnected:
1077 pcifront_try_connect(pdev);
1078 break;
1079
1080 case XenbusStateClosing:
1081 dev_warn(&xdev->dev, "backend going away!\n");
1082 pcifront_try_disconnect(pdev);
1083 break;
1084
1085 case XenbusStateReconfiguring:
1086 pcifront_detach_devices(pdev);
1087 break;
1088
1089 case XenbusStateReconfigured:
1090 pcifront_attach_devices(pdev);
1091 break;
1092 }
1093}
1094
1095static int pcifront_xenbus_probe(struct xenbus_device *xdev,
1096 const struct xenbus_device_id *id)
1097{
1098 int err = 0;
1099 struct pcifront_device *pdev = alloc_pdev(xdev);
1100
1101 if (pdev == NULL) {
1102 err = -ENOMEM;
1103 xenbus_dev_fatal(xdev, err,
1104 "Error allocating pcifront_device struct");
1105 goto out;
1106 }
1107
1108 err = pcifront_publish_info(pdev);
1109 if (err)
1110 free_pdev(pdev);
1111
1112out:
1113 return err;
1114}
1115
1116static int pcifront_xenbus_remove(struct xenbus_device *xdev)
1117{
1118 struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
1119 if (pdev)
1120 free_pdev(pdev);
1121
1122 return 0;
1123}
1124
1125static const struct xenbus_device_id xenpci_ids[] = {
1126 {"pci"},
1127 {""},
1128};
1129
1130static struct xenbus_driver xenbus_pcifront_driver = {
1131 .name = "pcifront",
1132 .owner = THIS_MODULE,
1133 .ids = xenpci_ids,
1134 .probe = pcifront_xenbus_probe,
1135 .remove = pcifront_xenbus_remove,
1136 .otherend_changed = pcifront_backend_changed,
1137};
1138
1139static int __init pcifront_init(void)
1140{
1141 if (!xen_pv_domain() || xen_initial_domain())
1142 return -ENODEV;
1143
1144 pci_frontend_registrar(1 /* enable */);
1145
1146 return xenbus_register_frontend(&xenbus_pcifront_driver);
1147}
1148
1149static void __exit pcifront_cleanup(void)
1150{
1151 xenbus_unregister_driver(&xenbus_pcifront_driver);
1152 pci_frontend_registrar(0 /* disable */);
1153}
1154module_init(pcifront_init);
1155module_exit(pcifront_cleanup);
1156
1157MODULE_DESCRIPTION("Xen PCI passthrough frontend.");
1158MODULE_LICENSE("GPL");
1159MODULE_ALIAS("xen:pci");