-rw-r--r-- Documentation/PCI/MSI-HOWTO.txt | 89
-rw-r--r-- Documentation/block/cfq-iosched.txt | 71
-rw-r--r-- Documentation/kernel-parameters.txt | 9
-rw-r--r-- Documentation/virtual/00-INDEX | 3
-rw-r--r-- Documentation/virtual/lguest/lguest.c | 3
-rw-r--r-- Documentation/virtual/virtio-spec.txt | 2200
-rw-r--r-- MAINTAINERS | 2
-rw-r--r-- Makefile | 4
-rw-r--r-- arch/ia64/Kconfig | 1
-rw-r--r-- arch/ia64/configs/generic_defconfig | 1
-rw-r--r-- arch/sparc/Kconfig | 1
-rw-r--r-- arch/sparc/include/asm/spinlock_32.h | 11
-rw-r--r-- arch/sparc/include/asm/spinlock_64.h | 6
-rw-r--r-- arch/sparc/kernel/pcic.c | 4
-rw-r--r-- arch/x86/include/asm/xen/page.h | 4
-rw-r--r-- arch/x86/kvm/Kconfig | 3
-rw-r--r-- arch/x86/mm/fault.c | 1
-rw-r--r-- arch/x86/pci/acpi.c | 9
-rw-r--r-- arch/x86/xen/Makefile | 2
-rw-r--r-- arch/x86/xen/enlighten.c | 4
-rw-r--r-- arch/x86/xen/mmu.c | 12
-rw-r--r-- arch/x86/xen/smp.c | 4
-rw-r--r-- block/Kconfig | 10
-rw-r--r-- block/Makefile | 1
-rw-r--r-- block/blk-core.c | 8
-rw-r--r-- block/blk-flush.c | 25
-rw-r--r-- block/blk-softirq.c | 8
-rw-r--r-- block/blk-throttle.c | 4
-rw-r--r-- block/blk.h | 2
-rw-r--r-- block/bsg-lib.c | 298
-rw-r--r-- block/cfq-iosched.c | 21
-rw-r--r-- block/genhd.c | 8
-rw-r--r-- drivers/ata/Kconfig | 9
-rw-r--r-- drivers/ata/Makefile | 1
-rw-r--r-- drivers/ata/pata_imx.c | 253
-rw-r--r-- drivers/ata/pata_via.c | 18
-rw-r--r-- drivers/ata/sata_dwc_460ex.c | 14
-rw-r--r-- drivers/ata/sata_sil.c | 2
-rw-r--r-- drivers/base/power/domain.c | 30
-rw-r--r-- drivers/base/regmap/regmap-i2c.c | 1
-rw-r--r-- drivers/base/regmap/regmap-spi.c | 3
-rw-r--r-- drivers/base/regmap/regmap.c | 2
-rw-r--r-- drivers/block/Kconfig | 17
-rw-r--r-- drivers/block/drbd/drbd_nl.c | 4
-rw-r--r-- drivers/block/loop.c | 297
-rw-r--r-- drivers/block/swim3.c | 1
-rw-r--r-- drivers/block/xen-blkfront.c | 6
-rw-r--r-- drivers/cdrom/cdrom.c | 8
-rw-r--r-- drivers/edac/i7core_edac.c | 2
-rw-r--r-- drivers/firewire/core-cdev.c | 24
-rw-r--r-- drivers/firewire/core-device.c | 15
-rw-r--r-- drivers/firewire/ohci.c | 9
-rw-r--r-- drivers/gpu/drm/i915/i915_debugfs.c | 2
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h | 4
-rw-r--r-- drivers/gpu/drm/i915/i915_irq.c | 6
-rw-r--r-- drivers/gpu/drm/i915/i915_reg.h | 17
-rw-r--r-- drivers/gpu/drm/i915/i915_suspend.c | 3
-rw-r--r-- drivers/gpu/drm/i915/intel_display.c | 187
-rw-r--r-- drivers/gpu/drm/i915/intel_dp.c | 7
-rw-r--r-- drivers/gpu/drm/i915/intel_drv.h | 3
-rw-r--r-- drivers/gpu/drm/i915/intel_lvds.c | 82
-rw-r--r-- drivers/gpu/drm/i915/intel_opregion.c | 1
-rw-r--r-- drivers/gpu/drm/i915/intel_panel.c | 72
-rw-r--r-- drivers/gpu/drm/i915/intel_ringbuffer.c | 4
-rw-r--r-- drivers/gpu/drm/radeon/atombios_dp.c | 12
-rw-r--r-- drivers/gpu/drm/radeon/evergreen.c | 2
-rw-r--r-- drivers/gpu/drm/radeon/radeon_connectors.c | 29
-rw-r--r-- drivers/gpu/drm/radeon/radeon_device.c | 4
-rw-r--r-- drivers/gpu/drm/radeon/radeon_encoders.c | 3
-rw-r--r-- drivers/gpu/drm/radeon/radeon_mode.h | 1
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.c | 10
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.h | 2
-rw-r--r-- drivers/infiniband/ulp/iser/iser_initiator.c | 2
-rw-r--r-- drivers/pci/hotplug/pcihp_slot.c | 45
-rw-r--r-- drivers/pci/of.c | 2
-rw-r--r-- drivers/pci/pci.c | 67
-rw-r--r-- drivers/pci/pci.h | 4
-rw-r--r-- drivers/pci/probe.c | 146
-rw-r--r-- drivers/pci/setup-bus.c | 166
-rw-r--r-- drivers/pci/setup-res.c | 152
-rw-r--r-- drivers/rtc/interface.c | 4
-rw-r--r-- drivers/staging/gma500/mdfld_dsi_dbi.c | 3
-rw-r--r-- drivers/staging/gma500/mdfld_dsi_dbi.h | 3
-rw-r--r-- drivers/staging/gma500/mdfld_dsi_dpi.c | 7
-rw-r--r-- drivers/staging/gma500/mdfld_dsi_output.c | 4
-rw-r--r-- drivers/staging/gma500/medfield.h | 2
-rw-r--r-- drivers/staging/gma500/psb_drv.h | 1
-rw-r--r-- drivers/xen/xen-selfballoon.c | 1
-rw-r--r-- fs/befs/linuxvfs.c | 23
-rw-r--r-- fs/btrfs/ctree.h | 10
-rw-r--r-- fs/btrfs/extent-tree.c | 77
-rw-r--r-- fs/btrfs/file.c | 28
-rw-r--r-- fs/btrfs/free-space-cache.c | 16
-rw-r--r-- fs/btrfs/inode.c | 12
-rw-r--r-- fs/btrfs/ioctl.c | 4
-rw-r--r-- fs/btrfs/tree-log.c | 28
-rw-r--r-- fs/btrfs/volumes.c | 51
-rw-r--r-- fs/btrfs/volumes.h | 2
-rw-r--r-- fs/cifs/cifs_debug.c | 2
-rw-r--r-- fs/cifs/cifsacl.c | 28
-rw-r--r-- fs/cifs/cifsfs.h | 2
-rw-r--r-- fs/cifs/cifsglob.h | 56
-rw-r--r-- fs/cifs/connect.c | 3
-rw-r--r-- fs/cifs/dir.c | 4
-rw-r--r-- fs/cifs/transport.c | 51
-rw-r--r-- fs/ext4/ext4_jbd2.h | 4
-rw-r--r-- fs/ext4/indirect.c | 9
-rw-r--r-- fs/ext4/inode.c | 26
-rw-r--r-- fs/ext4/page-io.c | 6
-rw-r--r-- fs/ext4/super.c | 1
-rw-r--r-- fs/fat/dir.c | 2
-rw-r--r-- fs/fat/inode.c | 7
-rw-r--r-- fs/jfs/jfs_umount.c | 4
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c | 1
-rw-r--r-- fs/nfs/callback.h | 2
-rw-r--r-- fs/nfs/callback_proc.c | 25
-rw-r--r-- fs/nfs/callback_xdr.c | 24
-rw-r--r-- fs/nfs/objlayout/objio_osd.c | 28
-rw-r--r-- fs/nfs/objlayout/pnfs_osd_xdr_cli.c | 3
-rw-r--r-- include/asm-generic/memory_model.h | 4
-rw-r--r-- include/linux/blk_types.h | 7
-rw-r--r-- include/linux/blkdev.h | 5
-rw-r--r-- include/linux/blktrace_api.h | 5
-rw-r--r-- include/linux/bsg-lib.h | 73
-rw-r--r-- include/linux/hash.h | 2
-rw-r--r-- include/linux/irq.h | 11
-rw-r--r-- include/linux/irqdesc.h | 1
-rw-r--r-- include/linux/loop.h | 5
-rw-r--r-- include/linux/miscdevice.h | 1
-rw-r--r-- include/linux/mm.h | 6
-rw-r--r-- include/linux/pci.h | 16
-rw-r--r-- include/linux/pm_domain.h | 10
-rw-r--r-- include/linux/rtc.h | 3
-rw-r--r-- include/trace/events/block.h | 20
-rw-r--r-- kernel/irq/generic-chip.c | 4
-rw-r--r-- kernel/irq/irqdesc.c | 37
-rw-r--r-- kernel/irq/manage.c | 18
-rw-r--r-- kernel/lockdep.c | 8
-rw-r--r-- kernel/power/Kconfig | 4
-rw-r--r-- kernel/trace/blktrace.c | 21
-rw-r--r-- mm/highmem.c | 4
-rw-r--r-- sound/aoa/fabrics/layout.c | 2
-rw-r--r-- sound/pci/ac97/ac97_patch.c | 1
-rw-r--r-- sound/pci/hda/alc268_quirks.c | 36
-rw-r--r-- sound/pci/hda/hda_eld.c | 31
-rw-r--r-- sound/pci/hda/patch_cirrus.c | 8
-rw-r--r-- sound/pci/hda/patch_realtek.c | 2
-rw-r--r-- sound/usb/caiaq/audio.c | 31
-rw-r--r-- sound/usb/caiaq/device.h | 1
-rw-r--r-- sound/usb/mixer.c | 3
-rw-r--r-- tools/perf/builtin-probe.c | 14
-rw-r--r-- tools/perf/builtin-record.c | 4
-rw-r--r-- tools/perf/builtin-stat.c | 7
-rw-r--r-- tools/perf/util/dwarf-aux.c | 210
-rw-r--r-- tools/perf/util/dwarf-aux.h | 11
-rw-r--r-- tools/perf/util/evlist.c | 11
-rw-r--r-- tools/perf/util/header.c | 11
-rw-r--r-- tools/perf/util/include/linux/compiler.h | 2
-rw-r--r-- tools/perf/util/parse-events.c | 8
-rw-r--r-- tools/perf/util/probe-finder.c | 231
-rw-r--r-- tools/perf/util/probe-finder.h | 2
-rw-r--r-- tools/perf/util/symbol.c | 59
-rw-r--r-- tools/perf/util/symbol.h | 1
-rw-r--r-- tools/perf/util/ui/browsers/top.c | 1
165 files changed, 5170 insertions(+), 934 deletions(-)
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 3f5e0b09bed..53e6fca146d 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -45,7 +45,7 @@ arrived in memory (this becomes more likely with devices behind PCI-PCI
45bridges). In order to ensure that all the data has arrived in memory, 45bridges). In order to ensure that all the data has arrived in memory,
46the interrupt handler must read a register on the device which raised 46the interrupt handler must read a register on the device which raised
47the interrupt. PCI transaction ordering rules require that all the data 47the interrupt. PCI transaction ordering rules require that all the data
48arrives in memory before the value can be returned from the register. 48arrive in memory before the value may be returned from the register.
49Using MSIs avoids this problem as the interrupt-generating write cannot 49Using MSIs avoids this problem as the interrupt-generating write cannot
50pass the data writes, so by the time the interrupt is raised, the driver 50pass the data writes, so by the time the interrupt is raised, the driver
51knows that all the data has arrived in memory. 51knows that all the data has arrived in memory.
@@ -86,13 +86,13 @@ device.
86 86
87int pci_enable_msi(struct pci_dev *dev) 87int pci_enable_msi(struct pci_dev *dev)
88 88
89A successful call will allocate ONE interrupt to the device, regardless 89A successful call allocates ONE interrupt to the device, regardless
90of how many MSIs the device supports. The device will be switched from 90of how many MSIs the device supports. The device is switched from
91pin-based interrupt mode to MSI mode. The dev->irq number is changed 91pin-based interrupt mode to MSI mode. The dev->irq number is changed
92to a new number which represents the message signaled interrupt. 92to a new number which represents the message signaled interrupt;
93This function should be called before the driver calls request_irq() 93consequently, this function should be called before the driver calls
94since enabling MSIs disables the pin-based IRQ and the driver will not 94request_irq(), because an MSI is delivered via a vector that is
95receive interrupts on the old interrupt. 95different from the vector of a pin-based interrupt.
96 96
974.2.2 pci_enable_msi_block 974.2.2 pci_enable_msi_block
98 98
@@ -111,20 +111,20 @@ the device are in the range dev->irq to dev->irq + count - 1.
111 111
112If this function returns a negative number, it indicates an error and 112If this function returns a negative number, it indicates an error and
113the driver should not attempt to request any more MSI interrupts for 113the driver should not attempt to request any more MSI interrupts for
114this device. If this function returns a positive number, it will be 114this device. If this function returns a positive number, it is
115less than 'count' and indicate the number of interrupts that could have 115less than 'count' and indicates the number of interrupts that could have
116been allocated. In neither case will the irq value have been 116been allocated. In neither case is the irq value updated or the device
117updated, nor will the device have been switched into MSI mode. 117switched into MSI mode.
118 118
119The device driver must decide what action to take if 119The device driver must decide what action to take if
120pci_enable_msi_block() returns a value less than the number asked for. 120pci_enable_msi_block() returns a value less than the number requested.
121Some devices can make use of fewer interrupts than the maximum they 121For instance, the driver could still make use of fewer interrupts;
122request; in this case the driver should call pci_enable_msi_block() 122in this case the driver should call pci_enable_msi_block()
123again. Note that it is not guaranteed to succeed, even when the 123again. Note that it is not guaranteed to succeed, even when the
124'count' has been reduced to the value returned from a previous call to 124'count' has been reduced to the value returned from a previous call to
125pci_enable_msi_block(). This is because there are multiple constraints 125pci_enable_msi_block(). This is because there are multiple constraints
126on the number of vectors that can be allocated; pci_enable_msi_block() 126on the number of vectors that can be allocated; pci_enable_msi_block()
127will return as soon as it finds any constraint that doesn't allow the 127returns as soon as it finds any constraint that doesn't allow the
128call to succeed. 128call to succeed.
129 129
1304.2.3 pci_disable_msi 1304.2.3 pci_disable_msi
@@ -137,10 +137,10 @@ interrupt number and frees the previously allocated message signaled
137interrupt(s). The interrupt may subsequently be assigned to another 137interrupt(s). The interrupt may subsequently be assigned to another
138device, so drivers should not cache the value of dev->irq. 138device, so drivers should not cache the value of dev->irq.
139 139
140A device driver must always call free_irq() on the interrupt(s) 140Before calling this function, a device driver must always call free_irq()
141for which it has called request_irq() before calling this function. 141on any interrupt for which it previously called request_irq().
142Failure to do so will result in a BUG_ON(), the device will be left with 142Failure to do so results in a BUG_ON(), leaving the device with
143MSI enabled and will leak its vector. 143MSI enabled and thus leaking its vector.
144 144
1454.3 Using MSI-X 1454.3 Using MSI-X
146 146
@@ -155,10 +155,10 @@ struct msix_entry {
155}; 155};
156 156
157This allows for the device to use these interrupts in a sparse fashion; 157This allows for the device to use these interrupts in a sparse fashion;
158for example it could use interrupts 3 and 1027 and allocate only a 158for example, it could use interrupts 3 and 1027 and yet allocate only a
159two-element array. The driver is expected to fill in the 'entry' value 159two-element array. The driver is expected to fill in the 'entry' value
160in each element of the array to indicate which entries it wants the kernel 160in each element of the array to indicate for which entries the kernel
161to assign interrupts for. It is invalid to fill in two entries with the 161should assign interrupts; it is invalid to fill in two entries with the
162same number. 162same number.
163 163
1644.3.1 pci_enable_msix 1644.3.1 pci_enable_msix
@@ -168,10 +168,11 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
168Calling this function asks the PCI subsystem to allocate 'nvec' MSIs. 168Calling this function asks the PCI subsystem to allocate 'nvec' MSIs.
169The 'entries' argument is a pointer to an array of msix_entry structs 169The 'entries' argument is a pointer to an array of msix_entry structs
170which should be at least 'nvec' entries in size. On success, the 170which should be at least 'nvec' entries in size. On success, the
171function will return 0 and the device will have been switched into 171device is switched into MSI-X mode and the function returns 0.
172MSI-X interrupt mode. The 'vector' elements in each entry will have 172The 'vector' member in each entry is populated with the interrupt number;
173been filled in with the interrupt number. The driver should then call 173the driver should then call request_irq() for each 'vector' that it
174request_irq() for each 'vector' that it decides to use. 174decides to use. The device driver is responsible for keeping track of the
175interrupts assigned to the MSI-X vectors so it can free them again later.
175 176
176If this function returns a negative number, it indicates an error and 177If this function returns a negative number, it indicates an error and
177the driver should not attempt to allocate any more MSI-X interrupts for 178the driver should not attempt to allocate any more MSI-X interrupts for
@@ -181,16 +182,14 @@ below.
181 182
182This function, in contrast with pci_enable_msi(), does not adjust 183This function, in contrast with pci_enable_msi(), does not adjust
183dev->irq. The device will not generate interrupts for this interrupt 184dev->irq. The device will not generate interrupts for this interrupt
184number once MSI-X is enabled. The device driver is responsible for 185number once MSI-X is enabled.
185keeping track of the interrupts assigned to the MSI-X vectors so it can
186free them again later.
187 186
188Device drivers should normally call this function once per device 187Device drivers should normally call this function once per device
189during the initialization phase. 188during the initialization phase.
190 189
191It is ideal if drivers can cope with a variable number of MSI-X interrupts, 190It is ideal if drivers can cope with a variable number of MSI-X interrupts;
192there are many reasons why the platform may not be able to provide the 191there are many reasons why the platform may not be able to provide the
193exact number a driver asks for. 192exact number that a driver asks for.
194 193
195A request loop to achieve that might look like: 194A request loop to achieve that might look like:
196 195
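The HOWTO's example (its body lies outside this hunk) is the foo_driver_enable_msix() function named in the next hunk header. A sketch of what such a retry loop can look like, assuming a driver-private adapter structure with a pre-filled msix_entries[] array and a driver-chosen FOO_DRIVER_MINIMUM_NVEC floor (both illustrative names, not part of the HOWTO text shown here):

static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
{
	int rc;

	while (nvec >= FOO_DRIVER_MINIMUM_NVEC) {
		rc = pci_enable_msix(adapter->pdev,
				     adapter->msix_entries, nvec);
		if (rc == 0)
			return nvec;	/* got all 'nvec' vectors */
		if (rc < 0)
			return rc;	/* hard error, give up */
		nvec = rc;		/* only 'rc' vectors available, retry */
	}

	return -ENOSPC;
}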
@@ -212,15 +211,15 @@ static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
212 211
213void pci_disable_msix(struct pci_dev *dev) 212void pci_disable_msix(struct pci_dev *dev)
214 213
215This API should be used to undo the effect of pci_enable_msix(). It frees 214This function should be used to undo the effect of pci_enable_msix(). It frees
216the previously allocated message signaled interrupts. The interrupts may 215the previously allocated message signaled interrupts. The interrupts may
217subsequently be assigned to another device, so drivers should not cache 216subsequently be assigned to another device, so drivers should not cache
218the value of the 'vector' elements over a call to pci_disable_msix(). 217the value of the 'vector' elements over a call to pci_disable_msix().
219 218
220A device driver must always call free_irq() on the interrupt(s) 219Before calling this function, a device driver must always call free_irq()
221for which it has called request_irq() before calling this function. 220on any interrupt for which it previously called request_irq().
222Failure to do so will result in a BUG_ON(), the device will be left with 221Failure to do so results in a BUG_ON(), leaving the device with
223MSI enabled and will leak its vector. 222MSI-X enabled and thus leaking its vector.
224 223
2254.3.3 The MSI-X Table 2244.3.3 The MSI-X Table
226 225
@@ -232,10 +231,10 @@ mask or unmask an interrupt, it should call disable_irq() / enable_irq().
2324.4 Handling devices implementing both MSI and MSI-X capabilities 2314.4 Handling devices implementing both MSI and MSI-X capabilities
233 232
234If a device implements both MSI and MSI-X capabilities, it can 233If a device implements both MSI and MSI-X capabilities, it can
235run in either MSI mode or MSI-X mode but not both simultaneously. 234run in either MSI mode or MSI-X mode, but not both simultaneously.
236This is a requirement of the PCI spec, and it is enforced by the 235This is a requirement of the PCI spec, and it is enforced by the
237PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or 236PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or
238pci_enable_msix() when MSI is already enabled will result in an error. 237pci_enable_msix() when MSI is already enabled results in an error.
239If a device driver wishes to switch between MSI and MSI-X at runtime, 238If a device driver wishes to switch between MSI and MSI-X at runtime,
240it must first quiesce the device, then switch it back to pin-interrupt 239it must first quiesce the device, then switch it back to pin-interrupt
241mode, before calling pci_enable_msi() or pci_enable_msix() and resuming 240mode, before calling pci_enable_msi() or pci_enable_msix() and resuming
@@ -251,7 +250,7 @@ the MSI-X facilities in preference to the MSI facilities. As mentioned
251above, MSI-X supports any number of interrupts between 1 and 2048. 250above, MSI-X supports any number of interrupts between 1 and 2048.
252In contrast, MSI is restricted to a maximum of 32 interrupts (and 251In contrast, MSI is restricted to a maximum of 32 interrupts (and
253must be a power of two). In addition, the MSI interrupt vectors must 252must be a power of two). In addition, the MSI interrupt vectors must
254be allocated consecutively, so the system may not be able to allocate 253be allocated consecutively, so the system might not be able to allocate
255as many vectors for MSI as it could for MSI-X. On some platforms, MSI 254as many vectors for MSI as it could for MSI-X. On some platforms, MSI
256interrupts must all be targeted at the same set of CPUs whereas MSI-X 255interrupts must all be targeted at the same set of CPUs whereas MSI-X
257interrupts can all be targeted at different CPUs. 256interrupts can all be targeted at different CPUs.
@@ -281,7 +280,7 @@ disabled to enabled and back again.
281 280
282Using 'lspci -v' (as root) may show some devices with "MSI", "Message 281Using 'lspci -v' (as root) may show some devices with "MSI", "Message
283Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities 282Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities
284has an 'Enable' flag which will be followed with either "+" (enabled) 283has an 'Enable' flag which is followed with either "+" (enabled)
285or "-" (disabled). 284or "-" (disabled).
286 285
287 286
@@ -298,7 +297,7 @@ The PCI stack provides three ways to disable MSIs:
298 297
299Some host chipsets simply don't support MSIs properly. If we're 298Some host chipsets simply don't support MSIs properly. If we're
300lucky, the manufacturer knows this and has indicated it in the ACPI 299lucky, the manufacturer knows this and has indicated it in the ACPI
301FADT table. In this case, Linux will automatically disable MSIs. 300FADT table. In this case, Linux automatically disables MSIs.
302Some boards don't include this information in the table and so we have 301Some boards don't include this information in the table and so we have
303to detect them ourselves. The complete list of these is found near the 302to detect them ourselves. The complete list of these is found near the
304quirk_disable_all_msi() function in drivers/pci/quirks.c. 303quirk_disable_all_msi() function in drivers/pci/quirks.c.
@@ -317,7 +316,7 @@ Some bridges allow you to enable MSIs by changing some bits in their
317PCI configuration space (especially the Hypertransport chipsets such 316PCI configuration space (especially the Hypertransport chipsets such
318as the nVidia nForce and Serverworks HT2000). As with host chipsets, 317as the nVidia nForce and Serverworks HT2000). As with host chipsets,
319Linux mostly knows about them and automatically enables MSIs if it can. 318Linux mostly knows about them and automatically enables MSIs if it can.
320If you have a bridge which Linux doesn't yet know about, you can enable 319If you have a bridge unknown to Linux, you can enable
321MSIs in configuration space using whatever method you know works, then 320MSIs in configuration space using whatever method you know works, then
322enable MSIs on that bridge by doing: 321enable MSIs on that bridge by doing:
323 322
@@ -327,7 +326,7 @@ where $bridge is the PCI address of the bridge you've enabled (eg
3270000:00:0e.0). 3260000:00:0e.0).
328 327
329To disable MSIs, echo 0 instead of 1. Changing this value should be 328To disable MSIs, echo 0 instead of 1. Changing this value should be
330done with caution as it can break interrupt handling for all devices 329done with caution as it could break interrupt handling for all devices
331below this bridge. 330below this bridge.
332 331
333Again, please notify linux-pci@vger.kernel.org of any bridges that need 332Again, please notify linux-pci@vger.kernel.org of any bridges that need
@@ -336,7 +335,7 @@ special handling.
3365.3. Disabling MSIs on a single device 3355.3. Disabling MSIs on a single device
337 336
338Some devices are known to have faulty MSI implementations. Usually this 337Some devices are known to have faulty MSI implementations. Usually this
339is handled in the individual device driver but occasionally it's necessary 338is handled in the individual device driver, but occasionally it's necessary
340to handle this with a quirk. Some drivers have an option to disable use 339to handle this with a quirk. Some drivers have an option to disable use
341of MSI. While this is a convenient workaround for the driver author, 340of MSI. While this is a convenient workaround for the driver author,
342it is not good practice, and should not be emulated. 341it is not good practice, and should not be emulated.
@@ -350,7 +349,7 @@ for your machine. You should also check your .config to be sure you
350have enabled CONFIG_PCI_MSI. 349have enabled CONFIG_PCI_MSI.
351 350
352Then, 'lspci -t' gives the list of bridges above a device. Reading 351Then, 'lspci -t' gives the list of bridges above a device. Reading
353/sys/bus/pci/devices/*/msi_bus will tell you whether MSI are enabled (1) 352/sys/bus/pci/devices/*/msi_bus will tell you whether MSIs are enabled (1)
354or disabled (0). If 0 is found in any of the msi_bus files belonging 353or disabled (0). If 0 is found in any of the msi_bus files belonging
355to bridges between the PCI root and the device, MSIs are disabled. 354to bridges between the PCI root and the device, MSIs are disabled.
356 355
diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt
index e578feed6d8..6d670f57045 100644
--- a/Documentation/block/cfq-iosched.txt
+++ b/Documentation/block/cfq-iosched.txt
@@ -43,3 +43,74 @@ If one sets slice_idle=0 and if storage supports NCQ, CFQ internally switches
43to IOPS mode and starts providing fairness in terms of number of requests 43to IOPS mode and starts providing fairness in terms of number of requests
44dispatched. Note that this mode switching takes effect only for group 44dispatched. Note that this mode switching takes effect only for group
45scheduling. For non-cgroup users nothing should change. 45scheduling. For non-cgroup users nothing should change.
46
47CFQ IO scheduler Idling Theory
48===============================
49Idling on a queue is primarily about waiting for the next request to come
50on the same queue after completion of a request. During this wait CFQ will not
51dispatch requests from other cfq queues even if requests are pending there.
52
53The rationale behind idling is that it can cut down on the number of seeks
54on rotational media. For example, if a process is doing dependent
55sequential reads (the next read is issued only after completion of the previous
56one), then not dispatching requests from other queues helps because we
57do not move the disk head and keep on dispatching sequential IO from
58one queue.
59
60CFQ has following service trees and various queues are put on these trees.
61
62 sync-idle sync-noidle async
63
64All cfq queues doing synchronous sequential IO go on to sync-idle tree.
65On this tree we idle on each queue individually.
66
67All synchronous non-sequential queues go on the sync-noidle tree. Also, any
68requests which are marked with REQ_NOIDLE go on this service tree. On this
69tree we do not idle on individual queues; instead we idle on the whole group
70of queues, i.e. the tree. So if there are 4 queues waiting for IO to dispatch,
71we will idle only once the last queue has dispatched its IO and there is
72no more IO on this service tree.
73
74All async writes go on async service tree. There is no idling on async
75queues.
76
77CFQ has some optimizations for SSDs: if it detects non-rotational
78media which can support a higher queue depth (multiple requests in
79flight at a time), then it cuts down on idling of individual queues;
80all the queues move to the sync-noidle tree and only tree idling remains. This
81tree idling provides isolation from the buffered write queues on the async tree.
82
83FAQ
84===
85Q1. Why idle at all on queues marked with REQ_NOIDLE?
86
87A1. We only do tree idling (over all queues on the sync-noidle tree) for queues
88    marked with REQ_NOIDLE. This helps in providing isolation from the sync-idle
89    queues. Otherwise, in the presence of many sequential readers, other
90    synchronous IO might not get a fair share of the disk.
91
92    For example, say there are 10 sequential readers doing IO and they get
93    100ms each. If a REQ_NOIDLE request comes in, it will be scheduled
94    roughly 1 second later. If after completion of the REQ_NOIDLE request we
95    do not idle, and after a couple of milliseconds another REQ_NOIDLE
96    request comes in, it will again be scheduled 1 second later. Repeat this
97    and notice how a workload can lose its disk share and suffer due to
98    multiple sequential readers.
99
100    fsync can generate dependent IO where a bunch of data is written in the
101    context of fsync, and later some journaling data is written. The journaling
102    data comes in only after fsync has finished its IO (at least for ext4
103    that seemed to be the case). Now if one decides not to idle on the fsync
104    thread due to REQ_NOIDLE, then the next journaling write will not get
105    scheduled for another second. A process doing small fsyncs will suffer
106    badly in the presence of multiple sequential readers.
107
108    Hence doing tree idling on threads using the REQ_NOIDLE flag on requests
109    provides isolation from multiple sequential readers while at the same
110    time not idling on individual threads.
111
112Q2. When to specify REQ_NOIDLE?
113A2. I would think that whenever one is doing a synchronous write and not
114    expecting more writes to be dispatched from the same context soon, one
115    should be able to specify REQ_NOIDLE on the writes, and that should work
116    well for most cases.
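As a sketch of what that means in driver code (illustrative only: the helper name is made up, the bio is assumed to be fully prepared elsewhere, and the flag combination is an assumption based on the REQ_SYNC/REQ_NOIDLE semantics described above):

#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/blk_types.h>

/* Issue an already-prepared bio as a synchronous write that will not be
 * followed by further IO from this context: REQ_SYNC classifies it as
 * synchronous IO, REQ_NOIDLE tells CFQ not to idle waiting for a
 * follow-up request from this queue. */
static void foo_submit_sync_noidle(struct bio *bio)
{
	submit_bio(WRITE | REQ_SYNC | REQ_NOIDLE, bio);
}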
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 6ca1f5cb71e..614d0382e2c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1350,9 +1350,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1350 it is equivalent to "nosmp", which also disables 1350 it is equivalent to "nosmp", which also disables
1351 the IO APIC. 1351 the IO APIC.
1352 1352
1353 max_loop= [LOOP] Maximum number of loopback devices that can 1353 max_loop= [LOOP] The number of loop block devices that get
1354 be mounted 1354 (loop.max_loop) unconditionally pre-created at init time. The default
1355 Format: <1-256> 1355 number is configured by BLK_DEV_LOOP_MIN_COUNT. Instead
1356 of statically allocating a predefined number, loop
1357 devices can be requested on-demand with the
1358 /dev/loop-control interface.
1356 1359
1357 mcatest= [IA-64] 1360 mcatest= [IA-64]
1358 1361
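A minimal user-space sketch of the on-demand path (assumes a kernel and <linux/loop.h> providing /dev/loop-control and the LOOP_CTL_GET_FREE ioctl; error reporting trimmed):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/loop.h>

int main(void)
{
	char path[32];
	int idx;
	int ctl = open("/dev/loop-control", O_RDWR);

	if (ctl < 0)
		return 1;
	/* Ask the kernel for a free loop device, creating one if needed. */
	idx = ioctl(ctl, LOOP_CTL_GET_FREE);
	if (idx < 0)
		return 1;
	snprintf(path, sizeof(path), "/dev/loop%d", idx);
	printf("allocated %s\n", path);	/* LOOP_SET_FD on it attaches a file */
	return 0;
}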
diff --git a/Documentation/virtual/00-INDEX b/Documentation/virtual/00-INDEX
index fe0251c4cfb..8e601991d91 100644
--- a/Documentation/virtual/00-INDEX
+++ b/Documentation/virtual/00-INDEX
@@ -8,3 +8,6 @@ lguest/
8 - Extremely simple hypervisor for experimental/educational use. 8 - Extremely simple hypervisor for experimental/educational use.
9uml/ 9uml/
10 - User Mode Linux, builds/runs Linux kernel as a userspace program. 10 - User Mode Linux, builds/runs Linux kernel as a userspace program.
11virtio.txt
12 - Text version of draft virtio spec.
13 See http://ozlabs.org/~rusty/virtio-spec
diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c
index 043bd7df313..d928c134dee 100644
--- a/Documentation/virtual/lguest/lguest.c
+++ b/Documentation/virtual/lguest/lguest.c
@@ -1996,6 +1996,9 @@ int main(int argc, char *argv[])
1996 /* We use a simple helper to copy the arguments separated by spaces. */ 1996 /* We use a simple helper to copy the arguments separated by spaces. */
1997 concat((char *)(boot + 1), argv+optind+2); 1997 concat((char *)(boot + 1), argv+optind+2);
1998 1998
1999 /* Set kernel alignment to 16M (CONFIG_PHYSICAL_ALIGN) */
2000 boot->hdr.kernel_alignment = 0x1000000;
2001
1999 /* Boot protocol version: 2.07 supports the fields for lguest. */ 2002 /* Boot protocol version: 2.07 supports the fields for lguest. */
2000 boot->hdr.version = 0x207; 2003 boot->hdr.version = 0x207;
2001 2004
diff --git a/Documentation/virtual/virtio-spec.txt b/Documentation/virtual/virtio-spec.txt
new file mode 100644
index 00000000000..a350ae135b8
--- /dev/null
+++ b/Documentation/virtual/virtio-spec.txt
@@ -0,0 +1,2200 @@
1[Generated file: see http://ozlabs.org/~rusty/virtio-spec/]
2Virtio PCI Card Specification
3v0.9.1 DRAFT
4-
5
6Rusty Russell <rusty@rustcorp.com.au>, IBM Corporation (Editor)
7
82011 August 1.
9
10Purpose and Description
11
12This document describes the specifications of the “virtio” family
13of PCI[LaTeX Command: nomenclature] devices. These devices
14are found in virtual environments[LaTeX Command: nomenclature],
15yet by design they are not all that different from physical PCI
16devices, and this document treats them as such. This allows the
17guest to use standard PCI drivers and discovery mechanisms.
18
19The purpose of virtio and this specification is that virtual
20environments and guests should have a straightforward, efficient,
21standard and extensible mechanism for virtual devices, rather
22than boutique per-environment or per-OS mechanisms.
23
24 Straightforward: Virtio PCI devices use normal PCI mechanisms
25 of interrupts and DMA which should be familiar to any device
26 driver author. There is no exotic page-flipping or COW
27 mechanism: it's just a PCI device.[footnote:
28This lack of page-sharing implies that the implementation of the
29device (e.g. the hypervisor or host) needs full access to the
30guest memory. Communication with untrusted parties (i.e.
31inter-guest communication) requires copying.
32]
33
34 Efficient: Virtio PCI devices consist of rings of descriptors
35 for input and output, which are neatly separated to avoid cache
36 effects from both guest and device writing to the same cache
37 lines.
38
39 Standard: Virtio PCI makes no assumptions about the environment
40 in which it operates, beyond supporting PCI. In fact the virtio
41 devices specified in the appendices do not require PCI at all:
42 they have been implemented on non-PCI buses.[footnote:
43The Linux implementation further separates the PCI virtio code
44from the specific virtio drivers: these drivers are shared with
45the non-PCI implementations (currently lguest and S/390).
46]
47
48 Extensible: Virtio PCI devices contain feature bits which are
49 acknowledged by the guest operating system during device setup.
50 This allows forwards and backwards compatibility: the device
51 offers all the features it knows about, and the driver
52 acknowledges those it understands and wishes to use.
53
54 Virtqueues
55
56The mechanism for bulk data transport on virtio PCI devices is
57pretentiously called a virtqueue. Each device can have zero or
58more virtqueues: for example, the network device has one for
59transmit and one for receive.
60
61Each virtqueue occupies two or more physically-contiguous pages
62(defined, for the purposes of this specification, as 4096 bytes),
63and consists of three parts:
64
65
66+-------------------+-----------------------------------+-----------+
67| Descriptor Table | Available Ring (padding) | Used Ring |
68+-------------------+-----------------------------------+-----------+
69
70
71When the driver wants to send buffers to the device, it puts them
72in one or more slots in the descriptor table, and writes the
73descriptor indices into the available ring. It then notifies the
74device. When the device has finished with the buffers, it writes
75the descriptors into the used ring, and sends an interrupt.
76
77Specification
78
79 PCI Discovery
80
81Any PCI device with Vendor ID 0x1AF4, and Device ID 0x1000
82through 0x103F inclusive is a virtio device[footnote:
83The actual value within this range is ignored
84]. The device must also have a Revision ID of 0 to match this
85specification.
86
87The Subsystem Device ID indicates which virtio device is
88supported by the device. The Subsystem Vendor ID should reflect
89the PCI Vendor ID of the environment (it's currently only used
90for informational purposes by the guest).
91
92
93+----------------------+--------------------+---------------+
94| Subsystem Device ID | Virtio Device | Specification |
95+----------------------+--------------------+---------------+
96+----------------------+--------------------+---------------+
97| 1 | network card | Appendix C |
98+----------------------+--------------------+---------------+
99| 2 | block device | Appendix D |
100+----------------------+--------------------+---------------+
101| 3 | console | Appendix E |
102+----------------------+--------------------+---------------+
103| 4 | entropy source | Appendix F |
104+----------------------+--------------------+---------------+
105| 5 | memory ballooning | Appendix G |
106+----------------------+--------------------+---------------+
107| 6 | ioMemory | - |
108+----------------------+--------------------+---------------+
109| 9 | 9P transport | - |
110+----------------------+--------------------+---------------+
111
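A sketch of how a Linux guest driver can recognise such a device, mirroring the discovery rules above (the vendor, Device ID range and Revision ID checks are what the text requires; the function and table names are illustrative):

#include <linux/pci.h>

static const struct pci_device_id virtio_pci_id_table[] = {
	{ PCI_DEVICE(0x1af4, PCI_ANY_ID) },	/* virtio PCI Vendor ID */
	{ 0 }
};

static int virtio_pci_probe(struct pci_dev *pdev,
			    const struct pci_device_id *id)
{
	/* Only Device IDs 0x1000 through 0x103f are virtio devices. */
	if (pdev->device < 0x1000 || pdev->device > 0x103f)
		return -ENODEV;

	/* This version of the specification corresponds to Revision ID 0. */
	if (pdev->revision != 0)
		return -ENODEV;

	/* pdev->subsystem_device now says which virtio device this is
	 * (network, block, ...); see the table above. */
	return 0;
}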
112
113 Device Configuration
114
115To configure the device, we use the first I/O region of the PCI
116device. This contains a virtio header followed by a
117device-specific region.
118
119There may be different widths of accesses to the I/O region; the “
120natural” access method for each field in the virtio header must
121be used (i.e. 32-bit accesses for 32-bit fields, etc), but the
122device-specific region can be accessed using any width accesses,
123and should obtain the same results.
124
125Note that this is possible because while the virtio header is PCI
126(i.e. little) endian, the device-specific region is encoded in
127the native endian of the guest (where such distinction is
128applicable).
129
130 Device Initialization Sequence
131
132We start with an overview of device initialization, then expand
133on the details of the device and how each step is performed.
134
135 Reset the device. This is not required on initial start up.
136
137 The ACKNOWLEDGE status bit is set: we have noticed the device.
138
139 The DRIVER status bit is set: we know how to drive the device.
140
141 Device-specific setup, including reading the Device Feature
142 Bits, discovery of virtqueues for the device, optional MSI-X
143 setup, and reading and possibly writing the virtio
144 configuration space.
145
146 The subset of Device Feature Bits understood by the driver is
147 written to the device.
148
149 The DRIVER_OK status bit is set.
150
151 The device can now be used (ie. buffers added to the
152 virtqueues)[footnote:
153Historically, drivers have used the device before steps 5 and 6.
154This is only allowed if the driver does not use any features
155which would alter this early use of the device.
156]
157
158If any of these steps go irrecoverably wrong, the guest should
159set the FAILED status bit to indicate that it has given up on the
160device (it can reset the device later to restart if desired).
161
162We now cover the fields required for general setup in detail.
163
164 Virtio Header
165
166The virtio header looks as follows:
167
168
169+------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+
170| Bits || 32 | 32 | 32 | 16 | 16 | 16 | 8 | 8 |
171+------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+
172| Read/Write || R | R+W | R+W | R | R+W | R+W | R+W | R |
173+------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+
174| Purpose || Device | Guest | Queue | Queue | Queue | Queue | Device | ISR |
175| || Features bits 0:31 | Features bits 0:31 | Address | Size | Select | Notify | Status | Status |
176+------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+
177
178
179If MSI-X is enabled for the device, two additional fields
180immediately follow this header:
181
182
183+------------++----------------+--------+
184| Bits || 16 | 16 |
185 +----------------+--------+
186+------------++----------------+--------+
187| Read/Write || R+W | R+W |
188+------------++----------------+--------+
189| Purpose || Configuration | Queue |
190| (MSI-X) || Vector | Vector |
191+------------++----------------+--------+
192
193
194Finally, if the VIRTIO_F_FEATURES_HI feature bit is negotiated, the header is
195immediately followed by two additional fields:
196
197
198+------------++----------------------+----------------------
199| Bits || 32 | 32
200+------------++----------------------+----------------------
201| Read/Write || R | R+W
202+------------++----------------------+----------------------
203| Purpose || Device | Guest
204| || Features bits 32:63 | Features bits 32:63
205+------------++----------------------+----------------------
206
207
208Immediately following these general headers, there may be
209device-specific headers:
210
211
212+------------++--------------------+
213| Bits || Device Specific |
214 +--------------------+
215+------------++--------------------+
216| Read/Write || Device Specific |
217+------------++--------------------+
218| Purpose || Device Specific... |
219| || |
220+------------++--------------------+
221
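Reading the field widths off the header layouts above gives the following byte offsets into the I/O region (a sketch; the macro names follow the ones Linux uses for its legacy virtio PCI header, only the offsets themselves are implied by this specification):

#define VIRTIO_PCI_HOST_FEATURES	0	/* 32 bits, R:   Device Features 0:31  */
#define VIRTIO_PCI_GUEST_FEATURES	4	/* 32 bits, R+W: Guest Features 0:31   */
#define VIRTIO_PCI_QUEUE_PFN		8	/* 32 bits, R+W: Queue Address / 4096  */
#define VIRTIO_PCI_QUEUE_NUM		12	/* 16 bits, R:   Queue Size            */
#define VIRTIO_PCI_QUEUE_SEL		14	/* 16 bits, R+W: Queue Select          */
#define VIRTIO_PCI_QUEUE_NOTIFY		16	/* 16 bits, R+W: Queue Notify          */
#define VIRTIO_PCI_STATUS		18	/*  8 bits, R+W: Device Status         */
#define VIRTIO_PCI_ISR			19	/*  8 bits, R:   ISR Status            */
/* Present only when MSI-X is enabled for the device: */
#define VIRTIO_MSI_CONFIG_VECTOR	20	/* 16 bits, R+W */
#define VIRTIO_MSI_QUEUE_VECTOR		22	/* 16 bits, R+W */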
222
223 Device Status
224
225The Device Status field is updated by the guest to indicate its
226progress. This provides a simple low-level diagnostic: it's most
227useful to imagine them hooked up to traffic lights on the console
228indicating the status of each device.
229
230The device can be reset by writing a 0 to this field, otherwise
231at least one bit should be set:
232
233 ACKNOWLEDGE (1) Indicates that the guest OS has found the
234 device and recognized it as a valid virtio device.
235
236 DRIVER (2) Indicates that the guest OS knows how to drive the
237 device. Under Linux, drivers can be loadable modules so there
238 may be a significant (or infinite) delay before setting this
239 bit.
240
241  DRIVER_OK (4) Indicates that the driver is set up and ready to
242 drive the device.
243
244  FAILED (128) Indicates that something went wrong in the guest,
245 and it has given up on the device. This could be an internal
246 error, or the driver didn't like the device for some reason, or
247 even a fatal error during device operation. The device must be
248 reset before attempting to re-initialize.
249
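The initialization sequence described earlier then boils down to a handful of Device Status writes. A sketch, assuming an ioport-mapped header at ioaddr and the offsets from the earlier sketch (the status constants are the ones Linux defines for these same bits in <linux/virtio_config.h>):

#include <linux/io.h>
#include <linux/virtio_config.h>

#define VIRTIO_PCI_STATUS	18

static void virtio_add_status(void __iomem *ioaddr, u8 bit)
{
	u8 status = ioread8(ioaddr + VIRTIO_PCI_STATUS);

	iowrite8(status | bit, ioaddr + VIRTIO_PCI_STATUS);
}

static void virtio_init_device(void __iomem *ioaddr)
{
	iowrite8(0, ioaddr + VIRTIO_PCI_STATUS);		/* reset the device */
	virtio_add_status(ioaddr, VIRTIO_CONFIG_S_ACKNOWLEDGE);	/* we noticed it */
	virtio_add_status(ioaddr, VIRTIO_CONFIG_S_DRIVER);	/* we know how to drive it */
	/* ... device-specific setup, feature negotiation, virtqueue setup ... */
	virtio_add_status(ioaddr, VIRTIO_CONFIG_S_DRIVER_OK);	/* driver is ready */
}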
250 Feature Bits
251
252The least significant 31 bits of the first configuration field
253indicates the features that the device supports (the high bit is
254reserved, and will be used to indicate the presence of future
255feature bits elsewhere). If more than 31 feature bits are
256supported, the device indicates so by setting feature bit 31 (see
257[cha:Reserved-Feature-Bits]). The bits are allocated as follows:
258
259 0 to 23 Feature bits for the specific device type
260
261 24 to 40 Feature bits reserved for extensions to the queue and
262 feature negotiation mechanisms
263
264 41 to 63 Feature bits reserved for future extensions
265
266For example, feature bit 0 for a network device (i.e. Subsystem
267Device ID 1) indicates that the device supports checksumming of
268packets.
269
270The feature bits are negotiated: the device lists all the
271features it understands in the Device Features field, and the
272guest writes the subset that it understands into the Guest
273Features field. The only way to renegotiate is to reset the
274device.
275
276In particular, new fields in the device configuration header are
277indicated by offering a feature bit, so the guest can check
278before accessing that part of the configuration space.
279
280This allows for forwards and backwards compatibility: if the
281device is enhanced with a new feature bit, older guests will not
282write that feature bit back to the Guest Features field and it
283can go into backwards compatibility mode. Similarly, if a guest
284is enhanced with a feature that the device doesn't support, it
285will not see that feature bit in the Device Features field and
286can go into backwards compatibility mode (or, for poor
287implementations, set the FAILED Device Status bit).
288
289Access to feature bits 32 to 63 is enabled by the Guest by setting
290feature bit 31. If this bit is unset, the Device must assume that all
291feature bits > 31 are unset.
292
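A sketch of negotiating the low 32 feature bits (driver_features is whatever subset this driver implements; register offsets as in the earlier header sketch):

#include <linux/io.h>
#include <linux/types.h>

#define VIRTIO_PCI_HOST_FEATURES	0
#define VIRTIO_PCI_GUEST_FEATURES	4

static u32 virtio_negotiate_features(void __iomem *ioaddr, u32 driver_features)
{
	/* Everything the device offers... */
	u32 device_features = ioread32(ioaddr + VIRTIO_PCI_HOST_FEATURES);
	/* ...masked down to what this driver understands and wants to use. */
	u32 guest_features = device_features & driver_features;

	iowrite32(guest_features, ioaddr + VIRTIO_PCI_GUEST_FEATURES);
	return guest_features;
}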
293 Configuration/Queue Vectors
294
295When MSI-X capability is present and enabled in the device
296(through standard PCI configuration space) 4 bytes at byte offset
29720 are used to map configuration change and queue interrupts to
298MSI-X vectors. In this case, the ISR Status field is unused, and
299device specific configuration starts at byte offset 24 in virtio
300header structure. When MSI-X capability is not enabled, device
301specific configuration starts at byte offset 20 in virtio header.
302
303Writing a valid MSI-X Table entry number, 0 to 0x7FF, to one of
304Configuration/Queue Vector registers, maps interrupts triggered
305by the configuration change/selected queue events respectively to
306the corresponding MSI-X vector. To disable interrupts for a
307specific event type, unmap it by writing a special NO_VECTOR
308value:
309
310/* Vector value used to disable MSI for queue */
311
312#define VIRTIO_MSI_NO_VECTOR 0xffff
313
314Reading these registers returns vector mapped to a given event,
315or NO_VECTOR if unmapped. All queue and configuration change
316events are unmapped by default.
317
318Note that mapping an event to vector might require allocating
319internal device resources, and might fail. Devices report such
320failures by returning the NO_VECTOR value when the relevant
321Vector field is read. After mapping an event to vector, the
322driver must verify success by reading the Vector field value: on
323success, the previously written value is returned, and on
324failure, NO_VECTOR is returned. If a mapping failure is detected,
325the driver can retry mapping with fewer vectors, or disable MSI-X.
326
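A sketch of the write-then-verify step for one queue (the queue must already have been chosen via Queue Select; VIRTIO_MSI_QUEUE_VECTOR is the second MSI-X field described earlier, at byte offset 22):

#include <linux/errno.h>
#include <linux/io.h>
#include <linux/types.h>

#define VIRTIO_MSI_QUEUE_VECTOR	22
#define VIRTIO_MSI_NO_VECTOR	0xffff

static int virtio_map_queue_vector(void __iomem *ioaddr, u16 vector)
{
	iowrite16(vector, ioaddr + VIRTIO_MSI_QUEUE_VECTOR);

	/* The device reports a resource-allocation failure by reading
	 * back as NO_VECTOR; the driver can then retry with fewer
	 * vectors or fall back to non-MSI-X operation. */
	if (ioread16(ioaddr + VIRTIO_MSI_QUEUE_VECTOR) != vector)
		return -ENOSPC;
	return 0;
}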
327 Virtqueue Configuration
328
329As a device can have zero or more virtqueues for bulk data
330transport (for example, the network driver has two), the driver
331needs to configure them as part of the device-specific
332configuration.
333
334This is done as follows, for each virtqueue a device has:
335
336 Write the virtqueue index (first queue is 0) to the Queue
337 Select field.
338
339 Read the virtqueue size from the Queue Size field, which is
340 always a power of 2. This controls how big the virtqueue is
341 (see below). If this field is 0, the virtqueue does not exist.
342
343 Allocate and zero virtqueue in contiguous physical memory, on a
344 4096 byte alignment. Write the physical address, divided by
345 4096 to the Queue Address field.[footnote:
346The 4096 is based on the x86 page size, but it's also large
347enough to ensure that the separate parts of the virtqueue are on
348separate cache lines.
349]
350
351 Optionally, if MSI-X capability is present and enabled on the
352 device, select a vector to use to request interrupts triggered
353 by virtqueue events. Write the MSI-X Table entry number
354 corresponding to this vector in Queue Vector field. Read the
355 Queue Vector field: on success, previously written value is
356 returned; on failure, NO_VECTOR value is returned.
357
358The Queue Size field controls the total number of bytes required
359for the virtqueue according to the following formula:
360
361#define ALIGN(x) (((x) + 4095) & ~4095)
362
363static inline unsigned vring_size(unsigned int qsz)
364
365{
366
367 return ALIGN(sizeof(struct vring_desc)*qsz + sizeof(u16)*(2
368+ qsz))
369
370 + ALIGN(sizeof(struct vring_used_elem)*qsz);
371
372}
373
374This currently wastes some space with padding, but also allows
375future extensions. The virtqueue layout structure looks like this
376(qsz is the Queue Size field, which is a variable, so this code
377won't compile):
378
379struct vring {
380
381 /* The actual descriptors (16 bytes each) */
382
383 struct vring_desc desc[qsz];
384
385
386
387 /* A ring of available descriptor heads with free-running
388index. */
389
390 struct vring_avail avail;
391
392
393
394 // Padding to the next 4096 boundary.
395
396 char pad[];
397
398
399
400 // A ring of used descriptor heads with free-running index.
401
402 struct vring_used used;
403
404};
405
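Putting these steps together, per-queue setup might look roughly like this (a sketch: vring_size() is the helper above, the register offsets follow the earlier header sketch, and alloc_pages_exact() stands in for whatever zeroed, 4096-byte-aligned allocation the OS provides):

#include <linux/gfp.h>
#include <linux/io.h>

#define VIRTIO_PCI_QUEUE_PFN	8	/* "Queue Address": physical address / 4096 */
#define VIRTIO_PCI_QUEUE_NUM	12	/* Queue Size (read-only) */
#define VIRTIO_PCI_QUEUE_SEL	14	/* Queue Select */

static void *virtio_setup_vq(void __iomem *ioaddr, u16 index, u16 *num_out)
{
	void *queue;
	u16 num;

	iowrite16(index, ioaddr + VIRTIO_PCI_QUEUE_SEL);	/* select the queue */
	num = ioread16(ioaddr + VIRTIO_PCI_QUEUE_NUM);		/* read its size */
	if (num == 0)						/* 0: no such queue */
		return NULL;

	/* Allocate and zero the ring; page allocations are 4096-byte aligned. */
	queue = alloc_pages_exact(vring_size(num), GFP_KERNEL | __GFP_ZERO);
	if (!queue)
		return NULL;

	/* Tell the device where it is, as a page frame number. */
	iowrite32(virt_to_phys(queue) >> 12, ioaddr + VIRTIO_PCI_QUEUE_PFN);
	*num_out = num;
	return queue;
}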
406 A Note on Virtqueue Endianness
407
408Note that the endian of these fields and everything else in the
409virtqueue is the native endian of the guest, not little-endian as
410PCI normally is. This makes for simpler guest code, and it is
411assumed that the host already has to be deeply aware of the guest
412endian so such an “endian-aware” device is not a significant
413issue.
414
415 Descriptor Table
416
417The descriptor table refers to the buffers the guest is using for
418the device. The addresses are physical addresses, and the buffers
419can be chained via the next field. Each descriptor describes a
420buffer which is read-only or write-only, but a chain of
421descriptors can contain both read-only and write-only buffers.
422
423No descriptor chain may be more than 2^32 bytes long in total.

struct vring_desc {
424
425 /* Address (guest-physical). */
426
427 u64 addr;
428
429 /* Length. */
430
431 u32 len;
432
433/* This marks a buffer as continuing via the next field. */
434
435#define VRING_DESC_F_NEXT 1
436
437/* This marks a buffer as write-only (otherwise read-only). */
438
439#define VRING_DESC_F_WRITE 2
440
441/* This means the buffer contains a list of buffer descriptors.
442*/
443
444#define VRING_DESC_F_INDIRECT 4
445
446 /* The flags as indicated above. */
447
448 u16 flags;
449
450 /* Next field if flags & NEXT */
451
452 u16 next;
453
454};
455
456The number of descriptors in the table is specified by the Queue
457Size field for this virtqueue.
458
459 <sub:Indirect-Descriptors>Indirect Descriptors
460
461Some devices benefit by concurrently dispatching a large number
462of large requests. The VIRTIO_RING_F_INDIRECT_DESC feature can be
463used to allow this (see [cha:Reserved-Feature-Bits]). To increase
464ring capacity it is possible to store a table of indirect
465descriptors anywhere in memory, and insert a descriptor in main
466virtqueue (with flags&INDIRECT on) that refers to memory buffer
467containing this indirect descriptor table; fields addr and len
468refer to the indirect table address and length in bytes,
469respectively. The indirect table layout structure looks like this
470(len is the length of the descriptor that refers to this table,
471which is a variable, so this code won't compile):
472
473struct indirect_descriptor_table {
474
475 /* The actual descriptors (16 bytes each) */
476
477 struct vring_desc desc[len / 16];
478
479};
480
481The first indirect descriptor is located at start of the indirect
482descriptor table (index 0), additional indirect descriptors are
483chained by next field. An indirect descriptor without next field
484(with flags&NEXT off) signals the end of the indirect descriptor
485table, and transfers control back to the main virtqueue. An
486indirect descriptor can not refer to another indirect descriptor
487table (flags&INDIRECT must be off). A single indirect descriptor
488table can include both read-only and write-only descriptors;
489write-only flag (flags&WRITE) in the descriptor that refers to it
490is ignored.
491
492 Available Ring
493
494The available ring refers to what descriptors we are offering the
495device: it refers to the head of a descriptor chain. The “flags”
496field is currently 0 or 1: 1 indicating that we do not need an
497interrupt when the device consumes a descriptor from the
498available ring. Alternatively, the guest can ask the device to
499delay interrupts until an entry with an index specified by the “
500used_event” field is written in the used ring (equivalently,
501until the idx field in the used ring will reach the value
502used_event + 1). The method employed by the device is controlled
503by the VIRTIO_RING_F_EVENT_IDX feature bit (see [cha:Reserved-Feature-Bits]
504). This interrupt suppression is merely an optimization; it may
505not suppress interrupts entirely.
506
507The “idx” field indicates where we would put the next descriptor
508entry (modulo the ring size). This starts at 0, and increases.
509
510struct vring_avail {
511
512#define VRING_AVAIL_F_NO_INTERRUPT 1
513
514 u16 flags;
515
516 u16 idx;
517
518 u16 ring[qsz]; /* qsz is the Queue Size field read from device
519*/
520
521 u16 used_event;
522
523};
524
525 Used Ring
526
527The used ring is where the device returns buffers once it is done
528with them. The flags field can be used by the device to hint that
529no notification is necessary when the guest adds to the available
530ring. Alternatively, the “avail_event” field can be used by the
531device to hint that no notification is necessary until an entry
532with an index specified by the “avail_event” is written in the
533available ring (equivalently, until the idx field in the
534available ring will reach the value avail_event + 1). The method
535employed by the device is controlled by the guest through the
536VIRTIO_RING_F_EVENT_IDX feature bit (see [cha:Reserved-Feature-Bits]
537). [footnote:
538These fields are kept here because this is the only part of the
539virtqueue written by the device
540].
541
542Each entry in the ring is a pair: the head entry of the
543descriptor chain describing the buffer (this matches an entry
544placed in the available ring by the guest earlier), and the total
545of bytes written into the buffer. The latter is extremely useful
546for guests using untrusted buffers: if you do not know exactly
547how much has been written by the device, you usually have to zero
548the buffer to ensure no data leakage occurs.
549
550/* u32 is used here for ids for padding reasons. */
551
552struct vring_used_elem {
553
554 /* Index of start of used descriptor chain. */
555
556 u32 id;
557
558 /* Total length of the descriptor chain which was used
559(written to) */
560
561 u32 len;
562
563};
564
565
566
567struct vring_used {
568
569#define VRING_USED_F_NO_NOTIFY 1
570
571 u16 flags;
572
573 u16 idx;
574
575 struct vring_used_elem ring[qsz];
576
577 u16 avail_event;
578
579};
580
581 Helpers for Managing Virtqueues
582
583The Linux Kernel Source code contains the definitions above and
584helper routines in a more usable form, in
585include/linux/virtio_ring.h. This was explicitly licensed by IBM
586and Red Hat under the (3-clause) BSD license so that it can be
587freely used by all other projects, and is reproduced (with slight
588variation to remove Linux assumptions) in Appendix A.
589
590 Device Operation
591
592There are two parts to device operation: supplying new buffers to
593the device, and processing used buffers from the device. As an
594example, the virtio network device has two virtqueues: the
595transmit virtqueue and the receive virtqueue. The driver adds
596outgoing (read-only) packets to the transmit virtqueue, and then
597frees them after they are used. Similarly, incoming (write-only)
598buffers are added to the receive virtqueue, and processed after
599they are used.
600
601 Supplying Buffers to The Device
602
603Actual transfer of buffers from the guest OS to the device
604operates as follows:
605
606 Place the buffer(s) into free descriptor(s).
607
608 If there are no free descriptors, the guest may choose to
609 notify the device even if notifications are suppressed (to
610 reduce latency).[footnote:
611The Linux drivers do this only for read-only buffers: for
612write-only buffers, it is assumed that the driver is merely
613trying to keep the receive buffer ring full, and no notification
614of this expected condition is necessary.
615]
616
617 Place the id of the buffer in the next ring entry of the
618 available ring.
619
620 The steps (1) and (2) may be performed repeatedly if batching
621 is possible.
622
623 A memory barrier should be executed to ensure the device sees
624 the updated descriptor table and available ring before the next
625 step.
626
627 The available “idx” field should be increased by the number of
628 entries added to the available ring.
629
630 A memory barrier should be executed to ensure that we update
631 the idx field before checking for notification suppression.
632
633 If notifications are not suppressed, the device should be
634 notified of the new buffers.
635
636Note that the above code does not take precautions against the
637available ring buffer wrapping around: this is not possible since
638the ring buffer is the same size as the descriptor table, so step
639(1) will prevent such a condition.
640
641In addition, the maximum queue size is 32768 (it must be a power
642of 2 which fits in 16 bits), so the 16-bit “idx” value can always
643distinguish between a full and empty buffer.
644
645Here is a description of each stage in more detail.
646
647 Placing Buffers Into The Descriptor Table
648
649A buffer consists of zero or more read-only physically-contiguous
650elements followed by zero or more physically-contiguous
651write-only elements (it must have at least one element). This
652algorithm maps it into the descriptor table:
653
654 for each buffer element, b:
655
656 Get the next free descriptor table entry, d
657
658 Set d.addr to the physical address of the start of b
659
660 Set d.len to the length of b.
661
662 If b is write-only, set d.flags to VRING_DESC_F_WRITE,
663 otherwise 0.
664
665 If there is a buffer element after this:
666
667 Set d.next to the index of the next free descriptor element.
668
669 Set the VRING_DESC_F_NEXT bit in d.flags.
670
671In practice, the d.next fields are usually used to chain free
672descriptors, and a separate count kept to check there are enough
673free descriptors before beginning the mappings.
674
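A sketch of that algorithm over the struct vring_desc layout given earlier, with 'out' read-only elements followed by 'in' write-only ones (the contiguous free-slot assumption and the addr/len arrays are simplifications of this sketch; real drivers keep a free list threaded through the next fields, as noted above):

/* Returns the head index to be pushed onto the available ring. */
static u16 vq_map_buffer(struct vring_desc *desc, u16 free_head,
			 const u64 *addr, const u32 *len,
			 unsigned int out, unsigned int in)
{
	unsigned int n, total = out + in;
	u16 head = free_head, i = free_head;

	for (n = 0; n < total; n++) {
		desc[i].addr = addr[n];		/* guest-physical address of element */
		desc[i].len = len[n];
		desc[i].flags = (n < out) ? 0 : VRING_DESC_F_WRITE;
		if (n + 1 < total) {
			desc[i].flags |= VRING_DESC_F_NEXT;	/* chain to next element */
			desc[i].next = i + 1;	/* sketch: assumes contiguous free slots */
			i = desc[i].next;
		}
	}
	return head;
}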
675 Updating The Available Ring
676
677The head of the buffer we mapped is the first d in the algorithm
678above. A naive implementation would do the following:
679
680avail->ring[avail->idx % qsz] = head;
681
682However, in general we can add many descriptors before we update
683the “idx” field (at which point they become visible to the
684device), so we keep a counter of how many we've added:
685
686avail->ring[(avail->idx + added++) % qsz] = head;
687
688 Updating The Index Field
689
690Once the idx field of the virtqueue is updated, the device will
691be able to access the descriptor entries we've created and the
692memory they refer to. This is why a memory barrier is generally
693used before the idx update, to ensure it sees the most up-to-date
694copy.
695
696The idx field always increments, and we let it wrap naturally at
69765536:
698
699avail->idx += added;
700
701 <sub:Notifying-The-Device>Notifying The Device
702
703Device notification occurs by writing the 16-bit virtqueue index
704of this virtqueue to the Queue Notify field of the virtio header
705in the first I/O region of the PCI device. This can be expensive,
706however, so the device can suppress such notifications if it
707doesn't need them. We have to be careful to expose the new idx
708value before checking the suppression flag: it's OK to notify
709gratuitously, but not to omit a required notification. So again,
710we use a memory barrier here before reading the flags or the
711avail_event field.
712
713If the VIRTIO_F_RING_EVENT_IDX feature is not negotiated, and if
714the VRING_USED_F_NO_NOTIFY flag is not set, we go ahead and write
715to the Queue Notify field.
716
717If the VIRTIO_F_RING_EVENT_IDX feature is negotiated, we read the
718avail_event field in the used ring structure. If the
719available index crossed the avail_event field value since the
720last notification, we go ahead and write to the Queue Notify
721field. The avail_event field wraps naturally at 65536 as well:
722
723(u16)(new_idx - avail_event - 1) < (u16)(new_idx - old_idx)
724
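A compact sketch of this decision follows. It assumes “old” holds the
value avail->idx had at the last notification, kick() stands for the
write to the Queue Notify field, event_idx_negotiated records the
feature negotiation, and mb() is the platform's full memory barrier;
struct vring and vring_need_event() are from Appendix A, everything
else is an assumption for the example.

static void maybe_notify(struct vring *vr, uint16_t old,
                         int event_idx_negotiated, void (*kick)(void))
{
        uint16_t new_idx = vr->avail->idx;

        mb();   /* expose the new idx before reading flags / avail_event */

        if (event_idx_negotiated) {
                /* avail_event lives right after the used ring entries */
                uint16_t avail_event =
                        *(uint16_t *)&vr->used->ring[vr->num];
                if (vring_need_event(avail_event, new_idx, old))
                        kick();
        } else if (!(vr->used->flags & VRING_USED_F_NO_NOTIFY)) {
                kick();
        }
}
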
725 <sub:Receiving-Used-Buffers>Receiving Used Buffers From The
726 Device
727
728Once the device has used a buffer (read from or written to it, or
729parts of both, depending on the nature of the virtqueue and the
730device), it sends an interrupt, following an algorithm very
731similar to the algorithm used for the driver to send the device a
732buffer:
733
734 Write the head descriptor number to the next field in the used
735 ring.
736
737 Update the used ring idx.
738
739 Determine whether an interrupt is necessary:
740
741 If the VIRTIO_F_RING_EVENT_IDX feature is not negotiated: check
742 if the VRING_AVAIL_F_NO_INTERRUPT flag is not set in
743 avail->flags
744
745 If the VIRTIO_F_RING_EVENT_IDX feature is negotiated: check
746 whether the used index crossed the used_event field value
747 since the last update. The used_event field wraps naturally
748 at 65536 as well:
(u16)(new_idx - used_event - 1) < (u16)(new_idx - old_idx)
749
750 If an interrupt is necessary:
751
752 If MSI-X capability is disabled:
753
754 Set the lower bit of the ISR Status field for the device.
755
756 Send the appropriate PCI interrupt for the device.
757
758 If MSI-X capability is enabled:
759
760 Request the appropriate MSI-X interrupt message for the
761 device; the Queue Vector field value selects the MSI-X Table
762 entry to use.
763
764 If Queue Vector field value is NO_VECTOR, no interrupt
765 message is requested for this event.
766
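A device-side sketch of the algorithm above, mirroring the
driver-side example earlier: “head” is the head descriptor index of
the buffer just consumed, “written” the number of bytes the device
wrote into it, and raise_interrupt(), event_idx_negotiated and the
barrier macros wmb()/mb() are assumptions for the example; the types
and helpers otherwise come from Appendix A.

static void return_buffer(struct vring *vr, uint32_t head, uint32_t written,
                          int event_idx_negotiated,
                          void (*raise_interrupt)(void))
{
        uint16_t old = vr->used->idx;

        vr->used->ring[old % vr->num].id  = head;
        vr->used->ring[old % vr->num].len = written;
        wmb();                          /* element before the idx update */
        vr->used->idx = old + 1;
        mb();                           /* idx before reading avail fields */

        if (event_idx_negotiated) {
                /* used_event lives right after the available ring entries */
                uint16_t used_event = vr->avail->ring[vr->num];
                if (vring_need_event(used_event, vr->used->idx, old))
                        raise_interrupt();
        } else if (!(vr->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
                raise_interrupt();
        }
}
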
767The guest interrupt handler should:
768
769 If MSI-X capability is disabled: read the ISR Status field,
770 which will reset it to zero. If the lower bit is zero, the
771 interrupt was not for this device. Otherwise, the guest driver
772 should look through the used rings of each virtqueue for the
773 device, to see if any progress has been made by the device
774 which requires servicing.
775
776 If MSI-X capability is enabled: look through the used rings of
777 each virtqueue mapped to the specific MSI-X vector for the
778 device, to see if any progress has been made by the device
779 which requires servicing.
780
781For each ring, the guest should then disable interrupts by
782setting the VRING_AVAIL_F_NO_INTERRUPT flag in the avail
783structure, if required. It can then process used ring entries,
784finally re-enabling interrupts by clearing the
785VRING_AVAIL_F_NO_INTERRUPT flag or updating the used_event field
786in the available structure. The guest should then execute a memory
787barrier, and then recheck the ring empty condition. This is necessary to handle the case where, after the
788last check and before enabling interrupts, an interrupt has been
789suppressed by the device:
790
791vring_disable_interrupts(vq);
792
793for (;;) {
794
795 if (vq->last_seen_used == vring.used->idx) {
796
797 vring_enable_interrupts(vq);
798
799 mb();
800
801 if (vq->last_seen_used == vring.used->idx)
802
803 break;
804
805 }
806
807 struct vring_used_elem *e =
808 &vring.used->ring[vq->last_seen_used % vsz];
809
810 process_buffer(e);
811
812 vq->last_seen_used++;
813
814}
815
816 Dealing With Configuration Changes
817
818Some virtio PCI devices can change the device configuration
819state, as reflected in the virtio header in the PCI configuration
820space. In this case:
821
822 If MSI-X capability is disabled: an interrupt is delivered and
823 the second highest bit is set in the ISR Status field to
824 indicate that the driver should re-examine the configuration
825 space. Note that a single interrupt can indicate both that one
826 or more virtqueues have been used and that the configuration
827 space has changed: even if the config bit is set, virtqueues
828 must be scanned.
829
830 If MSI-X capability is enabled: an interrupt message is
831 requested. The Configuration Vector field sets the MSI-X Table
832 entry number to use. If Configuration Vector field value is
833 NO_VECTOR, no interrupt message is requested for this event.
834
835Creating New Device Types
836
837Various considerations are necessary when creating a new device
838type:
839
840 How Many Virtqueues?
841
842It is possible that a very simple device will operate entirely
843through its configuration space, but most will need at least one
844virtqueue in which it will place requests. A device with both
845input and output (eg. console and network devices described here)
846needs two queues: one which the driver fills with buffers to
847receive input, and one in which the driver places buffers to
848transmit output.
849
850 What Configuration Space Layout?
851
852Configuration space is generally used for rarely-changing or
853initialization-time parameters. But it is a limited resource, so
854it might be better to use a virtqueue to update configuration
855information (the network device does this for filtering,
856otherwise the table in the config space could potentially be very
857large).
858
859Note that this space is generally the guest's native endian,
860rather than PCI's little-endian.
861
862 What Device Number?
863
864Currently device numbers are assigned quite freely: a simple
865request mail to the author of this document or the Linux
866virtualization mailing list[footnote:
867
868https://lists.linux-foundation.org/mailman/listinfo/virtualization
869] will be sufficient to secure a unique one.
870
871Meanwhile for experimental drivers, use 65535 and work backwards.
872
873 How many MSI-X vectors?
874
875Using the optional MSI-X capability, devices can speed up
876interrupt processing by removing the need for the guest driver to
877read the ISR Status register (which might be an expensive
878operation), by reducing interrupt sharing between devices and
879queues within the device, and by handling interrupts from multiple CPUs. However, some
880systems impose a limit (which might be as low as 256) on the
881total number of MSI-X vectors that can be allocated to all
882devices. Devices and/or device drivers should take this into
883account, limiting the number of vectors used unless the device is
884expected to cause a high volume of interrupts. Devices can
885control the number of vectors used by limiting the MSI-X Table
886Size or not presenting MSI-X capability in PCI configuration
887space. Drivers can control this by mapping events to as small a
888number of vectors as possible, or by disabling the MSI-X
889capability altogether.
890
891 Message Framing
892
893The descriptors used for a buffer should not affect the semantics
894of the message, except for the total length of the buffer. For
895example, a network buffer consists of a 10 byte header followed
896by the network packet. Whether this is presented in the ring
897descriptor chain as (say) a 10 byte buffer and a 1514 byte
898buffer, or a single 1524 byte buffer, or even three buffers,
899should have no effect.
900
901In particular, no implementation should use the descriptor
902boundaries to determine the size of any header in a request.[footnote:
903The current qemu device implementations mistakenly insist that
904the first descriptor cover the header in these cases exactly, so
905a cautious driver should arrange it so.
906]
907
908 Device Improvements
909
910Any change to configuration space, or new virtqueues, or
911behavioural changes, should be indicated by negotiation of a new
912feature bit. This establishes clarity[footnote:
913Even if it does mean documenting design or implementation
914mistakes!
915] and avoids future expansion problems.
916
917Clusters of functionality which are always implemented together
918can use a single bit, but if one feature makes sense without the
919others they should not be gratuitously grouped together to
920conserve feature bits. We can always extend the spec when the
921first person needs more than 24 feature bits for their device.
922
923[LaTeX Command: printnomenclature]
924
925Appendix A: virtio_ring.h
926
927#ifndef VIRTIO_RING_H
928
929#define VIRTIO_RING_H
930
931/* An interface for efficient virtio implementation.
932
933 *
934
935 * This header is BSD licensed so anyone can use the definitions
936
937 * to implement compatible drivers/servers.
938
939 *
940
941 * Copyright 2007, 2009, IBM Corporation
942
943 * Copyright 2011, Red Hat, Inc
944
945 * All rights reserved.
946
947 *
948
949 * Redistribution and use in source and binary forms, with or
950without
951
952 * modification, are permitted provided that the following
953conditions
954
955 * are met:
956
957 * 1. Redistributions of source code must retain the above
958copyright
959
960 * notice, this list of conditions and the following
961disclaimer.
962
963 * 2. Redistributions in binary form must reproduce the above
964copyright
965
966 * notice, this list of conditions and the following
967disclaimer in the
968
969 * documentation and/or other materials provided with the
970distribution.
971
972 * 3. Neither the name of IBM nor the names of its contributors
973
974 * may be used to endorse or promote products derived from
975this software
976
977 * without specific prior written permission.
978
979 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
980CONTRIBUTORS ``AS IS'' AND
981
982 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
983TO, THE
984
985 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
986PARTICULAR PURPOSE
987
988 * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE
989LIABLE
990
991 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
992CONSEQUENTIAL
993
994 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
995SUBSTITUTE GOODS
996
997 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
998INTERRUPTION)
999
1000 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
1001CONTRACT, STRICT
1002
1003 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1004IN ANY WAY
1005
1006 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1007POSSIBILITY OF
1008
1009 * SUCH DAMAGE.
1010
1011 */
1012
1013
1014
1015/* This marks a buffer as continuing via the next field. */
1016
1017#define VRING_DESC_F_NEXT 1
1018
1019/* This marks a buffer as write-only (otherwise read-only). */
1020
1021#define VRING_DESC_F_WRITE 2
1022
1023
1024
1025/* The Host uses this in used->flags to advise the Guest: don't
1026kick me
1027
1028 * when you add a buffer. It's unreliable, so it's simply an
1029
1030 * optimization. Guest will still kick if it's out of buffers.
1031*/
1032
1033#define VRING_USED_F_NO_NOTIFY 1
1034
1035/* The Guest uses this in avail->flags to advise the Host: don't
1036
1037 * interrupt me when you consume a buffer. It's unreliable, so
1038it's
1039
1040 * simply an optimization. */
1041
1042#define VRING_AVAIL_F_NO_INTERRUPT 1
1043
1044
1045
1046/* Virtio ring descriptors: 16 bytes.
1047
1048 * These can chain together via "next". */
1049
1050struct vring_desc {
1051
1052 /* Address (guest-physical). */
1053
1054 uint64_t addr;
1055
1056 /* Length. */
1057
1058 uint32_t len;
1059
1060 /* The flags as indicated above. */
1061
1062 uint16_t flags;
1063
1064 /* We chain unused descriptors via this, too */
1065
1066 uint16_t next;
1067
1068};
1069
1070
1071
1072struct vring_avail {
1073
1074 uint16_t flags;
1075
1076 uint16_t idx;
1077
1078 uint16_t ring[];
1079
1080 uint16_t used_event;
1081
1082};
1083
1084
1085
1086/* u32 is used here for ids for padding reasons. */
1087
1088struct vring_used_elem {
1089
1090 /* Index of start of used descriptor chain. */
1091
1092 uint32_t id;
1093
1094 /* Total length of the descriptor chain which was written
1095to. */
1096
1097 uint32_t len;
1098
1099};
1100
1101
1102
1103struct vring_used {
1104
1105 uint16_t flags;
1106
1107 uint16_t idx;
1108
1109 struct vring_used_elem ring[];
1110
1111 uint16_t avail_event;
1112
1113};
1114
1115
1116
1117struct vring {
1118
1119 unsigned int num;
1120
1121
1122
1123 struct vring_desc *desc;
1124
1125 struct vring_avail *avail;
1126
1127 struct vring_used *used;
1128
1129};
1130
1131
1132
1133/* The standard layout for the ring is a continuous chunk of
1134memory which
1135
1136 * looks like this. We assume num is a power of 2.
1137
1138 *
1139
1140 * struct vring {
1141
1142 * // The actual descriptors (16 bytes each)
1143
1144 * struct vring_desc desc[num];
1145
1146 *
1147
1148 * // A ring of available descriptor heads with free-running
1149index.
1150
1151 * __u16 avail_flags;
1152
1153 * __u16 avail_idx;
1154
1155 * __u16 available[num];
1156
1157 *
1158
1159 * // Padding to the next align boundary.
1160
1161 * char pad[];
1162
1163 *
1164
1165 * // A ring of used descriptor heads with free-running
1166index.
1167
1168 * __u16 used_flags;
1169
1170 * __u16 used_idx;
1171
1172 * struct vring_used_elem used[num];
1173
1174 * };
1175
1176 * Note: for virtio PCI, align is 4096.
1177
1178 */
1179
1180static inline void vring_init(struct vring *vr, unsigned int num,
1181void *p,
1182
1183 unsigned long align)
1184
1185{
1186
1187 vr->num = num;
1188
1189 vr->desc = p;
1190
1191 vr->avail = p + num*sizeof(struct vring_desc);
1192
1193 vr->used = (void *)(((unsigned long)&vr->avail->ring[num]
1194
1195 + align-1)
1196
1197 & ~(align - 1));
1198
1199}
1200
1201
1202
1203static inline unsigned vring_size(unsigned int num, unsigned long
1204align)
1205
1206{
1207
1208 return ((sizeof(struct vring_desc)*num +
1209sizeof(uint16_t)*(3+num)
1210
1211 + align - 1) & ~(align - 1))
1212
1213 + sizeof(uint16_t)*3 + sizeof(struct
1214vring_used_elem)*num;
1215
1216}
1217
1218
1219
1220static inline int vring_need_event(uint16_t event_idx, uint16_t
1221new_idx, uint16_t old_idx)
1222
1223{
1224
1225 return (uint16_t)(new_idx - event_idx - 1) <
1226(uint16_t)(new_idx - old_idx);
1227
1228}
1229
1230#endif /* VIRTIO_RING_H */
1231
1232<cha:Reserved-Feature-Bits>Appendix B: Reserved Feature Bits
1233
1234Currently there are five device-independent feature bits defined:
1235
1236 VIRTIO_F_NOTIFY_ON_EMPTY (24) Negotiating this feature
1237 indicates that the driver wants an interrupt if the device runs
1238 out of available descriptors on a virtqueue, even though
1239 interrupts are suppressed using the VRING_AVAIL_F_NO_INTERRUPT
1240 flag or the used_event field. An example of this is the
1241 networking driver: it doesn't need to know every time a packet
1242 is transmitted, but it does need to free the transmitted
1243 packets a finite time after they are transmitted. It can avoid
1244 using a timer if the device interrupts it when all the packets
1245 are transmitted.
1246
1247 VIRTIO_F_RING_INDIRECT_DESC (28) Negotiating this feature
1248 indicates that the driver can use descriptors with the
1249 VRING_DESC_F_INDIRECT flag set, as described in [sub:Indirect-Descriptors]
1250 .
1251
1252 VIRTIO_F_RING_EVENT_IDX(29) This feature enables the used_event
1253 and the avail_event fields. If set, it indicates that the
1254 device should ignore the flags field in the available ring
1255 structure. Instead, the used_event field in this structure is
1256 used by guest to suppress device interrupts. Further, the
1257 driver should ignore the flags field in the used ring
1258 structure. Instead, the avail_event field in this structure is
1259 used by the device to suppress notifications. If unset, the
1260 driver should ignore the used_event field; the device should
1261 ignore the avail_event field; the flags fields are used instead.
1262
1263 VIRTIO_F_BAD_FEATURE(30) This feature should never be
1264 negotiated by the guest; doing so is an indication that the
1265 guest is faulty[footnote:
1266An experimental virtio PCI driver contained in Linux version
12672.6.25 had this problem, and this feature bit can be used to
1268detect it.
1269]
1270
1271 VIRTIO_F_FEATURES_HIGH(31) This feature indicates that the
1272 device supports feature bits 32:63. If unset, feature bits
1273 32:63 are unset.
1274
1275Appendix C: Network Device
1276
1277The virtio network device is a virtual ethernet card, and is the
1278most complex of the devices supported so far by virtio. It has
1279been enhanced rapidly and demonstrates clearly how support for new
1280features should be added to an existing device. Empty buffers are
1281placed in one virtqueue for receiving packets, and outgoing
1282packets are enqueued into another for transmission in that order.
1283A third command queue is used to control advanced filtering
1284features.
1285
1286 Configuration
1287
1288 Subsystem Device ID 1
1289
1290 Virtqueues 0:receiveq. 1:transmitq. 2:controlq[footnote:
1291Only if VIRTIO_NET_F_CTRL_VQ set
1292]
1293
1294 Feature bits
1295
1296 VIRTIO_NET_F_CSUM (0) Device handles packets with partial
1297 checksum
1298
1299 VIRTIO_NET_F_GUEST_CSUM (1) Guest handles packets with partial
1300 checksum
1301
1302 VIRTIO_NET_F_MAC (5) Device has given MAC address.
1303
1304 VIRTIO_NET_F_GSO (6) (Deprecated) device handles packets with
1305 any GSO type.[footnote:
1306It was supposed to indicate segmentation offload support, but
1307upon further investigation it became clear that multiple bits
1308were required.
1309]
1310
1311 VIRTIO_NET_F_GUEST_TSO4 (7) Guest can receive TSOv4.
1312
1313 VIRTIO_NET_F_GUEST_TSO6 (8) Guest can receive TSOv6.
1314
1315 VIRTIO_NET_F_GUEST_ECN (9) Guest can receive TSO with ECN.
1316
1317 VIRTIO_NET_F_GUEST_UFO (10) Guest can receive UFO.
1318
1319 VIRTIO_NET_F_HOST_TSO4 (11) Device can receive TSOv4.
1320
1321 VIRTIO_NET_F_HOST_TSO6 (12) Device can receive TSOv6.
1322
1323 VIRTIO_NET_F_HOST_ECN (13) Device can receive TSO with ECN.
1324
1325 VIRTIO_NET_F_HOST_UFO (14) Device can receive UFO.
1326
1327 VIRTIO_NET_F_MRG_RXBUF (15) Guest can merge receive buffers.
1328
1329 VIRTIO_NET_F_STATUS (16) Configuration status field is
1330 available.
1331
1332 VIRTIO_NET_F_CTRL_VQ (17) Control channel is available.
1333
1334 VIRTIO_NET_F_CTRL_RX (18) Control channel RX mode support.
1335
1336 VIRTIO_NET_F_CTRL_VLAN (19) Control channel VLAN filtering.
1337
1338 Device configuration layout Two configuration fields are
1339 currently defined. The mac address field always exists (though
1340 is only valid if VIRTIO_NET_F_MAC is set), and the status field
1341 only exists if VIRTIO_NET_F_STATUS is set. Only one bit is
1342 currently defined for the status field: VIRTIO_NET_S_LINK_UP.

#define VIRTIO_NET_S_LINK_UP 1
1343
1344
1345
1346struct virtio_net_config {
1347
1348 u8 mac[6];
1349
1350 u16 status;
1351
1352};
1353
1354 Device Initialization
1355
1356 The initialization routine should identify the receive and
1357 transmission virtqueues.
1358
1359 If the VIRTIO_NET_F_MAC feature bit is set, the configuration
1360 space “mac” entry indicates the “physical” address of the
1361 network card, otherwise a private MAC address should be
1362 assigned. All guests are expected to negotiate this feature if
1363 it is set.
1364
1365 If the VIRTIO_NET_F_CTRL_VQ feature bit is negotiated, identify
1366 the control virtqueue.
1367
1368 If the VIRTIO_NET_F_STATUS feature bit is negotiated, the link
1369 status can be read from the bottom bit of the “status” config
1370 field. Otherwise, the link should be assumed active.
1371
1372 The receive virtqueue should be filled with receive buffers.
1373 This is described in detail below in “Setting Up Receive
1374 Buffers”.
1375
1376 A driver can indicate that it will generate checksumless
1377 packets by negotiating the VIRTIO_NET_F_CSUM feature. This “
1378 checksum offload” is a common feature on modern network cards.
1379
1380 If that feature is negotiated, a driver can use TCP or UDP
1381 segmentation offload by negotiating the VIRTIO_NET_F_HOST_TSO4
1382 (IPv4 TCP), VIRTIO_NET_F_HOST_TSO6 (IPv6 TCP) and
1383 VIRTIO_NET_F_HOST_UFO (UDP fragmentation) features. It should
1384 not send TCP packets requiring segmentation offload which have
1385 the Explicit Congestion Notification bit set, unless the
1386 VIRTIO_NET_F_HOST_ECN feature is negotiated.[footnote:
1387This is a common restriction in real, older network cards.
1388]
1389
1390 The converse features are also available: a driver can save the
1391 virtual device some work by negotiating these features.[footnote:
1392For example, a network packet transported between two guests on
1393the same system may not require checksumming at all, nor
1394segmentation, if both guests are amenable.
1395] The VIRTIO_NET_F_GUEST_CSUM feature indicates that partially
1396 checksummed packets can be received, and if it can do that then
1397 the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
1398 VIRTIO_NET_F_GUEST_UFO and VIRTIO_NET_F_GUEST_ECN are the input
1399 equivalents of the features described above. See “Receiving
1400 Packets” below.
1401
1402 Device Operation
1403
1404Packets are transmitted by placing them in the transmitq, and
1405buffers for incoming packets are placed in the receiveq. In each
1406case, the packet itself is preceded by a header:
1407
1408struct virtio_net_hdr {
1409
1410#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1
1411
1412 u8 flags;
1413
1414#define VIRTIO_NET_HDR_GSO_NONE 0
1415
1416#define VIRTIO_NET_HDR_GSO_TCPV4 1
1417
1418#define VIRTIO_NET_HDR_GSO_UDP 3
1419
1420#define VIRTIO_NET_HDR_GSO_TCPV6 4
1421
1422#define VIRTIO_NET_HDR_GSO_ECN 0x80
1423
1424 u8 gso_type;
1425
1426 u16 hdr_len;
1427
1428 u16 gso_size;
1429
1430 u16 csum_start;
1431
1432 u16 csum_offset;
1433
1434/* Only if VIRTIO_NET_F_MRG_RXBUF: */
1435
1436 u16 num_buffers;
1437
1438};
1439
1440The controlq is used to control device features such as
1441filtering.
1442
1443 Packet Transmission
1444
1445Transmitting a single packet is simple, but varies depending on
1446the different features the driver negotiated.
1447
1448 If the driver negotiated VIRTIO_NET_F_CSUM, and the packet has
1449 not been fully checksummed, then the virtio_net_hdr's fields
1450 are set as follows. Otherwise, the packet must be fully
1451 checksummed, and flags is zero.
1452
1453 flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM bit set (see the sketch after this list),
1454
1455 <ite:csum_start-is-set>csum_start is set to the offset within
1456 the packet to begin checksumming, and
1457
1458 csum_offset indicates how many bytes after the csum_start the
1459 new (16 bit ones' complement) checksum should be placed.[footnote:
1460For example, consider a partially checksummed TCP (IPv4) packet.
1461It will have a 14 byte ethernet header and 20 byte IP header
1462followed by the TCP header (with the TCP checksum field 16 bytes
1463into that header). csum_start will be 14+20 = 34 (the TCP
1464checksum includes the header), and csum_offset will be 16. The
1465value in the TCP checksum field will be the sum of the TCP pseudo
1466header, so that replacing it by the ones' complement checksum of
1467the TCP header and body will give the correct result.
1468]
1469
1470 <enu:If-the-driver>If the driver negotiated
1471 VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO, and the packet requires
1472 TCP segmentation or UDP fragmentation, then the “gso_type”
1473 field is set to VIRTIO_NET_HDR_GSO_TCPV4, TCPV6 or UDP.
1474 (Otherwise, it is set to VIRTIO_NET_HDR_GSO_NONE). In this
1475 case, packets larger than 1514 bytes can be transmitted: the
1476 metadata indicates how to replicate the packet header to cut it
1477 into smaller packets. The other gso fields are set:
1478
1479 hdr_len is a hint to the device as to how much of the header
1480 needs to be kept to copy into each packet, usually set to the
1481 length of the headers, including the transport header.[footnote:
1482Due to various bugs in implementations, this field is not useful
1483as a guarantee of the transport header size.
1484]
1485
1486 gso_size is the size of the packet beyond that header (ie.
1487 MSS).
1488
1489 If the driver negotiated the VIRTIO_NET_F_HOST_ECN feature, the
1490 VIRTIO_NET_HDR_GSO_ECN bit may be set in “gso_type” as well,
1491 indicating that the TCP packet has the ECN bit set.[footnote:
1492This case is not handled by some older hardware, so is called out
1493specifically in the protocol.
1494]
1495
1496 If the driver negotiated the VIRTIO_NET_F_MRG_RXBUF feature,
1497 the num_buffers field is set to zero.
1498
1499 The header and packet are added as one output buffer to the
1500 transmitq, and the device is notified of the new entry (see [sub:Notifying-The-Device]
1501 ).[footnote:
1502Note that the header will be two bytes longer for the
1503VIRTIO_NET_F_MRG_RXBUF case.
1504]
1505
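As a rough illustration of the steps above, here is a sketch of the
header for a partially checksummed TCP/IPv4 packet that needs no
segmentation; the 34/16 offsets come from the footnote above, and
add_buf()/kick(), pkt and pkt_len are hypothetical driver helpers and
variables, not part of the specification.

struct virtio_net_hdr hdr = {
        .flags       = VIRTIO_NET_HDR_F_NEEDS_CSUM,
        .gso_type    = VIRTIO_NET_HDR_GSO_NONE, /* no segmentation needed */
        .csum_start  = 14 + 20,        /* ethernet + IPv4 headers, see footnote */
        .csum_offset = 16,             /* TCP checksum field within the TCP header */
        /* num_buffers is left at zero when VIRTIO_NET_F_MRG_RXBUF was negotiated */
};

add_buf(transmitq, &hdr, sizeof(hdr), pkt, pkt_len);    /* one read-only buffer */
kick(transmitq);                                        /* unless suppressed */
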
1506 Packet Transmission Interrupt
1507
1508Often a driver will suppress transmission interrupts using the
1509VRING_AVAIL_F_NO_INTERRUPT flag (see [sub:Receiving-Used-Buffers]
1510) and check for used packets in the transmit path of following
1511packets. However, it will still receive interrupts if the
1512VIRTIO_F_NOTIFY_ON_EMPTY feature is negotiated, indicating that
1513the transmission queue is completely emptied.
1514
1515The normal behavior in this interrupt handler is to retrieve the
1516new descriptors from the used ring and free the corresponding
1517headers and packets.
1518
1519 Setting Up Receive Buffers
1520
1521It is generally a good idea to keep the receive virtqueue as
1522fully populated as possible: if it runs out, network performance
1523will suffer.
1524
1525If the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6 or
1526VIRTIO_NET_F_GUEST_UFO features are used, the Guest will need to
1527accept packets of up to 65550 bytes long (the maximum size of a
1528TCP or UDP packet, plus the 14 byte ethernet header), otherwise
15291514 bytes. So unless VIRTIO_NET_F_MRG_RXBUF is negotiated, every
1530buffer in the receive queue needs to be at least this length [footnote:
1531Obviously each one can be split across multiple descriptor
1532elements.
1533].
1534
1535If VIRTIO_NET_F_MRG_RXBUF is negotiated, each buffer must be at
1536least the size of the struct virtio_net_hdr.
1537
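A rough sketch of this sizing rule; alloc_buf(), add_buf(), kick()
and the negotiated_* flags are assumptions for the example, and the
non-mergeable sizes also leave room for the leading struct
virtio_net_hdr.

size_t len;

if (negotiated_mrg_rxbuf)
        len = 4096;     /* anything >= sizeof(struct virtio_net_hdr) will do */
else if (negotiated_guest_tso4 || negotiated_guest_tso6 || negotiated_guest_ufo)
        len = sizeof(struct virtio_net_hdr) + 65550;
else
        len = sizeof(struct virtio_net_hdr) + 1514;

while (receiveq_has_free_descriptors())
        add_buf(receiveq, alloc_buf(len), len);         /* write-only buffer */
kick(receiveq);
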
1538 Packet Receive Interrupt
1539
1540When a packet is copied into a buffer in the receiveq, the
1541optimal path is to disable further interrupts for the receiveq
1542(see [sub:Receiving-Used-Buffers]) and process packets until no
1543more are found, then re-enable them.
1544
1545Processing a packet involves:
1546
1547 If the driver negotiated the VIRTIO_NET_F_MRG_RXBUF feature,
1548 then the “num_buffers” field indicates how many descriptors
1549 this packet is spread over (including this one). This allows
1550 receipt of large packets without having to allocate large
1551 buffers. In this case, there will be at least “num_buffers” in
1552 the used ring, and they should be chained together to form a
1553 single packet. The other buffers will not begin with a struct
1554 virtio_net_hdr.
1555
1556 If the VIRTIO_NET_F_MRG_RXBUF feature was not negotiated, or
1557 the “num_buffers” field is one, then the entire packet will be
1558 contained within this buffer, immediately following the struct
1559 virtio_net_hdr.
1560
1561 If the VIRTIO_NET_F_GUEST_CSUM feature was negotiated, the
1562 VIRTIO_NET_HDR_F_NEEDS_CSUM bit in the “flags” field may be
1563 set: if so, the checksum on the packet is incomplete and the “
1564 csum_start” and “csum_offset” fields indicate how to calculate
1565 it (see [ite:csum_start-is-set]).
1566
1567 If the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options were
1568 negotiated, then the “gso_type” may be something other than
1569 VIRTIO_NET_HDR_GSO_NONE, and the “gso_size” field indicates the
1570 desired MSS (see [enu:If-the-driver]).

 Control Virtqueue
1571
1572The driver uses the control virtqueue (if VIRTIO_NET_F_CTRL_VQ is
1573negotiated) to send commands to manipulate various features of
1574the device which would not easily map into the configuration
1575space.
1576
1577All commands are of the following form:
1578
1579struct virtio_net_ctrl {
1580
1581 u8 class;
1582
1583 u8 command;
1584
1585 u8 command-specific-data[];
1586
1587 u8 ack;
1588
1589};
1590
1591
1592
1593/* ack values */
1594
1595#define VIRTIO_NET_OK 0
1596
1597#define VIRTIO_NET_ERR 1
1598
1599The class, command and command-specific-data are set by the
1600driver, and the device sets the ack byte. There is little the
1601driver can do except issue a diagnostic if the ack byte is not
1602VIRTIO_NET_OK.
1603
1604 Packet Receive Filtering
1605
1606If the VIRTIO_NET_F_CTRL_RX feature is negotiated, the driver can
1607send control commands for promiscuous mode, multicast receiving,
1608and filtering of MAC addresses.
1609
1610Note that in general, these commands are best-effort: unwanted
1611packets may still arrive.
1612
1613 Setting Promiscuous Mode
1614
1615#define VIRTIO_NET_CTRL_RX 0
1616
1617 #define VIRTIO_NET_CTRL_RX_PROMISC 0
1618
1619 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1
1620
1621The class VIRTIO_NET_CTRL_RX has two commands:
1622VIRTIO_NET_CTRL_RX_PROMISC turns promiscuous mode on and off, and
1623VIRTIO_NET_CTRL_RX_ALLMULTI turns all-multicast receive on and
1624off. The command-specific-data is one byte containing 0 (off) or
16251 (on).
1626
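For example, a sketch of turning promiscuous mode on;
send_ctrl_cmd() and report_diagnostic() are hypothetical helpers,
where send_ctrl_cmd() places the class, command and
command-specific-data as read-only buffers, the ack byte as a
write-only buffer, and waits for the device to use them.

uint8_t promisc_on = 1;                 /* command-specific-data: 0 = off, 1 = on */
uint8_t ack = VIRTIO_NET_ERR;

send_ctrl_cmd(controlq, VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_PROMISC,
              &promisc_on, sizeof(promisc_on), &ack);
if (ack != VIRTIO_NET_OK)
        report_diagnostic("failed to enable promiscuous mode");
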
1627 Setting MAC Address Filtering
1628
1629struct virtio_net_ctrl_mac {
1630
1631 u32 entries;
1632
1633 u8 macs[entries][ETH_ALEN];
1634
1635};
1636
1637
1638
1639#define VIRTIO_NET_CTRL_MAC 1
1640
1641 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0
1642
1643The device can filter incoming packets by any number of
1644destination MAC addresses.[footnote:
1645Since there are no guarantees, it can use a hash filter
1646or silently switch to allmulti or promiscuous mode if it is given
1647too many addresses.
1648] This table is set using the class VIRTIO_NET_CTRL_MAC and the
1649command VIRTIO_NET_CTRL_MAC_TABLE_SET. The command-specific-data
1650is two variable length tables of 6-byte MAC addresses. The first
1651table contains unicast addresses, and the second contains
1652multicast addresses.
1653
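A sketch of programming the filter with one unicast and no multicast
addresses; the two tables follow the class and command bytes as
command-specific-data. The example MAC address and send_ctrl_cmd2()
are assumptions, the latter standing for a helper that adds each
pointer/length pair as a separate read-only buffer before the
write-only ack byte.

struct { uint32_t entries; uint8_t macs[1][6]; } uc = {
        .entries = 1,
        .macs    = { { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56 } },
};
struct { uint32_t entries; } mc = { .entries = 0 };     /* empty multicast table */
uint8_t ack = VIRTIO_NET_ERR;

send_ctrl_cmd2(controlq, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
               &uc, sizeof(uc), &mc, sizeof(mc), &ack);
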
1654 VLAN Filtering
1655
1656If the driver negotiates the VIRTIO_NET_F_CTRL_VLAN feature, it
1657can control a VLAN filter table in the device.
1658
1659#define VIRTIO_NET_CTRL_VLAN 2
1660
1661 #define VIRTIO_NET_CTRL_VLAN_ADD 0
1662
1663 #define VIRTIO_NET_CTRL_VLAN_DEL 1
1664
1665Both the VIRTIO_NET_CTRL_VLAN_ADD and VIRTIO_NET_CTRL_VLAN_DEL
1666commands take a 16-bit VLAN id as the command-specific-data.
1667
1668Appendix D: Block Device
1669
1670The virtio block device is a simple virtual block device (ie.
1671disk). Read and write requests (and other exotic requests) are
1672placed in the queue, and serviced (probably out of order) by the
1673device except where noted.
1674
1675 Configuration
1676
1677 Subsystem Device ID 2
1678
1679 Virtqueues 0:requestq.
1680
1681 Feature bits
1682
1683 VIRTIO_BLK_F_BARRIER (0) Host supports request barriers.
1684
1685 VIRTIO_BLK_F_SIZE_MAX (1) Maximum size of any single segment is
1686 in “size_max”.
1687
1688 VIRTIO_BLK_F_SEG_MAX (2) Maximum number of segments in a
1689 request is in “seg_max”.
1690
1691 VIRTIO_BLK_F_GEOMETRY (4) Disk-style geometry specified in “
1692 geometry”.
1693
1694 VIRTIO_BLK_F_RO (5) Device is read-only.
1695
1696 VIRTIO_BLK_F_BLK_SIZE (6) Block size of disk is in “blk_size”.
1697
1698 VIRTIO_BLK_F_SCSI (7) Device supports scsi packet commands.
1699
1700 VIRTIO_BLK_F_FLUSH (9) Cache flush command support.
1701
1702
1703
1704 Device configuration layout The capacity of the device
1705 (expressed in 512-byte sectors) is always present. The
1706 availability of the others all depend on various feature bits
1707 as indicated above.

struct virtio_blk_config {
1708
1709 u64 capacity;
1710
1711 u32 size_max;
1712
1713 u32 seg_max;
1714
1715 struct virtio_blk_geometry {
1716
1717 u16 cylinders;
1718
1719 u8 heads;
1720
1721 u8 sectors;
1722
1723 } geometry;
1724
1725 u32 blk_size;
1726
1727
1728
1729};
1730
1731 Device Initialization
1732
1733 The device size should be read from the “capacity”
1734 configuration field. No requests should be submitted which go
1735 beyond this limit.
1736
1737 If the VIRTIO_BLK_F_BLK_SIZE feature is negotiated, the
1738 blk_size field can be read to determine the optimal sector size
1739 for the driver to use. This does not affect the units used in
1740 the protocol (always 512 bytes), but awareness of the correct
1741 value can affect performance.
1742
1743 If the VIRTIO_BLK_F_RO feature is set by the device, any write
1744 requests will fail.
1745
1746
1747
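A sketch of these initialization steps; virtio_config_read() and
has_feature() are hypothetical accessors for the device-specific
configuration space and the negotiated feature bits.

uint64_t capacity;              /* always valid, in 512-byte sectors */
uint32_t blk_size = 512;
int read_only;

virtio_config_read(dev, offsetof(struct virtio_blk_config, capacity),
                   &capacity, sizeof(capacity));
if (has_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
        virtio_config_read(dev, offsetof(struct virtio_blk_config, blk_size),
                           &blk_size, sizeof(blk_size));
read_only = has_feature(dev, VIRTIO_BLK_F_RO);
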
1748 Device Operation
1749
1750The driver queues requests to the virtqueue, and they are used by
1751the device (not necessarily in order). Each request is of form:
1752
1753struct virtio_blk_req {
1754
1755
1756
1757 u32 type;
1758
1759 u32 ioprio;
1760
1761 u64 sector;
1762
1763 char data[][512];
1764
1765 u8 status;
1766
1767};
1768
1769If the device has the VIRTIO_BLK_F_SCSI feature, it can also
1770support scsi packet command requests. Each of these requests is
of the form:

struct virtio_scsi_pc_req {
1771
1772 u32 type;
1773
1774 u32 ioprio;
1775
1776 u64 sector;
1777
1778 char cmd[];
1779
1780 char data[][512];
1781
1782#define SCSI_SENSE_BUFFERSIZE 96
1783
1784 u8 sense[SCSI_SENSE_BUFFERSIZE];
1785
1786 u32 errors;
1787
1788 u32 data_len;
1789
1790 u32 sense_len;
1791
1792 u32 residual;
1793
1794 u8 status;
1795
1796};
1797
1798The type of the request is either a read (VIRTIO_BLK_T_IN), a
1799write (VIRTIO_BLK_T_OUT), a scsi packet command
1800(VIRTIO_BLK_T_SCSI_CMD or VIRTIO_BLK_T_SCSI_CMD_OUT[footnote:
1801the SCSI_CMD and SCSI_CMD_OUT types are equivalent, the device
1802does not distinguish between them
1803]) or a flush (VIRTIO_BLK_T_FLUSH or VIRTIO_BLK_T_FLUSH_OUT[footnote:
1804the FLUSH and FLUSH_OUT types are equivalent, the device does not
1805distinguish between them
1806]). If the device has the VIRTIO_BLK_F_BARRIER feature, the high
1807bit (VIRTIO_BLK_T_BARRIER) indicates that this request acts as a
1808barrier and that all preceding requests must be complete before
1809this one, and all following requests must not be started until
1810this is complete. Note that a barrier does not flush caches in
1811the underlying backend device on the host, and thus does not
1812serve as a data consistency guarantee. The driver must use a
1813FLUSH request to flush the host cache.
1814
1815#define VIRTIO_BLK_T_IN 0
1816
1817#define VIRTIO_BLK_T_OUT 1
1818
1819#define VIRTIO_BLK_T_SCSI_CMD 2
1820
1821#define VIRTIO_BLK_T_SCSI_CMD_OUT 3
1822
1823#define VIRTIO_BLK_T_FLUSH 4
1824
1825#define VIRTIO_BLK_T_FLUSH_OUT 5
1826
1827#define VIRTIO_BLK_T_BARRIER 0x80000000
1828
1829The ioprio field is a hint about the relative priorities of
1830requests to the device: higher numbers indicate more important
1831requests.
1832
1833The sector number indicates the offset (multiplied by 512) where
1834the read or write is to occur. This field is unused and set to 0
1835for scsi packet commands and for flush commands.
1836
1837The cmd field is only present for scsi packet command requests,
1838and indicates the command to perform. This field must reside in a
1839single, separate read-only buffer; command length can be derived
1840from the length of this buffer.
1841
1842Note that these first three (four for scsi packet commands)
1843fields are always read-only: the data field is either read-only
1844or write-only, depending on the request. The size of the read or
1845write can be derived from the total size of the request buffers.
1846
1847The sense field is only present for scsi packet command requests,
1848and indicates the buffer for scsi sense data.
1849
1850The data_len field is only present for scsi packet command
1851requests, this field is deprecated, and should be ignored by the
1852driver. Historically, devices copied data length there.
1853
1854The sense_len field is only present for scsi packet command
1855requests and indicates the number of bytes actually written to
1856the sense buffer.
1857
1858The residual field is only present for scsi packet command
1859requests and indicates the residual size, calculated as data
1860length - number of bytes actually transferred.
1861
1862The final status byte is written by the device: either
1863VIRTIO_BLK_S_OK for success, VIRTIO_BLK_S_IOERR for host or guest
1864error or VIRTIO_BLK_S_UNSUPP for a request unsupported by host:

#define VIRTIO_BLK_S_OK 0
1865
1866#define VIRTIO_BLK_S_IOERR 1
1867
1868#define VIRTIO_BLK_S_UNSUPP 2
1869
1870Historically, devices assumed that the fields type, ioprio and
1871sector reside in a single, separate read-only buffer; the fields
1872errors, data_len, sense_len and residual reside in a single,
1873separate write-only buffer; the sense field resides in a
1874separate write-only buffer of size 96 bytes, by itself; and the
1875status field resides in a separate write-only buffer of size 1
1876byte, by itself.
1878
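As a worked example of the request format and the historical buffer
layout just described, here is a sketch of issuing an 8-sector read;
add_buf_sg() and kick() are hypothetical helpers, where add_buf_sg()
adds each (address, length) pair as one descriptor, with the final
flag selecting a device-writable (write-only) element.

struct { uint32_t type; uint32_t ioprio; uint64_t sector; } hdr = {
        .type   = VIRTIO_BLK_T_IN,
        .ioprio = 0,
        .sector = 1024,                 /* byte offset 1024 * 512 */
};
uint8_t status = 0xff;                  /* the device overwrites this */
static char data[8 * 512];

add_buf_sg(requestq,
           &hdr,    sizeof(hdr),  0,    /* read-only: type, ioprio, sector */
           data,    sizeof(data), 1,    /* write-only: data for a read     */
           &status, 1,            1);   /* write-only: status byte         */
kick(requestq);
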
1879Appendix E: Console Device
1880
1881The virtio console device is a simple device for data input and
1882output. A device may have one or more ports. Each port has a pair
1883of input and output virtqueues. Moreover, a device has a pair of
1884control IO virtqueues. The control virtqueues are used to
1885communicate information between the device and the driver about
1886ports being opened and closed on either side of the connection,
1887indication from the host about whether a particular port is a
1888console port, adding new ports, port hot-plug/unplug, etc., and
1889indication from the guest about whether a port or a device was
1890successfully added, port open/close, etc. For data IO, one or
1891more empty buffers are placed in the receive queue for incoming
1892data and outgoing characters are placed in the transmit queue.
1893
1894 Configuration
1895
1896 Subsystem Device ID 3
1897
1898 Virtqueues 0:receiveq(port0). 1:transmitq(port0), 2:control
1899 receiveq[footnote:
1900Ports 2 onwards only if VIRTIO_CONSOLE_F_MULTIPORT is set
1901], 3:control transmitq, 4:receiveq(port1), 5:transmitq(port1),
1902 ...
1903
1904 Feature bits
1905
1906 VIRTIO_CONSOLE_F_SIZE (0) Configuration cols and rows fields
1907 are valid.
1908
1909 VIRTIO_CONSOLE_F_MULTIPORT(1) Device has support for multiple
1910 ports; configuration fields nr_ports and max_nr_ports are
1911 valid and control virtqueues will be used.
1912
1913 Device configuration layout The size of the console is supplied
1914 in the configuration space if the VIRTIO_CONSOLE_F_SIZE feature
1915 is set. Furthermore, if the VIRTIO_CONSOLE_F_MULTIPORT feature
1916 is set, the maximum number of ports supported by the device can
1917 be fetched.struct virtio_console_config {
1918
1919 u16 cols;
1920
1921 u16 rows;
1922
1923
1924
1925 u32 max_nr_ports;
1926
1927};
1928
1929 Device Initialization
1930
1931 If the VIRTIO_CONSOLE_F_SIZE feature is negotiated, the driver
1932 can read the console dimensions from the configuration fields.
1933
1934 If the VIRTIO_CONSOLE_F_MULTIPORT feature is negotiated, the
1935 driver can spawn multiple ports, not all of which may be
1936 attached to a console. Some could be generic ports. In this
1937 case, the control virtqueues are enabled and, according to the
1938 max_nr_ports configuration-space value, the appropriate number
1939 of virtqueues is created. A control message indicating the
1940 driver is ready is sent to the host. The host can then send
1941 control messages for adding new ports to the device. After
1942 creating and initializing each port, a
1943 VIRTIO_CONSOLE_PORT_READY control message is sent to the host
1944 for that port so the host can let us know of any additional
1945 configuration options set for that port.
1946
1947 The receiveq for each port is populated with one or more
1948 receive buffers.
1949
1950 Device Operation
1951
1952 For output, a buffer containing the characters is placed in the
1953 port's transmitq.[footnote:
1954Because this is high importance and low bandwidth, the current
1955Linux implementation polls for the buffer to be used, rather than
1956waiting for an interrupt, simplifying the implementation
1957significantly. However, for generic serial ports with the
1958O_NONBLOCK flag set, the polling limitation is relaxed and the
1959consumed buffers are freed upon the next write or poll call or
1960when a port is closed or hot-unplugged.
1961]
1962
1963 When a buffer is used in the receiveq (signalled by an
1964 interrupt), the contents are the input to the port associated
1965 with the virtqueue for which the notification was received.
1966
1967 If the driver negotiated the VIRTIO_CONSOLE_F_SIZE feature, a
1968 configuration change interrupt may occur. The updated size can
1969 be read from the configuration fields.
1970
1971 If the driver negotiated the VIRTIO_CONSOLE_F_MULTIPORT
1972 feature, active ports are announced by the host using the
1973 VIRTIO_CONSOLE_PORT_ADD control message. The same message is
1974 used for port hot-plug as well.
1975
1976 If the host specified a port `name', a sysfs attribute is
1977 created with the name filled in, so that udev rules can be
1978 written that can create a symlink from the port's name to the
1979 char device for port discovery by applications in the guest.
1980
1981 Changes to ports' state are effected by control messages.
1982 Appropriate action is taken on the port indicated in the
1983 control message. The layout of the structure of the control
1984 buffer and the events associated are:

struct virtio_console_control {
1985
1986 uint32_t id; /* Port number */
1987
1988 uint16_t event; /* The kind of control event */
1989
1990 uint16_t value; /* Extra information for the event */
1991
1992};
1993
1994
1995
1996/* Some events for the internal messages (control packets) */
1997
1998
1999
2000#define VIRTIO_CONSOLE_DEVICE_READY 0
2001
2002#define VIRTIO_CONSOLE_PORT_ADD 1
2003
2004#define VIRTIO_CONSOLE_PORT_REMOVE 2
2005
2006#define VIRTIO_CONSOLE_PORT_READY 3
2007
2008#define VIRTIO_CONSOLE_CONSOLE_PORT 4
2009
2010#define VIRTIO_CONSOLE_RESIZE 5
2011
2012#define VIRTIO_CONSOLE_PORT_OPEN 6
2013
2014#define VIRTIO_CONSOLE_PORT_NAME 7
2015
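A sketch of a guest-side handler for one control message received on
the control receiveq; the per-port helper names are assumptions
standing in for whatever the driver does with each event.

static void handle_control_message(struct virtio_console_control *c)
{
        switch (c->event) {
        case VIRTIO_CONSOLE_PORT_ADD:
                add_port(c->id);        /* reply with VIRTIO_CONSOLE_PORT_READY */
                break;
        case VIRTIO_CONSOLE_CONSOLE_PORT:
                mark_port_as_console(c->id);
                break;
        case VIRTIO_CONSOLE_PORT_OPEN:
                set_host_connected(c->id, c->value);
                break;
        case VIRTIO_CONSOLE_PORT_REMOVE:
                remove_port(c->id);
                break;
        default:
                /* VIRTIO_CONSOLE_RESIZE, VIRTIO_CONSOLE_PORT_NAME, ... */
                break;
        }
}
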
2016Appendix F: Entropy Device
2017
2018The virtio entropy device supplies high-quality randomness for
2019guest use.
2020
2021 Configuration
2022
2023 Subsystem Device ID 4
2024
2025 Virtqueues 0:requestq.
2026
2027 Feature bits None currently defined
2028
2029 Device configuration layout None currently defined.
2030
2031 Device Initialization
2032
2033 The virtqueue is initialized.
2034
2035 Device Operation
2036
2037When the driver requires random bytes, it places the descriptor
2038of one or more buffers in the queue. Each buffer will be
2039completely filled with random data by the device.
2040
2041Appendix G: Memory Balloon Device
2042
2043The virtio memory balloon device is a primitive device for
2044managing guest memory: the device asks for a certain amount of
2045memory, and the guest supplies it (or withdraws it, if the device
2046has more than it asks for). This allows the guest to adapt to
2047changes in allowance of underlying physical memory. If the
2048feature is negotiated, the device can also be used to communicate
2049guest memory statistics to the host.
2050
2051 Configuration
2052
2053 Subsystem Device ID 5
2054
2055 Virtqueues 0:inflateq. 1:deflateq. 2:statsq.[footnote:
2056Only if VIRTIO_BALLOON_F_STATS_VQ set
2057]
2058
2059 Feature bits
2060
2061 VIRTIO_BALLOON_F_MUST_TELL_HOST (0) Host must be told before
2062 pages from the balloon are used.
2063
2064 VIRTIO_BALLOON_F_STATS_VQ (1) A virtqueue for reporting guest
2065 memory statistics is present.
2066
2067 Device configuration layout Both fields of this configuration
2068 are always available. Note that they are little endian, despite
2069 the convention that device fields are guest endian:

struct virtio_balloon_config {
2070
2071 u32 num_pages;
2072
2073 u32 actual;
2074
2075};
2076
2077 Device Initialization
2078
2079 The inflate and deflate virtqueues are identified.
2080
2081 If the VIRTIO_BALLOON_F_STATS_VQ feature bit is negotiated:
2082
2083 Identify the stats virtqueue.
2084
2085 Add one empty buffer to the stats virtqueue and notify the
2086 host.
2087
2088Device operation begins immediately.
2089
2090 Device Operation
2091
2092 Memory Ballooning The device is driven by the receipt of a
2093 configuration change interrupt.
2094
2095 The “num_pages” configuration field is examined. If this is
2096 greater than the “actual” number of pages, memory must be given
2097 to the balloon. If it is less than the “actual” number of
2098 pages, memory may be taken back from the balloon for general
2099 use.
2100
2101 To supply memory to the balloon (aka. inflate):
2102
2103 The driver constructs an array of addresses of unused memory
2104 pages. These addresses are divided by 4096[footnote:
2105This is historical, and independent of the guest page size
2106] and the descriptor describing the resulting 32-bit array is
2107 added to the inflateq.
2108
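A sketch of one inflation step; alloc_unused_page_addr(), add_buf()
and kick() are hypothetical helpers, and 256 pages per request is an
arbitrary batch size chosen for the example.

uint32_t pfns[256];
unsigned int i;

for (i = 0; i < 256; i++)
        pfns[i] = (uint32_t)(alloc_unused_page_addr() / 4096);  /* always 4096, see footnote */

add_buf(inflateq, pfns, sizeof(pfns));  /* read-only for the device */
kick(inflateq);
/* once the device has used the buffer, update "actual" in configuration space */
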
2109 To remove memory from the balloon (aka. deflate):
2110
2111 The driver constructs an array of addresses of memory pages it
2112 has previously given to the balloon, as described above. This
2113 descriptor is added to the deflateq.
2114
2115 If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is set, the
2116 guest may not use these requested pages until that descriptor
2117 in the deflateq has been used by the device.
2118
2119 Otherwise, the guest may begin to re-use pages previously given
2120 to the balloon before the device has acknowledged their
2121 withdrawal. [footnote:
2122In this case, deflation advice is merely a courtesy
2123]
2124
2125 In either case, once the device has completed the inflation or
2126 deflation, the “actual” field of the configuration should be
2127 updated to reflect the new number of pages in the balloon.[footnote:
2128As updates to configuration space are not atomic, this field
2129isn't particularly reliable, but can be used to diagnose buggy
2130guests.
2131]
2132
2133 Memory Statistics
2134
2135The stats virtqueue is atypical because communication is driven
2136by the device (not the driver). The channel becomes active at
2137driver initialization time when the driver adds an empty buffer
2138and notifies the device. A request for memory statistics proceeds
2139as follows:
2140
2141 The device pushes the buffer onto the used ring and sends an
2142 interrupt.
2143
2144 The driver pops the used buffer and discards it.
2145
2146 The driver collects memory statistics and writes them into a
2147 new buffer.
2148
2149 The driver adds the buffer to the virtqueue and notifies the
2150 device.
2151
2152 The device pops the buffer (retaining it to initiate a
2153 subsequent request) and consumes the statistics.
2154
2155 Memory Statistics Format Each statistic consists of a 16 bit
2156 tag and a 64 bit value. Both quantities are represented in the
2157 native endian of the guest. All statistics are optional and the
2158 driver may choose which ones to supply. To guarantee backwards
2159 compatibility, unsupported statistics should be omitted.
2160
2161 struct virtio_balloon_stat {
2162
2163#define VIRTIO_BALLOON_S_SWAP_IN 0
2164
2165#define VIRTIO_BALLOON_S_SWAP_OUT 1
2166
2167#define VIRTIO_BALLOON_S_MAJFLT 2
2168
2169#define VIRTIO_BALLOON_S_MINFLT 3
2170
2171#define VIRTIO_BALLOON_S_MEMFREE 4
2172
2173#define VIRTIO_BALLOON_S_MEMTOT 5
2174
2175 u16 tag;
2176
2177 u64 val;
2178
2179} __attribute__((packed));
2180
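A sketch of answering one statistics request using the tags listed
below; the get_*() helpers and add_buf()/kick() are assumptions
standing in for however the guest collects the numbers and adds the
buffer to the stats virtqueue.

struct virtio_balloon_stat stats[] = {
        { .tag = VIRTIO_BALLOON_S_MEMFREE, .val = get_free_memory_bytes()  },
        { .tag = VIRTIO_BALLOON_S_MEMTOT,  .val = get_total_memory_bytes() },
        { .tag = VIRTIO_BALLOON_S_SWAP_IN, .val = get_bytes_swapped_in()   },
};

add_buf(statsq, stats, sizeof(stats));  /* read-only for the device */
kick(statsq);
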
2181 Tags
2182
2183 VIRTIO_BALLOON_S_SWAP_IN The amount of memory that has been
2184 swapped in (in bytes).
2185
2186 VIRTIO_BALLOON_S_SWAP_OUT The amount of memory that has been
2187 swapped out to disk (in bytes).
2188
2189 VIRTIO_BALLOON_S_MAJFLT The number of major page faults that
2190 have occurred.
2191
2192 VIRTIO_BALLOON_S_MINFLT The number of minor page faults that
2193 have occurred.
2194
2195 VIRTIO_BALLOON_S_MEMFREE The amount of memory not being used
2196 for any purpose (in bytes).
2197
2198 VIRTIO_BALLOON_S_MEMTOT The total amount of memory available
2199 (in bytes).
2200
diff --git a/MAINTAINERS b/MAINTAINERS
index 1d445f57298..069ee3b5c65 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4971,7 +4971,7 @@ M: Paul Mackerras <paulus@samba.org>
4971M: Ingo Molnar <mingo@elte.hu> 4971M: Ingo Molnar <mingo@elte.hu>
4972M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> 4972M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
4973S: Supported 4973S: Supported
4974F: kernel/perf_event*.c 4974F: kernel/events/*
4975F: include/linux/perf_event.h 4975F: include/linux/perf_event.h
4976F: arch/*/kernel/perf_event*.c 4976F: arch/*/kernel/perf_event*.c
4977F: arch/*/kernel/*/perf_event*.c 4977F: arch/*/kernel/*/perf_event*.c
diff --git a/Makefile b/Makefile
index 3241d41dfbf..788511f86a6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
1VERSION = 3 1VERSION = 3
2PATCHLEVEL = 1 2PATCHLEVEL = 1
3SUBLEVEL = 0 3SUBLEVEL = 0
4EXTRAVERSION = -rc2 4EXTRAVERSION = -rc3
5NAME = Wet Seal 5NAME = "Divemaster Edition"
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
8# To see a list of typical targets execute "make help" 8# To see a list of typical targets execute "make help"
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 12485471495..3ff7785b3be 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -162,7 +162,6 @@ config IA64_GENERIC
162 select ACPI_NUMA 162 select ACPI_NUMA
163 select SWIOTLB 163 select SWIOTLB
164 select PCI_MSI 164 select PCI_MSI
165 select DMAR
166 help 165 help
167 This selects the system type of your hardware. A "generic" kernel 166 This selects the system type of your hardware. A "generic" kernel
168 will run on any supported IA-64 system. However, if you configure 167 will run on any supported IA-64 system. However, if you configure
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
index 1d7bca0a396..0e5cd1405e0 100644
--- a/arch/ia64/configs/generic_defconfig
+++ b/arch/ia64/configs/generic_defconfig
@@ -234,3 +234,4 @@ CONFIG_CRYPTO_MD5=y
234# CONFIG_CRYPTO_ANSI_CPRNG is not set 234# CONFIG_CRYPTO_ANSI_CPRNG is not set
235CONFIG_CRC_T10DIF=y 235CONFIG_CRC_T10DIF=y
236CONFIG_MISC_DEVICES=y 236CONFIG_MISC_DEVICES=y
237CONFIG_DMAR=y
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 42c67beadca..1a6f20d4e7e 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -55,6 +55,7 @@ config SPARC64
55 select PERF_USE_VMALLOC 55 select PERF_USE_VMALLOC
56 select IRQ_PREFLOW_FASTEOI 56 select IRQ_PREFLOW_FASTEOI
57 select ARCH_HAVE_NMI_SAFE_CMPXCHG 57 select ARCH_HAVE_NMI_SAFE_CMPXCHG
58 select HAVE_C_RECORDMCOUNT
58 59
59config ARCH_DEFCONFIG 60config ARCH_DEFCONFIG
60 string 61 string
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index 5f5b8bf3f50..bcc98fc3528 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -131,6 +131,15 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
131 *(volatile __u32 *)&lp->lock = ~0U; 131 *(volatile __u32 *)&lp->lock = ~0U;
132} 132}
133 133
134static void inline arch_write_unlock(arch_rwlock_t *lock)
135{
136 __asm__ __volatile__(
137" st %%g0, [%0]"
138 : /* no outputs */
139 : "r" (lock)
140 : "memory");
141}
142
134static inline int arch_write_trylock(arch_rwlock_t *rw) 143static inline int arch_write_trylock(arch_rwlock_t *rw)
135{ 144{
136 unsigned int val; 145 unsigned int val;
@@ -175,8 +184,6 @@ static inline int __arch_read_trylock(arch_rwlock_t *rw)
175 res; \ 184 res; \
176}) 185})
177 186
178#define arch_write_unlock(rw) do { (rw)->lock = 0; } while(0)
179
180#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 187#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
181#define arch_read_lock_flags(rw, flags) arch_read_lock(rw) 188#define arch_read_lock_flags(rw, flags) arch_read_lock(rw)
182#define arch_write_lock_flags(rw, flags) arch_write_lock(rw) 189#define arch_write_lock_flags(rw, flags) arch_write_lock(rw)
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 073936a8b27..96891769497 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -210,14 +210,8 @@ static int inline arch_write_trylock(arch_rwlock_t *lock)
210 return result; 210 return result;
211} 211}
212 212
213#define arch_read_lock(p) arch_read_lock(p)
214#define arch_read_lock_flags(p, f) arch_read_lock(p) 213#define arch_read_lock_flags(p, f) arch_read_lock(p)
215#define arch_read_trylock(p) arch_read_trylock(p)
216#define arch_read_unlock(p) arch_read_unlock(p)
217#define arch_write_lock(p) arch_write_lock(p)
218#define arch_write_lock_flags(p, f) arch_write_lock(p) 214#define arch_write_lock_flags(p, f) arch_write_lock(p)
219#define arch_write_unlock(p) arch_write_unlock(p)
220#define arch_write_trylock(p) arch_write_trylock(p)
221 215
222#define arch_read_can_lock(rw) (!((rw)->lock & 0x80000000UL)) 216#define arch_read_can_lock(rw) (!((rw)->lock & 0x80000000UL))
223#define arch_write_can_lock(rw) (!(rw)->lock) 217#define arch_write_can_lock(rw) (!(rw)->lock)
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index a19f0419547..1aaf8c180be 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -352,8 +352,8 @@ int __init pcic_probe(void)
352 strcpy(pbm->prom_name, namebuf); 352 strcpy(pbm->prom_name, namebuf);
353 353
354 { 354 {
355 extern volatile int t_nmi[1]; 355 extern volatile int t_nmi[4];
356 extern int pcic_nmi_trap_patch[1]; 356 extern int pcic_nmi_trap_patch[4];
357 357
358 t_nmi[0] = pcic_nmi_trap_patch[0]; 358 t_nmi[0] = pcic_nmi_trap_patch[0];
359 t_nmi[1] = pcic_nmi_trap_patch[1]; 359 t_nmi[1] = pcic_nmi_trap_patch[1];
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 64a619d47d3..7ff4669580c 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -39,7 +39,7 @@ typedef struct xpaddr {
39 ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) 39 ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
40 40
41extern unsigned long *machine_to_phys_mapping; 41extern unsigned long *machine_to_phys_mapping;
42extern unsigned int machine_to_phys_order; 42extern unsigned long machine_to_phys_nr;
43 43
44extern unsigned long get_phys_to_machine(unsigned long pfn); 44extern unsigned long get_phys_to_machine(unsigned long pfn);
45extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); 45extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
@@ -87,7 +87,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
87 if (xen_feature(XENFEAT_auto_translated_physmap)) 87 if (xen_feature(XENFEAT_auto_translated_physmap))
88 return mfn; 88 return mfn;
89 89
90 if (unlikely((mfn >> machine_to_phys_order) != 0)) { 90 if (unlikely(mfn >= machine_to_phys_nr)) {
91 pfn = ~0; 91 pfn = ~0;
92 goto try_override; 92 goto try_override;
93 } 93 }
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 988724b236b..ff5790d8e99 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -22,6 +22,8 @@ config KVM
22 depends on HAVE_KVM 22 depends on HAVE_KVM
23 # for device assignment: 23 # for device assignment:
24 depends on PCI 24 depends on PCI
25 # for TASKSTATS/TASK_DELAY_ACCT:
26 depends on NET
25 select PREEMPT_NOTIFIERS 27 select PREEMPT_NOTIFIERS
26 select MMU_NOTIFIER 28 select MMU_NOTIFIER
27 select ANON_INODES 29 select ANON_INODES
@@ -31,6 +33,7 @@ config KVM
31 select KVM_ASYNC_PF 33 select KVM_ASYNC_PF
32 select USER_RETURN_NOTIFIER 34 select USER_RETURN_NOTIFIER
33 select KVM_MMIO 35 select KVM_MMIO
36 select TASKSTATS
34 select TASK_DELAY_ACCT 37 select TASK_DELAY_ACCT
35 ---help--- 38 ---help---
36 Support hosting fully virtualized guest machines using hardware 39 Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 247aae3dc00..0d17c8c50ac 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -17,6 +17,7 @@
17#include <asm/traps.h> /* dotraplinkage, ... */ 17#include <asm/traps.h> /* dotraplinkage, ... */
18#include <asm/pgalloc.h> /* pgd_*(), ... */ 18#include <asm/pgalloc.h> /* pgd_*(), ... */
19#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ 19#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
20#include <asm/vsyscall.h>
20 21
21/* 22/*
22 * Page fault error code bits: 23 * Page fault error code bits:
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index ae3cb23cd89..c95330267f0 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -360,6 +360,15 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
360 } 360 }
361 } 361 }
362 362
363 /* After the PCI-E bus has been walked and all devices discovered,
364 * configure any settings of the fabric that might be necessary.
365 */
366 if (bus) {
367 struct pci_bus *child;
368 list_for_each_entry(child, &bus->children, node)
369 pcie_bus_configure_settings(child, child->self->pcie_mpss);
370 }
371
363 if (!bus) 372 if (!bus)
364 kfree(sd); 373 kfree(sd);
365 374
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3326204e251..add2c2d729c 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -15,7 +15,7 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
15 grant-table.o suspend.o platform-pci-unplug.o \ 15 grant-table.o suspend.o platform-pci-unplug.o \
16 p2m.o 16 p2m.o
17 17
18obj-$(CONFIG_FTRACE) += trace.o 18obj-$(CONFIG_EVENT_TRACING) += trace.o
19 19
20obj-$(CONFIG_SMP) += smp.o 20obj-$(CONFIG_SMP) += smp.o
21obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o 21obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e2345af01af..2d69617950f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(xen_domain_type);
77 77
78unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; 78unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
79EXPORT_SYMBOL(machine_to_phys_mapping); 79EXPORT_SYMBOL(machine_to_phys_mapping);
80unsigned int machine_to_phys_order; 80unsigned long machine_to_phys_nr;
81EXPORT_SYMBOL(machine_to_phys_order); 81EXPORT_SYMBOL(machine_to_phys_nr);
82 82
83struct start_info *xen_start_info; 83struct start_info *xen_start_info;
84EXPORT_SYMBOL_GPL(xen_start_info); 84EXPORT_SYMBOL_GPL(xen_start_info);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 8cce339db5e..20a61427506 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1713,15 +1713,19 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1713void __init xen_setup_machphys_mapping(void) 1713void __init xen_setup_machphys_mapping(void)
1714{ 1714{
1715 struct xen_machphys_mapping mapping; 1715 struct xen_machphys_mapping mapping;
1716 unsigned long machine_to_phys_nr_ents;
1717 1716
1718 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { 1717 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
1719 machine_to_phys_mapping = (unsigned long *)mapping.v_start; 1718 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
1720 machine_to_phys_nr_ents = mapping.max_mfn + 1; 1719 machine_to_phys_nr = mapping.max_mfn + 1;
1721 } else { 1720 } else {
1722 machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; 1721 machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
1723 } 1722 }
1724 machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); 1723#ifdef CONFIG_X86_32
1724 if ((machine_to_phys_mapping + machine_to_phys_nr)
1725 < machine_to_phys_mapping)
1726 machine_to_phys_nr = (unsigned long *)NULL
1727 - machine_to_phys_mapping;
1728#endif
1725} 1729}
1726 1730
1727#ifdef CONFIG_X86_64 1731#ifdef CONFIG_X86_64
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b4533a86d7e..e79dbb95482 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -521,8 +521,6 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
521 native_smp_prepare_cpus(max_cpus); 521 native_smp_prepare_cpus(max_cpus);
522 WARN_ON(xen_smp_intr_init(0)); 522 WARN_ON(xen_smp_intr_init(0));
523 523
524 if (!xen_have_vector_callback)
525 return;
526 xen_init_lock_cpu(0); 524 xen_init_lock_cpu(0);
527 xen_init_spinlocks(); 525 xen_init_spinlocks();
528} 526}
@@ -546,6 +544,8 @@ static void xen_hvm_cpu_die(unsigned int cpu)
546 544
547void __init xen_hvm_smp_init(void) 545void __init xen_hvm_smp_init(void)
548{ 546{
547 if (!xen_have_vector_callback)
548 return;
549 smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; 549 smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
550 smp_ops.smp_send_reschedule = xen_smp_send_reschedule; 550 smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
551 smp_ops.cpu_up = xen_hvm_cpu_up; 551 smp_ops.cpu_up = xen_hvm_cpu_up;
diff --git a/block/Kconfig b/block/Kconfig
index 60be1e0455d..e97934eecec 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -65,6 +65,16 @@ config BLK_DEV_BSG
65 65
66 If unsure, say Y. 66 If unsure, say Y.
67 67
68config BLK_DEV_BSGLIB
69 bool "Block layer SG support v4 helper lib"
70 default n
71 select BLK_DEV_BSG
72 help
73 Subsystems will normally enable this if needed. Users will not
74 normally need to manually enable this.
75
76 If unsure, say N.
77
68config BLK_DEV_INTEGRITY 78config BLK_DEV_INTEGRITY
69 bool "Block layer data integrity support" 79 bool "Block layer data integrity support"
70 ---help--- 80 ---help---
diff --git a/block/Makefile b/block/Makefile
index 0fec4b3fab5..514c6e4f427 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
8 blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o 8 blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
9 9
10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o 10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
11obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
11obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o 12obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
12obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o 13obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
13obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o 14obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index b627558c461..90e1ffdeb41 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1702,6 +1702,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1702int blk_insert_cloned_request(struct request_queue *q, struct request *rq) 1702int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1703{ 1703{
1704 unsigned long flags; 1704 unsigned long flags;
1705 int where = ELEVATOR_INSERT_BACK;
1705 1706
1706 if (blk_rq_check_limits(q, rq)) 1707 if (blk_rq_check_limits(q, rq))
1707 return -EIO; 1708 return -EIO;
@@ -1718,7 +1719,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1718 */ 1719 */
1719 BUG_ON(blk_queued_rq(rq)); 1720 BUG_ON(blk_queued_rq(rq));
1720 1721
1721 add_acct_request(q, rq, ELEVATOR_INSERT_BACK); 1722 if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
1723 where = ELEVATOR_INSERT_FLUSH;
1724
1725 add_acct_request(q, rq, where);
1722 spin_unlock_irqrestore(q->queue_lock, flags); 1726 spin_unlock_irqrestore(q->queue_lock, flags);
1723 1727
1724 return 0; 1728 return 0;
@@ -2275,7 +2279,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
2275 * %false - we are done with this request 2279 * %false - we are done with this request
2276 * %true - still buffers pending for this request 2280 * %true - still buffers pending for this request
2277 **/ 2281 **/
2278static bool __blk_end_bidi_request(struct request *rq, int error, 2282bool __blk_end_bidi_request(struct request *rq, int error,
2279 unsigned int nr_bytes, unsigned int bidi_bytes) 2283 unsigned int nr_bytes, unsigned int bidi_bytes)
2280{ 2284{
2281 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2285 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
diff --git a/block/blk-flush.c b/block/blk-flush.c
index bb21e4c36f7..491eb30a242 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
95{ 95{
96 unsigned int policy = 0; 96 unsigned int policy = 0;
97 97
98 if (blk_rq_sectors(rq))
99 policy |= REQ_FSEQ_DATA;
100
98 if (fflags & REQ_FLUSH) { 101 if (fflags & REQ_FLUSH) {
99 if (rq->cmd_flags & REQ_FLUSH) 102 if (rq->cmd_flags & REQ_FLUSH)
100 policy |= REQ_FSEQ_PREFLUSH; 103 policy |= REQ_FSEQ_PREFLUSH;
101 if (blk_rq_sectors(rq))
102 policy |= REQ_FSEQ_DATA;
103 if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) 104 if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
104 policy |= REQ_FSEQ_POSTFLUSH; 105 policy |= REQ_FSEQ_POSTFLUSH;
105 } 106 }
@@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq)
122 123
123 /* make @rq a normal request */ 124 /* make @rq a normal request */
124 rq->cmd_flags &= ~REQ_FLUSH_SEQ; 125 rq->cmd_flags &= ~REQ_FLUSH_SEQ;
125 rq->end_io = NULL; 126 rq->end_io = rq->flush.saved_end_io;
126} 127}
127 128
128/** 129/**
@@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq)
300 unsigned int fflags = q->flush_flags; /* may change, cache */ 301 unsigned int fflags = q->flush_flags; /* may change, cache */
301 unsigned int policy = blk_flush_policy(fflags, rq); 302 unsigned int policy = blk_flush_policy(fflags, rq);
302 303
303 BUG_ON(rq->end_io);
304 BUG_ON(!rq->bio || rq->bio != rq->biotail);
305
306 /* 304 /*
307 * @policy now records what operations need to be done. Adjust 305 * @policy now records what operations need to be done. Adjust
308 * REQ_FLUSH and FUA for the driver. 306 * REQ_FLUSH and FUA for the driver.
@@ -312,6 +310,19 @@ void blk_insert_flush(struct request *rq)
312 rq->cmd_flags &= ~REQ_FUA; 310 rq->cmd_flags &= ~REQ_FUA;
313 311
314 /* 312 /*
313 * An empty flush handed down from a stacking driver may
314 * translate into nothing if the underlying device does not
315 * advertise a write-back cache. In this case, simply
316 * complete the request.
317 */
318 if (!policy) {
319 __blk_end_bidi_request(rq, 0, 0, 0);
320 return;
321 }
322
323 BUG_ON(!rq->bio || rq->bio != rq->biotail);
324
325 /*
315 * If there's data but flush is not necessary, the request can be 326 * If there's data but flush is not necessary, the request can be
316 * processed directly without going through flush machinery. Queue 327 * processed directly without going through flush machinery. Queue
317 * for normal execution. 328 * for normal execution.
@@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq)
319 if ((policy & REQ_FSEQ_DATA) && 330 if ((policy & REQ_FSEQ_DATA) &&
320 !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { 331 !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
321 list_add_tail(&rq->queuelist, &q->queue_head); 332 list_add_tail(&rq->queuelist, &q->queue_head);
333 blk_run_queue_async(q);
322 return; 334 return;
323 } 335 }
324 336
@@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq)
329 memset(&rq->flush, 0, sizeof(rq->flush)); 341 memset(&rq->flush, 0, sizeof(rq->flush));
330 INIT_LIST_HEAD(&rq->flush.list); 342 INIT_LIST_HEAD(&rq->flush.list);
331 rq->cmd_flags |= REQ_FLUSH_SEQ; 343 rq->cmd_flags |= REQ_FLUSH_SEQ;
344 rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
332 rq->end_io = flush_data_end_io; 345 rq->end_io = flush_data_end_io;
333 346
334 blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); 347 blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
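The reordered blk_flush_policy() above now sets REQ_FSEQ_DATA before looking at the queue's flush flags, so an empty flush aimed at a device without a write-back cache ends up with policy == 0 and is completed straight away by blk_insert_flush(). A minimal, illustrative C restatement of that decision; the flag values below are placeholders, not the kernel's real bit definitions:

/*
 * Illustrative-only restatement of the reordered blk_flush_policy()
 * logic above; flag values are placeholders.
 */
#include <stdio.h>

#define REQ_FSEQ_PREFLUSH  (1 << 0)
#define REQ_FSEQ_DATA      (1 << 1)
#define REQ_FSEQ_POSTFLUSH (1 << 2)

/* dev_flush/dev_fua: what the device advertises;
 * has_data/wants_flush/wants_fua: what the request asks for */
static unsigned int flush_policy(int dev_flush, int dev_fua,
				 int has_data, int wants_flush, int wants_fua)
{
	unsigned int policy = 0;

	if (has_data)			/* now checked before the fflags test */
		policy |= REQ_FSEQ_DATA;

	if (dev_flush) {
		if (wants_flush)
			policy |= REQ_FSEQ_PREFLUSH;
		if (!dev_fua && wants_fua)
			policy |= REQ_FSEQ_POSTFLUSH;
	}
	return policy;
}

int main(void)
{
	/* empty flush on a device without a write-back cache:
	 * policy == 0, so blk_insert_flush() completes it immediately */
	printf("%u\n", flush_policy(0, 0, 0, 1, 0));
	return 0;
}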
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 475fab809a8..58340d0cb23 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -124,6 +124,14 @@ void __blk_complete_request(struct request *req)
124 } else 124 } else
125 ccpu = cpu; 125 ccpu = cpu;
126 126
127 /*
 128 * If the current CPU and the requested CPU are in the same group, run
 129 * the softirq on the current CPU. This may look like
 130 * QUEUE_FLAG_SAME_FORCE, but it is not: blk_complete_request() runs
 131 * from the interrupt handler, and since the I/O controller does not
 132 * support multiple interrupts, the current CPU is effectively unique.
 133 * This avoids sending an IPI from the current CPU to the first CPU of a group.
134 */
127 if (ccpu == cpu || ccpu == group_cpu) { 135 if (ccpu == cpu || ccpu == group_cpu) {
128 struct list_head *list; 136 struct list_head *list;
129do_local: 137do_local:
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f6a79412050..a19f58c6fc3 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg,
746static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) 746static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
747{ 747{
748 bool rw = bio_data_dir(bio); 748 bool rw = bio_data_dir(bio);
749 bool sync = bio->bi_rw & REQ_SYNC; 749 bool sync = rw_is_sync(bio->bi_rw);
750 750
751 /* Charge the bio to the group */ 751 /* Charge the bio to the group */
752 tg->bytes_disp[rw] += bio->bi_size; 752 tg->bytes_disp[rw] += bio->bi_size;
@@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
1150 1150
1151 if (tg_no_rule_group(tg, rw)) { 1151 if (tg_no_rule_group(tg, rw)) {
1152 blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, 1152 blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
1153 rw, bio->bi_rw & REQ_SYNC); 1153 rw, rw_is_sync(bio->bi_rw));
1154 rcu_read_unlock(); 1154 rcu_read_unlock();
1155 return 0; 1155 return 0;
1156 } 1156 }
diff --git a/block/blk.h b/block/blk.h
index d6586287adc..20b900a377c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
17 struct bio *bio); 17 struct bio *bio);
18void blk_dequeue_request(struct request *rq); 18void blk_dequeue_request(struct request *rq);
19void __blk_queue_free_tags(struct request_queue *q); 19void __blk_queue_free_tags(struct request_queue *q);
20bool __blk_end_bidi_request(struct request *rq, int error,
21 unsigned int nr_bytes, unsigned int bidi_bytes);
20 22
21void blk_rq_timed_out_timer(unsigned long data); 23void blk_rq_timed_out_timer(unsigned long data);
22void blk_delete_timer(struct request *); 24void blk_delete_timer(struct request *);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
new file mode 100644
index 00000000000..6690e6e4103
--- /dev/null
+++ b/block/bsg-lib.c
@@ -0,0 +1,298 @@
1/*
2 * BSG helper library
3 *
4 * Copyright (C) 2008 James Smart, Emulex Corporation
5 * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
6 * Copyright (C) 2011 Mike Christie
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 */
23#include <linux/slab.h>
24#include <linux/blkdev.h>
25#include <linux/delay.h>
26#include <linux/scatterlist.h>
27#include <linux/bsg-lib.h>
28#include <linux/module.h>
29#include <scsi/scsi_cmnd.h>
30
31/**
32 * bsg_destroy_job - routine to teardown/delete a bsg job
33 * @job: bsg_job that is to be torn down
34 */
35static void bsg_destroy_job(struct bsg_job *job)
36{
37 put_device(job->dev); /* release reference for the request */
38
39 kfree(job->request_payload.sg_list);
40 kfree(job->reply_payload.sg_list);
41 kfree(job);
42}
43
44/**
45 * bsg_job_done - completion routine for bsg requests
46 * @job: bsg_job that is complete
47 * @result: job reply result
48 * @reply_payload_rcv_len: length of payload recvd
49 *
50 * The LLD should call this when the bsg job has completed.
51 */
52void bsg_job_done(struct bsg_job *job, int result,
53 unsigned int reply_payload_rcv_len)
54{
55 struct request *req = job->req;
56 struct request *rsp = req->next_rq;
57 int err;
58
59 err = job->req->errors = result;
60 if (err < 0)
61 /* we're only returning the result field in the reply */
62 job->req->sense_len = sizeof(u32);
63 else
64 job->req->sense_len = job->reply_len;
65 /* we assume all request payload was transferred, residual == 0 */
66 req->resid_len = 0;
67
68 if (rsp) {
69 WARN_ON(reply_payload_rcv_len > rsp->resid_len);
70
71 /* set reply (bidi) residual */
72 rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
73 }
74 blk_complete_request(req);
75}
76EXPORT_SYMBOL_GPL(bsg_job_done);
77
78/**
79 * bsg_softirq_done - softirq done routine for destroying the bsg requests
80 * @rq: BSG request that holds the job to be destroyed
81 */
82static void bsg_softirq_done(struct request *rq)
83{
84 struct bsg_job *job = rq->special;
85
86 blk_end_request_all(rq, rq->errors);
87 bsg_destroy_job(job);
88}
89
90static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
91{
92 size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
93
94 BUG_ON(!req->nr_phys_segments);
95
96 buf->sg_list = kzalloc(sz, GFP_KERNEL);
97 if (!buf->sg_list)
98 return -ENOMEM;
99 sg_init_table(buf->sg_list, req->nr_phys_segments);
100 buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
101 buf->payload_len = blk_rq_bytes(req);
102 return 0;
103}
104
105/**
106 * bsg_create_job - create the bsg_job structure for the bsg request
107 * @dev: device that is being sent the bsg request
108 * @req: BSG request that needs a job structure
109 */
110static int bsg_create_job(struct device *dev, struct request *req)
111{
112 struct request *rsp = req->next_rq;
113 struct request_queue *q = req->q;
114 struct bsg_job *job;
115 int ret;
116
117 BUG_ON(req->special);
118
119 job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
120 if (!job)
121 return -ENOMEM;
122
123 req->special = job;
124 job->req = req;
125 if (q->bsg_job_size)
126 job->dd_data = (void *)&job[1];
127 job->request = req->cmd;
128 job->request_len = req->cmd_len;
129 job->reply = req->sense;
130 job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
131 * allocated */
132 if (req->bio) {
133 ret = bsg_map_buffer(&job->request_payload, req);
134 if (ret)
135 goto failjob_rls_job;
136 }
137 if (rsp && rsp->bio) {
138 ret = bsg_map_buffer(&job->reply_payload, rsp);
139 if (ret)
140 goto failjob_rls_rqst_payload;
141 }
142 job->dev = dev;
143 /* take a reference for the request */
144 get_device(job->dev);
145 return 0;
146
147failjob_rls_rqst_payload:
148 kfree(job->request_payload.sg_list);
149failjob_rls_job:
150 kfree(job);
151 return -ENOMEM;
152}
153
154/*
155 * bsg_goose_queue - restart queue in case it was stopped
 156 * @q: request queue to be restarted
157 */
158void bsg_goose_queue(struct request_queue *q)
159{
160 if (!q)
161 return;
162
163 blk_run_queue_async(q);
164}
165EXPORT_SYMBOL_GPL(bsg_goose_queue);
166
167/**
168 * bsg_request_fn - generic handler for bsg requests
169 * @q: request queue to manage
170 *
 171 * On error, bsg_create_job() should return a -Exyz error value
 172 * that will be stored in req->errors.
173 *
174 * Drivers/subsys should pass this to the queue init function.
175 */
176void bsg_request_fn(struct request_queue *q)
177{
178 struct device *dev = q->queuedata;
179 struct request *req;
180 struct bsg_job *job;
181 int ret;
182
183 if (!get_device(dev))
184 return;
185
186 while (1) {
187 req = blk_fetch_request(q);
188 if (!req)
189 break;
190 spin_unlock_irq(q->queue_lock);
191
192 ret = bsg_create_job(dev, req);
193 if (ret) {
194 req->errors = ret;
195 blk_end_request_all(req, ret);
196 spin_lock_irq(q->queue_lock);
197 continue;
198 }
199
200 job = req->special;
201 ret = q->bsg_job_fn(job);
202 spin_lock_irq(q->queue_lock);
203 if (ret)
204 break;
205 }
206
207 spin_unlock_irq(q->queue_lock);
208 put_device(dev);
209 spin_lock_irq(q->queue_lock);
210}
211EXPORT_SYMBOL_GPL(bsg_request_fn);
212
213/**
214 * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
215 * @dev: device to attach bsg device to
216 * @q: request queue setup by caller
 217 * @name: name to give the bsg device
218 * @job_fn: bsg job handler
219 * @dd_job_size: size of LLD data needed for each job
220 *
 221 * The caller should have set up the request queue with bsg_request_fn
222 * as the request_fn.
223 */
224int bsg_setup_queue(struct device *dev, struct request_queue *q,
225 char *name, bsg_job_fn *job_fn, int dd_job_size)
226{
227 int ret;
228
229 q->queuedata = dev;
230 q->bsg_job_size = dd_job_size;
231 q->bsg_job_fn = job_fn;
232 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
233 blk_queue_softirq_done(q, bsg_softirq_done);
234 blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
235
236 ret = bsg_register_queue(q, dev, name, NULL);
237 if (ret) {
238 printk(KERN_ERR "%s: bsg interface failed to "
239 "initialize - register queue\n", dev->kobj.name);
240 return ret;
241 }
242
243 return 0;
244}
245EXPORT_SYMBOL_GPL(bsg_setup_queue);
246
247/**
248 * bsg_remove_queue - Deletes the bsg dev from the q
249 * @q: the request_queue that is to be torn down.
250 *
251 * Notes:
 252 * Before unregistering the queue, drain any requests that are blocked
253 */
254void bsg_remove_queue(struct request_queue *q)
255{
256 struct request *req; /* block request */
257 int counts; /* totals for request_list count and starved */
258
259 if (!q)
260 return;
261
262 /* Stop taking in new requests */
263 spin_lock_irq(q->queue_lock);
264 blk_stop_queue(q);
265
266 /* drain all requests in the queue */
267 while (1) {
268 /* need the lock to fetch a request
 269 * this may fetch the same request as the previous pass
270 */
271 req = blk_fetch_request(q);
272 /* save requests in use and starved */
273 counts = q->rq.count[0] + q->rq.count[1] +
274 q->rq.starved[0] + q->rq.starved[1];
275 spin_unlock_irq(q->queue_lock);
276 /* any requests still outstanding? */
277 if (counts == 0)
278 break;
279
 280 /* This may be the same request as in the previous iteration;
 281 * always call blk_end_request_all() after a fetch, since the
 282 * fetch started the request and it must not be left
 283 * unfinished.
284 */
285 if (req) {
286 /* return -ENXIO to indicate that this queue is
287 * going away
288 */
289 req->errors = -ENXIO;
290 blk_end_request_all(req, -ENXIO);
291 }
292
293 msleep(200); /* allow bsg to possibly finish */
294 spin_lock_irq(q->queue_lock);
295 }
296 bsg_unregister_queue(q);
297}
298EXPORT_SYMBOL_GPL(bsg_remove_queue);
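The exported entry points above (bsg_setup_queue(), bsg_request_fn(), bsg_job_done()) are meant to be wired up by an LLD or transport class. A hedged sketch of such glue, where my_attach_bsg(), my_bsg_job_fn() and struct my_job_data are hypothetical placeholders and error/teardown handling is omitted:

/*
 * Hypothetical LLD glue for the BSG helper library added in this file.
 * Names prefixed with "my_" are placeholders, not part of the API.
 */
#include <linux/blkdev.h>
#include <linux/bsg-lib.h>

struct my_job_data {
	int status;			/* driver-private, sized via dd_job_size */
};

static int my_bsg_job_fn(struct bsg_job *job)
{
	struct my_job_data *dd = job->dd_data;

	/* ... hand job->request / job->request_payload to the hardware ... */
	dd->status = 0;

	/* later, typically from the command completion path: */
	bsg_job_done(job, 0, job->reply_payload.payload_len);
	return 0;
}

static int my_attach_bsg(struct device *dev, spinlock_t *lock)
{
	struct request_queue *q;

	/* bsg_request_fn is passed as the queue's request_fn */
	q = blk_init_queue(bsg_request_fn, lock);
	if (!q)
		return -ENOMEM;

	return bsg_setup_queue(dev, q, "my_bsg", my_bsg_job_fn,
			       sizeof(struct my_job_data));
}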
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1f96ad6254f..a33bd4377c6 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -130,6 +130,8 @@ struct cfq_queue {
130 unsigned long slice_end; 130 unsigned long slice_end;
131 long slice_resid; 131 long slice_resid;
132 132
133 /* pending metadata requests */
134 int meta_pending;
133 /* number of requests that are on the dispatch list or inside driver */ 135 /* number of requests that are on the dispatch list or inside driver */
134 int dispatched; 136 int dispatched;
135 137
@@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
682 if (rq_is_sync(rq1) != rq_is_sync(rq2)) 684 if (rq_is_sync(rq1) != rq_is_sync(rq2))
683 return rq_is_sync(rq1) ? rq1 : rq2; 685 return rq_is_sync(rq1) ? rq1 : rq2;
684 686
687 if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
688 return rq1->cmd_flags & REQ_META ? rq1 : rq2;
689
685 s1 = blk_rq_pos(rq1); 690 s1 = blk_rq_pos(rq1);
686 s2 = blk_rq_pos(rq2); 691 s2 = blk_rq_pos(rq2);
687 692
@@ -1209,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
1209 1214
1210 hlist_del_init(&cfqg->cfqd_node); 1215 hlist_del_init(&cfqg->cfqd_node);
1211 1216
1217 BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
1218 cfqd->nr_blkcg_linked_grps--;
1219
1212 /* 1220 /*
1213 * Put the reference taken at the time of creation so that when all 1221 * Put the reference taken at the time of creation so that when all
1214 * queues are gone, group can be destroyed. 1222 * queues are gone, group can be destroyed.
@@ -1604,6 +1612,10 @@ static void cfq_remove_request(struct request *rq)
1604 cfqq->cfqd->rq_queued--; 1612 cfqq->cfqd->rq_queued--;
1605 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, 1613 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
1606 rq_data_dir(rq), rq_is_sync(rq)); 1614 rq_data_dir(rq), rq_is_sync(rq));
1615 if (rq->cmd_flags & REQ_META) {
1616 WARN_ON(!cfqq->meta_pending);
1617 cfqq->meta_pending--;
1618 }
1607} 1619}
1608 1620
1609static int cfq_merge(struct request_queue *q, struct request **req, 1621static int cfq_merge(struct request_queue *q, struct request **req,
@@ -3357,6 +3369,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3357 return true; 3369 return true;
3358 3370
3359 /* 3371 /*
3372 * So both queues are sync. Let the new request get disk time if
3373 * it's a metadata request and the current queue is doing regular IO.
3374 */
3375 if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
3376 return true;
3377
3378 /*
3360 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. 3379 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
3361 */ 3380 */
3362 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) 3381 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
@@ -3420,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3420 struct cfq_io_context *cic = RQ_CIC(rq); 3439 struct cfq_io_context *cic = RQ_CIC(rq);
3421 3440
3422 cfqd->rq_queued++; 3441 cfqd->rq_queued++;
3442 if (rq->cmd_flags & REQ_META)
3443 cfqq->meta_pending++;
3423 3444
3424 cfq_update_io_thinktime(cfqd, cfqq, cic); 3445 cfq_update_io_thinktime(cfqd, cfqq, cic);
3425 cfq_update_io_seektime(cfqd, cfqq, rq); 3446 cfq_update_io_seektime(cfqd, cfqq, rq);
diff --git a/block/genhd.c b/block/genhd.c
index 5cb51c55f6d..e2f67902dd0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v)
1146 cpu = part_stat_lock(); 1146 cpu = part_stat_lock();
1147 part_round_stats(cpu, hd); 1147 part_round_stats(cpu, hd);
1148 part_stat_unlock(); 1148 part_stat_unlock();
1149 seq_printf(seqf, "%4d %7d %s %lu %lu %llu " 1149 seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
1150 "%u %lu %lu %llu %u %u %u %u\n", 1150 "%u %lu %lu %lu %u %u %u %u\n",
1151 MAJOR(part_devt(hd)), MINOR(part_devt(hd)), 1151 MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
1152 disk_name(gp, hd->partno, buf), 1152 disk_name(gp, hd->partno, buf),
1153 part_stat_read(hd, ios[READ]), 1153 part_stat_read(hd, ios[READ]),
1154 part_stat_read(hd, merges[READ]), 1154 part_stat_read(hd, merges[READ]),
1155 (unsigned long long)part_stat_read(hd, sectors[READ]), 1155 part_stat_read(hd, sectors[READ]),
1156 jiffies_to_msecs(part_stat_read(hd, ticks[READ])), 1156 jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
1157 part_stat_read(hd, ios[WRITE]), 1157 part_stat_read(hd, ios[WRITE]),
1158 part_stat_read(hd, merges[WRITE]), 1158 part_stat_read(hd, merges[WRITE]),
1159 (unsigned long long)part_stat_read(hd, sectors[WRITE]), 1159 part_stat_read(hd, sectors[WRITE]),
1160 jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), 1160 jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
1161 part_in_flight(hd), 1161 part_in_flight(hd),
1162 jiffies_to_msecs(part_stat_read(hd, io_ticks)), 1162 jiffies_to_msecs(part_stat_read(hd, io_ticks)),
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index ca3e6be44a0..5987e0ba8c2 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -468,6 +468,15 @@ config PATA_ICSIDE
468 interface card. This is not required for ICS partition support. 468 interface card. This is not required for ICS partition support.
469 If you are unsure, say N to this. 469 If you are unsure, say N to this.
470 470
471config PATA_IMX
472 tristate "PATA support for Freescale iMX"
473 depends on ARCH_MXC
474 help
475 This option enables support for the PATA host available on Freescale
476 iMX SoCs.
477
478 If unsure, say N.
479
471config PATA_IT8213 480config PATA_IT8213
472 tristate "IT8213 PATA support (Experimental)" 481 tristate "IT8213 PATA support (Experimental)"
473 depends on PCI && EXPERIMENTAL 482 depends on PCI && EXPERIMENTAL
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 8ac64e1aa05..9550d691fd1 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_PATA_HPT37X) += pata_hpt37x.o
48obj-$(CONFIG_PATA_HPT3X2N) += pata_hpt3x2n.o 48obj-$(CONFIG_PATA_HPT3X2N) += pata_hpt3x2n.o
49obj-$(CONFIG_PATA_HPT3X3) += pata_hpt3x3.o 49obj-$(CONFIG_PATA_HPT3X3) += pata_hpt3x3.o
50obj-$(CONFIG_PATA_ICSIDE) += pata_icside.o 50obj-$(CONFIG_PATA_ICSIDE) += pata_icside.o
51obj-$(CONFIG_PATA_IMX) += pata_imx.o
51obj-$(CONFIG_PATA_IT8213) += pata_it8213.o 52obj-$(CONFIG_PATA_IT8213) += pata_it8213.o
52obj-$(CONFIG_PATA_IT821X) += pata_it821x.o 53obj-$(CONFIG_PATA_IT821X) += pata_it821x.o
53obj-$(CONFIG_PATA_JMICRON) += pata_jmicron.o 54obj-$(CONFIG_PATA_JMICRON) += pata_jmicron.o
diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c
new file mode 100644
index 00000000000..ca9d9caedfa
--- /dev/null
+++ b/drivers/ata/pata_imx.c
@@ -0,0 +1,253 @@
1/*
2 * Freescale iMX PATA driver
3 *
4 * Copyright (C) 2011 Arnaud Patard <arnaud.patard@rtp-net.org>
5 *
6 * Based on pata_platform - Copyright (C) 2006 - 2007 Paul Mundt
7 *
8 * This file is subject to the terms and conditions of the GNU General Public
9 * License. See the file "COPYING" in the main directory of this archive
10 * for more details.
11 *
12 * TODO:
13 * - dmaengine support
14 * - check if timing stuff needed
15 */
16#include <linux/kernel.h>
17#include <linux/module.h>
18#include <linux/init.h>
19#include <linux/blkdev.h>
20#include <scsi/scsi_host.h>
21#include <linux/ata.h>
22#include <linux/libata.h>
23#include <linux/platform_device.h>
24#include <linux/clk.h>
25
26#define DRV_NAME "pata_imx"
27
28#define PATA_IMX_ATA_CONTROL 0x24
29#define PATA_IMX_ATA_CTRL_FIFO_RST_B (1<<7)
30#define PATA_IMX_ATA_CTRL_ATA_RST_B (1<<6)
31#define PATA_IMX_ATA_CTRL_IORDY_EN (1<<0)
32#define PATA_IMX_ATA_INT_EN 0x2C
33#define PATA_IMX_ATA_INTR_ATA_INTRQ2 (1<<3)
34#define PATA_IMX_DRIVE_DATA 0xA0
35#define PATA_IMX_DRIVE_CONTROL 0xD8
36
37struct pata_imx_priv {
38 struct clk *clk;
39 /* timings/interrupt/control regs */
40 u8 *host_regs;
41 u32 ata_ctl;
42};
43
44static int pata_imx_set_mode(struct ata_link *link, struct ata_device **unused)
45{
46 struct ata_device *dev;
47 struct ata_port *ap = link->ap;
48 struct pata_imx_priv *priv = ap->host->private_data;
49 u32 val;
50
51 ata_for_each_dev(dev, link, ENABLED) {
52 dev->pio_mode = dev->xfer_mode = XFER_PIO_0;
53 dev->xfer_shift = ATA_SHIFT_PIO;
54 dev->flags |= ATA_DFLAG_PIO;
55
56 val = __raw_readl(priv->host_regs + PATA_IMX_ATA_CONTROL);
57 if (ata_pio_need_iordy(dev))
58 val |= PATA_IMX_ATA_CTRL_IORDY_EN;
59 else
60 val &= ~PATA_IMX_ATA_CTRL_IORDY_EN;
61 __raw_writel(val, priv->host_regs + PATA_IMX_ATA_CONTROL);
62
63 ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
64 }
65 return 0;
66}
67
68static struct scsi_host_template pata_imx_sht = {
69 ATA_PIO_SHT(DRV_NAME),
70};
71
72static struct ata_port_operations pata_imx_port_ops = {
73 .inherits = &ata_sff_port_ops,
74 .sff_data_xfer = ata_sff_data_xfer_noirq,
75 .cable_detect = ata_cable_unknown,
76 .set_mode = pata_imx_set_mode,
77};
78
79static void pata_imx_setup_port(struct ata_ioports *ioaddr)
80{
81 /* Fixup the port shift for platforms that need it */
82 ioaddr->data_addr = ioaddr->cmd_addr + (ATA_REG_DATA << 2);
83 ioaddr->error_addr = ioaddr->cmd_addr + (ATA_REG_ERR << 2);
84 ioaddr->feature_addr = ioaddr->cmd_addr + (ATA_REG_FEATURE << 2);
85 ioaddr->nsect_addr = ioaddr->cmd_addr + (ATA_REG_NSECT << 2);
86 ioaddr->lbal_addr = ioaddr->cmd_addr + (ATA_REG_LBAL << 2);
87 ioaddr->lbam_addr = ioaddr->cmd_addr + (ATA_REG_LBAM << 2);
88 ioaddr->lbah_addr = ioaddr->cmd_addr + (ATA_REG_LBAH << 2);
89 ioaddr->device_addr = ioaddr->cmd_addr + (ATA_REG_DEVICE << 2);
90 ioaddr->status_addr = ioaddr->cmd_addr + (ATA_REG_STATUS << 2);
91 ioaddr->command_addr = ioaddr->cmd_addr + (ATA_REG_CMD << 2);
92}
93
94static int __devinit pata_imx_probe(struct platform_device *pdev)
95{
96 struct ata_host *host;
97 struct ata_port *ap;
98 struct pata_imx_priv *priv;
99 int irq = 0;
100 struct resource *io_res;
101
102 io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
103 if (io_res == NULL)
104 return -EINVAL;
105
106 irq = platform_get_irq(pdev, 0);
107 if (irq <= 0)
108 return -EINVAL;
109
110 priv = devm_kzalloc(&pdev->dev,
111 sizeof(struct pata_imx_priv), GFP_KERNEL);
112 if (!priv)
113 return -ENOMEM;
114
115 priv->clk = clk_get(&pdev->dev, NULL);
116 if (IS_ERR(priv->clk)) {
117 dev_err(&pdev->dev, "Failed to get clock\n");
118 return PTR_ERR(priv->clk);
119 }
120
121 clk_enable(priv->clk);
122
123 host = ata_host_alloc(&pdev->dev, 1);
124 if (!host)
125 goto free_priv;
126
127 host->private_data = priv;
128 ap = host->ports[0];
129
130 ap->ops = &pata_imx_port_ops;
131 ap->pio_mask = ATA_PIO0;
132 ap->flags |= ATA_FLAG_SLAVE_POSS;
133
134 priv->host_regs = devm_ioremap(&pdev->dev, io_res->start,
135 resource_size(io_res));
136 if (!priv->host_regs) {
137 dev_err(&pdev->dev, "failed to map IO/CTL base\n");
138 goto free_priv;
139 }
140
141 ap->ioaddr.cmd_addr = priv->host_regs + PATA_IMX_DRIVE_DATA;
142 ap->ioaddr.ctl_addr = priv->host_regs + PATA_IMX_DRIVE_CONTROL;
143
144 ap->ioaddr.altstatus_addr = ap->ioaddr.ctl_addr;
145
146 pata_imx_setup_port(&ap->ioaddr);
147
148 ata_port_desc(ap, "cmd 0x%llx ctl 0x%llx",
149 (unsigned long long)io_res->start + PATA_IMX_DRIVE_DATA,
150 (unsigned long long)io_res->start + PATA_IMX_DRIVE_CONTROL);
151
152 /* deassert resets */
153 __raw_writel(PATA_IMX_ATA_CTRL_FIFO_RST_B |
154 PATA_IMX_ATA_CTRL_ATA_RST_B,
155 priv->host_regs + PATA_IMX_ATA_CONTROL);
156 /* enable interrupts */
157 __raw_writel(PATA_IMX_ATA_INTR_ATA_INTRQ2,
158 priv->host_regs + PATA_IMX_ATA_INT_EN);
159
160 /* activate */
161 return ata_host_activate(host, irq, ata_sff_interrupt, 0,
162 &pata_imx_sht);
163
164free_priv:
165 clk_disable(priv->clk);
166 clk_put(priv->clk);
167 return -ENOMEM;
168}
169
170static int __devexit pata_imx_remove(struct platform_device *pdev)
171{
172 struct ata_host *host = dev_get_drvdata(&pdev->dev);
173 struct pata_imx_priv *priv = host->private_data;
174
175 ata_host_detach(host);
176
177 __raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN);
178
179 clk_disable(priv->clk);
180 clk_put(priv->clk);
181
182 return 0;
183}
184
185#ifdef CONFIG_PM
186static int pata_imx_suspend(struct device *dev)
187{
188 struct ata_host *host = dev_get_drvdata(dev);
189 struct pata_imx_priv *priv = host->private_data;
190 int ret;
191
192 ret = ata_host_suspend(host, PMSG_SUSPEND);
193 if (!ret) {
194 __raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN);
195 priv->ata_ctl =
196 __raw_readl(priv->host_regs + PATA_IMX_ATA_CONTROL);
197 clk_disable(priv->clk);
198 }
199
200 return ret;
201}
202
203static int pata_imx_resume(struct device *dev)
204{
205 struct ata_host *host = dev_get_drvdata(dev);
206 struct pata_imx_priv *priv = host->private_data;
207
208 clk_enable(priv->clk);
209
210 __raw_writel(priv->ata_ctl, priv->host_regs + PATA_IMX_ATA_CONTROL);
211
212 __raw_writel(PATA_IMX_ATA_INTR_ATA_INTRQ2,
213 priv->host_regs + PATA_IMX_ATA_INT_EN);
214
215 ata_host_resume(host);
216
217 return 0;
218}
219
220static const struct dev_pm_ops pata_imx_pm_ops = {
221 .suspend = pata_imx_suspend,
222 .resume = pata_imx_resume,
223};
224#endif
225
226static struct platform_driver pata_imx_driver = {
227 .probe = pata_imx_probe,
228 .remove = __devexit_p(pata_imx_remove),
229 .driver = {
230 .name = DRV_NAME,
231 .owner = THIS_MODULE,
232#ifdef CONFIG_PM
233 .pm = &pata_imx_pm_ops,
234#endif
235 },
236};
237
238static int __init pata_imx_init(void)
239{
240 return platform_driver_register(&pata_imx_driver);
241}
242
243static void __exit pata_imx_exit(void)
244{
245 platform_driver_unregister(&pata_imx_driver);
246}
247module_init(pata_imx_init);
248module_exit(pata_imx_exit);
249
250MODULE_AUTHOR("Arnaud Patard <arnaud.patard@rtp-net.org>");
251MODULE_DESCRIPTION("low-level driver for iMX PATA");
252MODULE_LICENSE("GPL");
253MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 65e4be6be22..8e9f5048a10 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -124,6 +124,17 @@ static const struct via_isa_bridge {
124 { NULL } 124 { NULL }
125}; 125};
126 126
127static const struct dmi_system_id no_atapi_dma_dmi_table[] = {
128 {
129 .ident = "AVERATEC 3200",
130 .matches = {
131 DMI_MATCH(DMI_BOARD_VENDOR, "AVERATEC"),
132 DMI_MATCH(DMI_BOARD_NAME, "3200"),
133 },
134 },
135 { }
136};
137
127struct via_port { 138struct via_port {
128 u8 cached_device; 139 u8 cached_device;
129}; 140};
@@ -355,6 +366,13 @@ static unsigned long via_mode_filter(struct ata_device *dev, unsigned long mask)
355 mask &= ~ ATA_MASK_UDMA; 366 mask &= ~ ATA_MASK_UDMA;
356 } 367 }
357 } 368 }
369
370 if (dev->class == ATA_DEV_ATAPI &&
371 dmi_check_system(no_atapi_dma_dmi_table)) {
372 ata_dev_warn(dev, "controller locks up on ATAPI DMA, forcing PIO\n");
373 mask &= ATA_MASK_PIO;
374 }
375
358 return mask; 376 return mask;
359} 377}
360 378
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c
index 0a9a774a7e1..5c4237452f5 100644
--- a/drivers/ata/sata_dwc_460ex.c
+++ b/drivers/ata/sata_dwc_460ex.c
@@ -1329,7 +1329,7 @@ static int sata_dwc_port_start(struct ata_port *ap)
1329 dev_err(ap->dev, "%s: dma_alloc_coherent failed\n", 1329 dev_err(ap->dev, "%s: dma_alloc_coherent failed\n",
1330 __func__); 1330 __func__);
1331 err = -ENOMEM; 1331 err = -ENOMEM;
1332 goto CLEANUP; 1332 goto CLEANUP_ALLOC;
1333 } 1333 }
1334 } 1334 }
1335 1335
@@ -1349,15 +1349,13 @@ static int sata_dwc_port_start(struct ata_port *ap)
1349 /* Clear any error bits before libata starts issuing commands */ 1349 /* Clear any error bits before libata starts issuing commands */
1350 clear_serror(); 1350 clear_serror();
1351 ap->private_data = hsdevp; 1351 ap->private_data = hsdevp;
1352 dev_dbg(ap->dev, "%s: done\n", __func__);
1353 return 0;
1352 1354
1355CLEANUP_ALLOC:
1356 kfree(hsdevp);
1353CLEANUP: 1357CLEANUP:
1354 if (err) { 1358 dev_dbg(ap->dev, "%s: fail. ap->id = %d\n", __func__, ap->print_id);
1355 sata_dwc_port_stop(ap);
1356 dev_dbg(ap->dev, "%s: fail\n", __func__);
1357 } else {
1358 dev_dbg(ap->dev, "%s: done\n", __func__);
1359 }
1360
1361 return err; 1359 return err;
1362} 1360}
1363 1361
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 98c1d780f55..9dfb40b8c2c 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -438,7 +438,7 @@ static void sil_host_intr(struct ata_port *ap, u32 bmdma2)
438 u8 status; 438 u8 status;
439 439
440 if (unlikely(bmdma2 & SIL_DMA_SATA_IRQ)) { 440 if (unlikely(bmdma2 & SIL_DMA_SATA_IRQ)) {
441 u32 serror; 441 u32 serror = 0xffffffff;
442 442
443 /* SIEN doesn't mask SATA IRQs on some 3112s. Those 443 /* SIEN doesn't mask SATA IRQs on some 3112s. Those
444 * controllers continue to assert IRQ as long as 444 * controllers continue to assert IRQ as long as
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e18566a0fed..1c374579407 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -460,6 +460,21 @@ static int pm_genpd_runtime_resume(struct device *dev)
460 return 0; 460 return 0;
461} 461}
462 462
463/**
464 * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use.
465 */
466void pm_genpd_poweroff_unused(void)
467{
468 struct generic_pm_domain *genpd;
469
470 mutex_lock(&gpd_list_lock);
471
472 list_for_each_entry(genpd, &gpd_list, gpd_list_node)
473 genpd_queue_power_off_work(genpd);
474
475 mutex_unlock(&gpd_list_lock);
476}
477
463#else 478#else
464 479
465static inline void genpd_power_off_work_fn(struct work_struct *work) {} 480static inline void genpd_power_off_work_fn(struct work_struct *work) {}
@@ -1255,18 +1270,3 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
1255 list_add(&genpd->gpd_list_node, &gpd_list); 1270 list_add(&genpd->gpd_list_node, &gpd_list);
1256 mutex_unlock(&gpd_list_lock); 1271 mutex_unlock(&gpd_list_lock);
1257} 1272}
1258
1259/**
1260 * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use.
1261 */
1262void pm_genpd_poweroff_unused(void)
1263{
1264 struct generic_pm_domain *genpd;
1265
1266 mutex_lock(&gpd_list_lock);
1267
1268 list_for_each_entry(genpd, &gpd_list, gpd_list_node)
1269 genpd_queue_power_off_work(genpd);
1270
1271 mutex_unlock(&gpd_list_lock);
1272}
diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c
index c2231ff06cb..c4f7a45cd2c 100644
--- a/drivers/base/regmap/regmap-i2c.c
+++ b/drivers/base/regmap/regmap-i2c.c
@@ -113,3 +113,4 @@ struct regmap *regmap_init_i2c(struct i2c_client *i2c,
113} 113}
114EXPORT_SYMBOL_GPL(regmap_init_i2c); 114EXPORT_SYMBOL_GPL(regmap_init_i2c);
115 115
116MODULE_LICENSE("GPL");
diff --git a/drivers/base/regmap/regmap-spi.c b/drivers/base/regmap/regmap-spi.c
index 4deba0621bc..f8396945d6e 100644
--- a/drivers/base/regmap/regmap-spi.c
+++ b/drivers/base/regmap/regmap-spi.c
@@ -13,6 +13,7 @@
13#include <linux/regmap.h> 13#include <linux/regmap.h>
14#include <linux/spi/spi.h> 14#include <linux/spi/spi.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/module.h>
16 17
17static int regmap_spi_write(struct device *dev, const void *data, size_t count) 18static int regmap_spi_write(struct device *dev, const void *data, size_t count)
18{ 19{
@@ -70,3 +71,5 @@ struct regmap *regmap_init_spi(struct spi_device *spi,
70 return regmap_init(&spi->dev, &regmap_spi, config); 71 return regmap_init(&spi->dev, &regmap_spi, config);
71} 72}
72EXPORT_SYMBOL_GPL(regmap_init_spi); 73EXPORT_SYMBOL_GPL(regmap_init_spi);
74
75MODULE_LICENSE("GPL");
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index cf3565cae93..0eef4da1ac6 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -317,7 +317,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
317 u8[0] |= map->bus->read_flag_mask; 317 u8[0] |= map->bus->read_flag_mask;
318 318
319 ret = map->bus->read(map->dev, map->work_buf, map->format.reg_bytes, 319 ret = map->bus->read(map->dev, map->work_buf, map->format.reg_bytes,
320 val, map->format.val_bytes); 320 val, val_len);
321 if (ret != 0) 321 if (ret != 0)
322 return ret; 322 return ret;
323 323
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 717d6e4e18d..6f07ec1c2f5 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -256,6 +256,21 @@ config BLK_DEV_LOOP
256 256
257 Most users will answer N here. 257 Most users will answer N here.
258 258
259config BLK_DEV_LOOP_MIN_COUNT
260 int "Number of loop devices to pre-create at init time"
261 depends on BLK_DEV_LOOP
262 default 8
263 help
264 Static number of loop devices to be unconditionally pre-created
265 at init time.
266
 267 This default value can be overridden on the kernel command
 268 line or with the loop.max_loop module parameter.
269
270 The historic default is 8. If a late 2011 version of losetup(8)
271 is used, it can be set to 0, since needed loop devices can be
272 dynamically allocated with the /dev/loop-control interface.
273
259config BLK_DEV_CRYPTOLOOP 274config BLK_DEV_CRYPTOLOOP
260 tristate "Cryptoloop Support" 275 tristate "Cryptoloop Support"
261 select CRYPTO 276 select CRYPTO
@@ -471,7 +486,7 @@ config XEN_BLKDEV_FRONTEND
471 in another domain which drives the actual block device. 486 in another domain which drives the actual block device.
472 487
473config XEN_BLKDEV_BACKEND 488config XEN_BLKDEV_BACKEND
474 tristate "Block-device backend driver" 489 tristate "Xen block-device backend driver"
475 depends on XEN_BACKEND 490 depends on XEN_BACKEND
476 help 491 help
477 The block-device backend driver allows the kernel to export its 492 The block-device backend driver allows the kernel to export its
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 515bcd948a4..0feab261e29 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1829,10 +1829,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1829 1829
1830 /* silently ignore cpu mask on UP kernel */ 1830 /* silently ignore cpu mask on UP kernel */
1831 if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { 1831 if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
1832 err = __bitmap_parse(sc.cpu_mask, 32, 0, 1832 err = bitmap_parse(sc.cpu_mask, 32,
1833 cpumask_bits(new_cpu_mask), nr_cpu_ids); 1833 cpumask_bits(new_cpu_mask), nr_cpu_ids);
1834 if (err) { 1834 if (err) {
1835 dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); 1835 dev_warn(DEV, "bitmap_parse() failed with %d\n", err);
1836 retcode = ERR_CPU_MASK_PARSE; 1836 retcode = ERR_CPU_MASK_PARSE;
1837 goto fail; 1837 goto fail;
1838 } 1838 }
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 76c8da78212..4720c7ade0a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -75,11 +75,11 @@
75#include <linux/kthread.h> 75#include <linux/kthread.h>
76#include <linux/splice.h> 76#include <linux/splice.h>
77#include <linux/sysfs.h> 77#include <linux/sysfs.h>
78 78#include <linux/miscdevice.h>
79#include <asm/uaccess.h> 79#include <asm/uaccess.h>
80 80
81static LIST_HEAD(loop_devices); 81static DEFINE_IDR(loop_index_idr);
82static DEFINE_MUTEX(loop_devices_mutex); 82static DEFINE_MUTEX(loop_index_mutex);
83 83
84static int max_part; 84static int max_part;
85static int part_shift; 85static int part_shift;
@@ -722,17 +722,10 @@ static inline int is_loop_device(struct file *file)
722static ssize_t loop_attr_show(struct device *dev, char *page, 722static ssize_t loop_attr_show(struct device *dev, char *page,
723 ssize_t (*callback)(struct loop_device *, char *)) 723 ssize_t (*callback)(struct loop_device *, char *))
724{ 724{
725 struct loop_device *l, *lo = NULL; 725 struct gendisk *disk = dev_to_disk(dev);
726 726 struct loop_device *lo = disk->private_data;
727 mutex_lock(&loop_devices_mutex);
728 list_for_each_entry(l, &loop_devices, lo_list)
729 if (disk_to_dev(l->lo_disk) == dev) {
730 lo = l;
731 break;
732 }
733 mutex_unlock(&loop_devices_mutex);
734 727
735 return lo ? callback(lo, page) : -EIO; 728 return callback(lo, page);
736} 729}
737 730
738#define LOOP_ATTR_RO(_name) \ 731#define LOOP_ATTR_RO(_name) \
@@ -750,10 +743,10 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
750 ssize_t ret; 743 ssize_t ret;
751 char *p = NULL; 744 char *p = NULL;
752 745
753 mutex_lock(&lo->lo_ctl_mutex); 746 spin_lock_irq(&lo->lo_lock);
754 if (lo->lo_backing_file) 747 if (lo->lo_backing_file)
755 p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); 748 p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
756 mutex_unlock(&lo->lo_ctl_mutex); 749 spin_unlock_irq(&lo->lo_lock);
757 750
758 if (IS_ERR_OR_NULL(p)) 751 if (IS_ERR_OR_NULL(p))
759 ret = PTR_ERR(p); 752 ret = PTR_ERR(p);
@@ -1007,7 +1000,9 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
1007 1000
1008 kthread_stop(lo->lo_thread); 1001 kthread_stop(lo->lo_thread);
1009 1002
1003 spin_lock_irq(&lo->lo_lock);
1010 lo->lo_backing_file = NULL; 1004 lo->lo_backing_file = NULL;
1005 spin_unlock_irq(&lo->lo_lock);
1011 1006
1012 loop_release_xfer(lo); 1007 loop_release_xfer(lo);
1013 lo->transfer = NULL; 1008 lo->transfer = NULL;
@@ -1485,13 +1480,22 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
1485 1480
1486static int lo_open(struct block_device *bdev, fmode_t mode) 1481static int lo_open(struct block_device *bdev, fmode_t mode)
1487{ 1482{
1488 struct loop_device *lo = bdev->bd_disk->private_data; 1483 struct loop_device *lo;
1484 int err = 0;
1485
1486 mutex_lock(&loop_index_mutex);
1487 lo = bdev->bd_disk->private_data;
1488 if (!lo) {
1489 err = -ENXIO;
1490 goto out;
1491 }
1489 1492
1490 mutex_lock(&lo->lo_ctl_mutex); 1493 mutex_lock(&lo->lo_ctl_mutex);
1491 lo->lo_refcnt++; 1494 lo->lo_refcnt++;
1492 mutex_unlock(&lo->lo_ctl_mutex); 1495 mutex_unlock(&lo->lo_ctl_mutex);
1493 1496out:
1494 return 0; 1497 mutex_unlock(&loop_index_mutex);
1498 return err;
1495} 1499}
1496 1500
1497static int lo_release(struct gendisk *disk, fmode_t mode) 1501static int lo_release(struct gendisk *disk, fmode_t mode)
@@ -1557,40 +1561,71 @@ int loop_register_transfer(struct loop_func_table *funcs)
1557 return 0; 1561 return 0;
1558} 1562}
1559 1563
1564static int unregister_transfer_cb(int id, void *ptr, void *data)
1565{
1566 struct loop_device *lo = ptr;
1567 struct loop_func_table *xfer = data;
1568
1569 mutex_lock(&lo->lo_ctl_mutex);
1570 if (lo->lo_encryption == xfer)
1571 loop_release_xfer(lo);
1572 mutex_unlock(&lo->lo_ctl_mutex);
1573 return 0;
1574}
1575
1560int loop_unregister_transfer(int number) 1576int loop_unregister_transfer(int number)
1561{ 1577{
1562 unsigned int n = number; 1578 unsigned int n = number;
1563 struct loop_device *lo;
1564 struct loop_func_table *xfer; 1579 struct loop_func_table *xfer;
1565 1580
1566 if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) 1581 if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
1567 return -EINVAL; 1582 return -EINVAL;
1568 1583
1569 xfer_funcs[n] = NULL; 1584 xfer_funcs[n] = NULL;
1570 1585 idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
1571 list_for_each_entry(lo, &loop_devices, lo_list) {
1572 mutex_lock(&lo->lo_ctl_mutex);
1573
1574 if (lo->lo_encryption == xfer)
1575 loop_release_xfer(lo);
1576
1577 mutex_unlock(&lo->lo_ctl_mutex);
1578 }
1579
1580 return 0; 1586 return 0;
1581} 1587}
1582 1588
1583EXPORT_SYMBOL(loop_register_transfer); 1589EXPORT_SYMBOL(loop_register_transfer);
1584EXPORT_SYMBOL(loop_unregister_transfer); 1590EXPORT_SYMBOL(loop_unregister_transfer);
1585 1591
1586static struct loop_device *loop_alloc(int i) 1592static int loop_add(struct loop_device **l, int i)
1587{ 1593{
1588 struct loop_device *lo; 1594 struct loop_device *lo;
1589 struct gendisk *disk; 1595 struct gendisk *disk;
1596 int err;
1590 1597
1591 lo = kzalloc(sizeof(*lo), GFP_KERNEL); 1598 lo = kzalloc(sizeof(*lo), GFP_KERNEL);
1592 if (!lo) 1599 if (!lo) {
1600 err = -ENOMEM;
1593 goto out; 1601 goto out;
1602 }
1603
1604 err = idr_pre_get(&loop_index_idr, GFP_KERNEL);
1605 if (err < 0)
1606 goto out_free_dev;
1607
1608 if (i >= 0) {
1609 int m;
1610
1611 /* create specific i in the index */
1612 err = idr_get_new_above(&loop_index_idr, lo, i, &m);
1613 if (err >= 0 && i != m) {
1614 idr_remove(&loop_index_idr, m);
1615 err = -EEXIST;
1616 }
1617 } else if (i == -1) {
1618 int m;
1619
1620 /* get next free nr */
1621 err = idr_get_new(&loop_index_idr, lo, &m);
1622 if (err >= 0)
1623 i = m;
1624 } else {
1625 err = -EINVAL;
1626 }
1627 if (err < 0)
1628 goto out_free_dev;
1594 1629
1595 lo->lo_queue = blk_alloc_queue(GFP_KERNEL); 1630 lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
1596 if (!lo->lo_queue) 1631 if (!lo->lo_queue)
@@ -1611,81 +1646,158 @@ static struct loop_device *loop_alloc(int i)
1611 disk->private_data = lo; 1646 disk->private_data = lo;
1612 disk->queue = lo->lo_queue; 1647 disk->queue = lo->lo_queue;
1613 sprintf(disk->disk_name, "loop%d", i); 1648 sprintf(disk->disk_name, "loop%d", i);
1614 return lo; 1649 add_disk(disk);
1650 *l = lo;
1651 return lo->lo_number;
1615 1652
1616out_free_queue: 1653out_free_queue:
1617 blk_cleanup_queue(lo->lo_queue); 1654 blk_cleanup_queue(lo->lo_queue);
1618out_free_dev: 1655out_free_dev:
1619 kfree(lo); 1656 kfree(lo);
1620out: 1657out:
1621 return NULL; 1658 return err;
1622} 1659}
1623 1660
1624static void loop_free(struct loop_device *lo) 1661static void loop_remove(struct loop_device *lo)
1625{ 1662{
1663 del_gendisk(lo->lo_disk);
1626 blk_cleanup_queue(lo->lo_queue); 1664 blk_cleanup_queue(lo->lo_queue);
1627 put_disk(lo->lo_disk); 1665 put_disk(lo->lo_disk);
1628 list_del(&lo->lo_list);
1629 kfree(lo); 1666 kfree(lo);
1630} 1667}
1631 1668
1632static struct loop_device *loop_init_one(int i) 1669static int find_free_cb(int id, void *ptr, void *data)
1670{
1671 struct loop_device *lo = ptr;
1672 struct loop_device **l = data;
1673
1674 if (lo->lo_state == Lo_unbound) {
1675 *l = lo;
1676 return 1;
1677 }
1678 return 0;
1679}
1680
1681static int loop_lookup(struct loop_device **l, int i)
1633{ 1682{
1634 struct loop_device *lo; 1683 struct loop_device *lo;
1684 int ret = -ENODEV;
1635 1685
1636 list_for_each_entry(lo, &loop_devices, lo_list) { 1686 if (i < 0) {
1637 if (lo->lo_number == i) 1687 int err;
1638 return lo; 1688
1689 err = idr_for_each(&loop_index_idr, &find_free_cb, &lo);
1690 if (err == 1) {
1691 *l = lo;
1692 ret = lo->lo_number;
1693 }
1694 goto out;
1639 } 1695 }
1640 1696
1641 lo = loop_alloc(i); 1697 /* lookup and return a specific i */
1698 lo = idr_find(&loop_index_idr, i);
1642 if (lo) { 1699 if (lo) {
1643 add_disk(lo->lo_disk); 1700 *l = lo;
1644 list_add_tail(&lo->lo_list, &loop_devices); 1701 ret = lo->lo_number;
1645 } 1702 }
1646 return lo; 1703out:
1647} 1704 return ret;
1648
1649static void loop_del_one(struct loop_device *lo)
1650{
1651 del_gendisk(lo->lo_disk);
1652 loop_free(lo);
1653} 1705}
1654 1706
1655static struct kobject *loop_probe(dev_t dev, int *part, void *data) 1707static struct kobject *loop_probe(dev_t dev, int *part, void *data)
1656{ 1708{
1657 struct loop_device *lo; 1709 struct loop_device *lo;
1658 struct kobject *kobj; 1710 struct kobject *kobj;
1711 int err;
1659 1712
1660 mutex_lock(&loop_devices_mutex); 1713 mutex_lock(&loop_index_mutex);
1661 lo = loop_init_one(MINOR(dev) >> part_shift); 1714 err = loop_lookup(&lo, MINOR(dev) >> part_shift);
1662 kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); 1715 if (err < 0)
1663 mutex_unlock(&loop_devices_mutex); 1716 err = loop_add(&lo, MINOR(dev) >> part_shift);
1717 if (err < 0)
1718 kobj = ERR_PTR(err);
1719 else
1720 kobj = get_disk(lo->lo_disk);
1721 mutex_unlock(&loop_index_mutex);
1664 1722
1665 *part = 0; 1723 *part = 0;
1666 return kobj; 1724 return kobj;
1667} 1725}
1668 1726
1727static long loop_control_ioctl(struct file *file, unsigned int cmd,
1728 unsigned long parm)
1729{
1730 struct loop_device *lo;
1731 int ret = -ENOSYS;
1732
1733 mutex_lock(&loop_index_mutex);
1734 switch (cmd) {
1735 case LOOP_CTL_ADD:
1736 ret = loop_lookup(&lo, parm);
1737 if (ret >= 0) {
1738 ret = -EEXIST;
1739 break;
1740 }
1741 ret = loop_add(&lo, parm);
1742 break;
1743 case LOOP_CTL_REMOVE:
1744 ret = loop_lookup(&lo, parm);
1745 if (ret < 0)
1746 break;
1747 mutex_lock(&lo->lo_ctl_mutex);
1748 if (lo->lo_state != Lo_unbound) {
1749 ret = -EBUSY;
1750 mutex_unlock(&lo->lo_ctl_mutex);
1751 break;
1752 }
1753 if (lo->lo_refcnt > 0) {
1754 ret = -EBUSY;
1755 mutex_unlock(&lo->lo_ctl_mutex);
1756 break;
1757 }
1758 lo->lo_disk->private_data = NULL;
1759 mutex_unlock(&lo->lo_ctl_mutex);
1760 idr_remove(&loop_index_idr, lo->lo_number);
1761 loop_remove(lo);
1762 break;
1763 case LOOP_CTL_GET_FREE:
1764 ret = loop_lookup(&lo, -1);
1765 if (ret >= 0)
1766 break;
1767 ret = loop_add(&lo, -1);
1768 }
1769 mutex_unlock(&loop_index_mutex);
1770
1771 return ret;
1772}
1773
1774static const struct file_operations loop_ctl_fops = {
1775 .open = nonseekable_open,
1776 .unlocked_ioctl = loop_control_ioctl,
1777 .compat_ioctl = loop_control_ioctl,
1778 .owner = THIS_MODULE,
1779 .llseek = noop_llseek,
1780};
1781
1782static struct miscdevice loop_misc = {
1783 .minor = LOOP_CTRL_MINOR,
1784 .name = "loop-control",
1785 .fops = &loop_ctl_fops,
1786};
1787
1788MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR);
1789MODULE_ALIAS("devname:loop-control");
1790
1669static int __init loop_init(void) 1791static int __init loop_init(void)
1670{ 1792{
1671 int i, nr; 1793 int i, nr;
1672 unsigned long range; 1794 unsigned long range;
1673 struct loop_device *lo, *next; 1795 struct loop_device *lo;
1796 int err;
1674 1797
1675 /* 1798 err = misc_register(&loop_misc);
1676 * loop module now has a feature to instantiate underlying device 1799 if (err < 0)
1677 * structure on-demand, provided that there is an access dev node. 1800 return err;
1678 * However, this will not work well with user space tool that doesn't
1679 * know about such "feature". In order to not break any existing
1680 * tool, we do the following:
1681 *
1682 * (1) if max_loop is specified, create that many upfront, and this
1683 * also becomes a hard limit.
1684 * (2) if max_loop is not specified, create 8 loop device on module
1685 * load, user can further extend loop device by create dev node
1686 * themselves and have kernel automatically instantiate actual
1687 * device on-demand.
1688 */
1689 1801
1690 part_shift = 0; 1802 part_shift = 0;
1691 if (max_part > 0) { 1803 if (max_part > 0) {
@@ -1708,57 +1820,60 @@ static int __init loop_init(void)
1708 if (max_loop > 1UL << (MINORBITS - part_shift)) 1820 if (max_loop > 1UL << (MINORBITS - part_shift))
1709 return -EINVAL; 1821 return -EINVAL;
1710 1822
1823 /*
1824 * If max_loop is specified, create that many devices upfront.
1825 * This also becomes a hard limit. If max_loop is not specified,
1826 * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module
1827 * init time. Loop devices can be requested on-demand with the
1828 * /dev/loop-control interface, or be instantiated by accessing
1829 * a 'dead' device node.
1830 */
1711 if (max_loop) { 1831 if (max_loop) {
1712 nr = max_loop; 1832 nr = max_loop;
1713 range = max_loop << part_shift; 1833 range = max_loop << part_shift;
1714 } else { 1834 } else {
1715 nr = 8; 1835 nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT;
1716 range = 1UL << MINORBITS; 1836 range = 1UL << MINORBITS;
1717 } 1837 }
1718 1838
1719 if (register_blkdev(LOOP_MAJOR, "loop")) 1839 if (register_blkdev(LOOP_MAJOR, "loop"))
1720 return -EIO; 1840 return -EIO;
1721 1841
1722 for (i = 0; i < nr; i++) {
1723 lo = loop_alloc(i);
1724 if (!lo)
1725 goto Enomem;
1726 list_add_tail(&lo->lo_list, &loop_devices);
1727 }
1728
1729 /* point of no return */
1730
1731 list_for_each_entry(lo, &loop_devices, lo_list)
1732 add_disk(lo->lo_disk);
1733
1734 blk_register_region(MKDEV(LOOP_MAJOR, 0), range, 1842 blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
1735 THIS_MODULE, loop_probe, NULL, NULL); 1843 THIS_MODULE, loop_probe, NULL, NULL);
1736 1844
1845 /* pre-create number of devices given by config or max_loop */
1846 mutex_lock(&loop_index_mutex);
1847 for (i = 0; i < nr; i++)
1848 loop_add(&lo, i);
1849 mutex_unlock(&loop_index_mutex);
1850
1737 printk(KERN_INFO "loop: module loaded\n"); 1851 printk(KERN_INFO "loop: module loaded\n");
1738 return 0; 1852 return 0;
1853}
1739 1854
1740Enomem: 1855static int loop_exit_cb(int id, void *ptr, void *data)
1741 printk(KERN_INFO "loop: out of memory\n"); 1856{
1742 1857 struct loop_device *lo = ptr;
1743 list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
1744 loop_free(lo);
1745 1858
1746 unregister_blkdev(LOOP_MAJOR, "loop"); 1859 loop_remove(lo);
1747 return -ENOMEM; 1860 return 0;
1748} 1861}
1749 1862
1750static void __exit loop_exit(void) 1863static void __exit loop_exit(void)
1751{ 1864{
1752 unsigned long range; 1865 unsigned long range;
1753 struct loop_device *lo, *next;
1754 1866
1755 range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; 1867 range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;
1756 1868
1757 list_for_each_entry_safe(lo, next, &loop_devices, lo_list) 1869 idr_for_each(&loop_index_idr, &loop_exit_cb, NULL);
1758 loop_del_one(lo); 1870 idr_remove_all(&loop_index_idr);
1871 idr_destroy(&loop_index_idr);
1759 1872
1760 blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); 1873 blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
1761 unregister_blkdev(LOOP_MAJOR, "loop"); 1874 unregister_blkdev(LOOP_MAJOR, "loop");
1875
1876 misc_deregister(&loop_misc);
1762} 1877}
1763 1878
1764module_init(loop_init); 1879module_init(loop_init);
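For reference, the /dev/loop-control node created by the loop_misc registration above is driven from user space with plain ioctl()s that land in loop_control_ioctl(). A minimal sketch, assuming the LOOP_CTL_ADD/LOOP_CTL_REMOVE/LOOP_CTL_GET_FREE definitions from linux/loop.h that accompany this series (error handling trimmed):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/loop.h>		/* LOOP_CTL_ADD, LOOP_CTL_REMOVE, LOOP_CTL_GET_FREE */

int main(void)
{
	int ctl = open("/dev/loop-control", O_RDWR);

	if (ctl < 0)
		return 1;

	/* first unbound device, i.e. loop_lookup(&lo, -1) then loop_add() above */
	int nr = ioctl(ctl, LOOP_CTL_GET_FREE);
	printf("first free loop device: /dev/loop%d\n", nr);

	/* explicitly allocate loop7; fails with EEXIST if that index already exists */
	ioctl(ctl, LOOP_CTL_ADD, 7);

	/* tear it down again; fails with EBUSY while the device is bound or still open */
	ioctl(ctl, LOOP_CTL_REMOVE, 7);

	close(ctl);
	return 0;
}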
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 773bfa79277..ae3e167e17a 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1184,6 +1184,7 @@ static struct of_device_id swim3_match[] =
1184 { 1184 {
1185 .compatible = "swim3" 1185 .compatible = "swim3"
1186 }, 1186 },
1187 { /* end of list */ }
1187}; 1188};
1188 1189
1189static struct macio_driver swim3_driver = 1190static struct macio_driver swim3_driver =
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index b536a9cef91..9ea8c2576c7 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -123,8 +123,8 @@ static DEFINE_SPINLOCK(minor_lock);
123#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) 123#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
124#define EMULATED_HD_DISK_MINOR_OFFSET (0) 124#define EMULATED_HD_DISK_MINOR_OFFSET (0)
125#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) 125#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
126#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16)) 126#define EMULATED_SD_DISK_MINOR_OFFSET (0)
127#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4) 127#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)
128 128
129#define DEV_NAME "xvd" /* name in /dev */ 129#define DEV_NAME "xvd" /* name in /dev */
130 130
@@ -529,7 +529,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
529 minor = BLKIF_MINOR_EXT(info->vdevice); 529 minor = BLKIF_MINOR_EXT(info->vdevice);
530 nr_parts = PARTS_PER_EXT_DISK; 530 nr_parts = PARTS_PER_EXT_DISK;
531 offset = minor / nr_parts; 531 offset = minor / nr_parts;
532 if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4) 532 if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
533 printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " 533 printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
534 "emulated IDE disks,\n\t choose an xvd device name" 534 "emulated IDE disks,\n\t choose an xvd device name"
535 "from xvde on\n", info->vdevice); 535 "from xvde on\n", info->vdevice);
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 75fb965b8f7..f997c27d79e 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1929,11 +1929,17 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s,
1929 goto out; 1929 goto out;
1930 1930
1931 s->manufact.len = buf[0] << 8 | buf[1]; 1931 s->manufact.len = buf[0] << 8 | buf[1];
1932 if (s->manufact.len < 0 || s->manufact.len > 2048) { 1932 if (s->manufact.len < 0) {
1933 cdinfo(CD_WARNING, "Received invalid manufacture info length" 1933 cdinfo(CD_WARNING, "Received invalid manufacture info length"
1934 " (%d)\n", s->manufact.len); 1934 " (%d)\n", s->manufact.len);
1935 ret = -EIO; 1935 ret = -EIO;
1936 } else { 1936 } else {
1937 if (s->manufact.len > 2048) {
1938 cdinfo(CD_WARNING, "Received invalid manufacture info "
1939 "length (%d): truncating to 2048\n",
1940 s->manufact.len);
1941 s->manufact.len = 2048;
1942 }
1937 memcpy(s->manufact.value, &buf[4], s->manufact.len); 1943 memcpy(s->manufact.value, &buf[4], s->manufact.len);
1938 } 1944 }
1939 1945
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 04f1e7ce02b..f6cf448d69b 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1670,7 +1670,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
1670 char *type, *optype, *err, *msg; 1670 char *type, *optype, *err, *msg;
1671 unsigned long error = m->status & 0x1ff0000l; 1671 unsigned long error = m->status & 0x1ff0000l;
1672 u32 optypenum = (m->status >> 4) & 0x07; 1672 u32 optypenum = (m->status >> 4) & 0x07;
1673 u32 core_err_cnt = (m->status >> 38) && 0x7fff; 1673 u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1674 u32 dimm = (m->misc >> 16) & 0x3; 1674 u32 dimm = (m->misc >> 16) & 0x3;
1675 u32 channel = (m->misc >> 18) & 0x3; 1675 u32 channel = (m->misc >> 18) & 0x3;
1676 u32 syndrome = m->misc >> 32; 1676 u32 syndrome = m->misc >> 32;
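The i7core_edac hunk above is a one-character fix: '&&' is a logical AND, so the original expression reduced the 15-bit core error count to 0 or 1 instead of extracting it. A standalone illustration (hypothetical values, not taken from real MCE data):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t status = (uint64_t)0x1234 << 38;	/* pretend the core_err_cnt field is 0x1234 */

	uint32_t logical = (status >> 38) && 0x7fff;	/* collapses to 1 for any non-zero field */
	uint32_t bitwise = (status >> 38) & 0x7fff;	/* masks out the field: 0x1234 */

	printf("&& gives %u, & gives 0x%x\n", logical, bitwise);
	return 0;
}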
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index e6ad3bb6c1a..4799393247c 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -216,15 +216,33 @@ struct inbound_phy_packet_event {
216 struct fw_cdev_event_phy_packet phy_packet; 216 struct fw_cdev_event_phy_packet phy_packet;
217}; 217};
218 218
219static inline void __user *u64_to_uptr(__u64 value) 219#ifdef CONFIG_COMPAT
220static void __user *u64_to_uptr(u64 value)
221{
222 if (is_compat_task())
223 return compat_ptr(value);
224 else
225 return (void __user *)(unsigned long)value;
226}
227
228static u64 uptr_to_u64(void __user *ptr)
229{
230 if (is_compat_task())
231 return ptr_to_compat(ptr);
232 else
233 return (u64)(unsigned long)ptr;
234}
235#else
236static inline void __user *u64_to_uptr(u64 value)
220{ 237{
221 return (void __user *)(unsigned long)value; 238 return (void __user *)(unsigned long)value;
222} 239}
223 240
224static inline __u64 uptr_to_u64(void __user *ptr) 241static inline u64 uptr_to_u64(void __user *ptr)
225{ 242{
226 return (__u64)(unsigned long)ptr; 243 return (u64)(unsigned long)ptr;
227} 244}
245#endif /* CONFIG_COMPAT */
228 246
229static int fw_device_op_open(struct inode *inode, struct file *file) 247static int fw_device_op_open(struct inode *inode, struct file *file)
230{ 248{
diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index 8ba7f7928f1..f3b890da1e8 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -455,15 +455,20 @@ static struct device_attribute fw_device_attributes[] = {
455static int read_rom(struct fw_device *device, 455static int read_rom(struct fw_device *device,
456 int generation, int index, u32 *data) 456 int generation, int index, u32 *data)
457{ 457{
458 int rcode; 458 u64 offset = (CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4;
459 int i, rcode;
459 460
460 /* device->node_id, accessed below, must not be older than generation */ 461 /* device->node_id, accessed below, must not be older than generation */
461 smp_rmb(); 462 smp_rmb();
462 463
463 rcode = fw_run_transaction(device->card, TCODE_READ_QUADLET_REQUEST, 464 for (i = 10; i < 100; i += 10) {
464 device->node_id, generation, device->max_speed, 465 rcode = fw_run_transaction(device->card,
465 (CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4, 466 TCODE_READ_QUADLET_REQUEST, device->node_id,
466 data, 4); 467 generation, device->max_speed, offset, data, 4);
468 if (rcode != RCODE_BUSY)
469 break;
470 msleep(i);
471 }
467 be32_to_cpus(data); 472 be32_to_cpus(data);
468 473
469 return rcode; 474 return rcode;
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index bcf792fac44..57cd3a406ed 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -2179,8 +2179,13 @@ static int ohci_enable(struct fw_card *card,
2179 ohci_driver_name, ohci)) { 2179 ohci_driver_name, ohci)) {
2180 fw_error("Failed to allocate interrupt %d.\n", dev->irq); 2180 fw_error("Failed to allocate interrupt %d.\n", dev->irq);
2181 pci_disable_msi(dev); 2181 pci_disable_msi(dev);
2182 dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE, 2182
2183 ohci->config_rom, ohci->config_rom_bus); 2183 if (config_rom) {
2184 dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE,
2185 ohci->next_config_rom,
2186 ohci->next_config_rom_bus);
2187 ohci->next_config_rom = NULL;
2188 }
2184 return -EIO; 2189 return -EIO;
2185 } 2190 }
2186 2191
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a8ab6263e0d..3c395a59da3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -499,7 +499,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
499 seq_printf(m, "Interrupts received: %d\n", 499 seq_printf(m, "Interrupts received: %d\n",
500 atomic_read(&dev_priv->irq_received)); 500 atomic_read(&dev_priv->irq_received));
501 for (i = 0; i < I915_NUM_RINGS; i++) { 501 for (i = 0; i < I915_NUM_RINGS; i++) {
502 if (IS_GEN6(dev)) { 502 if (IS_GEN6(dev) || IS_GEN7(dev)) {
503 seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", 503 seq_printf(m, "Graphics Interrupt mask (%s): %08x\n",
504 dev_priv->ring[i].name, 504 dev_priv->ring[i].name,
505 I915_READ_IMR(&dev_priv->ring[i])); 505 I915_READ_IMR(&dev_priv->ring[i]));
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index feb4f164fd1..7916bd97d5c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -36,6 +36,7 @@
36#include <linux/io-mapping.h> 36#include <linux/io-mapping.h>
37#include <linux/i2c.h> 37#include <linux/i2c.h>
38#include <drm/intel-gtt.h> 38#include <drm/intel-gtt.h>
39#include <linux/backlight.h>
39 40
40/* General customization: 41/* General customization:
41 */ 42 */
@@ -690,6 +691,7 @@ typedef struct drm_i915_private {
690 int child_dev_num; 691 int child_dev_num;
691 struct child_device_config *child_dev; 692 struct child_device_config *child_dev;
692 struct drm_connector *int_lvds_connector; 693 struct drm_connector *int_lvds_connector;
694 struct drm_connector *int_edp_connector;
693 695
694 bool mchbar_need_disable; 696 bool mchbar_need_disable;
695 697
@@ -723,6 +725,8 @@ typedef struct drm_i915_private {
723 /* list of fbdev register on this device */ 725 /* list of fbdev register on this device */
724 struct intel_fbdev *fbdev; 726 struct intel_fbdev *fbdev;
725 727
728 struct backlight_device *backlight;
729
726 struct drm_property *broadcast_rgb_property; 730 struct drm_property *broadcast_rgb_property;
727 struct drm_property *force_audio_property; 731 struct drm_property *force_audio_property;
728 732
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 02f96fd0d52..9cbb0cd8f46 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2058,8 +2058,10 @@ void intel_irq_init(struct drm_device *dev)
2058 dev->driver->get_vblank_counter = gm45_get_vblank_counter; 2058 dev->driver->get_vblank_counter = gm45_get_vblank_counter;
2059 } 2059 }
2060 2060
2061 2061 if (drm_core_check_feature(dev, DRIVER_MODESET))
2062 dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; 2062 dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp;
2063 else
2064 dev->driver->get_vblank_timestamp = NULL;
2063 dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; 2065 dev->driver->get_scanout_position = i915_get_crtc_scanoutpos;
2064 2066
2065 if (IS_IVYBRIDGE(dev)) { 2067 if (IS_IVYBRIDGE(dev)) {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index d1331f771e2..542453f7498 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -375,6 +375,7 @@
375# define MI_FLUSH_ENABLE (1 << 11) 375# define MI_FLUSH_ENABLE (1 << 11)
376 376
377#define GFX_MODE 0x02520 377#define GFX_MODE 0x02520
378#define GFX_MODE_GEN7 0x0229c
378#define GFX_RUN_LIST_ENABLE (1<<15) 379#define GFX_RUN_LIST_ENABLE (1<<15)
379#define GFX_TLB_INVALIDATE_ALWAYS (1<<13) 380#define GFX_TLB_INVALIDATE_ALWAYS (1<<13)
380#define GFX_SURFACE_FAULT_ENABLE (1<<12) 381#define GFX_SURFACE_FAULT_ENABLE (1<<12)
@@ -382,6 +383,9 @@
382#define GFX_PSMI_GRANULARITY (1<<10) 383#define GFX_PSMI_GRANULARITY (1<<10)
383#define GFX_PPGTT_ENABLE (1<<9) 384#define GFX_PPGTT_ENABLE (1<<9)
384 385
386#define GFX_MODE_ENABLE(bit) (((bit) << 16) | (bit))
387#define GFX_MODE_DISABLE(bit) (((bit) << 16) | (0))
388
385#define SCPD0 0x0209c /* 915+ only */ 389#define SCPD0 0x0209c /* 915+ only */
386#define IER 0x020a0 390#define IER 0x020a0
387#define IIR 0x020a4 391#define IIR 0x020a4
@@ -1318,6 +1322,7 @@
1318#define ADPA_PIPE_SELECT_MASK (1<<30) 1322#define ADPA_PIPE_SELECT_MASK (1<<30)
1319#define ADPA_PIPE_A_SELECT 0 1323#define ADPA_PIPE_A_SELECT 0
1320#define ADPA_PIPE_B_SELECT (1<<30) 1324#define ADPA_PIPE_B_SELECT (1<<30)
1325#define ADPA_PIPE_SELECT(pipe) ((pipe) << 30)
1321#define ADPA_USE_VGA_HVPOLARITY (1<<15) 1326#define ADPA_USE_VGA_HVPOLARITY (1<<15)
1322#define ADPA_SETS_HVPOLARITY 0 1327#define ADPA_SETS_HVPOLARITY 0
1323#define ADPA_VSYNC_CNTL_DISABLE (1<<11) 1328#define ADPA_VSYNC_CNTL_DISABLE (1<<11)
@@ -1460,6 +1465,7 @@
1460/* Selects pipe B for LVDS data. Must be set on pre-965. */ 1465/* Selects pipe B for LVDS data. Must be set on pre-965. */
1461#define LVDS_PIPEB_SELECT (1 << 30) 1466#define LVDS_PIPEB_SELECT (1 << 30)
1462#define LVDS_PIPE_MASK (1 << 30) 1467#define LVDS_PIPE_MASK (1 << 30)
1468#define LVDS_PIPE(pipe) ((pipe) << 30)
1463/* LVDS dithering flag on 965/g4x platform */ 1469/* LVDS dithering flag on 965/g4x platform */
1464#define LVDS_ENABLE_DITHER (1 << 25) 1470#define LVDS_ENABLE_DITHER (1 << 25)
1465/* LVDS sync polarity flags. Set to invert (i.e. negative) */ 1471/* LVDS sync polarity flags. Set to invert (i.e. negative) */
@@ -1499,9 +1505,6 @@
1499#define LVDS_B0B3_POWER_DOWN (0 << 2) 1505#define LVDS_B0B3_POWER_DOWN (0 << 2)
1500#define LVDS_B0B3_POWER_UP (3 << 2) 1506#define LVDS_B0B3_POWER_UP (3 << 2)
1501 1507
1502#define LVDS_PIPE_ENABLED(V, P) \
1503 (((V) & (LVDS_PIPE_MASK | LVDS_PORT_EN)) == ((P) << 30 | LVDS_PORT_EN))
1504
1505/* Video Data Island Packet control */ 1508/* Video Data Island Packet control */
1506#define VIDEO_DIP_DATA 0x61178 1509#define VIDEO_DIP_DATA 0x61178
1507#define VIDEO_DIP_CTL 0x61170 1510#define VIDEO_DIP_CTL 0x61170
@@ -3256,14 +3259,12 @@
3256#define ADPA_CRT_HOTPLUG_VOLREF_475MV (1<<17) 3259#define ADPA_CRT_HOTPLUG_VOLREF_475MV (1<<17)
3257#define ADPA_CRT_HOTPLUG_FORCE_TRIGGER (1<<16) 3260#define ADPA_CRT_HOTPLUG_FORCE_TRIGGER (1<<16)
3258 3261
3259#define ADPA_PIPE_ENABLED(V, P) \
3260 (((V) & (ADPA_TRANS_SELECT_MASK | ADPA_DAC_ENABLE)) == ((P) << 30 | ADPA_DAC_ENABLE))
3261
3262/* or SDVOB */ 3262/* or SDVOB */
3263#define HDMIB 0xe1140 3263#define HDMIB 0xe1140
3264#define PORT_ENABLE (1 << 31) 3264#define PORT_ENABLE (1 << 31)
3265#define TRANSCODER_A (0) 3265#define TRANSCODER_A (0)
3266#define TRANSCODER_B (1 << 30) 3266#define TRANSCODER_B (1 << 30)
3267#define TRANSCODER(pipe) ((pipe) << 30)
3267#define TRANSCODER_MASK (1 << 30) 3268#define TRANSCODER_MASK (1 << 30)
3268#define COLOR_FORMAT_8bpc (0) 3269#define COLOR_FORMAT_8bpc (0)
3269#define COLOR_FORMAT_12bpc (3 << 26) 3270#define COLOR_FORMAT_12bpc (3 << 26)
@@ -3280,9 +3281,6 @@
3280#define HSYNC_ACTIVE_HIGH (1 << 3) 3281#define HSYNC_ACTIVE_HIGH (1 << 3)
3281#define PORT_DETECTED (1 << 2) 3282#define PORT_DETECTED (1 << 2)
3282 3283
3283#define HDMI_PIPE_ENABLED(V, P) \
3284 (((V) & (TRANSCODER_MASK | PORT_ENABLE)) == ((P) << 30 | PORT_ENABLE))
3285
3286/* PCH SDVOB multiplex with HDMIB */ 3284/* PCH SDVOB multiplex with HDMIB */
3287#define PCH_SDVOB HDMIB 3285#define PCH_SDVOB HDMIB
3288 3286
@@ -3349,6 +3347,7 @@
3349#define PORT_TRANS_B_SEL_CPT (1<<29) 3347#define PORT_TRANS_B_SEL_CPT (1<<29)
3350#define PORT_TRANS_C_SEL_CPT (2<<29) 3348#define PORT_TRANS_C_SEL_CPT (2<<29)
3351#define PORT_TRANS_SEL_MASK (3<<29) 3349#define PORT_TRANS_SEL_MASK (3<<29)
3350#define PORT_TRANS_SEL_CPT(pipe) ((pipe) << 29)
3352 3351
3353#define TRANS_DP_CTL_A 0xe0300 3352#define TRANS_DP_CTL_A 0xe0300
3354#define TRANS_DP_CTL_B 0xe1300 3353#define TRANS_DP_CTL_B 0xe1300
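A note on the GFX_MODE_ENABLE()/GFX_MODE_DISABLE() helpers added above: GFX_MODE (and the new GFX_MODE_GEN7) use the masked-write scheme, where the upper 16 bits of the written value select which low bits the write actually touches — the same pattern the MI_MODE write in intel_ringbuffer.c below uses by hand. A quick standalone illustration, using a 1 << 13 flag as a stand-in for GFX_TLB_INVALIDATE_ALWAYS:

#include <stdio.h>

#define GFX_MODE_ENABLE(bit)	(((bit) << 16) | (bit))
#define GFX_MODE_DISABLE(bit)	(((bit) << 16) | (0))

int main(void)
{
	unsigned int flag = 1 << 13;	/* stand-in for GFX_TLB_INVALIDATE_ALWAYS */

	/* mask bit set, value bit set: the write enables the feature */
	printf("enable : 0x%08x\n", GFX_MODE_ENABLE(flag));	/* 0x20002000 */
	/* mask bit set, value bit clear: the write disables it */
	printf("disable: 0x%08x\n", GFX_MODE_DISABLE(flag));	/* 0x20000000 */
	return 0;
}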
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 87677d60d0d..f10742359ec 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -871,7 +871,8 @@ int i915_restore_state(struct drm_device *dev)
871 } 871 }
872 mutex_unlock(&dev->struct_mutex); 872 mutex_unlock(&dev->struct_mutex);
873 873
874 intel_init_clock_gating(dev); 874 if (drm_core_check_feature(dev, DRIVER_MODESET))
875 intel_init_clock_gating(dev);
875 876
876 if (IS_IRONLAKE_M(dev)) { 877 if (IS_IRONLAKE_M(dev)) {
877 ironlake_enable_drps(dev); 878 ironlake_enable_drps(dev);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 35364e68a09..ee1d701317f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -980,8 +980,8 @@ static void assert_transcoder_disabled(struct drm_i915_private *dev_priv,
980 pipe_name(pipe)); 980 pipe_name(pipe));
981} 981}
982 982
983static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe, 983static bool dp_pipe_enabled(struct drm_i915_private *dev_priv,
984 int reg, u32 port_sel, u32 val) 984 enum pipe pipe, u32 port_sel, u32 val)
985{ 985{
986 if ((val & DP_PORT_EN) == 0) 986 if ((val & DP_PORT_EN) == 0)
987 return false; 987 return false;
@@ -998,11 +998,58 @@ static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe,
998 return true; 998 return true;
999} 999}
1000 1000
1001static bool hdmi_pipe_enabled(struct drm_i915_private *dev_priv,
1002 enum pipe pipe, u32 val)
1003{
1004 if ((val & PORT_ENABLE) == 0)
1005 return false;
1006
1007 if (HAS_PCH_CPT(dev_priv->dev)) {
1008 if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe))
1009 return false;
1010 } else {
1011 if ((val & TRANSCODER_MASK) != TRANSCODER(pipe))
1012 return false;
1013 }
1014 return true;
1015}
1016
1017static bool lvds_pipe_enabled(struct drm_i915_private *dev_priv,
1018 enum pipe pipe, u32 val)
1019{
1020 if ((val & LVDS_PORT_EN) == 0)
1021 return false;
1022
1023 if (HAS_PCH_CPT(dev_priv->dev)) {
1024 if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe))
1025 return false;
1026 } else {
1027 if ((val & LVDS_PIPE_MASK) != LVDS_PIPE(pipe))
1028 return false;
1029 }
1030 return true;
1031}
1032
1033static bool adpa_pipe_enabled(struct drm_i915_private *dev_priv,
1034 enum pipe pipe, u32 val)
1035{
1036 if ((val & ADPA_DAC_ENABLE) == 0)
1037 return false;
1038 if (HAS_PCH_CPT(dev_priv->dev)) {
1039 if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe))
1040 return false;
1041 } else {
1042 if ((val & ADPA_PIPE_SELECT_MASK) != ADPA_PIPE_SELECT(pipe))
1043 return false;
1044 }
1045 return true;
1046}
1047
1001static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv, 1048static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv,
1002 enum pipe pipe, int reg, u32 port_sel) 1049 enum pipe pipe, int reg, u32 port_sel)
1003{ 1050{
1004 u32 val = I915_READ(reg); 1051 u32 val = I915_READ(reg);
1005 WARN(dp_pipe_enabled(dev_priv, pipe, reg, port_sel, val), 1052 WARN(dp_pipe_enabled(dev_priv, pipe, port_sel, val),
1006 "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n", 1053 "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n",
1007 reg, pipe_name(pipe)); 1054 reg, pipe_name(pipe));
1008} 1055}
@@ -1011,7 +1058,7 @@ static void assert_pch_hdmi_disabled(struct drm_i915_private *dev_priv,
1011 enum pipe pipe, int reg) 1058 enum pipe pipe, int reg)
1012{ 1059{
1013 u32 val = I915_READ(reg); 1060 u32 val = I915_READ(reg);
1014 WARN(HDMI_PIPE_ENABLED(val, pipe), 1061 WARN(hdmi_pipe_enabled(dev_priv, val, pipe),
1015 "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n", 1062 "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n",
1016 reg, pipe_name(pipe)); 1063 reg, pipe_name(pipe));
1017} 1064}
@@ -1028,13 +1075,13 @@ static void assert_pch_ports_disabled(struct drm_i915_private *dev_priv,
1028 1075
1029 reg = PCH_ADPA; 1076 reg = PCH_ADPA;
1030 val = I915_READ(reg); 1077 val = I915_READ(reg);
1031 WARN(ADPA_PIPE_ENABLED(val, pipe), 1078 WARN(adpa_pipe_enabled(dev_priv, val, pipe),
1032 "PCH VGA enabled on transcoder %c, should be disabled\n", 1079 "PCH VGA enabled on transcoder %c, should be disabled\n",
1033 pipe_name(pipe)); 1080 pipe_name(pipe));
1034 1081
1035 reg = PCH_LVDS; 1082 reg = PCH_LVDS;
1036 val = I915_READ(reg); 1083 val = I915_READ(reg);
1037 WARN(LVDS_PIPE_ENABLED(val, pipe), 1084 WARN(lvds_pipe_enabled(dev_priv, val, pipe),
1038 "PCH LVDS enabled on transcoder %c, should be disabled\n", 1085 "PCH LVDS enabled on transcoder %c, should be disabled\n",
1039 pipe_name(pipe)); 1086 pipe_name(pipe));
1040 1087
@@ -1360,7 +1407,7 @@ static void disable_pch_dp(struct drm_i915_private *dev_priv,
1360 enum pipe pipe, int reg, u32 port_sel) 1407 enum pipe pipe, int reg, u32 port_sel)
1361{ 1408{
1362 u32 val = I915_READ(reg); 1409 u32 val = I915_READ(reg);
1363 if (dp_pipe_enabled(dev_priv, pipe, reg, port_sel, val)) { 1410 if (dp_pipe_enabled(dev_priv, pipe, port_sel, val)) {
1364 DRM_DEBUG_KMS("Disabling pch dp %x on pipe %d\n", reg, pipe); 1411 DRM_DEBUG_KMS("Disabling pch dp %x on pipe %d\n", reg, pipe);
1365 I915_WRITE(reg, val & ~DP_PORT_EN); 1412 I915_WRITE(reg, val & ~DP_PORT_EN);
1366 } 1413 }
@@ -1370,7 +1417,7 @@ static void disable_pch_hdmi(struct drm_i915_private *dev_priv,
1370 enum pipe pipe, int reg) 1417 enum pipe pipe, int reg)
1371{ 1418{
1372 u32 val = I915_READ(reg); 1419 u32 val = I915_READ(reg);
1373 if (HDMI_PIPE_ENABLED(val, pipe)) { 1420 if (hdmi_pipe_enabled(dev_priv, val, pipe)) {
1374 DRM_DEBUG_KMS("Disabling pch HDMI %x on pipe %d\n", 1421 DRM_DEBUG_KMS("Disabling pch HDMI %x on pipe %d\n",
1375 reg, pipe); 1422 reg, pipe);
1376 I915_WRITE(reg, val & ~PORT_ENABLE); 1423 I915_WRITE(reg, val & ~PORT_ENABLE);
@@ -1392,12 +1439,13 @@ static void intel_disable_pch_ports(struct drm_i915_private *dev_priv,
1392 1439
1393 reg = PCH_ADPA; 1440 reg = PCH_ADPA;
1394 val = I915_READ(reg); 1441 val = I915_READ(reg);
1395 if (ADPA_PIPE_ENABLED(val, pipe)) 1442 if (adpa_pipe_enabled(dev_priv, val, pipe))
1396 I915_WRITE(reg, val & ~ADPA_DAC_ENABLE); 1443 I915_WRITE(reg, val & ~ADPA_DAC_ENABLE);
1397 1444
1398 reg = PCH_LVDS; 1445 reg = PCH_LVDS;
1399 val = I915_READ(reg); 1446 val = I915_READ(reg);
1400 if (LVDS_PIPE_ENABLED(val, pipe)) { 1447 if (lvds_pipe_enabled(dev_priv, val, pipe)) {
1448 DRM_DEBUG_KMS("disable lvds on pipe %d val 0x%08x\n", pipe, val);
1401 I915_WRITE(reg, val & ~LVDS_PORT_EN); 1449 I915_WRITE(reg, val & ~LVDS_PORT_EN);
1402 POSTING_READ(reg); 1450 POSTING_READ(reg);
1403 udelay(100); 1451 udelay(100);
@@ -5049,6 +5097,81 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
5049 return ret; 5097 return ret;
5050} 5098}
5051 5099
5100static void ironlake_update_pch_refclk(struct drm_device *dev)
5101{
5102 struct drm_i915_private *dev_priv = dev->dev_private;
5103 struct drm_mode_config *mode_config = &dev->mode_config;
5104 struct drm_crtc *crtc;
5105 struct intel_encoder *encoder;
5106 struct intel_encoder *has_edp_encoder = NULL;
5107 u32 temp;
5108 bool has_lvds = false;
5109
5110 /* We need to take the global config into account */
5111 list_for_each_entry(crtc, &mode_config->crtc_list, head) {
5112 if (!crtc->enabled)
5113 continue;
5114
5115 list_for_each_entry(encoder, &mode_config->encoder_list,
5116 base.head) {
5117 if (encoder->base.crtc != crtc)
5118 continue;
5119
5120 switch (encoder->type) {
5121 case INTEL_OUTPUT_LVDS:
5122 has_lvds = true;
5123 case INTEL_OUTPUT_EDP:
5124 has_edp_encoder = encoder;
5125 break;
5126 }
5127 }
5128 }
5129
5130 /* Ironlake: try to setup display ref clock before DPLL
5131 * enabling. This is only under driver's control after
5132 * PCH B stepping, previous chipset stepping should be
5133 * ignoring this setting.
5134 */
5135 temp = I915_READ(PCH_DREF_CONTROL);
5136 /* Always enable nonspread source */
5137 temp &= ~DREF_NONSPREAD_SOURCE_MASK;
5138 temp |= DREF_NONSPREAD_SOURCE_ENABLE;
5139 temp &= ~DREF_SSC_SOURCE_MASK;
5140 temp |= DREF_SSC_SOURCE_ENABLE;
5141 I915_WRITE(PCH_DREF_CONTROL, temp);
5142
5143 POSTING_READ(PCH_DREF_CONTROL);
5144 udelay(200);
5145
5146 if (has_edp_encoder) {
5147 if (intel_panel_use_ssc(dev_priv)) {
5148 temp |= DREF_SSC1_ENABLE;
5149 I915_WRITE(PCH_DREF_CONTROL, temp);
5150
5151 POSTING_READ(PCH_DREF_CONTROL);
5152 udelay(200);
5153 }
5154 temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
5155
5156 /* Enable CPU source on CPU attached eDP */
5157 if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
5158 if (intel_panel_use_ssc(dev_priv))
5159 temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
5160 else
5161 temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
5162 } else {
5163 /* Enable SSC on PCH eDP if needed */
5164 if (intel_panel_use_ssc(dev_priv)) {
5165 DRM_ERROR("enabling SSC on PCH\n");
5166 temp |= DREF_SUPERSPREAD_SOURCE_ENABLE;
5167 }
5168 }
5169 I915_WRITE(PCH_DREF_CONTROL, temp);
5170 POSTING_READ(PCH_DREF_CONTROL);
5171 udelay(200);
5172 }
5173}
5174
5052static int ironlake_crtc_mode_set(struct drm_crtc *crtc, 5175static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
5053 struct drm_display_mode *mode, 5176 struct drm_display_mode *mode,
5054 struct drm_display_mode *adjusted_mode, 5177 struct drm_display_mode *adjusted_mode,
@@ -5244,49 +5367,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
5244 ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw, 5367 ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw,
5245 &m_n); 5368 &m_n);
5246 5369
5247 /* Ironlake: try to setup display ref clock before DPLL 5370 ironlake_update_pch_refclk(dev);
5248 * enabling. This is only under driver's control after
5249 * PCH B stepping, previous chipset stepping should be
5250 * ignoring this setting.
5251 */
5252 temp = I915_READ(PCH_DREF_CONTROL);
5253 /* Always enable nonspread source */
5254 temp &= ~DREF_NONSPREAD_SOURCE_MASK;
5255 temp |= DREF_NONSPREAD_SOURCE_ENABLE;
5256 temp &= ~DREF_SSC_SOURCE_MASK;
5257 temp |= DREF_SSC_SOURCE_ENABLE;
5258 I915_WRITE(PCH_DREF_CONTROL, temp);
5259
5260 POSTING_READ(PCH_DREF_CONTROL);
5261 udelay(200);
5262
5263 if (has_edp_encoder) {
5264 if (intel_panel_use_ssc(dev_priv)) {
5265 temp |= DREF_SSC1_ENABLE;
5266 I915_WRITE(PCH_DREF_CONTROL, temp);
5267
5268 POSTING_READ(PCH_DREF_CONTROL);
5269 udelay(200);
5270 }
5271 temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
5272
5273 /* Enable CPU source on CPU attached eDP */
5274 if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
5275 if (intel_panel_use_ssc(dev_priv))
5276 temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
5277 else
5278 temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
5279 } else {
5280 /* Enable SSC on PCH eDP if needed */
5281 if (intel_panel_use_ssc(dev_priv)) {
5282 DRM_ERROR("enabling SSC on PCH\n");
5283 temp |= DREF_SUPERSPREAD_SOURCE_ENABLE;
5284 }
5285 }
5286 I915_WRITE(PCH_DREF_CONTROL, temp);
5287 POSTING_READ(PCH_DREF_CONTROL);
5288 udelay(200);
5289 }
5290 5371
5291 fp = clock.n << 16 | clock.m1 << 8 | clock.m2; 5372 fp = clock.n << 16 | clock.m1 << 8 | clock.m2;
5292 if (has_reduced_clock) 5373 if (has_reduced_clock)
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 0feae908bb3..44fef5e1c49 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1841,6 +1841,11 @@ done:
1841static void 1841static void
1842intel_dp_destroy (struct drm_connector *connector) 1842intel_dp_destroy (struct drm_connector *connector)
1843{ 1843{
1844 struct drm_device *dev = connector->dev;
1845
1846 if (intel_dpd_is_edp(dev))
1847 intel_panel_destroy_backlight(dev);
1848
1844 drm_sysfs_connector_remove(connector); 1849 drm_sysfs_connector_remove(connector);
1845 drm_connector_cleanup(connector); 1850 drm_connector_cleanup(connector);
1846 kfree(connector); 1851 kfree(connector);
@@ -2072,6 +2077,8 @@ intel_dp_init(struct drm_device *dev, int output_reg)
2072 DRM_MODE_TYPE_PREFERRED; 2077 DRM_MODE_TYPE_PREFERRED;
2073 } 2078 }
2074 } 2079 }
2080 dev_priv->int_edp_connector = connector;
2081 intel_panel_setup_backlight(dev);
2075 } 2082 }
2076 2083
2077 intel_dp_add_properties(intel_dp, connector); 2084 intel_dp_add_properties(intel_dp, connector);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 7b330e76a43..0b2ee9d3998 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -297,9 +297,10 @@ extern void intel_pch_panel_fitting(struct drm_device *dev,
297extern u32 intel_panel_get_max_backlight(struct drm_device *dev); 297extern u32 intel_panel_get_max_backlight(struct drm_device *dev);
298extern u32 intel_panel_get_backlight(struct drm_device *dev); 298extern u32 intel_panel_get_backlight(struct drm_device *dev);
299extern void intel_panel_set_backlight(struct drm_device *dev, u32 level); 299extern void intel_panel_set_backlight(struct drm_device *dev, u32 level);
300extern void intel_panel_setup_backlight(struct drm_device *dev); 300extern int intel_panel_setup_backlight(struct drm_device *dev);
301extern void intel_panel_enable_backlight(struct drm_device *dev); 301extern void intel_panel_enable_backlight(struct drm_device *dev);
302extern void intel_panel_disable_backlight(struct drm_device *dev); 302extern void intel_panel_disable_backlight(struct drm_device *dev);
303extern void intel_panel_destroy_backlight(struct drm_device *dev);
303extern enum drm_connector_status intel_panel_detect(struct drm_device *dev); 304extern enum drm_connector_status intel_panel_detect(struct drm_device *dev);
304 305
305extern void intel_crtc_load_lut(struct drm_crtc *crtc); 306extern void intel_crtc_load_lut(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 2e8ddfcba40..31da77f5c05 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -72,14 +72,16 @@ static void intel_lvds_enable(struct intel_lvds *intel_lvds)
72{ 72{
73 struct drm_device *dev = intel_lvds->base.base.dev; 73 struct drm_device *dev = intel_lvds->base.base.dev;
74 struct drm_i915_private *dev_priv = dev->dev_private; 74 struct drm_i915_private *dev_priv = dev->dev_private;
75 u32 ctl_reg, lvds_reg; 75 u32 ctl_reg, lvds_reg, stat_reg;
76 76
77 if (HAS_PCH_SPLIT(dev)) { 77 if (HAS_PCH_SPLIT(dev)) {
78 ctl_reg = PCH_PP_CONTROL; 78 ctl_reg = PCH_PP_CONTROL;
79 lvds_reg = PCH_LVDS; 79 lvds_reg = PCH_LVDS;
80 stat_reg = PCH_PP_STATUS;
80 } else { 81 } else {
81 ctl_reg = PP_CONTROL; 82 ctl_reg = PP_CONTROL;
82 lvds_reg = LVDS; 83 lvds_reg = LVDS;
84 stat_reg = PP_STATUS;
83 } 85 }
84 86
85 I915_WRITE(lvds_reg, I915_READ(lvds_reg) | LVDS_PORT_EN); 87 I915_WRITE(lvds_reg, I915_READ(lvds_reg) | LVDS_PORT_EN);
@@ -94,17 +96,16 @@ static void intel_lvds_enable(struct intel_lvds *intel_lvds)
94 DRM_DEBUG_KMS("applying panel-fitter: %x, %x\n", 96 DRM_DEBUG_KMS("applying panel-fitter: %x, %x\n",
95 intel_lvds->pfit_control, 97 intel_lvds->pfit_control,
96 intel_lvds->pfit_pgm_ratios); 98 intel_lvds->pfit_pgm_ratios);
97 if (wait_for((I915_READ(PP_STATUS) & PP_ON) == 0, 1000)) { 99
98 DRM_ERROR("timed out waiting for panel to power off\n"); 100 I915_WRITE(PFIT_PGM_RATIOS, intel_lvds->pfit_pgm_ratios);
99 } else { 101 I915_WRITE(PFIT_CONTROL, intel_lvds->pfit_control);
100 I915_WRITE(PFIT_PGM_RATIOS, intel_lvds->pfit_pgm_ratios); 102 intel_lvds->pfit_dirty = false;
101 I915_WRITE(PFIT_CONTROL, intel_lvds->pfit_control);
102 intel_lvds->pfit_dirty = false;
103 }
104 } 103 }
105 104
106 I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON); 105 I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON);
107 POSTING_READ(lvds_reg); 106 POSTING_READ(lvds_reg);
107 if (wait_for((I915_READ(stat_reg) & PP_ON) != 0, 1000))
108 DRM_ERROR("timed out waiting for panel to power on\n");
108 109
109 intel_panel_enable_backlight(dev); 110 intel_panel_enable_backlight(dev);
110} 111}
@@ -113,24 +114,25 @@ static void intel_lvds_disable(struct intel_lvds *intel_lvds)
113{ 114{
114 struct drm_device *dev = intel_lvds->base.base.dev; 115 struct drm_device *dev = intel_lvds->base.base.dev;
115 struct drm_i915_private *dev_priv = dev->dev_private; 116 struct drm_i915_private *dev_priv = dev->dev_private;
116 u32 ctl_reg, lvds_reg; 117 u32 ctl_reg, lvds_reg, stat_reg;
117 118
118 if (HAS_PCH_SPLIT(dev)) { 119 if (HAS_PCH_SPLIT(dev)) {
119 ctl_reg = PCH_PP_CONTROL; 120 ctl_reg = PCH_PP_CONTROL;
120 lvds_reg = PCH_LVDS; 121 lvds_reg = PCH_LVDS;
122 stat_reg = PCH_PP_STATUS;
121 } else { 123 } else {
122 ctl_reg = PP_CONTROL; 124 ctl_reg = PP_CONTROL;
123 lvds_reg = LVDS; 125 lvds_reg = LVDS;
126 stat_reg = PP_STATUS;
124 } 127 }
125 128
126 intel_panel_disable_backlight(dev); 129 intel_panel_disable_backlight(dev);
127 130
128 I915_WRITE(ctl_reg, I915_READ(ctl_reg) & ~POWER_TARGET_ON); 131 I915_WRITE(ctl_reg, I915_READ(ctl_reg) & ~POWER_TARGET_ON);
132 if (wait_for((I915_READ(stat_reg) & PP_ON) == 0, 1000))
133 DRM_ERROR("timed out waiting for panel to power off\n");
129 134
130 if (intel_lvds->pfit_control) { 135 if (intel_lvds->pfit_control) {
131 if (wait_for((I915_READ(PP_STATUS) & PP_ON) == 0, 1000))
132 DRM_ERROR("timed out waiting for panel to power off\n");
133
134 I915_WRITE(PFIT_CONTROL, 0); 136 I915_WRITE(PFIT_CONTROL, 0);
135 intel_lvds->pfit_dirty = true; 137 intel_lvds->pfit_dirty = true;
136 } 138 }
@@ -398,53 +400,21 @@ out:
398 400
399static void intel_lvds_prepare(struct drm_encoder *encoder) 401static void intel_lvds_prepare(struct drm_encoder *encoder)
400{ 402{
401 struct drm_device *dev = encoder->dev;
402 struct drm_i915_private *dev_priv = dev->dev_private;
403 struct intel_lvds *intel_lvds = to_intel_lvds(encoder); 403 struct intel_lvds *intel_lvds = to_intel_lvds(encoder);
404 404
405 /* We try to do the minimum that is necessary in order to unlock 405 /*
406 * the registers for mode setting.
407 *
408 * On Ironlake, this is quite simple as we just set the unlock key
409 * and ignore all subtleties. (This may cause some issues...)
410 *
411 * Prior to Ironlake, we must disable the pipe if we want to adjust 406 * Prior to Ironlake, we must disable the pipe if we want to adjust
412 * the panel fitter. However at all other times we can just reset 407 * the panel fitter. However at all other times we can just reset
413 * the registers regardless. 408 * the registers regardless.
414 */ 409 */
415 410 if (!HAS_PCH_SPLIT(encoder->dev) && intel_lvds->pfit_dirty)
416 if (HAS_PCH_SPLIT(dev)) { 411 intel_lvds_disable(intel_lvds);
417 I915_WRITE(PCH_PP_CONTROL,
418 I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS);
419 } else if (intel_lvds->pfit_dirty) {
420 I915_WRITE(PP_CONTROL,
421 (I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS)
422 & ~POWER_TARGET_ON);
423 } else {
424 I915_WRITE(PP_CONTROL,
425 I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS);
426 }
427} 412}
428 413
429static void intel_lvds_commit(struct drm_encoder *encoder) 414static void intel_lvds_commit(struct drm_encoder *encoder)
430{ 415{
431 struct drm_device *dev = encoder->dev;
432 struct drm_i915_private *dev_priv = dev->dev_private;
433 struct intel_lvds *intel_lvds = to_intel_lvds(encoder); 416 struct intel_lvds *intel_lvds = to_intel_lvds(encoder);
434 417
435 /* Undo any unlocking done in prepare to prevent accidental
436 * adjustment of the registers.
437 */
438 if (HAS_PCH_SPLIT(dev)) {
439 u32 val = I915_READ(PCH_PP_CONTROL);
440 if ((val & PANEL_UNLOCK_REGS) == PANEL_UNLOCK_REGS)
441 I915_WRITE(PCH_PP_CONTROL, val & 0x3);
442 } else {
443 u32 val = I915_READ(PP_CONTROL);
444 if ((val & PANEL_UNLOCK_REGS) == PANEL_UNLOCK_REGS)
445 I915_WRITE(PP_CONTROL, val & 0x3);
446 }
447
448 /* Always do a full power on as we do not know what state 418 /* Always do a full power on as we do not know what state
449 * we were left in. 419 * we were left in.
450 */ 420 */
@@ -582,6 +552,8 @@ static void intel_lvds_destroy(struct drm_connector *connector)
582 struct drm_device *dev = connector->dev; 552 struct drm_device *dev = connector->dev;
583 struct drm_i915_private *dev_priv = dev->dev_private; 553 struct drm_i915_private *dev_priv = dev->dev_private;
584 554
555 intel_panel_destroy_backlight(dev);
556
585 if (dev_priv->lid_notifier.notifier_call) 557 if (dev_priv->lid_notifier.notifier_call)
586 acpi_lid_notifier_unregister(&dev_priv->lid_notifier); 558 acpi_lid_notifier_unregister(&dev_priv->lid_notifier);
587 drm_sysfs_connector_remove(connector); 559 drm_sysfs_connector_remove(connector);
@@ -1040,6 +1012,19 @@ out:
1040 pwm = I915_READ(BLC_PWM_PCH_CTL1); 1012 pwm = I915_READ(BLC_PWM_PCH_CTL1);
1041 pwm |= PWM_PCH_ENABLE; 1013 pwm |= PWM_PCH_ENABLE;
1042 I915_WRITE(BLC_PWM_PCH_CTL1, pwm); 1014 I915_WRITE(BLC_PWM_PCH_CTL1, pwm);
1015 /*
1016 * Unlock registers and just
1017 * leave them unlocked
1018 */
1019 I915_WRITE(PCH_PP_CONTROL,
1020 I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS);
1021 } else {
1022 /*
1023 * Unlock registers and just
1024 * leave them unlocked
1025 */
1026 I915_WRITE(PP_CONTROL,
1027 I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS);
1043 } 1028 }
1044 dev_priv->lid_notifier.notifier_call = intel_lid_notify; 1029 dev_priv->lid_notifier.notifier_call = intel_lid_notify;
1045 if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) { 1030 if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) {
@@ -1049,6 +1034,9 @@ out:
1049 /* keep the LVDS connector */ 1034 /* keep the LVDS connector */
1050 dev_priv->int_lvds_connector = connector; 1035 dev_priv->int_lvds_connector = connector;
1051 drm_sysfs_connector_add(connector); 1036 drm_sysfs_connector_add(connector);
1037
1038 intel_panel_setup_backlight(dev);
1039
1052 return true; 1040 return true;
1053 1041
1054failed: 1042failed:
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index b7c5ddb564d..b8e8158bb16 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -227,7 +227,6 @@ void intel_opregion_asle_intr(struct drm_device *dev)
227 asle->aslc = asle_stat; 227 asle->aslc = asle_stat;
228} 228}
229 229
230/* Only present on Ironlake+ */
231void intel_opregion_gse_intr(struct drm_device *dev) 230void intel_opregion_gse_intr(struct drm_device *dev)
232{ 231{
233 struct drm_i915_private *dev_priv = dev->dev_private; 232 struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 05f500cd9c2..a9e0c7bcd31 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -277,7 +277,7 @@ void intel_panel_enable_backlight(struct drm_device *dev)
277 dev_priv->backlight_enabled = true; 277 dev_priv->backlight_enabled = true;
278} 278}
279 279
280void intel_panel_setup_backlight(struct drm_device *dev) 280static void intel_panel_init_backlight(struct drm_device *dev)
281{ 281{
282 struct drm_i915_private *dev_priv = dev->dev_private; 282 struct drm_i915_private *dev_priv = dev->dev_private;
283 283
@@ -309,3 +309,73 @@ intel_panel_detect(struct drm_device *dev)
309 309
310 return connector_status_unknown; 310 return connector_status_unknown;
311} 311}
312
313#ifdef CONFIG_BACKLIGHT_CLASS_DEVICE
314static int intel_panel_update_status(struct backlight_device *bd)
315{
316 struct drm_device *dev = bl_get_data(bd);
317 intel_panel_set_backlight(dev, bd->props.brightness);
318 return 0;
319}
320
321static int intel_panel_get_brightness(struct backlight_device *bd)
322{
323 struct drm_device *dev = bl_get_data(bd);
324 return intel_panel_get_backlight(dev);
325}
326
327static const struct backlight_ops intel_panel_bl_ops = {
328 .update_status = intel_panel_update_status,
329 .get_brightness = intel_panel_get_brightness,
330};
331
332int intel_panel_setup_backlight(struct drm_device *dev)
333{
334 struct drm_i915_private *dev_priv = dev->dev_private;
335 struct backlight_properties props;
336 struct drm_connector *connector;
337
338 intel_panel_init_backlight(dev);
339
340 if (dev_priv->int_lvds_connector)
341 connector = dev_priv->int_lvds_connector;
342 else if (dev_priv->int_edp_connector)
343 connector = dev_priv->int_edp_connector;
344 else
345 return -ENODEV;
346
347 props.type = BACKLIGHT_RAW;
348 props.max_brightness = intel_panel_get_max_backlight(dev);
349 dev_priv->backlight =
350 backlight_device_register("intel_backlight",
351 &connector->kdev, dev,
352 &intel_panel_bl_ops, &props);
353
354 if (IS_ERR(dev_priv->backlight)) {
355 DRM_ERROR("Failed to register backlight: %ld\n",
356 PTR_ERR(dev_priv->backlight));
357 dev_priv->backlight = NULL;
358 return -ENODEV;
359 }
360 dev_priv->backlight->props.brightness = intel_panel_get_backlight(dev);
361 return 0;
362}
363
364void intel_panel_destroy_backlight(struct drm_device *dev)
365{
366 struct drm_i915_private *dev_priv = dev->dev_private;
367 if (dev_priv->backlight)
368 backlight_device_unregister(dev_priv->backlight);
369}
370#else
371int intel_panel_setup_backlight(struct drm_device *dev)
372{
373 intel_panel_init_backlight(dev);
374 return 0;
375}
376
377void intel_panel_destroy_backlight(struct drm_device *dev)
378{
379 return;
380}
381#endif
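With intel_panel_setup_backlight() registering a class device named "intel_backlight", the standard backlight sysfs interface appears under /sys/class/backlight/intel_backlight/. A small user-space sketch of driving it (hypothetical helper; writes above max_brightness are rejected by the backlight core):

#include <stdio.h>

/* Set the panel brightness through the class device registered above. */
static int set_intel_backlight(int level)
{
	FILE *f = fopen("/sys/class/backlight/intel_backlight/brightness", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", level);
	fclose(f);
	return 0;
}

int main(void)
{
	/* read max_brightness first in real code; 100 is just an example value */
	return set_intel_backlight(100);
}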
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 47b9b277703..c30626ea9f9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -290,6 +290,10 @@ static int init_render_ring(struct intel_ring_buffer *ring)
290 if (IS_GEN6(dev) || IS_GEN7(dev)) 290 if (IS_GEN6(dev) || IS_GEN7(dev))
291 mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE; 291 mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE;
292 I915_WRITE(MI_MODE, mode); 292 I915_WRITE(MI_MODE, mode);
293 if (IS_GEN7(dev))
294 I915_WRITE(GFX_MODE_GEN7,
295 GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
296 GFX_MODE_ENABLE(GFX_REPLAY_MODE));
293 } 297 }
294 298
295 if (INTEL_INFO(dev)->gen >= 6) { 299 if (INTEL_INFO(dev)->gen >= 6) {
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 645b84b3d20..7ad43c6b1db 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -613,6 +613,18 @@ static bool radeon_dp_get_link_status(struct radeon_connector *radeon_connector,
613 return true; 613 return true;
614} 614}
615 615
616bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector)
617{
618 u8 link_status[DP_LINK_STATUS_SIZE];
619 struct radeon_connector_atom_dig *dig = radeon_connector->con_priv;
620
621 if (!radeon_dp_get_link_status(radeon_connector, link_status))
622 return false;
623 if (dp_channel_eq_ok(link_status, dig->dp_lane_count))
624 return false;
625 return true;
626}
627
616struct radeon_dp_link_train_info { 628struct radeon_dp_link_train_info {
617 struct radeon_device *rdev; 629 struct radeon_device *rdev;
618 struct drm_encoder *encoder; 630 struct drm_encoder *encoder;
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 14dce9f2217..fb5fa089886 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -743,7 +743,7 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
743 !evergreen_average_bandwidth_vs_available_bandwidth(&wm) || 743 !evergreen_average_bandwidth_vs_available_bandwidth(&wm) ||
744 !evergreen_check_latency_hiding(&wm) || 744 !evergreen_check_latency_hiding(&wm) ||
745 (rdev->disp_priority == 2)) { 745 (rdev->disp_priority == 2)) {
746 DRM_INFO("force priority to high\n"); 746 DRM_DEBUG_KMS("force priority to high\n");
747 priority_a_cnt |= PRIORITY_ALWAYS_ON; 747 priority_a_cnt |= PRIORITY_ALWAYS_ON;
748 priority_b_cnt |= PRIORITY_ALWAYS_ON; 748 priority_b_cnt |= PRIORITY_ALWAYS_ON;
749 } 749 }
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 6d6b5f16bc0..7f65940f918 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -60,18 +60,20 @@ void radeon_connector_hotplug(struct drm_connector *connector)
60 60
61 radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); 61 radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
62 62
63 /* powering up/down the eDP panel generates hpd events which 63 /* if the connector is already off, don't turn it back on */
64 * can interfere with modesetting. 64 if (connector->dpms != DRM_MODE_DPMS_ON)
65 */
66 if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
67 return; 65 return;
68 66
69 /* pre-r600 did not always have the hpd pins mapped accurately to connectors */ 67 /* just deal with DP (not eDP) here. */
70 if (rdev->family >= CHIP_R600) { 68 if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
71 if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) 69 int saved_dpms = connector->dpms;
70
71 if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) &&
72 radeon_dp_needs_link_train(radeon_connector))
72 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); 73 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
73 else 74 else
74 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); 75 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
76 connector->dpms = saved_dpms;
75 } 77 }
76} 78}
77 79
@@ -474,11 +476,19 @@ static void radeon_fixup_lvds_native_mode(struct drm_encoder *encoder,
474{ 476{
475 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); 477 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
476 struct drm_display_mode *native_mode = &radeon_encoder->native_mode; 478 struct drm_display_mode *native_mode = &radeon_encoder->native_mode;
479 struct drm_display_mode *t, *mode;
480
481 /* If the EDID preferred mode doesn't match the native mode, use it */
482 list_for_each_entry_safe(mode, t, &connector->probed_modes, head) {
483 if (mode->type & DRM_MODE_TYPE_PREFERRED) {
484 if (mode->hdisplay != native_mode->hdisplay ||
485 mode->vdisplay != native_mode->vdisplay)
486 memcpy(native_mode, mode, sizeof(*mode));
487 }
488 }
477 489
478 /* Try to get native mode details from EDID if necessary */ 490 /* Try to get native mode details from EDID if necessary */
479 if (!native_mode->clock) { 491 if (!native_mode->clock) {
480 struct drm_display_mode *t, *mode;
481
482 list_for_each_entry_safe(mode, t, &connector->probed_modes, head) { 492 list_for_each_entry_safe(mode, t, &connector->probed_modes, head) {
483 if (mode->hdisplay == native_mode->hdisplay && 493 if (mode->hdisplay == native_mode->hdisplay &&
484 mode->vdisplay == native_mode->vdisplay) { 494 mode->vdisplay == native_mode->vdisplay) {
@@ -489,6 +499,7 @@ static void radeon_fixup_lvds_native_mode(struct drm_encoder *encoder,
489 } 499 }
490 } 500 }
491 } 501 }
502
492 if (!native_mode->clock) { 503 if (!native_mode->clock) {
493 DRM_DEBUG_KMS("No LVDS native mode details, disabling RMX\n"); 504 DRM_DEBUG_KMS("No LVDS native mode details, disabling RMX\n");
494 radeon_encoder->rmx_type = RMX_OFF; 505 radeon_encoder->rmx_type = RMX_OFF;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 440e6ecccc4..a3b011b4946 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -32,6 +32,7 @@
32#include <drm/radeon_drm.h> 32#include <drm/radeon_drm.h>
33#include <linux/vgaarb.h> 33#include <linux/vgaarb.h>
34#include <linux/vga_switcheroo.h> 34#include <linux/vga_switcheroo.h>
35#include <linux/efi.h>
35#include "radeon_reg.h" 36#include "radeon_reg.h"
36#include "radeon.h" 37#include "radeon.h"
37#include "atom.h" 38#include "atom.h"
@@ -348,6 +349,9 @@ bool radeon_card_posted(struct radeon_device *rdev)
348{ 349{
349 uint32_t reg; 350 uint32_t reg;
350 351
352 if (efi_enabled && rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE)
353 return false;
354
351 /* first check CRTCs */ 355 /* first check CRTCs */
352 if (ASIC_IS_DCE41(rdev)) { 356 if (ASIC_IS_DCE41(rdev)) {
353 reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) | 357 reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) |
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index b293487e5aa..319d85d7e75 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -2323,6 +2323,9 @@ radeon_add_atom_encoder(struct drm_device *dev,
2323 default: 2323 default:
2324 encoder->possible_crtcs = 0x3; 2324 encoder->possible_crtcs = 0x3;
2325 break; 2325 break;
2326 case 4:
2327 encoder->possible_crtcs = 0xf;
2328 break;
2326 case 6: 2329 case 6:
2327 encoder->possible_crtcs = 0x3f; 2330 encoder->possible_crtcs = 0x3f;
2328 break; 2331 break;
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index d09031c03e2..68820f5f630 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -479,6 +479,7 @@ extern void radeon_dp_set_link_config(struct drm_connector *connector,
479 struct drm_display_mode *mode); 479 struct drm_display_mode *mode);
480extern void radeon_dp_link_train(struct drm_encoder *encoder, 480extern void radeon_dp_link_train(struct drm_encoder *encoder,
481 struct drm_connector *connector); 481 struct drm_connector *connector);
482extern bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector);
482extern u8 radeon_dp_getsinktype(struct radeon_connector *radeon_connector); 483extern u8 radeon_dp_getsinktype(struct radeon_connector *radeon_connector);
483extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector); 484extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector);
484extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action, int panel_mode); 485extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action, int panel_mode);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 43f89ba0a90..fe89c4660d5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -717,11 +717,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
717{ 717{
718 struct ipoib_dev_priv *priv = netdev_priv(dev); 718 struct ipoib_dev_priv *priv = netdev_priv(dev);
719 struct ipoib_neigh *neigh; 719 struct ipoib_neigh *neigh;
720 struct neighbour *n; 720 struct neighbour *n = NULL;
721 unsigned long flags; 721 unsigned long flags;
722 722
723 n = dst_get_neighbour(skb_dst(skb)); 723 if (likely(skb_dst(skb)))
724 if (likely(skb_dst(skb) && n)) { 724 n = dst_get_neighbour(skb_dst(skb));
725
726 if (likely(n)) {
725 if (unlikely(!*to_ipoib_neigh(n))) { 727 if (unlikely(!*to_ipoib_neigh(n))) {
726 ipoib_path_lookup(skb, dev); 728 ipoib_path_lookup(skb, dev);
727 return NETDEV_TX_OK; 729 return NETDEV_TX_OK;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 8db008de539..9c61b9c2c59 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -101,13 +101,17 @@ iscsi_iser_recv(struct iscsi_conn *conn,
101 101
102 /* verify PDU length */ 102 /* verify PDU length */
103 datalen = ntoh24(hdr->dlength); 103 datalen = ntoh24(hdr->dlength);
104 if (datalen != rx_data_len) { 104 if (datalen > rx_data_len || (datalen + 4) < rx_data_len) {
105 printk(KERN_ERR "iscsi_iser: datalen %d (hdr) != %d (IB) \n", 105 iser_err("wrong datalen %d (hdr), %d (IB)\n",
106 datalen, rx_data_len); 106 datalen, rx_data_len);
107 rc = ISCSI_ERR_DATALEN; 107 rc = ISCSI_ERR_DATALEN;
108 goto error; 108 goto error;
109 } 109 }
110 110
111 if (datalen != rx_data_len)
112 iser_dbg("aligned datalen (%d) hdr, %d (IB)\n",
113 datalen, rx_data_len);
114
111 /* read AHS */ 115 /* read AHS */
112 ahslen = hdr->hlength * 4; 116 ahslen = hdr->hlength * 4;
113 117
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 342cbc1bdaa..db6f3ce9f3b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -89,7 +89,7 @@
89 } while (0) 89 } while (0)
90 90
91#define SHIFT_4K 12 91#define SHIFT_4K 12
92#define SIZE_4K (1UL << SHIFT_4K) 92#define SIZE_4K (1ULL << SHIFT_4K)
93#define MASK_4K (~(SIZE_4K-1)) 93#define MASK_4K (~(SIZE_4K-1))
94 94
95 /* support up to 512KB in one RDMA */ 95 /* support up to 512KB in one RDMA */
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 5745b7fe158..f299de6b419 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -412,7 +412,7 @@ int iser_send_control(struct iscsi_conn *conn,
412 memcpy(iser_conn->ib_conn->login_buf, task->data, 412 memcpy(iser_conn->ib_conn->login_buf, task->data,
413 task->data_count); 413 task->data_count);
414 tx_dsg->addr = iser_conn->ib_conn->login_dma; 414 tx_dsg->addr = iser_conn->ib_conn->login_dma;
415 tx_dsg->length = data_seg_len; 415 tx_dsg->length = task->data_count;
416 tx_dsg->lkey = device->mr->lkey; 416 tx_dsg->lkey = device->mr->lkey;
417 mdesc->num_sge = 2; 417 mdesc->num_sge = 2;
418 } 418 }
diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c
index 749fdf07031..753b21aaea6 100644
--- a/drivers/pci/hotplug/pcihp_slot.c
+++ b/drivers/pci/hotplug/pcihp_slot.c
@@ -158,47 +158,6 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
158 */ 158 */
159} 159}
160 160
161/* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */
162static int pci_set_payload(struct pci_dev *dev)
163{
164 int pos, ppos;
165 u16 pctl, psz;
166 u16 dctl, dsz, dcap, dmax;
167 struct pci_dev *parent;
168
169 parent = dev->bus->self;
170 pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
171 if (!pos)
172 return 0;
173
174 /* Read Device MaxPayload capability and setting */
175 pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl);
176 pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap);
177 dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5;
178 dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD);
179
180 /* Read Parent MaxPayload setting */
181 ppos = pci_find_capability(parent, PCI_CAP_ID_EXP);
182 if (!ppos)
183 return 0;
184 pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl);
185 psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5;
186
187 /* If parent payload > device max payload -> error
188 * If parent payload > device payload -> set speed
189 * If parent payload <= device payload -> do nothing
190 */
191 if (psz > dmax)
192 return -1;
193 else if (psz > dsz) {
194 dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz);
195 pci_write_config_word(dev, pos + PCI_EXP_DEVCTL,
196 (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) +
197 (psz << 5));
198 }
199 return 0;
200}
201
202void pci_configure_slot(struct pci_dev *dev) 161void pci_configure_slot(struct pci_dev *dev)
203{ 162{
204 struct pci_dev *cdev; 163 struct pci_dev *cdev;
@@ -210,9 +169,7 @@ void pci_configure_slot(struct pci_dev *dev)
210 (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) 169 (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI)))
211 return; 170 return;
212 171
213 ret = pci_set_payload(dev); 172 pcie_bus_configure_settings(dev->bus, dev->bus->self->pcie_mpss);
214 if (ret)
215 dev_warn(&dev->dev, "could not set device max payload\n");
216 173
217 memset(&hpp, 0, sizeof(hpp)); 174 memset(&hpp, 0, sizeof(hpp));
218 ret = pci_get_hp_params(dev, &hpp); 175 ret = pci_get_hp_params(dev, &hpp);
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index c94d37ec55c..f0929934bb7 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -55,7 +55,7 @@ struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus)
55 */ 55 */
56 if (bus->bridge->of_node) 56 if (bus->bridge->of_node)
57 return of_node_get(bus->bridge->of_node); 57 return of_node_get(bus->bridge->of_node);
58 if (bus->bridge->parent->of_node) 58 if (bus->bridge->parent && bus->bridge->parent->of_node)
59 return of_node_get(bus->bridge->parent->of_node); 59 return of_node_get(bus->bridge->parent->of_node);
60 return NULL; 60 return NULL;
61} 61}
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 08a95b369d8..0ce67423a0a 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -77,6 +77,8 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE;
77unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; 77unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE;
78unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; 78unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE;
79 79
80enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE;
81
80/* 82/*
81 * The default CLS is used if arch didn't set CLS explicitly and not 83 * The default CLS is used if arch didn't set CLS explicitly and not
82 * all pci devices agree on the same value. Arch can override either 84 * all pci devices agree on the same value. Arch can override either
@@ -3223,6 +3225,67 @@ out:
3223EXPORT_SYMBOL(pcie_set_readrq); 3225EXPORT_SYMBOL(pcie_set_readrq);
3224 3226
3225/** 3227/**
3228 * pcie_get_mps - get PCI Express maximum payload size
3229 * @dev: PCI device to query
3230 *
3231 * Returns maximum payload size in bytes
3232 * or appropriate error value.
3233 */
3234int pcie_get_mps(struct pci_dev *dev)
3235{
3236 int ret, cap;
3237 u16 ctl;
3238
3239 cap = pci_pcie_cap(dev);
3240 if (!cap)
3241 return -EINVAL;
3242
3243 ret = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl);
3244 if (!ret)
3245 ret = 128 << ((ctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5);
3246
3247 return ret;
3248}
3249
3250/**
3251 * pcie_set_mps - set PCI Express maximum payload size
3252 * @dev: PCI device to query
3253 * @mps: maximum payload size in bytes
3254 * valid values are 128, 256, 512, 1024, 2048, 4096
3255 *
3256 * If possible sets maximum payload size
3257 */
3258int pcie_set_mps(struct pci_dev *dev, int mps)
3259{
3260 int cap, err = -EINVAL;
3261 u16 ctl, v;
3262
3263 if (mps < 128 || mps > 4096 || !is_power_of_2(mps))
3264 goto out;
3265
3266 v = ffs(mps) - 8;
3267 if (v > dev->pcie_mpss)
3268 goto out;
3269 v <<= 5;
3270
3271 cap = pci_pcie_cap(dev);
3272 if (!cap)
3273 goto out;
3274
3275 err = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl);
3276 if (err)
3277 goto out;
3278
3279 if ((ctl & PCI_EXP_DEVCTL_PAYLOAD) != v) {
3280 ctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
3281 ctl |= v;
3282 err = pci_write_config_word(dev, cap + PCI_EXP_DEVCTL, ctl);
3283 }
3284out:
3285 return err;
3286}
3287
3288/**
3226 * pci_select_bars - Make BAR mask from the type of resource 3289 * pci_select_bars - Make BAR mask from the type of resource
3227 * @dev: the PCI device for which BAR mask is made 3290 * @dev: the PCI device for which BAR mask is made
3228 * @flags: resource type mask to be selected 3291 * @flags: resource type mask to be selected
@@ -3505,6 +3568,10 @@ static int __init pci_setup(char *str)
3505 pci_hotplug_io_size = memparse(str + 9, &str); 3568 pci_hotplug_io_size = memparse(str + 9, &str);
3506 } else if (!strncmp(str, "hpmemsize=", 10)) { 3569 } else if (!strncmp(str, "hpmemsize=", 10)) {
3507 pci_hotplug_mem_size = memparse(str + 10, &str); 3570 pci_hotplug_mem_size = memparse(str + 10, &str);
3571 } else if (!strncmp(str, "pcie_bus_safe", 13)) {
3572 pcie_bus_config = PCIE_BUS_SAFE;
3573 } else if (!strncmp(str, "pcie_bus_perf", 13)) {
3574 pcie_bus_config = PCIE_BUS_PERFORMANCE;
3508 } else { 3575 } else {
3509 printk(KERN_ERR "PCI: Unknown option `%s'\n", 3576 printk(KERN_ERR "PCI: Unknown option `%s'\n",
3510 str); 3577 str);
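The pcie_get_mps()/pcie_set_mps() helpers added above convert between a payload size in bytes and the 3-bit encoding stored in PCI_EXP_DEVCTL_PAYLOAD: bytes = 128 << code, and code = ffs(bytes) - 8 because 128 is 1 << 7. A small stand-alone sketch of that arithmetic (illustrative only, not kernel code; the function names are invented):

#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Encode a payload size in bytes (128..4096, power of two) into the
 * 3-bit field value used by PCI_EXP_DEVCTL_PAYLOAD; -1 if invalid. */
static int mps_to_code(int mps)
{
	if (mps < 128 || mps > 4096 || (mps & (mps - 1)))
		return -1;
	return ffs(mps) - 8;	/* 128 -> 0, 256 -> 1, ..., 4096 -> 5 */
}

/* Decode the field value back into bytes. */
static int code_to_mps(int code)
{
	return 128 << code;
}

int main(void)
{
	int mps;

	for (mps = 128; mps <= 4096; mps <<= 1)
		printf("mps %4d -> code %d -> %d bytes\n",
		       mps, mps_to_code(mps), code_to_mps(mps_to_code(mps)));
	return 0;
}

The pcie_bus_safe and pcie_bus_perf command-line options added to pci_setup() select which bus-wide policy later drives these helpers.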
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index c8cee764b0d..b74084e9ca1 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -283,6 +283,8 @@ static inline int pci_iov_bus_range(struct pci_bus *bus)
283 283
284#endif /* CONFIG_PCI_IOV */ 284#endif /* CONFIG_PCI_IOV */
285 285
286extern unsigned long pci_cardbus_resource_alignment(struct resource *);
287
286static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, 288static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
287 struct resource *res) 289 struct resource *res)
288{ 290{
@@ -292,6 +294,8 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
292 if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END) 294 if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END)
293 return pci_sriov_resource_alignment(dev, resno); 295 return pci_sriov_resource_alignment(dev, resno);
294#endif 296#endif
297 if (dev->class >> 8 == PCI_CLASS_BRIDGE_CARDBUS)
298 return pci_cardbus_resource_alignment(res);
295 return resource_alignment(res); 299 return resource_alignment(res);
296} 300}
297 301
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 795c9026d55..8473727b29f 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -856,6 +856,8 @@ void set_pcie_port_type(struct pci_dev *pdev)
856 pdev->pcie_cap = pos; 856 pdev->pcie_cap = pos;
857 pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16); 857 pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
858 pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; 858 pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
859 pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, &reg16);
860 pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD;
859} 861}
860 862
861void set_pcie_hotplug_bridge(struct pci_dev *pdev) 863void set_pcie_hotplug_bridge(struct pci_dev *pdev)
@@ -1326,6 +1328,150 @@ int pci_scan_slot(struct pci_bus *bus, int devfn)
1326 return nr; 1328 return nr;
1327} 1329}
1328 1330
1331static int pcie_find_smpss(struct pci_dev *dev, void *data)
1332{
1333 u8 *smpss = data;
1334
1335 if (!pci_is_pcie(dev))
1336 return 0;
1337
1338 /* For PCIE hotplug enabled slots not connected directly to a
1339 * PCI-E root port, there can be problems when hotplugging
1340 * devices. This is due to the possibility of hotplugging a
1341 * device into the fabric with a smaller MPS than the devices
1342 * currently running have configured. Modifying the MPS on the
1343 * running devices could cause a fatal bus error due to an
1344 * incoming frame being larger than the newly configured MPS.
1345 * To work around this, the MPS for the entire fabric must be
1346 * set to the minimum size. Any devices hotplugged into this
1347 * fabric will have the minimum MPS set. If the PCI hotplug
1348 * slot is directly connected to the root port and there are not
1349 * other devices on the fabric (which seems to be the most
1350 * common case), then this is not an issue and MPS discovery
1351 * will occur as normal.
1352 */
1353 if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) ||
1354 dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT))
1355 *smpss = 0;
1356
1357 if (*smpss > dev->pcie_mpss)
1358 *smpss = dev->pcie_mpss;
1359
1360 return 0;
1361}
1362
1363static void pcie_write_mps(struct pci_dev *dev, int mps)
1364{
1365 int rc, dev_mpss;
1366
1367 dev_mpss = 128 << dev->pcie_mpss;
1368
1369 if (pcie_bus_config == PCIE_BUS_PERFORMANCE) {
1370 if (dev->bus->self) {
1371 dev_dbg(&dev->bus->dev, "Bus MPSS %d\n",
1372 128 << dev->bus->self->pcie_mpss);
1373
1374 /* For "MPS Force Max", the assumption is made that
1375 * downstream communication will never be larger than
1376 * the MRRS. So, the MPS only needs to be configured
1377 * for the upstream communication. This being the case,
1378 * walk from the top down and set the MPS of the child
1379 * to that of the parent bus.
1380 */
1381 mps = 128 << dev->bus->self->pcie_mpss;
1382 if (mps > dev_mpss)
1383 dev_warn(&dev->dev, "MPS configured higher than"
1384 " maximum supported by the device. If"
1385 " a bus issue occurs, try running with"
1386 " pci=pcie_bus_safe.\n");
1387 }
1388
1389 dev->pcie_mpss = ffs(mps) - 8;
1390 }
1391
1392 rc = pcie_set_mps(dev, mps);
1393 if (rc)
1394 dev_err(&dev->dev, "Failed attempting to set the MPS\n");
1395}
1396
1397static void pcie_write_mrrs(struct pci_dev *dev, int mps)
1398{
1399 int rc, mrrs;
1400
1401 if (pcie_bus_config == PCIE_BUS_PERFORMANCE) {
1402 int dev_mpss = 128 << dev->pcie_mpss;
1403
1404 /* For Max performance, the MRRS must be set to the largest
1405 * supported value. However, it cannot be configured larger
1406 * than the MPS the device or the bus can support. This assumes
1407 * that the largest MRRS available on the device cannot be
1408 * smaller than the device MPSS.
1409 */
1410 mrrs = mps < dev_mpss ? mps : dev_mpss;
1411 } else
1412 /* In the "safe" case, configure the MRRS for fairness on the
1413 * bus by making all devices have the same size
1414 */
1415 mrrs = mps;
1416
1417
1418 /* MRRS is a R/W register. Invalid values can be written, but a
1419 * subsequent read will verify if the value is acceptable or not.
1420 * If the MRRS value provided is not acceptable (e.g., too large),
1421 * shrink the value until it is acceptable to the HW.
1422 */
1423 while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) {
1424 rc = pcie_set_readrq(dev, mrrs);
1425 if (rc)
1426 dev_err(&dev->dev, "Failed attempting to set the MRRS\n");
1427
1428 mrrs /= 2;
1429 }
1430}
1431
1432static int pcie_bus_configure_set(struct pci_dev *dev, void *data)
1433{
1434 int mps = 128 << *(u8 *)data;
1435
1436 if (!pci_is_pcie(dev))
1437 return 0;
1438
1439 dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
1440 pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev));
1441
1442 pcie_write_mps(dev, mps);
1443 pcie_write_mrrs(dev, mps);
1444
1445 dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
1446 pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev));
1447
1448 return 0;
1449}
1450
1451/* pcie_bus_configure_settings requires that pci_walk_bus work in a top-down,
1452 * parents then children fashion. If this changes, then this code will not
1453 * work as designed.
1454 */
1455void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss)
1456{
1457 u8 smpss = mpss;
1458
1459 if (!bus->self)
1460 return;
1461
1462 if (!pci_is_pcie(bus->self))
1463 return;
1464
1465 if (pcie_bus_config == PCIE_BUS_SAFE) {
1466 pcie_find_smpss(bus->self, &smpss);
1467 pci_walk_bus(bus, pcie_find_smpss, &smpss);
1468 }
1469
1470 pcie_bus_configure_set(bus->self, &smpss);
1471 pci_walk_bus(bus, pcie_bus_configure_set, &smpss);
1472}
1473EXPORT_SYMBOL_GPL(pcie_bus_configure_settings);
1474
1329unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) 1475unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
1330{ 1476{
1331 unsigned int devfn, pass, max = bus->secondary; 1477 unsigned int devfn, pass, max = bus->secondary;
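pcie_find_smpss() and pcie_bus_configure_set() above implement the two policies selected by pcie_bus_config: PCIE_BUS_SAFE clamps the whole fabric to the smallest MPS supported by any device on it, while PCIE_BUS_PERFORMANCE lets each device copy the MPS of its parent bus and relies on the MRRS to bound downstream transfers. A toy user-space model of that choice (the two-level topology and the mpss values are invented for illustration):

#include <stdio.h>

int main(void)
{
	/* mpss is the encoded maximum payload size supported,
	 * bytes = 128 << mpss, as in dev->pcie_mpss. */
	int root_mpss = 2;		/* root port supports 512 bytes */
	int child_mpss[2] = { 1, 2 };	/* endpoints support 256 and 512 */
	int smpss, i;

	/* PCIE_BUS_SAFE: clamp the fabric to the smallest supported MPSS. */
	smpss = root_mpss;
	for (i = 0; i < 2; i++)
		if (child_mpss[i] < smpss)
			smpss = child_mpss[i];
	printf("safe: every device runs MPS %d\n", 128 << smpss);

	/* PCIE_BUS_PERFORMANCE: each child copies the parent bus MPS and
	 * the MRRS keeps upstream-bound reads within its own limit. */
	for (i = 0; i < 2; i++)
		printf("performance: child %d runs the parent MPS %d\n",
		       i, 128 << root_mpss);
	return 0;
}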
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 8a1d3c7863a..784da9d3602 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -34,6 +34,7 @@ struct resource_list_x {
34 resource_size_t start; 34 resource_size_t start;
35 resource_size_t end; 35 resource_size_t end;
36 resource_size_t add_size; 36 resource_size_t add_size;
37 resource_size_t min_align;
37 unsigned long flags; 38 unsigned long flags;
38}; 39};
39 40
@@ -65,7 +66,7 @@ void pci_realloc(void)
65 */ 66 */
66static void add_to_list(struct resource_list_x *head, 67static void add_to_list(struct resource_list_x *head,
67 struct pci_dev *dev, struct resource *res, 68 struct pci_dev *dev, struct resource *res,
68 resource_size_t add_size) 69 resource_size_t add_size, resource_size_t min_align)
69{ 70{
70 struct resource_list_x *list = head; 71 struct resource_list_x *list = head;
71 struct resource_list_x *ln = list->next; 72 struct resource_list_x *ln = list->next;
@@ -84,13 +85,16 @@ static void add_to_list(struct resource_list_x *head,
84 tmp->end = res->end; 85 tmp->end = res->end;
85 tmp->flags = res->flags; 86 tmp->flags = res->flags;
86 tmp->add_size = add_size; 87 tmp->add_size = add_size;
88 tmp->min_align = min_align;
87 list->next = tmp; 89 list->next = tmp;
88} 90}
89 91
90static void add_to_failed_list(struct resource_list_x *head, 92static void add_to_failed_list(struct resource_list_x *head,
91 struct pci_dev *dev, struct resource *res) 93 struct pci_dev *dev, struct resource *res)
92{ 94{
93 add_to_list(head, dev, res, 0); 95 add_to_list(head, dev, res,
 96 0 /* don't care */,
 97 0 /* don't care */);
94} 98}
95 99
96static void __dev_sort_resources(struct pci_dev *dev, 100static void __dev_sort_resources(struct pci_dev *dev,
@@ -121,18 +125,18 @@ static inline void reset_resource(struct resource *res)
121} 125}
122 126
123/** 127/**
124 * adjust_resources_sorted() - satisfy any additional resource requests 128 * reassign_resources_sorted() - satisfy any additional resource requests
125 * 129 *
126 * @add_head : head of the list tracking requests requiring additional 130 * @realloc_head : head of the list tracking requests requiring additional
127 * resources 131 * resources
128 * @head : head of the list tracking requests with allocated 132 * @head : head of the list tracking requests with allocated
129 * resources 133 * resources
130 * 134 *
131 * Walk through each element of the add_head and try to procure 135 * Walk through each element of the realloc_head and try to procure
132 * additional resources for the element, provided the element 136 * additional resources for the element, provided the element
133 * is in the head list. 137 * is in the head list.
134 */ 138 */
135static void adjust_resources_sorted(struct resource_list_x *add_head, 139static void reassign_resources_sorted(struct resource_list_x *realloc_head,
136 struct resource_list *head) 140 struct resource_list *head)
137{ 141{
138 struct resource *res; 142 struct resource *res;
@@ -141,8 +145,8 @@ static void adjust_resources_sorted(struct resource_list_x *add_head,
141 resource_size_t add_size; 145 resource_size_t add_size;
142 int idx; 146 int idx;
143 147
144 prev = add_head; 148 prev = realloc_head;
145 for (list = add_head->next; list;) { 149 for (list = realloc_head->next; list;) {
146 res = list->res; 150 res = list->res;
147 /* skip resource that has been reset */ 151 /* skip resource that has been reset */
148 if (!res->flags) 152 if (!res->flags)
@@ -159,13 +163,17 @@ static void adjust_resources_sorted(struct resource_list_x *add_head,
159 163
160 idx = res - &list->dev->resource[0]; 164 idx = res - &list->dev->resource[0];
161 add_size=list->add_size; 165 add_size=list->add_size;
162 if (!resource_size(res) && add_size) { 166 if (!resource_size(res)) {
163 res->end = res->start + add_size - 1; 167 res->start = list->start;
164 if(pci_assign_resource(list->dev, idx)) 168 res->end = res->start + add_size - 1;
169 if(pci_assign_resource(list->dev, idx))
165 reset_resource(res); 170 reset_resource(res);
166 } else if (add_size) { 171 } else {
167 adjust_resource(res, res->start, 172 resource_size_t align = list->min_align;
168 resource_size(res) + add_size); 173 res->flags |= list->flags & (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN);
174 if (pci_reassign_resource(list->dev, idx, add_size, align))
175 dev_printk(KERN_DEBUG, &list->dev->dev, "failed to add optional resources res=%pR\n",
176 res);
169 } 177 }
170out: 178out:
171 tmp = list; 179 tmp = list;
@@ -210,16 +218,16 @@ static void assign_requested_resources_sorted(struct resource_list *head,
210} 218}
211 219
212static void __assign_resources_sorted(struct resource_list *head, 220static void __assign_resources_sorted(struct resource_list *head,
213 struct resource_list_x *add_head, 221 struct resource_list_x *realloc_head,
214 struct resource_list_x *fail_head) 222 struct resource_list_x *fail_head)
215{ 223{
216 /* Satisfy the must-have resource requests */ 224 /* Satisfy the must-have resource requests */
217 assign_requested_resources_sorted(head, fail_head); 225 assign_requested_resources_sorted(head, fail_head);
218 226
219 /* Try to satisfy any additional nice-to-have resource 227 /* Try to satisfy any additional optional resource
220 requests */ 228 requests */
221 if (add_head) 229 if (realloc_head)
222 adjust_resources_sorted(add_head, head); 230 reassign_resources_sorted(realloc_head, head);
223 free_list(resource_list, head); 231 free_list(resource_list, head);
224} 232}
225 233
@@ -235,7 +243,7 @@ static void pdev_assign_resources_sorted(struct pci_dev *dev,
235} 243}
236 244
237static void pbus_assign_resources_sorted(const struct pci_bus *bus, 245static void pbus_assign_resources_sorted(const struct pci_bus *bus,
238 struct resource_list_x *add_head, 246 struct resource_list_x *realloc_head,
239 struct resource_list_x *fail_head) 247 struct resource_list_x *fail_head)
240{ 248{
241 struct pci_dev *dev; 249 struct pci_dev *dev;
@@ -245,7 +253,7 @@ static void pbus_assign_resources_sorted(const struct pci_bus *bus,
245 list_for_each_entry(dev, &bus->devices, bus_list) 253 list_for_each_entry(dev, &bus->devices, bus_list)
246 __dev_sort_resources(dev, &head); 254 __dev_sort_resources(dev, &head);
247 255
248 __assign_resources_sorted(&head, add_head, fail_head); 256 __assign_resources_sorted(&head, realloc_head, fail_head);
249} 257}
250 258
251void pci_setup_cardbus(struct pci_bus *bus) 259void pci_setup_cardbus(struct pci_bus *bus)
@@ -540,13 +548,27 @@ static resource_size_t calculate_memsize(resource_size_t size,
540 return size; 548 return size;
541} 549}
542 550
551static resource_size_t get_res_add_size(struct resource_list_x *realloc_head,
552 struct resource *res)
553{
554 struct resource_list_x *list;
555
556 /* check if it is in realloc_head list */
557 for (list = realloc_head->next; list && list->res != res;
558 list = list->next);
559 if (list)
560 return list->add_size;
561
562 return 0;
563}
564
543/** 565/**
544 * pbus_size_io() - size the io window of a given bus 566 * pbus_size_io() - size the io window of a given bus
545 * 567 *
546 * @bus : the bus 568 * @bus : the bus
547 * @min_size : the minimum io window that must be allocated 569 * @min_size : the minimum io window that must be allocated
548 * @add_size : additional optional io window 570 * @add_size : additional optional io window
549 * @add_head : track the additional io window on this list 571 * @realloc_head : track the additional io window on this list
550 * 572 *
551 * Sizing the IO windows of the PCI-PCI bridge is trivial, 573 * Sizing the IO windows of the PCI-PCI bridge is trivial,
552 * since these windows have 4K granularity and the IO ranges 574 * since these windows have 4K granularity and the IO ranges
@@ -554,11 +576,12 @@ static resource_size_t calculate_memsize(resource_size_t size,
554 * We must be careful with the ISA aliasing though. 576 * We must be careful with the ISA aliasing though.
555 */ 577 */
556static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, 578static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
557 resource_size_t add_size, struct resource_list_x *add_head) 579 resource_size_t add_size, struct resource_list_x *realloc_head)
558{ 580{
559 struct pci_dev *dev; 581 struct pci_dev *dev;
560 struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO); 582 struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
561 unsigned long size = 0, size0 = 0, size1 = 0; 583 unsigned long size = 0, size0 = 0, size1 = 0;
584 resource_size_t children_add_size = 0;
562 585
563 if (!b_res) 586 if (!b_res)
564 return; 587 return;
@@ -579,11 +602,16 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
579 size += r_size; 602 size += r_size;
580 else 603 else
581 size1 += r_size; 604 size1 += r_size;
605
606 if (realloc_head)
607 children_add_size += get_res_add_size(realloc_head, r);
582 } 608 }
583 } 609 }
584 size0 = calculate_iosize(size, min_size, size1, 610 size0 = calculate_iosize(size, min_size, size1,
585 resource_size(b_res), 4096); 611 resource_size(b_res), 4096);
586 size1 = (!add_head || (add_head && !add_size)) ? size0 : 612 if (children_add_size > add_size)
613 add_size = children_add_size;
614 size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 :
587 calculate_iosize(size, min_size+add_size, size1, 615 calculate_iosize(size, min_size+add_size, size1,
588 resource_size(b_res), 4096); 616 resource_size(b_res), 4096);
589 if (!size0 && !size1) { 617 if (!size0 && !size1) {
@@ -598,8 +626,8 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
598 b_res->start = 4096; 626 b_res->start = 4096;
599 b_res->end = b_res->start + size0 - 1; 627 b_res->end = b_res->start + size0 - 1;
600 b_res->flags |= IORESOURCE_STARTALIGN; 628 b_res->flags |= IORESOURCE_STARTALIGN;
601 if (size1 > size0 && add_head) 629 if (size1 > size0 && realloc_head)
602 add_to_list(add_head, bus->self, b_res, size1-size0); 630 add_to_list(realloc_head, bus->self, b_res, size1-size0, 4096);
603} 631}
604 632
605/** 633/**
@@ -608,7 +636,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
608 * @bus : the bus 636 * @bus : the bus
609 * @min_size : the minimum memory window that must be allocated 637 * @min_size : the minimum memory window that must be allocated
610 * @add_size : additional optional memory window 638 * @add_size : additional optional memory window
611 * @add_head : track the additional memory window on this list 639 * @realloc_head : track the additional memory window on this list
612 * 640 *
613 * Calculate the size of the bus and minimal alignment which 641 * Calculate the size of the bus and minimal alignment which
614 * guarantees that all child resources fit in this size. 642 * guarantees that all child resources fit in this size.
@@ -616,7 +644,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
616static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, 644static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
617 unsigned long type, resource_size_t min_size, 645 unsigned long type, resource_size_t min_size,
618 resource_size_t add_size, 646 resource_size_t add_size,
619 struct resource_list_x *add_head) 647 struct resource_list_x *realloc_head)
620{ 648{
621 struct pci_dev *dev; 649 struct pci_dev *dev;
622 resource_size_t min_align, align, size, size0, size1; 650 resource_size_t min_align, align, size, size0, size1;
@@ -624,6 +652,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
624 int order, max_order; 652 int order, max_order;
625 struct resource *b_res = find_free_bus_resource(bus, type); 653 struct resource *b_res = find_free_bus_resource(bus, type);
626 unsigned int mem64_mask = 0; 654 unsigned int mem64_mask = 0;
655 resource_size_t children_add_size = 0;
627 656
628 if (!b_res) 657 if (!b_res)
629 return 0; 658 return 0;
@@ -645,6 +674,16 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
645 if (r->parent || (r->flags & mask) != type) 674 if (r->parent || (r->flags & mask) != type)
646 continue; 675 continue;
647 r_size = resource_size(r); 676 r_size = resource_size(r);
677#ifdef CONFIG_PCI_IOV
678 /* put SR-IOV requested resources on the optional list */
679 if (realloc_head && i >= PCI_IOV_RESOURCES &&
680 i <= PCI_IOV_RESOURCE_END) {
681 r->end = r->start - 1;
682 add_to_list(realloc_head, dev, r, r_size, 0 /* don't care */);
683 children_add_size += r_size;
684 continue;
685 }
686#endif
648 /* For bridges size != alignment */ 687 /* For bridges size != alignment */
649 align = pci_resource_alignment(dev, r); 688 align = pci_resource_alignment(dev, r);
650 order = __ffs(align) - 20; 689 order = __ffs(align) - 20;
@@ -665,6 +704,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
665 if (order > max_order) 704 if (order > max_order)
666 max_order = order; 705 max_order = order;
667 mem64_mask &= r->flags & IORESOURCE_MEM_64; 706 mem64_mask &= r->flags & IORESOURCE_MEM_64;
707
708 if (realloc_head)
709 children_add_size += get_res_add_size(realloc_head, r);
668 } 710 }
669 } 711 }
670 align = 0; 712 align = 0;
@@ -681,7 +723,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
681 align += aligns[order]; 723 align += aligns[order];
682 } 724 }
683 size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align); 725 size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align);
684 size1 = (!add_head || (add_head && !add_size)) ? size0 : 726 if (children_add_size > add_size)
727 add_size = children_add_size;
728 size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 :
685 calculate_memsize(size, min_size+add_size, 0, 729 calculate_memsize(size, min_size+add_size, 0,
686 resource_size(b_res), min_align); 730 resource_size(b_res), min_align);
687 if (!size0 && !size1) { 731 if (!size0 && !size1) {
@@ -695,12 +739,22 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
695 b_res->start = min_align; 739 b_res->start = min_align;
696 b_res->end = size0 + min_align - 1; 740 b_res->end = size0 + min_align - 1;
697 b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask; 741 b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask;
698 if (size1 > size0 && add_head) 742 if (size1 > size0 && realloc_head)
699 add_to_list(add_head, bus->self, b_res, size1-size0); 743 add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align);
700 return 1; 744 return 1;
701} 745}
702 746
703static void pci_bus_size_cardbus(struct pci_bus *bus) 747unsigned long pci_cardbus_resource_alignment(struct resource *res)
748{
749 if (res->flags & IORESOURCE_IO)
750 return pci_cardbus_io_size;
751 if (res->flags & IORESOURCE_MEM)
752 return pci_cardbus_mem_size;
753 return 0;
754}
755
756static void pci_bus_size_cardbus(struct pci_bus *bus,
757 struct resource_list_x *realloc_head)
704{ 758{
705 struct pci_dev *bridge = bus->self; 759 struct pci_dev *bridge = bus->self;
706 struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; 760 struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES];
@@ -711,12 +765,14 @@ static void pci_bus_size_cardbus(struct pci_bus *bus)
711 * a fixed amount of bus space for CardBus bridges. 765 * a fixed amount of bus space for CardBus bridges.
712 */ 766 */
713 b_res[0].start = 0; 767 b_res[0].start = 0;
714 b_res[0].end = pci_cardbus_io_size - 1;
715 b_res[0].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; 768 b_res[0].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN;
769 if (realloc_head)
 770 add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size, 0 /* don't care */);
716 771
717 b_res[1].start = 0; 772 b_res[1].start = 0;
718 b_res[1].end = pci_cardbus_io_size - 1;
719 b_res[1].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; 773 b_res[1].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN;
774 if (realloc_head)
 775 add_to_list(realloc_head, bridge, b_res+1, pci_cardbus_io_size, 0 /* don't care */);
720 776
721 /* 777 /*
722 * Check whether prefetchable memory is supported 778 * Check whether prefetchable memory is supported
@@ -736,21 +792,31 @@ static void pci_bus_size_cardbus(struct pci_bus *bus)
736 */ 792 */
737 if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) { 793 if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) {
738 b_res[2].start = 0; 794 b_res[2].start = 0;
739 b_res[2].end = pci_cardbus_mem_size - 1;
740 b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_SIZEALIGN; 795 b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_SIZEALIGN;
796 if (realloc_head)
 797 add_to_list(realloc_head, bridge, b_res+2, pci_cardbus_mem_size, 0 /* don't care */);
741 798
742 b_res[3].start = 0; 799 b_res[3].start = 0;
743 b_res[3].end = pci_cardbus_mem_size - 1;
744 b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; 800 b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN;
801 if (realloc_head)
 802 add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size, 0 /* don't care */);
745 } else { 803 } else {
746 b_res[3].start = 0; 804 b_res[3].start = 0;
747 b_res[3].end = pci_cardbus_mem_size * 2 - 1;
748 b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; 805 b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN;
806 if (realloc_head)
 807 add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size * 2, 0 /* don't care */);
749 } 808 }
809
810 /* set the size of the resource to zero, so that the resource does not
811 * get assigned during the required-resource allocation cycle but gets assigned
812 * during the optional-resource allocation cycle.
813 */
814 b_res[0].start = b_res[1].start = b_res[2].start = b_res[3].start = 1;
815 b_res[0].end = b_res[1].end = b_res[2].end = b_res[3].end = 0;
750} 816}
751 817
752void __ref __pci_bus_size_bridges(struct pci_bus *bus, 818void __ref __pci_bus_size_bridges(struct pci_bus *bus,
753 struct resource_list_x *add_head) 819 struct resource_list_x *realloc_head)
754{ 820{
755 struct pci_dev *dev; 821 struct pci_dev *dev;
756 unsigned long mask, prefmask; 822 unsigned long mask, prefmask;
@@ -763,12 +829,12 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
763 829
764 switch (dev->class >> 8) { 830 switch (dev->class >> 8) {
765 case PCI_CLASS_BRIDGE_CARDBUS: 831 case PCI_CLASS_BRIDGE_CARDBUS:
766 pci_bus_size_cardbus(b); 832 pci_bus_size_cardbus(b, realloc_head);
767 break; 833 break;
768 834
769 case PCI_CLASS_BRIDGE_PCI: 835 case PCI_CLASS_BRIDGE_PCI:
770 default: 836 default:
771 __pci_bus_size_bridges(b, add_head); 837 __pci_bus_size_bridges(b, realloc_head);
772 break; 838 break;
773 } 839 }
774 } 840 }
@@ -792,7 +858,7 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
792 * Follow thru 858 * Follow thru
793 */ 859 */
794 default: 860 default:
795 pbus_size_io(bus, 0, additional_io_size, add_head); 861 pbus_size_io(bus, 0, additional_io_size, realloc_head);
796 /* If the bridge supports prefetchable range, size it 862 /* If the bridge supports prefetchable range, size it
797 separately. If it doesn't, or its prefetchable window 863 separately. If it doesn't, or its prefetchable window
798 has already been allocated by arch code, try 864 has already been allocated by arch code, try
@@ -800,11 +866,11 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
800 resources. */ 866 resources. */
801 mask = IORESOURCE_MEM; 867 mask = IORESOURCE_MEM;
802 prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH; 868 prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
803 if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, add_head)) 869 if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, realloc_head))
804 mask = prefmask; /* Success, size non-prefetch only. */ 870 mask = prefmask; /* Success, size non-prefetch only. */
805 else 871 else
806 additional_mem_size += additional_mem_size; 872 additional_mem_size += additional_mem_size;
807 pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, add_head); 873 pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, realloc_head);
808 break; 874 break;
809 } 875 }
810} 876}
@@ -816,20 +882,20 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus)
816EXPORT_SYMBOL(pci_bus_size_bridges); 882EXPORT_SYMBOL(pci_bus_size_bridges);
817 883
818static void __ref __pci_bus_assign_resources(const struct pci_bus *bus, 884static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
819 struct resource_list_x *add_head, 885 struct resource_list_x *realloc_head,
820 struct resource_list_x *fail_head) 886 struct resource_list_x *fail_head)
821{ 887{
822 struct pci_bus *b; 888 struct pci_bus *b;
823 struct pci_dev *dev; 889 struct pci_dev *dev;
824 890
825 pbus_assign_resources_sorted(bus, add_head, fail_head); 891 pbus_assign_resources_sorted(bus, realloc_head, fail_head);
826 892
827 list_for_each_entry(dev, &bus->devices, bus_list) { 893 list_for_each_entry(dev, &bus->devices, bus_list) {
828 b = dev->subordinate; 894 b = dev->subordinate;
829 if (!b) 895 if (!b)
830 continue; 896 continue;
831 897
832 __pci_bus_assign_resources(b, add_head, fail_head); 898 __pci_bus_assign_resources(b, realloc_head, fail_head);
833 899
834 switch (dev->class >> 8) { 900 switch (dev->class >> 8) {
835 case PCI_CLASS_BRIDGE_PCI: 901 case PCI_CLASS_BRIDGE_PCI:
@@ -1039,7 +1105,7 @@ void __init
1039pci_assign_unassigned_resources(void) 1105pci_assign_unassigned_resources(void)
1040{ 1106{
1041 struct pci_bus *bus; 1107 struct pci_bus *bus;
1042 struct resource_list_x add_list; /* list of resources that 1108 struct resource_list_x realloc_list; /* list of resources that
1043 want additional resources */ 1109 want additional resources */
1044 int tried_times = 0; 1110 int tried_times = 0;
1045 enum release_type rel_type = leaf_only; 1111 enum release_type rel_type = leaf_only;
@@ -1052,7 +1118,7 @@ pci_assign_unassigned_resources(void)
1052 1118
1053 1119
1054 head.next = NULL; 1120 head.next = NULL;
1055 add_list.next = NULL; 1121 realloc_list.next = NULL;
1056 1122
1057 pci_try_num = max_depth + 1; 1123 pci_try_num = max_depth + 1;
1058 printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n", 1124 printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n",
@@ -1062,12 +1128,12 @@ again:
1062 /* Depth first, calculate sizes and alignments of all 1128 /* Depth first, calculate sizes and alignments of all
1063 subordinate buses. */ 1129 subordinate buses. */
1064 list_for_each_entry(bus, &pci_root_buses, node) 1130 list_for_each_entry(bus, &pci_root_buses, node)
1065 __pci_bus_size_bridges(bus, &add_list); 1131 __pci_bus_size_bridges(bus, &realloc_list);
1066 1132
1067 /* Depth last, allocate resources and update the hardware. */ 1133 /* Depth last, allocate resources and update the hardware. */
1068 list_for_each_entry(bus, &pci_root_buses, node) 1134 list_for_each_entry(bus, &pci_root_buses, node)
1069 __pci_bus_assign_resources(bus, &add_list, &head); 1135 __pci_bus_assign_resources(bus, &realloc_list, &head);
1070 BUG_ON(add_list.next); 1136 BUG_ON(realloc_list.next);
1071 tried_times++; 1137 tried_times++;
1072 1138
1073 /* any device complain? */ 1139 /* any device complain? */
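The setup-bus.c changes above split bridge window sizing into a required and an optional part: size0 covers the must-have child resources, size1 additionally folds in add_size (for example SR-IOV BARs or hotplug headroom, now also summed from the children via get_res_add_size()), and only the difference between the two is queued on the realloc list for a later pass. A much-simplified user-space sketch of that split (the numbers and the fixed 4096 alignment are invented; the real code goes through calculate_iosize()/calculate_memsize()):

#include <stdio.h>

/* Round v up to a power-of-two alignment a. */
static unsigned long align_up(unsigned long v, unsigned long a)
{
	return (v + a - 1) & ~(a - 1);
}

int main(void)
{
	unsigned long used = 9000;	/* sum of child I/O requests */
	unsigned long add_size = 8192;	/* optional headroom requested */
	unsigned long size0, size1;

	size0 = align_up(used, 4096);		 /* required window */
	size1 = align_up(used + add_size, 4096); /* required + optional */

	printf("required window %lu, optional window %lu\n", size0, size1);
	if (size1 > size0)
		printf("queue the extra %lu bytes on the realloc list\n",
		       size1 - size0);
	return 0;
}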
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 319f359906e..51a9095c7da 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -128,16 +128,16 @@ void pci_disable_bridge_window(struct pci_dev *dev)
128} 128}
129#endif /* CONFIG_PCI_QUIRKS */ 129#endif /* CONFIG_PCI_QUIRKS */
130 130
131
132
131static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, 133static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
132 int resno) 134 int resno, resource_size_t size, resource_size_t align)
133{ 135{
134 struct resource *res = dev->resource + resno; 136 struct resource *res = dev->resource + resno;
135 resource_size_t size, min, align; 137 resource_size_t min;
136 int ret; 138 int ret;
137 139
138 size = resource_size(res);
139 min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; 140 min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
140 align = pci_resource_alignment(dev, res);
141 141
142 /* First, try exact prefetching match.. */ 142 /* First, try exact prefetching match.. */
143 ret = pci_bus_alloc_resource(bus, res, size, align, min, 143 ret = pci_bus_alloc_resource(bus, res, size, align, min,
@@ -154,56 +154,101 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
154 ret = pci_bus_alloc_resource(bus, res, size, align, min, 0, 154 ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
155 pcibios_align_resource, dev); 155 pcibios_align_resource, dev);
156 } 156 }
157 return ret;
158}
157 159
158 if (ret < 0 && dev->fw_addr[resno]) { 160static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev,
159 struct resource *root, *conflict; 161 int resno, resource_size_t size)
160 resource_size_t start, end; 162{
163 struct resource *root, *conflict;
164 resource_size_t start, end;
165 int ret = 0;
161 166
162 /* 167 if (res->flags & IORESOURCE_IO)
163 * If we failed to assign anything, let's try the address 168 root = &ioport_resource;
164 * where firmware left it. That at least has a chance of 169 else
165 * working, which is better than just leaving it disabled. 170 root = &iomem_resource;
166 */ 171
172 start = res->start;
173 end = res->end;
174 res->start = dev->fw_addr[resno];
175 res->end = res->start + size - 1;
176 dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n",
177 resno, res);
178 conflict = request_resource_conflict(root, res);
179 if (conflict) {
180 dev_info(&dev->dev,
181 "BAR %d: %pR conflicts with %s %pR\n", resno,
182 res, conflict->name, conflict);
183 res->start = start;
184 res->end = end;
185 ret = 1;
186 }
187 return ret;
188}
189
190static int _pci_assign_resource(struct pci_dev *dev, int resno, int size, resource_size_t min_align)
191{
192 struct resource *res = dev->resource + resno;
193 struct pci_bus *bus;
194 int ret;
195 char *type;
167 196
168 if (res->flags & IORESOURCE_IO) 197 bus = dev->bus;
169 root = &ioport_resource; 198 while ((ret = __pci_assign_resource(bus, dev, resno, size, min_align))) {
199 if (!bus->parent || !bus->self->transparent)
200 break;
201 bus = bus->parent;
202 }
203
204 if (ret) {
205 if (res->flags & IORESOURCE_MEM)
206 if (res->flags & IORESOURCE_PREFETCH)
207 type = "mem pref";
208 else
209 type = "mem";
210 else if (res->flags & IORESOURCE_IO)
211 type = "io";
170 else 212 else
171 root = &iomem_resource; 213 type = "unknown";
172 214 dev_info(&dev->dev,
173 start = res->start; 215 "BAR %d: can't assign %s (size %#llx)\n",
174 end = res->end; 216 resno, type, (unsigned long long) resource_size(res));
175 res->start = dev->fw_addr[resno];
176 res->end = res->start + size - 1;
177 dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n",
178 resno, res);
179 conflict = request_resource_conflict(root, res);
180 if (conflict) {
181 dev_info(&dev->dev,
182 "BAR %d: %pR conflicts with %s %pR\n", resno,
183 res, conflict->name, conflict);
184 res->start = start;
185 res->end = end;
186 } else
187 ret = 0;
188 } 217 }
189 218
219 return ret;
220}
221
222int pci_reassign_resource(struct pci_dev *dev, int resno, resource_size_t addsize,
223 resource_size_t min_align)
224{
225 struct resource *res = dev->resource + resno;
226 resource_size_t new_size;
227 int ret;
228
229 if (!res->parent) {
230 dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resource %pR "
231 "\n", resno, res);
232 return -EINVAL;
233 }
234
235 new_size = resource_size(res) + addsize + min_align;
236 ret = _pci_assign_resource(dev, resno, new_size, min_align);
190 if (!ret) { 237 if (!ret) {
191 res->flags &= ~IORESOURCE_STARTALIGN; 238 res->flags &= ~IORESOURCE_STARTALIGN;
192 dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); 239 dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res);
193 if (resno < PCI_BRIDGE_RESOURCES) 240 if (resno < PCI_BRIDGE_RESOURCES)
194 pci_update_resource(dev, resno); 241 pci_update_resource(dev, resno);
195 } 242 }
196
197 return ret; 243 return ret;
198} 244}
199 245
200int pci_assign_resource(struct pci_dev *dev, int resno) 246int pci_assign_resource(struct pci_dev *dev, int resno)
201{ 247{
202 struct resource *res = dev->resource + resno; 248 struct resource *res = dev->resource + resno;
203 resource_size_t align; 249 resource_size_t align, size;
204 struct pci_bus *bus; 250 struct pci_bus *bus;
205 int ret; 251 int ret;
206 char *type;
207 252
208 align = pci_resource_alignment(dev, res); 253 align = pci_resource_alignment(dev, res);
209 if (!align) { 254 if (!align) {
@@ -213,34 +258,27 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
213 } 258 }
214 259
215 bus = dev->bus; 260 bus = dev->bus;
216 while ((ret = __pci_assign_resource(bus, dev, resno))) { 261 size = resource_size(res);
217 if (bus->parent && bus->self->transparent) 262 ret = _pci_assign_resource(dev, resno, size, align);
218 bus = bus->parent;
219 else
220 bus = NULL;
221 if (bus)
222 continue;
223 break;
224 }
225 263
226 if (ret) { 264 /*
227 if (res->flags & IORESOURCE_MEM) 265 * If we failed to assign anything, let's try the address
228 if (res->flags & IORESOURCE_PREFETCH) 266 * where firmware left it. That at least has a chance of
229 type = "mem pref"; 267 * working, which is better than just leaving it disabled.
230 else 268 */
231 type = "mem"; 269 if (ret < 0 && dev->fw_addr[resno])
232 else if (res->flags & IORESOURCE_IO) 270 ret = pci_revert_fw_address(res, dev, resno, size);
233 type = "io";
234 else
235 type = "unknown";
236 dev_info(&dev->dev,
237 "BAR %d: can't assign %s (size %#llx)\n",
238 resno, type, (unsigned long long) resource_size(res));
239 }
240 271
272 if (!ret) {
273 res->flags &= ~IORESOURCE_STARTALIGN;
274 dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res);
275 if (resno < PCI_BRIDGE_RESOURCES)
276 pci_update_resource(dev, resno);
277 }
241 return ret; 278 return ret;
242} 279}
243 280
281
244/* Sort resources by alignment */ 282/* Sort resources by alignment */
245void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) 283void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
246{ 284{
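_pci_assign_resource() above keeps retrying the allocation one bus further up as long as the bridge in between is transparent, and pci_reassign_resource() then reuses that path to grow an already-assigned resource by addsize plus min_align. A toy model of the escalation loop alone (the bus layout, the free-space numbers and alloc_on_bus() are invented for illustration):

#include <stdio.h>

struct bus {
	struct bus *parent;
	int self_transparent;	/* bridge leading into this bus is transparent */
	unsigned long free;	/* toy stand-in for available window space */
};

static int alloc_on_bus(struct bus *b, unsigned long size)
{
	return size <= b->free ? 0 : -1;
}

int main(void)
{
	struct bus root = { NULL, 0, 1UL << 20 };
	struct bus leaf = { &root, 1, 4096 };	/* behind a transparent bridge */
	unsigned long size = 65536;
	struct bus *b = &leaf;
	int ret;

	/* Mirror of the loop in _pci_assign_resource(): escalate to the
	 * parent bus only while the intervening bridge is transparent. */
	while ((ret = alloc_on_bus(b, size))) {
		if (!b->parent || !b->self_transparent)
			break;
		b = b->parent;
	}

	if (ret)
		printf("assignment failed\n");
	else
		printf("assigned on the %s bus\n", b == &root ? "root" : "leaf");
	return 0;
}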
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 3195dbd3ec3..44e91e598f8 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -639,7 +639,7 @@ EXPORT_SYMBOL_GPL(rtc_irq_unregister);
639static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled) 639static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled)
640{ 640{
641 /* 641 /*
642 * We unconditionally cancel the timer here, because otherwise 642 * We always cancel the timer here first, because otherwise
643 * we could run into BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); 643 * we could run into BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
644 * when we manage to start the timer before the callback 644 * when we manage to start the timer before the callback
645 * returns HRTIMER_RESTART. 645 * returns HRTIMER_RESTART.
@@ -708,7 +708,7 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
708 int err = 0; 708 int err = 0;
709 unsigned long flags; 709 unsigned long flags;
710 710
711 if (freq <= 0 || freq > 5000) 711 if (freq <= 0 || freq > RTC_MAX_FREQ)
712 return -EINVAL; 712 return -EINVAL;
713retry: 713retry:
714 spin_lock_irqsave(&rtc->irq_task_lock, flags); 714 spin_lock_irqsave(&rtc->irq_task_lock, flags);
diff --git a/drivers/staging/gma500/mdfld_dsi_dbi.c b/drivers/staging/gma500/mdfld_dsi_dbi.c
index 02e17c9c863..fd211f3467c 100644
--- a/drivers/staging/gma500/mdfld_dsi_dbi.c
+++ b/drivers/staging/gma500/mdfld_dsi_dbi.c
@@ -711,10 +711,11 @@ struct mdfld_dsi_encoder *mdfld_dsi_dbi_init(struct drm_device *dev,
711 /* Create drm encoder object */ 711 /* Create drm encoder object */
712 connector = &dsi_connector->base.base; 712 connector = &dsi_connector->base.base;
713 encoder = &dbi_output->base.base; 713 encoder = &dbi_output->base.base;
714 /* Review this if we ever get MIPI-HDMI bridges or similar */
714 drm_encoder_init(dev, 715 drm_encoder_init(dev,
715 encoder, 716 encoder,
716 p_funcs->encoder_funcs, 717 p_funcs->encoder_funcs,
717 DRM_MODE_ENCODER_MIPI); 718 DRM_MODE_ENCODER_LVDS);
718 drm_encoder_helper_add(encoder, p_funcs->encoder_helper_funcs); 719 drm_encoder_helper_add(encoder, p_funcs->encoder_helper_funcs);
719 720
720 /* Attach to given connector */ 721 /* Attach to given connector */
diff --git a/drivers/staging/gma500/mdfld_dsi_dbi.h b/drivers/staging/gma500/mdfld_dsi_dbi.h
index dc6242c51d0..f0fa986fd93 100644
--- a/drivers/staging/gma500/mdfld_dsi_dbi.h
+++ b/drivers/staging/gma500/mdfld_dsi_dbi.h
@@ -42,9 +42,6 @@
42#include "mdfld_dsi_output.h" 42#include "mdfld_dsi_output.h"
43#include "mdfld_output.h" 43#include "mdfld_output.h"
44 44
45#define DRM_MODE_ENCODER_MIPI 5
46
47
48/* 45/*
49 * DBI encoder which inherits from mdfld_dsi_encoder 46 * DBI encoder which inherits from mdfld_dsi_encoder
50 */ 47 */
diff --git a/drivers/staging/gma500/mdfld_dsi_dpi.c b/drivers/staging/gma500/mdfld_dsi_dpi.c
index 6e03a91e947..e685f1217ba 100644
--- a/drivers/staging/gma500/mdfld_dsi_dpi.c
+++ b/drivers/staging/gma500/mdfld_dsi_dpi.c
@@ -777,10 +777,15 @@ struct mdfld_dsi_encoder *mdfld_dsi_dpi_init(struct drm_device *dev,
777 /* Create drm encoder object */ 777 /* Create drm encoder object */
778 connector = &dsi_connector->base.base; 778 connector = &dsi_connector->base.base;
779 encoder = &dpi_output->base.base; 779 encoder = &dpi_output->base.base;
780 /*
781 * On existing hardware this will be a panel of some form;
782 * if future devices also have HDMI bridges this will need
783 * revisiting
784 */
780 drm_encoder_init(dev, 785 drm_encoder_init(dev,
781 encoder, 786 encoder,
782 p_funcs->encoder_funcs, 787 p_funcs->encoder_funcs,
783 DRM_MODE_ENCODER_MIPI); 788 DRM_MODE_ENCODER_LVDS);
784 drm_encoder_helper_add(encoder, 789 drm_encoder_helper_add(encoder,
785 p_funcs->encoder_helper_funcs); 790 p_funcs->encoder_helper_funcs);
786 791
diff --git a/drivers/staging/gma500/mdfld_dsi_output.c b/drivers/staging/gma500/mdfld_dsi_output.c
index 7536095c30a..9050c0f78b1 100644
--- a/drivers/staging/gma500/mdfld_dsi_output.c
+++ b/drivers/staging/gma500/mdfld_dsi_output.c
@@ -955,7 +955,9 @@ void mdfld_dsi_output_init(struct drm_device *dev,
955 psb_output->type = (pipe == 0) ? INTEL_OUTPUT_MIPI : INTEL_OUTPUT_MIPI2; 955 psb_output->type = (pipe == 0) ? INTEL_OUTPUT_MIPI : INTEL_OUTPUT_MIPI2;
956 956
957 connector = &psb_output->base; 957 connector = &psb_output->base;
958 drm_connector_init(dev, connector, &mdfld_dsi_connector_funcs, DRM_MODE_CONNECTOR_MIPI); 958 /* Revisit type if MIPI/HDMI bridges ever appear on Medfield */
959 drm_connector_init(dev, connector, &mdfld_dsi_connector_funcs,
960 DRM_MODE_CONNECTOR_LVDS);
959 drm_connector_helper_add(connector, &mdfld_dsi_connector_helper_funcs); 961 drm_connector_helper_add(connector, &mdfld_dsi_connector_helper_funcs);
960 962
961 connector->display_info.subpixel_order = SubPixelHorizontalRGB; 963 connector->display_info.subpixel_order = SubPixelHorizontalRGB;
diff --git a/drivers/staging/gma500/medfield.h b/drivers/staging/gma500/medfield.h
index 38165e8367e..09e9687431f 100644
--- a/drivers/staging/gma500/medfield.h
+++ b/drivers/staging/gma500/medfield.h
@@ -21,8 +21,6 @@
21 * DEALINGS IN THE SOFTWARE. 21 * DEALINGS IN THE SOFTWARE.
22 */ 22 */
23 23
24#define DRM_MODE_ENCODER_MIPI 5
25
26/* Medfield DSI controller registers */ 24/* Medfield DSI controller registers */
27 25
28#define MIPIA_DEVICE_READY_REG 0xb000 26#define MIPIA_DEVICE_READY_REG 0xb000
diff --git a/drivers/staging/gma500/psb_drv.h b/drivers/staging/gma500/psb_drv.h
index 72f487a2a1b..fd4732dd783 100644
--- a/drivers/staging/gma500/psb_drv.h
+++ b/drivers/staging/gma500/psb_drv.h
@@ -35,7 +35,6 @@
35 35
36/* Append new drm mode definition here, align with libdrm definition */ 36/* Append new drm mode definition here, align with libdrm definition */
37#define DRM_MODE_SCALE_NO_SCALE 2 37#define DRM_MODE_SCALE_NO_SCALE 2
38#define DRM_MODE_CONNECTOR_MIPI 15
39 38
40enum { 39enum {
41 CHIP_PSB_8108 = 0, /* Poulsbo */ 40 CHIP_PSB_8108 = 0, /* Poulsbo */
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 1b4afd81f87..6ea852e2516 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -70,6 +70,7 @@
70#include <linux/kernel.h> 70#include <linux/kernel.h>
71#include <linux/mm.h> 71#include <linux/mm.h>
72#include <linux/mman.h> 72#include <linux/mman.h>
73#include <linux/module.h>
73#include <linux/workqueue.h> 74#include <linux/workqueue.h>
74#include <xen/balloon.h> 75#include <xen/balloon.h>
75#include <xen/tmem.h> 76#include <xen/tmem.h>
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 54b8c28bebc..720d885e8dc 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -474,17 +474,22 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
474 befs_data_stream *data = &befs_ino->i_data.ds; 474 befs_data_stream *data = &befs_ino->i_data.ds;
475 befs_off_t len = data->size; 475 befs_off_t len = data->size;
476 476
477 befs_debug(sb, "Follow long symlink"); 477 if (len == 0) {
478 478 befs_error(sb, "Long symlink with illegal length");
479 link = kmalloc(len, GFP_NOFS);
480 if (!link) {
481 link = ERR_PTR(-ENOMEM);
482 } else if (befs_read_lsymlink(sb, data, link, len) != len) {
483 kfree(link);
484 befs_error(sb, "Failed to read entire long symlink");
485 link = ERR_PTR(-EIO); 479 link = ERR_PTR(-EIO);
486 } else { 480 } else {
487 link[len - 1] = '\0'; 481 befs_debug(sb, "Follow long symlink");
482
483 link = kmalloc(len, GFP_NOFS);
484 if (!link) {
485 link = ERR_PTR(-ENOMEM);
486 } else if (befs_read_lsymlink(sb, data, link, len) != len) {
487 kfree(link);
488 befs_error(sb, "Failed to read entire long symlink");
489 link = ERR_PTR(-EIO);
490 } else {
491 link[len - 1] = '\0';
492 }
488 } 493 }
489 } else { 494 } else {
490 link = befs_ino->i_data.symlink; 495 link = befs_ino->i_data.symlink;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0469263e327..03912c5c6f4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1415,17 +1415,15 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
1415#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ 1415#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
1416static inline u##bits btrfs_##name(struct extent_buffer *eb) \ 1416static inline u##bits btrfs_##name(struct extent_buffer *eb) \
1417{ \ 1417{ \
1418 type *p = kmap_atomic(eb->first_page, KM_USER0); \ 1418 type *p = page_address(eb->first_page); \
1419 u##bits res = le##bits##_to_cpu(p->member); \ 1419 u##bits res = le##bits##_to_cpu(p->member); \
1420 kunmap_atomic(p, KM_USER0); \
1421 return res; \ 1420 return res; \
1422} \ 1421} \
1423static inline void btrfs_set_##name(struct extent_buffer *eb, \ 1422static inline void btrfs_set_##name(struct extent_buffer *eb, \
1424 u##bits val) \ 1423 u##bits val) \
1425{ \ 1424{ \
1426 type *p = kmap_atomic(eb->first_page, KM_USER0); \ 1425 type *p = page_address(eb->first_page); \
1427 p->member = cpu_to_le##bits(val); \ 1426 p->member = cpu_to_le##bits(val); \
1428 kunmap_atomic(p, KM_USER0); \
1429} 1427}
1430 1428
1431#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ 1429#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \
@@ -2367,8 +2365,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2367int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); 2365int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
2368int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); 2366int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
2369int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); 2367int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
2370int btrfs_drop_snapshot(struct btrfs_root *root, 2368void btrfs_drop_snapshot(struct btrfs_root *root,
2371 struct btrfs_block_rsv *block_rsv, int update_ref); 2369 struct btrfs_block_rsv *block_rsv, int update_ref);
2372int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 2370int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
2373 struct btrfs_root *root, 2371 struct btrfs_root *root,
2374 struct extent_buffer *node, 2372 struct extent_buffer *node,
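The BTRFS_SETGET_HEADER_FUNCS change above drops the kmap_atomic()/kunmap_atomic() pair and reads the header member through page_address(), on the assumption that the extent buffer's first page is always mapped. A user-space imitation of the macro pattern (all names here are invented; le64toh()/htole64() stand in for the kernel's le64_to_cpu()/cpu_to_le64()):

#define _DEFAULT_SOURCE
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct demo_header { uint64_t generation; };
struct demo_buffer { void *first_page_data; };	/* always-mapped page */

/* Generate a little-endian getter/setter pair, no map/unmap needed. */
#define DEMO_SETGET_HEADER_FUNCS(name, type, member, bits)		\
static inline uint##bits##_t demo_##name(struct demo_buffer *eb)	\
{									\
	type *p = eb->first_page_data;					\
	return le##bits##toh(p->member);				\
}									\
static inline void demo_set_##name(struct demo_buffer *eb,		\
				   uint##bits##_t val)			\
{									\
	type *p = eb->first_page_data;					\
	p->member = htole##bits(val);					\
}

DEMO_SETGET_HEADER_FUNCS(generation, struct demo_header, generation, 64)

int main(void)
{
	struct demo_header h = { 0 };
	struct demo_buffer b = { &h };

	demo_set_generation(&b, 42);
	printf("%llu\n", (unsigned long long)demo_generation(&b));
	return 0;
}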
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 66bac226944..f5be06a2462 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1782,6 +1782,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1782 1782
1783 1783
1784 for (i = 0; i < multi->num_stripes; i++, stripe++) { 1784 for (i = 0; i < multi->num_stripes; i++, stripe++) {
1785 if (!stripe->dev->can_discard)
1786 continue;
1787
1785 ret = btrfs_issue_discard(stripe->dev->bdev, 1788 ret = btrfs_issue_discard(stripe->dev->bdev,
1786 stripe->physical, 1789 stripe->physical,
1787 stripe->length); 1790 stripe->length);
@@ -1789,11 +1792,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1789 discarded_bytes += stripe->length; 1792 discarded_bytes += stripe->length;
1790 else if (ret != -EOPNOTSUPP) 1793 else if (ret != -EOPNOTSUPP)
1791 break; 1794 break;
1795
1796 /*
1797 * Just in case we get back EOPNOTSUPP for some reason,
1798 * just ignore the return value so we don't screw up
1799 * people calling discard_extent.
1800 */
1801 ret = 0;
1792 } 1802 }
1793 kfree(multi); 1803 kfree(multi);
1794 } 1804 }
1795 if (discarded_bytes && ret == -EOPNOTSUPP)
1796 ret = 0;
1797 1805
1798 if (actual_bytes) 1806 if (actual_bytes)
1799 *actual_bytes = discarded_bytes; 1807 *actual_bytes = discarded_bytes;
@@ -6269,8 +6277,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
6269 * also make sure backrefs for the shared block and all lower level 6277 * also make sure backrefs for the shared block and all lower level
6270 * blocks are properly updated. 6278 * blocks are properly updated.
6271 */ 6279 */
6272int btrfs_drop_snapshot(struct btrfs_root *root, 6280void btrfs_drop_snapshot(struct btrfs_root *root,
6273 struct btrfs_block_rsv *block_rsv, int update_ref) 6281 struct btrfs_block_rsv *block_rsv, int update_ref)
6274{ 6282{
6275 struct btrfs_path *path; 6283 struct btrfs_path *path;
6276 struct btrfs_trans_handle *trans; 6284 struct btrfs_trans_handle *trans;
@@ -6283,13 +6291,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6283 int level; 6291 int level;
6284 6292
6285 path = btrfs_alloc_path(); 6293 path = btrfs_alloc_path();
6286 if (!path) 6294 if (!path) {
6287 return -ENOMEM; 6295 err = -ENOMEM;
6296 goto out;
6297 }
6288 6298
6289 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6299 wc = kzalloc(sizeof(*wc), GFP_NOFS);
6290 if (!wc) { 6300 if (!wc) {
6291 btrfs_free_path(path); 6301 btrfs_free_path(path);
6292 return -ENOMEM; 6302 err = -ENOMEM;
6303 goto out;
6293 } 6304 }
6294 6305
6295 trans = btrfs_start_transaction(tree_root, 0); 6306 trans = btrfs_start_transaction(tree_root, 0);
@@ -6318,7 +6329,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6318 path->lowest_level = 0; 6329 path->lowest_level = 0;
6319 if (ret < 0) { 6330 if (ret < 0) {
6320 err = ret; 6331 err = ret;
6321 goto out; 6332 goto out_free;
6322 } 6333 }
6323 WARN_ON(ret > 0); 6334 WARN_ON(ret > 0);
6324 6335
@@ -6425,11 +6436,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6425 free_extent_buffer(root->commit_root); 6436 free_extent_buffer(root->commit_root);
6426 kfree(root); 6437 kfree(root);
6427 } 6438 }
6428out: 6439out_free:
6429 btrfs_end_transaction_throttle(trans, tree_root); 6440 btrfs_end_transaction_throttle(trans, tree_root);
6430 kfree(wc); 6441 kfree(wc);
6431 btrfs_free_path(path); 6442 btrfs_free_path(path);
6432 return err; 6443out:
6444 if (err)
6445 btrfs_std_error(root->fs_info, err);
6446 return;
6433} 6447}
6434 6448
6435/* 6449/*
@@ -6720,6 +6734,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
6720 struct btrfs_space_info *space_info; 6734 struct btrfs_space_info *space_info;
6721 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 6735 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
6722 struct btrfs_device *device; 6736 struct btrfs_device *device;
6737 u64 min_free;
6738 u64 dev_min = 1;
6739 u64 dev_nr = 0;
6740 int index;
6723 int full = 0; 6741 int full = 0;
6724 int ret = 0; 6742 int ret = 0;
6725 6743
@@ -6729,8 +6747,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
6729 if (!block_group) 6747 if (!block_group)
6730 return -1; 6748 return -1;
6731 6749
6750 min_free = btrfs_block_group_used(&block_group->item);
6751
6732 /* no bytes used, we're good */ 6752 /* no bytes used, we're good */
6733 if (!btrfs_block_group_used(&block_group->item)) 6753 if (!min_free)
6734 goto out; 6754 goto out;
6735 6755
6736 space_info = block_group->space_info; 6756 space_info = block_group->space_info;
@@ -6746,10 +6766,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
6746 * all of the extents from this block group. If we can, we're good 6766 * all of the extents from this block group. If we can, we're good
6747 */ 6767 */
6748 if ((space_info->total_bytes != block_group->key.offset) && 6768 if ((space_info->total_bytes != block_group->key.offset) &&
6749 (space_info->bytes_used + space_info->bytes_reserved + 6769 (space_info->bytes_used + space_info->bytes_reserved +
6750 space_info->bytes_pinned + space_info->bytes_readonly + 6770 space_info->bytes_pinned + space_info->bytes_readonly +
6751 btrfs_block_group_used(&block_group->item) < 6771 min_free < space_info->total_bytes)) {
6752 space_info->total_bytes)) {
6753 spin_unlock(&space_info->lock); 6772 spin_unlock(&space_info->lock);
6754 goto out; 6773 goto out;
6755 } 6774 }
@@ -6766,9 +6785,31 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
6766 if (full) 6785 if (full)
6767 goto out; 6786 goto out;
6768 6787
6788 /*
6789 * index:
6790 * 0: raid10
6791 * 1: raid1
6792 * 2: dup
6793 * 3: raid0
6794 * 4: single
6795 */
6796 index = get_block_group_index(block_group);
6797 if (index == 0) {
6798 dev_min = 4;
6799 /* Divide by 2 */
6800 min_free >>= 1;
6801 } else if (index == 1) {
6802 dev_min = 2;
6803 } else if (index == 2) {
6804 /* Multiply by 2 */
6805 min_free <<= 1;
6806 } else if (index == 3) {
6807 dev_min = fs_devices->rw_devices;
6808 do_div(min_free, dev_min);
6809 }
6810
6769 mutex_lock(&root->fs_info->chunk_mutex); 6811 mutex_lock(&root->fs_info->chunk_mutex);
6770 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 6812 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
6771 u64 min_free = btrfs_block_group_used(&block_group->item);
6772 u64 dev_offset; 6813 u64 dev_offset;
6773 6814
6774 /* 6815 /*
@@ -6779,7 +6820,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
6779 ret = find_free_dev_extent(NULL, device, min_free, 6820 ret = find_free_dev_extent(NULL, device, min_free,
6780 &dev_offset, NULL); 6821 &dev_offset, NULL);
6781 if (!ret) 6822 if (!ret)
6823 dev_nr++;
6824
6825 if (dev_nr >= dev_min)
6782 break; 6826 break;
6827
6783 ret = -1; 6828 ret = -1;
6784 } 6829 }
6785 } 6830 }
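
The btrfs_can_relocate() hunks above stop using the raw block-group usage as the per-device requirement and instead derive, per RAID profile, how much free space must be found and on how many devices before relocation is considered possible. The stand-alone C sketch below models that computation; the helper name, the driver and the zero-device guard are illustrative additions, while the index-to-profile mapping and the adjustments follow the patch comment.

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative model of the per-profile sizing in btrfs_can_relocate():
     * index follows the comment in the patch (0 raid10, 1 raid1, 2 dup,
     * 3 raid0, 4 single).  Not kernel code. */
    static void relocation_requirement(int index, uint64_t used,
                                       uint64_t rw_devices,
                                       uint64_t *min_free, uint64_t *dev_min)
    {
        *min_free = used;
        *dev_min = 1;

        switch (index) {
        case 0:                     /* raid10: striped and mirrored */
            *dev_min = 4;
            *min_free >>= 1;        /* each stripe holds half of the data */
            break;
        case 1:                     /* raid1: two full copies */
            *dev_min = 2;
            break;
        case 2:                     /* dup: two copies on one device */
            *min_free <<= 1;
            break;
        case 3:                     /* raid0: spread over all rw devices */
            *dev_min = rw_devices ? rw_devices : 1;
            *min_free /= *dev_min;
            break;
        default:                    /* single */
            break;
        }
    }

    int main(void)
    {
        uint64_t min_free, dev_min;

        relocation_requirement(0, 1ULL << 30, 6, &min_free, &dev_min);
        printf("raid10: %llu bytes free needed on %llu devices\n",
               (unsigned long long)min_free, (unsigned long long)dev_min);
        return 0;
    }

The device loop that follows in the patch then counts devices offering min_free and succeeds once dev_nr reaches dev_min.
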
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 658d66959ab..e7872e485f1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -150,6 +150,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
150 spin_lock(&root->fs_info->defrag_inodes_lock); 150 spin_lock(&root->fs_info->defrag_inodes_lock);
151 if (!BTRFS_I(inode)->in_defrag) 151 if (!BTRFS_I(inode)->in_defrag)
152 __btrfs_add_inode_defrag(inode, defrag); 152 __btrfs_add_inode_defrag(inode, defrag);
153 else
154 kfree(defrag);
153 spin_unlock(&root->fs_info->defrag_inodes_lock); 155 spin_unlock(&root->fs_info->defrag_inodes_lock);
154 return 0; 156 return 0;
155} 157}
@@ -1638,11 +1640,15 @@ static long btrfs_fallocate(struct file *file, int mode,
1638 1640
1639 cur_offset = alloc_start; 1641 cur_offset = alloc_start;
1640 while (1) { 1642 while (1) {
1643 u64 actual_end;
1644
1641 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 1645 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
1642 alloc_end - cur_offset, 0); 1646 alloc_end - cur_offset, 0);
1643 BUG_ON(IS_ERR_OR_NULL(em)); 1647 BUG_ON(IS_ERR_OR_NULL(em));
1644 last_byte = min(extent_map_end(em), alloc_end); 1648 last_byte = min(extent_map_end(em), alloc_end);
1649 actual_end = min_t(u64, extent_map_end(em), offset + len);
1645 last_byte = (last_byte + mask) & ~mask; 1650 last_byte = (last_byte + mask) & ~mask;
1651
1646 if (em->block_start == EXTENT_MAP_HOLE || 1652 if (em->block_start == EXTENT_MAP_HOLE ||
1647 (cur_offset >= inode->i_size && 1653 (cur_offset >= inode->i_size &&
1648 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 1654 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
@@ -1655,6 +1661,16 @@ static long btrfs_fallocate(struct file *file, int mode,
1655 free_extent_map(em); 1661 free_extent_map(em);
1656 break; 1662 break;
1657 } 1663 }
1664 } else if (actual_end > inode->i_size &&
1665 !(mode & FALLOC_FL_KEEP_SIZE)) {
1666 /*
1667 * We didn't need to allocate any more space, but we
1668 * still extended the size of the file so we need to
1669 * update i_size.
1670 */
1671 inode->i_ctime = CURRENT_TIME;
1672 i_size_write(inode, actual_end);
1673 btrfs_ordered_update_i_size(inode, actual_end, NULL);
1658 } 1674 }
1659 free_extent_map(em); 1675 free_extent_map(em);
1660 1676
@@ -1804,10 +1820,14 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
1804 } 1820 }
1805 } 1821 }
1806 1822
1807 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) 1823 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) {
1808 return -EINVAL; 1824 ret = -EINVAL;
1809 if (offset > inode->i_sb->s_maxbytes) 1825 goto out;
1810 return -EINVAL; 1826 }
1827 if (offset > inode->i_sb->s_maxbytes) {
1828 ret = -EINVAL;
1829 goto out;
1830 }
1811 1831
1812 /* Special lock needed here? */ 1832 /* Special lock needed here? */
1813 if (offset != file->f_pos) { 1833 if (offset != file->f_pos) {
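
The fallocate hunk above covers the case where the requested range is already allocated but ends beyond i_size: unless FALLOC_FL_KEEP_SIZE was given, the inode size (and ctime) still has to be pushed out to the end of the requested range. A minimal sketch of that size computation, with simplified types and a made-up flag value standing in for the kernel constants:

    #include <stdint.h>
    #include <stdio.h>

    #define KEEP_SIZE 0x01   /* stand-in for FALLOC_FL_KEEP_SIZE */

    /* Returns the i_size the inode should have after handling one extent:
     * actual_end mirrors the patch (end of the extent clamped to the
     * requested range), and the size only grows when KEEP_SIZE is not set. */
    static uint64_t size_after_fallocate(uint64_t i_size, uint64_t offset,
                                         uint64_t len, uint64_t extent_end,
                                         int mode)
    {
        uint64_t actual_end = extent_end < offset + len ? extent_end
                                                        : offset + len;

        if (actual_end > i_size && !(mode & KEEP_SIZE))
            return actual_end;  /* i_size_write() + ordered i_size update */
        return i_size;
    }

    int main(void)
    {
        /* 4 KiB file, 8 KiB fallocate over an already-mapped extent */
        printf("%llu\n", (unsigned long long)
               size_after_fallocate(4096, 0, 8192, 8192, 0));
        return 0;
    }
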
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 6377713f639..6a265b9f85f 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1168,9 +1168,9 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1168 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); 1168 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1169} 1169}
1170 1170
1171static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1171static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1172 struct btrfs_free_space *info, u64 offset, 1172 struct btrfs_free_space *info,
1173 u64 bytes) 1173 u64 offset, u64 bytes)
1174{ 1174{
1175 unsigned long start, count; 1175 unsigned long start, count;
1176 1176
@@ -1181,6 +1181,13 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1181 bitmap_clear(info->bitmap, start, count); 1181 bitmap_clear(info->bitmap, start, count);
1182 1182
1183 info->bytes -= bytes; 1183 info->bytes -= bytes;
1184}
1185
1186static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1187 struct btrfs_free_space *info, u64 offset,
1188 u64 bytes)
1189{
1190 __bitmap_clear_bits(ctl, info, offset, bytes);
1184 ctl->free_space -= bytes; 1191 ctl->free_space -= bytes;
1185} 1192}
1186 1193
@@ -1984,7 +1991,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1984 return 0; 1991 return 0;
1985 1992
1986 ret = search_start; 1993 ret = search_start;
1987 bitmap_clear_bits(ctl, entry, ret, bytes); 1994 __bitmap_clear_bits(ctl, entry, ret, bytes);
1988 1995
1989 return ret; 1996 return ret;
1990} 1997}
@@ -2039,7 +2046,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2039 continue; 2046 continue;
2040 } 2047 }
2041 } else { 2048 } else {
2042
2043 ret = entry->offset; 2049 ret = entry->offset;
2044 2050
2045 entry->offset += bytes; 2051 entry->offset += bytes;
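
The free-space-cache hunks split bitmap_clear_bits() so that clearing the bits and adjusting the entry's byte count can happen without touching the global free-space counter; judging from the call site changed in btrfs_alloc_from_bitmap(), cluster allocations account that space elsewhere and would otherwise subtract it twice. A compile-only sketch of the wrapper split, with simplified stand-in structures:

    struct space_ctl   { unsigned long long free_space; };
    struct space_entry { unsigned long long bytes; };

    /* Inner helper: per-entry bookkeeping only (the real code also calls
     * bitmap_clear() on the entry's bitmap). */
    static void clear_bits_inner(struct space_ctl *ctl, struct space_entry *e,
                                 unsigned long long bytes)
    {
        (void)ctl;
        e->bytes -= bytes;
    }

    /* Outer helper: what ordinary callers keep using; it also charges the
     * global counter.  Cluster allocations call only the inner helper. */
    static void clear_bits(struct space_ctl *ctl, struct space_entry *e,
                           unsigned long long bytes)
    {
        clear_bits_inner(ctl, e, bytes);
        ctl->free_space -= bytes;
    }

    int main(void)
    {
        struct space_ctl ctl = { 1 << 20 };
        struct space_entry e = { 4096 };

        clear_bits(&ctl, &e, 4096);
        return ctl.free_space == (1 << 20) - 4096 ? 0 : 1;
    }
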
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 15fceefbca0..0ccc7438ad3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7354,11 +7354,15 @@ static int btrfs_set_page_dirty(struct page *page)
7354static int btrfs_permission(struct inode *inode, int mask) 7354static int btrfs_permission(struct inode *inode, int mask)
7355{ 7355{
7356 struct btrfs_root *root = BTRFS_I(inode)->root; 7356 struct btrfs_root *root = BTRFS_I(inode)->root;
7357 umode_t mode = inode->i_mode;
7357 7358
7358 if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) 7359 if (mask & MAY_WRITE &&
7359 return -EROFS; 7360 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
7360 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) 7361 if (btrfs_root_readonly(root))
7361 return -EACCES; 7362 return -EROFS;
7363 if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
7364 return -EACCES;
7365 }
7362 return generic_permission(inode, mask); 7366 return generic_permission(inode, mask);
7363} 7367}
7364 7368
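
The btrfs_permission() hunk narrows the read-only checks so they only reject write requests against object types whose data actually lives in the filesystem; one plausible reading is that device nodes, FIFOs and sockets on a read-only root or subvolume can still be opened for writing, since writing to them does not modify the filesystem. A small user-space sketch of the narrowed test (the MAY_WRITE value and the rationale are assumptions, not taken from the patch text):

    #include <errno.h>
    #include <sys/stat.h>

    #define MAY_WRITE 0x2   /* stand-in for the kernel's MAY_WRITE */

    int deny_write_on_readonly(mode_t mode, int mask, int root_readonly)
    {
        if ((mask & MAY_WRITE) &&
            (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) &&
            root_readonly)
            return -EROFS;
        return 0;   /* fall through to generic_permission() in the kernel */
    }
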
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7cf01334994..970977aab22 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2236,6 +2236,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2236 btrfs_wait_ordered_range(src, off, len); 2236 btrfs_wait_ordered_range(src, off, len);
2237 } 2237 }
2238 2238
2239 /* truncate page cache pages from target inode range */
2240 truncate_inode_pages_range(&inode->i_data, off,
2241 ALIGN(off + len, PAGE_CACHE_SIZE) - 1);
2242
2239 /* clone data */ 2243 /* clone data */
2240 key.objectid = btrfs_ino(src); 2244 key.objectid = btrfs_ino(src);
2241 key.type = BTRFS_EXTENT_DATA_KEY; 2245 key.type = BTRFS_EXTENT_DATA_KEY;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index babee65f8ed..786639fca06 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
799 struct extent_buffer *eb, int slot, 799 struct extent_buffer *eb, int slot,
800 struct btrfs_key *key) 800 struct btrfs_key *key)
801{ 801{
802 struct inode *dir;
803 int ret;
804 struct btrfs_inode_ref *ref; 802 struct btrfs_inode_ref *ref;
803 struct btrfs_dir_item *di;
804 struct inode *dir;
805 struct inode *inode; 805 struct inode *inode;
806 char *name;
807 int namelen;
808 unsigned long ref_ptr; 806 unsigned long ref_ptr;
809 unsigned long ref_end; 807 unsigned long ref_end;
808 char *name;
809 int namelen;
810 int ret;
810 int search_done = 0; 811 int search_done = 0;
811 812
812 /* 813 /*
@@ -909,6 +910,25 @@ again:
909 } 910 }
910 btrfs_release_path(path); 911 btrfs_release_path(path);
911 912
913 /* look for a conflicting sequence number */
914 di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
915 btrfs_inode_ref_index(eb, ref),
916 name, namelen, 0);
917 if (di && !IS_ERR(di)) {
918 ret = drop_one_dir_item(trans, root, path, dir, di);
919 BUG_ON(ret);
920 }
921 btrfs_release_path(path);
922
 923 /* look for a conflicting name */
924 di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
925 name, namelen, 0);
926 if (di && !IS_ERR(di)) {
927 ret = drop_one_dir_item(trans, root, path, dir, di);
928 BUG_ON(ret);
929 }
930 btrfs_release_path(path);
931
912insert: 932insert:
913 /* insert our name */ 933 /* insert our name */
914 ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, 934 ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 53875ae73ad..f2a4cc79da6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -142,6 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
142 unsigned long limit; 142 unsigned long limit;
143 unsigned long last_waited = 0; 143 unsigned long last_waited = 0;
144 int force_reg = 0; 144 int force_reg = 0;
145 int sync_pending = 0;
145 struct blk_plug plug; 146 struct blk_plug plug;
146 147
147 /* 148 /*
@@ -229,6 +230,22 @@ loop_lock:
229 230
230 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 231 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
231 232
233 /*
234 * if we're doing the sync list, record that our
235 * plug has some sync requests on it
236 *
237 * If we're doing the regular list and there are
238 * sync requests sitting around, unplug before
239 * we add more
240 */
241 if (pending_bios == &device->pending_sync_bios) {
242 sync_pending = 1;
243 } else if (sync_pending) {
244 blk_finish_plug(&plug);
245 blk_start_plug(&plug);
246 sync_pending = 0;
247 }
248
232 submit_bio(cur->bi_rw, cur); 249 submit_bio(cur->bi_rw, cur);
233 num_run++; 250 num_run++;
234 batch_run++; 251 batch_run++;
@@ -500,6 +517,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
500 fs_devices->rw_devices--; 517 fs_devices->rw_devices--;
501 } 518 }
502 519
520 if (device->can_discard)
521 fs_devices->num_can_discard--;
522
503 new_device = kmalloc(sizeof(*new_device), GFP_NOFS); 523 new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
504 BUG_ON(!new_device); 524 BUG_ON(!new_device);
505 memcpy(new_device, device, sizeof(*new_device)); 525 memcpy(new_device, device, sizeof(*new_device));
@@ -508,6 +528,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
508 new_device->bdev = NULL; 528 new_device->bdev = NULL;
509 new_device->writeable = 0; 529 new_device->writeable = 0;
510 new_device->in_fs_metadata = 0; 530 new_device->in_fs_metadata = 0;
531 new_device->can_discard = 0;
511 list_replace_rcu(&device->dev_list, &new_device->dev_list); 532 list_replace_rcu(&device->dev_list, &new_device->dev_list);
512 533
513 call_rcu(&device->rcu, free_device); 534 call_rcu(&device->rcu, free_device);
@@ -547,6 +568,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
547static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, 568static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
548 fmode_t flags, void *holder) 569 fmode_t flags, void *holder)
549{ 570{
571 struct request_queue *q;
550 struct block_device *bdev; 572 struct block_device *bdev;
551 struct list_head *head = &fs_devices->devices; 573 struct list_head *head = &fs_devices->devices;
552 struct btrfs_device *device; 574 struct btrfs_device *device;
@@ -603,6 +625,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
603 seeding = 0; 625 seeding = 0;
604 } 626 }
605 627
628 q = bdev_get_queue(bdev);
629 if (blk_queue_discard(q)) {
630 device->can_discard = 1;
631 fs_devices->num_can_discard++;
632 }
633
606 device->bdev = bdev; 634 device->bdev = bdev;
607 device->in_fs_metadata = 0; 635 device->in_fs_metadata = 0;
608 device->mode = flags; 636 device->mode = flags;
@@ -835,6 +863,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
835 863
836 max_hole_start = search_start; 864 max_hole_start = search_start;
837 max_hole_size = 0; 865 max_hole_size = 0;
866 hole_size = 0;
838 867
839 if (search_start >= search_end) { 868 if (search_start >= search_end) {
840 ret = -ENOSPC; 869 ret = -ENOSPC;
@@ -917,7 +946,14 @@ next:
917 cond_resched(); 946 cond_resched();
918 } 947 }
919 948
920 hole_size = search_end- search_start; 949 /*
950 * At this point, search_start should be the end of
951 * allocated dev extents, and when shrinking the device,
952 * search_end may be smaller than search_start.
953 */
954 if (search_end > search_start)
955 hole_size = search_end - search_start;
956
921 if (hole_size > max_hole_size) { 957 if (hole_size > max_hole_size) {
922 max_hole_start = search_start; 958 max_hole_start = search_start;
923 max_hole_size = hole_size; 959 max_hole_size = hole_size;
@@ -1543,6 +1579,7 @@ error:
1543 1579
1544int btrfs_init_new_device(struct btrfs_root *root, char *device_path) 1580int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1545{ 1581{
1582 struct request_queue *q;
1546 struct btrfs_trans_handle *trans; 1583 struct btrfs_trans_handle *trans;
1547 struct btrfs_device *device; 1584 struct btrfs_device *device;
1548 struct block_device *bdev; 1585 struct block_device *bdev;
@@ -1612,6 +1649,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1612 1649
1613 lock_chunks(root); 1650 lock_chunks(root);
1614 1651
1652 q = bdev_get_queue(bdev);
1653 if (blk_queue_discard(q))
1654 device->can_discard = 1;
1615 device->writeable = 1; 1655 device->writeable = 1;
1616 device->work.func = pending_bios_fn; 1656 device->work.func = pending_bios_fn;
1617 generate_random_uuid(device->uuid); 1657 generate_random_uuid(device->uuid);
@@ -1647,6 +1687,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1647 root->fs_info->fs_devices->num_devices++; 1687 root->fs_info->fs_devices->num_devices++;
1648 root->fs_info->fs_devices->open_devices++; 1688 root->fs_info->fs_devices->open_devices++;
1649 root->fs_info->fs_devices->rw_devices++; 1689 root->fs_info->fs_devices->rw_devices++;
1690 if (device->can_discard)
1691 root->fs_info->fs_devices->num_can_discard++;
1650 root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; 1692 root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
1651 1693
1652 if (!blk_queue_nonrot(bdev_get_queue(bdev))) 1694 if (!blk_queue_nonrot(bdev_get_queue(bdev)))
@@ -2413,9 +2455,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2413 total_avail = device->total_bytes - device->bytes_used; 2455 total_avail = device->total_bytes - device->bytes_used;
2414 else 2456 else
2415 total_avail = 0; 2457 total_avail = 0;
2416 /* avail is off by max(alloc_start, 1MB), but that is the same 2458
2417 * for all devices, so it doesn't hurt the sorting later on 2459 /* If there is no space on this device, skip it. */
2418 */ 2460 if (total_avail == 0)
2461 continue;
2419 2462
2420 ret = find_free_dev_extent(trans, device, 2463 ret = find_free_dev_extent(trans, device,
2421 max_stripe_size * dev_stripes, 2464 max_stripe_size * dev_stripes,
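
Several hunks in this patch cooperate on discard support: the new can_discard flag is set in __btrfs_open_devices() and btrfs_init_new_device() when the block device's queue advertises discard, num_can_discard tracks it per fs_devices, and the btrfs_discard_extent() hunk earlier in the diff skips stripes on devices that cannot discard instead of collecting -EOPNOTSUPP from them. A simplified sketch of that skip, with stand-in structures and btrfs_issue_discard() elided:

    struct bdevice { int can_discard; };
    struct stripe  { struct bdevice *dev;
                     unsigned long long physical, length; };

    /* Returns how many bytes were actually discarded; stripes on devices
     * without discard support are silently skipped, mirroring the hunk in
     * btrfs_discard_extent(). */
    unsigned long long discard_stripes(struct stripe *stripes, int n)
    {
        unsigned long long discarded = 0;
        int i;

        for (i = 0; i < n; i++) {
            if (!stripes[i].dev->can_discard)
                continue;
            discarded += stripes[i].length;
        }
        return discarded;
    }
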
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 7c12d61ae7a..6d866db4e17 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -48,6 +48,7 @@ struct btrfs_device {
48 int writeable; 48 int writeable;
49 int in_fs_metadata; 49 int in_fs_metadata;
50 int missing; 50 int missing;
51 int can_discard;
51 52
52 spinlock_t io_lock; 53 spinlock_t io_lock;
53 54
@@ -104,6 +105,7 @@ struct btrfs_fs_devices {
104 u64 rw_devices; 105 u64 rw_devices;
105 u64 missing_devices; 106 u64 missing_devices;
106 u64 total_rw_bytes; 107 u64 total_rw_bytes;
108 u64 num_can_discard;
107 struct block_device *latest_bdev; 109 struct block_device *latest_bdev;
108 110
109 /* all of the devices in the FS, protected by a mutex 111 /* all of the devices in the FS, protected by a mutex
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 2fe3cf13b2e..6d40656e1e2 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -176,7 +176,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
176 176
177#ifdef CONFIG_CIFS_STATS2 177#ifdef CONFIG_CIFS_STATS2
178 seq_printf(m, " In Send: %d In MaxReq Wait: %d", 178 seq_printf(m, " In Send: %d In MaxReq Wait: %d",
179 atomic_read(&server->inSend), 179 atomic_read(&server->in_send),
180 atomic_read(&server->num_waiters)); 180 atomic_read(&server->num_waiters));
181#endif 181#endif
182 182
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 21de1d6d584..d0f59faefb7 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -991,24 +991,6 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
991 return pntsd; 991 return pntsd;
992} 992}
993 993
994static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid,
995 struct cifs_ntsd *pnntsd, u32 acllen)
996{
997 int xid, rc;
998 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
999
1000 if (IS_ERR(tlink))
1001 return PTR_ERR(tlink);
1002
1003 xid = GetXid();
1004 rc = CIFSSMBSetCIFSACL(xid, tlink_tcon(tlink), fid, pnntsd, acllen);
1005 FreeXid(xid);
1006 cifs_put_tlink(tlink);
1007
1008 cFYI(DBG2, "SetCIFSACL rc = %d", rc);
1009 return rc;
1010}
1011
1012static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, 994static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path,
1013 struct cifs_ntsd *pnntsd, u32 acllen) 995 struct cifs_ntsd *pnntsd, u32 acllen)
1014{ 996{
@@ -1047,18 +1029,10 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
1047 struct inode *inode, const char *path) 1029 struct inode *inode, const char *path)
1048{ 1030{
1049 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1031 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1050 struct cifsFileInfo *open_file;
1051 int rc;
1052 1032
1053 cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode); 1033 cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode);
1054 1034
1055 open_file = find_readable_file(CIFS_I(inode), true); 1035 return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen);
1056 if (!open_file)
1057 return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen);
1058
1059 rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen);
1060 cifsFileInfo_put(open_file);
1061 return rc;
1062} 1036}
1063 1037
 1064/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ 1038/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index cb71dc1f94d..95da8027983 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -125,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
125extern const struct export_operations cifs_export_ops; 125extern const struct export_operations cifs_export_ops;
126#endif /* CIFS_NFSD_EXPORT */ 126#endif /* CIFS_NFSD_EXPORT */
127 127
128#define CIFS_VERSION "1.74" 128#define CIFS_VERSION "1.75"
129#endif /* _CIFSFS_H */ 129#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 38ce6d44b14..95dad9d14cf 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -291,7 +291,7 @@ struct TCP_Server_Info {
291 struct fscache_cookie *fscache; /* client index cache cookie */ 291 struct fscache_cookie *fscache; /* client index cache cookie */
292#endif 292#endif
293#ifdef CONFIG_CIFS_STATS2 293#ifdef CONFIG_CIFS_STATS2
294 atomic_t inSend; /* requests trying to send */ 294 atomic_t in_send; /* requests trying to send */
295 atomic_t num_waiters; /* blocked waiting to get in sendrecv */ 295 atomic_t num_waiters; /* blocked waiting to get in sendrecv */
296#endif 296#endif
297}; 297};
@@ -672,12 +672,54 @@ struct mid_q_entry {
672 bool multiEnd:1; /* both received */ 672 bool multiEnd:1; /* both received */
673}; 673};
674 674
675struct oplock_q_entry { 675/* Make code in transport.c a little cleaner by moving
676 struct list_head qhead; 676 update of optional stats into function below */
677 struct inode *pinode; 677#ifdef CONFIG_CIFS_STATS2
678 struct cifs_tcon *tcon; 678
679 __u16 netfid; 679static inline void cifs_in_send_inc(struct TCP_Server_Info *server)
680}; 680{
681 atomic_inc(&server->in_send);
682}
683
684static inline void cifs_in_send_dec(struct TCP_Server_Info *server)
685{
686 atomic_dec(&server->in_send);
687}
688
689static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server)
690{
691 atomic_inc(&server->num_waiters);
692}
693
694static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server)
695{
696 atomic_dec(&server->num_waiters);
697}
698
699static inline void cifs_save_when_sent(struct mid_q_entry *mid)
700{
701 mid->when_sent = jiffies;
702}
703#else
704static inline void cifs_in_send_inc(struct TCP_Server_Info *server)
705{
706}
707static inline void cifs_in_send_dec(struct TCP_Server_Info *server)
708{
709}
710
711static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server)
712{
713}
714
715static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server)
716{
717}
718
719static inline void cifs_save_when_sent(struct mid_q_entry *mid)
720{
721}
722#endif
681 723
682/* for pending dnotify requests */ 724/* for pending dnotify requests */
683struct dir_notify_req { 725struct dir_notify_req {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 80c2e3add3a..633c246b677 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2878,7 +2878,8 @@ cleanup_volume_info_contents(struct smb_vol *volume_info)
2878 kfree(volume_info->username); 2878 kfree(volume_info->username);
2879 kzfree(volume_info->password); 2879 kzfree(volume_info->password);
2880 kfree(volume_info->UNC); 2880 kfree(volume_info->UNC);
2881 kfree(volume_info->UNCip); 2881 if (volume_info->UNCip != volume_info->UNC + 2)
2882 kfree(volume_info->UNCip);
2882 kfree(volume_info->domainname); 2883 kfree(volume_info->domainname);
2883 kfree(volume_info->iocharset); 2884 kfree(volume_info->iocharset);
2884 kfree(volume_info->prepath); 2885 kfree(volume_info->prepath);
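
The connect.c hunk guards the kfree() of UNCip because that pointer is not always a separate allocation: when no explicit IP is given it is commonly set to point just past the leading backslashes of the UNC string, so freeing it would be a double free of part of UNC. A user-space illustration of the aliasing and the guarded cleanup (the setup code is an assumption for demonstration):

    #include <stdlib.h>
    #include <string.h>

    struct vol { char *UNC; char *UNCip; };

    static void cleanup(struct vol *v)
    {
        if (v->UNCip != v->UNC + 2)   /* only free an independent buffer */
            free(v->UNCip);
        free(v->UNC);
    }

    int main(void)
    {
        struct vol v;

        v.UNC = strdup("\\\\server\\share");
        v.UNCip = v.UNC + 2;          /* alias into UNC, no own allocation */
        cleanup(&v);                  /* must not free v.UNCip here */
        return 0;
    }
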
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ae576fbb514..72d448bf96c 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -105,8 +105,8 @@ cifs_bp_rename_retry:
105 } 105 }
106 rcu_read_unlock(); 106 rcu_read_unlock();
107 if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { 107 if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) {
108 cERROR(1, "did not end path lookup where expected namelen is %d", 108 cFYI(1, "did not end path lookup where expected. namelen=%d "
109 namelen); 109 "dfsplen=%d", namelen, dfsplen);
110 /* presumably this is only possible if racing with a rename 110 /* presumably this is only possible if racing with a rename
111 of one of the parent directories (we can not lock the dentries 111 of one of the parent directories (we can not lock the dentries
112 above us to prevent this, but retrying should be harmless) */ 112 above us to prevent this, but retrying should be harmless) */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c1b9c4b1073..10ca6b2c26b 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -266,15 +266,11 @@ static int wait_for_free_request(struct TCP_Server_Info *server,
266 while (1) { 266 while (1) {
267 if (atomic_read(&server->inFlight) >= cifs_max_pending) { 267 if (atomic_read(&server->inFlight) >= cifs_max_pending) {
268 spin_unlock(&GlobalMid_Lock); 268 spin_unlock(&GlobalMid_Lock);
269#ifdef CONFIG_CIFS_STATS2 269 cifs_num_waiters_inc(server);
270 atomic_inc(&server->num_waiters);
271#endif
272 wait_event(server->request_q, 270 wait_event(server->request_q,
273 atomic_read(&server->inFlight) 271 atomic_read(&server->inFlight)
274 < cifs_max_pending); 272 < cifs_max_pending);
275#ifdef CONFIG_CIFS_STATS2 273 cifs_num_waiters_dec(server);
276 atomic_dec(&server->num_waiters);
277#endif
278 spin_lock(&GlobalMid_Lock); 274 spin_lock(&GlobalMid_Lock);
279 } else { 275 } else {
280 if (server->tcpStatus == CifsExiting) { 276 if (server->tcpStatus == CifsExiting) {
@@ -381,15 +377,13 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
381 mid->callback = callback; 377 mid->callback = callback;
382 mid->callback_data = cbdata; 378 mid->callback_data = cbdata;
383 mid->midState = MID_REQUEST_SUBMITTED; 379 mid->midState = MID_REQUEST_SUBMITTED;
384#ifdef CONFIG_CIFS_STATS2 380
385 atomic_inc(&server->inSend); 381 cifs_in_send_inc(server);
386#endif
387 rc = smb_sendv(server, iov, nvec); 382 rc = smb_sendv(server, iov, nvec);
388#ifdef CONFIG_CIFS_STATS2 383 cifs_in_send_dec(server);
389 atomic_dec(&server->inSend); 384 cifs_save_when_sent(mid);
390 mid->when_sent = jiffies;
391#endif
392 mutex_unlock(&server->srv_mutex); 385 mutex_unlock(&server->srv_mutex);
386
393 if (rc) 387 if (rc)
394 goto out_err; 388 goto out_err;
395 389
@@ -575,14 +569,10 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
575 } 569 }
576 570
577 midQ->midState = MID_REQUEST_SUBMITTED; 571 midQ->midState = MID_REQUEST_SUBMITTED;
578#ifdef CONFIG_CIFS_STATS2 572 cifs_in_send_inc(ses->server);
579 atomic_inc(&ses->server->inSend);
580#endif
581 rc = smb_sendv(ses->server, iov, n_vec); 573 rc = smb_sendv(ses->server, iov, n_vec);
582#ifdef CONFIG_CIFS_STATS2 574 cifs_in_send_dec(ses->server);
583 atomic_dec(&ses->server->inSend); 575 cifs_save_when_sent(midQ);
584 midQ->when_sent = jiffies;
585#endif
586 576
587 mutex_unlock(&ses->server->srv_mutex); 577 mutex_unlock(&ses->server->srv_mutex);
588 578
@@ -703,14 +693,11 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
703 } 693 }
704 694
705 midQ->midState = MID_REQUEST_SUBMITTED; 695 midQ->midState = MID_REQUEST_SUBMITTED;
706#ifdef CONFIG_CIFS_STATS2 696
707 atomic_inc(&ses->server->inSend); 697 cifs_in_send_inc(ses->server);
708#endif
709 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); 698 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
710#ifdef CONFIG_CIFS_STATS2 699 cifs_in_send_dec(ses->server);
711 atomic_dec(&ses->server->inSend); 700 cifs_save_when_sent(midQ);
712 midQ->when_sent = jiffies;
713#endif
714 mutex_unlock(&ses->server->srv_mutex); 701 mutex_unlock(&ses->server->srv_mutex);
715 702
716 if (rc < 0) 703 if (rc < 0)
@@ -843,14 +830,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
843 } 830 }
844 831
845 midQ->midState = MID_REQUEST_SUBMITTED; 832 midQ->midState = MID_REQUEST_SUBMITTED;
846#ifdef CONFIG_CIFS_STATS2 833 cifs_in_send_inc(ses->server);
847 atomic_inc(&ses->server->inSend);
848#endif
849 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); 834 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
850#ifdef CONFIG_CIFS_STATS2 835 cifs_in_send_dec(ses->server);
851 atomic_dec(&ses->server->inSend); 836 cifs_save_when_sent(midQ);
852 midQ->when_sent = jiffies;
853#endif
854 mutex_unlock(&ses->server->srv_mutex); 837 mutex_unlock(&ses->server->srv_mutex);
855 838
856 if (rc < 0) { 839 if (rc < 0) {
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index bb85757689b..5802fa1dab1 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -289,10 +289,10 @@ static inline int ext4_should_order_data(struct inode *inode)
289 289
290static inline int ext4_should_writeback_data(struct inode *inode) 290static inline int ext4_should_writeback_data(struct inode *inode)
291{ 291{
292 if (!S_ISREG(inode->i_mode))
293 return 0;
294 if (EXT4_JOURNAL(inode) == NULL) 292 if (EXT4_JOURNAL(inode) == NULL)
295 return 1; 293 return 1;
294 if (!S_ISREG(inode->i_mode))
295 return 0;
296 if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) 296 if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
297 return 0; 297 return 0;
298 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 298 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
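
The ext4_jbd2.h hunk only reorders two tests, but the order matters: when the filesystem has no journal at all, every inode, not just regular files, has to report writeback data semantics, so that check now runs first. A boolean model of the reordered decision (parameter names are illustrative):

    int should_writeback_data(int has_journal, int is_regular,
                              int journal_data_flag, int writeback_mount)
    {
        if (!has_journal)
            return 1;               /* no journal: always writeback */
        if (!is_regular)
            return 0;
        if (journal_data_flag)
            return 0;
        return writeback_mount;     /* data=writeback mount option */
    }
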
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index b8602cde5b5..0962642119c 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -800,12 +800,17 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
800 } 800 }
801 801
802retry: 802retry:
803 if (rw == READ && ext4_should_dioread_nolock(inode)) 803 if (rw == READ && ext4_should_dioread_nolock(inode)) {
804 if (unlikely(!list_empty(&ei->i_completed_io_list))) {
805 mutex_lock(&inode->i_mutex);
806 ext4_flush_completed_IO(inode);
807 mutex_unlock(&inode->i_mutex);
808 }
804 ret = __blockdev_direct_IO(rw, iocb, inode, 809 ret = __blockdev_direct_IO(rw, iocb, inode,
805 inode->i_sb->s_bdev, iov, 810 inode->i_sb->s_bdev, iov,
806 offset, nr_segs, 811 offset, nr_segs,
807 ext4_get_block, NULL, NULL, 0); 812 ext4_get_block, NULL, NULL, 0);
808 else { 813 } else {
809 ret = blockdev_direct_IO(rw, iocb, inode, iov, 814 ret = blockdev_direct_IO(rw, iocb, inode, iov,
810 offset, nr_segs, ext4_get_block); 815 offset, nr_segs, ext4_get_block);
811 816
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d47264cafee..c4da98a959a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -120,6 +120,12 @@ void ext4_evict_inode(struct inode *inode)
120 int err; 120 int err;
121 121
122 trace_ext4_evict_inode(inode); 122 trace_ext4_evict_inode(inode);
123
124 mutex_lock(&inode->i_mutex);
125 ext4_flush_completed_IO(inode);
126 mutex_unlock(&inode->i_mutex);
127 ext4_ioend_wait(inode);
128
123 if (inode->i_nlink) { 129 if (inode->i_nlink) {
124 /* 130 /*
125 * When journalling data dirty buffers are tracked only in the 131 * When journalling data dirty buffers are tracked only in the
@@ -983,6 +989,8 @@ static int ext4_journalled_write_end(struct file *file,
983 from = pos & (PAGE_CACHE_SIZE - 1); 989 from = pos & (PAGE_CACHE_SIZE - 1);
984 to = from + len; 990 to = from + len;
985 991
992 BUG_ON(!ext4_handle_valid(handle));
993
986 if (copied < len) { 994 if (copied < len) {
987 if (!PageUptodate(page)) 995 if (!PageUptodate(page))
988 copied = 0; 996 copied = 0;
@@ -1283,7 +1291,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1283 else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) 1291 else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
1284 err = ext4_bio_write_page(&io_submit, page, 1292 err = ext4_bio_write_page(&io_submit, page,
1285 len, mpd->wbc); 1293 len, mpd->wbc);
1286 else 1294 else if (buffer_uninit(page_bufs)) {
1295 ext4_set_bh_endio(page_bufs, inode);
1296 err = block_write_full_page_endio(page,
1297 noalloc_get_block_write,
1298 mpd->wbc, ext4_end_io_buffer_write);
1299 } else
 1287 err = block_write_full_page(page, 1300 err = block_write_full_page(page,
1288 noalloc_get_block_write, mpd->wbc); 1301 noalloc_get_block_write, mpd->wbc);
1289 1302
@@ -1699,6 +1712,8 @@ static int __ext4_journalled_writepage(struct page *page,
1699 goto out; 1712 goto out;
1700 } 1713 }
1701 1714
1715 BUG_ON(!ext4_handle_valid(handle));
1716
1702 ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, 1717 ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
1703 do_journal_get_write_access); 1718 do_journal_get_write_access);
1704 1719
@@ -2668,8 +2683,15 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
2668 goto out; 2683 goto out;
2669 } 2684 }
2670 2685
2671 io_end->flag = EXT4_IO_END_UNWRITTEN; 2686 /*
2687 * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
2688 * but being more careful is always safe for the future change.
2689 */
2672 inode = io_end->inode; 2690 inode = io_end->inode;
2691 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
2692 io_end->flag |= EXT4_IO_END_UNWRITTEN;
2693 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
2694 }
2673 2695
2674 /* Add the io_end to per-inode completed io list*/ 2696 /* Add the io_end to per-inode completed io list*/
2675 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); 2697 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 430c401d089..78839af7ce2 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -334,8 +334,10 @@ submit_and_retry:
334 if ((io_end->num_io_pages >= MAX_IO_PAGES) && 334 if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
335 (io_end->pages[io_end->num_io_pages-1] != io_page)) 335 (io_end->pages[io_end->num_io_pages-1] != io_page))
336 goto submit_and_retry; 336 goto submit_and_retry;
337 if (buffer_uninit(bh)) 337 if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
338 io->io_end->flag |= EXT4_IO_END_UNWRITTEN; 338 io_end->flag |= EXT4_IO_END_UNWRITTEN;
339 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
340 }
339 io->io_end->size += bh->b_size; 341 io->io_end->size += bh->b_size;
340 io->io_next_block++; 342 io->io_next_block++;
341 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); 343 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4687fea0c00..44d0c8db223 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -919,7 +919,6 @@ static void ext4_i_callback(struct rcu_head *head)
919 919
920static void ext4_destroy_inode(struct inode *inode) 920static void ext4_destroy_inode(struct inode *inode)
921{ 921{
922 ext4_ioend_wait(inode);
923 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 922 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
924 ext4_msg(inode->i_sb, KERN_ERR, 923 ext4_msg(inode->i_sb, KERN_ERR,
925 "Inode %lu (%p): orphan list check failed!", 924 "Inode %lu (%p): orphan list check failed!",
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4ad64732cbc..5efbd5d7701 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1231,7 +1231,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
1231 struct super_block *sb = dir->i_sb; 1231 struct super_block *sb = dir->i_sb;
1232 struct msdos_sb_info *sbi = MSDOS_SB(sb); 1232 struct msdos_sb_info *sbi = MSDOS_SB(sb);
1233 struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */ 1233 struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */
1234 struct msdos_dir_entry *de; 1234 struct msdos_dir_entry *uninitialized_var(de);
1235 int err, free_slots, i, nr_bhs; 1235 int err, free_slots, i, nr_bhs;
1236 loff_t pos, i_pos; 1236 loff_t pos, i_pos;
1237 1237
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 5942fec22c6..1726d730304 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1188,9 +1188,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
1188out: 1188out:
1189 /* UTF-8 doesn't provide FAT semantics */ 1189 /* UTF-8 doesn't provide FAT semantics */
1190 if (!strcmp(opts->iocharset, "utf8")) { 1190 if (!strcmp(opts->iocharset, "utf8")) {
1191 fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset" 1191 fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset"
1192 " for FAT filesystems, filesystem will be " 1192 " for FAT filesystems, filesystem will be "
1193 "case sensitive!\n"); 1193 "case sensitive!");
1194 } 1194 }
1195 1195
1196 /* If user doesn't specify allow_utime, it's initialized from dmask. */ 1196 /* If user doesn't specify allow_utime, it's initialized from dmask. */
@@ -1367,6 +1367,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1367 sbi->free_clusters = -1; /* Don't know yet */ 1367 sbi->free_clusters = -1; /* Don't know yet */
1368 sbi->free_clus_valid = 0; 1368 sbi->free_clus_valid = 0;
1369 sbi->prev_free = FAT_START_ENT; 1369 sbi->prev_free = FAT_START_ENT;
1370 sb->s_maxbytes = 0xffffffff;
1370 1371
1371 if (!sbi->fat_length && b->fat32_length) { 1372 if (!sbi->fat_length && b->fat32_length) {
1372 struct fat_boot_fsinfo *fsinfo; 1373 struct fat_boot_fsinfo *fsinfo;
@@ -1377,8 +1378,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1377 sbi->fat_length = le32_to_cpu(b->fat32_length); 1378 sbi->fat_length = le32_to_cpu(b->fat32_length);
1378 sbi->root_cluster = le32_to_cpu(b->root_cluster); 1379 sbi->root_cluster = le32_to_cpu(b->root_cluster);
1379 1380
1380 sb->s_maxbytes = 0xffffffff;
1381
1382 /* MC - if info_sector is 0, don't multiply by 0 */ 1381 /* MC - if info_sector is 0, don't multiply by 0 */
1383 sbi->fsinfo_sector = le16_to_cpu(b->info_sector); 1382 sbi->fsinfo_sector = le16_to_cpu(b->info_sector);
1384 if (sbi->fsinfo_sector == 0) 1383 if (sbi->fsinfo_sector == 0)
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index adcf92d3b60..7971f37534a 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -68,7 +68,7 @@ int jfs_umount(struct super_block *sb)
68 /* 68 /*
69 * Wait for outstanding transactions to be written to log: 69 * Wait for outstanding transactions to be written to log:
70 */ 70 */
71 jfs_flush_journal(log, 1); 71 jfs_flush_journal(log, 2);
72 72
73 /* 73 /*
74 * close fileset inode allocation map (aka fileset inode) 74 * close fileset inode allocation map (aka fileset inode)
@@ -146,7 +146,7 @@ int jfs_umount_rw(struct super_block *sb)
146 * 146 *
147 * remove file system from log active file system list. 147 * remove file system from log active file system list.
148 */ 148 */
149 jfs_flush_journal(log, 1); 149 jfs_flush_journal(log, 2);
150 150
151 /* 151 /*
152 * Make sure all metadata makes it to disk 152 * Make sure all metadata makes it to disk
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index e56564d2ef9..9561c8fc8bd 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -36,6 +36,7 @@
36#include <linux/namei.h> 36#include <linux/namei.h>
37#include <linux/bio.h> /* struct bio */ 37#include <linux/bio.h> /* struct bio */
38#include <linux/buffer_head.h> /* various write calls */ 38#include <linux/buffer_head.h> /* various write calls */
39#include <linux/prefetch.h>
39 40
40#include "blocklayout.h" 41#include "blocklayout.h"
41 42
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b257383bb56..07df5f1d85e 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -38,6 +38,7 @@ enum nfs4_callback_opnum {
38struct cb_process_state { 38struct cb_process_state {
39 __be32 drc_status; 39 __be32 drc_status;
40 struct nfs_client *clp; 40 struct nfs_client *clp;
41 int slotid;
41}; 42};
42 43
43struct cb_compound_hdr_arg { 44struct cb_compound_hdr_arg {
@@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall(
166 void *dummy, struct cb_process_state *cps); 167 void *dummy, struct cb_process_state *cps);
167 168
168extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); 169extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
169extern void nfs4_cb_take_slot(struct nfs_client *clp);
170 170
171struct cb_devicenotifyitem { 171struct cb_devicenotifyitem {
172 uint32_t cbd_notify_type; 172 uint32_t cbd_notify_type;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 74780f9f852..43926add945 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -348,7 +348,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
348 /* Normal */ 348 /* Normal */
349 if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { 349 if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
350 slot->seq_nr++; 350 slot->seq_nr++;
351 return htonl(NFS4_OK); 351 goto out_ok;
352 } 352 }
353 353
354 /* Replay */ 354 /* Replay */
@@ -367,11 +367,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
367 /* Wraparound */ 367 /* Wraparound */
368 if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { 368 if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
369 slot->seq_nr = 1; 369 slot->seq_nr = 1;
370 return htonl(NFS4_OK); 370 goto out_ok;
371 } 371 }
372 372
373 /* Misordered request */ 373 /* Misordered request */
374 return htonl(NFS4ERR_SEQ_MISORDERED); 374 return htonl(NFS4ERR_SEQ_MISORDERED);
375out_ok:
376 tbl->highest_used_slotid = args->csa_slotid;
377 return htonl(NFS4_OK);
375} 378}
376 379
377/* 380/*
@@ -433,26 +436,37 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
433 struct cb_sequenceres *res, 436 struct cb_sequenceres *res,
434 struct cb_process_state *cps) 437 struct cb_process_state *cps)
435{ 438{
439 struct nfs4_slot_table *tbl;
436 struct nfs_client *clp; 440 struct nfs_client *clp;
437 int i; 441 int i;
438 __be32 status = htonl(NFS4ERR_BADSESSION); 442 __be32 status = htonl(NFS4ERR_BADSESSION);
439 443
440 cps->clp = NULL;
441
442 clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); 444 clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
443 if (clp == NULL) 445 if (clp == NULL)
444 goto out; 446 goto out;
445 447
448 tbl = &clp->cl_session->bc_slot_table;
449
450 spin_lock(&tbl->slot_tbl_lock);
446 /* state manager is resetting the session */ 451 /* state manager is resetting the session */
447 if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { 452 if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
448 status = NFS4ERR_DELAY; 453 spin_unlock(&tbl->slot_tbl_lock);
454 status = htonl(NFS4ERR_DELAY);
455 /* Return NFS4ERR_BADSESSION if we're draining the session
456 * in order to reset it.
457 */
458 if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
459 status = htonl(NFS4ERR_BADSESSION);
449 goto out; 460 goto out;
450 } 461 }
451 462
452 status = validate_seqid(&clp->cl_session->bc_slot_table, args); 463 status = validate_seqid(&clp->cl_session->bc_slot_table, args);
464 spin_unlock(&tbl->slot_tbl_lock);
453 if (status) 465 if (status)
454 goto out; 466 goto out;
455 467
468 cps->slotid = args->csa_slotid;
469
456 /* 470 /*
457 * Check for pending referring calls. If a match is found, a 471 * Check for pending referring calls. If a match is found, a
458 * related callback was received before the response to the original 472 * related callback was received before the response to the original
@@ -469,7 +483,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
469 res->csr_slotid = args->csa_slotid; 483 res->csr_slotid = args->csa_slotid;
470 res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; 484 res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
471 res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; 485 res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
472 nfs4_cb_take_slot(clp);
473 486
474out: 487out:
475 cps->clp = clp; /* put in nfs4_callback_compound */ 488 cps->clp = clp; /* put in nfs4_callback_compound */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c6c86a77e04..918ad647afe 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
754 * Let the state manager know callback processing done. 754 * Let the state manager know callback processing done.
755 * A single slot, so highest used slotid is either 0 or -1 755 * A single slot, so highest used slotid is either 0 or -1
756 */ 756 */
757 tbl->highest_used_slotid--; 757 tbl->highest_used_slotid = -1;
758 nfs4_check_drain_bc_complete(session); 758 nfs4_check_drain_bc_complete(session);
759 spin_unlock(&tbl->slot_tbl_lock); 759 spin_unlock(&tbl->slot_tbl_lock);
760} 760}
761 761
762static void nfs4_cb_free_slot(struct nfs_client *clp) 762static void nfs4_cb_free_slot(struct cb_process_state *cps)
763{ 763{
764 if (clp && clp->cl_session) 764 if (cps->slotid != -1)
765 nfs4_callback_free_slot(clp->cl_session); 765 nfs4_callback_free_slot(cps->clp->cl_session);
766}
767
768/* A single slot, so highest used slotid is either 0 or -1 */
769void nfs4_cb_take_slot(struct nfs_client *clp)
770{
771 struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
772
773 spin_lock(&tbl->slot_tbl_lock);
774 tbl->highest_used_slotid++;
775 BUG_ON(tbl->highest_used_slotid != 0);
776 spin_unlock(&tbl->slot_tbl_lock);
777} 766}
778 767
779#else /* CONFIG_NFS_V4_1 */ 768#else /* CONFIG_NFS_V4_1 */
@@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
784 return htonl(NFS4ERR_MINOR_VERS_MISMATCH); 773 return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
785} 774}
786 775
787static void nfs4_cb_free_slot(struct nfs_client *clp) 776static void nfs4_cb_free_slot(struct cb_process_state *cps)
788{ 777{
789} 778}
790#endif /* CONFIG_NFS_V4_1 */ 779#endif /* CONFIG_NFS_V4_1 */
@@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
866 struct cb_process_state cps = { 855 struct cb_process_state cps = {
867 .drc_status = 0, 856 .drc_status = 0,
868 .clp = NULL, 857 .clp = NULL,
858 .slotid = -1,
869 }; 859 };
870 unsigned int nops = 0; 860 unsigned int nops = 0;
871 861
@@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
906 896
907 *hdr_res.status = status; 897 *hdr_res.status = status;
908 *hdr_res.nops = htonl(nops); 898 *hdr_res.nops = htonl(nops);
909 nfs4_cb_free_slot(cps.clp); 899 nfs4_cb_free_slot(&cps);
910 nfs_put_client(cps.clp); 900 nfs_put_client(cps.clp);
911 dprintk("%s: done, status = %u\n", __func__, ntohl(status)); 901 dprintk("%s: done, status = %u\n", __func__, ntohl(status));
912 return rpc_success; 902 return rpc_success;
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 9383ca7245b..d0cda12fddc 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -479,7 +479,6 @@ static int _io_check(struct objio_state *ios, bool is_write)
479 for (i = 0; i < ios->numdevs; i++) { 479 for (i = 0; i < ios->numdevs; i++) {
480 struct osd_sense_info osi; 480 struct osd_sense_info osi;
481 struct osd_request *or = ios->per_dev[i].or; 481 struct osd_request *or = ios->per_dev[i].or;
482 unsigned dev;
483 int ret; 482 int ret;
484 483
485 if (!or) 484 if (!or)
@@ -500,9 +499,8 @@ static int _io_check(struct objio_state *ios, bool is_write)
500 499
501 continue; /* we recovered */ 500 continue; /* we recovered */
502 } 501 }
503 dev = ios->per_dev[i].dev; 502 objlayout_io_set_result(&ios->ol_state, i,
504 objlayout_io_set_result(&ios->ol_state, dev, 503 &ios->layout->comps[i].oc_object_id,
505 &ios->layout->comps[dev].oc_object_id,
506 osd_pri_2_pnfs_err(osi.osd_err_pri), 504 osd_pri_2_pnfs_err(osi.osd_err_pri),
507 ios->per_dev[i].offset, 505 ios->per_dev[i].offset,
508 ios->per_dev[i].length, 506 ios->per_dev[i].length,
@@ -589,22 +587,19 @@ static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
589} 587}
590 588
591static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, 589static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg,
592 unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, 590 unsigned pgbase, struct _objio_per_comp *per_dev, int len,
593 gfp_t gfp_flags) 591 gfp_t gfp_flags)
594{ 592{
595 unsigned pg = *cur_pg; 593 unsigned pg = *cur_pg;
594 int cur_len = len;
596 struct request_queue *q = 595 struct request_queue *q =
597 osd_request_queue(_io_od(ios, per_dev->dev)); 596 osd_request_queue(_io_od(ios, per_dev->dev));
598 597
599 per_dev->length += cur_len;
600
601 if (per_dev->bio == NULL) { 598 if (per_dev->bio == NULL) {
602 unsigned stripes = ios->layout->num_comps / 599 unsigned pages_in_stripe = ios->layout->group_width *
603 ios->layout->mirrors_p1;
604 unsigned pages_in_stripe = stripes *
605 (ios->layout->stripe_unit / PAGE_SIZE); 600 (ios->layout->stripe_unit / PAGE_SIZE);
606 unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / 601 unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
607 stripes; 602 ios->layout->group_width;
608 603
609 if (BIO_MAX_PAGES_KMALLOC < bio_size) 604 if (BIO_MAX_PAGES_KMALLOC < bio_size)
610 bio_size = BIO_MAX_PAGES_KMALLOC; 605 bio_size = BIO_MAX_PAGES_KMALLOC;
@@ -632,6 +627,7 @@ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg,
632 } 627 }
633 BUG_ON(cur_len); 628 BUG_ON(cur_len);
634 629
630 per_dev->length += len;
635 *cur_pg = pg; 631 *cur_pg = pg;
636 return 0; 632 return 0;
637} 633}
@@ -650,7 +646,7 @@ static int _prepare_one_group(struct objio_state *ios, u64 length,
650 int ret = 0; 646 int ret = 0;
651 647
652 while (length) { 648 while (length) {
653 struct _objio_per_comp *per_dev = &ios->per_dev[dev]; 649 struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev];
654 unsigned cur_len, page_off = 0; 650 unsigned cur_len, page_off = 0;
655 651
656 if (!per_dev->length) { 652 if (!per_dev->length) {
@@ -670,8 +666,8 @@ static int _prepare_one_group(struct objio_state *ios, u64 length,
670 cur_len = stripe_unit; 666 cur_len = stripe_unit;
671 } 667 }
672 668
673 if (max_comp < dev) 669 if (max_comp < dev - first_dev)
674 max_comp = dev; 670 max_comp = dev - first_dev;
675 } else { 671 } else {
676 cur_len = stripe_unit; 672 cur_len = stripe_unit;
677 } 673 }
@@ -806,7 +802,7 @@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
806 struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; 802 struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
807 unsigned dev = per_dev->dev; 803 unsigned dev = per_dev->dev;
808 struct pnfs_osd_object_cred *cred = 804 struct pnfs_osd_object_cred *cred =
809 &ios->layout->comps[dev]; 805 &ios->layout->comps[cur_comp];
810 struct osd_obj_id obj = { 806 struct osd_obj_id obj = {
811 .partition = cred->oc_object_id.oid_partition_id, 807 .partition = cred->oc_object_id.oid_partition_id,
812 .id = cred->oc_object_id.oid_object_id, 808 .id = cred->oc_object_id.oid_object_id,
@@ -904,7 +900,7 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
904 for (; cur_comp < last_comp; ++cur_comp, ++dev) { 900 for (; cur_comp < last_comp; ++cur_comp, ++dev) {
905 struct osd_request *or = NULL; 901 struct osd_request *or = NULL;
906 struct pnfs_osd_object_cred *cred = 902 struct pnfs_osd_object_cred *cred =
907 &ios->layout->comps[dev]; 903 &ios->layout->comps[cur_comp];
908 struct osd_obj_id obj = { 904 struct osd_obj_id obj = {
909 .partition = cred->oc_object_id.oid_partition_id, 905 .partition = cred->oc_object_id.oid_partition_id,
910 .id = cred->oc_object_id.oid_object_id, 906 .id = cred->oc_object_id.oid_object_id,
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
index 16fc758e912..b3918f7ac34 100644
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
@@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
170 p = _osd_xdr_decode_data_map(p, &layout->olo_map); 170 p = _osd_xdr_decode_data_map(p, &layout->olo_map);
171 layout->olo_comps_index = be32_to_cpup(p++); 171 layout->olo_comps_index = be32_to_cpup(p++);
172 layout->olo_num_comps = be32_to_cpup(p++); 172 layout->olo_num_comps = be32_to_cpup(p++);
173 dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
174 layout->olo_comps_index, layout->olo_num_comps);
175
173 iter->total_comps = layout->olo_num_comps; 176 iter->total_comps = layout->olo_num_comps;
174 return 0; 177 return 0;
175} 178}
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index fb2d63f13f4..aea9e45efce 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -39,7 +39,7 @@
39}) 39})
40 40
41#define __page_to_pfn(pg) \ 41#define __page_to_pfn(pg) \
42({ struct page *__pg = (pg); \ 42({ const struct page *__pg = (pg); \
43 struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg)); \ 43 struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg)); \
44 (unsigned long)(__pg - __pgdat->node_mem_map) + \ 44 (unsigned long)(__pg - __pgdat->node_mem_map) + \
45 __pgdat->node_start_pfn; \ 45 __pgdat->node_start_pfn; \
@@ -57,7 +57,7 @@
57 * section[i].section_mem_map == mem_map's address - start_pfn; 57 * section[i].section_mem_map == mem_map's address - start_pfn;
58 */ 58 */
59#define __page_to_pfn(pg) \ 59#define __page_to_pfn(pg) \
60({ struct page *__pg = (pg); \ 60({ const struct page *__pg = (pg); \
61 int __sec = page_to_section(__pg); \ 61 int __sec = page_to_section(__pg); \
62 (unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \ 62 (unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \
63}) 63})
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 6395692b2e7..32f0076e844 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -125,7 +125,11 @@ enum rq_flag_bits {
125 __REQ_SYNC, /* request is sync (sync write or read) */ 125 __REQ_SYNC, /* request is sync (sync write or read) */
126 __REQ_META, /* metadata io request */ 126 __REQ_META, /* metadata io request */
127 __REQ_DISCARD, /* request to discard sectors */ 127 __REQ_DISCARD, /* request to discard sectors */
128 __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
129
128 __REQ_NOIDLE, /* don't anticipate more IO after this one */ 130 __REQ_NOIDLE, /* don't anticipate more IO after this one */
131 __REQ_FUA, /* forced unit access */
132 __REQ_FLUSH, /* request for cache flush */
129 133
130 /* bio only flags */ 134 /* bio only flags */
131 __REQ_RAHEAD, /* read ahead, can fail anytime */ 135 __REQ_RAHEAD, /* read ahead, can fail anytime */
@@ -135,7 +139,6 @@ enum rq_flag_bits {
135 /* request only flags */ 139 /* request only flags */
136 __REQ_SORTED, /* elevator knows about this request */ 140 __REQ_SORTED, /* elevator knows about this request */
137 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 141 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
138 __REQ_FUA, /* forced unit access */
139 __REQ_NOMERGE, /* don't touch this for merging */ 142 __REQ_NOMERGE, /* don't touch this for merging */
140 __REQ_STARTED, /* drive already may have started this one */ 143 __REQ_STARTED, /* drive already may have started this one */
141 __REQ_DONTPREP, /* don't call prep for this one */ 144 __REQ_DONTPREP, /* don't call prep for this one */
@@ -146,11 +149,9 @@ enum rq_flag_bits {
146 __REQ_PREEMPT, /* set for "ide_preempt" requests */ 149 __REQ_PREEMPT, /* set for "ide_preempt" requests */
147 __REQ_ALLOCED, /* request came from our alloc pool */ 150 __REQ_ALLOCED, /* request came from our alloc pool */
148 __REQ_COPY_USER, /* contains copies of user pages */ 151 __REQ_COPY_USER, /* contains copies of user pages */
149 __REQ_FLUSH, /* request for cache flush */
150 __REQ_FLUSH_SEQ, /* request for flush sequence */ 152 __REQ_FLUSH_SEQ, /* request for flush sequence */
151 __REQ_IO_STAT, /* account I/O stat */ 153 __REQ_IO_STAT, /* account I/O stat */
152 __REQ_MIXED_MERGE, /* merge of different types, fail separately */ 154 __REQ_MIXED_MERGE, /* merge of different types, fail separately */
153 __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
154 __REQ_NR_BITS, /* stops here */ 155 __REQ_NR_BITS, /* stops here */
155}; 156};
156 157
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0e67c45b3bc..84b15d54f8c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -30,6 +30,7 @@ struct request_pm_state;
30struct blk_trace; 30struct blk_trace;
31struct request; 31struct request;
32struct sg_io_hdr; 32struct sg_io_hdr;
33struct bsg_job;
33 34
34#define BLKDEV_MIN_RQ 4 35#define BLKDEV_MIN_RQ 4
35#define BLKDEV_MAX_RQ 128 /* Default maximum */ 36#define BLKDEV_MAX_RQ 128 /* Default maximum */
@@ -117,6 +118,7 @@ struct request {
117 struct { 118 struct {
118 unsigned int seq; 119 unsigned int seq;
119 struct list_head list; 120 struct list_head list;
121 rq_end_io_fn *saved_end_io;
120 } flush; 122 } flush;
121 }; 123 };
122 124
@@ -209,6 +211,7 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
209typedef void (softirq_done_fn)(struct request *); 211typedef void (softirq_done_fn)(struct request *);
210typedef int (dma_drain_needed_fn)(struct request *); 212typedef int (dma_drain_needed_fn)(struct request *);
211typedef int (lld_busy_fn) (struct request_queue *q); 213typedef int (lld_busy_fn) (struct request_queue *q);
214typedef int (bsg_job_fn) (struct bsg_job *);
212 215
213enum blk_eh_timer_return { 216enum blk_eh_timer_return {
214 BLK_EH_NOT_HANDLED, 217 BLK_EH_NOT_HANDLED,
@@ -375,6 +378,8 @@ struct request_queue {
375 struct mutex sysfs_lock; 378 struct mutex sysfs_lock;
376 379
377#if defined(CONFIG_BLK_DEV_BSG) 380#if defined(CONFIG_BLK_DEV_BSG)
381 bsg_job_fn *bsg_job_fn;
382 int bsg_job_size;
378 struct bsg_class_device bsg_dev; 383 struct bsg_class_device bsg_dev;
379#endif 384#endif
380 385
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 8c7c2de7631..8e9e4bc6d73 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -14,7 +14,7 @@
14enum blktrace_cat { 14enum blktrace_cat {
15 BLK_TC_READ = 1 << 0, /* reads */ 15 BLK_TC_READ = 1 << 0, /* reads */
16 BLK_TC_WRITE = 1 << 1, /* writes */ 16 BLK_TC_WRITE = 1 << 1, /* writes */
17 BLK_TC_BARRIER = 1 << 2, /* barrier */ 17 BLK_TC_FLUSH = 1 << 2, /* flush */
18 BLK_TC_SYNC = 1 << 3, /* sync IO */ 18 BLK_TC_SYNC = 1 << 3, /* sync IO */
19 BLK_TC_SYNCIO = BLK_TC_SYNC, 19 BLK_TC_SYNCIO = BLK_TC_SYNC,
20 BLK_TC_QUEUE = 1 << 4, /* queueing/merging */ 20 BLK_TC_QUEUE = 1 << 4, /* queueing/merging */
@@ -28,8 +28,9 @@ enum blktrace_cat {
28 BLK_TC_META = 1 << 12, /* metadata */ 28 BLK_TC_META = 1 << 12, /* metadata */
29 BLK_TC_DISCARD = 1 << 13, /* discard requests */ 29 BLK_TC_DISCARD = 1 << 13, /* discard requests */
30 BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */ 30 BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */
31 BLK_TC_FUA = 1 << 15, /* fua requests */
31 32
32 BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ 33 BLK_TC_END = 1 << 15, /* we've run out of bits! */
33}; 34};
34 35
35#define BLK_TC_SHIFT (16) 36#define BLK_TC_SHIFT (16)
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
new file mode 100644
index 00000000000..f55ab8cdc10
--- /dev/null
+++ b/include/linux/bsg-lib.h
@@ -0,0 +1,73 @@
1/*
2 * BSG helper library
3 *
4 * Copyright (C) 2008 James Smart, Emulex Corporation
5 * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
6 * Copyright (C) 2011 Mike Christie
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 */
23#ifndef _BLK_BSG_
24#define _BLK_BSG_
25
26#include <linux/blkdev.h>
27
28struct request;
29struct device;
30struct scatterlist;
31struct request_queue;
32
33struct bsg_buffer {
34 unsigned int payload_len;
35 int sg_cnt;
36 struct scatterlist *sg_list;
37};
38
39struct bsg_job {
40 struct device *dev;
41 struct request *req;
42
43 /* Transport/driver specific request/reply structs */
44 void *request;
45 void *reply;
46
47 unsigned int request_len;
48 unsigned int reply_len;
49 /*
50 * On entry : reply_len indicates the buffer size allocated for
51 * the reply.
52 *
53 * Upon completion : the message handler must set reply_len
54 * to indicates the size of the reply to be returned to the
55 * caller.
56 */
57
58 /* DMA payloads for the request/response */
59 struct bsg_buffer request_payload;
60 struct bsg_buffer reply_payload;
61
62 void *dd_data; /* Used for driver-specific storage */
63};
64
65void bsg_job_done(struct bsg_job *job, int result,
66 unsigned int reply_payload_rcv_len);
67int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
68 bsg_job_fn *job_fn, int dd_job_size);
69void bsg_request_fn(struct request_queue *q);
70void bsg_remove_queue(struct request_queue *q);
71void bsg_goose_queue(struct request_queue *q);
72
73#endif
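
A minimal sketch of how a transport driver could plug into the helper library declared above; the handler, the probe hook and the dd_data struct are made-up placeholders, while bsg_setup_queue(), bsg_job_done() and struct bsg_job are taken from the header itself.

#include <linux/bsg-lib.h>

/* Hypothetical per-job driver data, sized via the dd_job_size argument. */
struct my_bsg_data {
	int status;
};

/* Handler invoked by the block layer for each queued bsg request. */
static int my_bsg_job_fn(struct bsg_job *job)
{
	struct my_bsg_data *data = job->dd_data;

	/* ...hand job->request and job->request_payload to the hardware... */
	data->status = 0;

	/* Report completion and how many reply payload bytes are valid. */
	bsg_job_done(job, 0, job->reply_payload.payload_len);
	return 0;
}

/* Probe-time hookup of the handler to the transport's request queue. */
static int my_bsg_attach(struct device *dev, struct request_queue *q)
{
	return bsg_setup_queue(dev, q, "my_bsg", my_bsg_job_fn,
			       sizeof(struct my_bsg_data));
}
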
diff --git a/include/linux/hash.h b/include/linux/hash.h
index 06d25c189cc..b80506bdd73 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -63,7 +63,7 @@ static inline u32 hash_32(u32 val, unsigned int bits)
63 return hash >> (32 - bits); 63 return hash >> (32 - bits);
64} 64}
65 65
66static inline unsigned long hash_ptr(void *ptr, unsigned int bits) 66static inline unsigned long hash_ptr(const void *ptr, unsigned int bits)
67{ 67{
68 return hash_long((unsigned long)ptr, bits); 68 return hash_long((unsigned long)ptr, bits);
69} 69}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 87a06f345bd..59517300a31 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -23,6 +23,7 @@
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/topology.h> 24#include <linux/topology.h>
25#include <linux/wait.h> 25#include <linux/wait.h>
26#include <linux/module.h>
26 27
27#include <asm/irq.h> 28#include <asm/irq.h>
28#include <asm/ptrace.h> 29#include <asm/ptrace.h>
@@ -547,7 +548,15 @@ static inline struct msi_desc *irq_data_get_msi(struct irq_data *d)
547 return d->msi_desc; 548 return d->msi_desc;
548} 549}
549 550
550int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node); 551int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
552 struct module *owner);
553
554static inline int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt,
555 int node)
556{
557 return __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE);
558}
559
551void irq_free_descs(unsigned int irq, unsigned int cnt); 560void irq_free_descs(unsigned int irq, unsigned int cnt);
552int irq_reserve_irqs(unsigned int from, unsigned int cnt); 561int irq_reserve_irqs(unsigned int from, unsigned int cnt);
553 562
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 2d921b35212..150134ac709 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -66,6 +66,7 @@ struct irq_desc {
66#ifdef CONFIG_PROC_FS 66#ifdef CONFIG_PROC_FS
67 struct proc_dir_entry *dir; 67 struct proc_dir_entry *dir;
68#endif 68#endif
69 struct module *owner;
69 const char *name; 70 const char *name;
70} ____cacheline_internodealigned_in_smp; 71} ____cacheline_internodealigned_in_smp;
71 72
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 66c194e2d9b..683d6989011 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -64,7 +64,6 @@ struct loop_device {
64 64
65 struct request_queue *lo_queue; 65 struct request_queue *lo_queue;
66 struct gendisk *lo_disk; 66 struct gendisk *lo_disk;
67 struct list_head lo_list;
68}; 67};
69 68
70#endif /* __KERNEL__ */ 69#endif /* __KERNEL__ */
@@ -161,4 +160,8 @@ int loop_unregister_transfer(int number);
161#define LOOP_CHANGE_FD 0x4C06 160#define LOOP_CHANGE_FD 0x4C06
162#define LOOP_SET_CAPACITY 0x4C07 161#define LOOP_SET_CAPACITY 0x4C07
163 162
163/* /dev/loop-control interface */
164#define LOOP_CTL_ADD 0x4C80
165#define LOOP_CTL_REMOVE 0x4C81
166#define LOOP_CTL_GET_FREE 0x4C82
164#endif 167#endif
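
A userspace sketch of the new /dev/loop-control ioctls defined above; it assumes the conventional semantics that LOOP_CTL_GET_FREE returns the index of a free (or newly created) loop device as the ioctl return value.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/loop.h>

int main(void)
{
	int ctl = open("/dev/loop-control", O_RDWR);
	if (ctl < 0) {
		perror("open /dev/loop-control");
		return 1;
	}

	/* Ask the kernel for the first unused loop device. */
	int idx = ioctl(ctl, LOOP_CTL_GET_FREE);
	if (idx < 0)
		perror("LOOP_CTL_GET_FREE");
	else
		printf("first free loop device: /dev/loop%d\n", idx);

	close(ctl);
	return 0;
}
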
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 18fd13028ba..c309b1ecdc1 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -40,6 +40,7 @@
40#define BTRFS_MINOR 234 40#define BTRFS_MINOR 234
41#define AUTOFS_MINOR 235 41#define AUTOFS_MINOR 235
42#define MAPPER_CTRL_MINOR 236 42#define MAPPER_CTRL_MINOR 236
43#define LOOP_CTRL_MINOR 237
43#define MISC_DYNAMIC_MINOR 255 44#define MISC_DYNAMIC_MINOR 255
44 45
45struct device; 46struct device;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index fd599f4bb84..7438071b44a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -685,7 +685,7 @@ static inline void set_page_section(struct page *page, unsigned long section)
685 page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; 685 page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
686} 686}
687 687
688static inline unsigned long page_to_section(struct page *page) 688static inline unsigned long page_to_section(const struct page *page)
689{ 689{
690 return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; 690 return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
691} 691}
@@ -720,7 +720,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
720 720
721static __always_inline void *lowmem_page_address(const struct page *page) 721static __always_inline void *lowmem_page_address(const struct page *page)
722{ 722{
723 return __va(PFN_PHYS(page_to_pfn((struct page *)page))); 723 return __va(PFN_PHYS(page_to_pfn(page)));
724} 724}
725 725
726#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) 726#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
@@ -737,7 +737,7 @@ static __always_inline void *lowmem_page_address(const struct page *page)
737#endif 737#endif
738 738
739#if defined(HASHED_PAGE_VIRTUAL) 739#if defined(HASHED_PAGE_VIRTUAL)
740void *page_address(struct page *page); 740void *page_address(const struct page *page);
741void set_page_address(struct page *page, void *virtual); 741void set_page_address(struct page *page, void *virtual);
742void page_address_init(void); 742void page_address_init(void);
743#endif 743#endif
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f27893b3b72..8c230cbcbb4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -251,7 +251,8 @@ struct pci_dev {
251 u8 revision; /* PCI revision, low byte of class word */ 251 u8 revision; /* PCI revision, low byte of class word */
252 u8 hdr_type; /* PCI header type (`multi' flag masked out) */ 252 u8 hdr_type; /* PCI header type (`multi' flag masked out) */
253 u8 pcie_cap; /* PCI-E capability offset */ 253 u8 pcie_cap; /* PCI-E capability offset */
254 u8 pcie_type; /* PCI-E device/port type */ 254 u8 pcie_type:4; /* PCI-E device/port type */
255 u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */
255 u8 rom_base_reg; /* which config register controls the ROM */ 256 u8 rom_base_reg; /* which config register controls the ROM */
256 u8 pin; /* which interrupt pin this device uses */ 257 u8 pin; /* which interrupt pin this device uses */
257 258
@@ -617,6 +618,16 @@ struct pci_driver {
617/* these external functions are only available when PCI support is enabled */ 618/* these external functions are only available when PCI support is enabled */
618#ifdef CONFIG_PCI 619#ifdef CONFIG_PCI
619 620
621extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss);
622
623enum pcie_bus_config_types {
624 PCIE_BUS_PERFORMANCE,
625 PCIE_BUS_SAFE,
626 PCIE_BUS_PEER2PEER,
627};
628
629extern enum pcie_bus_config_types pcie_bus_config;
630
620extern struct bus_type pci_bus_type; 631extern struct bus_type pci_bus_type;
621 632
622/* Do NOT directly access these two variables, unless you are arch specific pci 633/* Do NOT directly access these two variables, unless you are arch specific pci
@@ -796,10 +807,13 @@ int pcix_get_mmrbc(struct pci_dev *dev);
796int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); 807int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc);
797int pcie_get_readrq(struct pci_dev *dev); 808int pcie_get_readrq(struct pci_dev *dev);
798int pcie_set_readrq(struct pci_dev *dev, int rq); 809int pcie_set_readrq(struct pci_dev *dev, int rq);
810int pcie_get_mps(struct pci_dev *dev);
811int pcie_set_mps(struct pci_dev *dev, int mps);
799int __pci_reset_function(struct pci_dev *dev); 812int __pci_reset_function(struct pci_dev *dev);
800int pci_reset_function(struct pci_dev *dev); 813int pci_reset_function(struct pci_dev *dev);
801void pci_update_resource(struct pci_dev *dev, int resno); 814void pci_update_resource(struct pci_dev *dev, int resno);
802int __must_check pci_assign_resource(struct pci_dev *dev, int i); 815int __must_check pci_assign_resource(struct pci_dev *dev, int i);
816int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align);
803int pci_select_bars(struct pci_dev *dev, unsigned long flags); 817int pci_select_bars(struct pci_dev *dev, unsigned long flags);
804 818
805/* ROM control related routines */ 819/* ROM control related routines */
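
A rough sketch of the new Max Payload Size helpers from a driver's perspective; it assumes pcie_get_mps()/pcie_set_mps() work in bytes and that pcie_set_mps() rejects values the device cannot support per its pcie_mpss field. The function itself is hypothetical.

#include <linux/pci.h>

static void example_tune_mps(struct pci_dev *pdev)
{
	int mps = pcie_get_mps(pdev);	/* current payload size in bytes */

	if (mps > 0 && mps < 256) {
		/* Try to raise it; expected to fail if 256 bytes exceeds
		 * what the device advertises as supported. */
		if (pcie_set_mps(pdev, 256))
			dev_warn(&pdev->dev, "could not raise MPS to 256\n");
	}
}
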
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 21097cb086f..f9ec1736a11 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -72,8 +72,6 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
72extern void pm_genpd_init(struct generic_pm_domain *genpd, 72extern void pm_genpd_init(struct generic_pm_domain *genpd,
73 struct dev_power_governor *gov, bool is_off); 73 struct dev_power_governor *gov, bool is_off);
74extern int pm_genpd_poweron(struct generic_pm_domain *genpd); 74extern int pm_genpd_poweron(struct generic_pm_domain *genpd);
75extern void pm_genpd_poweroff_unused(void);
76extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd);
77#else 75#else
78static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, 76static inline int pm_genpd_add_device(struct generic_pm_domain *genpd,
79 struct device *dev) 77 struct device *dev)
@@ -101,8 +99,14 @@ static inline int pm_genpd_poweron(struct generic_pm_domain *genpd)
101{ 99{
102 return -ENOSYS; 100 return -ENOSYS;
103} 101}
104static inline void pm_genpd_poweroff_unused(void) {} 102#endif
103
104#ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME
105extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd);
106extern void pm_genpd_poweroff_unused(void);
107#else
105static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {} 108static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {}
109static inline void pm_genpd_poweroff_unused(void) {}
106#endif 110#endif
107 111
108#endif /* _LINUX_PM_DOMAIN_H */ 112#endif /* _LINUX_PM_DOMAIN_H */
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index b27ebea2566..93f4d035076 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -97,6 +97,9 @@ struct rtc_pll_info {
97#define RTC_AF 0x20 /* Alarm interrupt */ 97#define RTC_AF 0x20 /* Alarm interrupt */
98#define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */ 98#define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */
99 99
100
101#define RTC_MAX_FREQ 8192
102
100#ifdef __KERNEL__ 103#ifdef __KERNEL__
101 104
102#include <linux/types.h> 105#include <linux/types.h>
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index bf366547da2..05c5e61f0a7 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -8,6 +8,8 @@
8#include <linux/blkdev.h> 8#include <linux/blkdev.h>
9#include <linux/tracepoint.h> 9#include <linux/tracepoint.h>
10 10
11#define RWBS_LEN 8
12
11DECLARE_EVENT_CLASS(block_rq_with_error, 13DECLARE_EVENT_CLASS(block_rq_with_error,
12 14
13 TP_PROTO(struct request_queue *q, struct request *rq), 15 TP_PROTO(struct request_queue *q, struct request *rq),
@@ -19,7 +21,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
19 __field( sector_t, sector ) 21 __field( sector_t, sector )
20 __field( unsigned int, nr_sector ) 22 __field( unsigned int, nr_sector )
21 __field( int, errors ) 23 __field( int, errors )
22 __array( char, rwbs, 6 ) 24 __array( char, rwbs, RWBS_LEN )
23 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) 25 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
24 ), 26 ),
25 27
@@ -104,7 +106,7 @@ DECLARE_EVENT_CLASS(block_rq,
104 __field( sector_t, sector ) 106 __field( sector_t, sector )
105 __field( unsigned int, nr_sector ) 107 __field( unsigned int, nr_sector )
106 __field( unsigned int, bytes ) 108 __field( unsigned int, bytes )
107 __array( char, rwbs, 6 ) 109 __array( char, rwbs, RWBS_LEN )
108 __array( char, comm, TASK_COMM_LEN ) 110 __array( char, comm, TASK_COMM_LEN )
109 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) 111 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
110 ), 112 ),
@@ -183,7 +185,7 @@ TRACE_EVENT(block_bio_bounce,
183 __field( dev_t, dev ) 185 __field( dev_t, dev )
184 __field( sector_t, sector ) 186 __field( sector_t, sector )
185 __field( unsigned int, nr_sector ) 187 __field( unsigned int, nr_sector )
186 __array( char, rwbs, 6 ) 188 __array( char, rwbs, RWBS_LEN )
187 __array( char, comm, TASK_COMM_LEN ) 189 __array( char, comm, TASK_COMM_LEN )
188 ), 190 ),
189 191
@@ -222,7 +224,7 @@ TRACE_EVENT(block_bio_complete,
222 __field( sector_t, sector ) 224 __field( sector_t, sector )
223 __field( unsigned, nr_sector ) 225 __field( unsigned, nr_sector )
224 __field( int, error ) 226 __field( int, error )
225 __array( char, rwbs, 6 ) 227 __array( char, rwbs, RWBS_LEN)
226 ), 228 ),
227 229
228 TP_fast_assign( 230 TP_fast_assign(
@@ -249,7 +251,7 @@ DECLARE_EVENT_CLASS(block_bio,
249 __field( dev_t, dev ) 251 __field( dev_t, dev )
250 __field( sector_t, sector ) 252 __field( sector_t, sector )
251 __field( unsigned int, nr_sector ) 253 __field( unsigned int, nr_sector )
252 __array( char, rwbs, 6 ) 254 __array( char, rwbs, RWBS_LEN )
253 __array( char, comm, TASK_COMM_LEN ) 255 __array( char, comm, TASK_COMM_LEN )
254 ), 256 ),
255 257
@@ -321,7 +323,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
321 __field( dev_t, dev ) 323 __field( dev_t, dev )
322 __field( sector_t, sector ) 324 __field( sector_t, sector )
323 __field( unsigned int, nr_sector ) 325 __field( unsigned int, nr_sector )
324 __array( char, rwbs, 6 ) 326 __array( char, rwbs, RWBS_LEN )
325 __array( char, comm, TASK_COMM_LEN ) 327 __array( char, comm, TASK_COMM_LEN )
326 ), 328 ),
327 329
@@ -456,7 +458,7 @@ TRACE_EVENT(block_split,
456 __field( dev_t, dev ) 458 __field( dev_t, dev )
457 __field( sector_t, sector ) 459 __field( sector_t, sector )
458 __field( sector_t, new_sector ) 460 __field( sector_t, new_sector )
459 __array( char, rwbs, 6 ) 461 __array( char, rwbs, RWBS_LEN )
460 __array( char, comm, TASK_COMM_LEN ) 462 __array( char, comm, TASK_COMM_LEN )
461 ), 463 ),
462 464
@@ -498,7 +500,7 @@ TRACE_EVENT(block_bio_remap,
498 __field( unsigned int, nr_sector ) 500 __field( unsigned int, nr_sector )
499 __field( dev_t, old_dev ) 501 __field( dev_t, old_dev )
500 __field( sector_t, old_sector ) 502 __field( sector_t, old_sector )
501 __array( char, rwbs, 6 ) 503 __array( char, rwbs, RWBS_LEN)
502 ), 504 ),
503 505
504 TP_fast_assign( 506 TP_fast_assign(
@@ -542,7 +544,7 @@ TRACE_EVENT(block_rq_remap,
542 __field( unsigned int, nr_sector ) 544 __field( unsigned int, nr_sector )
543 __field( dev_t, old_dev ) 545 __field( dev_t, old_dev )
544 __field( sector_t, old_sector ) 546 __field( sector_t, old_sector )
545 __array( char, rwbs, 6 ) 547 __array( char, rwbs, RWBS_LEN)
546 ), 548 ),
547 549
548 TP_fast_assign( 550 TP_fast_assign(
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 3a2cab407b9..e38544dddb1 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -246,7 +246,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
246 gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); 246 gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask);
247 247
248 for (i = gc->irq_base; msk; msk >>= 1, i++) { 248 for (i = gc->irq_base; msk; msk >>= 1, i++) {
249 if (!msk & 0x01) 249 if (!(msk & 0x01))
250 continue; 250 continue;
251 251
252 if (flags & IRQ_GC_INIT_NESTED_LOCK) 252 if (flags & IRQ_GC_INIT_NESTED_LOCK)
@@ -301,7 +301,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
301 raw_spin_unlock(&gc_lock); 301 raw_spin_unlock(&gc_lock);
302 302
303 for (; msk; msk >>= 1, i++) { 303 for (; msk; msk >>= 1, i++) {
304 if (!msk & 0x01) 304 if (!(msk & 0x01))
305 continue; 305 continue;
306 306
307 /* Remove handler first. That will mask the irq line */ 307 /* Remove handler first. That will mask the irq line */
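
Both hunks above fix the same operator-precedence slip: '!' binds tighter than '&', so "!msk & 0x01" tests whether msk is zero rather than whether bit 0 is set. A standalone illustration:

#include <stdio.h>

int main(void)
{
	unsigned int msk = 0x6;              /* bit 0 clear, bits 1 and 2 set */

	printf("!msk & 0x01   = %d\n", !msk & 0x01);   /* 0 for any non-zero msk */
	printf("!(msk & 0x01) = %d\n", !(msk & 0x01)); /* 1: bit 0 really is clear */
	return 0;
}
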
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 4c60a50e66b..039b889ea05 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { }
70static inline int desc_node(struct irq_desc *desc) { return 0; } 70static inline int desc_node(struct irq_desc *desc) { return 0; }
71#endif 71#endif
72 72
73static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) 73static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
74 struct module *owner)
74{ 75{
75 int cpu; 76 int cpu;
76 77
@@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
86 desc->irq_count = 0; 87 desc->irq_count = 0;
87 desc->irqs_unhandled = 0; 88 desc->irqs_unhandled = 0;
88 desc->name = NULL; 89 desc->name = NULL;
90 desc->owner = owner;
89 for_each_possible_cpu(cpu) 91 for_each_possible_cpu(cpu)
90 *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; 92 *per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
91 desc_smp_init(desc, node); 93 desc_smp_init(desc, node);
@@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc)
128static inline void free_masks(struct irq_desc *desc) { } 130static inline void free_masks(struct irq_desc *desc) { }
129#endif 131#endif
130 132
131static struct irq_desc *alloc_desc(int irq, int node) 133static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
132{ 134{
133 struct irq_desc *desc; 135 struct irq_desc *desc;
134 gfp_t gfp = GFP_KERNEL; 136 gfp_t gfp = GFP_KERNEL;
@@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
147 raw_spin_lock_init(&desc->lock); 149 raw_spin_lock_init(&desc->lock);
148 lockdep_set_class(&desc->lock, &irq_desc_lock_class); 150 lockdep_set_class(&desc->lock, &irq_desc_lock_class);
149 151
150 desc_set_defaults(irq, desc, node); 152 desc_set_defaults(irq, desc, node, owner);
151 153
152 return desc; 154 return desc;
153 155
@@ -173,13 +175,14 @@ static void free_desc(unsigned int irq)
173 kfree(desc); 175 kfree(desc);
174} 176}
175 177
176static int alloc_descs(unsigned int start, unsigned int cnt, int node) 178static int alloc_descs(unsigned int start, unsigned int cnt, int node,
179 struct module *owner)
177{ 180{
178 struct irq_desc *desc; 181 struct irq_desc *desc;
179 int i; 182 int i;
180 183
181 for (i = 0; i < cnt; i++) { 184 for (i = 0; i < cnt; i++) {
182 desc = alloc_desc(start + i, node); 185 desc = alloc_desc(start + i, node, owner);
183 if (!desc) 186 if (!desc)
184 goto err; 187 goto err;
185 mutex_lock(&sparse_irq_lock); 188 mutex_lock(&sparse_irq_lock);
@@ -227,7 +230,7 @@ int __init early_irq_init(void)
227 nr_irqs = initcnt; 230 nr_irqs = initcnt;
228 231
229 for (i = 0; i < initcnt; i++) { 232 for (i = 0; i < initcnt; i++) {
230 desc = alloc_desc(i, node); 233 desc = alloc_desc(i, node, NULL);
231 set_bit(i, allocated_irqs); 234 set_bit(i, allocated_irqs);
232 irq_insert_desc(i, desc); 235 irq_insert_desc(i, desc);
233 } 236 }
@@ -261,7 +264,7 @@ int __init early_irq_init(void)
261 alloc_masks(&desc[i], GFP_KERNEL, node); 264 alloc_masks(&desc[i], GFP_KERNEL, node);
262 raw_spin_lock_init(&desc[i].lock); 265 raw_spin_lock_init(&desc[i].lock);
263 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); 266 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
264 desc_set_defaults(i, &desc[i], node); 267 desc_set_defaults(i, &desc[i], node, NULL);
265 } 268 }
266 return arch_early_irq_init(); 269 return arch_early_irq_init();
267} 270}
@@ -276,8 +279,16 @@ static void free_desc(unsigned int irq)
276 dynamic_irq_cleanup(irq); 279 dynamic_irq_cleanup(irq);
277} 280}
278 281
279static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) 282static inline int alloc_descs(unsigned int start, unsigned int cnt, int node,
283 struct module *owner)
280{ 284{
285 u32 i;
286
287 for (i = 0; i < cnt; i++) {
288 struct irq_desc *desc = irq_to_desc(start + i);
289
290 desc->owner = owner;
291 }
281 return start; 292 return start;
282} 293}
283 294
@@ -333,11 +344,13 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
333 * @from: Start the search from this irq number 344 * @from: Start the search from this irq number
334 * @cnt: Number of consecutive irqs to allocate. 345 * @cnt: Number of consecutive irqs to allocate.
335 * @node: Preferred node on which the irq descriptor should be allocated 346 * @node: Preferred node on which the irq descriptor should be allocated
347 * @owner: Owning module (can be NULL)
336 * 348 *
337 * Returns the first irq number or error code 349 * Returns the first irq number or error code
338 */ 350 */
339int __ref 351int __ref
340irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) 352__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
353 struct module *owner)
341{ 354{
342 int start, ret; 355 int start, ret;
343 356
@@ -366,13 +379,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
366 379
367 bitmap_set(allocated_irqs, start, cnt); 380 bitmap_set(allocated_irqs, start, cnt);
368 mutex_unlock(&sparse_irq_lock); 381 mutex_unlock(&sparse_irq_lock);
369 return alloc_descs(start, cnt, node); 382 return alloc_descs(start, cnt, node, owner);
370 383
371err: 384err:
372 mutex_unlock(&sparse_irq_lock); 385 mutex_unlock(&sparse_irq_lock);
373 return ret; 386 return ret;
374} 387}
375EXPORT_SYMBOL_GPL(irq_alloc_descs); 388EXPORT_SYMBOL_GPL(__irq_alloc_descs);
376 389
377/** 390/**
378 * irq_reserve_irqs - mark irqs allocated 391 * irq_reserve_irqs - mark irqs allocated
@@ -440,7 +453,7 @@ void dynamic_irq_cleanup(unsigned int irq)
440 unsigned long flags; 453 unsigned long flags;
441 454
442 raw_spin_lock_irqsave(&desc->lock, flags); 455 raw_spin_lock_irqsave(&desc->lock, flags);
443 desc_set_defaults(irq, desc, desc_node(desc)); 456 desc_set_defaults(irq, desc, desc_node(desc), NULL);
444 raw_spin_unlock_irqrestore(&desc->lock, flags); 457 raw_spin_unlock_irqrestore(&desc->lock, flags);
445} 458}
446 459
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0a7840aeb0f..2e9425889fa 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -883,6 +883,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
883 883
884 if (desc->irq_data.chip == &no_irq_chip) 884 if (desc->irq_data.chip == &no_irq_chip)
885 return -ENOSYS; 885 return -ENOSYS;
886 if (!try_module_get(desc->owner))
887 return -ENODEV;
886 /* 888 /*
887 * Some drivers like serial.c use request_irq() heavily, 889 * Some drivers like serial.c use request_irq() heavily,
888 * so we have to be careful not to interfere with a 890 * so we have to be careful not to interfere with a
@@ -906,8 +908,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
906 */ 908 */
907 nested = irq_settings_is_nested_thread(desc); 909 nested = irq_settings_is_nested_thread(desc);
908 if (nested) { 910 if (nested) {
909 if (!new->thread_fn) 911 if (!new->thread_fn) {
910 return -EINVAL; 912 ret = -EINVAL;
913 goto out_mput;
914 }
911 /* 915 /*
912 * Replace the primary handler which was provided from 916 * Replace the primary handler which was provided from
913 * the driver for non nested interrupt handling by the 917 * the driver for non nested interrupt handling by the
@@ -929,8 +933,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
929 933
930 t = kthread_create(irq_thread, new, "irq/%d-%s", irq, 934 t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
931 new->name); 935 new->name);
932 if (IS_ERR(t)) 936 if (IS_ERR(t)) {
933 return PTR_ERR(t); 937 ret = PTR_ERR(t);
938 goto out_mput;
939 }
934 /* 940 /*
935 * We keep the reference to the task struct even if 941 * We keep the reference to the task struct even if
936 * the thread dies to avoid that the interrupt code 942 * the thread dies to avoid that the interrupt code
@@ -1095,6 +1101,8 @@ out_thread:
1095 kthread_stop(t); 1101 kthread_stop(t);
1096 put_task_struct(t); 1102 put_task_struct(t);
1097 } 1103 }
1104out_mput:
1105 module_put(desc->owner);
1098 return ret; 1106 return ret;
1099} 1107}
1100 1108
@@ -1203,6 +1211,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
1203 put_task_struct(action->thread); 1211 put_task_struct(action->thread);
1204 } 1212 }
1205 1213
1214 module_put(desc->owner);
1206 return action; 1215 return action;
1207} 1216}
1208 1217
@@ -1322,6 +1331,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
1322 if (!thread_fn) 1331 if (!thread_fn)
1323 return -EINVAL; 1332 return -EINVAL;
1324 handler = irq_default_primary_handler; 1333 handler = irq_default_primary_handler;
1334 irqflags |= IRQF_ONESHOT;
1325 } 1335 }
1326 1336
1327 action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); 1337 action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
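
With the last hunk, requesting a threaded-only interrupt now implies IRQF_ONESHOT automatically. A sketch of such a caller (device name and handlers are hypothetical):

#include <linux/interrupt.h>

static irqreturn_t my_thread_fn(int irq, void *dev_id)
{
	/* slow, sleepable work runs here, in process context */
	return IRQ_HANDLED;
}

static int my_request_irq(int irq, void *dev)
{
	/*
	 * NULL primary handler: the default primary handler is used and,
	 * with the change above, the line stays masked (one-shot) until
	 * my_thread_fn() has returned.
	 */
	return request_threaded_irq(irq, NULL, my_thread_fn,
				    IRQF_TRIGGER_LOW, "my-device", dev);
}
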
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 8c24294e477..91d67ce3a8d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3111,7 +3111,13 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
3111 if (!class) 3111 if (!class)
3112 class = look_up_lock_class(lock, 0); 3112 class = look_up_lock_class(lock, 0);
3113 3113
3114 if (DEBUG_LOCKS_WARN_ON(!class)) 3114 /*
3115 * If look_up_lock_class() failed to find a class, we're trying
3116 * to test if we hold a lock that has never yet been acquired.
3117 * Clearly if the lock hasn't been acquired _ever_, we're not
3118 * holding it either, so report failure.
3119 */
3120 if (!class)
3115 return 0; 3121 return 0;
3116 3122
3117 if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) 3123 if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index b1914cb9095..3744c594b19 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -231,3 +231,7 @@ config PM_CLK
231config PM_GENERIC_DOMAINS 231config PM_GENERIC_DOMAINS
232 bool 232 bool
233 depends on PM 233 depends on PM
234
235config PM_GENERIC_DOMAINS_RUNTIME
236 def_bool y
237 depends on PM_RUNTIME && PM_GENERIC_DOMAINS
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 6957aa298df..7c910a5593a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
206 what |= MASK_TC_BIT(rw, RAHEAD); 206 what |= MASK_TC_BIT(rw, RAHEAD);
207 what |= MASK_TC_BIT(rw, META); 207 what |= MASK_TC_BIT(rw, META);
208 what |= MASK_TC_BIT(rw, DISCARD); 208 what |= MASK_TC_BIT(rw, DISCARD);
209 what |= MASK_TC_BIT(rw, FLUSH);
210 what |= MASK_TC_BIT(rw, FUA);
209 211
210 pid = tsk->pid; 212 pid = tsk->pid;
211 if (act_log_check(bt, what, sector, pid)) 213 if (act_log_check(bt, what, sector, pid))
@@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
1054 goto out; 1056 goto out;
1055 } 1057 }
1056 1058
1059 if (tc & BLK_TC_FLUSH)
1060 rwbs[i++] = 'F';
1061
1057 if (tc & BLK_TC_DISCARD) 1062 if (tc & BLK_TC_DISCARD)
1058 rwbs[i++] = 'D'; 1063 rwbs[i++] = 'D';
1059 else if (tc & BLK_TC_WRITE) 1064 else if (tc & BLK_TC_WRITE)
@@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
1063 else 1068 else
1064 rwbs[i++] = 'N'; 1069 rwbs[i++] = 'N';
1065 1070
1071 if (tc & BLK_TC_FUA)
1072 rwbs[i++] = 'F';
1066 if (tc & BLK_TC_AHEAD) 1073 if (tc & BLK_TC_AHEAD)
1067 rwbs[i++] = 'A'; 1074 rwbs[i++] = 'A';
1068 if (tc & BLK_TC_BARRIER)
1069 rwbs[i++] = 'B';
1070 if (tc & BLK_TC_SYNC) 1075 if (tc & BLK_TC_SYNC)
1071 rwbs[i++] = 'S'; 1076 rwbs[i++] = 'S';
1072 if (tc & BLK_TC_META) 1077 if (tc & BLK_TC_META)
@@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
1132 1137
1133static int blk_log_action_classic(struct trace_iterator *iter, const char *act) 1138static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
1134{ 1139{
1135 char rwbs[6]; 1140 char rwbs[RWBS_LEN];
1136 unsigned long long ts = iter->ts; 1141 unsigned long long ts = iter->ts;
1137 unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); 1142 unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
1138 unsigned secs = (unsigned long)ts; 1143 unsigned secs = (unsigned long)ts;
@@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
1148 1153
1149static int blk_log_action(struct trace_iterator *iter, const char *act) 1154static int blk_log_action(struct trace_iterator *iter, const char *act)
1150{ 1155{
1151 char rwbs[6]; 1156 char rwbs[RWBS_LEN];
1152 const struct blk_io_trace *t = te_blk_io_trace(iter->ent); 1157 const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
1153 1158
1154 fill_rwbs(rwbs, t); 1159 fill_rwbs(rwbs, t);
@@ -1561,7 +1566,7 @@ static const struct {
1561} mask_maps[] = { 1566} mask_maps[] = {
1562 { BLK_TC_READ, "read" }, 1567 { BLK_TC_READ, "read" },
1563 { BLK_TC_WRITE, "write" }, 1568 { BLK_TC_WRITE, "write" },
1564 { BLK_TC_BARRIER, "barrier" }, 1569 { BLK_TC_FLUSH, "flush" },
1565 { BLK_TC_SYNC, "sync" }, 1570 { BLK_TC_SYNC, "sync" },
1566 { BLK_TC_QUEUE, "queue" }, 1571 { BLK_TC_QUEUE, "queue" },
1567 { BLK_TC_REQUEUE, "requeue" }, 1572 { BLK_TC_REQUEUE, "requeue" },
@@ -1573,6 +1578,7 @@ static const struct {
1573 { BLK_TC_META, "meta" }, 1578 { BLK_TC_META, "meta" },
1574 { BLK_TC_DISCARD, "discard" }, 1579 { BLK_TC_DISCARD, "discard" },
1575 { BLK_TC_DRV_DATA, "drv_data" }, 1580 { BLK_TC_DRV_DATA, "drv_data" },
1581 { BLK_TC_FUA, "fua" },
1576}; 1582};
1577 1583
1578static int blk_trace_str2mask(const char *str) 1584static int blk_trace_str2mask(const char *str)
@@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1788{ 1794{
1789 int i = 0; 1795 int i = 0;
1790 1796
1797 if (rw & REQ_FLUSH)
1798 rwbs[i++] = 'F';
1799
1791 if (rw & WRITE) 1800 if (rw & WRITE)
1792 rwbs[i++] = 'W'; 1801 rwbs[i++] = 'W';
1793 else if (rw & REQ_DISCARD) 1802 else if (rw & REQ_DISCARD)
@@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1797 else 1806 else
1798 rwbs[i++] = 'N'; 1807 rwbs[i++] = 'N';
1799 1808
1809 if (rw & REQ_FUA)
1810 rwbs[i++] = 'F';
1800 if (rw & REQ_RAHEAD) 1811 if (rw & REQ_RAHEAD)
1801 rwbs[i++] = 'A'; 1812 rwbs[i++] = 'A';
1802 if (rw & REQ_SYNC) 1813 if (rw & REQ_SYNC)
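
A quick sketch of what the updated blk_fill_rwbs() produces; the flag combination is illustrative, and the "FWFS" decoding simply follows the character order in the hunk above.

#include <linux/blkdev.h>
#include <linux/blktrace_api.h>

static void rwbs_example(void)
{
	char rwbs[8];	/* RWBS_LEN from the trace events header above */

	/*
	 * A sync write carrying both a preflush and FUA: with the hunks
	 * above this decodes as "FWFS" - flush, write, fua, sync.
	 */
	blk_fill_rwbs(rwbs, REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, 4096);
}
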
diff --git a/mm/highmem.c b/mm/highmem.c
index 693394daa2e..5ef672c07f7 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -326,7 +326,7 @@ static struct page_address_slot {
326 spinlock_t lock; /* Protect this bucket's list */ 326 spinlock_t lock; /* Protect this bucket's list */
327} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER]; 327} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];
328 328
329static struct page_address_slot *page_slot(struct page *page) 329static struct page_address_slot *page_slot(const struct page *page)
330{ 330{
331 return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)]; 331 return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
332} 332}
@@ -337,7 +337,7 @@ static struct page_address_slot *page_slot(struct page *page)
337 * 337 *
338 * Returns the page's virtual address. 338 * Returns the page's virtual address.
339 */ 339 */
340void *page_address(struct page *page) 340void *page_address(const struct page *page)
341{ 341{
342 unsigned long flags; 342 unsigned long flags;
343 void *ret; 343 void *ret;
diff --git a/sound/aoa/fabrics/layout.c b/sound/aoa/fabrics/layout.c
index 3fd1a7e2492..552b97afbca 100644
--- a/sound/aoa/fabrics/layout.c
+++ b/sound/aoa/fabrics/layout.c
@@ -1073,10 +1073,10 @@ static int aoa_fabric_layout_probe(struct soundbus_dev *sdev)
1073 sdev->pcmid = -1; 1073 sdev->pcmid = -1;
1074 list_del(&ldev->list); 1074 list_del(&ldev->list);
1075 layouts_list_items--; 1075 layouts_list_items--;
1076 kfree(ldev);
1076 outnodev: 1077 outnodev:
1077 of_node_put(sound); 1078 of_node_put(sound);
1078 layout_device = NULL; 1079 layout_device = NULL;
1079 kfree(ldev);
1080 return -ENODEV; 1080 return -ENODEV;
1081} 1081}
1082 1082
diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index 200c9a1d48b..a872d0a8297 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -1909,6 +1909,7 @@ static unsigned int ad1981_jacks_whitelist[] = {
1909 0x103c0944, /* HP nc6220 */ 1909 0x103c0944, /* HP nc6220 */
1910 0x103c0934, /* HP nc8220 */ 1910 0x103c0934, /* HP nc8220 */
1911 0x103c006d, /* HP nx9105 */ 1911 0x103c006d, /* HP nx9105 */
1912 0x103c300d, /* HP Compaq dc5100 SFF(PT003AW) */
1912 0x17340088, /* FSC Scenic-W */ 1913 0x17340088, /* FSC Scenic-W */
1913 0 /* end */ 1914 0 /* end */
1914}; 1915};
diff --git a/sound/pci/hda/alc268_quirks.c b/sound/pci/hda/alc268_quirks.c
index be58bf2f3ae..2e5876ce71f 100644
--- a/sound/pci/hda/alc268_quirks.c
+++ b/sound/pci/hda/alc268_quirks.c
@@ -476,8 +476,8 @@ static const struct snd_pci_quirk alc268_ssid_cfg_tbl[] = {
476 476
477static const struct alc_config_preset alc268_presets[] = { 477static const struct alc_config_preset alc268_presets[] = {
478 [ALC267_QUANTA_IL1] = { 478 [ALC267_QUANTA_IL1] = {
479 .mixers = { alc267_quanta_il1_mixer, alc268_beep_mixer, 479 .mixers = { alc267_quanta_il1_mixer, alc268_beep_mixer },
480 alc268_capture_nosrc_mixer }, 480 .cap_mixer = alc268_capture_nosrc_mixer,
481 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, 481 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
482 alc267_quanta_il1_verbs }, 482 alc267_quanta_il1_verbs },
483 .num_dacs = ARRAY_SIZE(alc268_dac_nids), 483 .num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -492,8 +492,8 @@ static const struct alc_config_preset alc268_presets[] = {
492 .init_hook = alc_inithook, 492 .init_hook = alc_inithook,
493 }, 493 },
494 [ALC268_3ST] = { 494 [ALC268_3ST] = {
495 .mixers = { alc268_base_mixer, alc268_capture_alt_mixer, 495 .mixers = { alc268_base_mixer, alc268_beep_mixer },
496 alc268_beep_mixer }, 496 .cap_mixer = alc268_capture_alt_mixer,
497 .init_verbs = { alc268_base_init_verbs }, 497 .init_verbs = { alc268_base_init_verbs },
498 .num_dacs = ARRAY_SIZE(alc268_dac_nids), 498 .num_dacs = ARRAY_SIZE(alc268_dac_nids),
499 .dac_nids = alc268_dac_nids, 499 .dac_nids = alc268_dac_nids,
@@ -507,8 +507,8 @@ static const struct alc_config_preset alc268_presets[] = {
507 .input_mux = &alc268_capture_source, 507 .input_mux = &alc268_capture_source,
508 }, 508 },
509 [ALC268_TOSHIBA] = { 509 [ALC268_TOSHIBA] = {
510 .mixers = { alc268_toshiba_mixer, alc268_capture_alt_mixer, 510 .mixers = { alc268_toshiba_mixer, alc268_beep_mixer },
511 alc268_beep_mixer }, 511 .cap_mixer = alc268_capture_alt_mixer,
512 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, 512 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
513 alc268_toshiba_verbs }, 513 alc268_toshiba_verbs },
514 .num_dacs = ARRAY_SIZE(alc268_dac_nids), 514 .num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -525,8 +525,8 @@ static const struct alc_config_preset alc268_presets[] = {
525 .init_hook = alc_inithook, 525 .init_hook = alc_inithook,
526 }, 526 },
527 [ALC268_ACER] = { 527 [ALC268_ACER] = {
528 .mixers = { alc268_acer_mixer, alc268_capture_alt_mixer, 528 .mixers = { alc268_acer_mixer, alc268_beep_mixer },
529 alc268_beep_mixer }, 529 .cap_mixer = alc268_capture_alt_mixer,
530 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, 530 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
531 alc268_acer_verbs }, 531 alc268_acer_verbs },
532 .num_dacs = ARRAY_SIZE(alc268_dac_nids), 532 .num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -543,8 +543,8 @@ static const struct alc_config_preset alc268_presets[] = {
543 .init_hook = alc_inithook, 543 .init_hook = alc_inithook,
544 }, 544 },
545 [ALC268_ACER_DMIC] = { 545 [ALC268_ACER_DMIC] = {
546 .mixers = { alc268_acer_dmic_mixer, alc268_capture_alt_mixer, 546 .mixers = { alc268_acer_dmic_mixer, alc268_beep_mixer },
547 alc268_beep_mixer }, 547 .cap_mixer = alc268_capture_alt_mixer,
548 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, 548 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
549 alc268_acer_verbs }, 549 alc268_acer_verbs },
550 .num_dacs = ARRAY_SIZE(alc268_dac_nids), 550 .num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -561,9 +561,8 @@ static const struct alc_config_preset alc268_presets[] = {
561 .init_hook = alc_inithook, 561 .init_hook = alc_inithook,
562 }, 562 },
563 [ALC268_ACER_ASPIRE_ONE] = { 563 [ALC268_ACER_ASPIRE_ONE] = {
564 .mixers = { alc268_acer_aspire_one_mixer, 564 .mixers = { alc268_acer_aspire_one_mixer, alc268_beep_mixer},
565 alc268_beep_mixer, 565 .cap_mixer = alc268_capture_nosrc_mixer,
566 alc268_capture_nosrc_mixer },
567 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, 566 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
568 alc268_acer_aspire_one_verbs }, 567 alc268_acer_aspire_one_verbs },
569 .num_dacs = ARRAY_SIZE(alc268_dac_nids), 568 .num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -579,8 +578,8 @@ static const struct alc_config_preset alc268_presets[] = {
579 .init_hook = alc_inithook, 578 .init_hook = alc_inithook,
580 }, 579 },
581 [ALC268_DELL] = { 580 [ALC268_DELL] = {
582 .mixers = { alc268_dell_mixer, alc268_beep_mixer, 581 .mixers = { alc268_dell_mixer, alc268_beep_mixer},
583 alc268_capture_nosrc_mixer }, 582 .cap_mixer = alc268_capture_nosrc_mixer,
584 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, 583 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
585 alc268_dell_verbs }, 584 alc268_dell_verbs },
586 .num_dacs = ARRAY_SIZE(alc268_dac_nids), 585 .num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -596,8 +595,8 @@ static const struct alc_config_preset alc268_presets[] = {
596 .init_hook = alc_inithook, 595 .init_hook = alc_inithook,
597 }, 596 },
598 [ALC268_ZEPTO] = { 597 [ALC268_ZEPTO] = {
599 .mixers = { alc268_base_mixer, alc268_capture_alt_mixer, 598 .mixers = { alc268_base_mixer, alc268_beep_mixer },
600 alc268_beep_mixer }, 599 .cap_mixer = alc268_capture_alt_mixer,
601 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, 600 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
602 alc268_toshiba_verbs }, 601 alc268_toshiba_verbs },
603 .num_dacs = ARRAY_SIZE(alc268_dac_nids), 602 .num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -616,7 +615,8 @@ static const struct alc_config_preset alc268_presets[] = {
616 }, 615 },
617#ifdef CONFIG_SND_DEBUG 616#ifdef CONFIG_SND_DEBUG
618 [ALC268_TEST] = { 617 [ALC268_TEST] = {
619 .mixers = { alc268_test_mixer, alc268_capture_mixer }, 618 .mixers = { alc268_test_mixer },
619 .cap_mixer = alc268_capture_mixer,
620 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, 620 .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
621 alc268_volume_init_verbs, 621 alc268_volume_init_verbs,
622 alc268_beep_init_verbs }, 622 alc268_beep_init_verbs },
diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c
index 28ce17d09c3..c34f730f481 100644
--- a/sound/pci/hda/hda_eld.c
+++ b/sound/pci/hda/hda_eld.c
@@ -144,25 +144,17 @@ static int cea_sampling_frequencies[8] = {
144 SNDRV_PCM_RATE_192000, /* 7: 192000Hz */ 144 SNDRV_PCM_RATE_192000, /* 7: 192000Hz */
145}; 145};
146 146
147static unsigned char hdmi_get_eld_byte(struct hda_codec *codec, hda_nid_t nid, 147static unsigned int hdmi_get_eld_data(struct hda_codec *codec, hda_nid_t nid,
148 int byte_index) 148 int byte_index)
149{ 149{
150 unsigned int val; 150 unsigned int val;
151 151
152 val = snd_hda_codec_read(codec, nid, 0, 152 val = snd_hda_codec_read(codec, nid, 0,
153 AC_VERB_GET_HDMI_ELDD, byte_index); 153 AC_VERB_GET_HDMI_ELDD, byte_index);
154
155#ifdef BE_PARANOID 154#ifdef BE_PARANOID
156 printk(KERN_INFO "HDMI: ELD data byte %d: 0x%x\n", byte_index, val); 155 printk(KERN_INFO "HDMI: ELD data byte %d: 0x%x\n", byte_index, val);
157#endif 156#endif
158 157 return val;
159 if ((val & AC_ELDD_ELD_VALID) == 0) {
160 snd_printd(KERN_INFO "HDMI: invalid ELD data byte %d\n",
161 byte_index);
162 val = 0;
163 }
164
165 return val & AC_ELDD_ELD_DATA;
166} 158}
167 159
168#define GRAB_BITS(buf, byte, lowbit, bits) \ 160#define GRAB_BITS(buf, byte, lowbit, bits) \
@@ -344,11 +336,26 @@ int snd_hdmi_get_eld(struct hdmi_eld *eld,
344 if (!buf) 336 if (!buf)
345 return -ENOMEM; 337 return -ENOMEM;
346 338
347 for (i = 0; i < size; i++) 339 for (i = 0; i < size; i++) {
348 buf[i] = hdmi_get_eld_byte(codec, nid, i); 340 unsigned int val = hdmi_get_eld_data(codec, nid, i);
341 if (!(val & AC_ELDD_ELD_VALID)) {
342 if (!i) {
343 snd_printd(KERN_INFO
344 "HDMI: invalid ELD data\n");
345 ret = -EINVAL;
346 goto error;
347 }
348 snd_printd(KERN_INFO
349 "HDMI: invalid ELD data byte %d\n", i);
350 val = 0;
351 } else
352 val &= AC_ELDD_ELD_DATA;
353 buf[i] = val;
354 }
349 355
350 ret = hdmi_update_eld(eld, buf, size); 356 ret = hdmi_update_eld(eld, buf, size);
351 357
358error:
352 kfree(buf); 359 kfree(buf);
353 return ret; 360 return ret;
354} 361}
diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index 47d6ffc9b5b..d6c93d92b55 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -375,7 +375,7 @@ static int is_ext_mic(struct hda_codec *codec, unsigned int idx)
375static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin, 375static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin,
376 unsigned int *idxp) 376 unsigned int *idxp)
377{ 377{
378 int i; 378 int i, idx;
379 hda_nid_t nid; 379 hda_nid_t nid;
380 380
381 nid = codec->start_nid; 381 nid = codec->start_nid;
@@ -384,9 +384,11 @@ static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin,
384 type = get_wcaps_type(get_wcaps(codec, nid)); 384 type = get_wcaps_type(get_wcaps(codec, nid));
385 if (type != AC_WID_AUD_IN) 385 if (type != AC_WID_AUD_IN)
386 continue; 386 continue;
387 *idxp = snd_hda_get_conn_index(codec, nid, pin, false); 387 idx = snd_hda_get_conn_index(codec, nid, pin, false);
388 if (*idxp >= 0) 388 if (idx >= 0) {
389 *idxp = idx;
389 return nid; 390 return nid;
391 }
390 } 392 }
391 return 0; 393 return 0;
392} 394}
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 9a1aa09f47f..fcb11af9ad2 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1784,6 +1784,7 @@ static const char * const alc_slave_vols[] = {
1784 "Speaker Playback Volume", 1784 "Speaker Playback Volume",
1785 "Mono Playback Volume", 1785 "Mono Playback Volume",
1786 "Line-Out Playback Volume", 1786 "Line-Out Playback Volume",
1787 "PCM Playback Volume",
1787 NULL, 1788 NULL,
1788}; 1789};
1789 1790
@@ -1798,6 +1799,7 @@ static const char * const alc_slave_sws[] = {
1798 "Mono Playback Switch", 1799 "Mono Playback Switch",
1799 "IEC958 Playback Switch", 1800 "IEC958 Playback Switch",
1800 "Line-Out Playback Switch", 1801 "Line-Out Playback Switch",
1802 "PCM Playback Switch",
1801 NULL, 1803 NULL,
1802}; 1804};
1803 1805
diff --git a/sound/usb/caiaq/audio.c b/sound/usb/caiaq/audio.c
index aa52b3e13bb..2cf87f5afed 100644
--- a/sound/usb/caiaq/audio.c
+++ b/sound/usb/caiaq/audio.c
@@ -139,8 +139,12 @@ static void stream_stop(struct snd_usb_caiaqdev *dev)
139 139
140 for (i = 0; i < N_URBS; i++) { 140 for (i = 0; i < N_URBS; i++) {
141 usb_kill_urb(dev->data_urbs_in[i]); 141 usb_kill_urb(dev->data_urbs_in[i]);
142 usb_kill_urb(dev->data_urbs_out[i]); 142
143 if (test_bit(i, &dev->outurb_active_mask))
144 usb_kill_urb(dev->data_urbs_out[i]);
143 } 145 }
146
147 dev->outurb_active_mask = 0;
144} 148}
145 149
146static int snd_usb_caiaq_substream_open(struct snd_pcm_substream *substream) 150static int snd_usb_caiaq_substream_open(struct snd_pcm_substream *substream)
@@ -612,8 +616,8 @@ static void read_completed(struct urb *urb)
612{ 616{
613 struct snd_usb_caiaq_cb_info *info = urb->context; 617 struct snd_usb_caiaq_cb_info *info = urb->context;
614 struct snd_usb_caiaqdev *dev; 618 struct snd_usb_caiaqdev *dev;
615 struct urb *out; 619 struct urb *out = NULL;
616 int frame, len, send_it = 0, outframe = 0; 620 int i, frame, len, send_it = 0, outframe = 0;
617 size_t offset = 0; 621 size_t offset = 0;
618 622
619 if (urb->status || !info) 623 if (urb->status || !info)
@@ -624,7 +628,17 @@ static void read_completed(struct urb *urb)
624 if (!dev->streaming) 628 if (!dev->streaming)
625 return; 629 return;
626 630
627 out = dev->data_urbs_out[info->index]; 631 /* find an output urb that is not currently in use */
632 for (i = 0; i < N_URBS; i++)
633 if (test_and_set_bit(i, &dev->outurb_active_mask) == 0) {
634 out = dev->data_urbs_out[i];
635 break;
636 }
637
638 if (!out) {
639 log("Unable to find an output urb to use\n");
640 goto requeue;
641 }
628 642
629 /* read the recently received packet and send back one which has 643 /* read the recently received packet and send back one which has
630 * the same layout */ 644 * the same layout */
@@ -655,8 +669,12 @@ static void read_completed(struct urb *urb)
655 out->number_of_packets = outframe; 669 out->number_of_packets = outframe;
656 out->transfer_flags = URB_ISO_ASAP; 670 out->transfer_flags = URB_ISO_ASAP;
657 usb_submit_urb(out, GFP_ATOMIC); 671 usb_submit_urb(out, GFP_ATOMIC);
672 } else {
673 struct snd_usb_caiaq_cb_info *oinfo = out->context;
674 clear_bit(oinfo->index, &dev->outurb_active_mask);
658 } 675 }
659 676
677requeue:
660 /* re-submit inbound urb */ 678 /* re-submit inbound urb */
661 for (frame = 0; frame < FRAMES_PER_URB; frame++) { 679 for (frame = 0; frame < FRAMES_PER_URB; frame++) {
662 urb->iso_frame_desc[frame].offset = BYTES_PER_FRAME * frame; 680 urb->iso_frame_desc[frame].offset = BYTES_PER_FRAME * frame;
@@ -678,6 +696,8 @@ static void write_completed(struct urb *urb)
678 dev->output_running = 1; 696 dev->output_running = 1;
679 wake_up(&dev->prepare_wait_queue); 697 wake_up(&dev->prepare_wait_queue);
680 } 698 }
699
700 clear_bit(info->index, &dev->outurb_active_mask);
681} 701}
682 702
683static struct urb **alloc_urbs(struct snd_usb_caiaqdev *dev, int dir, int *ret) 703static struct urb **alloc_urbs(struct snd_usb_caiaqdev *dev, int dir, int *ret)
@@ -829,6 +849,9 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev)
829 if (!dev->data_cb_info) 849 if (!dev->data_cb_info)
830 return -ENOMEM; 850 return -ENOMEM;
831 851
852 dev->outurb_active_mask = 0;
853 BUILD_BUG_ON(N_URBS > (sizeof(dev->outurb_active_mask) * 8));
854
832 for (i = 0; i < N_URBS; i++) { 855 for (i = 0; i < N_URBS; i++) {
833 dev->data_cb_info[i].dev = dev; 856 dev->data_cb_info[i].dev = dev;
834 dev->data_cb_info[i].index = i; 857 dev->data_cb_info[i].index = i;
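
The driver now reserves output urbs through an atomic bitmask instead of pairing them 1:1 with input urbs. A distilled sketch of that claim/release pattern (helper names are made up):

#include <linux/bitops.h>

/* Atomically find and reserve a free slot; returns its index or -1. */
static int claim_free_slot(unsigned long *mask, int nslots)
{
	int i;

	for (i = 0; i < nslots; i++)
		if (test_and_set_bit(i, mask) == 0)
			return i;	/* bit was clear: slot i is now ours */
	return -1;			/* all slots busy */
}

/* Release a previously claimed slot. */
static void release_slot(unsigned long *mask, int i)
{
	clear_bit(i, mask);		/* make slot i available again */
}
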
diff --git a/sound/usb/caiaq/device.h b/sound/usb/caiaq/device.h
index b2b310194ff..3f9c6339ae9 100644
--- a/sound/usb/caiaq/device.h
+++ b/sound/usb/caiaq/device.h
@@ -96,6 +96,7 @@ struct snd_usb_caiaqdev {
96 int input_panic, output_panic, warned; 96 int input_panic, output_panic, warned;
97 char *audio_in_buf, *audio_out_buf; 97 char *audio_in_buf, *audio_out_buf;
98 unsigned int samplerates, bpp; 98 unsigned int samplerates, bpp;
99 unsigned long outurb_active_mask;
99 100
100 struct snd_pcm_substream *sub_playback[MAX_STREAMS]; 101 struct snd_pcm_substream *sub_playback[MAX_STREAMS];
101 struct snd_pcm_substream *sub_capture[MAX_STREAMS]; 102 struct snd_pcm_substream *sub_capture[MAX_STREAMS];
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index c04d7c71ac8..cdd19d7fe50 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -152,6 +152,7 @@ static inline void check_mapped_dB(const struct usbmix_name_map *p,
152 if (p && p->dB) { 152 if (p && p->dB) {
153 cval->dBmin = p->dB->min; 153 cval->dBmin = p->dB->min;
154 cval->dBmax = p->dB->max; 154 cval->dBmax = p->dB->max;
155 cval->initialized = 1;
155 } 156 }
156} 157}
157 158
@@ -1092,7 +1093,7 @@ static void build_feature_ctl(struct mixer_build *state, void *raw_desc,
1092 " Switch" : " Volume"); 1093 " Switch" : " Volume");
1093 if (control == UAC_FU_VOLUME) { 1094 if (control == UAC_FU_VOLUME) {
1094 check_mapped_dB(map, cval); 1095 check_mapped_dB(map, cval);
1095 if (cval->dBmin < cval->dBmax) { 1096 if (cval->dBmin < cval->dBmax || !cval->initialized) {
1096 kctl->tlv.c = mixer_vol_tlv; 1097 kctl->tlv.c = mixer_vol_tlv;
1097 kctl->vd[0].access |= 1098 kctl->vd[0].access |=
1098 SNDRV_CTL_ELEM_ACCESS_TLV_READ | 1099 SNDRV_CTL_ELEM_ACCESS_TLV_READ |
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 5f2a5c7046d..710ae3d0a48 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -134,10 +134,18 @@ static int opt_show_lines(const struct option *opt __used,
134{ 134{
135 int ret = 0; 135 int ret = 0;
136 136
137 if (str) 137 if (!str)
138 ret = parse_line_range_desc(str, &params.line_range); 138 return 0;
139 INIT_LIST_HEAD(&params.line_range.line_list); 139
140 if (params.show_lines) {
141 pr_warning("Warning: more than one --line options are"
142 " detected. Only the first one is valid.\n");
143 return 0;
144 }
145
140 params.show_lines = true; 146 params.show_lines = true;
147 ret = parse_line_range_desc(str, &params.line_range);
148 INIT_LIST_HEAD(&params.line_range.line_list);
141 149
142 return ret; 150 return ret;
143} 151}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f6426b496f4..6b0519f885e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -45,7 +45,7 @@ static int freq = 1000;
45static int output; 45static int output;
46static int pipe_output = 0; 46static int pipe_output = 0;
47static const char *output_name = NULL; 47static const char *output_name = NULL;
48static int group = 0; 48static bool group = false;
49static int realtime_prio = 0; 49static int realtime_prio = 0;
50static bool nodelay = false; 50static bool nodelay = false;
51static bool raw_samples = false; 51static bool raw_samples = false;
@@ -753,6 +753,8 @@ const struct option record_options[] = {
753 "child tasks do not inherit counters"), 753 "child tasks do not inherit counters"),
754 OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"), 754 OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
755 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), 755 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
756 OPT_BOOLEAN(0, "group", &group,
757 "put the counters into a counter group"),
756 OPT_BOOLEAN('g', "call-graph", &call_graph, 758 OPT_BOOLEAN('g', "call-graph", &call_graph,
757 "do call-graph (stack chain/backtrace) recording"), 759 "do call-graph (stack chain/backtrace) recording"),
758 OPT_INCR('v', "verbose", &verbose, 760 OPT_INCR('v', "verbose", &verbose,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 1ad04ce29c3..5deb17d9e79 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -193,6 +193,7 @@ static int big_num_opt = -1;
193static const char *cpu_list; 193static const char *cpu_list;
194static const char *csv_sep = NULL; 194static const char *csv_sep = NULL;
195static bool csv_output = false; 195static bool csv_output = false;
196static bool group = false;
196 197
197static volatile int done = 0; 198static volatile int done = 0;
198 199
@@ -280,14 +281,14 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
280 attr->inherit = !no_inherit; 281 attr->inherit = !no_inherit;
281 282
282 if (system_wide) 283 if (system_wide)
283 return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false); 284 return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, group);
284 285
285 if (target_pid == -1 && target_tid == -1) { 286 if (target_pid == -1 && target_tid == -1) {
286 attr->disabled = 1; 287 attr->disabled = 1;
287 attr->enable_on_exec = 1; 288 attr->enable_on_exec = 1;
288 } 289 }
289 290
290 return perf_evsel__open_per_thread(evsel, evsel_list->threads, false); 291 return perf_evsel__open_per_thread(evsel, evsel_list->threads, group);
291} 292}
292 293
293/* 294/*
@@ -1043,6 +1044,8 @@ static const struct option options[] = {
1043 "stat events on existing thread id"), 1044 "stat events on existing thread id"),
1044 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1045 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1045 "system-wide collection from all CPUs"), 1046 "system-wide collection from all CPUs"),
1047 OPT_BOOLEAN('g', "group", &group,
1048 "put the counters into a counter group"),
1046 OPT_BOOLEAN('c', "scale", &scale, 1049 OPT_BOOLEAN('c', "scale", &scale,
1047 "scale/normalize counters"), 1050 "scale/normalize counters"),
1048 OPT_INCR('v', "verbose", &verbose, 1051 OPT_INCR('v', "verbose", &verbose,
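The builtin-record and builtin-stat hunks above add a --group option and pass the flag down to perf_evsel__open_per_cpu()/__open_per_thread(), so all counters are opened in one scheduling group. At the syscall level a group is formed by handing the leader's fd to perf_event_open() as group_fd; a minimal sketch of that mechanism (not perf's own code, error handling trimmed):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

static int open_counter(struct perf_event_attr *attr, int group_fd)
{
        /* pid = 0 (this task), cpu = -1 (any CPU), flags = 0 */
        return syscall(SYS_perf_event_open, attr, 0, -1, group_fd, 0);
}

static void open_group(int *cycles_fd, int *insns_fd)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;

        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        *cycles_fd = open_counter(&attr, -1);           /* -1: becomes the group leader */

        attr.config = PERF_COUNT_HW_INSTRUCTIONS;
        *insns_fd = open_counter(&attr, *cycles_fd);    /* joins the leader's group */
}

From the command line the new options request the same behaviour, e.g. something like "perf stat -g -e cycles -e instructions ..." or "perf record --group ...", so the grouped events are scheduled onto the PMU together.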
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index fddf40f30d3..ee51e9b4dc0 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -96,6 +96,39 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr,
96 return *lineno ?: -ENOENT; 96 return *lineno ?: -ENOENT;
97} 97}
98 98
99static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data);
100
101/**
102 * cu_walk_functions_at - Walk on function DIEs at given address
103 * @cu_die: A CU DIE
104 * @addr: An address
105 * @callback: A callback which is called with each found DIE
106 * @data: User data passed to @callback
107 *
108 * Walk on the function DIEs at the given @addr in @cu_die. The DIEs
109 * passed to @callback should be subprogram or inlined-subroutine DIEs.
110 */
111int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
112 int (*callback)(Dwarf_Die *, void *), void *data)
113{
114 Dwarf_Die die_mem;
115 Dwarf_Die *sc_die;
116 int ret = -ENOENT;
117
118 /* An inlined function could be recursive. Trace it until the search fails. */
119 for (sc_die = die_find_realfunc(cu_die, addr, &die_mem);
120 sc_die != NULL;
121 sc_die = die_find_child(sc_die, __die_find_inline_cb, &addr,
122 &die_mem)) {
123 ret = callback(sc_die, data);
124 if (ret)
125 break;
126 }
127
128 return ret;
129
130}
131
99/** 132/**
100 * die_compare_name - Compare diename and tname 133 * die_compare_name - Compare diename and tname
101 * @dw_die: a DIE 134 * @dw_die: a DIE
@@ -198,6 +231,19 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
198 return 0; 231 return 0;
199} 232}
200 233
234/* Get an attribute and translate it as sdata */
235static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
236 Dwarf_Sword *result)
237{
238 Dwarf_Attribute attr;
239
240 if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
241 dwarf_formsdata(&attr, result) != 0)
242 return -ENOENT;
243
244 return 0;
245}
246
201/** 247/**
202 * die_is_signed_type - Check whether a type DIE is signed or not 248 * die_is_signed_type - Check whether a type DIE is signed or not
203 * @tp_die: a DIE of a type 249 * @tp_die: a DIE of a type
@@ -250,6 +296,50 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
250 return 0; 296 return 0;
251} 297}
252 298
299/* Get the call file index number in CU DIE */
300static int die_get_call_fileno(Dwarf_Die *in_die)
301{
302 Dwarf_Sword idx;
303
304 if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0)
305 return (int)idx;
306 else
307 return -ENOENT;
308}
309
310/* Get the declared file index number in CU DIE */
311static int die_get_decl_fileno(Dwarf_Die *pdie)
312{
313 Dwarf_Sword idx;
314
315 if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0)
316 return (int)idx;
317 else
318 return -ENOENT;
319}
320
321/**
322 * die_get_call_file - Get callsite file name of inlined function instance
323 * @in_die: a DIE of an inlined function instance
324 *
325 * Get the call-site file name of @in_die, i.e. the file from which the
326 * inlined function is called.
327 */
328const char *die_get_call_file(Dwarf_Die *in_die)
329{
330 Dwarf_Die cu_die;
331 Dwarf_Files *files;
332 int idx;
333
334 idx = die_get_call_fileno(in_die);
335 if (idx < 0 || !dwarf_diecu(in_die, &cu_die, NULL, NULL) ||
336 dwarf_getsrcfiles(&cu_die, &files, NULL) != 0)
337 return NULL;
338
339 return dwarf_filesrc(files, idx, NULL, NULL);
340}
341
342
253/** 343/**
254 * die_find_child - Generic DIE search function in DIE tree 344 * die_find_child - Generic DIE search function in DIE tree
255 * @rt_die: a root DIE 345 * @rt_die: a root DIE
@@ -374,9 +464,78 @@ Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
374 return die_mem; 464 return die_mem;
375} 465}
376 466
467struct __instance_walk_param {
468 void *addr;
469 int (*callback)(Dwarf_Die *, void *);
470 void *data;
471 int retval;
472};
473
474static int __die_walk_instances_cb(Dwarf_Die *inst, void *data)
475{
476 struct __instance_walk_param *iwp = data;
477 Dwarf_Attribute attr_mem;
478 Dwarf_Die origin_mem;
479 Dwarf_Attribute *attr;
480 Dwarf_Die *origin;
481 int tmp;
482
483 attr = dwarf_attr(inst, DW_AT_abstract_origin, &attr_mem);
484 if (attr == NULL)
485 return DIE_FIND_CB_CONTINUE;
486
487 origin = dwarf_formref_die(attr, &origin_mem);
488 if (origin == NULL || origin->addr != iwp->addr)
489 return DIE_FIND_CB_CONTINUE;
490
491 /* Ignore redundant instances */
492 if (dwarf_tag(inst) == DW_TAG_inlined_subroutine) {
493 dwarf_decl_line(origin, &tmp);
494 if (die_get_call_lineno(inst) == tmp) {
495 tmp = die_get_decl_fileno(origin);
496 if (die_get_call_fileno(inst) == tmp)
497 return DIE_FIND_CB_CONTINUE;
498 }
499 }
500
501 iwp->retval = iwp->callback(inst, iwp->data);
502
503 return (iwp->retval) ? DIE_FIND_CB_END : DIE_FIND_CB_CONTINUE;
504}
505
506/**
507 * die_walk_instances - Walk on instances of given DIE
508 * @or_die: an abstract original DIE
509 * @callback: a callback function which is called with each instance DIE
510 * @data: user data
511 *
512 * Walk on the instances of the given @or_die. @or_die must be an inlined
513 * function declaration. This returns the return value of @callback if it
514 * returns a non-zero value, or -ENOENT if there is no instance.
515 */
516int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *),
517 void *data)
518{
519 Dwarf_Die cu_die;
520 Dwarf_Die die_mem;
521 struct __instance_walk_param iwp = {
522 .addr = or_die->addr,
523 .callback = callback,
524 .data = data,
525 .retval = -ENOENT,
526 };
527
528 if (dwarf_diecu(or_die, &cu_die, NULL, NULL) == NULL)
529 return -ENOENT;
530
531 die_find_child(&cu_die, __die_walk_instances_cb, &iwp, &die_mem);
532
533 return iwp.retval;
534}
535
377/* Line walker internal parameters */ 536/* Line walker internal parameters */
378struct __line_walk_param { 537struct __line_walk_param {
379 const char *fname; 538 bool recursive;
380 line_walk_callback_t callback; 539 line_walk_callback_t callback;
381 void *data; 540 void *data;
382 int retval; 541 int retval;
@@ -385,39 +544,56 @@ struct __line_walk_param {
385static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) 544static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
386{ 545{
387 struct __line_walk_param *lw = data; 546 struct __line_walk_param *lw = data;
388 Dwarf_Addr addr; 547 Dwarf_Addr addr = 0;
548 const char *fname;
389 int lineno; 549 int lineno;
390 550
391 if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) { 551 if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
552 fname = die_get_call_file(in_die);
392 lineno = die_get_call_lineno(in_die); 553 lineno = die_get_call_lineno(in_die);
393 if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) { 554 if (fname && lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) {
394 lw->retval = lw->callback(lw->fname, lineno, addr, 555 lw->retval = lw->callback(fname, lineno, addr, lw->data);
395 lw->data);
396 if (lw->retval != 0) 556 if (lw->retval != 0)
397 return DIE_FIND_CB_END; 557 return DIE_FIND_CB_END;
398 } 558 }
399 } 559 }
400 return DIE_FIND_CB_SIBLING; 560 if (!lw->recursive)
561 /* Don't need to search recursively */
562 return DIE_FIND_CB_SIBLING;
563
564 if (addr) {
565 fname = dwarf_decl_file(in_die);
566 if (fname && dwarf_decl_line(in_die, &lineno) == 0) {
567 lw->retval = lw->callback(fname, lineno, addr, lw->data);
568 if (lw->retval != 0)
569 return DIE_FIND_CB_END;
570 }
571 }
572
573 /* Continue to search nested inlined function call-sites */
574 return DIE_FIND_CB_CONTINUE;
401} 575}
402 576
403/* Walk on lines of blocks included in given DIE */ 577/* Walk on lines of blocks included in given DIE */
404static int __die_walk_funclines(Dwarf_Die *sp_die, 578static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive,
405 line_walk_callback_t callback, void *data) 579 line_walk_callback_t callback, void *data)
406{ 580{
407 struct __line_walk_param lw = { 581 struct __line_walk_param lw = {
582 .recursive = recursive,
408 .callback = callback, 583 .callback = callback,
409 .data = data, 584 .data = data,
410 .retval = 0, 585 .retval = 0,
411 }; 586 };
412 Dwarf_Die die_mem; 587 Dwarf_Die die_mem;
413 Dwarf_Addr addr; 588 Dwarf_Addr addr;
589 const char *fname;
414 int lineno; 590 int lineno;
415 591
416 /* Handle function declaration line */ 592 /* Handle function declaration line */
417 lw.fname = dwarf_decl_file(sp_die); 593 fname = dwarf_decl_file(sp_die);
418 if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 && 594 if (fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
419 dwarf_entrypc(sp_die, &addr) == 0) { 595 dwarf_entrypc(sp_die, &addr) == 0) {
420 lw.retval = callback(lw.fname, lineno, addr, data); 596 lw.retval = callback(fname, lineno, addr, data);
421 if (lw.retval != 0) 597 if (lw.retval != 0)
422 goto done; 598 goto done;
423 } 599 }
@@ -430,7 +606,7 @@ static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
430{ 606{
431 struct __line_walk_param *lw = data; 607 struct __line_walk_param *lw = data;
432 608
433 lw->retval = __die_walk_funclines(sp_die, lw->callback, lw->data); 609 lw->retval = __die_walk_funclines(sp_die, true, lw->callback, lw->data);
434 if (lw->retval != 0) 610 if (lw->retval != 0)
435 return DWARF_CB_ABORT; 611 return DWARF_CB_ABORT;
436 612
@@ -439,7 +615,7 @@ static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
439 615
440/** 616/**
441 * die_walk_lines - Walk on lines inside given DIE 617 * die_walk_lines - Walk on lines inside given DIE
442 * @rt_die: a root DIE (CU or subprogram) 618 * @rt_die: a root DIE (CU, subprogram or inlined_subroutine)
443 * @callback: callback routine 619 * @callback: callback routine
444 * @data: user data 620 * @data: user data
445 * 621 *
@@ -460,12 +636,12 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
460 size_t nlines, i; 636 size_t nlines, i;
461 637
462 /* Get the CU die */ 638 /* Get the CU die */
463 if (dwarf_tag(rt_die) == DW_TAG_subprogram) 639 if (dwarf_tag(rt_die) != DW_TAG_compile_unit)
464 cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); 640 cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
465 else 641 else
466 cu_die = rt_die; 642 cu_die = rt_die;
467 if (!cu_die) { 643 if (!cu_die) {
468 pr_debug2("Failed to get CU from subprogram\n"); 644 pr_debug2("Failed to get CU from given DIE.\n");
469 return -EINVAL; 645 return -EINVAL;
470 } 646 }
471 647
@@ -509,7 +685,11 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
509 * subroutines. We have to check functions list or given function. 685 * subroutines. We have to check functions list or given function.
510 */ 686 */
511 if (rt_die != cu_die) 687 if (rt_die != cu_die)
512 ret = __die_walk_funclines(rt_die, callback, data); 688 /*
689 * No need to walk functions recursively, because nested
690 * inlined functions don't have lines of the specified DIE.
691 */
692 ret = __die_walk_funclines(rt_die, false, callback, data);
513 else { 693 else {
514 struct __line_walk_param param = { 694 struct __line_walk_param param = {
515 .callback = callback, 695 .callback = callback,
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index bc3b21167e7..6ce1717784b 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -34,12 +34,19 @@ extern const char *cu_get_comp_dir(Dwarf_Die *cu_die);
34extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, 34extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
35 const char **fname, int *lineno); 35 const char **fname, int *lineno);
36 36
37/* Walk on functions at given address */
38extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
39 int (*callback)(Dwarf_Die *, void *), void *data);
40
37/* Compare diename and tname */ 41/* Compare diename and tname */
38extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); 42extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
39 43
40/* Get callsite line number of inline-function instance */ 44/* Get callsite line number of inline-function instance */
41extern int die_get_call_lineno(Dwarf_Die *in_die); 45extern int die_get_call_lineno(Dwarf_Die *in_die);
42 46
47/* Get callsite file name of inlined function instance */
48extern const char *die_get_call_file(Dwarf_Die *in_die);
49
43/* Get type die */ 50/* Get type die */
44extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); 51extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
45 52
@@ -73,6 +80,10 @@ extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
73extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, 80extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
74 Dwarf_Die *die_mem); 81 Dwarf_Die *die_mem);
75 82
83/* Walk on the instances of given DIE */
84extern int die_walk_instances(Dwarf_Die *in_die,
85 int (*callback)(Dwarf_Die *, void *), void *data);
86
76/* Walker on lines (Note: line number will not be sorted) */ 87/* Walker on lines (Note: line number will not be sorted) */
77typedef int (* line_walk_callback_t) (const char *fname, int lineno, 88typedef int (* line_walk_callback_t) (const char *fname, int lineno,
78 Dwarf_Addr addr, void *data); 89 Dwarf_Addr addr, void *data);
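The helpers declared above (cu_walk_functions_at(), die_get_call_file(), die_walk_instances()) let callers iterate over the subprogram and nested inlined-subroutine DIEs covering an address. A rough usage sketch against this interface, assuming a CU DIE already located for the address (e.g. via libdw) and that dwarf-aux.h pulls in the libdw types as it does in the perf tree; the callback is purely illustrative:

#include <stdio.h>
#include "dwarf-aux.h"          /* declarations added above */

/* Print every function scope (outermost first) that covers the address. */
static int print_scope_cb(Dwarf_Die *fn_die, void *data)
{
        int *depth = data;
        const char *name = dwarf_diename(fn_die);

        printf("%*s%s\n", (*depth)++ * 2, "", name ? name : "??");
        return 0;       /* returning non-zero would stop the walk */
}

/* 0 on success, the callback's non-zero value if it stopped the walk,
 * or -ENOENT if no function DIE covers addr. */
static int dump_scopes(Dwarf_Die *cu_die, Dwarf_Addr addr)
{
        int depth = 0;

        return cu_walk_functions_at(cu_die, addr, print_scope_cb, &depth);
}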
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e03e7bc8205..c12bd476c6f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -85,10 +85,19 @@ int perf_evlist__add_default(struct perf_evlist *evlist)
85 struct perf_evsel *evsel = perf_evsel__new(&attr, 0); 85 struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
86 86
87 if (evsel == NULL) 87 if (evsel == NULL)
88 return -ENOMEM; 88 goto error;
89
90 /* use strdup() because free(evsel) assumes name is allocated */
91 evsel->name = strdup("cycles");
92 if (!evsel->name)
93 goto error_free;
89 94
90 perf_evlist__add(evlist, evsel); 95 perf_evlist__add(evlist, evsel);
91 return 0; 96 return 0;
97error_free:
98 perf_evsel__delete(evsel);
99error:
100 return -ENOMEM;
92} 101}
93 102
94void perf_evlist__disable(struct perf_evlist *evlist) 103void perf_evlist__disable(struct perf_evlist *evlist)
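The evlist fix above strdup()s the default event name because perf_evsel__delete() frees evsel->name, so storing a string literal there would eventually be handed to free(). The ownership rule it relies on, as a generic sketch (illustrative struct, not perf's perf_evsel):

#include <stdlib.h>
#include <string.h>

struct counter {
        char *name;             /* heap-allocated, owned by the struct */
};

static struct counter *counter_new(const char *name)
{
        struct counter *c = calloc(1, sizeof(*c));

        if (!c)
                return NULL;
        c->name = strdup(name); /* never store the caller's pointer directly */
        if (!c->name) {
                free(c);
                return NULL;
        }
        return c;
}

static void counter_delete(struct counter *c)
{
        free(c->name);          /* safe precisely because the ctor strdup()ed it */
        free(c);
}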
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index d4f3101773d..b6c1ad123ca 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -726,7 +726,16 @@ static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
726 return -1; 726 return -1;
727 727
728 bev.header = old_bev.header; 728 bev.header = old_bev.header;
729 bev.pid = 0; 729
730 /*
731 * As the pid is the missing value, we need to fill
732 * it properly. The header.misc value gives us a nice hint.
733 */
734 bev.pid = HOST_KERNEL_ID;
735 if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
736 bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
737 bev.pid = DEFAULT_GUEST_KERNEL_ID;
738
730 memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id)); 739 memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
731 __event_process_build_id(&bev, filename, session); 740 __event_process_build_id(&bev, filename, session);
732 741
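The quirk handler above reconstructs the pid field that old on-disk build-id events lacked, choosing the guest kernel id when header.misc marks the event as guest. Reduced to a helper for clarity (the *_ID and PERF_RECORD_MISC_* constants are the ones used in the hunk, defined elsewhere in the perf headers):

/* Sketch: pick a pid for a build-id event written before the pid field existed. */
static unsigned int build_id_quirk_pid(unsigned short misc)
{
        if (misc == PERF_RECORD_MISC_GUEST_USER ||
            misc == PERF_RECORD_MISC_GUEST_KERNEL)
                return DEFAULT_GUEST_KERNEL_ID;
        return HOST_KERNEL_ID;
}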
diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h
index 791f9dd27eb..547628e97f3 100644
--- a/tools/perf/util/include/linux/compiler.h
+++ b/tools/perf/util/include/linux/compiler.h
@@ -5,7 +5,9 @@
5#define __always_inline inline 5#define __always_inline inline
6#endif 6#endif
7#define __user 7#define __user
8#ifndef __attribute_const__
8#define __attribute_const__ 9#define __attribute_const__
10#endif
9 11
10#define __used __attribute__((__unused__)) 12#define __used __attribute__((__unused__))
11 13
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4ea7e19f525..928918b796b 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -697,7 +697,11 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr)
697 return EVT_FAILED; 697 return EVT_FAILED;
698 n = hex2u64(str + 1, &config); 698 n = hex2u64(str + 1, &config);
699 if (n > 0) { 699 if (n > 0) {
700 *strp = str + n + 1; 700 const char *end = str + n + 1;
701 if (*end != '\0' && *end != ',' && *end != ':')
702 return EVT_FAILED;
703
704 *strp = end;
701 attr->type = PERF_TYPE_RAW; 705 attr->type = PERF_TYPE_RAW;
702 attr->config = config; 706 attr->config = config;
703 return EVT_HANDLED; 707 return EVT_HANDLED;
@@ -1097,6 +1101,4 @@ void print_events(const char *event_glob)
1097 printf("\n"); 1101 printf("\n");
1098 1102
1099 print_tracepoint_events(NULL, NULL); 1103 print_tracepoint_events(NULL, NULL);
1100
1101 exit(129);
1102} 1104}
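The raw-event fix above rejects a spec whose hex payload is followed by stray characters: after hex2u64() consumes the digits, only '\0', ',' or ':' may follow. The same validation as a standalone sketch, using strtoull() in place of perf's hex2u64() (function name is illustrative):

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

/* Parse "r<hex>" and reject trailing junk such as "r1z". */
static int parse_raw_spec(const char *str, uint64_t *config)
{
        char *end;

        if (str[0] != 'r')
                return -EINVAL;
        errno = 0;
        *config = strtoull(str + 1, &end, 16);
        if (errno || end == str + 1)
                return -EINVAL;         /* no hex digits at all */
        if (*end != '\0' && *end != ',' && *end != ':')
                return -EINVAL;         /* the case the fix above now catches */
        return 0;
}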
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 3e44a3e3651..555fc3864b9 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -612,12 +612,12 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
612 return ret; 612 return ret;
613} 613}
614 614
615/* Find a variable in a subprogram die */ 615/* Find a variable in a scope DIE */
616static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf) 616static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
617{ 617{
618 Dwarf_Die vr_die, *scopes; 618 Dwarf_Die vr_die;
619 char buf[32], *ptr; 619 char buf[32], *ptr;
620 int ret, nscopes; 620 int ret = 0;
621 621
622 if (!is_c_varname(pf->pvar->var)) { 622 if (!is_c_varname(pf->pvar->var)) {
623 /* Copy raw parameters */ 623 /* Copy raw parameters */
@@ -652,30 +652,16 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
652 if (pf->tvar->name == NULL) 652 if (pf->tvar->name == NULL)
653 return -ENOMEM; 653 return -ENOMEM;
654 654
655 pr_debug("Searching '%s' variable in context.\n", 655 pr_debug("Searching '%s' variable in context.\n", pf->pvar->var);
656 pf->pvar->var);
657 /* Search child die for local variables and parameters. */ 656 /* Search child die for local variables and parameters. */
658 if (die_find_variable_at(sp_die, pf->pvar->var, pf->addr, &vr_die)) 657 if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) {
659 ret = convert_variable(&vr_die, pf); 658 /* Search again in global variables */
660 else { 659 if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die))
661 /* Search upper class */ 660 ret = -ENOENT;
662 nscopes = dwarf_getscopes_die(sp_die, &scopes);
663 while (nscopes-- > 1) {
664 pr_debug("Searching variables in %s\n",
665 dwarf_diename(&scopes[nscopes]));
666 /* We should check this scope, so give dummy address */
667 if (die_find_variable_at(&scopes[nscopes],
668 pf->pvar->var, 0,
669 &vr_die)) {
670 ret = convert_variable(&vr_die, pf);
671 goto found;
672 }
673 }
674 if (scopes)
675 free(scopes);
676 ret = -ENOENT;
677 } 661 }
678found: 662 if (ret == 0)
663 ret = convert_variable(&vr_die, pf);
664
679 if (ret < 0) 665 if (ret < 0)
680 pr_warning("Failed to find '%s' in this function.\n", 666 pr_warning("Failed to find '%s' in this function.\n",
681 pf->pvar->var); 667 pf->pvar->var);
@@ -718,26 +704,30 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
718 return 0; 704 return 0;
719} 705}
720 706
721/* Call probe_finder callback with real subprogram DIE */ 707/* Call probe_finder callback with scope DIE */
722static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf) 708static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
723{ 709{
724 Dwarf_Die die_mem;
725 Dwarf_Attribute fb_attr; 710 Dwarf_Attribute fb_attr;
726 size_t nops; 711 size_t nops;
727 int ret; 712 int ret;
728 713
729 /* If no real subprogram, find a real one */ 714 if (!sc_die) {
730 if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) { 715 pr_err("Caller must pass a scope DIE. Program error.\n");
731 sp_die = die_find_realfunc(&pf->cu_die, pf->addr, &die_mem); 716 return -EINVAL;
732 if (!sp_die) { 717 }
718
719 /* If not a real subprogram, find a real one */
720 if (dwarf_tag(sc_die) != DW_TAG_subprogram) {
721 if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
733 pr_warning("Failed to find probe point in any " 722 pr_warning("Failed to find probe point in any "
734 "functions.\n"); 723 "functions.\n");
735 return -ENOENT; 724 return -ENOENT;
736 } 725 }
737 } 726 } else
727 memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die));
738 728
739 /* Get the frame base attribute/ops */ 729 /* Get the frame base attribute/ops from subprogram */
740 dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr); 730 dwarf_attr(&pf->sp_die, DW_AT_frame_base, &fb_attr);
741 ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); 731 ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1);
742 if (ret <= 0 || nops == 0) { 732 if (ret <= 0 || nops == 0) {
743 pf->fb_ops = NULL; 733 pf->fb_ops = NULL;
@@ -755,7 +745,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
755 } 745 }
756 746
757 /* Call finder's callback handler */ 747 /* Call finder's callback handler */
758 ret = pf->callback(sp_die, pf); 748 ret = pf->callback(sc_die, pf);
759 749
760 /* *pf->fb_ops will be cached in libdw. Don't free it. */ 750 /* *pf->fb_ops will be cached in libdw. Don't free it. */
761 pf->fb_ops = NULL; 751 pf->fb_ops = NULL;
@@ -763,17 +753,82 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
763 return ret; 753 return ret;
764} 754}
765 755
756struct find_scope_param {
757 const char *function;
758 const char *file;
759 int line;
760 int diff;
761 Dwarf_Die *die_mem;
762 bool found;
763};
764
765static int find_best_scope_cb(Dwarf_Die *fn_die, void *data)
766{
767 struct find_scope_param *fsp = data;
768 const char *file;
769 int lno;
770
771 /* Skip if declared file name does not match */
772 if (fsp->file) {
773 file = dwarf_decl_file(fn_die);
774 if (!file || strcmp(fsp->file, file) != 0)
775 return 0;
776 }
777 /* If the function name is given, that's what the user expects */
778 if (fsp->function) {
779 if (die_compare_name(fn_die, fsp->function)) {
780 memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
781 fsp->found = true;
782 return 1;
783 }
784 } else {
785 /* With the line number, find the nearest declared DIE */
786 dwarf_decl_line(fn_die, &lno);
787 if (lno < fsp->line && fsp->diff > fsp->line - lno) {
788 /* Keep a candidate and continue */
789 fsp->diff = fsp->line - lno;
790 memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
791 fsp->found = true;
792 }
793 }
794 return 0;
795}
796
797/* Find an appropriate scope that fits the given conditions */
798static Dwarf_Die *find_best_scope(struct probe_finder *pf, Dwarf_Die *die_mem)
799{
800 struct find_scope_param fsp = {
801 .function = pf->pev->point.function,
802 .file = pf->fname,
803 .line = pf->lno,
804 .diff = INT_MAX,
805 .die_mem = die_mem,
806 .found = false,
807 };
808
809 cu_walk_functions_at(&pf->cu_die, pf->addr, find_best_scope_cb, &fsp);
810
811 return fsp.found ? die_mem : NULL;
812}
813
766static int probe_point_line_walker(const char *fname, int lineno, 814static int probe_point_line_walker(const char *fname, int lineno,
767 Dwarf_Addr addr, void *data) 815 Dwarf_Addr addr, void *data)
768{ 816{
769 struct probe_finder *pf = data; 817 struct probe_finder *pf = data;
818 Dwarf_Die *sc_die, die_mem;
770 int ret; 819 int ret;
771 820
772 if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0) 821 if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0)
773 return 0; 822 return 0;
774 823
775 pf->addr = addr; 824 pf->addr = addr;
776 ret = call_probe_finder(NULL, pf); 825 sc_die = find_best_scope(pf, &die_mem);
826 if (!sc_die) {
827 pr_warning("Failed to find scope of probe point.\n");
828 return -ENOENT;
829 }
830
831 ret = call_probe_finder(sc_die, pf);
777 832
778 /* Continue if no error, because the line will be in inline function */ 833 /* Continue if no error, because the line will be in inline function */
779 return ret < 0 ? ret : 0; 834 return ret < 0 ? ret : 0;
@@ -827,6 +882,7 @@ static int probe_point_lazy_walker(const char *fname, int lineno,
827 Dwarf_Addr addr, void *data) 882 Dwarf_Addr addr, void *data)
828{ 883{
829 struct probe_finder *pf = data; 884 struct probe_finder *pf = data;
885 Dwarf_Die *sc_die, die_mem;
830 int ret; 886 int ret;
831 887
832 if (!line_list__has_line(&pf->lcache, lineno) || 888 if (!line_list__has_line(&pf->lcache, lineno) ||
@@ -836,7 +892,14 @@ static int probe_point_lazy_walker(const char *fname, int lineno,
836 pr_debug("Probe line found: line:%d addr:0x%llx\n", 892 pr_debug("Probe line found: line:%d addr:0x%llx\n",
837 lineno, (unsigned long long)addr); 893 lineno, (unsigned long long)addr);
838 pf->addr = addr; 894 pf->addr = addr;
839 ret = call_probe_finder(NULL, pf); 895 pf->lno = lineno;
896 sc_die = find_best_scope(pf, &die_mem);
897 if (!sc_die) {
898 pr_warning("Failed to find scope of probe point.\n");
899 return -ENOENT;
900 }
901
902 ret = call_probe_finder(sc_die, pf);
840 903
841 /* 904 /*
842 * Continue if no error, because the lazy pattern will match 905 * Continue if no error, because the lazy pattern will match
@@ -861,42 +924,39 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
861 return die_walk_lines(sp_die, probe_point_lazy_walker, pf); 924 return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
862} 925}
863 926
864/* Callback parameter with return value */
865struct dwarf_callback_param {
866 void *data;
867 int retval;
868};
869
870static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) 927static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
871{ 928{
872 struct dwarf_callback_param *param = data; 929 struct probe_finder *pf = data;
873 struct probe_finder *pf = param->data;
874 struct perf_probe_point *pp = &pf->pev->point; 930 struct perf_probe_point *pp = &pf->pev->point;
875 Dwarf_Addr addr; 931 Dwarf_Addr addr;
932 int ret;
876 933
877 if (pp->lazy_line) 934 if (pp->lazy_line)
878 param->retval = find_probe_point_lazy(in_die, pf); 935 ret = find_probe_point_lazy(in_die, pf);
879 else { 936 else {
880 /* Get probe address */ 937 /* Get probe address */
881 if (dwarf_entrypc(in_die, &addr) != 0) { 938 if (dwarf_entrypc(in_die, &addr) != 0) {
882 pr_warning("Failed to get entry address of %s.\n", 939 pr_warning("Failed to get entry address of %s.\n",
883 dwarf_diename(in_die)); 940 dwarf_diename(in_die));
884 param->retval = -ENOENT; 941 return -ENOENT;
885 return DWARF_CB_ABORT;
886 } 942 }
887 pf->addr = addr; 943 pf->addr = addr;
888 pf->addr += pp->offset; 944 pf->addr += pp->offset;
889 pr_debug("found inline addr: 0x%jx\n", 945 pr_debug("found inline addr: 0x%jx\n",
890 (uintmax_t)pf->addr); 946 (uintmax_t)pf->addr);
891 947
892 param->retval = call_probe_finder(in_die, pf); 948 ret = call_probe_finder(in_die, pf);
893 if (param->retval < 0)
894 return DWARF_CB_ABORT;
895 } 949 }
896 950
897 return DWARF_CB_OK; 951 return ret;
898} 952}
899 953
954/* Callback parameter with return value for libdw */
955struct dwarf_callback_param {
956 void *data;
957 int retval;
958};
959
900/* Search function from function name */ 960/* Search function from function name */
901static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) 961static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
902{ 962{
@@ -933,14 +993,10 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
933 /* TODO: Check the address in this function */ 993 /* TODO: Check the address in this function */
934 param->retval = call_probe_finder(sp_die, pf); 994 param->retval = call_probe_finder(sp_die, pf);
935 } 995 }
936 } else { 996 } else
937 struct dwarf_callback_param _param = {.data = (void *)pf,
938 .retval = 0};
939 /* Inlined function: search instances */ 997 /* Inlined function: search instances */
940 dwarf_func_inline_instances(sp_die, probe_point_inline_cb, 998 param->retval = die_walk_instances(sp_die,
941 &_param); 999 probe_point_inline_cb, (void *)pf);
942 param->retval = _param.retval;
943 }
944 1000
945 return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */ 1001 return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */
946} 1002}
@@ -1060,7 +1116,7 @@ found:
1060} 1116}
1061 1117
1062/* Add a found probe point into trace event list */ 1118/* Add a found probe point into trace event list */
1063static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf) 1119static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
1064{ 1120{
1065 struct trace_event_finder *tf = 1121 struct trace_event_finder *tf =
1066 container_of(pf, struct trace_event_finder, pf); 1122 container_of(pf, struct trace_event_finder, pf);
@@ -1075,8 +1131,9 @@ static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf)
1075 } 1131 }
1076 tev = &tf->tevs[tf->ntevs++]; 1132 tev = &tf->tevs[tf->ntevs++];
1077 1133
1078 ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe, 1134 /* Trace point should be converted from subprogram DIE */
1079 &tev->point); 1135 ret = convert_to_trace_point(&pf->sp_die, pf->addr,
1136 pf->pev->point.retprobe, &tev->point);
1080 if (ret < 0) 1137 if (ret < 0)
1081 return ret; 1138 return ret;
1082 1139
@@ -1091,7 +1148,8 @@ static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf)
1091 for (i = 0; i < pf->pev->nargs; i++) { 1148 for (i = 0; i < pf->pev->nargs; i++) {
1092 pf->pvar = &pf->pev->args[i]; 1149 pf->pvar = &pf->pev->args[i];
1093 pf->tvar = &tev->args[i]; 1150 pf->tvar = &tev->args[i];
1094 ret = find_variable(sp_die, pf); 1151 /* Variable should be found from scope DIE */
1152 ret = find_variable(sc_die, pf);
1095 if (ret != 0) 1153 if (ret != 0)
1096 return ret; 1154 return ret;
1097 } 1155 }
@@ -1159,13 +1217,13 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
1159} 1217}
1160 1218
1161/* Add a found vars into available variables list */ 1219/* Add a found vars into available variables list */
1162static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf) 1220static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
1163{ 1221{
1164 struct available_var_finder *af = 1222 struct available_var_finder *af =
1165 container_of(pf, struct available_var_finder, pf); 1223 container_of(pf, struct available_var_finder, pf);
1166 struct variable_list *vl; 1224 struct variable_list *vl;
1167 Dwarf_Die die_mem, *scopes = NULL; 1225 Dwarf_Die die_mem;
1168 int ret, nscopes; 1226 int ret;
1169 1227
1170 /* Check number of tevs */ 1228 /* Check number of tevs */
1171 if (af->nvls == af->max_vls) { 1229 if (af->nvls == af->max_vls) {
@@ -1174,8 +1232,9 @@ static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf)
1174 } 1232 }
1175 vl = &af->vls[af->nvls++]; 1233 vl = &af->vls[af->nvls++];
1176 1234
1177 ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe, 1235 /* Trace point should be converted from subprogram DIE */
1178 &vl->point); 1236 ret = convert_to_trace_point(&pf->sp_die, pf->addr,
1237 pf->pev->point.retprobe, &vl->point);
1179 if (ret < 0) 1238 if (ret < 0)
1180 return ret; 1239 return ret;
1181 1240
@@ -1187,19 +1246,14 @@ static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf)
1187 if (vl->vars == NULL) 1246 if (vl->vars == NULL)
1188 return -ENOMEM; 1247 return -ENOMEM;
1189 af->child = true; 1248 af->child = true;
1190 die_find_child(sp_die, collect_variables_cb, (void *)af, &die_mem); 1249 die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem);
1191 1250
1192 /* Find external variables */ 1251 /* Find external variables */
1193 if (!af->externs) 1252 if (!af->externs)
1194 goto out; 1253 goto out;
1195 /* Don't need to search child DIE for externs. */ 1254 /* Don't need to search child DIE for externs. */
1196 af->child = false; 1255 af->child = false;
1197 nscopes = dwarf_getscopes_die(sp_die, &scopes); 1256 die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem);
1198 while (nscopes-- > 1)
1199 die_find_child(&scopes[nscopes], collect_variables_cb,
1200 (void *)af, &die_mem);
1201 if (scopes)
1202 free(scopes);
1203 1257
1204out: 1258out:
1205 if (strlist__empty(vl->vars)) { 1259 if (strlist__empty(vl->vars)) {
@@ -1391,10 +1445,14 @@ static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
1391 1445
1392static int line_range_inline_cb(Dwarf_Die *in_die, void *data) 1446static int line_range_inline_cb(Dwarf_Die *in_die, void *data)
1393{ 1447{
1394 struct dwarf_callback_param *param = data; 1448 find_line_range_by_line(in_die, data);
1395 1449
1396 param->retval = find_line_range_by_line(in_die, param->data); 1450 /*
1397 return DWARF_CB_ABORT; /* No need to find other instances */ 1451 * We have to check all instances of the inlined function, because
1452 * some execution paths can be optimized out depending on the
1453 * function arguments of each instance.
1454 */
1455 return 0;
1398} 1456}
1399 1457
1400/* Search function from function name */ 1458/* Search function from function name */
@@ -1422,15 +1480,10 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
1422 pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e); 1480 pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e);
1423 lr->start = lf->lno_s; 1481 lr->start = lf->lno_s;
1424 lr->end = lf->lno_e; 1482 lr->end = lf->lno_e;
1425 if (dwarf_func_inline(sp_die)) { 1483 if (dwarf_func_inline(sp_die))
1426 struct dwarf_callback_param _param; 1484 param->retval = die_walk_instances(sp_die,
1427 _param.data = (void *)lf; 1485 line_range_inline_cb, lf);
1428 _param.retval = 0; 1486 else
1429 dwarf_func_inline_instances(sp_die,
1430 line_range_inline_cb,
1431 &_param);
1432 param->retval = _param.retval;
1433 } else
1434 param->retval = find_line_range_by_line(sp_die, lf); 1487 param->retval = find_line_range_by_line(sp_die, lf);
1435 return DWARF_CB_ABORT; 1488 return DWARF_CB_ABORT;
1436 } 1489 }
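With this change a probe address is first mapped to the best-fitting scope DIE (find_best_scope() walks the subprogram and inlined instances covering the address via cu_walk_functions_at()), and find_variable() then looks the variable up in that scope before falling back to CU-level globals. The two-step lookup, reduced to a sketch against the die_find_variable_at() helper the hunk already uses (prototype assumed from its use here; error handling trimmed):

#include <errno.h>
#include "dwarf-aux.h"          /* die_find_variable_at() */

/* Look up 'var' in the scope covering 'addr', then among CU globals. */
static int lookup_variable(Dwarf_Die *sc_die, Dwarf_Die *cu_die,
                           const char *var, Dwarf_Addr addr, Dwarf_Die *vr_die)
{
        /* locals and parameters visible at this address in the scope DIE */
        if (die_find_variable_at(sc_die, var, addr, vr_die))
                return 0;
        /* address 0 skips the location check, as in find_variable() above */
        if (die_find_variable_at(cu_die, var, 0, vr_die))
                return 0;
        return -ENOENT;
}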
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index c478b42a247..1132c8f0ce8 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -57,7 +57,7 @@ struct probe_finder {
57 struct perf_probe_event *pev; /* Target probe event */ 57 struct perf_probe_event *pev; /* Target probe event */
58 58
59 /* Callback when a probe point is found */ 59 /* Callback when a probe point is found */
60 int (*callback)(Dwarf_Die *sp_die, struct probe_finder *pf); 60 int (*callback)(Dwarf_Die *sc_die, struct probe_finder *pf);
61 61
62 /* For function searching */ 62 /* For function searching */
63 int lno; /* Line number */ 63 int lno; /* Line number */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index a8b53714542..469c0264ed2 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1506,7 +1506,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
1506 if (strncmp(dso->name, "/tmp/perf-", 10) == 0) { 1506 if (strncmp(dso->name, "/tmp/perf-", 10) == 0) {
1507 struct stat st; 1507 struct stat st;
1508 1508
1509 if (stat(dso->name, &st) < 0) 1509 if (lstat(dso->name, &st) < 0)
1510 return -1; 1510 return -1;
1511 1511
1512 if (st.st_uid && (st.st_uid != geteuid())) { 1512 if (st.st_uid && (st.st_uid != geteuid())) {
@@ -2181,27 +2181,22 @@ size_t machines__fprintf_dsos_buildid(struct rb_root *machines,
2181 return ret; 2181 return ret;
2182} 2182}
2183 2183
2184struct dso *dso__new_kernel(const char *name) 2184static struct dso*
2185dso__kernel_findnew(struct machine *machine, const char *name,
2186 const char *short_name, int dso_type)
2185{ 2187{
2186 struct dso *dso = dso__new(name ?: "[kernel.kallsyms]"); 2188 /*
2187 2189 * The kernel dso could be created by build_id processing.
2188 if (dso != NULL) { 2190 */
2189 dso__set_short_name(dso, "[kernel]"); 2191 struct dso *dso = __dsos__findnew(&machine->kernel_dsos, name);
2190 dso->kernel = DSO_TYPE_KERNEL;
2191 }
2192
2193 return dso;
2194}
2195 2192
2196static struct dso *dso__new_guest_kernel(struct machine *machine, 2193 /*
2197 const char *name) 2194 * We need to run this in all cases, since during the build_id
2198{ 2195 * processing we had no idea this was the kernel dso.
2199 char bf[PATH_MAX]; 2196 */
2200 struct dso *dso = dso__new(name ?: machine__mmap_name(machine, bf,
2201 sizeof(bf)));
2202 if (dso != NULL) { 2197 if (dso != NULL) {
2203 dso__set_short_name(dso, "[guest.kernel]"); 2198 dso__set_short_name(dso, short_name);
2204 dso->kernel = DSO_TYPE_GUEST_KERNEL; 2199 dso->kernel = dso_type;
2205 } 2200 }
2206 2201
2207 return dso; 2202 return dso;
@@ -2219,24 +2214,36 @@ void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
2219 dso->has_build_id = true; 2214 dso->has_build_id = true;
2220} 2215}
2221 2216
2222static struct dso *machine__create_kernel(struct machine *machine) 2217static struct dso *machine__get_kernel(struct machine *machine)
2223{ 2218{
2224 const char *vmlinux_name = NULL; 2219 const char *vmlinux_name = NULL;
2225 struct dso *kernel; 2220 struct dso *kernel;
2226 2221
2227 if (machine__is_host(machine)) { 2222 if (machine__is_host(machine)) {
2228 vmlinux_name = symbol_conf.vmlinux_name; 2223 vmlinux_name = symbol_conf.vmlinux_name;
2229 kernel = dso__new_kernel(vmlinux_name); 2224 if (!vmlinux_name)
2225 vmlinux_name = "[kernel.kallsyms]";
2226
2227 kernel = dso__kernel_findnew(machine, vmlinux_name,
2228 "[kernel]",
2229 DSO_TYPE_KERNEL);
2230 } else { 2230 } else {
2231 char bf[PATH_MAX];
2232
2231 if (machine__is_default_guest(machine)) 2233 if (machine__is_default_guest(machine))
2232 vmlinux_name = symbol_conf.default_guest_vmlinux_name; 2234 vmlinux_name = symbol_conf.default_guest_vmlinux_name;
2233 kernel = dso__new_guest_kernel(machine, vmlinux_name); 2235 if (!vmlinux_name)
2236 vmlinux_name = machine__mmap_name(machine, bf,
2237 sizeof(bf));
2238
2239 kernel = dso__kernel_findnew(machine, vmlinux_name,
2240 "[guest.kernel]",
2241 DSO_TYPE_GUEST_KERNEL);
2234 } 2242 }
2235 2243
2236 if (kernel != NULL) { 2244 if (kernel != NULL && (!kernel->has_build_id))
2237 dso__read_running_kernel_build_id(kernel, machine); 2245 dso__read_running_kernel_build_id(kernel, machine);
2238 dsos__add(&machine->kernel_dsos, kernel); 2246
2239 }
2240 return kernel; 2247 return kernel;
2241} 2248}
2242 2249
@@ -2340,7 +2347,7 @@ void machine__destroy_kernel_maps(struct machine *machine)
2340 2347
2341int machine__create_kernel_maps(struct machine *machine) 2348int machine__create_kernel_maps(struct machine *machine)
2342{ 2349{
2343 struct dso *kernel = machine__create_kernel(machine); 2350 struct dso *kernel = machine__get_kernel(machine);
2344 2351
2345 if (kernel == NULL || 2352 if (kernel == NULL ||
2346 __machine__create_kernel_maps(machine, kernel) < 0) 2353 __machine__create_kernel_maps(machine, kernel) < 0)
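dso__kernel_findnew() above replaces the two kernel dso constructors with a single find-or-create on machine->kernel_dsos, so a dso already created by build-id processing is reused instead of duplicated and is only then tagged with its short name and kernel type. The generic find-or-create shape, as a small self-contained sketch (illustrative singly linked list, not perf's dsos/list_head machinery):

#include <stdlib.h>
#include <string.h>

struct dso {
        struct dso *next;
        char *name;
        int kernel;             /* 0 until we learn this is a kernel dso */
};

/* Find an existing entry by name, or create and link a new one. */
static struct dso *dsos_findnew(struct dso **head, const char *name)
{
        struct dso *d;

        for (d = *head; d; d = d->next)
                if (!strcmp(d->name, name))
                        return d;       /* reuse, e.g. one made during build-id processing */

        d = calloc(1, sizeof(*d));
        if (!d)
                return NULL;
        d->name = strdup(name);
        if (!d->name) {
                free(d);
                return NULL;
        }
        d->next = *head;
        *head = d;
        return d;
}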
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 325ee36a9d2..4f377d92e75 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -155,7 +155,6 @@ struct dso {
155}; 155};
156 156
157struct dso *dso__new(const char *name); 157struct dso *dso__new(const char *name);
158struct dso *dso__new_kernel(const char *name);
159void dso__delete(struct dso *dso); 158void dso__delete(struct dso *dso);
160 159
161int dso__name_len(const struct dso *dso); 160int dso__name_len(const struct dso *dso);
diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c
index 5a06538532a..88403cf8396 100644
--- a/tools/perf/util/ui/browsers/top.c
+++ b/tools/perf/util/ui/browsers/top.c
@@ -208,6 +208,5 @@ int perf_top__tui_browser(struct perf_top *top)
208 }, 208 },
209 }; 209 };
210 210
211 ui_helpline__push("Press <- or ESC to exit");
212 return perf_top_browser__run(&browser); 211 return perf_top_browser__run(&browser);
213} 212}