165 files changed, 5170 insertions, 934 deletions
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 3f5e0b09bed..53e6fca146d 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt | |||
@@ -45,7 +45,7 @@ arrived in memory (this becomes more likely with devices behind PCI-PCI | |||
45 | bridges). In order to ensure that all the data has arrived in memory, | 45 | bridges). In order to ensure that all the data has arrived in memory, |
46 | the interrupt handler must read a register on the device which raised | 46 | the interrupt handler must read a register on the device which raised |
47 | the interrupt. PCI transaction ordering rules require that all the data | 47 | the interrupt. PCI transaction ordering rules require that all the data |
48 | arrives in memory before the value can be returned from the register. | 48 | arrive in memory before the value may be returned from the register. |
49 | Using MSIs avoids this problem as the interrupt-generating write cannot | 49 | Using MSIs avoids this problem as the interrupt-generating write cannot |
50 | pass the data writes, so by the time the interrupt is raised, the driver | 50 | pass the data writes, so by the time the interrupt is raised, the driver |
51 | knows that all the data has arrived in memory. | 51 | knows that all the data has arrived in memory. |
@@ -86,13 +86,13 @@ device. | |||
86 | 86 | ||
87 | int pci_enable_msi(struct pci_dev *dev) | 87 | int pci_enable_msi(struct pci_dev *dev) |
88 | 88 | ||
89 | A successful call will allocate ONE interrupt to the device, regardless | 89 | A successful call allocates ONE interrupt to the device, regardless |
90 | of how many MSIs the device supports. The device will be switched from | 90 | of how many MSIs the device supports. The device is switched from |
91 | pin-based interrupt mode to MSI mode. The dev->irq number is changed | 91 | pin-based interrupt mode to MSI mode. The dev->irq number is changed |
92 | to a new number which represents the message signaled interrupt. | 92 | to a new number which represents the message signaled interrupt; |
93 | This function should be called before the driver calls request_irq() | 93 | consequently, this function should be called before the driver calls |
94 | since enabling MSIs disables the pin-based IRQ and the driver will not | 94 | request_irq(), because an MSI is delivered via a vector that is |
95 | receive interrupts on the old interrupt. | 95 | different from the vector of a pin-based interrupt. |
96 | 96 | ||
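As an illustration, a driver would typically enable MSI in its probe() routine before requesting the interrupt. A minimal sketch (foo_interrupt() and the "foo" name are placeholders):

static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	int rc;

	rc = pci_enable_msi(pdev);
	if (rc)
		return rc;	/* could fall back to the pin-based IRQ instead */

	/* pdev->irq now refers to the message signaled interrupt */
	rc = request_irq(pdev->irq, foo_interrupt, 0, "foo", pdev);
	if (rc)
		pci_disable_msi(pdev);
	return rc;
}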
97 | 4.2.2 pci_enable_msi_block | 97 | 4.2.2 pci_enable_msi_block |
98 | 98 | ||
@@ -111,20 +111,20 @@ the device are in the range dev->irq to dev->irq + count - 1. | |||
111 | 111 | ||
112 | If this function returns a negative number, it indicates an error and | 112 | If this function returns a negative number, it indicates an error and |
113 | the driver should not attempt to request any more MSI interrupts for | 113 | the driver should not attempt to request any more MSI interrupts for |
114 | this device. If this function returns a positive number, it will be | 114 | this device. If this function returns a positive number, it is |
115 | less than 'count' and indicate the number of interrupts that could have | 115 | less than 'count' and indicates the number of interrupts that could have |
116 | been allocated. In neither case will the irq value have been | 116 | been allocated. In neither case is the irq value updated or the device |
117 | updated, nor will the device have been switched into MSI mode. | 117 | switched into MSI mode. |
118 | 118 | ||
119 | The device driver must decide what action to take if | 119 | The device driver must decide what action to take if |
120 | pci_enable_msi_block() returns a value less than the number asked for. | 120 | pci_enable_msi_block() returns a value less than the number requested. |
121 | Some devices can make use of fewer interrupts than the maximum they | 121 | For instance, the driver could still make use of fewer interrupts; |
122 | request; in this case the driver should call pci_enable_msi_block() | 122 | in this case the driver should call pci_enable_msi_block() |
123 | again. Note that it is not guaranteed to succeed, even when the | 123 | again. Note that it is not guaranteed to succeed, even when the |
124 | 'count' has been reduced to the value returned from a previous call to | 124 | 'count' has been reduced to the value returned from a previous call to |
125 | pci_enable_msi_block(). This is because there are multiple constraints | 125 | pci_enable_msi_block(). This is because there are multiple constraints |
126 | on the number of vectors that can be allocated; pci_enable_msi_block() | 126 | on the number of vectors that can be allocated; pci_enable_msi_block() |
127 | will return as soon as it finds any constraint that doesn't allow the | 127 | returns as soon as it finds any constraint that doesn't allow the |
128 | call to succeed. | 128 | call to succeed. |
129 | 129 | ||
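A driver that can make use of fewer interrupts might retry with the value returned, as described above. An illustrative sketch (FOO_MAX_MSIS is a made-up per-driver maximum):

static int foo_enable_msi_block(struct pci_dev *pdev)
{
	int nvec = FOO_MAX_MSIS;
	int rc;

	/* Retry with the number of vectors the PCI core reports
	 * could have been allocated; this may loop several times. */
	while ((rc = pci_enable_msi_block(pdev, nvec)) > 0)
		nvec = rc;

	return rc;	/* 0 on success, negative errno on failure */
}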
130 | 4.2.3 pci_disable_msi | 130 | 4.2.3 pci_disable_msi |
@@ -137,10 +137,10 @@ interrupt number and frees the previously allocated message signaled | |||
137 | interrupt(s). The interrupt may subsequently be assigned to another | 137 | interrupt(s). The interrupt may subsequently be assigned to another |
138 | device, so drivers should not cache the value of dev->irq. | 138 | device, so drivers should not cache the value of dev->irq. |
139 | 139 | ||
140 | A device driver must always call free_irq() on the interrupt(s) | 140 | Before calling this function, a device driver must always call free_irq() |
141 | for which it has called request_irq() before calling this function. | 141 | on any interrupt for which it previously called request_irq(). |
142 | Failure to do so will result in a BUG_ON(), the device will be left with | 142 | Failure to do so results in a BUG_ON(), leaving the device with |
143 | MSI enabled and will leak its vector. | 143 | MSI enabled and thus leaking its vector. |
144 | 144 | ||
145 | 4.3 Using MSI-X | 145 | 4.3 Using MSI-X |
146 | 146 | ||
@@ -155,10 +155,10 @@ struct msix_entry { | |||
155 | }; | 155 | }; |
156 | 156 | ||
157 | This allows for the device to use these interrupts in a sparse fashion; | 157 | This allows for the device to use these interrupts in a sparse fashion; |
158 | for example it could use interrupts 3 and 1027 and allocate only a | 158 | for example, it could use interrupts 3 and 1027 and yet allocate only a |
159 | two-element array. The driver is expected to fill in the 'entry' value | 159 | two-element array. The driver is expected to fill in the 'entry' value |
160 | in each element of the array to indicate which entries it wants the kernel | 160 | in each element of the array to indicate for which entries the kernel |
161 | to assign interrupts for. It is invalid to fill in two entries with the | 161 | should assign interrupts; it is invalid to fill in two entries with the |
162 | same number. | 162 | same number. |
163 | 163 | ||
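For the sparse example above, the two-element array could be filled in as follows (illustrative only):

static struct msix_entry foo_msix_entries[] = {
	{ .entry = 3 },		/* first MSI-X table entry the device uses */
	{ .entry = 1027 },	/* second MSI-X table entry */
};

After a successful pci_enable_msix() (described next), the 'vector' member of each element holds the interrupt number to pass to request_irq().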
164 | 4.3.1 pci_enable_msix | 164 | 4.3.1 pci_enable_msix |
@@ -168,10 +168,11 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) | |||
168 | Calling this function asks the PCI subsystem to allocate 'nvec' MSIs. | 168 | Calling this function asks the PCI subsystem to allocate 'nvec' MSIs. |
169 | The 'entries' argument is a pointer to an array of msix_entry structs | 169 | The 'entries' argument is a pointer to an array of msix_entry structs |
170 | which should be at least 'nvec' entries in size. On success, the | 170 | which should be at least 'nvec' entries in size. On success, the |
171 | function will return 0 and the device will have been switched into | 171 | device is switched into MSI-X mode and the function returns 0. |
172 | MSI-X interrupt mode. The 'vector' elements in each entry will have | 172 | The 'vector' member in each entry is populated with the interrupt number; |
173 | been filled in with the interrupt number. The driver should then call | 173 | the driver should then call request_irq() for each 'vector' that it |
174 | request_irq() for each 'vector' that it decides to use. | 174 | decides to use. The device driver is responsible for keeping track of the |
175 | interrupts assigned to the MSI-X vectors so it can free them again later. | ||
175 | 176 | ||
176 | If this function returns a negative number, it indicates an error and | 177 | If this function returns a negative number, it indicates an error and |
177 | the driver should not attempt to allocate any more MSI-X interrupts for | 178 | the driver should not attempt to allocate any more MSI-X interrupts for |
@@ -181,16 +182,14 @@ below. | |||
181 | 182 | ||
182 | This function, in contrast with pci_enable_msi(), does not adjust | 183 | This function, in contrast with pci_enable_msi(), does not adjust |
183 | dev->irq. The device will not generate interrupts for this interrupt | 184 | dev->irq. The device will not generate interrupts for this interrupt |
184 | number once MSI-X is enabled. The device driver is responsible for | 185 | number once MSI-X is enabled. |
185 | keeping track of the interrupts assigned to the MSI-X vectors so it can | ||
186 | free them again later. | ||
187 | 186 | ||
188 | Device drivers should normally call this function once per device | 187 | Device drivers should normally call this function once per device |
189 | during the initialization phase. | 188 | during the initialization phase. |
190 | 189 | ||
191 | It is ideal if drivers can cope with a variable number of MSI-X interrupts, | 190 | It is ideal if drivers can cope with a variable number of MSI-X interrupts; |
192 | there are many reasons why the platform may not be able to provide the | 191 | there are many reasons why the platform may not be able to provide the |
193 | exact number a driver asks for. | 192 | exact number that a driver asks for. |
194 | 193 | ||
195 | A request loop to achieve that might look like: | 194 | A request loop to achieve that might look like: |
196 | 195 | ||
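One possible shape for such a loop, modeled on the foo_driver_enable_msix() helper named in the hunk below (adapter->pdev, adapter->msix_entries and FOO_MIN_MSIX are assumptions):

static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
{
	while (nvec >= FOO_MIN_MSIX) {
		int rc = pci_enable_msix(adapter->pdev,
					 adapter->msix_entries, nvec);
		if (rc == 0)
			return 0;	/* success */
		if (rc < 0)
			return rc;	/* hard error; give up */
		nvec = rc;		/* retry with what may be available */
	}
	return -ENOSPC;
}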
@@ -212,15 +211,15 @@ static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec) | |||
212 | 211 | ||
213 | void pci_disable_msix(struct pci_dev *dev) | 212 | void pci_disable_msix(struct pci_dev *dev) |
214 | 213 | ||
215 | This API should be used to undo the effect of pci_enable_msix(). It frees | 214 | This function should be used to undo the effect of pci_enable_msix(). It frees |
216 | the previously allocated message signaled interrupts. The interrupts may | 215 | the previously allocated message signaled interrupts. The interrupts may |
217 | subsequently be assigned to another device, so drivers should not cache | 216 | subsequently be assigned to another device, so drivers should not cache |
218 | the value of the 'vector' elements over a call to pci_disable_msix(). | 217 | the value of the 'vector' elements over a call to pci_disable_msix(). |
219 | 218 | ||
220 | A device driver must always call free_irq() on the interrupt(s) | 219 | Before calling this function, a device driver must always call free_irq() |
221 | for which it has called request_irq() before calling this function. | 220 | on any interrupt for which it previously called request_irq(). |
222 | Failure to do so will result in a BUG_ON(), the device will be left with | 221 | Failure to do so results in a BUG_ON(), leaving the device with |
223 | MSI enabled and will leak its vector. | 222 | MSI-X enabled and thus leaking its vector. |
224 | 223 | ||
225 | 4.3.3 The MSI-X Table | 224 | 4.3.3 The MSI-X Table |
226 | 225 | ||
@@ -232,10 +231,10 @@ mask or unmask an interrupt, it should call disable_irq() / enable_irq(). | |||
232 | 4.4 Handling devices implementing both MSI and MSI-X capabilities | 231 | 4.4 Handling devices implementing both MSI and MSI-X capabilities |
233 | 232 | ||
234 | If a device implements both MSI and MSI-X capabilities, it can | 233 | If a device implements both MSI and MSI-X capabilities, it can |
235 | run in either MSI mode or MSI-X mode but not both simultaneously. | 234 | run in either MSI mode or MSI-X mode, but not both simultaneously. |
236 | This is a requirement of the PCI spec, and it is enforced by the | 235 | This is a requirement of the PCI spec, and it is enforced by the |
237 | PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or | 236 | PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or |
238 | pci_enable_msix() when MSI is already enabled will result in an error. | 237 | pci_enable_msix() when MSI is already enabled results in an error. |
239 | If a device driver wishes to switch between MSI and MSI-X at runtime, | 238 | If a device driver wishes to switch between MSI and MSI-X at runtime, |
240 | it must first quiesce the device, then switch it back to pin-interrupt | 239 | it must first quiesce the device, then switch it back to pin-interrupt |
241 | mode, before calling pci_enable_msi() or pci_enable_msix() and resuming | 240 | mode, before calling pci_enable_msi() or pci_enable_msix() and resuming |
@@ -251,7 +250,7 @@ the MSI-X facilities in preference to the MSI facilities. As mentioned | |||
251 | above, MSI-X supports any number of interrupts between 1 and 2048. | 250 | above, MSI-X supports any number of interrupts between 1 and 2048. |
252 | In contrast, MSI is restricted to a maximum of 32 interrupts (and | 251 | In contrast, MSI is restricted to a maximum of 32 interrupts (and |
253 | must be a power of two). In addition, the MSI interrupt vectors must | 252 | must be a power of two). In addition, the MSI interrupt vectors must |
254 | be allocated consecutively, so the system may not be able to allocate | 253 | be allocated consecutively, so the system might not be able to allocate |
255 | as many vectors for MSI as it could for MSI-X. On some platforms, MSI | 254 | as many vectors for MSI as it could for MSI-X. On some platforms, MSI |
256 | interrupts must all be targeted at the same set of CPUs whereas MSI-X | 255 | interrupts must all be targeted at the same set of CPUs whereas MSI-X |
257 | interrupts can all be targeted at different CPUs. | 256 | interrupts can all be targeted at different CPUs. |
@@ -281,7 +280,7 @@ disabled to enabled and back again. | |||
281 | 280 | ||
282 | Using 'lspci -v' (as root) may show some devices with "MSI", "Message | 281 | Using 'lspci -v' (as root) may show some devices with "MSI", "Message |
283 | Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities | 282 | Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities |
284 | has an 'Enable' flag which will be followed with either "+" (enabled) | 283 | has an 'Enable' flag which is followed by either "+" (enabled) |
285 | or "-" (disabled). | 284 | or "-" (disabled). |
286 | 285 | ||
287 | 286 | ||
@@ -298,7 +297,7 @@ The PCI stack provides three ways to disable MSIs: | |||
298 | 297 | ||
299 | Some host chipsets simply don't support MSIs properly. If we're | 298 | Some host chipsets simply don't support MSIs properly. If we're |
300 | lucky, the manufacturer knows this and has indicated it in the ACPI | 299 | lucky, the manufacturer knows this and has indicated it in the ACPI |
301 | FADT table. In this case, Linux will automatically disable MSIs. | 300 | FADT table. In this case, Linux automatically disables MSIs. |
302 | Some boards don't include this information in the table and so we have | 301 | Some boards don't include this information in the table and so we have |
303 | to detect them ourselves. The complete list of these is found near the | 302 | to detect them ourselves. The complete list of these is found near the |
304 | quirk_disable_all_msi() function in drivers/pci/quirks.c. | 303 | quirk_disable_all_msi() function in drivers/pci/quirks.c. |
@@ -317,7 +316,7 @@ Some bridges allow you to enable MSIs by changing some bits in their | |||
317 | PCI configuration space (especially the Hypertransport chipsets such | 316 | PCI configuration space (especially the Hypertransport chipsets such |
318 | as the nVidia nForce and Serverworks HT2000). As with host chipsets, | 317 | as the nVidia nForce and Serverworks HT2000). As with host chipsets, |
319 | Linux mostly knows about them and automatically enables MSIs if it can. | 318 | Linux mostly knows about them and automatically enables MSIs if it can. |
320 | If you have a bridge which Linux doesn't yet know about, you can enable | 319 | If you have a bridge unknown to Linux, you can enable |
321 | MSIs in configuration space using whatever method you know works, then | 320 | MSIs in configuration space using whatever method you know works, then |
322 | enable MSIs on that bridge by doing: | 321 | enable MSIs on that bridge by doing: |
323 | 322 | ||
@@ -327,7 +326,7 @@ where $bridge is the PCI address of the bridge you've enabled (eg | |||
327 | 0000:00:0e.0). | 326 | 0000:00:0e.0). |
328 | 327 | ||
329 | To disable MSIs, echo 0 instead of 1. Changing this value should be | 328 | To disable MSIs, echo 0 instead of 1. Changing this value should be |
330 | done with caution as it can break interrupt handling for all devices | 329 | done with caution as it could break interrupt handling for all devices |
331 | below this bridge. | 330 | below this bridge. |
332 | 331 | ||
333 | Again, please notify linux-pci@vger.kernel.org of any bridges that need | 332 | Again, please notify linux-pci@vger.kernel.org of any bridges that need |
@@ -336,7 +335,7 @@ special handling. | |||
336 | 5.3. Disabling MSIs on a single device | 335 | 5.3. Disabling MSIs on a single device |
337 | 336 | ||
338 | Some devices are known to have faulty MSI implementations. Usually this | 337 | Some devices are known to have faulty MSI implementations. Usually this |
339 | is handled in the individual device driver but occasionally it's necessary | 338 | is handled in the individual device driver, but occasionally it's necessary |
340 | to handle this with a quirk. Some drivers have an option to disable use | 339 | to handle this with a quirk. Some drivers have an option to disable use |
341 | of MSI. While this is a convenient workaround for the driver author, | 340 | of MSI. While this is a convenient workaround for the driver author, |
342 | it is not good practice, and should not be emulated. | 341 | it is not good practice, and should not be emulated. |
@@ -350,7 +349,7 @@ for your machine. You should also check your .config to be sure you | |||
350 | have enabled CONFIG_PCI_MSI. | 349 | have enabled CONFIG_PCI_MSI. |
351 | 350 | ||
352 | Then, 'lspci -t' gives the list of bridges above a device. Reading | 351 | Then, 'lspci -t' gives the list of bridges above a device. Reading |
353 | /sys/bus/pci/devices/*/msi_bus will tell you whether MSI are enabled (1) | 352 | /sys/bus/pci/devices/*/msi_bus will tell you whether MSIs are enabled (1) |
354 | or disabled (0). If 0 is found in any of the msi_bus files belonging | 353 | or disabled (0). If 0 is found in any of the msi_bus files belonging |
355 | to bridges between the PCI root and the device, MSIs are disabled. | 354 | to bridges between the PCI root and the device, MSIs are disabled. |
356 | 355 | ||
diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt index e578feed6d8..6d670f57045 100644 --- a/Documentation/block/cfq-iosched.txt +++ b/Documentation/block/cfq-iosched.txt | |||
@@ -43,3 +43,74 @@ If one sets slice_idle=0 and if storage supports NCQ, CFQ internally switches | |||
43 | to IOPS mode and starts providing fairness in terms of number of requests | 43 | to IOPS mode and starts providing fairness in terms of number of requests |
44 | dispatched. Note that this mode switching takes effect only for group | 44 | dispatched. Note that this mode switching takes effect only for group |
45 | scheduling. For non-cgroup users nothing should change. | 45 | scheduling. For non-cgroup users nothing should change. |
46 | |||
47 | CFQ IO scheduler Idling Theory | ||
48 | =============================== | ||
49 | Idling on a queue is primarily about waiting for the next request to arrive | ||
50 | on the same queue after completion of a request. While idling, CFQ will not | ||
51 | dispatch requests from other cfq queues even if requests are pending there. | ||
52 | |||
53 | The rationale behind idling is that it can cut down on the number of seeks | ||
54 | on rotational media. For example, if a process is doing dependent | ||
55 | sequential reads (the next read is issued only after completion of the | ||
56 | previous one), then not dispatching requests from other queues should help, | ||
57 | as we do not move the disk head and keep dispatching sequential IO from | ||
58 | one queue. | ||
59 | |||
60 | CFQ has the following service trees, and the various queues are put on them. | ||
61 | |||
62 | sync-idle sync-noidle async | ||
63 | |||
64 | All cfq queues doing synchronous sequential IO go onto the sync-idle tree. | ||
65 | On this tree we idle on each queue individually. | ||
66 | |||
67 | All synchronous non-sequential queues go on the sync-noidle tree. Any | ||
68 | requests which are marked with REQ_NOIDLE also go on this service tree. On | ||
69 | this tree we do not idle on individual queues; instead we idle on the whole | ||
70 | group of queues, i.e. the tree. So if there are 4 queues waiting for IO to | ||
71 | dispatch, we will idle only once the last queue has dispatched its IO and | ||
72 | there is no more IO on this service tree. | ||
73 | |||
74 | All async writes go on the async service tree. There is no idling on async | ||
75 | queues. | ||
76 | |||
77 | CFQ has some optimizations for SSDs: if it detects non-rotational | ||
78 | media which can support a higher queue depth (multiple requests in | ||
79 | flight at a time), then it cuts down on idling of individual queues; all | ||
80 | the queues move to the sync-noidle tree and only tree idling remains. This | ||
81 | tree idling provides isolation from buffered write queues on the async tree. | ||
82 | |||
83 | FAQ | ||
84 | === | ||
85 | Q1. Why idle at all on queues marked with REQ_NOIDLE? | ||
86 | |||
87 | A1. We only do tree idling (all queues on the sync-noidle tree) for queues | ||
88 | marked with REQ_NOIDLE. This helps in providing isolation from all the | ||
89 | sync-idle queues. Otherwise, in the presence of many sequential readers, | ||
90 | other synchronous IO might not get its fair share of the disk. | ||
91 | |||
92 | For example, suppose there are 10 sequential readers doing IO and they get | ||
93 | 100ms each. If a REQ_NOIDLE request comes in, it will be scheduled | ||
94 | roughly after 1 second. If after completion of the REQ_NOIDLE request we | ||
95 | do not idle, and a couple of milliseconds later another REQ_NOIDLE | ||
96 | request comes in, it will again be scheduled after 1 second. Repeat this | ||
97 | and notice how a workload can lose its disk share and suffer due to | ||
98 | multiple sequential readers. | ||
99 | |||
100 | fsync can generate dependent IO where a bunch of data is written in the | ||
101 | context of fsync, and later some journaling data is written. Journaling | ||
102 | data comes in only after fsync has finished its IO (at least for ext4 | ||
103 | that seemed to be the case). Now if one decides not to idle on the fsync | ||
104 | thread due to REQ_NOIDLE, then the next journaling write will not get | ||
105 | scheduled for another second. A process doing small fsyncs will suffer | ||
106 | badly in the presence of multiple sequential readers. | ||
107 | |||
108 | Hence, doing tree idling for threads using the REQ_NOIDLE flag on requests | ||
109 | provides isolation from multiple sequential readers while, at the same | ||
110 | time, not idling on individual threads. | ||
111 | |||
112 | Q2. When to specify REQ_NOIDLE? | ||
113 | A2. I would think that whenever one is doing a synchronous write and not | ||
114 | expecting more writes to be dispatched from the same context soon, one | ||
115 | should be able to specify REQ_NOIDLE on the writes, and that should work | ||
116 | well for most cases. | ||
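As an illustration, a kernel thread submitting such a one-off synchronous write might tag it like this (a sketch, assuming the REQ_* bio flags of this kernel series):

	/* One-off synchronous write; REQ_NOIDLE tells CFQ not to idle
	 * waiting for more IO from this context. */
	submit_bio(WRITE | REQ_SYNC | REQ_NOIDLE, bio);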
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6ca1f5cb71e..614d0382e2c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1350,9 +1350,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1350 | it is equivalent to "nosmp", which also disables | 1350 | it is equivalent to "nosmp", which also disables |
1351 | the IO APIC. | 1351 | the IO APIC. |
1352 | 1352 | ||
1353 | max_loop= [LOOP] Maximum number of loopback devices that can | 1353 | max_loop= [LOOP] The number of loop block devices that get |
1354 | be mounted | 1354 | (loop.max_loop) unconditionally pre-created at init time. The default |
1355 | Format: <1-256> | 1355 | number is configured by BLK_DEV_LOOP_MIN_COUNT. Instead |
1356 | of statically allocating a predefined number, loop | ||
1357 | devices can be requested on-demand with the | ||
1358 | /dev/loop-control interface. | ||
1356 | 1359 | ||
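A minimal sketch of the on-demand interface (userspace C, error handling omitted; LOOP_CTL_GET_FREE comes from <linux/loop.h>):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/loop.h>

int main(void)
{
	int ctl = open("/dev/loop-control", O_RDWR);
	int nr = ioctl(ctl, LOOP_CTL_GET_FREE);  /* index of a free loop device */

	printf("/dev/loop%d\n", nr);
	return 0;
}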
1357 | mcatest= [IA-64] | 1360 | mcatest= [IA-64] |
1358 | 1361 | ||
diff --git a/Documentation/virtual/00-INDEX b/Documentation/virtual/00-INDEX index fe0251c4cfb..8e601991d91 100644 --- a/Documentation/virtual/00-INDEX +++ b/Documentation/virtual/00-INDEX | |||
@@ -8,3 +8,6 @@ lguest/ | |||
8 | - Extremely simple hypervisor for experimental/educational use. | 8 | - Extremely simple hypervisor for experimental/educational use. |
9 | uml/ | 9 | uml/ |
10 | - User Mode Linux, builds/runs Linux kernel as a userspace program. | 10 | - User Mode Linux, builds/runs Linux kernel as a userspace program. |
11 | virtio-spec.txt | ||
12 | - Text version of draft virtio spec. | ||
13 | See http://ozlabs.org/~rusty/virtio-spec | ||
diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c index 043bd7df313..d928c134dee 100644 --- a/Documentation/virtual/lguest/lguest.c +++ b/Documentation/virtual/lguest/lguest.c | |||
@@ -1996,6 +1996,9 @@ int main(int argc, char *argv[]) | |||
1996 | /* We use a simple helper to copy the arguments separated by spaces. */ | 1996 | /* We use a simple helper to copy the arguments separated by spaces. */ |
1997 | concat((char *)(boot + 1), argv+optind+2); | 1997 | concat((char *)(boot + 1), argv+optind+2); |
1998 | 1998 | ||
1999 | /* Set kernel alignment to 16M (CONFIG_PHYSICAL_ALIGN) */ | ||
2000 | boot->hdr.kernel_alignment = 0x1000000; | ||
2001 | |||
1999 | /* Boot protocol version: 2.07 supports the fields for lguest. */ | 2002 | /* Boot protocol version: 2.07 supports the fields for lguest. */ |
2000 | boot->hdr.version = 0x207; | 2003 | boot->hdr.version = 0x207; |
2001 | 2004 | ||
diff --git a/Documentation/virtual/virtio-spec.txt b/Documentation/virtual/virtio-spec.txt new file mode 100644 index 00000000000..a350ae135b8 --- /dev/null +++ b/Documentation/virtual/virtio-spec.txt | |||
@@ -0,0 +1,2200 @@ | |||
1 | [Generated file: see http://ozlabs.org/~rusty/virtio-spec/] | ||
2 | Virtio PCI Card Specification | ||
3 | v0.9.1 DRAFT | ||
4 | - | ||
5 | |||
6 | Rusty Russell <rusty@rustcorp.com.au>IBM Corporation (Editor) | ||
7 | |||
8 | 2011 August 1. | ||
9 | |||
10 | Purpose and Description | ||
11 | |||
12 | This document describes the specifications of the “virtio” family | ||
13 | of PCI devices. These are devices | ||
14 | that are found in virtual environments, | ||
15 | yet by design they are not all that different from physical PCI | ||
16 | devices, and this document treats them as such. This allows the | ||
17 | guest to use standard PCI drivers and discovery mechanisms. | ||
18 | |||
19 | The purpose of virtio and this specification is that virtual | ||
20 | environments and guests should have a straightforward, efficient, | ||
21 | standard and extensible mechanism for virtual devices, rather | ||
22 | than boutique per-environment or per-OS mechanisms. | ||
23 | |||
24 | Straightforward: Virtio PCI devices use normal PCI mechanisms | ||
25 | of interrupts and DMA which should be familiar to any device | ||
26 | driver author. There is no exotic page-flipping or COW | ||
27 | mechanism: it's just a PCI device.[footnote: | ||
28 | This lack of page-sharing implies that the implementation of the | ||
29 | device (e.g. the hypervisor or host) needs full access to the | ||
30 | guest memory. Communication with untrusted parties (i.e. | ||
31 | inter-guest communication) requires copying. | ||
32 | ] | ||
33 | |||
34 | Efficient: Virtio PCI devices consist of rings of descriptors | ||
35 | for input and output, which are neatly separated to avoid cache | ||
36 | effects from both guest and device writing to the same cache | ||
37 | lines. | ||
38 | |||
39 | Standard: Virtio PCI makes no assumptions about the environment | ||
40 | in which it operates, beyond supporting PCI. In fact the virtio | ||
41 | devices specified in the appendices do not require PCI at all: | ||
42 | they have been implemented on non-PCI buses.[footnote: | ||
43 | The Linux implementation further separates the PCI virtio code | ||
44 | from the specific virtio drivers: these drivers are shared with | ||
45 | the non-PCI implementations (currently lguest and S/390). | ||
46 | ] | ||
47 | |||
48 | Extensible: Virtio PCI devices contain feature bits which are | ||
49 | acknowledged by the guest operating system during device setup. | ||
50 | This allows forwards and backwards compatibility: the device | ||
51 | offers all the features it knows about, and the driver | ||
52 | acknowledges those it understands and wishes to use. | ||
53 | |||
54 | Virtqueues | ||
55 | |||
56 | The mechanism for bulk data transport on virtio PCI devices is | ||
57 | pretentiously called a virtqueue. Each device can have zero or | ||
58 | more virtqueues: for example, the network device has one for | ||
59 | transmit and one for receive. | ||
60 | |||
61 | Each virtqueue occupies two or more physically-contiguous pages | ||
62 | (defined, for the purposes of this specification, as 4096 bytes), | ||
63 | and consists of three parts: | ||
64 | |||
65 | |||
66 | +-------------------+-----------------------------------+-----------+ | ||
67 | | Descriptor Table | Available Ring (padding) | Used Ring | | ||
68 | +-------------------+-----------------------------------+-----------+ | ||
69 | |||
70 | |||
71 | When the driver wants to send buffers to the device, it puts them | ||
72 | in one or more slots in the descriptor table, and writes the | ||
73 | descriptor indices into the available ring. It then notifies the | ||
74 | device. When the device has finished with the buffers, it writes | ||
75 | the descriptors into the used ring, and sends an interrupt. | ||
76 | |||
77 | Specification | ||
78 | |||
79 | PCI Discovery | ||
80 | |||
81 | Any PCI device with Vendor ID 0x1AF4, and Device ID 0x1000 | ||
82 | through 0x103F inclusive is a virtio device[footnote: | ||
83 | The actual value within this range is ignored | ||
84 | ]. The device must also have a Revision ID of 0 to match this | ||
85 | specification. | ||
86 | |||
87 | The Subsystem Device ID indicates which virtio device is | ||
88 | supported by the device. The Subsystem Vendor ID should reflect | ||
89 | the PCI Vendor ID of the environment (it's currently only used | ||
90 | for informational purposes by the guest). | ||
91 | |||
92 | |||
93 | +----------------------+--------------------+---------------+ | ||
94 | | Subsystem Device ID | Virtio Device | Specification | | ||
95 | +----------------------+--------------------+---------------+ | ||
97 | | 1 | network card | Appendix C | | ||
98 | +----------------------+--------------------+---------------+ | ||
99 | | 2 | block device | Appendix D | | ||
100 | +----------------------+--------------------+---------------+ | ||
101 | | 3 | console | Appendix E | | ||
102 | +----------------------+--------------------+---------------+ | ||
103 | | 4 | entropy source | Appendix F | | ||
104 | +----------------------+--------------------+---------------+ | ||
105 | | 5 | memory ballooning | Appendix G | | ||
106 | +----------------------+--------------------+---------------+ | ||
107 | | 6 | ioMemory | - | | ||
108 | +----------------------+--------------------+---------------+ | ||
109 | | 9 | 9P transport | - | | ||
110 | +----------------------+--------------------+---------------+ | ||
111 | |||
112 | |||
113 | Device Configuration | ||
114 | |||
115 | To configure the device, we use the first I/O region of the PCI | ||
116 | device. This contains a virtio header followed by a | ||
117 | device-specific region. | ||
118 | |||
119 | There may be different widths of accesses to the I/O region; the “ | ||
120 | natural” access method for each field in the virtio header must | ||
121 | be used (i.e. 32-bit accesses for 32-bit fields, etc), but the | ||
122 | device-specific region can be accessed using any width accesses, | ||
123 | and should obtain the same results. | ||
124 | |||
125 | Note that this is possible because while the virtio header is PCI | ||
126 | (i.e. little) endian, the device-specific region is encoded in | ||
127 | the native endian of the guest (where such distinction is | ||
128 | applicable). | ||
129 | |||
130 | Device Initialization Sequence | ||
131 | |||
132 | We start with an overview of device initialization, then expand | ||
133 | on the details of the device and how each step is performed. | ||
134 | |||
135 | 1. Reset the device. This is not required on initial start up. | ||
136 |||
137 | 2. The ACKNOWLEDGE status bit is set: we have noticed the device. | ||
138 |||
139 | 3. The DRIVER status bit is set: we know how to drive the device. | ||
140 |||
141 | 4. Device-specific setup, including reading the Device Feature | ||
142 | Bits, discovery of virtqueues for the device, optional MSI-X | ||
143 | setup, and reading and possibly writing the virtio | ||
144 | configuration space. | ||
145 |||
146 | 5. The subset of Device Feature Bits understood by the driver is | ||
147 | written to the device. | ||
148 |||
149 | 6. The DRIVER_OK status bit is set. | ||
150 |||
151 | 7. The device can now be used (i.e. buffers added to the | ||
152 | virtqueues)[footnote: | ||
153 | Historically, drivers have used the device before steps 5 and 6. | ||
154 | This is only allowed if the driver does not use any features | ||
155 | which would alter this early use of the device. | ||
156 | ] | ||
157 | |||
158 | If any of these steps go irrecoverably wrong, the guest should | ||
159 | set the FAILED status bit to indicate that it has given up on the | ||
160 | device (it can reset the device later to restart if desired). | ||
161 | |||
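As an illustration, the status-bit handshake for steps 1-6 might look as follows using legacy x86 port I/O (a sketch only: offset 18 follows from the virtio header layout below, the bit values from the Device Status section; iobase and the feature/virtqueue setup are assumed):

#define VIRTIO_PCI_STATUS	18	/* Device Status byte in the header */
#define ACKNOWLEDGE		1
#define DRIVER			2
#define DRIVER_OK		4

static void virtio_init(unsigned int iobase)
{
	outb(0, iobase + VIRTIO_PCI_STATUS);		/* 1. reset */
	outb(ACKNOWLEDGE, iobase + VIRTIO_PCI_STATUS);	/* 2. */
	outb(ACKNOWLEDGE | DRIVER, iobase + VIRTIO_PCI_STATUS);	/* 3. */
	/* ... steps 4 and 5: virtqueue and feature setup ... */
	outb(ACKNOWLEDGE | DRIVER | DRIVER_OK,
	     iobase + VIRTIO_PCI_STATUS);		/* 6. ready */
}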
162 | We now cover the fields required for general setup in detail. | ||
163 | |||
164 | Virtio Header | ||
165 | |||
166 | The virtio header looks as follows: | ||
167 | |||
168 | |||
169 | +------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+ | ||
170 | | Bits || 32 | 32 | 32 | 16 | 16 | 16 | 8 | 8 | | ||
171 | +------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+ | ||
172 | | Read/Write || R | R+W | R+W | R | R+W | R+W | R+W | R | | ||
173 | +------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+ | ||
174 | | Purpose || Device | Guest | Queue | Queue | Queue | Queue | Device | ISR | | ||
175 | | || Features bits 0:31 | Features bits 0:31 | Address | Size | Select | Notify | Status | Status | | ||
176 | +------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+ | ||
177 | |||
178 | |||
179 | If MSI-X is enabled for the device, two additional fields | ||
180 | immediately follow this header: | ||
181 | |||
182 | |||
183 | +------------++----------------+--------+ | ||
184 | | Bits || 16 | 16 | | ||
185 | +------------++----------------+--------+ | ||
186 | | Read/Write || R+W | R+W | | ||
187 | +------------++----------------+--------+ | ||
188 | | Purpose || Configuration | Queue | | ||
189 | | (MSI-X) || Vector | Vector | | ||
190 | +------------++----------------+--------+ | ||
192 | |||
193 | |||
194 | Finally, if the VIRTIO_F_FEATURES_HI feature bit is negotiated, these | ||
195 | are immediately followed by two additional fields: | ||
196 | |||
197 | |||
198 | +------------++----------------------+----------------------+ | ||
199 | | Bits || 32 | 32 | | ||
200 | +------------++----------------------+----------------------+ | ||
201 | | Read/Write || R | R+W | | ||
202 | +------------++----------------------+----------------------+ | ||
203 | | Purpose || Device | Guest | | ||
204 | | || Features bits 32:63 | Features bits 32:63 | | ||
205 | +------------++----------------------+----------------------+ | ||
206 | |||
207 | |||
208 | Immediately following these general headers, there may be | ||
209 | device-specific headers: | ||
210 | |||
211 | |||
212 | +------------++--------------------+ | ||
213 | | Bits || Device Specific | | ||
214 | +------------++--------------------+ | ||
215 | | Read/Write || Device Specific | | ||
216 | +------------++--------------------+ | ||
217 | | Purpose || Device Specific... | | ||
218 | | || | | ||
219 | +------------++--------------------+ | ||
221 | |||
222 | |||
223 | Device Status | ||
224 | |||
225 | The Device Status field is updated by the guest to indicate its | ||
226 | progress. This provides a simple low-level diagnostic: it's most | ||
227 | useful to imagine them hooked up to traffic lights on the console | ||
228 | indicating the status of each device. | ||
229 | |||
230 | The device can be reset by writing a 0 to this field; otherwise, | ||
231 | at least one bit should be set: | ||
232 | |||
233 | ACKNOWLEDGE (1) Indicates that the guest OS has found the | ||
234 | device and recognized it as a valid virtio device. | ||
235 | |||
236 | DRIVER (2) Indicates that the guest OS knows how to drive the | ||
237 | device. Under Linux, drivers can be loadable modules so there | ||
238 | may be a significant (or infinite) delay before setting this | ||
239 | bit. | ||
240 | |||
241 | DRIVER_OK (4) Indicates that the driver is set up and ready to | ||
242 | drive the device. | ||
243 | |||
244 | FAILED (128) Indicates that something went wrong in the guest, | ||
245 | and it has given up on the device. This could be an internal | ||
246 | error, or the driver didn't like the device for some reason, or | ||
247 | even a fatal error during device operation. The device must be | ||
248 | reset before attempting to re-initialize. | ||
249 | |||
250 | Feature Bits | ||
251 | |||
252 | The least significant 31 bits of the first configuration field | ||
253 | indicate the features that the device supports (the high bit is | ||
254 | reserved, and will be used to indicate the presence of future | ||
255 | feature bits elsewhere). If more than 31 feature bits are | ||
256 | supported, the device indicates so by setting feature bit 31 (see | ||
257 | [cha:Reserved-Feature-Bits]). The bits are allocated as follows: | ||
258 | |||
259 | 0 to 23 Feature bits for the specific device type | ||
260 | |||
261 | 24 to 40 Feature bits reserved for extensions to the queue and | ||
262 | feature negotiation mechanisms | ||
263 | |||
264 | 41 to 63 Feature bits reserved for future extensions | ||
265 | |||
266 | For example, feature bit 0 for a network device (i.e. Subsystem | ||
267 | Device ID 1) indicates that the device supports checksumming of | ||
268 | packets. | ||
269 | |||
270 | The feature bits are negotiated: the device lists all the | ||
271 | features it understands in the Device Features field, and the | ||
272 | guest writes the subset that it understands into the Guest | ||
273 | Features field. The only way to renegotiate is to reset the | ||
274 | device. | ||
275 | |||
276 | In particular, new fields in the device configuration header are | ||
277 | indicated by offering a feature bit, so the guest can check | ||
278 | before accessing that part of the configuration space. | ||
279 | |||
280 | This allows for forwards and backwards compatibility: if the | ||
281 | device is enhanced with a new feature bit, older guests will not | ||
282 | write that feature bit back to the Guest Features field and it | ||
283 | can go into backwards compatibility mode. Similarly, if a guest | ||
284 | is enhanced with a feature that the device doesn't support, it | ||
285 | will not see that feature bit in the Device Features field and | ||
286 | can go into backwards compatibility mode (or, for poor | ||
287 | implementations, set the FAILED Device Status bit). | ||
288 | |||
289 | Access to feature bits 32 to 63 is enabled by the guest by setting | ||
290 | feature bit 31. If this bit is unset, the device must assume that all | ||
291 | feature bits > 31 are unset. | ||
292 | |||
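A sketch of this negotiation over legacy port I/O (offsets 0 and 4 per the header layout above; FOO_SUPPORTED_FEATURES, the bits this hypothetical driver understands, is an assumption):

#define VIRTIO_PCI_HOST_FEATURES	0	/* Device Features */
#define VIRTIO_PCI_GUEST_FEATURES	4	/* Guest Features */

static void negotiate_features(unsigned int iobase)
{
	u32 features = inl(iobase + VIRTIO_PCI_HOST_FEATURES);

	/* Acknowledge only the subset we understand and wish to use. */
	outl(features & FOO_SUPPORTED_FEATURES,
	     iobase + VIRTIO_PCI_GUEST_FEATURES);
}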
293 | Configuration/Queue Vectors | ||
294 | |||
295 | When MSI-X capability is present and enabled in the device | ||
296 | (through standard PCI configuration space) 4 bytes at byte offset | ||
297 | 20 are used to map configuration change and queue interrupts to | ||
298 | MSI-X vectors. In this case, the ISR Status field is unused, and | ||
299 | device specific configuration starts at byte offset 24 in virtio | ||
300 | header structure. When MSI-X capability is not enabled, device | ||
301 | specific configuration starts at byte offset 20 in virtio header. | ||
302 | |||
303 | Writing a valid MSI-X Table entry number, 0 to 0x7FF, to one of the | ||
304 | Configuration/Queue Vector registers maps interrupts triggered | ||
305 | by the configuration change/selected queue events respectively to | ||
306 | the corresponding MSI-X vector. To disable interrupts for a | ||
307 | specific event type, unmap it by writing a special NO_VECTOR | ||
308 | value: | ||
309 | |||
310 | /* Vector value used to disable MSI for queue */ | ||
311 | |||
312 | #define VIRTIO_MSI_NO_VECTOR 0xffff | ||
313 | |||
314 | Reading these registers returns the vector mapped to a given event, | ||
315 | or NO_VECTOR if unmapped. All queue and configuration change | ||
316 | events are unmapped by default. | ||
317 | |||
318 | Note that mapping an event to a vector might require allocating | ||
319 | internal device resources, and might fail. Devices report such | ||
320 | failures by returning the NO_VECTOR value when the relevant | ||
321 | Vector field is read. After mapping an event to a vector, the | ||
322 | driver must verify success by reading the Vector field value: on | ||
323 | success, the previously written value is returned, and on | ||
324 | failure, NO_VECTOR is returned. If a mapping failure is detected, | ||
325 | the driver can retry mapping with fewer vectors, or disable MSI-X. | ||
326 | |||
327 | Virtqueue Configuration | ||
328 | |||
329 | As a device can have zero or more virtqueues for bulk data | ||
330 | transport (for example, the network driver has two), the driver | ||
331 | needs to configure them as part of the device-specific | ||
332 | configuration. | ||
333 | |||
334 | This is done as follows, for each virtqueue a device has: | ||
335 | |||
336 | 1. Write the virtqueue index (first queue is 0) to the Queue | ||
337 | Select field. | ||
338 |||
339 | 2. Read the virtqueue size from the Queue Size field, which is | ||
340 | always a power of 2. This controls how big the virtqueue is | ||
341 | (see below). If this field is 0, the virtqueue does not exist. | ||
342 |||
343 | 3. Allocate and zero the virtqueue in contiguous physical memory, on | ||
344 | a 4096 byte alignment. Write the physical address, divided by | ||
345 | 4096, to the Queue Address field.[footnote: | ||
346 | The 4096 is based on the x86 page size, but it's also large | ||
347 | enough to ensure that the separate parts of the virtqueue are on | ||
348 | separate cache lines. | ||
349 | ] | ||
350 |||
351 | 4. Optionally, if MSI-X capability is present and enabled on the | ||
352 | device, select a vector to use to request interrupts triggered | ||
353 | by virtqueue events. Write the MSI-X Table entry number | ||
354 | corresponding to this vector in the Queue Vector field. Read the | ||
355 | Queue Vector field: on success, the previously written value is | ||
356 | returned; on failure, the NO_VECTOR value is returned. | ||
357 | |||
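A sketch of these four steps over legacy port I/O (offsets per the header layout above; alloc_pages_zeroed() and virt_to_phys() stand in for allocating zeroed, 4096-byte-aligned, physically contiguous memory and translating its address):

#define VIRTIO_PCI_QUEUE_PFN	8	/* Queue Address */
#define VIRTIO_PCI_QUEUE_NUM	12	/* Queue Size */
#define VIRTIO_PCI_QUEUE_SEL	14	/* Queue Select */

static void *setup_vq(unsigned int iobase, u16 index)
{
	u16 qsz;
	void *queue;

	outw(index, iobase + VIRTIO_PCI_QUEUE_SEL);	/* step 1 */
	qsz = inw(iobase + VIRTIO_PCI_QUEUE_NUM);	/* step 2 */
	if (qsz == 0)
		return NULL;	/* virtqueue does not exist */
	queue = alloc_pages_zeroed(vring_size(qsz));	/* step 3 */
	outl(virt_to_phys(queue) / 4096, iobase + VIRTIO_PCI_QUEUE_PFN);
	return queue;	/* step 4 (the MSI-X vector) omitted */
}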
358 | The Queue Size field controls the total number of bytes required | ||
359 | for the virtqueue according to the following formula: | ||
360 | |||
361 | #define ALIGN(x) (((x) + 4095) & ~4095) | ||
362 | |||
363 | static inline unsigned vring_size(unsigned int qsz) | ||
364 | |||
365 | { | ||
366 | |||
367 | return ALIGN(sizeof(struct vring_desc)*qsz + sizeof(u16)*(2 | ||
368 | + qsz)) | ||
369 | |||
370 | + ALIGN(sizeof(struct vring_used_elem)*qsz); | ||
371 | |||
372 | } | ||
373 | |||
374 | This currently wastes some space with padding, but also allows | ||
375 | future extensions. The virtqueue layout structure looks like this | ||
376 | (qsz is the Queue Size field, which is a variable, so this code | ||
377 | won't compile): | ||
378 | |||
379 | struct vring { | ||
380 | |||
381 | /* The actual descriptors (16 bytes each) */ | ||
382 | |||
383 | struct vring_desc desc[qsz]; | ||
384 | |||
385 | |||
386 | |||
387 | /* A ring of available descriptor heads with free-running | ||
388 | index. */ | ||
389 | |||
390 | struct vring_avail avail; | ||
391 | |||
392 | |||
393 | |||
394 | // Padding to the next 4096 boundary. | ||
395 | |||
396 | char pad[]; | ||
397 | |||
398 | |||
399 | |||
400 | // A ring of used descriptor heads with free-running index. | ||
401 | |||
402 | struct vring_used used; | ||
403 | |||
404 | }; | ||
405 | |||
406 | A Note on Virtqueue Endianness | ||
407 | |||
408 | Note that the endianness of these fields and everything else in the | ||
409 | virtqueue is the native endianness of the guest, not little-endian as | ||
410 | PCI normally is. This makes for simpler guest code, and it is | ||
411 | assumed that the host already has to be deeply aware of the guest | ||
412 | endianness, so such an “endian-aware” device is not a significant | ||
413 | issue. | ||
414 | |||
415 | Descriptor Table | ||
416 | |||
417 | The descriptor table refers to the buffers the guest is using for | ||
418 | the device. The addresses are physical addresses, and the buffers | ||
419 | can be chained via the next field. Each descriptor describes a | ||
420 | buffer which is read-only or write-only, but a chain of | ||
421 | descriptors can contain both read-only and write-only buffers. | ||
422 | |||
423 | No descriptor chain may be more than 2^32 bytes long in total. | ||
423 |||
423 | struct vring_desc { | ||
424 | |||
425 | /* Address (guest-physical). */ | ||
426 | |||
427 | u64 addr; | ||
428 | |||
429 | /* Length. */ | ||
430 | |||
431 | u32 len; | ||
432 | |||
433 | /* This marks a buffer as continuing via the next field. */ | ||
434 | |||
435 | #define VRING_DESC_F_NEXT 1 | ||
436 | |||
437 | /* This marks a buffer as write-only (otherwise read-only). */ | ||
438 | |||
439 | #define VRING_DESC_F_WRITE 2 | ||
440 | |||
441 | /* This means the buffer contains a list of buffer descriptors. | ||
442 | */ | ||
443 | |||
444 | #define VRING_DESC_F_INDIRECT 4 | ||
445 | |||
446 | /* The flags as indicated above. */ | ||
447 | |||
448 | u16 flags; | ||
449 | |||
450 | /* Next field if flags & NEXT */ | ||
451 | |||
452 | u16 next; | ||
453 | |||
454 | }; | ||
455 | |||
456 | The number of descriptors in the table is specified by the Queue | ||
457 | Size field for this virtqueue. | ||
458 | |||
459 | <sub:Indirect-Descriptors>Indirect Descriptors | ||
460 | |||
461 | Some devices benefit by concurrently dispatching a large number | ||
462 | of large requests. The VIRTIO_RING_F_INDIRECT_DESC feature can be | ||
463 | used to allow this (see [cha:Reserved-Feature-Bits]). To increase | ||
464 | ring capacity it is possible to store a table of indirect | ||
465 | descriptors anywhere in memory, and insert a descriptor in the main | ||
466 | virtqueue (with flags&INDIRECT on) that refers to a memory buffer | ||
467 | containing this indirect descriptor table; fields addr and len | ||
468 | refer to the indirect table address and length in bytes, | ||
469 | respectively. The indirect table layout structure looks like this | ||
470 | (len is the length of the descriptor that refers to this table, | ||
471 | which is a variable, so this code won't compile): | ||
472 | |||
473 | struct indirect_descriptor_table { | ||
474 | |||
475 | /* The actual descriptors (16 bytes each) */ | ||
476 | |||
477 | struct vring_desc desc[len / 16]; | ||
478 | |||
479 | }; | ||
480 | |||
481 | The first indirect descriptor is located at the start of the indirect | ||
482 | descriptor table (index 0); additional indirect descriptors are | ||
483 | chained via the next field. An indirect descriptor without a next field | ||
484 | (with flags&NEXT off) signals the end of the indirect descriptor | ||
485 | table, and transfers control back to the main virtqueue. An | ||
486 | indirect descriptor cannot refer to another indirect descriptor | ||
487 | table (flags&INDIRECT must be off). A single indirect descriptor | ||
488 | table can include both read-only and write-only descriptors; the | ||
489 | write-only flag (flags&WRITE) in the descriptor that refers to it | ||
490 | is ignored. | ||
491 | |||
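A sketch of publishing such a table through a single main-ring descriptor (d is a free descriptor in the main virtqueue; table and n are the pre-filled indirect table and its descriptor count; virt_to_phys() is assumed):

static void add_indirect(struct vring_desc *d,
			 struct vring_desc *table, unsigned int n)
{
	d->addr = virt_to_phys(table);
	d->len = n * sizeof(struct vring_desc);
	/* WRITE is ignored on an indirect descriptor, per the above. */
	d->flags = VRING_DESC_F_INDIRECT;
}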
492 | Available Ring | ||
493 | |||
494 | The available ring refers to what descriptors we are offering the | ||
495 | device: it refers to the head of a descriptor chain. The “flags” | ||
496 | field is currently 0 or 1: 1 indicating that we do not need an | ||
497 | interrupt when the device consumes a descriptor from the | ||
498 | available ring. Alternatively, the guest can ask the device to | ||
499 | delay interrupts until an entry with an index specified by the “ | ||
500 | used_event” field is written in the used ring (equivalently, | ||
501 | until the idx field in the used ring reaches the value | ||
502 | used_event + 1). The method employed by the device is controlled | ||
503 | by the VIRTIO_RING_F_EVENT_IDX feature bit (see [cha:Reserved-Feature-Bits] | ||
504 | ). This interrupt suppression is merely an optimization; it may | ||
505 | not suppress interrupts entirely. | ||
506 | |||
507 | The “idx” field indicates where we would put the next descriptor | ||
508 | entry (modulo the ring size). This starts at 0, and increases. | ||
509 | |||
510 | struct vring_avail { | ||
511 | |||
512 | #define VRING_AVAIL_F_NO_INTERRUPT 1 | ||
513 | |||
514 | u16 flags; | ||
515 | |||
516 | u16 idx; | ||
517 | |||
518 | u16 ring[qsz]; /* qsz is the Queue Size field read from device | ||
519 | */ | ||
520 | |||
521 | u16 used_event; | ||
522 | |||
523 | }; | ||
524 | |||
525 | Used Ring | ||
526 | |||
527 | The used ring is where the device returns buffers once it is done | ||
528 | with them. The flags field can be used by the device to hint that | ||
529 | no notification is necessary when the guest adds to the available | ||
530 | ring. Alternatively, the “avail_event” field can be used by the | ||
531 | device to hint that no notification is necessary until an entry | ||
532 | with an index specified by the “avail_event” is written in the | ||
533 | available ring (equivalently, until the idx field in the | ||
534 | available ring will reach the value avail_event + 1). The method | ||
535 | employed by the device is controlled by the guest through the | ||
536 | VIRTIO_RING_F_EVENT_IDX feature bit (see [cha:Reserved-Feature-Bits] | ||
537 | ). [footnote: | ||
538 | These fields are kept here because this is the only part of the | ||
539 | virtqueue written by the device | ||
540 | ]. | ||
541 | |||
542 | Each entry in the ring is a pair: the head entry of the | ||
543 | descriptor chain describing the buffer (this matches an entry | ||
544 | placed in the available ring by the guest earlier), and the total | ||
545 | number of bytes written into the buffer. The latter is extremely | ||
546 | for guests using untrusted buffers: if you do not know exactly | ||
547 | how much has been written by the device, you usually have to zero | ||
548 | the buffer to ensure no data leakage occurs. | ||
549 | |||
550 | /* u32 is used here for ids for padding reasons. */ | ||
551 | |||
552 | struct vring_used_elem { | ||
553 | |||
554 | /* Index of start of used descriptor chain. */ | ||
555 | |||
556 | u32 id; | ||
557 | |||
558 | /* Total length of the descriptor chain which was used | ||
559 | (written to) */ | ||
560 | |||
561 | u32 len; | ||
562 | |||
563 | }; | ||
564 | |||
565 | |||
566 | |||
567 | struct vring_used { | ||
568 | |||
569 | #define VRING_USED_F_NO_NOTIFY 1 | ||
570 | |||
571 | u16 flags; | ||
572 | |||
573 | u16 idx; | ||
574 | |||
575 | struct vring_used_elem ring[qsz]; | ||
576 | |||
577 | u16 avail_event; | ||
578 | |||
579 | }; | ||
580 | |||
581 | Helpers for Managing Virtqueues | ||
582 | |||
583 | The Linux Kernel Source code contains the definitions above and | ||
584 | helper routines in a more usable form, in | ||
585 | include/linux/virtio_ring.h. This was explicitly licensed by IBM | ||
586 | and Red Hat under the (3-clause) BSD license so that it can be | ||
587 | freely used by all other projects, and is reproduced (with slight | ||
588 | variation to remove Linux assumptions) in Appendix A. | ||
589 | |||
590 | Device Operation | ||
591 | |||
592 | There are two parts to device operation: supplying new buffers to | ||
593 | the device, and processing used buffers from the device. As an | ||
594 | example, the virtio network device has two virtqueues: the | ||
595 | transmit virtqueue and the receive virtqueue. The driver adds | ||
596 | outgoing (read-only) packets to the transmit virtqueue, and then | ||
597 | frees them after they are used. Similarly, incoming (write-only) | ||
598 | buffers are added to the receive virtqueue, and processed after | ||
599 | they are used. | ||
600 | |||
601 | Supplying Buffers to The Device | ||
602 | |||
603 | Actual transfer of buffers from the guest OS to the device | ||
604 | operates as follows: | ||
605 | |||
606 | 1. Place the buffer(s) into free descriptor(s). | ||
607 |||
608 | If there are no free descriptors, the guest may choose to | ||
609 | notify the device even if notifications are suppressed (to | ||
610 | reduce latency).[footnote: | ||
611 | The Linux drivers do this only for read-only buffers: for | ||
612 | write-only buffers, it is assumed that the driver is merely | ||
613 | trying to keep the receive buffer ring full, and no notification | ||
614 | of this expected condition is necessary. | ||
615 | ] | ||
616 |||
617 | 2. Place the id of the buffer in the next ring entry of the | ||
618 | available ring. | ||
619 |||
620 | Steps (1) and (2) may be performed repeatedly if batching | ||
621 | is possible. | ||
622 |||
623 | 3. A memory barrier should be executed to ensure the device sees | ||
624 | the updated descriptor table and available ring before the next | ||
625 | step. | ||
626 |||
627 | 4. The available “idx” field should be increased by the number of | ||
628 | entries added to the available ring. | ||
629 |||
630 | 5. A memory barrier should be executed to ensure that we update | ||
631 | the idx field before checking for notification suppression. | ||
632 |||
633 | 6. If notifications are not suppressed, the device should be | ||
634 | notified of the new buffers. | ||
635 | |||
636 | Note that the above steps take no precautions against the | ||
637 | available ring buffer wrapping around: this is not possible since | ||
638 | the ring buffer is the same size as the descriptor table, so step | ||
639 | (1) will prevent such a condition. | ||
640 | |||
641 | In addition, the maximum queue size is 32768 (it must be a power | ||
642 | of 2 which fits in 16 bits), so the 16-bit “idx” value can always | ||
643 | distinguish between a full and empty buffer. | ||
644 | |||
645 | Here is a description of each stage in more detail. | ||
646 | |||
647 | Placing Buffers Into The Descriptor Table | ||
648 | |||
649 | A buffer consists of zero or more read-only physically-contiguous | ||
650 | elements followed by zero or more physically-contiguous | ||
651 | write-only elements (it must have at least one element). This | ||
652 | algorithm maps it into the descriptor table: | ||
653 | |||
654 | for each buffer element, b: | ||
655 |||
656 | 1. Get the next free descriptor table entry, d | ||
657 |||
658 | 2. Set d.addr to the physical address of the start of b | ||
659 |||
660 | 3. Set d.len to the length of b. | ||
661 |||
662 | 4. If b is write-only, set d.flags to VRING_DESC_F_WRITE, | ||
663 | otherwise 0. | ||
664 |||
665 | 5. If there is a buffer element after this: | ||
666 |||
667 | a. Set d.next to the index of the next free descriptor element. | ||
668 |||
669 | b. Set the VRING_DESC_F_NEXT bit in d.flags. | ||
670 | |||
In practice, the d.next fields are usually used to chain free
descriptors, and a separate count is kept to check that there are
enough free descriptors before beginning the mappings, as in the
sketch below.
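
Here is a minimal C sketch of this algorithm and that free-list
convention, using the struct vring and flag definitions from
Appendix A; the buf_elem element type, the free_head chaining and
the function name are illustrative, not part of the specification.

/* Hypothetical element: one physically-contiguous region of a buffer. */
struct buf_elem {
        uint64_t phys;  /* guest-physical start address */
        uint32_t len;
        int write;      /* non-zero for a write-only element */
};

/* Map n elements into the descriptor table. Free descriptors are
 * assumed to be chained through their next fields, starting at
 * *free_head. Returns the head index for the available ring. */
static uint16_t map_buffer(struct vring *vr, uint16_t *free_head,
                           const struct buf_elem *elem, unsigned int n)
{
        uint16_t head = *free_head, i = *free_head;
        unsigned int j;

        for (j = 0; j < n; j++) {
                struct vring_desc *d = &vr->desc[i];

                d->addr = elem[j].phys;
                d->len = elem[j].len;
                d->flags = elem[j].write ? VRING_DESC_F_WRITE : 0;
                if (j + 1 < n) {
                        d->flags |= VRING_DESC_F_NEXT;
                        i = d->next;    /* follow the free chain */
                }
        }
        *free_head = vr->desc[i].next;  /* unlink the used chain */
        return head;
}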
674 | |||
675 | Updating The Available Ring | ||
676 | |||
677 | The head of the buffer we mapped is the first d in the algorithm | ||
678 | above. A naive implementation would do the following: | ||
679 | |||
680 | avail->ring[avail->idx % qsz] = head; | ||
681 | |||
682 | However, in general we can add many descriptors before we update | ||
683 | the “idx” field (at which point they become visible to the | ||
684 | device), so we keep a counter of how many we've added: | ||
685 | |||
686 | avail->ring[(avail->idx + added++) % qsz] = head; | ||
687 | |||
688 | Updating The Index Field | ||
689 | |||
Once the idx field of the virtqueue is updated, the device will
be able to access the descriptor entries we've created and the
memory they refer to. This is why a memory barrier is generally
used before the idx update, to ensure the device sees the most
up-to-date copies of the descriptors and the buffers they refer
to.
695 | |||
696 | The idx field always increments, and we let it wrap naturally at | ||
697 | 65536: | ||
698 | |||
699 | avail->idx += added; | ||
700 | |||
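Combined with the barrier mentioned above, this is the whole
update (wmb() stands for whichever store-barrier primitive the
environment provides):

wmb();               /* descriptors and ring entries become visible first */
avail->idx += added; /* ...then the new idx exposes them to the device */
added = 0;
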
701 | <sub:Notifying-The-Device>Notifying The Device | ||
702 | |||
703 | Device notification occurs by writing the 16-bit virtqueue index | ||
704 | of this virtqueue to the Queue Notify field of the virtio header | ||
705 | in the first I/O region of the PCI device. This can be expensive, | ||
706 | however, so the device can suppress such notifications if it | ||
707 | doesn't need them. We have to be careful to expose the new idx | ||
708 | value before checking the suppression flag: it's OK to notify | ||
709 | gratuitously, but not to omit a required notification. So again, | ||
710 | we use a memory barrier here before reading the flags or the | ||
711 | avail_event field. | ||
712 | |||
If the VIRTIO_F_RING_EVENT_IDX feature is not negotiated, and the
VRING_USED_F_NO_NOTIFY flag is not set in used->flags, we go
ahead and notify the device by writing to the Queue Notify field.
716 | |||
If the VIRTIO_F_RING_EVENT_IDX feature is negotiated, we read the
avail_event field in the used ring structure. If the available
index crossed the avail_event field value since the last
notification, we go ahead and notify the device by writing to the
Queue Notify field. The avail_event field wraps naturally at
65536 as well:
722 | |||
723 | (u16)(new_idx - avail_event - 1) < (u16)(new_idx - old_idx) | ||
724 | |||
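A driver-side sketch of this decision, reusing vring_need_event()
and the structures from Appendix A (mb() stands for a full
memory-barrier primitive; old_idx is the value of avail->idx at
the previous notification):

/* Returns non-zero if the device must be notified after publishing
 * new buffers and updating avail->idx. */
static int need_kick(struct vring *vr, unsigned int qsz,
                     int event_idx_negotiated, uint16_t old_idx)
{
        mb();   /* expose the new idx before reading suppression state */

        if (event_idx_negotiated) {
                /* avail_event lives just past the used ring entries. */
                uint16_t avail_event = *(uint16_t *)&vr->used->ring[qsz];
                return vring_need_event(avail_event, vr->avail->idx,
                                        old_idx);
        }
        return !(vr->used->flags & VRING_USED_F_NO_NOTIFY);
}
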
725 | <sub:Receiving-Used-Buffers>Receiving Used Buffers From The | ||
726 | Device | ||
727 | |||
728 | Once the device has used a buffer (read from or written to it, or | ||
729 | parts of both, depending on the nature of the virtqueue and the | ||
730 | device), it sends an interrupt, following an algorithm very | ||
731 | similar to the algorithm used for the driver to send the device a | ||
732 | buffer: | ||
733 | |||
1. Write the head descriptor number to the next field in the used
   ring.

2. Update the used ring idx.

3. Determine whether an interrupt is necessary:

   If the VIRTIO_F_RING_EVENT_IDX feature is not negotiated: check
   whether the VRING_AVAIL_F_NO_INTERRUPT flag is not set in
   avail->flags.

   If the VIRTIO_F_RING_EVENT_IDX feature is negotiated: check
   whether the used index crossed the used_event field value since
   the last update (a sketch of this check appears after these
   steps). The used_event field wraps naturally at 65536 as well:

   (u16)(new_idx - used_event - 1) < (u16)(new_idx - old_idx)
749 | |||
4. If an interrupt is necessary:

   If MSI-X capability is disabled:

      Set the lower bit of the ISR Status field for the device.

      Send the appropriate PCI interrupt for the device.

   If MSI-X capability is enabled:

      Request the appropriate MSI-X interrupt message for the
      device; the Queue Vector field specifies the MSI-X Table
      entry to use.

      If the Queue Vector field value is NO_VECTOR, no interrupt
      message is requested for this event.
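
Mirroring the driver-side check, here is a sketch of the
interrupt decision in step 3 from the device's point of view
(old_idx is the value of used->idx at the previous interrupt):

/* Returns non-zero if the device should inject an interrupt after
 * advancing used->idx. */
static int need_interrupt(struct vring *vr, unsigned int qsz,
                          int event_idx_negotiated, uint16_t old_idx)
{
        if (event_idx_negotiated) {
                /* used_event lives just past the available ring entries. */
                uint16_t used_event = *(uint16_t *)&vr->avail->ring[qsz];
                return vring_need_event(used_event, vr->used->idx,
                                        old_idx);
        }
        return !(vr->avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
}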
766 | |||
767 | The guest interrupt handler should: | ||
768 | |||
769 | If MSI-X capability is disabled: read the ISR Status field, | ||
770 | which will reset it to zero. If the lower bit is zero, the | ||
771 | interrupt was not for this device. Otherwise, the guest driver | ||
772 | should look through the used rings of each virtqueue for the | ||
773 | device, to see if any progress has been made by the device | ||
774 | which requires servicing. | ||
775 | |||
776 | If MSI-X capability is enabled: look through the used rings of | ||
777 | each virtqueue mapped to the specific MSI-X vector for the | ||
778 | device, to see if any progress has been made by the device | ||
779 | which requires servicing. | ||
780 | |||
For each ring, the guest should then disable interrupts by
setting the VRING_AVAIL_F_NO_INTERRUPT flag in the avail
structure, if required. It can then process used ring entries,
finally re-enabling interrupts by clearing the
VRING_AVAIL_F_NO_INTERRUPT flag or updating the used_event field
in the available structure. The guest should then execute a
memory barrier, and then recheck the ring empty condition. This
is necessary to handle the case where, after the last check and
before enabling interrupts, an interrupt has been suppressed by
the device:
790 | |||
vring_disable_interrupts(vq);

for (;;) {
        if (vq->last_seen_used == vring.used->idx) {
                /* Ring looks empty: re-enable interrupts, then
                 * recheck in case more buffers were used meanwhile. */
                vring_enable_interrupts(vq);
                mb();

                if (vq->last_seen_used == vring.used->idx)
                        break;
        }

        struct vring_used_elem *e =
                &vring.used->ring[vq->last_seen_used % qsz];
        process_buffer(e);
        vq->last_seen_used++;
}
815 | |||
816 | Dealing With Configuration Changes | ||
817 | |||
818 | Some virtio PCI devices can change the device configuration | ||
819 | state, as reflected in the virtio header in the PCI configuration | ||
820 | space. In this case: | ||
821 | |||
If MSI-X capability is disabled: an interrupt is delivered, and
the second lowest bit is set in the ISR Status field to indicate
that the driver should re-examine the configuration space. Note
that a single interrupt can indicate both that one or more
virtqueues have been used and that the configuration space has
changed: even if the config bit is set, virtqueues must be
scanned.
829 | |||
If MSI-X capability is enabled: an interrupt message is
requested. The Configuration Vector field specifies the MSI-X
Table entry to use. If the Configuration Vector field value is
NO_VECTOR, no interrupt message is requested for this event.
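
For the non-MSI-X case, a guest interrupt handler following the
rules above might look like this sketch (read_isr_status(),
reread_config_space(), scan_used_rings() and the device type are
hypothetical helpers, not part of the specification):

/* Sketch of a guest interrupt handler without MSI-X. Reading the
 * ISR Status field resets it to zero; bit 0 means "virtqueue
 * activity", bit 1 means "configuration change". */
static void legacy_irq_handler(struct my_virtio_dev *dev)
{
        uint8_t isr = read_isr_status(dev);

        if (!isr)
                return;         /* interrupt was not for this device */
        if (isr & 0x2)
                reread_config_space(dev);
        /* Even if the config bit is set, scan all used rings. */
        scan_used_rings(dev);
}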
834 | |||
835 | Creating New Device Types | ||
836 | |||
837 | Various considerations are necessary when creating a new device | ||
838 | type: | ||
839 | |||
840 | How Many Virtqueues? | ||
841 | |||
It is possible that a very simple device will operate entirely
through its configuration space, but most will need at least one
virtqueue in which to place requests. A device with both input
and output (e.g. the console and network devices described here)
needs two queues: one which the driver fills with buffers to
receive input, and one in which the driver places buffers to
transmit output.
849 | |||
850 | What Configuration Space Layout? | ||
851 | |||
852 | Configuration space is generally used for rarely-changing or | ||
853 | initialization-time parameters. But it is a limited resource, so | ||
854 | it might be better to use a virtqueue to update configuration | ||
855 | information (the network device does this for filtering, | ||
856 | otherwise the table in the config space could potentially be very | ||
857 | large). | ||
858 | |||
859 | Note that this space is generally the guest's native endian, | ||
860 | rather than PCI's little-endian. | ||
861 | |||
862 | What Device Number? | ||
863 | |||
864 | Currently device numbers are assigned quite freely: a simple | ||
865 | request mail to the author of this document or the Linux | ||
866 | virtualization mailing list[footnote: | ||
867 | |||
868 | https://lists.linux-foundation.org/mailman/listinfo/virtualization | ||
869 | ] will be sufficient to secure a unique one. | ||
870 | |||
871 | Meanwhile for experimental drivers, use 65535 and work backwards. | ||
872 | |||
873 | How many MSI-X vectors? | ||
874 | |||
Using the optional MSI-X capability, devices can speed up
interrupt processing by removing the need for the guest driver to
read the ISR Status register (which might be an expensive
operation), by reducing interrupt sharing between devices and
queues within the device, and by handling interrupts on multiple
CPUs. However, some systems impose a limit (which might be as low
as 256) on the total number of MSI-X vectors that can be
allocated to all devices. Devices and/or device drivers should
take this into account, limiting the number of vectors used
unless the device is expected to cause a high volume of
interrupts. Devices can control the number of vectors used by
limiting the MSI-X Table Size or by not presenting the MSI-X
capability in PCI configuration space. Drivers can control this
by mapping events to as small a number of vectors as possible, or
by disabling the MSI-X capability altogether.
890 | |||
891 | Message Framing | ||
892 | |||
The descriptors used for a buffer should not affect the semantics
of the message, except for the total length of the buffer. For
example, a network buffer consists of a 10 byte header followed
by the network packet. Whether this is presented in the ring
descriptor chain as (say) a 10 byte buffer and a 1514 byte
buffer, or a single 1524 byte buffer, or even three buffers,
should have no effect.
900 | |||
901 | In particular, no implementation should use the descriptor | ||
902 | boundaries to determine the size of any header in a request.[footnote: | ||
903 | The current qemu device implementations mistakenly insist that | ||
904 | the first descriptor cover the header in these cases exactly, so | ||
905 | a cautious driver should arrange it so. | ||
906 | ] | ||
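
For instance, both of the following descriptor layouts carry the
same 1524-byte transmit message and must be treated identically
by the device (the addresses, and the assumption that the chain
occupies table entries 0 and 1, are purely illustrative):

#define HDR_PHYS    0x10000ULL   /* illustrative guest-physical addresses */
#define PACKET_PHYS 0x11000ULL
#define MSG_PHYS    0x20000ULL

/* Layout 1: a 10-byte header descriptor chained to a 1514-byte
 * packet descriptor (entry 0 chains to entry 1 via next). */
struct vring_desc split[2] = {
        { .addr = HDR_PHYS,    .len = 10,   .flags = VRING_DESC_F_NEXT, .next = 1 },
        { .addr = PACKET_PHYS, .len = 1514, .flags = 0,                 .next = 0 },
};

/* Layout 2: one 1524-byte descriptor covering header and packet. */
struct vring_desc merged[1] = {
        { .addr = MSG_PHYS, .len = 1524, .flags = 0, .next = 0 },
};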
907 | |||
908 | Device Improvements | ||
909 | |||
910 | Any change to configuration space, or new virtqueues, or | ||
911 | behavioural changes, should be indicated by negotiation of a new | ||
912 | feature bit. This establishes clarity[footnote: | ||
913 | Even if it does mean documenting design or implementation | ||
914 | mistakes! | ||
915 | ] and avoids future expansion problems. | ||
916 | |||
917 | Clusters of functionality which are always implemented together | ||
918 | can use a single bit, but if one feature makes sense without the | ||
919 | others they should not be gratuitously grouped together to | ||
920 | conserve feature bits. We can always extend the spec when the | ||
921 | first person needs more than 24 feature bits for their device. | ||
922 | |||
923 | [LaTeX Command: printnomenclature] | ||
924 | |||
925 | Appendix A: virtio_ring.h | ||
926 | |||
#ifndef VIRTIO_RING_H
#define VIRTIO_RING_H

/* An interface for efficient virtio implementation.
 *
 * This header is BSD licensed so anyone can use the definitions
 * to implement compatible drivers/servers.
 *
 * Copyright 2007, 2009, IBM Corporation
 * Copyright 2011, Red Hat, Inc
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of IBM nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE. */

/* This marks a buffer as continuing via the next field. */
#define VRING_DESC_F_NEXT       1
/* This marks a buffer as write-only (otherwise read-only). */
#define VRING_DESC_F_WRITE      2

/* The Host uses this in used->flags to advise the Guest: don't kick me
 * when you add a buffer. It's unreliable, so it's simply an
 * optimization. Guest will still kick if it's out of buffers. */
#define VRING_USED_F_NO_NOTIFY  1
/* The Guest uses this in avail->flags to advise the Host: don't
 * interrupt me when you consume a buffer. It's unreliable, so it's
 * simply an optimization. */
#define VRING_AVAIL_F_NO_INTERRUPT      1

/* Virtio ring descriptors: 16 bytes.
 * These can chain together via "next". */
struct vring_desc {
        /* Address (guest-physical). */
        uint64_t addr;
        /* Length. */
        uint32_t len;
        /* The flags as indicated above. */
        uint16_t flags;
        /* We chain unused descriptors via this, too. */
        uint16_t next;
};

struct vring_avail {
        uint16_t flags;
        uint16_t idx;
        uint16_t ring[];
        uint16_t used_event;
};

/* u32 is used here for ids for padding reasons. */
struct vring_used_elem {
        /* Index of start of used descriptor chain. */
        uint32_t id;
        /* Total length of the descriptor chain which was written to. */
        uint32_t len;
};

struct vring_used {
        uint16_t flags;
        uint16_t idx;
        struct vring_used_elem ring[];
        uint16_t avail_event;
};

struct vring {
        unsigned int num;

        struct vring_desc *desc;
        struct vring_avail *avail;
        struct vring_used *used;
};

/* The standard layout for the ring is a continuous chunk of memory
 * which looks like this. We assume num is a power of 2.
 *
 * struct vring {
 *      // The actual descriptors (16 bytes each)
 *      struct vring_desc desc[num];
 *
 *      // A ring of available descriptor heads with free-running index.
 *      __u16 avail_flags;
 *      __u16 avail_idx;
 *      __u16 available[num];
 *
 *      // Padding to the next align boundary.
 *      char pad[];
 *
 *      // A ring of used descriptor heads with free-running index.
 *      __u16 used_flags;
 *      __u16 used_idx;
 *      struct vring_used_elem used[num];
 * };
 * Note: for virtio PCI, align is 4096.
 */
static inline void vring_init(struct vring *vr, unsigned int num, void *p,
                              unsigned long align)
{
        vr->num = num;
        vr->desc = p;
        vr->avail = p + num*sizeof(struct vring_desc);
        vr->used = (void *)(((unsigned long)&vr->avail->ring[num]
                             + align-1)
                            & ~(align - 1));
}

static inline unsigned vring_size(unsigned int num, unsigned long align)
{
        return ((sizeof(struct vring_desc)*num + sizeof(uint16_t)*(2+num)
                 + align - 1) & ~(align - 1))
                + sizeof(uint16_t)*3 + sizeof(struct vring_used_elem)*num;
}

static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx,
                                   uint16_t old_idx)
{
        return (uint16_t)(new_idx - event_idx - 1) <
                (uint16_t)(new_idx - old_idx);
}

#endif /* VIRTIO_RING_H */
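
As a usage sketch of the two helpers above (alloc_queue_mem() is
a hypothetical allocator returning zeroed, physically-contiguous,
page-aligned memory):

unsigned int num = 256;        /* queue size: must be a power of 2 */
unsigned long align = 4096;    /* virtio PCI alignment */
struct vring vr;

void *p = alloc_queue_mem(vring_size(num, align));
vring_init(&vr, num, p, align);
/* vr.desc, vr.avail and vr.used now point into the single
 * allocation, with vr.used starting at the first align boundary
 * after the available ring. */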
1231 | |||
1232 | <cha:Reserved-Feature-Bits>Appendix B: Reserved Feature Bits | ||
1233 | |||
1234 | Currently there are five device-independent feature bits defined: | ||
1235 | |||
1236 | VIRTIO_F_NOTIFY_ON_EMPTY (24) Negotiating this feature | ||
1237 | indicates that the driver wants an interrupt if the device runs | ||
1238 | out of available descriptors on a virtqueue, even though | ||
1239 | interrupts are suppressed using the VRING_AVAIL_F_NO_INTERRUPT | ||
1240 | flag or the used_event field. An example of this is the | ||
1241 | networking driver: it doesn't need to know every time a packet | ||
1242 | is transmitted, but it does need to free the transmitted | ||
1243 | packets a finite time after they are transmitted. It can avoid | ||
1244 | using a timer if the device interrupts it when all the packets | ||
1245 | are transmitted. | ||
1246 | |||
1247 | VIRTIO_F_RING_INDIRECT_DESC (28) Negotiating this feature | ||
1248 | indicates that the driver can use descriptors with the | ||
1249 | VRING_DESC_F_INDIRECT flag set, as described in [sub:Indirect-Descriptors] | ||
1250 | . | ||
1251 | |||
VIRTIO_F_RING_EVENT_IDX (29) This feature enables the used_event
and the avail_event fields. If set, it indicates that the
device should ignore the flags field in the available ring
structure. Instead, the used_event field in this structure is
used by the guest to suppress device interrupts. Further, the
driver should ignore the flags field in the used ring
structure. Instead, the avail_event field in this structure is
used by the device to suppress notifications. If unset, the
driver should ignore the used_event field, the device should
ignore the avail_event field, and the flags fields are used.
1262 | |||
1263 | VIRTIO_F_BAD_FEATURE(30) This feature should never be | ||
1264 | negotiated by the guest; doing so is an indication that the | ||
1265 | guest is faulty[footnote: | ||
1266 | An experimental virtio PCI driver contained in Linux version | ||
1267 | 2.6.25 had this problem, and this feature bit can be used to | ||
1268 | detect it. | ||
1269 | ] | ||
1270 | |||
1271 | VIRTIO_F_FEATURES_HIGH(31) This feature indicates that the | ||
1272 | device supports feature bits 32:63. If unset, feature bits | ||
1273 | 32:63 are unset. | ||
1274 | |||
1275 | Appendix C: Network Device | ||
1276 | |||
The virtio network device is a virtual ethernet card, and is the
most complex of the devices supported so far by virtio. It has
been enhanced rapidly and demonstrates clearly how support for
new features should be added to an existing device. Empty buffers
are placed in one virtqueue for receiving packets, and outgoing
packets are enqueued into another for transmission in that order.
A third command queue is used to control advanced filtering
features.
1285 | |||
1286 | Configuration | ||
1287 | |||
1288 | Subsystem Device ID 1 | ||
1289 | |||
1290 | Virtqueues 0:receiveq. 1:transmitq. 2:controlq[footnote: | ||
1291 | Only if VIRTIO_NET_F_CTRL_VQ set | ||
1292 | ] | ||
1293 | |||
1294 | Feature bits | ||
1295 | |||
1296 | VIRTIO_NET_F_CSUM (0) Device handles packets with partial | ||
1297 | checksum | ||
1298 | |||
1299 | VIRTIO_NET_F_GUEST_CSUM (1) Guest handles packets with partial | ||
1300 | checksum | ||
1301 | |||
1302 | VIRTIO_NET_F_MAC (5) Device has given MAC address. | ||
1303 | |||
1304 | VIRTIO_NET_F_GSO (6) (Deprecated) device handles packets with | ||
1305 | any GSO type.[footnote: | ||
1306 | It was supposed to indicate segmentation offload support, but | ||
1307 | upon further investigation it became clear that multiple bits | ||
1308 | were required. | ||
1309 | ] | ||
1310 | |||
1311 | VIRTIO_NET_F_GUEST_TSO4 (7) Guest can receive TSOv4. | ||
1312 | |||
1313 | VIRTIO_NET_F_GUEST_TSO6 (8) Guest can receive TSOv6. | ||
1314 | |||
1315 | VIRTIO_NET_F_GUEST_ECN (9) Guest can receive TSO with ECN. | ||
1316 | |||
1317 | VIRTIO_NET_F_GUEST_UFO (10) Guest can receive UFO. | ||
1318 | |||
1319 | VIRTIO_NET_F_HOST_TSO4 (11) Device can receive TSOv4. | ||
1320 | |||
1321 | VIRTIO_NET_F_HOST_TSO6 (12) Device can receive TSOv6. | ||
1322 | |||
1323 | VIRTIO_NET_F_HOST_ECN (13) Device can receive TSO with ECN. | ||
1324 | |||
1325 | VIRTIO_NET_F_HOST_UFO (14) Device can receive UFO. | ||
1326 | |||
1327 | VIRTIO_NET_F_MRG_RXBUF (15) Guest can merge receive buffers. | ||
1328 | |||
1329 | VIRTIO_NET_F_STATUS (16) Configuration status field is | ||
1330 | available. | ||
1331 | |||
1332 | VIRTIO_NET_F_CTRL_VQ (17) Control channel is available. | ||
1333 | |||
1334 | VIRTIO_NET_F_CTRL_RX (18) Control channel RX mode support. | ||
1335 | |||
1336 | VIRTIO_NET_F_CTRL_VLAN (19) Control channel VLAN filtering. | ||
1337 | |||
Device configuration layout Two configuration fields are
currently defined. The mac address field always exists (though it
is only valid if VIRTIO_NET_F_MAC is set), and the status field
only exists if VIRTIO_NET_F_STATUS is set. Only one bit is
currently defined for the status field: VIRTIO_NET_S_LINK_UP.

#define VIRTIO_NET_S_LINK_UP    1

struct virtio_net_config {
        u8 mac[6];
        u16 status;
};
1353 | |||
1354 | Device Initialization | ||
1355 | |||
1356 | The initialization routine should identify the receive and | ||
1357 | transmission virtqueues. | ||
1358 | |||
If the VIRTIO_NET_F_MAC feature bit is set, the configuration
space “mac” entry indicates the “physical” address of the
network card, otherwise a private MAC address should be
assigned. All guests are expected to negotiate this feature if
it is set.
1364 | |||
1365 | If the VIRTIO_NET_F_CTRL_VQ feature bit is negotiated, identify | ||
1366 | the control virtqueue. | ||
1367 | |||
1368 | If the VIRTIO_NET_F_STATUS feature bit is negotiated, the link | ||
1369 | status can be read from the bottom bit of the “status” config | ||
1370 | field. Otherwise, the link should be assumed active. | ||
1371 | |||
1372 | The receive virtqueue should be filled with receive buffers. | ||
1373 | This is described in detail below in “Setting Up Receive | ||
1374 | Buffers”. | ||
1375 | |||
A driver can indicate that it will generate checksumless
packets by negotiating the VIRTIO_NET_F_CSUM feature. This “
checksum offload” is a common feature on modern network cards.
1379 | |||
1380 | If that feature is negotiated, a driver can use TCP or UDP | ||
1381 | segmentation offload by negotiating the VIRTIO_NET_F_HOST_TSO4 | ||
1382 | (IPv4 TCP), VIRTIO_NET_F_HOST_TSO6 (IPv6 TCP) and | ||
1383 | VIRTIO_NET_F_HOST_UFO (UDP fragmentation) features. It should | ||
1384 | not send TCP packets requiring segmentation offload which have | ||
1385 | the Explicit Congestion Notification bit set, unless the | ||
1386 | VIRTIO_NET_F_HOST_ECN feature is negotiated.[footnote: | ||
1387 | This is a common restriction in real, older network cards. | ||
1388 | ] | ||
1389 | |||
1390 | The converse features are also available: a driver can save the | ||
1391 | virtual device some work by negotiating these features.[footnote: | ||
1392 | For example, a network packet transported between two guests on | ||
1393 | the same system may not require checksumming at all, nor | ||
1394 | segmentation, if both guests are amenable. | ||
1395 | ] The VIRTIO_NET_F_GUEST_CSUM feature indicates that partially | ||
1396 | checksummed packets can be received, and if it can do that then | ||
1397 | the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, | ||
1398 | VIRTIO_NET_F_GUEST_UFO and VIRTIO_NET_F_GUEST_ECN are the input | ||
1399 | equivalents of the features described above. See “Receiving | ||
1400 | Packets” below. | ||
1401 | |||
1402 | Device Operation | ||
1403 | |||
Packets are transmitted by placing them in the transmitq, and
buffers for incoming packets are placed in the receiveq. In each
case, the packet itself is preceded by a header:
1407 | |||
struct virtio_net_hdr {
#define VIRTIO_NET_HDR_F_NEEDS_CSUM    1
        u8 flags;
#define VIRTIO_NET_HDR_GSO_NONE        0
#define VIRTIO_NET_HDR_GSO_TCPV4       1
#define VIRTIO_NET_HDR_GSO_UDP         3
#define VIRTIO_NET_HDR_GSO_TCPV6       4
#define VIRTIO_NET_HDR_GSO_ECN         0x80
        u8 gso_type;
        u16 hdr_len;
        u16 gso_size;
        u16 csum_start;
        u16 csum_offset;
        /* Only if VIRTIO_NET_F_MRG_RXBUF: */
        u16 num_buffers;
};
1439 | |||
1440 | The controlq is used to control device features such as | ||
1441 | filtering. | ||
1442 | |||
1443 | Packet Transmission | ||
1444 | |||
1445 | Transmitting a single packet is simple, but varies depending on | ||
1446 | the different features the driver negotiated. | ||
1447 | |||
1448 | If the driver negotiated VIRTIO_NET_F_CSUM, and the packet has | ||
1449 | not been fully checksummed, then the virtio_net_hdr's fields | ||
1450 | are set as follows. Otherwise, the packet must be fully | ||
1451 | checksummed, and flags is zero. | ||
1452 | |||
1453 | flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM set, | ||
1454 | |||
1455 | <ite:csum_start-is-set>csum_start is set to the offset within | ||
1456 | the packet to begin checksumming, and | ||
1457 | |||
1458 | csum_offset indicates how many bytes after the csum_start the | ||
1459 | new (16 bit ones' complement) checksum should be placed.[footnote: | ||
1460 | For example, consider a partially checksummed TCP (IPv4) packet. | ||
1461 | It will have a 14 byte ethernet header and 20 byte IP header | ||
1462 | followed by the TCP header (with the TCP checksum field 16 bytes | ||
1463 | into that header). csum_start will be 14+20 = 34 (the TCP | ||
1464 | checksum includes the header), and csum_offset will be 16. The | ||
1465 | value in the TCP checksum field will be the sum of the TCP pseudo | ||
1466 | header, so that replacing it by the ones' complement checksum of | ||
1467 | the TCP header and body will give the correct result. | ||
1468 | ] | ||
1469 | |||
1470 | <enu:If-the-driver>If the driver negotiated | ||
1471 | VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO, and the packet requires | ||
1472 | TCP segmentation or UDP fragmentation, then the “gso_type” | ||
1473 | field is set to VIRTIO_NET_HDR_GSO_TCPV4, TCPV6 or UDP. | ||
1474 | (Otherwise, it is set to VIRTIO_NET_HDR_GSO_NONE). In this | ||
1475 | case, packets larger than 1514 bytes can be transmitted: the | ||
1476 | metadata indicates how to replicate the packet header to cut it | ||
1477 | into smaller packets. The other gso fields are set: | ||
1478 | |||
1479 | hdr_len is a hint to the device as to how much of the header | ||
1480 | needs to be kept to copy into each packet, usually set to the | ||
1481 | length of the headers, including the transport header.[footnote: | ||
1482 | Due to various bugs in implementations, this field is not useful | ||
1483 | as a guarantee of the transport header size. | ||
1484 | ] | ||
1485 | |||
1486 | gso_size is the size of the packet beyond that header (ie. | ||
1487 | MSS). | ||
1488 | |||
1489 | If the driver negotiated the VIRTIO_NET_F_HOST_ECN feature, the | ||
1490 | VIRTIO_NET_HDR_GSO_ECN bit may be set in “gso_type” as well, | ||
1491 | indicating that the TCP packet has the ECN bit set.[footnote: | ||
1492 | This case is not handled by some older hardware, so is called out | ||
1493 | specifically in the protocol. | ||
1494 | ] | ||
1495 | |||
1496 | If the driver negotiated the VIRTIO_NET_F_MRG_RXBUF feature, | ||
1497 | the num_buffers field is set to zero. | ||
1498 | |||
1499 | The header and packet are added as one output buffer to the | ||
1500 | transmitq, and the device is notified of the new entry (see [sub:Notifying-The-Device] | ||
1501 | ).[footnote: | ||
1502 | Note that the header will be two bytes longer for the | ||
1503 | VIRTIO_NET_F_MRG_RXBUF case. | ||
1504 | ] | ||
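
Putting the steps above together for a partially-checksummed
TCP/IPv4 packet with no segmentation offload (the offsets come
from the footnote earlier; fill_tx_header is an illustrative
helper name):

/* Fill the virtio_net_hdr for a partially-checksummed TCP/IPv4
 * packet, no TSO/UFO. The header and the packet are then added as
 * one output buffer to the transmitq. */
static void fill_tx_header(struct virtio_net_hdr *hdr)
{
        *hdr = (struct virtio_net_hdr){ 0 };
        hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
        hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
        hdr->csum_start = 14 + 20;   /* ethernet + IPv4 headers = 34 */
        hdr->csum_offset = 16;       /* TCP checksum offset in TCP header */
        /* num_buffers (only if VIRTIO_NET_F_MRG_RXBUF) stays zero. */
}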
1505 | |||
1506 | Packet Transmission Interrupt | ||
1507 | |||
1508 | Often a driver will suppress transmission interrupts using the | ||
1509 | VRING_AVAIL_F_NO_INTERRUPT flag (see [sub:Receiving-Used-Buffers] | ||
1510 | ) and check for used packets in the transmit path of following | ||
1511 | packets. However, it will still receive interrupts if the | ||
1512 | VIRTIO_F_NOTIFY_ON_EMPTY feature is negotiated, indicating that | ||
1513 | the transmission queue is completely emptied. | ||
1514 | |||
The normal behavior in this interrupt handler is to retrieve the
new descriptors from the used ring and free the corresponding
headers and packets.
1518 | |||
1519 | Setting Up Receive Buffers | ||
1520 | |||
1521 | It is generally a good idea to keep the receive virtqueue as | ||
1522 | fully populated as possible: if it runs out, network performance | ||
1523 | will suffer. | ||
1524 | |||
If the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6 or
VIRTIO_NET_F_GUEST_UFO features are used, the Guest will need to
accept packets of up to 65550 bytes (the maximum size of a
TCP or UDP packet, plus the 14 byte ethernet header); otherwise,
1514 bytes. So unless VIRTIO_NET_F_MRG_RXBUF is negotiated, every
buffer in the receive queue needs to be at least this length[footnote:
Obviously each one can be split across multiple descriptor
elements.
].
1534 | |||
1535 | If VIRTIO_NET_F_MRG_RXBUF is negotiated, each buffer must be at | ||
1536 | least the size of the struct virtio_net_hdr. | ||
1537 | |||
1538 | Packet Receive Interrupt | ||
1539 | |||
1540 | When a packet is copied into a buffer in the receiveq, the | ||
1541 | optimal path is to disable further interrupts for the receiveq | ||
1542 | (see [sub:Receiving-Used-Buffers]) and process packets until no | ||
1543 | more are found, then re-enable them. | ||
1544 | |||
Processing a packet involves:
1546 | |||
1547 | If the driver negotiated the VIRTIO_NET_F_MRG_RXBUF feature, | ||
1548 | then the “num_buffers” field indicates how many descriptors | ||
1549 | this packet is spread over (including this one). This allows | ||
1550 | receipt of large packets without having to allocate large | ||
1551 | buffers. In this case, there will be at least “num_buffers” in | ||
1552 | the used ring, and they should be chained together to form a | ||
1553 | single packet. The other buffers will not begin with a struct | ||
1554 | virtio_net_hdr. | ||
1555 | |||
1556 | If the VIRTIO_NET_F_MRG_RXBUF feature was not negotiated, or | ||
1557 | the “num_buffers” field is one, then the entire packet will be | ||
1558 | contained within this buffer, immediately following the struct | ||
1559 | virtio_net_hdr. | ||
1560 | |||
1561 | If the VIRTIO_NET_F_GUEST_CSUM feature was negotiated, the | ||
1562 | VIRTIO_NET_HDR_F_NEEDS_CSUM bit in the “flags” field may be | ||
1563 | set: if so, the checksum on the packet is incomplete and the “ | ||
1564 | csum_start” and “csum_offset” fields indicate how to calculate | ||
1565 | it (see [ite:csum_start-is-set]). | ||
1566 | |||
If the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options were
negotiated, then the “gso_type” may be something other than
VIRTIO_NET_HDR_GSO_NONE, and the “gso_size” field indicates the
desired MSS (see [enu:If-the-driver]).

Control Virtqueue
1571 | |||
The driver uses the control virtqueue (if VIRTIO_NET_F_CTRL_VQ is
negotiated) to send commands to manipulate various features of
the device which would not easily map into the configuration
space.
1576 | |||
1577 | All commands are of the following form: | ||
1578 | |||
struct virtio_net_ctrl {
        u8 class;
        u8 command;
        u8 command-specific-data[];
        u8 ack;
};

/* ack values */
#define VIRTIO_NET_OK   0
#define VIRTIO_NET_ERR  1
1598 | |||
The class, command and command-specific-data are set by the
driver, and the device sets the ack byte. There is little the
driver can do except issue a diagnostic if the ack byte is not
VIRTIO_NET_OK.
1603 | |||
1604 | Packet Receive Filtering | ||
1605 | |||
1606 | If the VIRTIO_NET_F_CTRL_RX feature is negotiated, the driver can | ||
1607 | send control commands for promiscuous mode, multicast receiving, | ||
1608 | and filtering of MAC addresses. | ||
1609 | |||
1610 | Note that in general, these commands are best-effort: unwanted | ||
1611 | packets may still arrive. | ||
1612 | |||
1613 | Setting Promiscuous Mode | ||
1614 | |||
#define VIRTIO_NET_CTRL_RX           0
#define VIRTIO_NET_CTRL_RX_PROMISC   0
#define VIRTIO_NET_CTRL_RX_ALLMULTI  1
1620 | |||
1621 | The class VIRTIO_NET_CTRL_RX has two commands: | ||
1622 | VIRTIO_NET_CTRL_RX_PROMISC turns promiscuous mode on and off, and | ||
1623 | VIRTIO_NET_CTRL_RX_ALLMULTI turns all-multicast receive on and | ||
1624 | off. The command-specific-data is one byte containing 0 (off) or | ||
1625 | 1 (on). | ||
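
A sketch of using this class to turn promiscuous mode on; struct
virtqueue and add_buf_and_wait() (queue the read-only command and
the write-only ack byte, notify the device, and wait for the
buffers to be used) are hypothetical, not part of the
specification:

struct promisc_cmd {
        u8 class;       /* VIRTIO_NET_CTRL_RX */
        u8 command;     /* VIRTIO_NET_CTRL_RX_PROMISC */
        u8 on;          /* command-specific-data: 0 = off, 1 = on */
};

static int set_promisc(struct virtqueue *controlq, int on)
{
        struct promisc_cmd cmd = { VIRTIO_NET_CTRL_RX,
                                   VIRTIO_NET_CTRL_RX_PROMISC,
                                   on ? 1 : 0 };
        u8 ack = VIRTIO_NET_ERR;

        add_buf_and_wait(controlq, &cmd, sizeof(cmd), &ack, sizeof(ack));
        return ack == VIRTIO_NET_OK ? 0 : -1;
}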
1626 | |||
1627 | Setting MAC Address Filtering | ||
1628 | |||
struct virtio_net_ctrl_mac {
        u32 entries;
        u8 macs[entries][ETH_ALEN];
};

#define VIRTIO_NET_CTRL_MAC            1
#define VIRTIO_NET_CTRL_MAC_TABLE_SET  0
1642 | |||
The device can filter incoming packets by any number of
destination MAC addresses.[footnote:
Since there are no guarantees, it can use a hash filter or
silently switch to allmulti or promiscuous mode if it is given
too many addresses.
] This table is set using the class VIRTIO_NET_CTRL_MAC and the
command VIRTIO_NET_CTRL_MAC_TABLE_SET. The command-specific-data
is two variable length tables of 6-byte MAC addresses. The first
table contains unicast addresses, and the second contains
multicast addresses.
1653 | |||
1654 | VLAN Filtering | ||
1655 | |||
If the driver negotiates the VIRTIO_NET_F_CTRL_VLAN feature, it
can control a VLAN filter table in the device.
1658 | |||
#define VIRTIO_NET_CTRL_VLAN       2
#define VIRTIO_NET_CTRL_VLAN_ADD   0
#define VIRTIO_NET_CTRL_VLAN_DEL   1
1664 | |||
Both the VIRTIO_NET_CTRL_VLAN_ADD and VIRTIO_NET_CTRL_VLAN_DEL
commands take a 16-bit VLAN id as the command-specific-data.
1667 | |||
1668 | Appendix D: Block Device | ||
1669 | |||
The virtio block device is a simple virtual block device (i.e. a
disk). Read and write requests (and other exotic requests) are
placed in the queue, and serviced (probably out of order) by the
device except where noted.
1674 | |||
1675 | Configuration | ||
1676 | |||
1677 | Subsystem Device ID 2 | ||
1678 | |||
1679 | Virtqueues 0:requestq. | ||
1680 | |||
1681 | Feature bits | ||
1682 | |||
1683 | VIRTIO_BLK_F_BARRIER (0) Host supports request barriers. | ||
1684 | |||
1685 | VIRTIO_BLK_F_SIZE_MAX (1) Maximum size of any single segment is | ||
1686 | in “size_max”. | ||
1687 | |||
1688 | VIRTIO_BLK_F_SEG_MAX (2) Maximum number of segments in a | ||
1689 | request is in “seg_max”. | ||
1690 | |||
1691 | VIRTIO_BLK_F_GEOMETRY (4) Disk-style geometry specified in “ | ||
1692 | geometry”. | ||
1693 | |||
1694 | VIRTIO_BLK_F_RO (5) Device is read-only. | ||
1695 | |||
1696 | VIRTIO_BLK_F_BLK_SIZE (6) Block size of disk is in “blk_size”. | ||
1697 | |||
1698 | VIRTIO_BLK_F_SCSI (7) Device supports scsi packet commands. | ||
1699 | |||
1700 | VIRTIO_BLK_F_FLUSH (9) Cache flush command support. | ||
1701 | |||
1702 | |||
1703 | |||
Device configuration layout The capacity of the device
(expressed in 512-byte sectors) is always present. The
availability of the others all depends on various feature bits
as indicated above.

struct virtio_blk_config {
        u64 capacity;
        u32 size_max;
        u32 seg_max;
        struct virtio_blk_geometry {
                u16 cylinders;
                u8 heads;
                u8 sectors;
        } geometry;
        u32 blk_size;
};
1730 | |||
1731 | Device Initialization | ||
1732 | |||
The device size should be read from the “capacity”
configuration field. No requests should be submitted which go
beyond this limit.
1736 | |||
If the VIRTIO_BLK_F_BLK_SIZE feature is negotiated, the
blk_size field can be read to determine the optimal sector size
for the driver to use. This does not affect the units used in
the protocol (always 512 bytes), but awareness of the correct
value can affect performance.
1742 | |||
1743 | If the VIRTIO_BLK_F_RO feature is set by the device, any write | ||
1744 | requests will fail. | ||
1745 | |||
1746 | |||
1747 | |||
1748 | Device Operation | ||
1749 | |||
The driver queues requests to the virtqueue, and they are used by
the device (not necessarily in order). Each request is of the
form:

struct virtio_blk_req {
        u32 type;
        u32 ioprio;
        u64 sector;
        char data[][512];
        u8 status;
};
1768 | |||
If the device has the VIRTIO_BLK_F_SCSI feature, it can also
support scsi packet command requests; each of these requests is
of the form:

struct virtio_scsi_pc_req {
        u32 type;
        u32 ioprio;
        u64 sector;
        char cmd[];
        char data[][512];
#define SCSI_SENSE_BUFFERSIZE 96
        u8 sense[SCSI_SENSE_BUFFERSIZE];
        u32 errors;
        u32 data_len;
        u32 sense_len;
        u32 residual;
        u8 status;
};
1797 | |||
1798 | The type of the request is either a read (VIRTIO_BLK_T_IN), a | ||
1799 | write (VIRTIO_BLK_T_OUT), a scsi packet command | ||
1800 | (VIRTIO_BLK_T_SCSI_CMD or VIRTIO_BLK_T_SCSI_CMD_OUT[footnote: | ||
1801 | the SCSI_CMD and SCSI_CMD_OUT types are equivalent, the device | ||
1802 | does not distinguish between them | ||
1803 | ]) or a flush (VIRTIO_BLK_T_FLUSH or VIRTIO_BLK_T_FLUSH_OUT[footnote: | ||
1804 | the FLUSH and FLUSH_OUT types are equivalent, the device does not | ||
1805 | distinguish between them | ||
]). If the device has the VIRTIO_BLK_F_BARRIER feature, the high
bit (VIRTIO_BLK_T_BARRIER) indicates that this request acts as a
barrier and that all preceding requests must be complete before
this one, and all following requests must not be started until
this is complete. Note that a barrier does not flush caches in
the underlying backend device in the host, and thus does not
serve as a data consistency guarantee. The driver must use a
FLUSH request to flush the host cache.
1814 | |||
#define VIRTIO_BLK_T_IN            0
#define VIRTIO_BLK_T_OUT           1
#define VIRTIO_BLK_T_SCSI_CMD      2
#define VIRTIO_BLK_T_SCSI_CMD_OUT  3
#define VIRTIO_BLK_T_FLUSH         4
#define VIRTIO_BLK_T_FLUSH_OUT     5
#define VIRTIO_BLK_T_BARRIER       0x80000000
1828 | |||
1829 | The ioprio field is a hint about the relative priorities of | ||
1830 | requests to the device: higher numbers indicate more important | ||
1831 | requests. | ||
1832 | |||
1833 | The sector number indicates the offset (multiplied by 512) where | ||
1834 | the read or write is to occur. This field is unused and set to 0 | ||
1835 | for scsi packet commands and for flush commands. | ||
1836 | |||
1837 | The cmd field is only present for scsi packet command requests, | ||
1838 | and indicates the command to perform. This field must reside in a | ||
1839 | single, separate read-only buffer; command length can be derived | ||
1840 | from the length of this buffer. | ||
1841 | |||
1842 | Note that these first three (four for scsi packet commands) | ||
1843 | fields are always read-only: the data field is either read-only | ||
1844 | or write-only, depending on the request. The size of the read or | ||
1845 | write can be derived from the total size of the request buffers. | ||
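
A sketch of a one-sector read framed the conservative,
historical way described at the end of this section; splitting
the read-only fields into their own struct, and the
virtio_blk_outhdr name, are illustrative:

/* The read-only fields of a request. */
struct virtio_blk_outhdr {
        u32 type;       /* VIRTIO_BLK_T_IN */
        u32 ioprio;
        u64 sector;
};

struct virtio_blk_outhdr hdr = { VIRTIO_BLK_T_IN, 0, 1234 };
u8 data[512];   /* device writes the sector contents here */
u8 status;      /* device writes a VIRTIO_BLK_S_* value here */

/* Three chained descriptors:
 * 0: &hdr, 16 bytes, read-only (flags = VRING_DESC_F_NEXT)
 * 1: data, 512 bytes, VRING_DESC_F_WRITE | VRING_DESC_F_NEXT
 * 2: &status, 1 byte, VRING_DESC_F_WRITE */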
1846 | |||
1847 | The sense field is only present for scsi packet command requests, | ||
1848 | and indicates the buffer for scsi sense data. | ||
1849 | |||
The data_len field is only present for scsi packet command
requests; this field is deprecated and should be ignored by the
driver. Historically, devices copied the data length there.
1853 | |||
1854 | The sense_len field is only present for scsi packet command | ||
1855 | requests and indicates the number of bytes actually written to | ||
1856 | the sense buffer. | ||
1857 | |||
1858 | The residual field is only present for scsi packet command | ||
1859 | requests and indicates the residual size, calculated as data | ||
1860 | length - number of bytes actually transferred. | ||
1861 | |||
The final status byte is written by the device: either
VIRTIO_BLK_S_OK for success, VIRTIO_BLK_S_IOERR for a host or
guest error, or VIRTIO_BLK_S_UNSUPP for a request unsupported by
the host:

#define VIRTIO_BLK_S_OK      0
#define VIRTIO_BLK_S_IOERR   1
#define VIRTIO_BLK_S_UNSUPP  2
1869 | |||
Historically, devices assumed that the fields type, ioprio and
sector reside in a single, separate read-only buffer; the fields
errors, data_len, sense_len and residual reside in a single,
separate write-only buffer; the sense field resides in a separate
write-only buffer of size 96 bytes, by itself; and the status
field is a separate write-only buffer of size 1 byte, by itself.
1878 | |||
1879 | Appendix E: Console Device | ||
1880 | |||
The virtio console device is a simple device for data input and
output. A device may have one or more ports. Each port has a pair
of input and output virtqueues. Moreover, a device has a pair of
control IO virtqueues. The control virtqueues are used to
communicate information between the device and the driver: ports
being opened and closed on either side of the connection,
indication from the host about whether a particular port is a
console port, adding new ports, port hot-plug/unplug, and
indication from the guest about whether a port or a device was
successfully added, port open/close, etc. For data IO, one or
more empty buffers are placed in the receive queue for incoming
data, and outgoing characters are placed in the transmit queue.
1893 | |||
1894 | Configuration | ||
1895 | |||
1896 | Subsystem Device ID 3 | ||
1897 | |||
1898 | Virtqueues 0:receiveq(port0). 1:transmitq(port0), 2:control | ||
1899 | receiveq[footnote: | ||
1900 | Ports 2 onwards only if VIRTIO_CONSOLE_F_MULTIPORT is set | ||
1901 | ], 3:control transmitq, 4:receiveq(port1), 5:transmitq(port1), | ||
1902 | ... | ||
1903 | |||
1904 | Feature bits | ||
1905 | |||
1906 | VIRTIO_CONSOLE_F_SIZE (0) Configuration cols and rows fields | ||
1907 | are valid. | ||
1908 | |||
1909 | VIRTIO_CONSOLE_F_MULTIPORT(1) Device has support for multiple | ||
1910 | ports; configuration fields nr_ports and max_nr_ports are | ||
1911 | valid and control virtqueues will be used. | ||
1912 | |||
Device configuration layout The size of the console is supplied
in the configuration space if the VIRTIO_CONSOLE_F_SIZE feature
is set. Furthermore, if the VIRTIO_CONSOLE_F_MULTIPORT feature
is set, the maximum number of ports supported by the device can
be fetched.

struct virtio_console_config {
        u16 cols;
        u16 rows;

        u32 max_nr_ports;
};
1928 | |||
1929 | Device Initialization | ||
1930 | |||
1931 | If the VIRTIO_CONSOLE_F_SIZE feature is negotiated, the driver | ||
1932 | can read the console dimensions from the configuration fields. | ||
1933 | |||
1934 | If the VIRTIO_CONSOLE_F_MULTIPORT feature is negotiated, the | ||
1935 | driver can spawn multiple ports, not all of which may be | ||
1936 | attached to a console. Some could be generic ports. In this | ||
1937 | case, the control virtqueues are enabled and according to the | ||
1938 | max_nr_ports configuration-space value, the appropriate number | ||
1939 | of virtqueues are created. A control message indicating the | ||
1940 | driver is ready is sent to the host. The host can then send | ||
1941 | control messages for adding new ports to the device. After | ||
1942 | creating and initializing each port, a | ||
1943 | VIRTIO_CONSOLE_PORT_READY control message is sent to the host | ||
1944 | for that port so the host can let us know of any additional | ||
1945 | configuration options set for that port. | ||
1946 | |||
1947 | The receiveq for each port is populated with one or more | ||
1948 | receive buffers. | ||
1949 | |||
1950 | Device Operation | ||
1951 | |||
1952 | For output, a buffer containing the characters is placed in the | ||
1953 | port's transmitq.[footnote: | ||
1954 | Because this is high importance and low bandwidth, the current | ||
1955 | Linux implementation polls for the buffer to be used, rather than | ||
1956 | waiting for an interrupt, simplifying the implementation | ||
1957 | significantly. However, for generic serial ports with the | ||
1958 | O_NONBLOCK flag set, the polling limitation is relaxed and the | ||
1959 | consumed buffers are freed upon the next write or poll call or | ||
1960 | when a port is closed or hot-unplugged. | ||
1961 | ] | ||
1962 | |||
When a buffer is used in the receiveq (signalled by an
interrupt), the contents are the input to the port associated
with the virtqueue for which the notification was received.
1966 | |||
1967 | If the driver negotiated the VIRTIO_CONSOLE_F_SIZE feature, a | ||
1968 | configuration change interrupt may occur. The updated size can | ||
1969 | be read from the configuration fields. | ||
1970 | |||
1971 | If the driver negotiated the VIRTIO_CONSOLE_F_MULTIPORT | ||
1972 | feature, active ports are announced by the host using the | ||
1973 | VIRTIO_CONSOLE_PORT_ADD control message. The same message is | ||
1974 | used for port hot-plug as well. | ||
1975 | |||
1976 | If the host specified a port `name', a sysfs attribute is | ||
1977 | created with the name filled in, so that udev rules can be | ||
1978 | written that can create a symlink from the port's name to the | ||
1979 | char device for port discovery by applications in the guest. | ||
1980 | |||
Changes to ports' state are effected by control messages.
Appropriate action is taken on the port indicated in the
control message. The layout of the control buffer structure and
the associated events are:

struct virtio_console_control {
        uint32_t id;    /* Port number */
        uint16_t event; /* The kind of control event */
        uint16_t value; /* Extra information for the event */
};

/* Some events for the internal messages (control packets) */

#define VIRTIO_CONSOLE_DEVICE_READY  0
#define VIRTIO_CONSOLE_PORT_ADD      1
#define VIRTIO_CONSOLE_PORT_REMOVE   2
#define VIRTIO_CONSOLE_PORT_READY    3
#define VIRTIO_CONSOLE_CONSOLE_PORT  4
#define VIRTIO_CONSOLE_RESIZE        5
#define VIRTIO_CONSOLE_PORT_OPEN     6
#define VIRTIO_CONSOLE_PORT_NAME     7
2015 | |||
2016 | Appendix F: Entropy Device | ||
2017 | |||
2018 | The virtio entropy device supplies high-quality randomness for | ||
2019 | guest use. | ||
2020 | |||
2021 | Configuration | ||
2022 | |||
2023 | Subsystem Device ID 4 | ||
2024 | |||
2025 | Virtqueues 0:requestq. | ||
2026 | |||
2027 | Feature bits None currently defined | ||
2028 | |||
2029 | Device configuration layout None currently defined. | ||
2030 | |||
2031 | Device Initialization | ||
2032 | |||
The virtqueue is initialized.
2034 | |||
2035 | Device Operation | ||
2036 | |||
When the driver requires random bytes, it places the descriptor
of one or more buffers in the queue. Each buffer will be
completely filled with random data by the device.
2040 | |||
2041 | Appendix G: Memory Balloon Device | ||
2042 | |||
2043 | The virtio memory balloon device is a primitive device for | ||
2044 | managing guest memory: the device asks for a certain amount of | ||
2045 | memory, and the guest supplies it (or withdraws it, if the device | ||
2046 | has more than it asks for). This allows the guest to adapt to | ||
2047 | changes in allowance of underlying physical memory. If the | ||
2048 | feature is negotiated, the device can also be used to communicate | ||
2049 | guest memory statistics to the host. | ||
2050 | |||
2051 | Configuration | ||
2052 | |||
2053 | Subsystem Device ID 5 | ||
2054 | |||
Virtqueues 0:inflateq. 1:deflateq. 2:statsq.[footnote:
Only if VIRTIO_BALLOON_F_STATS_VQ set
]
2058 | |||
2059 | Feature bits | ||
2060 | |||
2061 | VIRTIO_BALLOON_F_MUST_TELL_HOST (0) Host must be told before | ||
2062 | pages from the balloon are used. | ||
2063 | |||
2064 | VIRTIO_BALLOON_F_STATS_VQ (1) A virtqueue for reporting guest | ||
2065 | memory statistics is present. | ||
2066 | |||
Device configuration layout Both fields of this configuration
are always available. Note that they are little-endian, despite
the convention that device fields are guest-endian:

struct virtio_balloon_config {
        u32 num_pages;
        u32 actual;
};
2076 | |||
2077 | Device Initialization | ||
2078 | |||
2079 | The inflate and deflate virtqueues are identified. | ||
2080 | |||
2081 | If the VIRTIO_BALLOON_F_STATS_VQ feature bit is negotiated: | ||
2082 | |||
2083 | Identify the stats virtqueue. | ||
2084 | |||
2085 | Add one empty buffer to the stats virtqueue and notify the | ||
2086 | host. | ||
2087 | |||
2088 | Device operation begins immediately. | ||
2089 | |||
2090 | Device Operation | ||
2091 | |||
2092 | Memory Ballooning The device is driven by the receipt of a | ||
2093 | configuration change interrupt. | ||
2094 | |||
2095 | The “num_pages” configuration field is examined. If this is | ||
2096 | greater than the “actual” number of pages, memory must be given | ||
2097 | to the balloon. If it is less than the “actual” number of | ||
2098 | pages, memory may be taken back from the balloon for general | ||
2099 | use. | ||
2100 | |||
2101 | To supply memory to the balloon (aka. inflate): | ||
2102 | |||
2103 | The driver constructs an array of addresses of unused memory | ||
2104 | pages. These addresses are divided by 4096[footnote: | ||
2105 | This is historical, and independent of the guest page size | ||
2106 | ] and the descriptor describing the resulting 32-bit array is | ||
2107 | added to the inflateq. | ||
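Since the divisor is fixed at 4096, the conversion is a shift by 12 relative to the guest's own page shift. A sketch in the spirit of the Linux driver; the macro name is illustrative.

    #include <linux/mm.h>

    #define BALLOON_PFN_SHIFT 12    /* log2(4096), fixed by the spec */

    /* Sketch: convert a guest page to the 32-bit value placed in the
     * inflateq array.  Independent of the guest's own PAGE_SIZE.
     */
    static u32 page_to_balloon_pfn(struct page *page)
    {
            unsigned long pfn = page_to_pfn(page);

            BUILD_BUG_ON(PAGE_SHIFT < BALLOON_PFN_SHIFT);
            return pfn << (PAGE_SHIFT - BALLOON_PFN_SHIFT);
    }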
2108 | |||
2109 | To remove memory from the balloon (aka. deflate): | ||
2110 | |||
2111 | The driver constructs an array of addresses of memory pages it | ||
2112 | has previously given to the balloon, as described above. This | ||
2113 | descriptor is added to the deflateq. | ||
2114 | |||
2115 | If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is set, the | ||
2116 | guest may not use these requested pages until that descriptor | ||
2117 | in the deflateq has been used by the device. | ||
2118 | |||
2119 | Otherwise, the guest may begin to re-use pages previously given | ||
2120 | to the balloon before the device has acknowledged their | ||
2121 | withdrawal. [footnote: | ||
2122 | In this case, deflation advice is merely a courtesy | ||
2123 | ] | ||
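Before the bookkeeping described next, a driver therefore gates reuse of deflated pages on the negotiated features. A minimal sketch using the standard Linux feature-bit helper; the function name is hypothetical.

    /* Sketch: may the guest reuse deflated pages immediately, or must
     * it wait for the device to use the deflateq descriptor first?
     */
    static bool may_reuse_immediately(struct virtio_device *vdev)
    {
            return !virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
    }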
2124 | |||
2125 | In either case, once the device has completed the inflation or | ||
2126 | deflation, the “actual” field of the configuration should be | ||
2127 | updated to reflect the new number of pages in the balloon.[footnote: | ||
2128 | As updates to configuration space are not atomic, this field | ||
2129 | isn't particularly reliable, but can be used to diagnose buggy | ||
2130 | guests. | ||
2131 | ] | ||
2132 | |||
2133 | Memory Statistics | ||
2134 | |||
2135 | The stats virtqueue is atypical because communication is driven | ||
2136 | by the device (not the driver). The channel becomes active at | ||
2137 | driver initialization time when the driver adds an empty buffer | ||
2138 | and notifies the device. A request for memory statistics proceeds | ||
2139 | as follows: | ||
2140 | |||
2141 | The device pushes the buffer onto the used ring and sends an | ||
2142 | interrupt. | ||
2143 | |||
2144 | The driver pops the used buffer and discards it. | ||
2145 | |||
2146 | The driver collects memory statistics and writes them into a | ||
2147 | new buffer. | ||
2148 | |||
2149 | The driver adds the buffer to the virtqueue and notifies the | ||
2150 | device. | ||
2151 | |||
2152 | The device pops the buffer (retaining it to initiate a | ||
2153 | subsequent request) and consumes the statistics. | ||
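Seen from the driver, the whole exchange is pop, refill, push. A sketch assuming the same virtqueue API as above; as in the Linux driver, the buffer is reused rather than freshly allocated each time.

    /* Sketch: service one statistics request from the device. */
    static void stats_request(struct virtqueue *statsq,
                              struct virtio_balloon_stat *stats,
                              unsigned int nr_stats)
    {
            struct scatterlist sg;
            unsigned int len;

            /* steps 1-2: pop the buffer the device pushed, discard it */
            virtqueue_get_buf(statsq, &len);

            /* step 3: collect fresh statistics into 'stats' (not shown) */

            /* step 4: hand the refilled buffer back and notify the device */
            sg_init_one(&sg, stats, nr_stats * sizeof(*stats));
            virtqueue_add_buf(statsq, &sg, 1, 0, stats);
            virtqueue_kick(statsq);
    }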
2154 | |||
2155 | Memory Statistics Format Each statistic consists of a 16-bit | ||
2156 | tag and a 64-bit value. Both quantities are represented in the | ||
2157 | native endian of the guest. All statistics are optional and the | ||
2158 | driver may choose which ones to supply. To guarantee backwards | ||
2159 | compatibility, unsupported statistics should be omitted. | ||
2160 | |||
2161 | struct virtio_balloon_stat { | ||
2162 | #define VIRTIO_BALLOON_S_SWAP_IN  0 | ||
2163 | #define VIRTIO_BALLOON_S_SWAP_OUT 1 | ||
2164 | #define VIRTIO_BALLOON_S_MAJFLT   2 | ||
2165 | #define VIRTIO_BALLOON_S_MINFLT   3 | ||
2166 | #define VIRTIO_BALLOON_S_MEMFREE  4 | ||
2167 | #define VIRTIO_BALLOON_S_MEMTOT   5 | ||
2168 |         u16 tag; | ||
2169 |         u64 val; | ||
2170 | } __attribute__((packed)); | ||
2180 | |||
2181 | Tags | ||
2182 | |||
2183 | VIRTIO_BALLOON_S_SWAP_IN The amount of memory that has been | ||
2184 | swapped in (in bytes). | ||
2185 | |||
2186 | VIRTIO_BALLOON_S_SWAP_OUT The amount of memory that has been | ||
2187 | swapped out to disk (in bytes). | ||
2188 | |||
2189 | VIRTIO_BALLOON_S_MAJFLT The number of major page faults that | ||
2190 | have occurred. | ||
2191 | |||
2192 | VIRTIO_BALLOON_S_MINFLT The number of minor page faults that | ||
2193 | have occurred. | ||
2194 | |||
2195 | VIRTIO_BALLOON_S_MEMFREE The amount of memory not being used | ||
2196 | for any purpose (in bytes). | ||
2197 | |||
2198 | VIRTIO_BALLOON_S_MEMTOT The total amount of memory available | ||
2199 | (in bytes). | ||
2200 | |||
diff --git a/MAINTAINERS b/MAINTAINERS index 1d445f57298..069ee3b5c65 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -4971,7 +4971,7 @@ M: Paul Mackerras <paulus@samba.org> | |||
4971 | M: Ingo Molnar <mingo@elte.hu> | 4971 | M: Ingo Molnar <mingo@elte.hu> |
4972 | M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | 4972 | M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> |
4973 | S: Supported | 4973 | S: Supported |
4974 | F: kernel/perf_event*.c | 4974 | F: kernel/events/* |
4975 | F: include/linux/perf_event.h | 4975 | F: include/linux/perf_event.h |
4976 | F: arch/*/kernel/perf_event*.c | 4976 | F: arch/*/kernel/perf_event*.c |
4977 | F: arch/*/kernel/*/perf_event*.c | 4977 | F: arch/*/kernel/*/perf_event*.c |
@@ -1,8 +1,8 @@ | |||
1 | VERSION = 3 | 1 | VERSION = 3 |
2 | PATCHLEVEL = 1 | 2 | PATCHLEVEL = 1 |
3 | SUBLEVEL = 0 | 3 | SUBLEVEL = 0 |
4 | EXTRAVERSION = -rc2 | 4 | EXTRAVERSION = -rc3 |
5 | NAME = Wet Seal | 5 | NAME = "Divemaster Edition" |
6 | 6 | ||
7 | # *DOCUMENTATION* | 7 | # *DOCUMENTATION* |
8 | # To see a list of typical targets execute "make help" | 8 | # To see a list of typical targets execute "make help" |
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 12485471495..3ff7785b3be 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig | |||
@@ -162,7 +162,6 @@ config IA64_GENERIC | |||
162 | select ACPI_NUMA | 162 | select ACPI_NUMA |
163 | select SWIOTLB | 163 | select SWIOTLB |
164 | select PCI_MSI | 164 | select PCI_MSI |
165 | select DMAR | ||
166 | help | 165 | help |
167 | This selects the system type of your hardware. A "generic" kernel | 166 | This selects the system type of your hardware. A "generic" kernel |
168 | will run on any supported IA-64 system. However, if you configure | 167 | will run on any supported IA-64 system. However, if you configure |
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig index 1d7bca0a396..0e5cd1405e0 100644 --- a/arch/ia64/configs/generic_defconfig +++ b/arch/ia64/configs/generic_defconfig | |||
@@ -234,3 +234,4 @@ CONFIG_CRYPTO_MD5=y | |||
234 | # CONFIG_CRYPTO_ANSI_CPRNG is not set | 234 | # CONFIG_CRYPTO_ANSI_CPRNG is not set |
235 | CONFIG_CRC_T10DIF=y | 235 | CONFIG_CRC_T10DIF=y |
236 | CONFIG_MISC_DEVICES=y | 236 | CONFIG_MISC_DEVICES=y |
237 | CONFIG_DMAR=y | ||
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 42c67beadca..1a6f20d4e7e 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig | |||
@@ -55,6 +55,7 @@ config SPARC64 | |||
55 | select PERF_USE_VMALLOC | 55 | select PERF_USE_VMALLOC |
56 | select IRQ_PREFLOW_FASTEOI | 56 | select IRQ_PREFLOW_FASTEOI |
57 | select ARCH_HAVE_NMI_SAFE_CMPXCHG | 57 | select ARCH_HAVE_NMI_SAFE_CMPXCHG |
58 | select HAVE_C_RECORDMCOUNT | ||
58 | 59 | ||
59 | config ARCH_DEFCONFIG | 60 | config ARCH_DEFCONFIG |
60 | string | 61 | string |
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index 5f5b8bf3f50..bcc98fc3528 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h | |||
@@ -131,6 +131,15 @@ static inline void arch_write_lock(arch_rwlock_t *rw) | |||
131 | *(volatile __u32 *)&lp->lock = ~0U; | 131 | *(volatile __u32 *)&lp->lock = ~0U; |
132 | } | 132 | } |
133 | 133 | ||
134 | static void inline arch_write_unlock(arch_rwlock_t *lock) | ||
135 | { | ||
136 | __asm__ __volatile__( | ||
137 | " st %%g0, [%0]" | ||
138 | : /* no outputs */ | ||
139 | : "r" (lock) | ||
140 | : "memory"); | ||
141 | } | ||
142 | |||
134 | static inline int arch_write_trylock(arch_rwlock_t *rw) | 143 | static inline int arch_write_trylock(arch_rwlock_t *rw) |
135 | { | 144 | { |
136 | unsigned int val; | 145 | unsigned int val; |
@@ -175,8 +184,6 @@ static inline int __arch_read_trylock(arch_rwlock_t *rw) | |||
175 | res; \ | 184 | res; \ |
176 | }) | 185 | }) |
177 | 186 | ||
178 | #define arch_write_unlock(rw) do { (rw)->lock = 0; } while(0) | ||
179 | |||
180 | #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) | 187 | #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) |
181 | #define arch_read_lock_flags(rw, flags) arch_read_lock(rw) | 188 | #define arch_read_lock_flags(rw, flags) arch_read_lock(rw) |
182 | #define arch_write_lock_flags(rw, flags) arch_write_lock(rw) | 189 | #define arch_write_lock_flags(rw, flags) arch_write_lock(rw) |
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 073936a8b27..96891769497 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h | |||
@@ -210,14 +210,8 @@ static int inline arch_write_trylock(arch_rwlock_t *lock) | |||
210 | return result; | 210 | return result; |
211 | } | 211 | } |
212 | 212 | ||
213 | #define arch_read_lock(p) arch_read_lock(p) | ||
214 | #define arch_read_lock_flags(p, f) arch_read_lock(p) | 213 | #define arch_read_lock_flags(p, f) arch_read_lock(p) |
215 | #define arch_read_trylock(p) arch_read_trylock(p) | ||
216 | #define arch_read_unlock(p) arch_read_unlock(p) | ||
217 | #define arch_write_lock(p) arch_write_lock(p) | ||
218 | #define arch_write_lock_flags(p, f) arch_write_lock(p) | 214 | #define arch_write_lock_flags(p, f) arch_write_lock(p) |
219 | #define arch_write_unlock(p) arch_write_unlock(p) | ||
220 | #define arch_write_trylock(p) arch_write_trylock(p) | ||
221 | 215 | ||
222 | #define arch_read_can_lock(rw) (!((rw)->lock & 0x80000000UL)) | 216 | #define arch_read_can_lock(rw) (!((rw)->lock & 0x80000000UL)) |
223 | #define arch_write_can_lock(rw) (!(rw)->lock) | 217 | #define arch_write_can_lock(rw) (!(rw)->lock) |
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index a19f0419547..1aaf8c180be 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c | |||
@@ -352,8 +352,8 @@ int __init pcic_probe(void) | |||
352 | strcpy(pbm->prom_name, namebuf); | 352 | strcpy(pbm->prom_name, namebuf); |
353 | 353 | ||
354 | { | 354 | { |
355 | extern volatile int t_nmi[1]; | 355 | extern volatile int t_nmi[4]; |
356 | extern int pcic_nmi_trap_patch[1]; | 356 | extern int pcic_nmi_trap_patch[4]; |
357 | 357 | ||
358 | t_nmi[0] = pcic_nmi_trap_patch[0]; | 358 | t_nmi[0] = pcic_nmi_trap_patch[0]; |
359 | t_nmi[1] = pcic_nmi_trap_patch[1]; | 359 | t_nmi[1] = pcic_nmi_trap_patch[1]; |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 64a619d47d3..7ff4669580c 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -39,7 +39,7 @@ typedef struct xpaddr { | |||
39 | ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) | 39 | ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) |
40 | 40 | ||
41 | extern unsigned long *machine_to_phys_mapping; | 41 | extern unsigned long *machine_to_phys_mapping; |
42 | extern unsigned int machine_to_phys_order; | 42 | extern unsigned long machine_to_phys_nr; |
43 | 43 | ||
44 | extern unsigned long get_phys_to_machine(unsigned long pfn); | 44 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
45 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 45 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
@@ -87,7 +87,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
87 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 87 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
88 | return mfn; | 88 | return mfn; |
89 | 89 | ||
90 | if (unlikely((mfn >> machine_to_phys_order) != 0)) { | 90 | if (unlikely(mfn >= machine_to_phys_nr)) { |
91 | pfn = ~0; | 91 | pfn = ~0; |
92 | goto try_override; | 92 | goto try_override; |
93 | } | 93 | } |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 988724b236b..ff5790d8e99 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -22,6 +22,8 @@ config KVM | |||
22 | depends on HAVE_KVM | 22 | depends on HAVE_KVM |
23 | # for device assignment: | 23 | # for device assignment: |
24 | depends on PCI | 24 | depends on PCI |
25 | # for TASKSTATS/TASK_DELAY_ACCT: | ||
26 | depends on NET | ||
25 | select PREEMPT_NOTIFIERS | 27 | select PREEMPT_NOTIFIERS |
26 | select MMU_NOTIFIER | 28 | select MMU_NOTIFIER |
27 | select ANON_INODES | 29 | select ANON_INODES |
@@ -31,6 +33,7 @@ config KVM | |||
31 | select KVM_ASYNC_PF | 33 | select KVM_ASYNC_PF |
32 | select USER_RETURN_NOTIFIER | 34 | select USER_RETURN_NOTIFIER |
33 | select KVM_MMIO | 35 | select KVM_MMIO |
36 | select TASKSTATS | ||
34 | select TASK_DELAY_ACCT | 37 | select TASK_DELAY_ACCT |
35 | ---help--- | 38 | ---help--- |
36 | Support hosting fully virtualized guest machines using hardware | 39 | Support hosting fully virtualized guest machines using hardware |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 247aae3dc00..0d17c8c50ac 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <asm/traps.h> /* dotraplinkage, ... */ | 17 | #include <asm/traps.h> /* dotraplinkage, ... */ |
18 | #include <asm/pgalloc.h> /* pgd_*(), ... */ | 18 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
19 | #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ | 19 | #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ |
20 | #include <asm/vsyscall.h> | ||
20 | 21 | ||
21 | /* | 22 | /* |
22 | * Page fault error code bits: | 23 | * Page fault error code bits: |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index ae3cb23cd89..c95330267f0 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -360,6 +360,15 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | |||
360 | } | 360 | } |
361 | } | 361 | } |
362 | 362 | ||
363 | /* After the PCI-E bus has been walked and all devices discovered, | ||
364 | * configure any settings of the fabric that might be necessary. | ||
365 | */ | ||
366 | if (bus) { | ||
367 | struct pci_bus *child; | ||
368 | list_for_each_entry(child, &bus->children, node) | ||
369 | pcie_bus_configure_settings(child, child->self->pcie_mpss); | ||
370 | } | ||
371 | |||
363 | if (!bus) | 372 | if (!bus) |
364 | kfree(sd); | 373 | kfree(sd); |
365 | 374 | ||
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3326204e251..add2c2d729c 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -15,7 +15,7 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | |||
15 | grant-table.o suspend.o platform-pci-unplug.o \ | 15 | grant-table.o suspend.o platform-pci-unplug.o \ |
16 | p2m.o | 16 | p2m.o |
17 | 17 | ||
18 | obj-$(CONFIG_FTRACE) += trace.o | 18 | obj-$(CONFIG_EVENT_TRACING) += trace.o |
19 | 19 | ||
20 | obj-$(CONFIG_SMP) += smp.o | 20 | obj-$(CONFIG_SMP) += smp.o |
21 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o | 21 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e2345af01af..2d69617950f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(xen_domain_type); | |||
77 | 77 | ||
78 | unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; | 78 | unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; |
79 | EXPORT_SYMBOL(machine_to_phys_mapping); | 79 | EXPORT_SYMBOL(machine_to_phys_mapping); |
80 | unsigned int machine_to_phys_order; | 80 | unsigned long machine_to_phys_nr; |
81 | EXPORT_SYMBOL(machine_to_phys_order); | 81 | EXPORT_SYMBOL(machine_to_phys_nr); |
82 | 82 | ||
83 | struct start_info *xen_start_info; | 83 | struct start_info *xen_start_info; |
84 | EXPORT_SYMBOL_GPL(xen_start_info); | 84 | EXPORT_SYMBOL_GPL(xen_start_info); |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 8cce339db5e..20a61427506 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1713,15 +1713,19 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1713 | void __init xen_setup_machphys_mapping(void) | 1713 | void __init xen_setup_machphys_mapping(void) |
1714 | { | 1714 | { |
1715 | struct xen_machphys_mapping mapping; | 1715 | struct xen_machphys_mapping mapping; |
1716 | unsigned long machine_to_phys_nr_ents; | ||
1717 | 1716 | ||
1718 | if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { | 1717 | if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { |
1719 | machine_to_phys_mapping = (unsigned long *)mapping.v_start; | 1718 | machine_to_phys_mapping = (unsigned long *)mapping.v_start; |
1720 | machine_to_phys_nr_ents = mapping.max_mfn + 1; | 1719 | machine_to_phys_nr = mapping.max_mfn + 1; |
1721 | } else { | 1720 | } else { |
1722 | machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; | 1721 | machine_to_phys_nr = MACH2PHYS_NR_ENTRIES; |
1723 | } | 1722 | } |
1724 | machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); | 1723 | #ifdef CONFIG_X86_32 |
1724 | if ((machine_to_phys_mapping + machine_to_phys_nr) | ||
1725 | < machine_to_phys_mapping) | ||
1726 | machine_to_phys_nr = (unsigned long *)NULL | ||
1727 | - machine_to_phys_mapping; | ||
1728 | #endif | ||
1725 | } | 1729 | } |
1726 | 1730 | ||
1727 | #ifdef CONFIG_X86_64 | 1731 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index b4533a86d7e..e79dbb95482 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -521,8 +521,6 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) | |||
521 | native_smp_prepare_cpus(max_cpus); | 521 | native_smp_prepare_cpus(max_cpus); |
522 | WARN_ON(xen_smp_intr_init(0)); | 522 | WARN_ON(xen_smp_intr_init(0)); |
523 | 523 | ||
524 | if (!xen_have_vector_callback) | ||
525 | return; | ||
526 | xen_init_lock_cpu(0); | 524 | xen_init_lock_cpu(0); |
527 | xen_init_spinlocks(); | 525 | xen_init_spinlocks(); |
528 | } | 526 | } |
@@ -546,6 +544,8 @@ static void xen_hvm_cpu_die(unsigned int cpu) | |||
546 | 544 | ||
547 | void __init xen_hvm_smp_init(void) | 545 | void __init xen_hvm_smp_init(void) |
548 | { | 546 | { |
547 | if (!xen_have_vector_callback) | ||
548 | return; | ||
549 | smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; | 549 | smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; |
550 | smp_ops.smp_send_reschedule = xen_smp_send_reschedule; | 550 | smp_ops.smp_send_reschedule = xen_smp_send_reschedule; |
551 | smp_ops.cpu_up = xen_hvm_cpu_up; | 551 | smp_ops.cpu_up = xen_hvm_cpu_up; |
diff --git a/block/Kconfig b/block/Kconfig index 60be1e0455d..e97934eecec 100644 --- a/block/Kconfig +++ b/block/Kconfig | |||
@@ -65,6 +65,16 @@ config BLK_DEV_BSG | |||
65 | 65 | ||
66 | If unsure, say Y. | 66 | If unsure, say Y. |
67 | 67 | ||
68 | config BLK_DEV_BSGLIB | ||
69 | bool "Block layer SG support v4 helper lib" | ||
70 | default n | ||
71 | select BLK_DEV_BSG | ||
72 | help | ||
73 | Subsystems will normally enable this if needed. Users will not | ||
74 | normally need to manually enable this. | ||
75 | |||
76 | If unsure, say N. | ||
77 | |||
68 | config BLK_DEV_INTEGRITY | 78 | config BLK_DEV_INTEGRITY |
69 | bool "Block layer data integrity support" | 79 | bool "Block layer data integrity support" |
70 | ---help--- | 80 | ---help--- |
diff --git a/block/Makefile b/block/Makefile index 0fec4b3fab5..514c6e4f427 100644 --- a/block/Makefile +++ b/block/Makefile | |||
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ | |||
8 | blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o | 8 | blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o |
9 | 9 | ||
10 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o | 10 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o |
11 | obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o | ||
11 | obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o | 12 | obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o |
12 | obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o | 13 | obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o |
13 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o | 14 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o |
diff --git a/block/blk-core.c b/block/blk-core.c index b627558c461..90e1ffdeb41 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -1702,6 +1702,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits); | |||
1702 | int blk_insert_cloned_request(struct request_queue *q, struct request *rq) | 1702 | int blk_insert_cloned_request(struct request_queue *q, struct request *rq) |
1703 | { | 1703 | { |
1704 | unsigned long flags; | 1704 | unsigned long flags; |
1705 | int where = ELEVATOR_INSERT_BACK; | ||
1705 | 1706 | ||
1706 | if (blk_rq_check_limits(q, rq)) | 1707 | if (blk_rq_check_limits(q, rq)) |
1707 | return -EIO; | 1708 | return -EIO; |
@@ -1718,7 +1719,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) | |||
1718 | */ | 1719 | */ |
1719 | BUG_ON(blk_queued_rq(rq)); | 1720 | BUG_ON(blk_queued_rq(rq)); |
1720 | 1721 | ||
1721 | add_acct_request(q, rq, ELEVATOR_INSERT_BACK); | 1722 | if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA)) |
1723 | where = ELEVATOR_INSERT_FLUSH; | ||
1724 | |||
1725 | add_acct_request(q, rq, where); | ||
1722 | spin_unlock_irqrestore(q->queue_lock, flags); | 1726 | spin_unlock_irqrestore(q->queue_lock, flags); |
1723 | 1727 | ||
1724 | return 0; | 1728 | return 0; |
@@ -2275,7 +2279,7 @@ static bool blk_end_bidi_request(struct request *rq, int error, | |||
2275 | * %false - we are done with this request | 2279 | * %false - we are done with this request |
2276 | * %true - still buffers pending for this request | 2280 | * %true - still buffers pending for this request |
2277 | **/ | 2281 | **/ |
2278 | static bool __blk_end_bidi_request(struct request *rq, int error, | 2282 | bool __blk_end_bidi_request(struct request *rq, int error, |
2279 | unsigned int nr_bytes, unsigned int bidi_bytes) | 2283 | unsigned int nr_bytes, unsigned int bidi_bytes) |
2280 | { | 2284 | { |
2281 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) | 2285 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) |
diff --git a/block/blk-flush.c b/block/blk-flush.c index bb21e4c36f7..491eb30a242 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c | |||
@@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) | |||
95 | { | 95 | { |
96 | unsigned int policy = 0; | 96 | unsigned int policy = 0; |
97 | 97 | ||
98 | if (blk_rq_sectors(rq)) | ||
99 | policy |= REQ_FSEQ_DATA; | ||
100 | |||
98 | if (fflags & REQ_FLUSH) { | 101 | if (fflags & REQ_FLUSH) { |
99 | if (rq->cmd_flags & REQ_FLUSH) | 102 | if (rq->cmd_flags & REQ_FLUSH) |
100 | policy |= REQ_FSEQ_PREFLUSH; | 103 | policy |= REQ_FSEQ_PREFLUSH; |
101 | if (blk_rq_sectors(rq)) | ||
102 | policy |= REQ_FSEQ_DATA; | ||
103 | if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) | 104 | if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) |
104 | policy |= REQ_FSEQ_POSTFLUSH; | 105 | policy |= REQ_FSEQ_POSTFLUSH; |
105 | } | 106 | } |
@@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq) | |||
122 | 123 | ||
123 | /* make @rq a normal request */ | 124 | /* make @rq a normal request */ |
124 | rq->cmd_flags &= ~REQ_FLUSH_SEQ; | 125 | rq->cmd_flags &= ~REQ_FLUSH_SEQ; |
125 | rq->end_io = NULL; | 126 | rq->end_io = rq->flush.saved_end_io; |
126 | } | 127 | } |
127 | 128 | ||
128 | /** | 129 | /** |
@@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq) | |||
300 | unsigned int fflags = q->flush_flags; /* may change, cache */ | 301 | unsigned int fflags = q->flush_flags; /* may change, cache */ |
301 | unsigned int policy = blk_flush_policy(fflags, rq); | 302 | unsigned int policy = blk_flush_policy(fflags, rq); |
302 | 303 | ||
303 | BUG_ON(rq->end_io); | ||
304 | BUG_ON(!rq->bio || rq->bio != rq->biotail); | ||
305 | |||
306 | /* | 304 | /* |
307 | * @policy now records what operations need to be done. Adjust | 305 | * @policy now records what operations need to be done. Adjust |
308 | * REQ_FLUSH and FUA for the driver. | 306 | * REQ_FLUSH and FUA for the driver. |
@@ -312,6 +310,19 @@ void blk_insert_flush(struct request *rq) | |||
312 | rq->cmd_flags &= ~REQ_FUA; | 310 | rq->cmd_flags &= ~REQ_FUA; |
313 | 311 | ||
314 | /* | 312 | /* |
313 | * An empty flush handed down from a stacking driver may | ||
314 | * translate into nothing if the underlying device does not | ||
315 | * advertise a write-back cache. In this case, simply | ||
316 | * complete the request. | ||
317 | */ | ||
318 | if (!policy) { | ||
319 | __blk_end_bidi_request(rq, 0, 0, 0); | ||
320 | return; | ||
321 | } | ||
322 | |||
323 | BUG_ON(!rq->bio || rq->bio != rq->biotail); | ||
324 | |||
325 | /* | ||
315 | * If there's data but flush is not necessary, the request can be | 326 | * If there's data but flush is not necessary, the request can be |
316 | * processed directly without going through flush machinery. Queue | 327 | * processed directly without going through flush machinery. Queue |
317 | * for normal execution. | 328 | * for normal execution. |
@@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq) | |||
319 | if ((policy & REQ_FSEQ_DATA) && | 330 | if ((policy & REQ_FSEQ_DATA) && |
320 | !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { | 331 | !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { |
321 | list_add_tail(&rq->queuelist, &q->queue_head); | 332 | list_add_tail(&rq->queuelist, &q->queue_head); |
333 | blk_run_queue_async(q); | ||
322 | return; | 334 | return; |
323 | } | 335 | } |
324 | 336 | ||
@@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq) | |||
329 | memset(&rq->flush, 0, sizeof(rq->flush)); | 341 | memset(&rq->flush, 0, sizeof(rq->flush)); |
330 | INIT_LIST_HEAD(&rq->flush.list); | 342 | INIT_LIST_HEAD(&rq->flush.list); |
331 | rq->cmd_flags |= REQ_FLUSH_SEQ; | 343 | rq->cmd_flags |= REQ_FLUSH_SEQ; |
344 | rq->flush.saved_end_io = rq->end_io; /* Usually NULL */ | ||
332 | rq->end_io = flush_data_end_io; | 345 | rq->end_io = flush_data_end_io; |
333 | 346 | ||
334 | blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); | 347 | blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); |
diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 475fab809a8..58340d0cb23 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c | |||
@@ -124,6 +124,14 @@ void __blk_complete_request(struct request *req) | |||
124 | } else | 124 | } else |
125 | ccpu = cpu; | 125 | ccpu = cpu; |
126 | 126 | ||
127 | /* | ||
128 | * If current CPU and requested CPU are in the same group, running | ||
129 | * softirq in current CPU. One might concern this is just like | ||
130 | * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is | ||
131 | * running in interrupt handler, and currently I/O controller doesn't | ||
132 | * support multiple interrupts, so current CPU is unique actually. This | ||
133 | * avoids IPI sending from current CPU to the first CPU of a group. | ||
134 | */ | ||
127 | if (ccpu == cpu || ccpu == group_cpu) { | 135 | if (ccpu == cpu || ccpu == group_cpu) { |
128 | struct list_head *list; | 136 | struct list_head *list; |
129 | do_local: | 137 | do_local: |
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index f6a79412050..a19f58c6fc3 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c | |||
@@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, | |||
746 | static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) | 746 | static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) |
747 | { | 747 | { |
748 | bool rw = bio_data_dir(bio); | 748 | bool rw = bio_data_dir(bio); |
749 | bool sync = bio->bi_rw & REQ_SYNC; | 749 | bool sync = rw_is_sync(bio->bi_rw); |
750 | 750 | ||
751 | /* Charge the bio to the group */ | 751 | /* Charge the bio to the group */ |
752 | tg->bytes_disp[rw] += bio->bi_size; | 752 | tg->bytes_disp[rw] += bio->bi_size; |
@@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) | |||
1150 | 1150 | ||
1151 | if (tg_no_rule_group(tg, rw)) { | 1151 | if (tg_no_rule_group(tg, rw)) { |
1152 | blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, | 1152 | blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, |
1153 | rw, bio->bi_rw & REQ_SYNC); | 1153 | rw, rw_is_sync(bio->bi_rw)); |
1154 | rcu_read_unlock(); | 1154 | rcu_read_unlock(); |
1155 | return 0; | 1155 | return 0; |
1156 | } | 1156 | } |
diff --git a/block/blk.h b/block/blk.h index d6586287adc..20b900a377c 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, | |||
17 | struct bio *bio); | 17 | struct bio *bio); |
18 | void blk_dequeue_request(struct request *rq); | 18 | void blk_dequeue_request(struct request *rq); |
19 | void __blk_queue_free_tags(struct request_queue *q); | 19 | void __blk_queue_free_tags(struct request_queue *q); |
20 | bool __blk_end_bidi_request(struct request *rq, int error, | ||
21 | unsigned int nr_bytes, unsigned int bidi_bytes); | ||
20 | 22 | ||
21 | void blk_rq_timed_out_timer(unsigned long data); | 23 | void blk_rq_timed_out_timer(unsigned long data); |
22 | void blk_delete_timer(struct request *); | 24 | void blk_delete_timer(struct request *); |
diff --git a/block/bsg-lib.c b/block/bsg-lib.c new file mode 100644 index 00000000000..6690e6e4103 --- /dev/null +++ b/block/bsg-lib.c | |||
@@ -0,0 +1,298 @@ | |||
1 | /* | ||
2 | * BSG helper library | ||
3 | * | ||
4 | * Copyright (C) 2008 James Smart, Emulex Corporation | ||
5 | * Copyright (C) 2011 Red Hat, Inc. All rights reserved. | ||
6 | * Copyright (C) 2011 Mike Christie | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | * | ||
22 | */ | ||
23 | #include <linux/slab.h> | ||
24 | #include <linux/blkdev.h> | ||
25 | #include <linux/delay.h> | ||
26 | #include <linux/scatterlist.h> | ||
27 | #include <linux/bsg-lib.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <scsi/scsi_cmnd.h> | ||
30 | |||
31 | /** | ||
32 | * bsg_destroy_job - routine to teardown/delete a bsg job | ||
33 | * @job: bsg_job that is to be torn down | ||
34 | */ | ||
35 | static void bsg_destroy_job(struct bsg_job *job) | ||
36 | { | ||
37 | put_device(job->dev); /* release reference for the request */ | ||
38 | |||
39 | kfree(job->request_payload.sg_list); | ||
40 | kfree(job->reply_payload.sg_list); | ||
41 | kfree(job); | ||
42 | } | ||
43 | |||
44 | /** | ||
45 | * bsg_job_done - completion routine for bsg requests | ||
46 | * @job: bsg_job that is complete | ||
47 | * @result: job reply result | ||
48 | * @reply_payload_rcv_len: length of payload recvd | ||
49 | * | ||
50 | * The LLD should call this when the bsg job has completed. | ||
51 | */ | ||
52 | void bsg_job_done(struct bsg_job *job, int result, | ||
53 | unsigned int reply_payload_rcv_len) | ||
54 | { | ||
55 | struct request *req = job->req; | ||
56 | struct request *rsp = req->next_rq; | ||
57 | int err; | ||
58 | |||
59 | err = job->req->errors = result; | ||
60 | if (err < 0) | ||
61 | /* we're only returning the result field in the reply */ | ||
62 | job->req->sense_len = sizeof(u32); | ||
63 | else | ||
64 | job->req->sense_len = job->reply_len; | ||
65 | /* we assume all request payload was transferred, residual == 0 */ | ||
66 | req->resid_len = 0; | ||
67 | |||
68 | if (rsp) { | ||
69 | WARN_ON(reply_payload_rcv_len > rsp->resid_len); | ||
70 | |||
71 | /* set reply (bidi) residual */ | ||
72 | rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len); | ||
73 | } | ||
74 | blk_complete_request(req); | ||
75 | } | ||
76 | EXPORT_SYMBOL_GPL(bsg_job_done); | ||
77 | |||
78 | /** | ||
79 | * bsg_softirq_done - softirq done routine for destroying the bsg requests | ||
80 | * @rq: BSG request that holds the job to be destroyed | ||
81 | */ | ||
82 | static void bsg_softirq_done(struct request *rq) | ||
83 | { | ||
84 | struct bsg_job *job = rq->special; | ||
85 | |||
86 | blk_end_request_all(rq, rq->errors); | ||
87 | bsg_destroy_job(job); | ||
88 | } | ||
89 | |||
90 | static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) | ||
91 | { | ||
92 | size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments); | ||
93 | |||
94 | BUG_ON(!req->nr_phys_segments); | ||
95 | |||
96 | buf->sg_list = kzalloc(sz, GFP_KERNEL); | ||
97 | if (!buf->sg_list) | ||
98 | return -ENOMEM; | ||
99 | sg_init_table(buf->sg_list, req->nr_phys_segments); | ||
100 | buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list); | ||
101 | buf->payload_len = blk_rq_bytes(req); | ||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | /** | ||
106 | * bsg_create_job - create the bsg_job structure for the bsg request | ||
107 | * @dev: device that is being sent the bsg request | ||
108 | * @req: BSG request that needs a job structure | ||
109 | */ | ||
110 | static int bsg_create_job(struct device *dev, struct request *req) | ||
111 | { | ||
112 | struct request *rsp = req->next_rq; | ||
113 | struct request_queue *q = req->q; | ||
114 | struct bsg_job *job; | ||
115 | int ret; | ||
116 | |||
117 | BUG_ON(req->special); | ||
118 | |||
119 | job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL); | ||
120 | if (!job) | ||
121 | return -ENOMEM; | ||
122 | |||
123 | req->special = job; | ||
124 | job->req = req; | ||
125 | if (q->bsg_job_size) | ||
126 | job->dd_data = (void *)&job[1]; | ||
127 | job->request = req->cmd; | ||
128 | job->request_len = req->cmd_len; | ||
129 | job->reply = req->sense; | ||
130 | job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer | ||
131 | * allocated */ | ||
132 | if (req->bio) { | ||
133 | ret = bsg_map_buffer(&job->request_payload, req); | ||
134 | if (ret) | ||
135 | goto failjob_rls_job; | ||
136 | } | ||
137 | if (rsp && rsp->bio) { | ||
138 | ret = bsg_map_buffer(&job->reply_payload, rsp); | ||
139 | if (ret) | ||
140 | goto failjob_rls_rqst_payload; | ||
141 | } | ||
142 | job->dev = dev; | ||
143 | /* take a reference for the request */ | ||
144 | get_device(job->dev); | ||
145 | return 0; | ||
146 | |||
147 | failjob_rls_rqst_payload: | ||
148 | kfree(job->request_payload.sg_list); | ||
149 | failjob_rls_job: | ||
150 | kfree(job); | ||
151 | return -ENOMEM; | ||
152 | } | ||
153 | |||
154 | /* | ||
155 | * bsg_goose_queue - restart queue in case it was stopped | ||
156 | * @q: request q to be restarted | ||
157 | */ | ||
158 | void bsg_goose_queue(struct request_queue *q) | ||
159 | { | ||
160 | if (!q) | ||
161 | return; | ||
162 | |||
163 | blk_run_queue_async(q); | ||
164 | } | ||
165 | EXPORT_SYMBOL_GPL(bsg_goose_queue); | ||
166 | |||
167 | /** | ||
168 | * bsg_request_fn - generic handler for bsg requests | ||
169 | * @q: request queue to manage | ||
170 | * | ||
171 | * On error the create_bsg_job function should return a -Exyz error value | ||
172 | * that will be set to the req->errors. | ||
173 | * | ||
174 | * Drivers/subsys should pass this to the queue init function. | ||
175 | */ | ||
176 | void bsg_request_fn(struct request_queue *q) | ||
177 | { | ||
178 | struct device *dev = q->queuedata; | ||
179 | struct request *req; | ||
180 | struct bsg_job *job; | ||
181 | int ret; | ||
182 | |||
183 | if (!get_device(dev)) | ||
184 | return; | ||
185 | |||
186 | while (1) { | ||
187 | req = blk_fetch_request(q); | ||
188 | if (!req) | ||
189 | break; | ||
190 | spin_unlock_irq(q->queue_lock); | ||
191 | |||
192 | ret = bsg_create_job(dev, req); | ||
193 | if (ret) { | ||
194 | req->errors = ret; | ||
195 | blk_end_request_all(req, ret); | ||
196 | spin_lock_irq(q->queue_lock); | ||
197 | continue; | ||
198 | } | ||
199 | |||
200 | job = req->special; | ||
201 | ret = q->bsg_job_fn(job); | ||
202 | spin_lock_irq(q->queue_lock); | ||
203 | if (ret) | ||
204 | break; | ||
205 | } | ||
206 | |||
207 | spin_unlock_irq(q->queue_lock); | ||
208 | put_device(dev); | ||
209 | spin_lock_irq(q->queue_lock); | ||
210 | } | ||
211 | EXPORT_SYMBOL_GPL(bsg_request_fn); | ||
212 | |||
213 | /** | ||
214 | * bsg_setup_queue - Create and add the bsg hooks so we can receive requests | ||
215 | * @dev: device to attach bsg device to | ||
216 | * @q: request queue setup by caller | ||
217 | * @name: device to give bsg device | ||
218 | * @job_fn: bsg job handler | ||
219 | * @dd_job_size: size of LLD data needed for each job | ||
220 | * | ||
221 | * The caller should have set up the request queue with bsg_request_fn | ||
222 | * as the request_fn. | ||
223 | */ | ||
224 | int bsg_setup_queue(struct device *dev, struct request_queue *q, | ||
225 | char *name, bsg_job_fn *job_fn, int dd_job_size) | ||
226 | { | ||
227 | int ret; | ||
228 | |||
229 | q->queuedata = dev; | ||
230 | q->bsg_job_size = dd_job_size; | ||
231 | q->bsg_job_fn = job_fn; | ||
232 | queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); | ||
233 | blk_queue_softirq_done(q, bsg_softirq_done); | ||
234 | blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); | ||
235 | |||
236 | ret = bsg_register_queue(q, dev, name, NULL); | ||
237 | if (ret) { | ||
238 | printk(KERN_ERR "%s: bsg interface failed to " | ||
239 | "initialize - register queue\n", dev->kobj.name); | ||
240 | return ret; | ||
241 | } | ||
242 | |||
243 | return 0; | ||
244 | } | ||
245 | EXPORT_SYMBOL_GPL(bsg_setup_queue); | ||
246 | |||
247 | /** | ||
248 | * bsg_remove_queue - Deletes the bsg dev from the q | ||
249 | * @q: the request_queue that is to be torn down. | ||
250 | * | ||
251 | * Notes: | ||
252 | * Before unregistering the queue empty any requests that are blocked | ||
253 | */ | ||
254 | void bsg_remove_queue(struct request_queue *q) | ||
255 | { | ||
256 | struct request *req; /* block request */ | ||
257 | int counts; /* totals for request_list count and starved */ | ||
258 | |||
259 | if (!q) | ||
260 | return; | ||
261 | |||
262 | /* Stop taking in new requests */ | ||
263 | spin_lock_irq(q->queue_lock); | ||
264 | blk_stop_queue(q); | ||
265 | |||
266 | /* drain all requests in the queue */ | ||
267 | while (1) { | ||
268 | /* need the lock to fetch a request | ||
269 | * this may fetch the same request as the previous pass | ||
270 | */ | ||
271 | req = blk_fetch_request(q); | ||
272 | /* save requests in use and starved */ | ||
273 | counts = q->rq.count[0] + q->rq.count[1] + | ||
274 | q->rq.starved[0] + q->rq.starved[1]; | ||
275 | spin_unlock_irq(q->queue_lock); | ||
276 | /* any requests still outstanding? */ | ||
277 | if (counts == 0) | ||
278 | break; | ||
279 | |||
280 | /* This may be the same req as the previous iteration, | ||
281 | * always send the blk_end_request_all after a prefetch. | ||
282 | * It is not okay to not end the request because the | ||
283 | * prefetch started the request. | ||
284 | */ | ||
285 | if (req) { | ||
286 | /* return -ENXIO to indicate that this queue is | ||
287 | * going away | ||
288 | */ | ||
289 | req->errors = -ENXIO; | ||
290 | blk_end_request_all(req, -ENXIO); | ||
291 | } | ||
292 | |||
293 | msleep(200); /* allow bsg to possibly finish */ | ||
294 | spin_lock_irq(q->queue_lock); | ||
295 | } | ||
296 | bsg_unregister_queue(q); | ||
297 | } | ||
298 | EXPORT_SYMBOL_GPL(bsg_remove_queue); | ||
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 1f96ad6254f..a33bd4377c6 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -130,6 +130,8 @@ struct cfq_queue { | |||
130 | unsigned long slice_end; | 130 | unsigned long slice_end; |
131 | long slice_resid; | 131 | long slice_resid; |
132 | 132 | ||
133 | /* pending metadata requests */ | ||
134 | int meta_pending; | ||
133 | /* number of requests that are on the dispatch list or inside driver */ | 135 | /* number of requests that are on the dispatch list or inside driver */ |
134 | int dispatched; | 136 | int dispatched; |
135 | 137 | ||
@@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, | |||
682 | if (rq_is_sync(rq1) != rq_is_sync(rq2)) | 684 | if (rq_is_sync(rq1) != rq_is_sync(rq2)) |
683 | return rq_is_sync(rq1) ? rq1 : rq2; | 685 | return rq_is_sync(rq1) ? rq1 : rq2; |
684 | 686 | ||
687 | if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META) | ||
688 | return rq1->cmd_flags & REQ_META ? rq1 : rq2; | ||
689 | |||
685 | s1 = blk_rq_pos(rq1); | 690 | s1 = blk_rq_pos(rq1); |
686 | s2 = blk_rq_pos(rq2); | 691 | s2 = blk_rq_pos(rq2); |
687 | 692 | ||
@@ -1209,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
1209 | 1214 | ||
1210 | hlist_del_init(&cfqg->cfqd_node); | 1215 | hlist_del_init(&cfqg->cfqd_node); |
1211 | 1216 | ||
1217 | BUG_ON(cfqd->nr_blkcg_linked_grps <= 0); | ||
1218 | cfqd->nr_blkcg_linked_grps--; | ||
1219 | |||
1212 | /* | 1220 | /* |
1213 | * Put the reference taken at the time of creation so that when all | 1221 | * Put the reference taken at the time of creation so that when all |
1214 | * queues are gone, group can be destroyed. | 1222 | * queues are gone, group can be destroyed. |
@@ -1604,6 +1612,10 @@ static void cfq_remove_request(struct request *rq) | |||
1604 | cfqq->cfqd->rq_queued--; | 1612 | cfqq->cfqd->rq_queued--; |
1605 | cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, | 1613 | cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, |
1606 | rq_data_dir(rq), rq_is_sync(rq)); | 1614 | rq_data_dir(rq), rq_is_sync(rq)); |
1615 | if (rq->cmd_flags & REQ_META) { | ||
1616 | WARN_ON(!cfqq->meta_pending); | ||
1617 | cfqq->meta_pending--; | ||
1618 | } | ||
1607 | } | 1619 | } |
1608 | 1620 | ||
1609 | static int cfq_merge(struct request_queue *q, struct request **req, | 1621 | static int cfq_merge(struct request_queue *q, struct request **req, |
@@ -3357,6 +3369,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, | |||
3357 | return true; | 3369 | return true; |
3358 | 3370 | ||
3359 | /* | 3371 | /* |
3372 | * So both queues are sync. Let the new request get disk time if | ||
3373 | * it's a metadata request and the current queue is doing regular IO. | ||
3374 | */ | ||
3375 | if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending) | ||
3376 | return true; | ||
3377 | |||
3378 | /* | ||
3360 | * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. | 3379 | * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. |
3361 | */ | 3380 | */ |
3362 | if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) | 3381 | if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) |
@@ -3420,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
3420 | struct cfq_io_context *cic = RQ_CIC(rq); | 3439 | struct cfq_io_context *cic = RQ_CIC(rq); |
3421 | 3440 | ||
3422 | cfqd->rq_queued++; | 3441 | cfqd->rq_queued++; |
3442 | if (rq->cmd_flags & REQ_META) | ||
3443 | cfqq->meta_pending++; | ||
3423 | 3444 | ||
3424 | cfq_update_io_thinktime(cfqd, cfqq, cic); | 3445 | cfq_update_io_thinktime(cfqd, cfqq, cic); |
3425 | cfq_update_io_seektime(cfqd, cfqq, rq); | 3446 | cfq_update_io_seektime(cfqd, cfqq, rq); |
diff --git a/block/genhd.c b/block/genhd.c index 5cb51c55f6d..e2f67902dd0 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v) | |||
1146 | cpu = part_stat_lock(); | 1146 | cpu = part_stat_lock(); |
1147 | part_round_stats(cpu, hd); | 1147 | part_round_stats(cpu, hd); |
1148 | part_stat_unlock(); | 1148 | part_stat_unlock(); |
1149 | seq_printf(seqf, "%4d %7d %s %lu %lu %llu " | 1149 | seq_printf(seqf, "%4d %7d %s %lu %lu %lu " |
1150 | "%u %lu %lu %llu %u %u %u %u\n", | 1150 | "%u %lu %lu %lu %u %u %u %u\n", |
1151 | MAJOR(part_devt(hd)), MINOR(part_devt(hd)), | 1151 | MAJOR(part_devt(hd)), MINOR(part_devt(hd)), |
1152 | disk_name(gp, hd->partno, buf), | 1152 | disk_name(gp, hd->partno, buf), |
1153 | part_stat_read(hd, ios[READ]), | 1153 | part_stat_read(hd, ios[READ]), |
1154 | part_stat_read(hd, merges[READ]), | 1154 | part_stat_read(hd, merges[READ]), |
1155 | (unsigned long long)part_stat_read(hd, sectors[READ]), | 1155 | part_stat_read(hd, sectors[READ]), |
1156 | jiffies_to_msecs(part_stat_read(hd, ticks[READ])), | 1156 | jiffies_to_msecs(part_stat_read(hd, ticks[READ])), |
1157 | part_stat_read(hd, ios[WRITE]), | 1157 | part_stat_read(hd, ios[WRITE]), |
1158 | part_stat_read(hd, merges[WRITE]), | 1158 | part_stat_read(hd, merges[WRITE]), |
1159 | (unsigned long long)part_stat_read(hd, sectors[WRITE]), | 1159 | part_stat_read(hd, sectors[WRITE]), |
1160 | jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), | 1160 | jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), |
1161 | part_in_flight(hd), | 1161 | part_in_flight(hd), |
1162 | jiffies_to_msecs(part_stat_read(hd, io_ticks)), | 1162 | jiffies_to_msecs(part_stat_read(hd, io_ticks)), |
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index ca3e6be44a0..5987e0ba8c2 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig | |||
@@ -468,6 +468,15 @@ config PATA_ICSIDE | |||
468 | interface card. This is not required for ICS partition support. | 468 | interface card. This is not required for ICS partition support. |
469 | If you are unsure, say N to this. | 469 | If you are unsure, say N to this. |
470 | 470 | ||
471 | config PATA_IMX | ||
472 | tristate "PATA support for Freescale iMX" | ||
473 | depends on ARCH_MXC | ||
474 | help | ||
475 | This option enables support for the PATA host available on Freescale | ||
476 | iMX SoCs. | ||
477 | |||
478 | If unsure, say N. | ||
479 | |||
471 | config PATA_IT8213 | 480 | config PATA_IT8213 |
472 | tristate "IT8213 PATA support (Experimental)" | 481 | tristate "IT8213 PATA support (Experimental)" |
473 | depends on PCI && EXPERIMENTAL | 482 | depends on PCI && EXPERIMENTAL |
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 8ac64e1aa05..9550d691fd1 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile | |||
@@ -48,6 +48,7 @@ obj-$(CONFIG_PATA_HPT37X) += pata_hpt37x.o | |||
48 | obj-$(CONFIG_PATA_HPT3X2N) += pata_hpt3x2n.o | 48 | obj-$(CONFIG_PATA_HPT3X2N) += pata_hpt3x2n.o |
49 | obj-$(CONFIG_PATA_HPT3X3) += pata_hpt3x3.o | 49 | obj-$(CONFIG_PATA_HPT3X3) += pata_hpt3x3.o |
50 | obj-$(CONFIG_PATA_ICSIDE) += pata_icside.o | 50 | obj-$(CONFIG_PATA_ICSIDE) += pata_icside.o |
51 | obj-$(CONFIG_PATA_IMX) += pata_imx.o | ||
51 | obj-$(CONFIG_PATA_IT8213) += pata_it8213.o | 52 | obj-$(CONFIG_PATA_IT8213) += pata_it8213.o |
52 | obj-$(CONFIG_PATA_IT821X) += pata_it821x.o | 53 | obj-$(CONFIG_PATA_IT821X) += pata_it821x.o |
53 | obj-$(CONFIG_PATA_JMICRON) += pata_jmicron.o | 54 | obj-$(CONFIG_PATA_JMICRON) += pata_jmicron.o |
diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c new file mode 100644 index 00000000000..ca9d9caedfa --- /dev/null +++ b/drivers/ata/pata_imx.c | |||
@@ -0,0 +1,253 @@ | |||
1 | /* | ||
2 | * Freescale iMX PATA driver | ||
3 | * | ||
4 | * Copyright (C) 2011 Arnaud Patard <arnaud.patard@rtp-net.org> | ||
5 | * | ||
6 | * Based on pata_platform - Copyright (C) 2006 - 2007 Paul Mundt | ||
7 | * | ||
8 | * This file is subject to the terms and conditions of the GNU General Public | ||
9 | * License. See the file "COPYING" in the main directory of this archive | ||
10 | * for more details. | ||
11 | * | ||
12 | * TODO: | ||
13 | * - dmaengine support | ||
14 | * - check if timing stuff needed | ||
15 | */ | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/blkdev.h> | ||
20 | #include <scsi/scsi_host.h> | ||
21 | #include <linux/ata.h> | ||
22 | #include <linux/libata.h> | ||
23 | #include <linux/platform_device.h> | ||
24 | #include <linux/clk.h> | ||
25 | |||
26 | #define DRV_NAME "pata_imx" | ||
27 | |||
28 | #define PATA_IMX_ATA_CONTROL 0x24 | ||
29 | #define PATA_IMX_ATA_CTRL_FIFO_RST_B (1<<7) | ||
30 | #define PATA_IMX_ATA_CTRL_ATA_RST_B (1<<6) | ||
31 | #define PATA_IMX_ATA_CTRL_IORDY_EN (1<<0) | ||
32 | #define PATA_IMX_ATA_INT_EN 0x2C | ||
33 | #define PATA_IMX_ATA_INTR_ATA_INTRQ2 (1<<3) | ||
34 | #define PATA_IMX_DRIVE_DATA 0xA0 | ||
35 | #define PATA_IMX_DRIVE_CONTROL 0xD8 | ||
36 | |||
37 | struct pata_imx_priv { | ||
38 | struct clk *clk; | ||
39 | /* timings/interrupt/control regs */ | ||
40 | u8 *host_regs; | ||
41 | u32 ata_ctl; | ||
42 | }; | ||
43 | |||
44 | static int pata_imx_set_mode(struct ata_link *link, struct ata_device **unused) | ||
45 | { | ||
46 | struct ata_device *dev; | ||
47 | struct ata_port *ap = link->ap; | ||
48 | struct pata_imx_priv *priv = ap->host->private_data; | ||
49 | u32 val; | ||
50 | |||
51 | ata_for_each_dev(dev, link, ENABLED) { | ||
52 | dev->pio_mode = dev->xfer_mode = XFER_PIO_0; | ||
53 | dev->xfer_shift = ATA_SHIFT_PIO; | ||
54 | dev->flags |= ATA_DFLAG_PIO; | ||
55 | |||
56 | val = __raw_readl(priv->host_regs + PATA_IMX_ATA_CONTROL); | ||
57 | if (ata_pio_need_iordy(dev)) | ||
58 | val |= PATA_IMX_ATA_CTRL_IORDY_EN; | ||
59 | else | ||
60 | val &= ~PATA_IMX_ATA_CTRL_IORDY_EN; | ||
61 | __raw_writel(val, priv->host_regs + PATA_IMX_ATA_CONTROL); | ||
62 | |||
63 | ata_dev_printk(dev, KERN_INFO, "configured for PIO\n"); | ||
64 | } | ||
65 | return 0; | ||
66 | } | ||
67 | |||
68 | static struct scsi_host_template pata_imx_sht = { | ||
69 | ATA_PIO_SHT(DRV_NAME), | ||
70 | }; | ||
71 | |||
72 | static struct ata_port_operations pata_imx_port_ops = { | ||
73 | .inherits = &ata_sff_port_ops, | ||
74 | .sff_data_xfer = ata_sff_data_xfer_noirq, | ||
75 | .cable_detect = ata_cable_unknown, | ||
76 | .set_mode = pata_imx_set_mode, | ||
77 | }; | ||
78 | |||
79 | static void pata_imx_setup_port(struct ata_ioports *ioaddr) | ||
80 | { | ||
81 | /* Fixup the port shift for platforms that need it */ | ||
82 | ioaddr->data_addr = ioaddr->cmd_addr + (ATA_REG_DATA << 2); | ||
83 | ioaddr->error_addr = ioaddr->cmd_addr + (ATA_REG_ERR << 2); | ||
84 | ioaddr->feature_addr = ioaddr->cmd_addr + (ATA_REG_FEATURE << 2); | ||
85 | ioaddr->nsect_addr = ioaddr->cmd_addr + (ATA_REG_NSECT << 2); | ||
86 | ioaddr->lbal_addr = ioaddr->cmd_addr + (ATA_REG_LBAL << 2); | ||
87 | ioaddr->lbam_addr = ioaddr->cmd_addr + (ATA_REG_LBAM << 2); | ||
88 | ioaddr->lbah_addr = ioaddr->cmd_addr + (ATA_REG_LBAH << 2); | ||
89 | ioaddr->device_addr = ioaddr->cmd_addr + (ATA_REG_DEVICE << 2); | ||
90 | ioaddr->status_addr = ioaddr->cmd_addr + (ATA_REG_STATUS << 2); | ||
91 | ioaddr->command_addr = ioaddr->cmd_addr + (ATA_REG_CMD << 2); | ||
92 | } | ||
93 | |||
94 | static int __devinit pata_imx_probe(struct platform_device *pdev) | ||
95 | { | ||
96 | struct ata_host *host; | ||
97 | struct ata_port *ap; | ||
98 | struct pata_imx_priv *priv; | ||
99 | int irq = 0; | ||
100 | struct resource *io_res; | ||
101 | |||
102 | io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
103 | if (io_res == NULL) | ||
104 | return -EINVAL; | ||
105 | |||
106 | irq = platform_get_irq(pdev, 0); | ||
107 | if (irq <= 0) | ||
108 | return -EINVAL; | ||
109 | |||
110 | priv = devm_kzalloc(&pdev->dev, | ||
111 | sizeof(struct pata_imx_priv), GFP_KERNEL); | ||
112 | if (!priv) | ||
113 | return -ENOMEM; | ||
114 | |||
115 | priv->clk = clk_get(&pdev->dev, NULL); | ||
116 | if (IS_ERR(priv->clk)) { | ||
117 | dev_err(&pdev->dev, "Failed to get clock\n"); | ||
118 | return PTR_ERR(priv->clk); | ||
119 | } | ||
120 | |||
121 | clk_enable(priv->clk); | ||
122 | |||
123 | host = ata_host_alloc(&pdev->dev, 1); | ||
124 | if (!host) | ||
125 | goto free_priv; | ||
126 | |||
127 | host->private_data = priv; | ||
128 | ap = host->ports[0]; | ||
129 | |||
130 | ap->ops = &pata_imx_port_ops; | ||
131 | ap->pio_mask = ATA_PIO0; | ||
132 | ap->flags |= ATA_FLAG_SLAVE_POSS; | ||
133 | |||
134 | priv->host_regs = devm_ioremap(&pdev->dev, io_res->start, | ||
135 | resource_size(io_res)); | ||
136 | if (!priv->host_regs) { | ||
137 | dev_err(&pdev->dev, "failed to map IO/CTL base\n"); | ||
138 | goto free_priv; | ||
139 | } | ||
140 | |||
141 | ap->ioaddr.cmd_addr = priv->host_regs + PATA_IMX_DRIVE_DATA; | ||
142 | ap->ioaddr.ctl_addr = priv->host_regs + PATA_IMX_DRIVE_CONTROL; | ||
143 | |||
144 | ap->ioaddr.altstatus_addr = ap->ioaddr.ctl_addr; | ||
145 | |||
146 | pata_imx_setup_port(&ap->ioaddr); | ||
147 | |||
148 | ata_port_desc(ap, "cmd 0x%llx ctl 0x%llx", | ||
149 | (unsigned long long)io_res->start + PATA_IMX_DRIVE_DATA, | ||
150 | (unsigned long long)io_res->start + PATA_IMX_DRIVE_CONTROL); | ||
151 | |||
152 | /* deassert resets */ | ||
153 | __raw_writel(PATA_IMX_ATA_CTRL_FIFO_RST_B | | ||
154 | PATA_IMX_ATA_CTRL_ATA_RST_B, | ||
155 | priv->host_regs + PATA_IMX_ATA_CONTROL); | ||
156 | /* enable interrupts */ | ||
157 | __raw_writel(PATA_IMX_ATA_INTR_ATA_INTRQ2, | ||
158 | priv->host_regs + PATA_IMX_ATA_INT_EN); | ||
159 | |||
160 | /* activate */ | ||
161 | return ata_host_activate(host, irq, ata_sff_interrupt, 0, | ||
162 | &pata_imx_sht); | ||
163 | |||
164 | free_priv: | ||
165 | clk_disable(priv->clk); | ||
166 | clk_put(priv->clk); | ||
167 | return -ENOMEM; | ||
168 | } | ||
169 | |||
170 | static int __devexit pata_imx_remove(struct platform_device *pdev) | ||
171 | { | ||
172 | struct ata_host *host = dev_get_drvdata(&pdev->dev); | ||
173 | struct pata_imx_priv *priv = host->private_data; | ||
174 | |||
175 | ata_host_detach(host); | ||
176 | |||
177 | __raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN); | ||
178 | |||
179 | clk_disable(priv->clk); | ||
180 | clk_put(priv->clk); | ||
181 | |||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | #ifdef CONFIG_PM | ||
186 | static int pata_imx_suspend(struct device *dev) | ||
187 | { | ||
188 | struct ata_host *host = dev_get_drvdata(dev); | ||
189 | struct pata_imx_priv *priv = host->private_data; | ||
190 | int ret; | ||
191 | |||
192 | ret = ata_host_suspend(host, PMSG_SUSPEND); | ||
193 | if (!ret) { | ||
194 | __raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN); | ||
195 | priv->ata_ctl = | ||
196 | __raw_readl(priv->host_regs + PATA_IMX_ATA_CONTROL); | ||
197 | clk_disable(priv->clk); | ||
198 | } | ||
199 | |||
200 | return ret; | ||
201 | } | ||
202 | |||
203 | static int pata_imx_resume(struct device *dev) | ||
204 | { | ||
205 | struct ata_host *host = dev_get_drvdata(dev); | ||
206 | struct pata_imx_priv *priv = host->private_data; | ||
207 | |||
208 | clk_enable(priv->clk); | ||
209 | |||
210 | __raw_writel(priv->ata_ctl, priv->host_regs + PATA_IMX_ATA_CONTROL); | ||
211 | |||
212 | __raw_writel(PATA_IMX_ATA_INTR_ATA_INTRQ2, | ||
213 | priv->host_regs + PATA_IMX_ATA_INT_EN); | ||
214 | |||
215 | ata_host_resume(host); | ||
216 | |||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | static const struct dev_pm_ops pata_imx_pm_ops = { | ||
221 | .suspend = pata_imx_suspend, | ||
222 | .resume = pata_imx_resume, | ||
223 | }; | ||
224 | #endif | ||
225 | |||
226 | static struct platform_driver pata_imx_driver = { | ||
227 | .probe = pata_imx_probe, | ||
228 | .remove = __devexit_p(pata_imx_remove), | ||
229 | .driver = { | ||
230 | .name = DRV_NAME, | ||
231 | .owner = THIS_MODULE, | ||
232 | #ifdef CONFIG_PM | ||
233 | .pm = &pata_imx_pm_ops, | ||
234 | #endif | ||
235 | }, | ||
236 | }; | ||
237 | |||
238 | static int __init pata_imx_init(void) | ||
239 | { | ||
240 | return platform_driver_register(&pata_imx_driver); | ||
241 | } | ||
242 | |||
243 | static void __exit pata_imx_exit(void) | ||
244 | { | ||
245 | platform_driver_unregister(&pata_imx_driver); | ||
246 | } | ||
247 | module_init(pata_imx_init); | ||
248 | module_exit(pata_imx_exit); | ||
249 | |||
250 | MODULE_AUTHOR("Arnaud Patard <arnaud.patard@rtp-net.org>"); | ||
251 | MODULE_DESCRIPTION("low-level driver for iMX PATA"); | ||
252 | MODULE_LICENSE("GPL"); | ||
253 | MODULE_ALIAS("platform:" DRV_NAME); | ||
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 65e4be6be22..8e9f5048a10 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c | |||
@@ -124,6 +124,17 @@ static const struct via_isa_bridge { | |||
124 | { NULL } | 124 | { NULL } |
125 | }; | 125 | }; |
126 | 126 | ||
127 | static const struct dmi_system_id no_atapi_dma_dmi_table[] = { | ||
128 | { | ||
129 | .ident = "AVERATEC 3200", | ||
130 | .matches = { | ||
131 | DMI_MATCH(DMI_BOARD_VENDOR, "AVERATEC"), | ||
132 | DMI_MATCH(DMI_BOARD_NAME, "3200"), | ||
133 | }, | ||
134 | }, | ||
135 | { } | ||
136 | }; | ||
137 | |||
127 | struct via_port { | 138 | struct via_port { |
128 | u8 cached_device; | 139 | u8 cached_device; |
129 | }; | 140 | }; |
@@ -355,6 +366,13 @@ static unsigned long via_mode_filter(struct ata_device *dev, unsigned long mask) | |||
355 | mask &= ~ ATA_MASK_UDMA; | 366 | mask &= ~ ATA_MASK_UDMA; |
356 | } | 367 | } |
357 | } | 368 | } |
369 | |||
370 | if (dev->class == ATA_DEV_ATAPI && | ||
371 | dmi_check_system(no_atapi_dma_dmi_table)) { | ||
372 | ata_dev_warn(dev, "controller locks up on ATAPI DMA, forcing PIO\n"); | ||
373 | mask &= ATA_MASK_PIO; | ||
374 | } | ||
375 | |||
358 | return mask; | 376 | return mask; |
359 | } | 377 | } |
360 | 378 | ||
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 0a9a774a7e1..5c4237452f5 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c | |||
@@ -1329,7 +1329,7 @@ static int sata_dwc_port_start(struct ata_port *ap) | |||
1329 | dev_err(ap->dev, "%s: dma_alloc_coherent failed\n", | 1329 | dev_err(ap->dev, "%s: dma_alloc_coherent failed\n", |
1330 | __func__); | 1330 | __func__); |
1331 | err = -ENOMEM; | 1331 | err = -ENOMEM; |
1332 | goto CLEANUP; | 1332 | goto CLEANUP_ALLOC; |
1333 | } | 1333 | } |
1334 | } | 1334 | } |
1335 | 1335 | ||
@@ -1349,15 +1349,13 @@ static int sata_dwc_port_start(struct ata_port *ap) | |||
1349 | /* Clear any error bits before libata starts issuing commands */ | 1349 | /* Clear any error bits before libata starts issuing commands */ |
1350 | clear_serror(); | 1350 | clear_serror(); |
1351 | ap->private_data = hsdevp; | 1351 | ap->private_data = hsdevp; |
1352 | dev_dbg(ap->dev, "%s: done\n", __func__); | ||
1353 | return 0; | ||
1352 | 1354 | ||
1355 | CLEANUP_ALLOC: | ||
1356 | kfree(hsdevp); | ||
1353 | CLEANUP: | 1357 | CLEANUP: |
1354 | if (err) { | 1358 | dev_dbg(ap->dev, "%s: fail. ap->id = %d\n", __func__, ap->print_id); |
1355 | sata_dwc_port_stop(ap); | ||
1356 | dev_dbg(ap->dev, "%s: fail\n", __func__); | ||
1357 | } else { | ||
1358 | dev_dbg(ap->dev, "%s: done\n", __func__); | ||
1359 | } | ||
1360 | |||
1361 | return err; | 1359 | return err; |
1362 | } | 1360 | } |
1363 | 1361 | ||
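Note: the sata_dwc rework above replaces a mixed pass/fail tail with the kernel's usual unwind layout: one label per acquired resource, unwound in reverse order of acquisition, with the success path returning before the labels. A minimal sketch of that shape, where struct foo and alloc_b() are hypothetical:

    static int example_port_start(void)
    {
            struct foo *a;
            struct foo *b;
            int err;

            a = kzalloc(sizeof(*a), GFP_KERNEL);
            if (!a)
                    return -ENOMEM;

            b = alloc_b();
            if (!b) {
                    err = -ENOMEM;
                    goto free_a;    /* undo only what already succeeded */
            }
            return 0;

    free_a:
            kfree(a);
            return err;
    }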
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c index 98c1d780f55..9dfb40b8c2c 100644 --- a/drivers/ata/sata_sil.c +++ b/drivers/ata/sata_sil.c | |||
@@ -438,7 +438,7 @@ static void sil_host_intr(struct ata_port *ap, u32 bmdma2) | |||
438 | u8 status; | 438 | u8 status; |
439 | 439 | ||
440 | if (unlikely(bmdma2 & SIL_DMA_SATA_IRQ)) { | 440 | if (unlikely(bmdma2 & SIL_DMA_SATA_IRQ)) { |
441 | u32 serror; | 441 | u32 serror = 0xffffffff; |
442 | 442 | ||
443 | /* SIEN doesn't mask SATA IRQs on some 3112s. Those | 443 | /* SIEN doesn't mask SATA IRQs on some 3112s. Those |
444 | * controllers continue to assert IRQ as long as | 444 | * controllers continue to assert IRQ as long as |
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e18566a0fed..1c374579407 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c | |||
@@ -460,6 +460,21 @@ static int pm_genpd_runtime_resume(struct device *dev) | |||
460 | return 0; | 460 | return 0; |
461 | } | 461 | } |
462 | 462 | ||
463 | /** | ||
464 | * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use. | ||
465 | */ | ||
466 | void pm_genpd_poweroff_unused(void) | ||
467 | { | ||
468 | struct generic_pm_domain *genpd; | ||
469 | |||
470 | mutex_lock(&gpd_list_lock); | ||
471 | |||
472 | list_for_each_entry(genpd, &gpd_list, gpd_list_node) | ||
473 | genpd_queue_power_off_work(genpd); | ||
474 | |||
475 | mutex_unlock(&gpd_list_lock); | ||
476 | } | ||
477 | |||
463 | #else | 478 | #else |
464 | 479 | ||
465 | static inline void genpd_power_off_work_fn(struct work_struct *work) {} | 480 | static inline void genpd_power_off_work_fn(struct work_struct *work) {} |
@@ -1255,18 +1270,3 @@ void pm_genpd_init(struct generic_pm_domain *genpd, | |||
1255 | list_add(&genpd->gpd_list_node, &gpd_list); | 1270 | list_add(&genpd->gpd_list_node, &gpd_list); |
1256 | mutex_unlock(&gpd_list_lock); | 1271 | mutex_unlock(&gpd_list_lock); |
1257 | } | 1272 | } |
1258 | |||
1259 | /** | ||
1260 | * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use. | ||
1261 | */ | ||
1262 | void pm_genpd_poweroff_unused(void) | ||
1263 | { | ||
1264 | struct generic_pm_domain *genpd; | ||
1265 | |||
1266 | mutex_lock(&gpd_list_lock); | ||
1267 | |||
1268 | list_for_each_entry(genpd, &gpd_list, gpd_list_node) | ||
1269 | genpd_queue_power_off_work(genpd); | ||
1270 | |||
1271 | mutex_unlock(&gpd_list_lock); | ||
1272 | } | ||
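Note: relocating pm_genpd_poweroff_unused() puts it in the same conditional section as the runtime-PM callbacks it depends on, instead of dangling at the end of the file. Platform code is expected to call it once, late in boot, after drivers have had a chance to claim their devices; a hypothetical call site, as a sketch:

    #include <linux/init.h>
    #include <linux/pm_domain.h>

    static int __init example_genpd_late_init(void)
    {
            /* queue power-off work for every domain with no devices in use */
            pm_genpd_poweroff_unused();
            return 0;
    }
    late_initcall(example_genpd_late_init);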
diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c index c2231ff06cb..c4f7a45cd2c 100644 --- a/drivers/base/regmap/regmap-i2c.c +++ b/drivers/base/regmap/regmap-i2c.c | |||
@@ -113,3 +113,4 @@ struct regmap *regmap_init_i2c(struct i2c_client *i2c, | |||
113 | } | 113 | } |
114 | EXPORT_SYMBOL_GPL(regmap_init_i2c); | 114 | EXPORT_SYMBOL_GPL(regmap_init_i2c); |
115 | 115 | ||
116 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/base/regmap/regmap-spi.c b/drivers/base/regmap/regmap-spi.c index 4deba0621bc..f8396945d6e 100644 --- a/drivers/base/regmap/regmap-spi.c +++ b/drivers/base/regmap/regmap-spi.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/regmap.h> | 13 | #include <linux/regmap.h> |
14 | #include <linux/spi/spi.h> | 14 | #include <linux/spi/spi.h> |
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/module.h> | ||
16 | 17 | ||
17 | static int regmap_spi_write(struct device *dev, const void *data, size_t count) | 18 | static int regmap_spi_write(struct device *dev, const void *data, size_t count) |
18 | { | 19 | { |
@@ -70,3 +71,5 @@ struct regmap *regmap_init_spi(struct spi_device *spi, | |||
70 | return regmap_init(&spi->dev, ®map_spi, config); | 71 | return regmap_init(&spi->dev, ®map_spi, config); |
71 | } | 72 | } |
72 | EXPORT_SYMBOL_GPL(regmap_init_spi); | 73 | EXPORT_SYMBOL_GPL(regmap_init_spi); |
74 | |||
75 | MODULE_LICENSE("GPL"); | ||
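Note: the MODULE_LICENSE("GPL") lines added to regmap-i2c.c and regmap-spi.c (plus the module.h include the SPI file was missing) are load-bearing once regmap is built modular: an untagged module is treated as proprietary, taints the kernel, and is refused access to EXPORT_SYMBOL_GPL symbols. A sketch of the minimal metadata block a loadable module should end with, description and author strings hypothetical:

    #include <linux/module.h>

    MODULE_DESCRIPTION("Example regmap bus glue");
    MODULE_AUTHOR("A. Developer");
    MODULE_LICENSE("GPL");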
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index cf3565cae93..0eef4da1ac6 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c | |||
@@ -317,7 +317,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, | |||
317 | u8[0] |= map->bus->read_flag_mask; | 317 | u8[0] |= map->bus->read_flag_mask; |
318 | 318 | ||
319 | ret = map->bus->read(map->dev, map->work_buf, map->format.reg_bytes, | 319 | ret = map->bus->read(map->dev, map->work_buf, map->format.reg_bytes, |
320 | val, map->format.val_bytes); | 320 | val, val_len); |
321 | if (ret != 0) | 321 | if (ret != 0) |
322 | return ret; | 322 | return ret; |
323 | 323 | ||
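Note: the regmap.c one-liner deserves spelling out: _regmap_raw_read() sized the bus transfer by map->format.val_bytes, the width of a single value, rather than the caller's val_len, so any raw read spanning several registers silently came back short. With the fix, a multi-register read behaves as expected; a sketch, assuming an initialized struct regmap *map and a struct device *dev:

    u8 buf[4];      /* e.g. two 16-bit values, or four 8-bit registers */
    int ret;

    ret = regmap_raw_read(map, 0x10, buf, sizeof(buf));
    if (ret != 0)
            dev_err(dev, "raw read failed: %d\n", ret);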
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 717d6e4e18d..6f07ec1c2f5 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig | |||
@@ -256,6 +256,21 @@ config BLK_DEV_LOOP | |||
256 | 256 | ||
257 | Most users will answer N here. | 257 | Most users will answer N here. |
258 | 258 | ||
259 | config BLK_DEV_LOOP_MIN_COUNT | ||
260 | int "Number of loop devices to pre-create at init time" | ||
261 | depends on BLK_DEV_LOOP | ||
262 | default 8 | ||
263 | help | ||
264 | Static number of loop devices to be unconditionally pre-created | ||
265 | at init time. | ||
266 | |||
267 | This default value can be overridden on the kernel command | ||
268 | line or with the module parameter loop.max_loop. | ||
269 | |||
270 | The historic default is 8. If a late 2011 version of losetup(8) | ||
271 | is used, it can be set to 0, since needed loop devices can be | ||
272 | dynamically allocated with the /dev/loop-control interface. | ||
273 | |||
259 | config BLK_DEV_CRYPTOLOOP | 274 | config BLK_DEV_CRYPTOLOOP |
260 | tristate "Cryptoloop Support" | 275 | tristate "Cryptoloop Support" |
261 | select CRYPTO | 276 | select CRYPTO |
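Note: the /dev/loop-control interface named in the help text above is the misc device added to loop.c later in this series. From user space, asking for a free device is a single ioctl; a minimal sketch using the LOOP_CTL_* constants this series adds to <linux/loop.h>:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/loop.h>

    int main(void)
    {
            int ctl = open("/dev/loop-control", O_RDWR);
            int nr;

            if (ctl < 0)
                    return 1;
            nr = ioctl(ctl, LOOP_CTL_GET_FREE); /* find or create a free loopN */
            if (nr >= 0)
                    printf("/dev/loop%d\n", nr);
            close(ctl);
            return nr < 0;
    }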
@@ -471,7 +486,7 @@ config XEN_BLKDEV_FRONTEND | |||
471 | in another domain which drives the actual block device. | 486 | in another domain which drives the actual block device. |
472 | 487 | ||
473 | config XEN_BLKDEV_BACKEND | 488 | config XEN_BLKDEV_BACKEND |
474 | tristate "Block-device backend driver" | 489 | tristate "Xen block-device backend driver" |
475 | depends on XEN_BACKEND | 490 | depends on XEN_BACKEND |
476 | help | 491 | help |
477 | The block-device backend driver allows the kernel to export its | 492 | The block-device backend driver allows the kernel to export its |
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 515bcd948a4..0feab261e29 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -1829,10 +1829,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n | |||
1829 | 1829 | ||
1830 | /* silently ignore cpu mask on UP kernel */ | 1830 | /* silently ignore cpu mask on UP kernel */ |
1831 | if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { | 1831 | if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { |
1832 | err = __bitmap_parse(sc.cpu_mask, 32, 0, | 1832 | err = bitmap_parse(sc.cpu_mask, 32, |
1833 | cpumask_bits(new_cpu_mask), nr_cpu_ids); | 1833 | cpumask_bits(new_cpu_mask), nr_cpu_ids); |
1834 | if (err) { | 1834 | if (err) { |
1835 | dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); | 1835 | dev_warn(DEV, "bitmap_parse() failed with %d\n", err); |
1836 | retcode = ERR_CPU_MASK_PARSE; | 1836 | retcode = ERR_CPU_MASK_PARSE; |
1837 | goto fail; | 1837 | goto fail; |
1838 | } | 1838 | } |
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 76c8da78212..4720c7ade0a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -75,11 +75,11 @@ | |||
75 | #include <linux/kthread.h> | 75 | #include <linux/kthread.h> |
76 | #include <linux/splice.h> | 76 | #include <linux/splice.h> |
77 | #include <linux/sysfs.h> | 77 | #include <linux/sysfs.h> |
78 | 78 | #include <linux/miscdevice.h> | |
79 | #include <asm/uaccess.h> | 79 | #include <asm/uaccess.h> |
80 | 80 | ||
81 | static LIST_HEAD(loop_devices); | 81 | static DEFINE_IDR(loop_index_idr); |
82 | static DEFINE_MUTEX(loop_devices_mutex); | 82 | static DEFINE_MUTEX(loop_index_mutex); |
83 | 83 | ||
84 | static int max_part; | 84 | static int max_part; |
85 | static int part_shift; | 85 | static int part_shift; |
@@ -722,17 +722,10 @@ static inline int is_loop_device(struct file *file) | |||
722 | static ssize_t loop_attr_show(struct device *dev, char *page, | 722 | static ssize_t loop_attr_show(struct device *dev, char *page, |
723 | ssize_t (*callback)(struct loop_device *, char *)) | 723 | ssize_t (*callback)(struct loop_device *, char *)) |
724 | { | 724 | { |
725 | struct loop_device *l, *lo = NULL; | 725 | struct gendisk *disk = dev_to_disk(dev); |
726 | 726 | struct loop_device *lo = disk->private_data; | |
727 | mutex_lock(&loop_devices_mutex); | ||
728 | list_for_each_entry(l, &loop_devices, lo_list) | ||
729 | if (disk_to_dev(l->lo_disk) == dev) { | ||
730 | lo = l; | ||
731 | break; | ||
732 | } | ||
733 | mutex_unlock(&loop_devices_mutex); | ||
734 | 727 | ||
735 | return lo ? callback(lo, page) : -EIO; | 728 | return callback(lo, page); |
736 | } | 729 | } |
737 | 730 | ||
738 | #define LOOP_ATTR_RO(_name) \ | 731 | #define LOOP_ATTR_RO(_name) \ |
@@ -750,10 +743,10 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) | |||
750 | ssize_t ret; | 743 | ssize_t ret; |
751 | char *p = NULL; | 744 | char *p = NULL; |
752 | 745 | ||
753 | mutex_lock(&lo->lo_ctl_mutex); | 746 | spin_lock_irq(&lo->lo_lock); |
754 | if (lo->lo_backing_file) | 747 | if (lo->lo_backing_file) |
755 | p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); | 748 | p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); |
756 | mutex_unlock(&lo->lo_ctl_mutex); | 749 | spin_unlock_irq(&lo->lo_lock); |
757 | 750 | ||
758 | if (IS_ERR_OR_NULL(p)) | 751 | if (IS_ERR_OR_NULL(p)) |
759 | ret = PTR_ERR(p); | 752 | ret = PTR_ERR(p); |
@@ -1007,7 +1000,9 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) | |||
1007 | 1000 | ||
1008 | kthread_stop(lo->lo_thread); | 1001 | kthread_stop(lo->lo_thread); |
1009 | 1002 | ||
1003 | spin_lock_irq(&lo->lo_lock); | ||
1010 | lo->lo_backing_file = NULL; | 1004 | lo->lo_backing_file = NULL; |
1005 | spin_unlock_irq(&lo->lo_lock); | ||
1011 | 1006 | ||
1012 | loop_release_xfer(lo); | 1007 | loop_release_xfer(lo); |
1013 | lo->transfer = NULL; | 1008 | lo->transfer = NULL; |
@@ -1485,13 +1480,22 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, | |||
1485 | 1480 | ||
1486 | static int lo_open(struct block_device *bdev, fmode_t mode) | 1481 | static int lo_open(struct block_device *bdev, fmode_t mode) |
1487 | { | 1482 | { |
1488 | struct loop_device *lo = bdev->bd_disk->private_data; | 1483 | struct loop_device *lo; |
1484 | int err = 0; | ||
1485 | |||
1486 | mutex_lock(&loop_index_mutex); | ||
1487 | lo = bdev->bd_disk->private_data; | ||
1488 | if (!lo) { | ||
1489 | err = -ENXIO; | ||
1490 | goto out; | ||
1491 | } | ||
1489 | 1492 | ||
1490 | mutex_lock(&lo->lo_ctl_mutex); | 1493 | mutex_lock(&lo->lo_ctl_mutex); |
1491 | lo->lo_refcnt++; | 1494 | lo->lo_refcnt++; |
1492 | mutex_unlock(&lo->lo_ctl_mutex); | 1495 | mutex_unlock(&lo->lo_ctl_mutex); |
1493 | 1496 | out: | |
1494 | return 0; | 1497 | mutex_unlock(&loop_index_mutex); |
1498 | return err; | ||
1495 | } | 1499 | } |
1496 | 1500 | ||
1497 | static int lo_release(struct gendisk *disk, fmode_t mode) | 1501 | static int lo_release(struct gendisk *disk, fmode_t mode) |
@@ -1557,40 +1561,71 @@ int loop_register_transfer(struct loop_func_table *funcs) | |||
1557 | return 0; | 1561 | return 0; |
1558 | } | 1562 | } |
1559 | 1563 | ||
1564 | static int unregister_transfer_cb(int id, void *ptr, void *data) | ||
1565 | { | ||
1566 | struct loop_device *lo = ptr; | ||
1567 | struct loop_func_table *xfer = data; | ||
1568 | |||
1569 | mutex_lock(&lo->lo_ctl_mutex); | ||
1570 | if (lo->lo_encryption == xfer) | ||
1571 | loop_release_xfer(lo); | ||
1572 | mutex_unlock(&lo->lo_ctl_mutex); | ||
1573 | return 0; | ||
1574 | } | ||
1575 | |||
1560 | int loop_unregister_transfer(int number) | 1576 | int loop_unregister_transfer(int number) |
1561 | { | 1577 | { |
1562 | unsigned int n = number; | 1578 | unsigned int n = number; |
1563 | struct loop_device *lo; | ||
1564 | struct loop_func_table *xfer; | 1579 | struct loop_func_table *xfer; |
1565 | 1580 | ||
1566 | if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) | 1581 | if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) |
1567 | return -EINVAL; | 1582 | return -EINVAL; |
1568 | 1583 | ||
1569 | xfer_funcs[n] = NULL; | 1584 | xfer_funcs[n] = NULL; |
1570 | 1585 | idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer); | |
1571 | list_for_each_entry(lo, &loop_devices, lo_list) { | ||
1572 | mutex_lock(&lo->lo_ctl_mutex); | ||
1573 | |||
1574 | if (lo->lo_encryption == xfer) | ||
1575 | loop_release_xfer(lo); | ||
1576 | |||
1577 | mutex_unlock(&lo->lo_ctl_mutex); | ||
1578 | } | ||
1579 | |||
1580 | return 0; | 1586 | return 0; |
1581 | } | 1587 | } |
1582 | 1588 | ||
1583 | EXPORT_SYMBOL(loop_register_transfer); | 1589 | EXPORT_SYMBOL(loop_register_transfer); |
1584 | EXPORT_SYMBOL(loop_unregister_transfer); | 1590 | EXPORT_SYMBOL(loop_unregister_transfer); |
1585 | 1591 | ||
1586 | static struct loop_device *loop_alloc(int i) | 1592 | static int loop_add(struct loop_device **l, int i) |
1587 | { | 1593 | { |
1588 | struct loop_device *lo; | 1594 | struct loop_device *lo; |
1589 | struct gendisk *disk; | 1595 | struct gendisk *disk; |
1596 | int err; | ||
1590 | 1597 | ||
1591 | lo = kzalloc(sizeof(*lo), GFP_KERNEL); | 1598 | lo = kzalloc(sizeof(*lo), GFP_KERNEL); |
1592 | if (!lo) | 1599 | if (!lo) { |
1600 | err = -ENOMEM; | ||
1593 | goto out; | 1601 | goto out; |
1602 | } | ||
1603 | |||
1604 | err = idr_pre_get(&loop_index_idr, GFP_KERNEL); | ||
1605 | if (err < 0) | ||
1606 | goto out_free_dev; | ||
1607 | |||
1608 | if (i >= 0) { | ||
1609 | int m; | ||
1610 | |||
1611 | /* create specific i in the index */ | ||
1612 | err = idr_get_new_above(&loop_index_idr, lo, i, &m); | ||
1613 | if (err >= 0 && i != m) { | ||
1614 | idr_remove(&loop_index_idr, m); | ||
1615 | err = -EEXIST; | ||
1616 | } | ||
1617 | } else if (i == -1) { | ||
1618 | int m; | ||
1619 | |||
1620 | /* get next free nr */ | ||
1621 | err = idr_get_new(&loop_index_idr, lo, &m); | ||
1622 | if (err >= 0) | ||
1623 | i = m; | ||
1624 | } else { | ||
1625 | err = -EINVAL; | ||
1626 | } | ||
1627 | if (err < 0) | ||
1628 | goto out_free_dev; | ||
1594 | 1629 | ||
1595 | lo->lo_queue = blk_alloc_queue(GFP_KERNEL); | 1630 | lo->lo_queue = blk_alloc_queue(GFP_KERNEL); |
1596 | if (!lo->lo_queue) | 1631 | if (!lo->lo_queue) |
@@ -1611,81 +1646,158 @@ static struct loop_device *loop_alloc(int i) | |||
1611 | disk->private_data = lo; | 1646 | disk->private_data = lo; |
1612 | disk->queue = lo->lo_queue; | 1647 | disk->queue = lo->lo_queue; |
1613 | sprintf(disk->disk_name, "loop%d", i); | 1648 | sprintf(disk->disk_name, "loop%d", i); |
1614 | return lo; | 1649 | add_disk(disk); |
1650 | *l = lo; | ||
1651 | return lo->lo_number; | ||
1615 | 1652 | ||
1616 | out_free_queue: | 1653 | out_free_queue: |
1617 | blk_cleanup_queue(lo->lo_queue); | 1654 | blk_cleanup_queue(lo->lo_queue); |
1618 | out_free_dev: | 1655 | out_free_dev: |
1619 | kfree(lo); | 1656 | kfree(lo); |
1620 | out: | 1657 | out: |
1621 | return NULL; | 1658 | return err; |
1622 | } | 1659 | } |
1623 | 1660 | ||
1624 | static void loop_free(struct loop_device *lo) | 1661 | static void loop_remove(struct loop_device *lo) |
1625 | { | 1662 | { |
1663 | del_gendisk(lo->lo_disk); | ||
1626 | blk_cleanup_queue(lo->lo_queue); | 1664 | blk_cleanup_queue(lo->lo_queue); |
1627 | put_disk(lo->lo_disk); | 1665 | put_disk(lo->lo_disk); |
1628 | list_del(&lo->lo_list); | ||
1629 | kfree(lo); | 1666 | kfree(lo); |
1630 | } | 1667 | } |
1631 | 1668 | ||
1632 | static struct loop_device *loop_init_one(int i) | 1669 | static int find_free_cb(int id, void *ptr, void *data) |
1670 | { | ||
1671 | struct loop_device *lo = ptr; | ||
1672 | struct loop_device **l = data; | ||
1673 | |||
1674 | if (lo->lo_state == Lo_unbound) { | ||
1675 | *l = lo; | ||
1676 | return 1; | ||
1677 | } | ||
1678 | return 0; | ||
1679 | } | ||
1680 | |||
1681 | static int loop_lookup(struct loop_device **l, int i) | ||
1633 | { | 1682 | { |
1634 | struct loop_device *lo; | 1683 | struct loop_device *lo; |
1684 | int ret = -ENODEV; | ||
1635 | 1685 | ||
1636 | list_for_each_entry(lo, &loop_devices, lo_list) { | 1686 | if (i < 0) { |
1637 | if (lo->lo_number == i) | 1687 | int err; |
1638 | return lo; | 1688 | |
1689 | err = idr_for_each(&loop_index_idr, &find_free_cb, &lo); | ||
1690 | if (err == 1) { | ||
1691 | *l = lo; | ||
1692 | ret = lo->lo_number; | ||
1693 | } | ||
1694 | goto out; | ||
1639 | } | 1695 | } |
1640 | 1696 | ||
1641 | lo = loop_alloc(i); | 1697 | /* lookup and return a specific i */ |
1698 | lo = idr_find(&loop_index_idr, i); | ||
1642 | if (lo) { | 1699 | if (lo) { |
1643 | add_disk(lo->lo_disk); | 1700 | *l = lo; |
1644 | list_add_tail(&lo->lo_list, &loop_devices); | 1701 | ret = lo->lo_number; |
1645 | } | 1702 | } |
1646 | return lo; | 1703 | out: |
1647 | } | 1704 | return ret; |
1648 | |||
1649 | static void loop_del_one(struct loop_device *lo) | ||
1650 | { | ||
1651 | del_gendisk(lo->lo_disk); | ||
1652 | loop_free(lo); | ||
1653 | } | 1705 | } |
1654 | 1706 | ||
1655 | static struct kobject *loop_probe(dev_t dev, int *part, void *data) | 1707 | static struct kobject *loop_probe(dev_t dev, int *part, void *data) |
1656 | { | 1708 | { |
1657 | struct loop_device *lo; | 1709 | struct loop_device *lo; |
1658 | struct kobject *kobj; | 1710 | struct kobject *kobj; |
1711 | int err; | ||
1659 | 1712 | ||
1660 | mutex_lock(&loop_devices_mutex); | 1713 | mutex_lock(&loop_index_mutex); |
1661 | lo = loop_init_one(MINOR(dev) >> part_shift); | 1714 | err = loop_lookup(&lo, MINOR(dev) >> part_shift); |
1662 | kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); | 1715 | if (err < 0) |
1663 | mutex_unlock(&loop_devices_mutex); | 1716 | err = loop_add(&lo, MINOR(dev) >> part_shift); |
1717 | if (err < 0) | ||
1718 | kobj = ERR_PTR(err); | ||
1719 | else | ||
1720 | kobj = get_disk(lo->lo_disk); | ||
1721 | mutex_unlock(&loop_index_mutex); | ||
1664 | 1722 | ||
1665 | *part = 0; | 1723 | *part = 0; |
1666 | return kobj; | 1724 | return kobj; |
1667 | } | 1725 | } |
1668 | 1726 | ||
1727 | static long loop_control_ioctl(struct file *file, unsigned int cmd, | ||
1728 | unsigned long parm) | ||
1729 | { | ||
1730 | struct loop_device *lo; | ||
1731 | int ret = -ENOSYS; | ||
1732 | |||
1733 | mutex_lock(&loop_index_mutex); | ||
1734 | switch (cmd) { | ||
1735 | case LOOP_CTL_ADD: | ||
1736 | ret = loop_lookup(&lo, parm); | ||
1737 | if (ret >= 0) { | ||
1738 | ret = -EEXIST; | ||
1739 | break; | ||
1740 | } | ||
1741 | ret = loop_add(&lo, parm); | ||
1742 | break; | ||
1743 | case LOOP_CTL_REMOVE: | ||
1744 | ret = loop_lookup(&lo, parm); | ||
1745 | if (ret < 0) | ||
1746 | break; | ||
1747 | mutex_lock(&lo->lo_ctl_mutex); | ||
1748 | if (lo->lo_state != Lo_unbound) { | ||
1749 | ret = -EBUSY; | ||
1750 | mutex_unlock(&lo->lo_ctl_mutex); | ||
1751 | break; | ||
1752 | } | ||
1753 | if (lo->lo_refcnt > 0) { | ||
1754 | ret = -EBUSY; | ||
1755 | mutex_unlock(&lo->lo_ctl_mutex); | ||
1756 | break; | ||
1757 | } | ||
1758 | lo->lo_disk->private_data = NULL; | ||
1759 | mutex_unlock(&lo->lo_ctl_mutex); | ||
1760 | idr_remove(&loop_index_idr, lo->lo_number); | ||
1761 | loop_remove(lo); | ||
1762 | break; | ||
1763 | case LOOP_CTL_GET_FREE: | ||
1764 | ret = loop_lookup(&lo, -1); | ||
1765 | if (ret >= 0) | ||
1766 | break; | ||
1767 | ret = loop_add(&lo, -1); | ||
1768 | } | ||
1769 | mutex_unlock(&loop_index_mutex); | ||
1770 | |||
1771 | return ret; | ||
1772 | } | ||
1773 | |||
1774 | static const struct file_operations loop_ctl_fops = { | ||
1775 | .open = nonseekable_open, | ||
1776 | .unlocked_ioctl = loop_control_ioctl, | ||
1777 | .compat_ioctl = loop_control_ioctl, | ||
1778 | .owner = THIS_MODULE, | ||
1779 | .llseek = noop_llseek, | ||
1780 | }; | ||
1781 | |||
1782 | static struct miscdevice loop_misc = { | ||
1783 | .minor = LOOP_CTRL_MINOR, | ||
1784 | .name = "loop-control", | ||
1785 | .fops = &loop_ctl_fops, | ||
1786 | }; | ||
1787 | |||
1788 | MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR); | ||
1789 | MODULE_ALIAS("devname:loop-control"); | ||
1790 | |||
1669 | static int __init loop_init(void) | 1791 | static int __init loop_init(void) |
1670 | { | 1792 | { |
1671 | int i, nr; | 1793 | int i, nr; |
1672 | unsigned long range; | 1794 | unsigned long range; |
1673 | struct loop_device *lo, *next; | 1795 | struct loop_device *lo; |
1796 | int err; | ||
1674 | 1797 | ||
1675 | /* | 1798 | err = misc_register(&loop_misc); |
1676 | * loop module now has a feature to instantiate underlying device | 1799 | if (err < 0) |
1677 | * structure on-demand, provided that there is an access dev node. | 1800 | return err; |
1678 | * However, this will not work well with user space tool that doesn't | ||
1679 | * know about such "feature". In order to not break any existing | ||
1680 | * tool, we do the following: | ||
1681 | * | ||
1682 | * (1) if max_loop is specified, create that many upfront, and this | ||
1683 | * also becomes a hard limit. | ||
1684 | * (2) if max_loop is not specified, create 8 loop device on module | ||
1685 | * load, user can further extend loop device by create dev node | ||
1686 | * themselves and have kernel automatically instantiate actual | ||
1687 | * device on-demand. | ||
1688 | */ | ||
1689 | 1801 | ||
1690 | part_shift = 0; | 1802 | part_shift = 0; |
1691 | if (max_part > 0) { | 1803 | if (max_part > 0) { |
@@ -1708,57 +1820,60 @@ static int __init loop_init(void) | |||
1708 | if (max_loop > 1UL << (MINORBITS - part_shift)) | 1820 | if (max_loop > 1UL << (MINORBITS - part_shift)) |
1709 | return -EINVAL; | 1821 | return -EINVAL; |
1710 | 1822 | ||
1823 | /* | ||
1824 | * If max_loop is specified, create that many devices upfront. | ||
1825 | * This also becomes a hard limit. If max_loop is not specified, | ||
1826 | * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module | ||
1827 | * init time. Loop devices can be requested on-demand with the | ||
1828 | * /dev/loop-control interface, or be instantiated by accessing | ||
1829 | * a 'dead' device node. | ||
1830 | */ | ||
1711 | if (max_loop) { | 1831 | if (max_loop) { |
1712 | nr = max_loop; | 1832 | nr = max_loop; |
1713 | range = max_loop << part_shift; | 1833 | range = max_loop << part_shift; |
1714 | } else { | 1834 | } else { |
1715 | nr = 8; | 1835 | nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT; |
1716 | range = 1UL << MINORBITS; | 1836 | range = 1UL << MINORBITS; |
1717 | } | 1837 | } |
1718 | 1838 | ||
1719 | if (register_blkdev(LOOP_MAJOR, "loop")) | 1839 | if (register_blkdev(LOOP_MAJOR, "loop")) |
1720 | return -EIO; | 1840 | return -EIO; |
1721 | 1841 | ||
1722 | for (i = 0; i < nr; i++) { | ||
1723 | lo = loop_alloc(i); | ||
1724 | if (!lo) | ||
1725 | goto Enomem; | ||
1726 | list_add_tail(&lo->lo_list, &loop_devices); | ||
1727 | } | ||
1728 | |||
1729 | /* point of no return */ | ||
1730 | |||
1731 | list_for_each_entry(lo, &loop_devices, lo_list) | ||
1732 | add_disk(lo->lo_disk); | ||
1733 | |||
1734 | blk_register_region(MKDEV(LOOP_MAJOR, 0), range, | 1842 | blk_register_region(MKDEV(LOOP_MAJOR, 0), range, |
1735 | THIS_MODULE, loop_probe, NULL, NULL); | 1843 | THIS_MODULE, loop_probe, NULL, NULL); |
1736 | 1844 | ||
1845 | /* pre-create number of devices given by config or max_loop */ | ||
1846 | mutex_lock(&loop_index_mutex); | ||
1847 | for (i = 0; i < nr; i++) | ||
1848 | loop_add(&lo, i); | ||
1849 | mutex_unlock(&loop_index_mutex); | ||
1850 | |||
1737 | printk(KERN_INFO "loop: module loaded\n"); | 1851 | printk(KERN_INFO "loop: module loaded\n"); |
1738 | return 0; | 1852 | return 0; |
1853 | } | ||
1739 | 1854 | ||
1740 | Enomem: | 1855 | static int loop_exit_cb(int id, void *ptr, void *data) |
1741 | printk(KERN_INFO "loop: out of memory\n"); | 1856 | { |
1742 | 1857 | struct loop_device *lo = ptr; | |
1743 | list_for_each_entry_safe(lo, next, &loop_devices, lo_list) | ||
1744 | loop_free(lo); | ||
1745 | 1858 | ||
1746 | unregister_blkdev(LOOP_MAJOR, "loop"); | 1859 | loop_remove(lo); |
1747 | return -ENOMEM; | 1860 | return 0; |
1748 | } | 1861 | } |
1749 | 1862 | ||
1750 | static void __exit loop_exit(void) | 1863 | static void __exit loop_exit(void) |
1751 | { | 1864 | { |
1752 | unsigned long range; | 1865 | unsigned long range; |
1753 | struct loop_device *lo, *next; | ||
1754 | 1866 | ||
1755 | range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; | 1867 | range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; |
1756 | 1868 | ||
1757 | list_for_each_entry_safe(lo, next, &loop_devices, lo_list) | 1869 | idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); |
1758 | loop_del_one(lo); | 1870 | idr_remove_all(&loop_index_idr); |
1871 | idr_destroy(&loop_index_idr); | ||
1759 | 1872 | ||
1760 | blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); | 1873 | blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); |
1761 | unregister_blkdev(LOOP_MAJOR, "loop"); | 1874 | unregister_blkdev(LOOP_MAJOR, "loop"); |
1875 | |||
1876 | misc_deregister(&loop_misc); | ||
1762 | } | 1877 | } |
1763 | 1878 | ||
1764 | module_init(loop_init); | 1879 | module_init(loop_init); |
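Note: the loop.c conversion from a driver-private list to an idr follows the pre-3.9 two-step idr idiom: idr_pre_get() reserves memory (returning 0 on failure, not a negative errno, which the check in loop_add() above is loose about), then idr_get_new()/idr_get_new_above() inserts, with -EAGAIN meaning "preallocate again and retry". The canonical shape, as a sketch with a hypothetical example_idr, ptr, and base_id:

    int id, err;

    do {
            if (!idr_pre_get(&example_idr, GFP_KERNEL))
                    return -ENOMEM;
            err = idr_get_new_above(&example_idr, ptr, base_id, &id);
    } while (err == -EAGAIN);
    if (err)
            return err;
    /* 'id' is now >= base_id and maps back to 'ptr' */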
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 773bfa79277..ae3e167e17a 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c | |||
@@ -1184,6 +1184,7 @@ static struct of_device_id swim3_match[] = | |||
1184 | { | 1184 | { |
1185 | .compatible = "swim3" | 1185 | .compatible = "swim3" |
1186 | }, | 1186 | }, |
1187 | { /* end of list */ } | ||
1187 | }; | 1188 | }; |
1188 | 1189 | ||
1189 | static struct macio_driver swim3_driver = | 1190 | static struct macio_driver swim3_driver = |
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index b536a9cef91..9ea8c2576c7 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -123,8 +123,8 @@ static DEFINE_SPINLOCK(minor_lock); | |||
123 | #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) | 123 | #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) |
124 | #define EMULATED_HD_DISK_MINOR_OFFSET (0) | 124 | #define EMULATED_HD_DISK_MINOR_OFFSET (0) |
125 | #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) | 125 | #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) |
126 | #define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16)) | 126 | #define EMULATED_SD_DISK_MINOR_OFFSET (0) |
127 | #define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4) | 127 | #define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256) |
128 | 128 | ||
129 | #define DEV_NAME "xvd" /* name in /dev */ | 129 | #define DEV_NAME "xvd" /* name in /dev */ |
130 | 130 | ||
@@ -529,7 +529,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, | |||
529 | minor = BLKIF_MINOR_EXT(info->vdevice); | 529 | minor = BLKIF_MINOR_EXT(info->vdevice); |
530 | nr_parts = PARTS_PER_EXT_DISK; | 530 | nr_parts = PARTS_PER_EXT_DISK; |
531 | offset = minor / nr_parts; | 531 | offset = minor / nr_parts; |
532 | if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4) | 532 | if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4) |
533 | printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " | 533 | printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " |
534 | "emulated IDE disks,\n\t choose an xvd device name" | 534 | "emulated IDE disks,\n\t choose an xvd device name" |
535 | "from xvde on\n", info->vdevice); | 535 | "from xvde on\n", info->vdevice); |
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 75fb965b8f7..f997c27d79e 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c | |||
@@ -1929,11 +1929,17 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s, | |||
1929 | goto out; | 1929 | goto out; |
1930 | 1930 | ||
1931 | s->manufact.len = buf[0] << 8 | buf[1]; | 1931 | s->manufact.len = buf[0] << 8 | buf[1]; |
1932 | if (s->manufact.len < 0 || s->manufact.len > 2048) { | 1932 | if (s->manufact.len < 0) { |
1933 | cdinfo(CD_WARNING, "Received invalid manufacture info length" | 1933 | cdinfo(CD_WARNING, "Received invalid manufacture info length" |
1934 | " (%d)\n", s->manufact.len); | 1934 | " (%d)\n", s->manufact.len); |
1935 | ret = -EIO; | 1935 | ret = -EIO; |
1936 | } else { | 1936 | } else { |
1937 | if (s->manufact.len > 2048) { | ||
1938 | cdinfo(CD_WARNING, "Received invalid manufacture info " | ||
1939 | "length (%d): truncating to 2048\n", | ||
1940 | s->manufact.len); | ||
1941 | s->manufact.len = 2048; | ||
1942 | } | ||
1937 | memcpy(s->manufact.value, &buf[4], s->manufact.len); | 1943 | memcpy(s->manufact.value, &buf[4], s->manufact.len); |
1938 | } | 1944 | } |
1939 | 1945 | ||
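Note: the cdrom change downgrades an over-length manufacturer blob from a hard -EIO to a warn-and-truncate, which is safe because s->manufact.value is a fixed 2048-byte buffer and the length field comes straight off the disc. The defensive shape is clamp-then-copy; a sketch with hypothetical dst/src:

    len = min_t(int, len, sizeof(dst->value)); /* never exceed the buffer */
    memcpy(dst->value, src, len);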
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 04f1e7ce02b..f6cf448d69b 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c | |||
@@ -1670,7 +1670,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, | |||
1670 | char *type, *optype, *err, *msg; | 1670 | char *type, *optype, *err, *msg; |
1671 | unsigned long error = m->status & 0x1ff0000l; | 1671 | unsigned long error = m->status & 0x1ff0000l; |
1672 | u32 optypenum = (m->status >> 4) & 0x07; | 1672 | u32 optypenum = (m->status >> 4) & 0x07; |
1673 | u32 core_err_cnt = (m->status >> 38) && 0x7fff; | 1673 | u32 core_err_cnt = (m->status >> 38) & 0x7fff; |
1674 | u32 dimm = (m->misc >> 16) & 0x3; | 1674 | u32 dimm = (m->misc >> 16) & 0x3; |
1675 | u32 channel = (m->misc >> 18) & 0x3; | 1675 | u32 channel = (m->misc >> 18) & 0x3; |
1676 | u32 syndrome = m->misc >> 32; | 1676 | u32 syndrome = m->misc >> 32; |
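Note: the i7core_edac fix is a one-character logic bug: '&&' is logical AND, so the expression reduced the 15-bit corrected-error count to 0 or 1 instead of extracting it. A worked sketch of the difference:

    u64 status = (u64)0x123 << 38;
    u32 wrong = (status >> 38) && 0x7fff;  /* == 1: boolean result */
    u32 right = (status >> 38) & 0x7fff;   /* == 0x123: the bit field */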
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index e6ad3bb6c1a..4799393247c 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c | |||
@@ -216,15 +216,33 @@ struct inbound_phy_packet_event { | |||
216 | struct fw_cdev_event_phy_packet phy_packet; | 216 | struct fw_cdev_event_phy_packet phy_packet; |
217 | }; | 217 | }; |
218 | 218 | ||
219 | static inline void __user *u64_to_uptr(__u64 value) | 219 | #ifdef CONFIG_COMPAT |
220 | static void __user *u64_to_uptr(u64 value) | ||
221 | { | ||
222 | if (is_compat_task()) | ||
223 | return compat_ptr(value); | ||
224 | else | ||
225 | return (void __user *)(unsigned long)value; | ||
226 | } | ||
227 | |||
228 | static u64 uptr_to_u64(void __user *ptr) | ||
229 | { | ||
230 | if (is_compat_task()) | ||
231 | return ptr_to_compat(ptr); | ||
232 | else | ||
233 | return (u64)(unsigned long)ptr; | ||
234 | } | ||
235 | #else | ||
236 | static inline void __user *u64_to_uptr(u64 value) | ||
220 | { | 237 | { |
221 | return (void __user *)(unsigned long)value; | 238 | return (void __user *)(unsigned long)value; |
222 | } | 239 | } |
223 | 240 | ||
224 | static inline __u64 uptr_to_u64(void __user *ptr) | 241 | static inline u64 uptr_to_u64(void __user *ptr) |
225 | { | 242 | { |
226 | return (__u64)(unsigned long)ptr; | 243 | return (u64)(unsigned long)ptr; |
227 | } | 244 | } |
245 | #endif /* CONFIG_COMPAT */ | ||
228 | 246 | ||
229 | static int fw_device_op_open(struct inode *inode, struct file *file) | 247 | static int fw_device_op_open(struct inode *inode, struct file *file) |
230 | { | 248 | { |
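Note: the CONFIG_COMPAT variants above matter when a 32-bit process drives the char device on a 64-bit kernel: user pointers travel through fixed-width u64 ABI fields, and compat_ptr()/ptr_to_compat() perform the width-correct conversion (on s390, compat_ptr() also clears the high address bit). Call sites are unchanged; a sketch with a hypothetical event struct and u64 closure field:

    struct example_event event = { 0 };     /* hypothetical payload */
    u64 cookie = request_closure;           /* hypothetical u64 from the ABI */
    void __user *p = u64_to_uptr(cookie);   /* correct for native and compat */

    if (copy_to_user(p, &event, sizeof(event)))
            return -EFAULT;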
diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 8ba7f7928f1..f3b890da1e8 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c | |||
@@ -455,15 +455,20 @@ static struct device_attribute fw_device_attributes[] = { | |||
455 | static int read_rom(struct fw_device *device, | 455 | static int read_rom(struct fw_device *device, |
456 | int generation, int index, u32 *data) | 456 | int generation, int index, u32 *data) |
457 | { | 457 | { |
458 | int rcode; | 458 | u64 offset = (CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4; |
459 | int i, rcode; | ||
459 | 460 | ||
460 | /* device->node_id, accessed below, must not be older than generation */ | 461 | /* device->node_id, accessed below, must not be older than generation */ |
461 | smp_rmb(); | 462 | smp_rmb(); |
462 | 463 | ||
463 | rcode = fw_run_transaction(device->card, TCODE_READ_QUADLET_REQUEST, | 464 | for (i = 10; i < 100; i += 10) { |
464 | device->node_id, generation, device->max_speed, | 465 | rcode = fw_run_transaction(device->card, |
465 | (CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4, | 466 | TCODE_READ_QUADLET_REQUEST, device->node_id, |
466 | data, 4); | 467 | generation, device->max_speed, offset, data, 4); |
468 | if (rcode != RCODE_BUSY) | ||
469 | break; | ||
470 | msleep(i); | ||
471 | } | ||
467 | be32_to_cpus(data); | 472 | be32_to_cpus(data); |
468 | 473 | ||
469 | return rcode; | 474 | return rcode; |
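Note: read_rom() above now retries while the target answers RCODE_BUSY, sleeping 10, 20, ... 90 ms between attempts (at most nine tries, roughly 450 ms of sleep in the worst case). Stripped of the FireWire specifics, it is a linear-backoff retry loop; a sketch with a hypothetical do_request():

    int i, ret;

    for (i = 10; i < 100; i += 10) {
            ret = do_request();
            if (ret != -EBUSY)      /* success or a hard error: stop retrying */
                    break;
            msleep(i);              /* back off a little longer each round */
    }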
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index bcf792fac44..57cd3a406ed 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c | |||
@@ -2179,8 +2179,13 @@ static int ohci_enable(struct fw_card *card, | |||
2179 | ohci_driver_name, ohci)) { | 2179 | ohci_driver_name, ohci)) { |
2180 | fw_error("Failed to allocate interrupt %d.\n", dev->irq); | 2180 | fw_error("Failed to allocate interrupt %d.\n", dev->irq); |
2181 | pci_disable_msi(dev); | 2181 | pci_disable_msi(dev); |
2182 | dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE, | 2182 | |
2183 | ohci->config_rom, ohci->config_rom_bus); | 2183 | if (config_rom) { |
2184 | dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE, | ||
2185 | ohci->next_config_rom, | ||
2186 | ohci->next_config_rom_bus); | ||
2187 | ohci->next_config_rom = NULL; | ||
2188 | } | ||
2184 | return -EIO; | 2189 | return -EIO; |
2185 | } | 2190 | } |
2186 | 2191 | ||
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a8ab6263e0d..3c395a59da3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c | |||
@@ -499,7 +499,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) | |||
499 | seq_printf(m, "Interrupts received: %d\n", | 499 | seq_printf(m, "Interrupts received: %d\n", |
500 | atomic_read(&dev_priv->irq_received)); | 500 | atomic_read(&dev_priv->irq_received)); |
501 | for (i = 0; i < I915_NUM_RINGS; i++) { | 501 | for (i = 0; i < I915_NUM_RINGS; i++) { |
502 | if (IS_GEN6(dev)) { | 502 | if (IS_GEN6(dev) || IS_GEN7(dev)) { |
503 | seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", | 503 | seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", |
504 | dev_priv->ring[i].name, | 504 | dev_priv->ring[i].name, |
505 | I915_READ_IMR(&dev_priv->ring[i])); | 505 | I915_READ_IMR(&dev_priv->ring[i])); |
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index feb4f164fd1..7916bd97d5c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/io-mapping.h> | 36 | #include <linux/io-mapping.h> |
37 | #include <linux/i2c.h> | 37 | #include <linux/i2c.h> |
38 | #include <drm/intel-gtt.h> | 38 | #include <drm/intel-gtt.h> |
39 | #include <linux/backlight.h> | ||
39 | 40 | ||
40 | /* General customization: | 41 | /* General customization: |
41 | */ | 42 | */ |
@@ -690,6 +691,7 @@ typedef struct drm_i915_private { | |||
690 | int child_dev_num; | 691 | int child_dev_num; |
691 | struct child_device_config *child_dev; | 692 | struct child_device_config *child_dev; |
692 | struct drm_connector *int_lvds_connector; | 693 | struct drm_connector *int_lvds_connector; |
694 | struct drm_connector *int_edp_connector; | ||
693 | 695 | ||
694 | bool mchbar_need_disable; | 696 | bool mchbar_need_disable; |
695 | 697 | ||
@@ -723,6 +725,8 @@ typedef struct drm_i915_private { | |||
723 | /* list of fbdev register on this device */ | 725 | /* list of fbdev register on this device */ |
724 | struct intel_fbdev *fbdev; | 726 | struct intel_fbdev *fbdev; |
725 | 727 | ||
728 | struct backlight_device *backlight; | ||
729 | |||
726 | struct drm_property *broadcast_rgb_property; | 730 | struct drm_property *broadcast_rgb_property; |
727 | struct drm_property *force_audio_property; | 731 | struct drm_property *force_audio_property; |
728 | 732 | ||
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 02f96fd0d52..9cbb0cd8f46 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c | |||
@@ -2058,8 +2058,10 @@ void intel_irq_init(struct drm_device *dev) | |||
2058 | dev->driver->get_vblank_counter = gm45_get_vblank_counter; | 2058 | dev->driver->get_vblank_counter = gm45_get_vblank_counter; |
2059 | } | 2059 | } |
2060 | 2060 | ||
2061 | 2061 | if (drm_core_check_feature(dev, DRIVER_MODESET)) | |
2062 | dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; | 2062 | dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; |
2063 | else | ||
2064 | dev->driver->get_vblank_timestamp = NULL; | ||
2063 | dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; | 2065 | dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; |
2064 | 2066 | ||
2065 | if (IS_IVYBRIDGE(dev)) { | 2067 | if (IS_IVYBRIDGE(dev)) { |
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d1331f771e2..542453f7498 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h | |||
@@ -375,6 +375,7 @@ | |||
375 | # define MI_FLUSH_ENABLE (1 << 11) | 375 | # define MI_FLUSH_ENABLE (1 << 11) |
376 | 376 | ||
377 | #define GFX_MODE 0x02520 | 377 | #define GFX_MODE 0x02520 |
378 | #define GFX_MODE_GEN7 0x0229c | ||
378 | #define GFX_RUN_LIST_ENABLE (1<<15) | 379 | #define GFX_RUN_LIST_ENABLE (1<<15) |
379 | #define GFX_TLB_INVALIDATE_ALWAYS (1<<13) | 380 | #define GFX_TLB_INVALIDATE_ALWAYS (1<<13) |
380 | #define GFX_SURFACE_FAULT_ENABLE (1<<12) | 381 | #define GFX_SURFACE_FAULT_ENABLE (1<<12) |
@@ -382,6 +383,9 @@ | |||
382 | #define GFX_PSMI_GRANULARITY (1<<10) | 383 | #define GFX_PSMI_GRANULARITY (1<<10) |
383 | #define GFX_PPGTT_ENABLE (1<<9) | 384 | #define GFX_PPGTT_ENABLE (1<<9) |
384 | 385 | ||
386 | #define GFX_MODE_ENABLE(bit) (((bit) << 16) | (bit)) | ||
387 | #define GFX_MODE_DISABLE(bit) (((bit) << 16) | (0)) | ||
388 | |||
385 | #define SCPD0 0x0209c /* 915+ only */ | 389 | #define SCPD0 0x0209c /* 915+ only */ |
386 | #define IER 0x020a0 | 390 | #define IER 0x020a0 |
387 | #define IIR 0x020a4 | 391 | #define IIR 0x020a4 |
@@ -1318,6 +1322,7 @@ | |||
1318 | #define ADPA_PIPE_SELECT_MASK (1<<30) | 1322 | #define ADPA_PIPE_SELECT_MASK (1<<30) |
1319 | #define ADPA_PIPE_A_SELECT 0 | 1323 | #define ADPA_PIPE_A_SELECT 0 |
1320 | #define ADPA_PIPE_B_SELECT (1<<30) | 1324 | #define ADPA_PIPE_B_SELECT (1<<30) |
1325 | #define ADPA_PIPE_SELECT(pipe) ((pipe) << 30) | ||
1321 | #define ADPA_USE_VGA_HVPOLARITY (1<<15) | 1326 | #define ADPA_USE_VGA_HVPOLARITY (1<<15) |
1322 | #define ADPA_SETS_HVPOLARITY 0 | 1327 | #define ADPA_SETS_HVPOLARITY 0 |
1323 | #define ADPA_VSYNC_CNTL_DISABLE (1<<11) | 1328 | #define ADPA_VSYNC_CNTL_DISABLE (1<<11) |
@@ -1460,6 +1465,7 @@ | |||
1460 | /* Selects pipe B for LVDS data. Must be set on pre-965. */ | 1465 | /* Selects pipe B for LVDS data. Must be set on pre-965. */ |
1461 | #define LVDS_PIPEB_SELECT (1 << 30) | 1466 | #define LVDS_PIPEB_SELECT (1 << 30) |
1462 | #define LVDS_PIPE_MASK (1 << 30) | 1467 | #define LVDS_PIPE_MASK (1 << 30) |
1468 | #define LVDS_PIPE(pipe) ((pipe) << 30) | ||
1463 | /* LVDS dithering flag on 965/g4x platform */ | 1469 | /* LVDS dithering flag on 965/g4x platform */ |
1464 | #define LVDS_ENABLE_DITHER (1 << 25) | 1470 | #define LVDS_ENABLE_DITHER (1 << 25) |
1465 | /* LVDS sync polarity flags. Set to invert (i.e. negative) */ | 1471 | /* LVDS sync polarity flags. Set to invert (i.e. negative) */ |
@@ -1499,9 +1505,6 @@ | |||
1499 | #define LVDS_B0B3_POWER_DOWN (0 << 2) | 1505 | #define LVDS_B0B3_POWER_DOWN (0 << 2) |
1500 | #define LVDS_B0B3_POWER_UP (3 << 2) | 1506 | #define LVDS_B0B3_POWER_UP (3 << 2) |
1501 | 1507 | ||
1502 | #define LVDS_PIPE_ENABLED(V, P) \ | ||
1503 | (((V) & (LVDS_PIPE_MASK | LVDS_PORT_EN)) == ((P) << 30 | LVDS_PORT_EN)) | ||
1504 | |||
1505 | /* Video Data Island Packet control */ | 1508 | /* Video Data Island Packet control */ |
1506 | #define VIDEO_DIP_DATA 0x61178 | 1509 | #define VIDEO_DIP_DATA 0x61178 |
1507 | #define VIDEO_DIP_CTL 0x61170 | 1510 | #define VIDEO_DIP_CTL 0x61170 |
@@ -3256,14 +3259,12 @@ | |||
3256 | #define ADPA_CRT_HOTPLUG_VOLREF_475MV (1<<17) | 3259 | #define ADPA_CRT_HOTPLUG_VOLREF_475MV (1<<17) |
3257 | #define ADPA_CRT_HOTPLUG_FORCE_TRIGGER (1<<16) | 3260 | #define ADPA_CRT_HOTPLUG_FORCE_TRIGGER (1<<16) |
3258 | 3261 | ||
3259 | #define ADPA_PIPE_ENABLED(V, P) \ | ||
3260 | (((V) & (ADPA_TRANS_SELECT_MASK | ADPA_DAC_ENABLE)) == ((P) << 30 | ADPA_DAC_ENABLE)) | ||
3261 | |||
3262 | /* or SDVOB */ | 3262 | /* or SDVOB */ |
3263 | #define HDMIB 0xe1140 | 3263 | #define HDMIB 0xe1140 |
3264 | #define PORT_ENABLE (1 << 31) | 3264 | #define PORT_ENABLE (1 << 31) |
3265 | #define TRANSCODER_A (0) | 3265 | #define TRANSCODER_A (0) |
3266 | #define TRANSCODER_B (1 << 30) | 3266 | #define TRANSCODER_B (1 << 30) |
3267 | #define TRANSCODER(pipe) ((pipe) << 30) | ||
3267 | #define TRANSCODER_MASK (1 << 30) | 3268 | #define TRANSCODER_MASK (1 << 30) |
3268 | #define COLOR_FORMAT_8bpc (0) | 3269 | #define COLOR_FORMAT_8bpc (0) |
3269 | #define COLOR_FORMAT_12bpc (3 << 26) | 3270 | #define COLOR_FORMAT_12bpc (3 << 26) |
@@ -3280,9 +3281,6 @@ | |||
3280 | #define HSYNC_ACTIVE_HIGH (1 << 3) | 3281 | #define HSYNC_ACTIVE_HIGH (1 << 3) |
3281 | #define PORT_DETECTED (1 << 2) | 3282 | #define PORT_DETECTED (1 << 2) |
3282 | 3283 | ||
3283 | #define HDMI_PIPE_ENABLED(V, P) \ | ||
3284 | (((V) & (TRANSCODER_MASK | PORT_ENABLE)) == ((P) << 30 | PORT_ENABLE)) | ||
3285 | |||
3286 | /* PCH SDVOB multiplex with HDMIB */ | 3284 | /* PCH SDVOB multiplex with HDMIB */ |
3287 | #define PCH_SDVOB HDMIB | 3285 | #define PCH_SDVOB HDMIB |
3288 | 3286 | ||
@@ -3349,6 +3347,7 @@ | |||
3349 | #define PORT_TRANS_B_SEL_CPT (1<<29) | 3347 | #define PORT_TRANS_B_SEL_CPT (1<<29) |
3350 | #define PORT_TRANS_C_SEL_CPT (2<<29) | 3348 | #define PORT_TRANS_C_SEL_CPT (2<<29) |
3351 | #define PORT_TRANS_SEL_MASK (3<<29) | 3349 | #define PORT_TRANS_SEL_MASK (3<<29) |
3350 | #define PORT_TRANS_SEL_CPT(pipe) ((pipe) << 29) | ||
3352 | 3351 | ||
3353 | #define TRANS_DP_CTL_A 0xe0300 | 3352 | #define TRANS_DP_CTL_A 0xe0300 |
3354 | #define TRANS_DP_CTL_B 0xe1300 | 3353 | #define TRANS_DP_CTL_B 0xe1300 |
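Note: the i915_reg.h hunks above retire the value-testing macros (LVDS_PIPE_ENABLED() and friends) in favour of pipe-parameterized field encoders, moving the PCH-type-aware checks into the new *_pipe_enabled() helpers in intel_display.c below. The macros now just place a pipe number at the right bit position; a sketch of a pre-CPT check built from them:

    /* pre-CPT PCH: the LVDS pipe select is bit 30 */
    if ((val & LVDS_PIPE_MASK) == LVDS_PIPE(pipe))
            return true;    /* this pipe drives the LVDS port */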
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 87677d60d0d..f10742359ec 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c | |||
@@ -871,7 +871,8 @@ int i915_restore_state(struct drm_device *dev) | |||
871 | } | 871 | } |
872 | mutex_unlock(&dev->struct_mutex); | 872 | mutex_unlock(&dev->struct_mutex); |
873 | 873 | ||
874 | intel_init_clock_gating(dev); | 874 | if (drm_core_check_feature(dev, DRIVER_MODESET)) |
875 | intel_init_clock_gating(dev); | ||
875 | 876 | ||
876 | if (IS_IRONLAKE_M(dev)) { | 877 | if (IS_IRONLAKE_M(dev)) { |
877 | ironlake_enable_drps(dev); | 878 | ironlake_enable_drps(dev); |
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 35364e68a09..ee1d701317f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c | |||
@@ -980,8 +980,8 @@ static void assert_transcoder_disabled(struct drm_i915_private *dev_priv, | |||
980 | pipe_name(pipe)); | 980 | pipe_name(pipe)); |
981 | } | 981 | } |
982 | 982 | ||
983 | static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe, | 983 | static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, |
984 | int reg, u32 port_sel, u32 val) | 984 | enum pipe pipe, u32 port_sel, u32 val) |
985 | { | 985 | { |
986 | if ((val & DP_PORT_EN) == 0) | 986 | if ((val & DP_PORT_EN) == 0) |
987 | return false; | 987 | return false; |
@@ -998,11 +998,58 @@ static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe, | |||
998 | return true; | 998 | return true; |
999 | } | 999 | } |
1000 | 1000 | ||
1001 | static bool hdmi_pipe_enabled(struct drm_i915_private *dev_priv, | ||
1002 | enum pipe pipe, u32 val) | ||
1003 | { | ||
1004 | if ((val & PORT_ENABLE) == 0) | ||
1005 | return false; | ||
1006 | |||
1007 | if (HAS_PCH_CPT(dev_priv->dev)) { | ||
1008 | if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe)) | ||
1009 | return false; | ||
1010 | } else { | ||
1011 | if ((val & TRANSCODER_MASK) != TRANSCODER(pipe)) | ||
1012 | return false; | ||
1013 | } | ||
1014 | return true; | ||
1015 | } | ||
1016 | |||
1017 | static bool lvds_pipe_enabled(struct drm_i915_private *dev_priv, | ||
1018 | enum pipe pipe, u32 val) | ||
1019 | { | ||
1020 | if ((val & LVDS_PORT_EN) == 0) | ||
1021 | return false; | ||
1022 | |||
1023 | if (HAS_PCH_CPT(dev_priv->dev)) { | ||
1024 | if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe)) | ||
1025 | return false; | ||
1026 | } else { | ||
1027 | if ((val & LVDS_PIPE_MASK) != LVDS_PIPE(pipe)) | ||
1028 | return false; | ||
1029 | } | ||
1030 | return true; | ||
1031 | } | ||
1032 | |||
1033 | static bool adpa_pipe_enabled(struct drm_i915_private *dev_priv, | ||
1034 | enum pipe pipe, u32 val) | ||
1035 | { | ||
1036 | if ((val & ADPA_DAC_ENABLE) == 0) | ||
1037 | return false; | ||
1038 | if (HAS_PCH_CPT(dev_priv->dev)) { | ||
1039 | if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe)) | ||
1040 | return false; | ||
1041 | } else { | ||
1042 | if ((val & ADPA_PIPE_SELECT_MASK) != ADPA_PIPE_SELECT(pipe)) | ||
1043 | return false; | ||
1044 | } | ||
1045 | return true; | ||
1046 | } | ||
1047 | |||
1001 | static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv, | 1048 | static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv, |
1002 | enum pipe pipe, int reg, u32 port_sel) | 1049 | enum pipe pipe, int reg, u32 port_sel) |
1003 | { | 1050 | { |
1004 | u32 val = I915_READ(reg); | 1051 | u32 val = I915_READ(reg); |
1005 | WARN(dp_pipe_enabled(dev_priv, pipe, reg, port_sel, val), | 1052 | WARN(dp_pipe_enabled(dev_priv, pipe, port_sel, val), |
1006 | "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n", | 1053 | "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n", |
1007 | reg, pipe_name(pipe)); | 1054 | reg, pipe_name(pipe)); |
1008 | } | 1055 | } |
@@ -1011,7 +1058,7 @@ static void assert_pch_hdmi_disabled(struct drm_i915_private *dev_priv, | |||
1011 | enum pipe pipe, int reg) | 1058 | enum pipe pipe, int reg) |
1012 | { | 1059 | { |
1013 | u32 val = I915_READ(reg); | 1060 | u32 val = I915_READ(reg); |
1014 | WARN(HDMI_PIPE_ENABLED(val, pipe), | 1061 | WARN(hdmi_pipe_enabled(dev_priv, pipe, val), |
1015 | "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n", | 1062 | "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n", |
1016 | reg, pipe_name(pipe)); | 1063 | reg, pipe_name(pipe)); |
1017 | } | 1064 | } |
@@ -1028,13 +1075,13 @@ static void assert_pch_ports_disabled(struct drm_i915_private *dev_priv, | |||
1028 | 1075 | ||
1029 | reg = PCH_ADPA; | 1076 | reg = PCH_ADPA; |
1030 | val = I915_READ(reg); | 1077 | val = I915_READ(reg); |
1031 | WARN(ADPA_PIPE_ENABLED(val, pipe), | 1078 | WARN(adpa_pipe_enabled(dev_priv, pipe, val), |
1032 | "PCH VGA enabled on transcoder %c, should be disabled\n", | 1079 | "PCH VGA enabled on transcoder %c, should be disabled\n", |
1033 | pipe_name(pipe)); | 1080 | pipe_name(pipe)); |
1034 | 1081 | ||
1035 | reg = PCH_LVDS; | 1082 | reg = PCH_LVDS; |
1036 | val = I915_READ(reg); | 1083 | val = I915_READ(reg); |
1037 | WARN(LVDS_PIPE_ENABLED(val, pipe), | 1084 | WARN(lvds_pipe_enabled(dev_priv, pipe, val), |
1038 | "PCH LVDS enabled on transcoder %c, should be disabled\n", | 1085 | "PCH LVDS enabled on transcoder %c, should be disabled\n", |
1039 | pipe_name(pipe)); | 1086 | pipe_name(pipe)); |
1040 | 1087 | ||
@@ -1360,7 +1407,7 @@ static void disable_pch_dp(struct drm_i915_private *dev_priv, | |||
1360 | enum pipe pipe, int reg, u32 port_sel) | 1407 | enum pipe pipe, int reg, u32 port_sel) |
1361 | { | 1408 | { |
1362 | u32 val = I915_READ(reg); | 1409 | u32 val = I915_READ(reg); |
1363 | if (dp_pipe_enabled(dev_priv, pipe, reg, port_sel, val)) { | 1410 | if (dp_pipe_enabled(dev_priv, pipe, port_sel, val)) { |
1364 | DRM_DEBUG_KMS("Disabling pch dp %x on pipe %d\n", reg, pipe); | 1411 | DRM_DEBUG_KMS("Disabling pch dp %x on pipe %d\n", reg, pipe); |
1365 | I915_WRITE(reg, val & ~DP_PORT_EN); | 1412 | I915_WRITE(reg, val & ~DP_PORT_EN); |
1366 | } | 1413 | } |
@@ -1370,7 +1417,7 @@ static void disable_pch_hdmi(struct drm_i915_private *dev_priv, | |||
1370 | enum pipe pipe, int reg) | 1417 | enum pipe pipe, int reg) |
1371 | { | 1418 | { |
1372 | u32 val = I915_READ(reg); | 1419 | u32 val = I915_READ(reg); |
1373 | if (HDMI_PIPE_ENABLED(val, pipe)) { | 1420 | if (hdmi_pipe_enabled(dev_priv, pipe, val)) { |
1374 | DRM_DEBUG_KMS("Disabling pch HDMI %x on pipe %d\n", | 1421 | DRM_DEBUG_KMS("Disabling pch HDMI %x on pipe %d\n", |
1375 | reg, pipe); | 1422 | reg, pipe); |
1376 | I915_WRITE(reg, val & ~PORT_ENABLE); | 1423 | I915_WRITE(reg, val & ~PORT_ENABLE); |
@@ -1392,12 +1439,13 @@ static void intel_disable_pch_ports(struct drm_i915_private *dev_priv, | |||
1392 | 1439 | ||
1393 | reg = PCH_ADPA; | 1440 | reg = PCH_ADPA; |
1394 | val = I915_READ(reg); | 1441 | val = I915_READ(reg); |
1395 | if (ADPA_PIPE_ENABLED(val, pipe)) | 1442 | if (adpa_pipe_enabled(dev_priv, pipe, val)) |
1396 | I915_WRITE(reg, val & ~ADPA_DAC_ENABLE); | 1443 | I915_WRITE(reg, val & ~ADPA_DAC_ENABLE); |
1397 | 1444 | ||
1398 | reg = PCH_LVDS; | 1445 | reg = PCH_LVDS; |
1399 | val = I915_READ(reg); | 1446 | val = I915_READ(reg); |
1400 | if (LVDS_PIPE_ENABLED(val, pipe)) { | 1447 | if (lvds_pipe_enabled(dev_priv, pipe, val)) { |
1448 | DRM_DEBUG_KMS("disable lvds on pipe %d val 0x%08x\n", pipe, val); | ||
1401 | I915_WRITE(reg, val & ~LVDS_PORT_EN); | 1449 | I915_WRITE(reg, val & ~LVDS_PORT_EN); |
1402 | POSTING_READ(reg); | 1450 | POSTING_READ(reg); |
1403 | udelay(100); | 1451 | udelay(100); |
@@ -5049,6 +5097,81 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc, | |||
5049 | return ret; | 5097 | return ret; |
5050 | } | 5098 | } |
5051 | 5099 | ||
5100 | static void ironlake_update_pch_refclk(struct drm_device *dev) | ||
5101 | { | ||
5102 | struct drm_i915_private *dev_priv = dev->dev_private; | ||
5103 | struct drm_mode_config *mode_config = &dev->mode_config; | ||
5104 | struct drm_crtc *crtc; | ||
5105 | struct intel_encoder *encoder; | ||
5106 | struct intel_encoder *has_edp_encoder = NULL; | ||
5107 | u32 temp; | ||
5108 | bool has_lvds = false; | ||
5109 | |||
5110 | /* We need to take the global config into account */ | ||
5111 | list_for_each_entry(crtc, &mode_config->crtc_list, head) { | ||
5112 | if (!crtc->enabled) | ||
5113 | continue; | ||
5114 | |||
5115 | list_for_each_entry(encoder, &mode_config->encoder_list, | ||
5116 | base.head) { | ||
5117 | if (encoder->base.crtc != crtc) | ||
5118 | continue; | ||
5119 | |||
5120 | switch (encoder->type) { | ||
5121 | case INTEL_OUTPUT_LVDS: | ||
5122 | has_lvds = true; | ||
5123 | case INTEL_OUTPUT_EDP: | ||
5124 | has_edp_encoder = encoder; | ||
5125 | break; | ||
5126 | } | ||
5127 | } | ||
5128 | } | ||
5129 | |||
5130 | /* Ironlake: try to setup display ref clock before DPLL | ||
5131 | * enabling. This is only under driver's control after | ||
5132 | * PCH B stepping, previous chipset stepping should be | ||
5133 | * ignoring this setting. | ||
5134 | */ | ||
5135 | temp = I915_READ(PCH_DREF_CONTROL); | ||
5136 | /* Always enable nonspread source */ | ||
5137 | temp &= ~DREF_NONSPREAD_SOURCE_MASK; | ||
5138 | temp |= DREF_NONSPREAD_SOURCE_ENABLE; | ||
5139 | temp &= ~DREF_SSC_SOURCE_MASK; | ||
5140 | temp |= DREF_SSC_SOURCE_ENABLE; | ||
5141 | I915_WRITE(PCH_DREF_CONTROL, temp); | ||
5142 | |||
5143 | POSTING_READ(PCH_DREF_CONTROL); | ||
5144 | udelay(200); | ||
5145 | |||
5146 | if (has_edp_encoder) { | ||
5147 | if (intel_panel_use_ssc(dev_priv)) { | ||
5148 | temp |= DREF_SSC1_ENABLE; | ||
5149 | I915_WRITE(PCH_DREF_CONTROL, temp); | ||
5150 | |||
5151 | POSTING_READ(PCH_DREF_CONTROL); | ||
5152 | udelay(200); | ||
5153 | } | ||
5154 | temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK; | ||
5155 | |||
5156 | /* Enable CPU source on CPU attached eDP */ | ||
5157 | if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) { | ||
5158 | if (intel_panel_use_ssc(dev_priv)) | ||
5159 | temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD; | ||
5160 | else | ||
5161 | temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD; | ||
5162 | } else { | ||
5163 | /* Enable SSC on PCH eDP if needed */ | ||
5164 | if (intel_panel_use_ssc(dev_priv)) { | ||
5165 | DRM_ERROR("enabling SSC on PCH\n"); | ||
5166 | temp |= DREF_SUPERSPREAD_SOURCE_ENABLE; | ||
5167 | } | ||
5168 | } | ||
5169 | I915_WRITE(PCH_DREF_CONTROL, temp); | ||
5170 | POSTING_READ(PCH_DREF_CONTROL); | ||
5171 | udelay(200); | ||
5172 | } | ||
5173 | } | ||
5174 | |||
5052 | static int ironlake_crtc_mode_set(struct drm_crtc *crtc, | 5175 | static int ironlake_crtc_mode_set(struct drm_crtc *crtc, |
5053 | struct drm_display_mode *mode, | 5176 | struct drm_display_mode *mode, |
5054 | struct drm_display_mode *adjusted_mode, | 5177 | struct drm_display_mode *adjusted_mode, |
@@ -5244,49 +5367,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, | |||
5244 | ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw, | 5367 | ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw, |
5245 | &m_n); | 5368 | &m_n); |
5246 | 5369 | ||
5247 | /* Ironlake: try to setup display ref clock before DPLL | 5370 | ironlake_update_pch_refclk(dev); |
5248 | * enabling. This is only under driver's control after | ||
5249 | * PCH B stepping, previous chipset stepping should be | ||
5250 | * ignoring this setting. | ||
5251 | */ | ||
5252 | temp = I915_READ(PCH_DREF_CONTROL); | ||
5253 | /* Always enable nonspread source */ | ||
5254 | temp &= ~DREF_NONSPREAD_SOURCE_MASK; | ||
5255 | temp |= DREF_NONSPREAD_SOURCE_ENABLE; | ||
5256 | temp &= ~DREF_SSC_SOURCE_MASK; | ||
5257 | temp |= DREF_SSC_SOURCE_ENABLE; | ||
5258 | I915_WRITE(PCH_DREF_CONTROL, temp); | ||
5259 | |||
5260 | POSTING_READ(PCH_DREF_CONTROL); | ||
5261 | udelay(200); | ||
5262 | |||
5263 | if (has_edp_encoder) { | ||
5264 | if (intel_panel_use_ssc(dev_priv)) { | ||
5265 | temp |= DREF_SSC1_ENABLE; | ||
5266 | I915_WRITE(PCH_DREF_CONTROL, temp); | ||
5267 | |||
5268 | POSTING_READ(PCH_DREF_CONTROL); | ||
5269 | udelay(200); | ||
5270 | } | ||
5271 | temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK; | ||
5272 | |||
5273 | /* Enable CPU source on CPU attached eDP */ | ||
5274 | if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) { | ||
5275 | if (intel_panel_use_ssc(dev_priv)) | ||
5276 | temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD; | ||
5277 | else | ||
5278 | temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD; | ||
5279 | } else { | ||
5280 | /* Enable SSC on PCH eDP if needed */ | ||
5281 | if (intel_panel_use_ssc(dev_priv)) { | ||
5282 | DRM_ERROR("enabling SSC on PCH\n"); | ||
5283 | temp |= DREF_SUPERSPREAD_SOURCE_ENABLE; | ||
5284 | } | ||
5285 | } | ||
5286 | I915_WRITE(PCH_DREF_CONTROL, temp); | ||
5287 | POSTING_READ(PCH_DREF_CONTROL); | ||
5288 | udelay(200); | ||
5289 | } | ||
5290 | 5371 | ||
5291 | fp = clock.n << 16 | clock.m1 << 8 | clock.m2; | 5372 | fp = clock.n << 16 | clock.m1 << 8 | clock.m2; |
5292 | if (has_reduced_clock) | 5373 | if (has_reduced_clock) |
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0feae908bb3..44fef5e1c49 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c | |||
@@ -1841,6 +1841,11 @@ done: | |||
1841 | static void | 1841 | static void |
1842 | intel_dp_destroy (struct drm_connector *connector) | 1842 | intel_dp_destroy (struct drm_connector *connector) |
1843 | { | 1843 | { |
1844 | struct drm_device *dev = connector->dev; | ||
1845 | |||
1846 | if (intel_dpd_is_edp(dev)) | ||
1847 | intel_panel_destroy_backlight(dev); | ||
1848 | |||
1844 | drm_sysfs_connector_remove(connector); | 1849 | drm_sysfs_connector_remove(connector); |
1845 | drm_connector_cleanup(connector); | 1850 | drm_connector_cleanup(connector); |
1846 | kfree(connector); | 1851 | kfree(connector); |
@@ -2072,6 +2077,8 @@ intel_dp_init(struct drm_device *dev, int output_reg) | |||
2072 | DRM_MODE_TYPE_PREFERRED; | 2077 | DRM_MODE_TYPE_PREFERRED; |
2073 | } | 2078 | } |
2074 | } | 2079 | } |
2080 | dev_priv->int_edp_connector = connector; | ||
2081 | intel_panel_setup_backlight(dev); | ||
2075 | } | 2082 | } |
2076 | 2083 | ||
2077 | intel_dp_add_properties(intel_dp, connector); | 2084 | intel_dp_add_properties(intel_dp, connector); |
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7b330e76a43..0b2ee9d3998 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h | |||
@@ -297,9 +297,10 @@ extern void intel_pch_panel_fitting(struct drm_device *dev, | |||
297 | extern u32 intel_panel_get_max_backlight(struct drm_device *dev); | 297 | extern u32 intel_panel_get_max_backlight(struct drm_device *dev); |
298 | extern u32 intel_panel_get_backlight(struct drm_device *dev); | 298 | extern u32 intel_panel_get_backlight(struct drm_device *dev); |
299 | extern void intel_panel_set_backlight(struct drm_device *dev, u32 level); | 299 | extern void intel_panel_set_backlight(struct drm_device *dev, u32 level); |
300 | extern void intel_panel_setup_backlight(struct drm_device *dev); | 300 | extern int intel_panel_setup_backlight(struct drm_device *dev); |
301 | extern void intel_panel_enable_backlight(struct drm_device *dev); | 301 | extern void intel_panel_enable_backlight(struct drm_device *dev); |
302 | extern void intel_panel_disable_backlight(struct drm_device *dev); | 302 | extern void intel_panel_disable_backlight(struct drm_device *dev); |
303 | extern void intel_panel_destroy_backlight(struct drm_device *dev); | ||
303 | extern enum drm_connector_status intel_panel_detect(struct drm_device *dev); | 304 | extern enum drm_connector_status intel_panel_detect(struct drm_device *dev); |
304 | 305 | ||
305 | extern void intel_crtc_load_lut(struct drm_crtc *crtc); | 306 | extern void intel_crtc_load_lut(struct drm_crtc *crtc); |
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 2e8ddfcba40..31da77f5c05 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c | |||
@@ -72,14 +72,16 @@ static void intel_lvds_enable(struct intel_lvds *intel_lvds) | |||
72 | { | 72 | { |
73 | struct drm_device *dev = intel_lvds->base.base.dev; | 73 | struct drm_device *dev = intel_lvds->base.base.dev; |
74 | struct drm_i915_private *dev_priv = dev->dev_private; | 74 | struct drm_i915_private *dev_priv = dev->dev_private; |
75 | u32 ctl_reg, lvds_reg; | 75 | u32 ctl_reg, lvds_reg, stat_reg; |
76 | 76 | ||
77 | if (HAS_PCH_SPLIT(dev)) { | 77 | if (HAS_PCH_SPLIT(dev)) { |
78 | ctl_reg = PCH_PP_CONTROL; | 78 | ctl_reg = PCH_PP_CONTROL; |
79 | lvds_reg = PCH_LVDS; | 79 | lvds_reg = PCH_LVDS; |
80 | stat_reg = PCH_PP_STATUS; | ||
80 | } else { | 81 | } else { |
81 | ctl_reg = PP_CONTROL; | 82 | ctl_reg = PP_CONTROL; |
82 | lvds_reg = LVDS; | 83 | lvds_reg = LVDS; |
84 | stat_reg = PP_STATUS; | ||
83 | } | 85 | } |
84 | 86 | ||
85 | I915_WRITE(lvds_reg, I915_READ(lvds_reg) | LVDS_PORT_EN); | 87 | I915_WRITE(lvds_reg, I915_READ(lvds_reg) | LVDS_PORT_EN); |
@@ -94,17 +96,16 @@ static void intel_lvds_enable(struct intel_lvds *intel_lvds) | |||
94 | DRM_DEBUG_KMS("applying panel-fitter: %x, %x\n", | 96 | DRM_DEBUG_KMS("applying panel-fitter: %x, %x\n", |
95 | intel_lvds->pfit_control, | 97 | intel_lvds->pfit_control, |
96 | intel_lvds->pfit_pgm_ratios); | 98 | intel_lvds->pfit_pgm_ratios); |
97 | if (wait_for((I915_READ(PP_STATUS) & PP_ON) == 0, 1000)) { | 99 | |
98 | DRM_ERROR("timed out waiting for panel to power off\n"); | 100 | I915_WRITE(PFIT_PGM_RATIOS, intel_lvds->pfit_pgm_ratios); |
99 | } else { | 101 | I915_WRITE(PFIT_CONTROL, intel_lvds->pfit_control); |
100 | I915_WRITE(PFIT_PGM_RATIOS, intel_lvds->pfit_pgm_ratios); | 102 | intel_lvds->pfit_dirty = false; |
101 | I915_WRITE(PFIT_CONTROL, intel_lvds->pfit_control); | ||
102 | intel_lvds->pfit_dirty = false; | ||
103 | } | ||
104 | } | 103 | } |
105 | 104 | ||
106 | I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON); | 105 | I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON); |
107 | POSTING_READ(lvds_reg); | 106 | POSTING_READ(lvds_reg); |
107 | if (wait_for((I915_READ(stat_reg) & PP_ON) != 0, 1000)) | ||
108 | DRM_ERROR("timed out waiting for panel to power on\n"); | ||
108 | 109 | ||
109 | intel_panel_enable_backlight(dev); | 110 | intel_panel_enable_backlight(dev); |
110 | } | 111 | } |
@@ -113,24 +114,25 @@ static void intel_lvds_disable(struct intel_lvds *intel_lvds) | |||
113 | { | 114 | { |
114 | struct drm_device *dev = intel_lvds->base.base.dev; | 115 | struct drm_device *dev = intel_lvds->base.base.dev; |
115 | struct drm_i915_private *dev_priv = dev->dev_private; | 116 | struct drm_i915_private *dev_priv = dev->dev_private; |
116 | u32 ctl_reg, lvds_reg; | 117 | u32 ctl_reg, lvds_reg, stat_reg; |
117 | 118 | ||
118 | if (HAS_PCH_SPLIT(dev)) { | 119 | if (HAS_PCH_SPLIT(dev)) { |
119 | ctl_reg = PCH_PP_CONTROL; | 120 | ctl_reg = PCH_PP_CONTROL; |
120 | lvds_reg = PCH_LVDS; | 121 | lvds_reg = PCH_LVDS; |
122 | stat_reg = PCH_PP_STATUS; | ||
121 | } else { | 123 | } else { |
122 | ctl_reg = PP_CONTROL; | 124 | ctl_reg = PP_CONTROL; |
123 | lvds_reg = LVDS; | 125 | lvds_reg = LVDS; |
126 | stat_reg = PP_STATUS; | ||
124 | } | 127 | } |
125 | 128 | ||
126 | intel_panel_disable_backlight(dev); | 129 | intel_panel_disable_backlight(dev); |
127 | 130 | ||
128 | I915_WRITE(ctl_reg, I915_READ(ctl_reg) & ~POWER_TARGET_ON); | 131 | I915_WRITE(ctl_reg, I915_READ(ctl_reg) & ~POWER_TARGET_ON); |
132 | if (wait_for((I915_READ(stat_reg) & PP_ON) == 0, 1000)) | ||
133 | DRM_ERROR("timed out waiting for panel to power off\n"); | ||
129 | 134 | ||
130 | if (intel_lvds->pfit_control) { | 135 | if (intel_lvds->pfit_control) { |
131 | if (wait_for((I915_READ(PP_STATUS) & PP_ON) == 0, 1000)) | ||
132 | DRM_ERROR("timed out waiting for panel to power off\n"); | ||
133 | |||
134 | I915_WRITE(PFIT_CONTROL, 0); | 136 | I915_WRITE(PFIT_CONTROL, 0); |
135 | intel_lvds->pfit_dirty = true; | 137 | intel_lvds->pfit_dirty = true; |
136 | } | 138 | } |
@@ -398,53 +400,21 @@ out: | |||
398 | 400 | ||
399 | static void intel_lvds_prepare(struct drm_encoder *encoder) | 401 | static void intel_lvds_prepare(struct drm_encoder *encoder) |
400 | { | 402 | { |
401 | struct drm_device *dev = encoder->dev; | ||
402 | struct drm_i915_private *dev_priv = dev->dev_private; | ||
403 | struct intel_lvds *intel_lvds = to_intel_lvds(encoder); | 403 | struct intel_lvds *intel_lvds = to_intel_lvds(encoder); |
404 | 404 | ||
405 | /* We try to do the minimum that is necessary in order to unlock | 405 | /* |
406 | * the registers for mode setting. | ||
407 | * | ||
408 | * On Ironlake, this is quite simple as we just set the unlock key | ||
409 | * and ignore all subtleties. (This may cause some issues...) | ||
410 | * | ||
411 | * Prior to Ironlake, we must disable the pipe if we want to adjust | 406 | * Prior to Ironlake, we must disable the pipe if we want to adjust |
412 | * the panel fitter. However at all other times we can just reset | 407 | * the panel fitter. However at all other times we can just reset |
413 | * the registers regardless. | 408 | * the registers regardless. |
414 | */ | 409 | */ |
415 | 410 | if (!HAS_PCH_SPLIT(encoder->dev) && intel_lvds->pfit_dirty) | |
416 | if (HAS_PCH_SPLIT(dev)) { | 411 | intel_lvds_disable(intel_lvds); |
417 | I915_WRITE(PCH_PP_CONTROL, | ||
418 | I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS); | ||
419 | } else if (intel_lvds->pfit_dirty) { | ||
420 | I915_WRITE(PP_CONTROL, | ||
421 | (I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS) | ||
422 | & ~POWER_TARGET_ON); | ||
423 | } else { | ||
424 | I915_WRITE(PP_CONTROL, | ||
425 | I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); | ||
426 | } | ||
427 | } | 412 | } |
428 | 413 | ||
429 | static void intel_lvds_commit(struct drm_encoder *encoder) | 414 | static void intel_lvds_commit(struct drm_encoder *encoder) |
430 | { | 415 | { |
431 | struct drm_device *dev = encoder->dev; | ||
432 | struct drm_i915_private *dev_priv = dev->dev_private; | ||
433 | struct intel_lvds *intel_lvds = to_intel_lvds(encoder); | 416 | struct intel_lvds *intel_lvds = to_intel_lvds(encoder); |
434 | 417 | ||
435 | /* Undo any unlocking done in prepare to prevent accidental | ||
436 | * adjustment of the registers. | ||
437 | */ | ||
438 | if (HAS_PCH_SPLIT(dev)) { | ||
439 | u32 val = I915_READ(PCH_PP_CONTROL); | ||
440 | if ((val & PANEL_UNLOCK_REGS) == PANEL_UNLOCK_REGS) | ||
441 | I915_WRITE(PCH_PP_CONTROL, val & 0x3); | ||
442 | } else { | ||
443 | u32 val = I915_READ(PP_CONTROL); | ||
444 | if ((val & PANEL_UNLOCK_REGS) == PANEL_UNLOCK_REGS) | ||
445 | I915_WRITE(PP_CONTROL, val & 0x3); | ||
446 | } | ||
447 | |||
448 | /* Always do a full power on as we do not know what state | 418 | /* Always do a full power on as we do not know what state |
449 | * we were left in. | 419 | * we were left in. |
450 | */ | 420 | */ |
@@ -582,6 +552,8 @@ static void intel_lvds_destroy(struct drm_connector *connector) | |||
582 | struct drm_device *dev = connector->dev; | 552 | struct drm_device *dev = connector->dev; |
583 | struct drm_i915_private *dev_priv = dev->dev_private; | 553 | struct drm_i915_private *dev_priv = dev->dev_private; |
584 | 554 | ||
555 | intel_panel_destroy_backlight(dev); | ||
556 | |||
585 | if (dev_priv->lid_notifier.notifier_call) | 557 | if (dev_priv->lid_notifier.notifier_call) |
586 | acpi_lid_notifier_unregister(&dev_priv->lid_notifier); | 558 | acpi_lid_notifier_unregister(&dev_priv->lid_notifier); |
587 | drm_sysfs_connector_remove(connector); | 559 | drm_sysfs_connector_remove(connector); |
@@ -1040,6 +1012,19 @@ out: | |||
1040 | pwm = I915_READ(BLC_PWM_PCH_CTL1); | 1012 | pwm = I915_READ(BLC_PWM_PCH_CTL1); |
1041 | pwm |= PWM_PCH_ENABLE; | 1013 | pwm |= PWM_PCH_ENABLE; |
1042 | I915_WRITE(BLC_PWM_PCH_CTL1, pwm); | 1014 | I915_WRITE(BLC_PWM_PCH_CTL1, pwm); |
1015 | /* | ||
1016 | * Unlock registers and just | ||
1017 | * leave them unlocked | ||
1018 | */ | ||
1019 | I915_WRITE(PCH_PP_CONTROL, | ||
1020 | I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS); | ||
1021 | } else { | ||
1022 | /* | ||
1023 | * Unlock registers and just | ||
1024 | * leave them unlocked | ||
1025 | */ | ||
1026 | I915_WRITE(PP_CONTROL, | ||
1027 | I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); | ||
1043 | } | 1028 | } |
1044 | dev_priv->lid_notifier.notifier_call = intel_lid_notify; | 1029 | dev_priv->lid_notifier.notifier_call = intel_lid_notify; |
1045 | if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) { | 1030 | if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) { |
@@ -1049,6 +1034,9 @@ out: | |||
1049 | /* keep the LVDS connector */ | 1034 | /* keep the LVDS connector */ |
1050 | dev_priv->int_lvds_connector = connector; | 1035 | dev_priv->int_lvds_connector = connector; |
1051 | drm_sysfs_connector_add(connector); | 1036 | drm_sysfs_connector_add(connector); |
1037 | |||
1038 | intel_panel_setup_backlight(dev); | ||
1039 | |||
1052 | return true; | 1040 | return true; |
1053 | 1041 | ||
1054 | failed: | 1042 | failed: |
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index b7c5ddb564d..b8e8158bb16 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c | |||
@@ -227,7 +227,6 @@ void intel_opregion_asle_intr(struct drm_device *dev) | |||
227 | asle->aslc = asle_stat; | 227 | asle->aslc = asle_stat; |
228 | } | 228 | } |
229 | 229 | ||
230 | /* Only present on Ironlake+ */ | ||
231 | void intel_opregion_gse_intr(struct drm_device *dev) | 230 | void intel_opregion_gse_intr(struct drm_device *dev) |
232 | { | 231 | { |
233 | struct drm_i915_private *dev_priv = dev->dev_private; | 232 | struct drm_i915_private *dev_priv = dev->dev_private; |
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 05f500cd9c2..a9e0c7bcd31 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c | |||
@@ -277,7 +277,7 @@ void intel_panel_enable_backlight(struct drm_device *dev) | |||
277 | dev_priv->backlight_enabled = true; | 277 | dev_priv->backlight_enabled = true; |
278 | } | 278 | } |
279 | 279 | ||
280 | void intel_panel_setup_backlight(struct drm_device *dev) | 280 | static void intel_panel_init_backlight(struct drm_device *dev) |
281 | { | 281 | { |
282 | struct drm_i915_private *dev_priv = dev->dev_private; | 282 | struct drm_i915_private *dev_priv = dev->dev_private; |
283 | 283 | ||
@@ -309,3 +309,73 @@ intel_panel_detect(struct drm_device *dev) | |||
309 | 309 | ||
310 | return connector_status_unknown; | 310 | return connector_status_unknown; |
311 | } | 311 | } |
312 | |||
313 | #ifdef CONFIG_BACKLIGHT_CLASS_DEVICE | ||
314 | static int intel_panel_update_status(struct backlight_device *bd) | ||
315 | { | ||
316 | struct drm_device *dev = bl_get_data(bd); | ||
317 | intel_panel_set_backlight(dev, bd->props.brightness); | ||
318 | return 0; | ||
319 | } | ||
320 | |||
321 | static int intel_panel_get_brightness(struct backlight_device *bd) | ||
322 | { | ||
323 | struct drm_device *dev = bl_get_data(bd); | ||
324 | return intel_panel_get_backlight(dev); | ||
325 | } | ||
326 | |||
327 | static const struct backlight_ops intel_panel_bl_ops = { | ||
328 | .update_status = intel_panel_update_status, | ||
329 | .get_brightness = intel_panel_get_brightness, | ||
330 | }; | ||
331 | |||
332 | int intel_panel_setup_backlight(struct drm_device *dev) | ||
333 | { | ||
334 | struct drm_i915_private *dev_priv = dev->dev_private; | ||
335 | struct backlight_properties props; | ||
336 | struct drm_connector *connector; | ||
337 | |||
338 | intel_panel_init_backlight(dev); | ||
339 | |||
340 | if (dev_priv->int_lvds_connector) | ||
341 | connector = dev_priv->int_lvds_connector; | ||
342 | else if (dev_priv->int_edp_connector) | ||
343 | connector = dev_priv->int_edp_connector; | ||
344 | else | ||
345 | return -ENODEV; | ||
346 | |||
347 | props.type = BACKLIGHT_RAW; | ||
348 | props.max_brightness = intel_panel_get_max_backlight(dev); | ||
349 | dev_priv->backlight = | ||
350 | backlight_device_register("intel_backlight", | ||
351 | &connector->kdev, dev, | ||
352 | &intel_panel_bl_ops, &props); | ||
353 | |||
354 | if (IS_ERR(dev_priv->backlight)) { | ||
355 | DRM_ERROR("Failed to register backlight: %ld\n", | ||
356 | PTR_ERR(dev_priv->backlight)); | ||
357 | dev_priv->backlight = NULL; | ||
358 | return -ENODEV; | ||
359 | } | ||
360 | dev_priv->backlight->props.brightness = intel_panel_get_backlight(dev); | ||
361 | return 0; | ||
362 | } | ||
363 | |||
364 | void intel_panel_destroy_backlight(struct drm_device *dev) | ||
365 | { | ||
366 | struct drm_i915_private *dev_priv = dev->dev_private; | ||
367 | if (dev_priv->backlight) | ||
368 | backlight_device_unregister(dev_priv->backlight); | ||
369 | } | ||
370 | #else | ||
371 | int intel_panel_setup_backlight(struct drm_device *dev) | ||
372 | { | ||
373 | intel_panel_init_backlight(dev); | ||
374 | return 0; | ||
375 | } | ||
376 | |||
377 | void intel_panel_destroy_backlight(struct drm_device *dev) | ||
378 | { | ||
379 | return; | ||
380 | } | ||
381 | #endif | ||
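
Registering through the backlight class means the panel control surfaces as a standard sysfs device: with CONFIG_BACKLIGHT_CLASS_DEVICE set, the code above should appear as /sys/class/backlight/intel_backlight with the usual brightness and max_brightness attributes. A minimal userspace sketch of driving it (illustrative only; the path assumes the registration above succeeded):

	/* illustrative test: set the panel to half of its maximum brightness */
	#include <stdio.h>

	int main(void)
	{
		long max;
		FILE *f = fopen("/sys/class/backlight/intel_backlight/max_brightness", "r");

		if (!f)
			return 1;
		if (fscanf(f, "%ld", &max) != 1) {
			fclose(f);
			return 1;
		}
		fclose(f);

		f = fopen("/sys/class/backlight/intel_backlight/brightness", "w");
		if (!f)
			return 1;
		fprintf(f, "%ld\n", max / 2);
		fclose(f);
		return 0;
	}
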
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 47b9b277703..c30626ea9f9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c | |||
@@ -290,6 +290,10 @@ static int init_render_ring(struct intel_ring_buffer *ring) | |||
290 | if (IS_GEN6(dev) || IS_GEN7(dev)) | 290 | if (IS_GEN6(dev) || IS_GEN7(dev)) |
291 | mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE; | 291 | mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE; |
292 | I915_WRITE(MI_MODE, mode); | 292 | I915_WRITE(MI_MODE, mode); |
293 | if (IS_GEN7(dev)) | ||
294 | I915_WRITE(GFX_MODE_GEN7, | ||
295 | GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) | | ||
296 | GFX_MODE_ENABLE(GFX_REPLAY_MODE)); | ||
293 | } | 297 | } |
294 | 298 | ||
295 | if (INTEL_INFO(dev)->gen >= 6) { | 299 | if (INTEL_INFO(dev)->gen >= 6) { |
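
The GFX_MODE_GEN7 write uses the same masked-bit convention as the MI_MODE update just above it: the upper 16 bits of the written value form a write-enable mask and the lower 16 bits carry the new bit values, so unrelated bits in the register are left untouched. A sketch of what the two helpers are assumed to expand to (illustrative; the real definitions live in i915_reg.h):

	/* assumed expansions, matching the MI_FLUSH_ENABLE << 16 pattern above:
	 * high half selects the bits to update, low half gives their values */
	#define GFX_MODE_ENABLE(bit)	(((bit) << 16) | (bit))
	#define GFX_MODE_DISABLE(bit)	(((bit) << 16) | 0)

With that reading, the new write enables GFX_REPLAY_MODE and clears GFX_TLB_INVALIDATE_ALWAYS in a single register update.
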
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 645b84b3d20..7ad43c6b1db 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c | |||
@@ -613,6 +613,18 @@ static bool radeon_dp_get_link_status(struct radeon_connector *radeon_connector, | |||
613 | return true; | 613 | return true; |
614 | } | 614 | } |
615 | 615 | ||
616 | bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector) | ||
617 | { | ||
618 | u8 link_status[DP_LINK_STATUS_SIZE]; | ||
619 | struct radeon_connector_atom_dig *dig = radeon_connector->con_priv; | ||
620 | |||
621 | if (!radeon_dp_get_link_status(radeon_connector, link_status)) | ||
622 | return false; | ||
623 | if (dp_channel_eq_ok(link_status, dig->dp_lane_count)) | ||
624 | return false; | ||
625 | return true; | ||
626 | } | ||
627 | |||
616 | struct radeon_dp_link_train_info { | 628 | struct radeon_dp_link_train_info { |
617 | struct radeon_device *rdev; | 629 | struct radeon_device *rdev; |
618 | struct drm_encoder *encoder; | 630 | struct drm_encoder *encoder; |
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 14dce9f2217..fb5fa089886 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c | |||
@@ -743,7 +743,7 @@ static void evergreen_program_watermarks(struct radeon_device *rdev, | |||
743 | !evergreen_average_bandwidth_vs_available_bandwidth(&wm) || | 743 | !evergreen_average_bandwidth_vs_available_bandwidth(&wm) || |
744 | !evergreen_check_latency_hiding(&wm) || | 744 | !evergreen_check_latency_hiding(&wm) || |
745 | (rdev->disp_priority == 2)) { | 745 | (rdev->disp_priority == 2)) { |
746 | DRM_INFO("force priority to high\n"); | 746 | DRM_DEBUG_KMS("force priority to high\n"); |
747 | priority_a_cnt |= PRIORITY_ALWAYS_ON; | 747 | priority_a_cnt |= PRIORITY_ALWAYS_ON; |
748 | priority_b_cnt |= PRIORITY_ALWAYS_ON; | 748 | priority_b_cnt |= PRIORITY_ALWAYS_ON; |
749 | } | 749 | } |
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 6d6b5f16bc0..7f65940f918 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c | |||
@@ -60,18 +60,20 @@ void radeon_connector_hotplug(struct drm_connector *connector) | |||
60 | 60 | ||
61 | radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); | 61 | radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); |
62 | 62 | ||
63 | /* powering up/down the eDP panel generates hpd events which | 63 | /* if the connector is already off, don't turn it back on */ |
64 | * can interfere with modesetting. | 64 | if (connector->dpms != DRM_MODE_DPMS_ON) |
65 | */ | ||
66 | if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) | ||
67 | return; | 65 | return; |
68 | 66 | ||
69 | /* pre-r600 did not always have the hpd pins mapped accurately to connectors */ | 67 | /* just deal with DP (not eDP) here. */ |
70 | if (rdev->family >= CHIP_R600) { | 68 | if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { |
71 | if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) | 69 | int saved_dpms = connector->dpms; |
70 | |||
71 | if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) && | ||
72 | radeon_dp_needs_link_train(radeon_connector)) | ||
72 | drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); | 73 | drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); |
73 | else | 74 | else |
74 | drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); | 75 | drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); |
76 | connector->dpms = saved_dpms; | ||
75 | } | 77 | } |
76 | } | 78 | } |
77 | 79 | ||
@@ -474,11 +476,19 @@ static void radeon_fixup_lvds_native_mode(struct drm_encoder *encoder, | |||
474 | { | 476 | { |
475 | struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); | 477 | struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); |
476 | struct drm_display_mode *native_mode = &radeon_encoder->native_mode; | 478 | struct drm_display_mode *native_mode = &radeon_encoder->native_mode; |
479 | struct drm_display_mode *t, *mode; | ||
480 | |||
481 | /* If the EDID preferred mode doesn't match the native mode, use it */ | ||
482 | list_for_each_entry_safe(mode, t, &connector->probed_modes, head) { | ||
483 | if (mode->type & DRM_MODE_TYPE_PREFERRED) { | ||
484 | if (mode->hdisplay != native_mode->hdisplay || | ||
485 | mode->vdisplay != native_mode->vdisplay) | ||
486 | memcpy(native_mode, mode, sizeof(*mode)); | ||
487 | } | ||
488 | } | ||
477 | 489 | ||
478 | /* Try to get native mode details from EDID if necessary */ | 490 | /* Try to get native mode details from EDID if necessary */ |
479 | if (!native_mode->clock) { | 491 | if (!native_mode->clock) { |
480 | struct drm_display_mode *t, *mode; | ||
481 | |||
482 | list_for_each_entry_safe(mode, t, &connector->probed_modes, head) { | 492 | list_for_each_entry_safe(mode, t, &connector->probed_modes, head) { |
483 | if (mode->hdisplay == native_mode->hdisplay && | 493 | if (mode->hdisplay == native_mode->hdisplay && |
484 | mode->vdisplay == native_mode->vdisplay) { | 494 | mode->vdisplay == native_mode->vdisplay) { |
@@ -489,6 +499,7 @@ static void radeon_fixup_lvds_native_mode(struct drm_encoder *encoder, | |||
489 | } | 499 | } |
490 | } | 500 | } |
491 | } | 501 | } |
502 | |||
492 | if (!native_mode->clock) { | 503 | if (!native_mode->clock) { |
493 | DRM_DEBUG_KMS("No LVDS native mode details, disabling RMX\n"); | 504 | DRM_DEBUG_KMS("No LVDS native mode details, disabling RMX\n"); |
494 | radeon_encoder->rmx_type = RMX_OFF; | 505 | radeon_encoder->rmx_type = RMX_OFF; |
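
As a concrete illustration of the new preferred-mode pass: if the native mode recorded by the encoder is, say, 1920x1200 but the panel's EDID flags a 1920x1080 mode as DRM_MODE_TYPE_PREFERRED, the hdisplay/vdisplay comparison now copies the EDID mode over the native one, and the clock lookup that follows then matches against the corrected geometry. (The resolutions here are illustrative, not taken from the patch.)
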
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 440e6ecccc4..a3b011b4946 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <drm/radeon_drm.h> | 32 | #include <drm/radeon_drm.h> |
33 | #include <linux/vgaarb.h> | 33 | #include <linux/vgaarb.h> |
34 | #include <linux/vga_switcheroo.h> | 34 | #include <linux/vga_switcheroo.h> |
35 | #include <linux/efi.h> | ||
35 | #include "radeon_reg.h" | 36 | #include "radeon_reg.h" |
36 | #include "radeon.h" | 37 | #include "radeon.h" |
37 | #include "atom.h" | 38 | #include "atom.h" |
@@ -348,6 +349,9 @@ bool radeon_card_posted(struct radeon_device *rdev) | |||
348 | { | 349 | { |
349 | uint32_t reg; | 350 | uint32_t reg; |
350 | 351 | ||
352 | if (efi_enabled && rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) | ||
353 | return false; | ||
354 | |||
351 | /* first check CRTCs */ | 355 | /* first check CRTCs */ |
352 | if (ASIC_IS_DCE41(rdev)) { | 356 | if (ASIC_IS_DCE41(rdev)) { |
353 | reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) | | 357 | reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) | |
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index b293487e5aa..319d85d7e75 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c | |||
@@ -2323,6 +2323,9 @@ radeon_add_atom_encoder(struct drm_device *dev, | |||
2323 | default: | 2323 | default: |
2324 | encoder->possible_crtcs = 0x3; | 2324 | encoder->possible_crtcs = 0x3; |
2325 | break; | 2325 | break; |
2326 | case 4: | ||
2327 | encoder->possible_crtcs = 0xf; | ||
2328 | break; | ||
2326 | case 6: | 2329 | case 6: |
2327 | encoder->possible_crtcs = 0x3f; | 2330 | encoder->possible_crtcs = 0x3f; |
2328 | break; | 2331 | break; |
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index d09031c03e2..68820f5f630 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h | |||
@@ -479,6 +479,7 @@ extern void radeon_dp_set_link_config(struct drm_connector *connector, | |||
479 | struct drm_display_mode *mode); | 479 | struct drm_display_mode *mode); |
480 | extern void radeon_dp_link_train(struct drm_encoder *encoder, | 480 | extern void radeon_dp_link_train(struct drm_encoder *encoder, |
481 | struct drm_connector *connector); | 481 | struct drm_connector *connector); |
482 | extern bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector); | ||
482 | extern u8 radeon_dp_getsinktype(struct radeon_connector *radeon_connector); | 483 | extern u8 radeon_dp_getsinktype(struct radeon_connector *radeon_connector); |
483 | extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector); | 484 | extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector); |
484 | extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action, int panel_mode); | 485 | extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action, int panel_mode); |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 43f89ba0a90..fe89c4660d5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c | |||
@@ -717,11 +717,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
717 | { | 717 | { |
718 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 718 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
719 | struct ipoib_neigh *neigh; | 719 | struct ipoib_neigh *neigh; |
720 | struct neighbour *n; | 720 | struct neighbour *n = NULL; |
721 | unsigned long flags; | 721 | unsigned long flags; |
722 | 722 | ||
723 | n = dst_get_neighbour(skb_dst(skb)); | 723 | if (likely(skb_dst(skb))) |
724 | if (likely(skb_dst(skb) && n)) { | 724 | n = dst_get_neighbour(skb_dst(skb)); |
725 | |||
726 | if (likely(n)) { | ||
725 | if (unlikely(!*to_ipoib_neigh(n))) { | 727 | if (unlikely(!*to_ipoib_neigh(n))) { |
726 | ipoib_path_lookup(skb, dev); | 728 | ipoib_path_lookup(skb, dev); |
727 | return NETDEV_TX_OK; | 729 | return NETDEV_TX_OK; |
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 8db008de539..9c61b9c2c59 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c | |||
@@ -101,13 +101,17 @@ iscsi_iser_recv(struct iscsi_conn *conn, | |||
101 | 101 | ||
102 | /* verify PDU length */ | 102 | /* verify PDU length */ |
103 | datalen = ntoh24(hdr->dlength); | 103 | datalen = ntoh24(hdr->dlength); |
104 | if (datalen != rx_data_len) { | 104 | if (datalen > rx_data_len || (datalen + 4) < rx_data_len) { |
105 | printk(KERN_ERR "iscsi_iser: datalen %d (hdr) != %d (IB) \n", | 105 | iser_err("wrong datalen %d (hdr), %d (IB)\n", |
106 | datalen, rx_data_len); | 106 | datalen, rx_data_len); |
107 | rc = ISCSI_ERR_DATALEN; | 107 | rc = ISCSI_ERR_DATALEN; |
108 | goto error; | 108 | goto error; |
109 | } | 109 | } |
110 | 110 | ||
111 | if (datalen != rx_data_len) | ||
112 | iser_dbg("aligned datalen (%d) hdr, %d (IB)\n", | ||
113 | datalen, rx_data_len); | ||
114 | |||
111 | /* read AHS */ | 115 | /* read AHS */ |
112 | ahslen = hdr->hlength * 4; | 116 | ahslen = hdr->hlength * 4; |
113 | 117 | ||
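
The rewritten length check above no longer demands an exact match: iSCSI pads the data segment to a 4-byte boundary on the wire, so the InfiniBand receive length may legitimately exceed the header's dlength by the pad, and only a genuine mismatch is treated as ISCSI_ERR_DATALEN. A self-contained restatement of the accepted range (the helper name is ours, not the driver's):

	#include <assert.h>

	/* sketch of the new predicate: accept rx lengths that cover the
	 * iSCSI 4-byte pad, reject anything shorter or longer than that */
	static int pdu_len_ok(int datalen, int rx_data_len)
	{
		return !(datalen > rx_data_len || (datalen + 4) < rx_data_len);
	}

	int main(void)
	{
		assert(pdu_len_ok(13, 13));	/* exact match, as before */
		assert(pdu_len_ok(13, 16));	/* padded to a 4-byte boundary */
		assert(!pdu_len_ok(13, 18));	/* too much trailing data */
		assert(!pdu_len_ok(13, 12));	/* truncated PDU */
		return 0;
	}
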
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 342cbc1bdaa..db6f3ce9f3b 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h | |||
@@ -89,7 +89,7 @@ | |||
89 | } while (0) | 89 | } while (0) |
90 | 90 | ||
91 | #define SHIFT_4K 12 | 91 | #define SHIFT_4K 12 |
92 | #define SIZE_4K (1UL << SHIFT_4K) | 92 | #define SIZE_4K (1ULL << SHIFT_4K) |
93 | #define MASK_4K (~(SIZE_4K-1)) | 93 | #define MASK_4K (~(SIZE_4K-1)) |
94 | 94 | ||
95 | /* support up to 512KB in one RDMA */ | 95 | /* support up to 512KB in one RDMA */ |
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 5745b7fe158..f299de6b419 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c | |||
@@ -412,7 +412,7 @@ int iser_send_control(struct iscsi_conn *conn, | |||
412 | memcpy(iser_conn->ib_conn->login_buf, task->data, | 412 | memcpy(iser_conn->ib_conn->login_buf, task->data, |
413 | task->data_count); | 413 | task->data_count); |
414 | tx_dsg->addr = iser_conn->ib_conn->login_dma; | 414 | tx_dsg->addr = iser_conn->ib_conn->login_dma; |
415 | tx_dsg->length = data_seg_len; | 415 | tx_dsg->length = task->data_count; |
416 | tx_dsg->lkey = device->mr->lkey; | 416 | tx_dsg->lkey = device->mr->lkey; |
417 | mdesc->num_sge = 2; | 417 | mdesc->num_sge = 2; |
418 | } | 418 | } |
diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c index 749fdf07031..753b21aaea6 100644 --- a/drivers/pci/hotplug/pcihp_slot.c +++ b/drivers/pci/hotplug/pcihp_slot.c | |||
@@ -158,47 +158,6 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) | |||
158 | */ | 158 | */ |
159 | } | 159 | } |
160 | 160 | ||
161 | /* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */ | ||
162 | static int pci_set_payload(struct pci_dev *dev) | ||
163 | { | ||
164 | int pos, ppos; | ||
165 | u16 pctl, psz; | ||
166 | u16 dctl, dsz, dcap, dmax; | ||
167 | struct pci_dev *parent; | ||
168 | |||
169 | parent = dev->bus->self; | ||
170 | pos = pci_find_capability(dev, PCI_CAP_ID_EXP); | ||
171 | if (!pos) | ||
172 | return 0; | ||
173 | |||
174 | /* Read Device MaxPayload capability and setting */ | ||
175 | pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl); | ||
176 | pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap); | ||
177 | dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; | ||
178 | dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD); | ||
179 | |||
180 | /* Read Parent MaxPayload setting */ | ||
181 | ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); | ||
182 | if (!ppos) | ||
183 | return 0; | ||
184 | pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); | ||
185 | psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; | ||
186 | |||
187 | /* If parent payload > device max payload -> error | ||
188 | * If parent payload > device payload -> set speed | ||
189 | * If parent payload <= device payload -> do nothing | ||
190 | */ | ||
191 | if (psz > dmax) | ||
192 | return -1; | ||
193 | else if (psz > dsz) { | ||
194 | dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz); | ||
195 | pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, | ||
196 | (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) + | ||
197 | (psz << 5)); | ||
198 | } | ||
199 | return 0; | ||
200 | } | ||
201 | |||
202 | void pci_configure_slot(struct pci_dev *dev) | 161 | void pci_configure_slot(struct pci_dev *dev) |
203 | { | 162 | { |
204 | struct pci_dev *cdev; | 163 | struct pci_dev *cdev; |
@@ -210,9 +169,7 @@ void pci_configure_slot(struct pci_dev *dev) | |||
210 | (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) | 169 | (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) |
211 | return; | 170 | return; |
212 | 171 | ||
213 | ret = pci_set_payload(dev); | 172 | pcie_bus_configure_settings(dev->bus, dev->bus->self->pcie_mpss); |
214 | if (ret) | ||
215 | dev_warn(&dev->dev, "could not set device max payload\n"); | ||
216 | 173 | ||
217 | memset(&hpp, 0, sizeof(hpp)); | 174 | memset(&hpp, 0, sizeof(hpp)); |
218 | ret = pci_get_hp_params(dev, &hpp); | 175 | ret = pci_get_hp_params(dev, &hpp); |
diff --git a/drivers/pci/of.c b/drivers/pci/of.c index c94d37ec55c..f0929934bb7 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c | |||
@@ -55,7 +55,7 @@ struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus) | |||
55 | */ | 55 | */ |
56 | if (bus->bridge->of_node) | 56 | if (bus->bridge->of_node) |
57 | return of_node_get(bus->bridge->of_node); | 57 | return of_node_get(bus->bridge->of_node); |
58 | if (bus->bridge->parent->of_node) | 58 | if (bus->bridge->parent && bus->bridge->parent->of_node) |
59 | return of_node_get(bus->bridge->parent->of_node); | 59 | return of_node_get(bus->bridge->parent->of_node); |
60 | return NULL; | 60 | return NULL; |
61 | } | 61 | } |
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 08a95b369d8..0ce67423a0a 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c | |||
@@ -77,6 +77,8 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE; | |||
77 | unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; | 77 | unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; |
78 | unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; | 78 | unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; |
79 | 79 | ||
80 | enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE; | ||
81 | |||
80 | /* | 82 | /* |
81 | * The default CLS is used if arch didn't set CLS explicitly and not | 83 | * The default CLS is used if arch didn't set CLS explicitly and not |
82 | * all pci devices agree on the same value. Arch can override either | 84 | * all pci devices agree on the same value. Arch can override either |
@@ -3223,6 +3225,67 @@ out: | |||
3223 | EXPORT_SYMBOL(pcie_set_readrq); | 3225 | EXPORT_SYMBOL(pcie_set_readrq); |
3224 | 3226 | ||
3225 | /** | 3227 | /** |
3228 | * pcie_get_mps - get PCI Express maximum payload size | ||
3229 | * @dev: PCI device to query | ||
3230 | * | ||
3231 | * Returns maximum payload size in bytes | ||
3232 | * or appropriate error value. | ||
3233 | */ | ||
3234 | int pcie_get_mps(struct pci_dev *dev) | ||
3235 | { | ||
3236 | int ret, cap; | ||
3237 | u16 ctl; | ||
3238 | |||
3239 | cap = pci_pcie_cap(dev); | ||
3240 | if (!cap) | ||
3241 | return -EINVAL; | ||
3242 | |||
3243 | ret = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); | ||
3244 | if (!ret) | ||
3245 | ret = 128 << ((ctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5); | ||
3246 | |||
3247 | return ret; | ||
3248 | } | ||
3249 | |||
3250 | /** | ||
3251 | * pcie_set_mps - set PCI Express maximum payload size | ||
3252 | * @dev: PCI device to configure | ||
3253 | * @mps: maximum payload size in bytes | ||
3254 | * valid values are 128, 256, 512, 1024, 2048, 4096 | ||
3255 | * | ||
3256 | * If possible, sets the device's maximum payload size | ||
3257 | */ | ||
3258 | int pcie_set_mps(struct pci_dev *dev, int mps) | ||
3259 | { | ||
3260 | int cap, err = -EINVAL; | ||
3261 | u16 ctl, v; | ||
3262 | |||
3263 | if (mps < 128 || mps > 4096 || !is_power_of_2(mps)) | ||
3264 | goto out; | ||
3265 | |||
3266 | v = ffs(mps) - 8; | ||
3267 | if (v > dev->pcie_mpss) | ||
3268 | goto out; | ||
3269 | v <<= 5; | ||
3270 | |||
3271 | cap = pci_pcie_cap(dev); | ||
3272 | if (!cap) | ||
3273 | goto out; | ||
3274 | |||
3275 | err = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); | ||
3276 | if (err) | ||
3277 | goto out; | ||
3278 | |||
3279 | if ((ctl & PCI_EXP_DEVCTL_PAYLOAD) != v) { | ||
3280 | ctl &= ~PCI_EXP_DEVCTL_PAYLOAD; | ||
3281 | ctl |= v; | ||
3282 | err = pci_write_config_word(dev, cap + PCI_EXP_DEVCTL, ctl); | ||
3283 | } | ||
3284 | out: | ||
3285 | return err; | ||
3286 | } | ||
3287 | |||
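
The arithmetic shared by pcie_get_mps() and pcie_set_mps() is worth checking once by hand: the Device Control payload field stores log2(size) - 7, so 128 bytes encodes as 0, 256 as 1, up to 4096 as 5, and ffs(mps) - 8 recovers the field from a power-of-two byte count. A standalone check (plain C, substituting __builtin_ffs for the kernel's ffs()):

	#include <stdio.h>

	int main(void)
	{
		int mps;

		for (mps = 128; mps <= 4096; mps <<= 1) {
			int v = __builtin_ffs(mps) - 8;	/* pcie_set_mps encode */
			printf("mps %4d -> field %d -> %4d\n",
			       mps, v, 128 << v);	/* pcie_get_mps decode */
		}
		return 0;
	}
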
3288 | /** | ||
3226 | * pci_select_bars - Make BAR mask from the type of resource | 3289 | * pci_select_bars - Make BAR mask from the type of resource |
3227 | * @dev: the PCI device for which BAR mask is made | 3290 | * @dev: the PCI device for which BAR mask is made |
3228 | * @flags: resource type mask to be selected | 3291 | * @flags: resource type mask to be selected |
@@ -3505,6 +3568,10 @@ static int __init pci_setup(char *str) | |||
3505 | pci_hotplug_io_size = memparse(str + 9, &str); | 3568 | pci_hotplug_io_size = memparse(str + 9, &str); |
3506 | } else if (!strncmp(str, "hpmemsize=", 10)) { | 3569 | } else if (!strncmp(str, "hpmemsize=", 10)) { |
3507 | pci_hotplug_mem_size = memparse(str + 10, &str); | 3570 | pci_hotplug_mem_size = memparse(str + 10, &str); |
3571 | } else if (!strncmp(str, "pcie_bus_safe", 13)) { | ||
3572 | pcie_bus_config = PCIE_BUS_SAFE; | ||
3573 | } else if (!strncmp(str, "pcie_bus_perf", 13)) { | ||
3574 | pcie_bus_config = PCIE_BUS_PERFORMANCE; | ||
3508 | } else { | 3575 | } else { |
3509 | printk(KERN_ERR "PCI: Unknown option `%s'\n", | 3576 | printk(KERN_ERR "PCI: Unknown option `%s'\n", |
3510 | str); | 3577 | str); |
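
With these two options wired into pci_setup(), the policy becomes a boot-time choice: passing pci=pcie_bus_safe on the kernel command line selects the fabric-minimum behaviour, while pci=pcie_bus_perf spells out the performance policy that the pcie_bus_config initializer above already makes the default.
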
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index c8cee764b0d..b74084e9ca1 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h | |||
@@ -283,6 +283,8 @@ static inline int pci_iov_bus_range(struct pci_bus *bus) | |||
283 | 283 | ||
284 | #endif /* CONFIG_PCI_IOV */ | 284 | #endif /* CONFIG_PCI_IOV */ |
285 | 285 | ||
286 | extern unsigned long pci_cardbus_resource_alignment(struct resource *); | ||
287 | |||
286 | static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, | 288 | static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, |
287 | struct resource *res) | 289 | struct resource *res) |
288 | { | 290 | { |
@@ -292,6 +294,8 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, | |||
292 | if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END) | 294 | if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END) |
293 | return pci_sriov_resource_alignment(dev, resno); | 295 | return pci_sriov_resource_alignment(dev, resno); |
294 | #endif | 296 | #endif |
297 | if (dev->class >> 8 == PCI_CLASS_BRIDGE_CARDBUS) | ||
298 | return pci_cardbus_resource_alignment(res); | ||
295 | return resource_alignment(res); | 299 | return resource_alignment(res); |
296 | } | 300 | } |
297 | 301 | ||
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 795c9026d55..8473727b29f 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c | |||
@@ -856,6 +856,8 @@ void set_pcie_port_type(struct pci_dev *pdev) | |||
856 | pdev->pcie_cap = pos; | 856 | pdev->pcie_cap = pos; |
857 | pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); | 857 | pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); |
858 | pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; | 858 | pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; |
859 | pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, ®16); | ||
860 | pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD; | ||
859 | } | 861 | } |
860 | 862 | ||
861 | void set_pcie_hotplug_bridge(struct pci_dev *pdev) | 863 | void set_pcie_hotplug_bridge(struct pci_dev *pdev) |
@@ -1326,6 +1328,150 @@ int pci_scan_slot(struct pci_bus *bus, int devfn) | |||
1326 | return nr; | 1328 | return nr; |
1327 | } | 1329 | } |
1328 | 1330 | ||
1331 | static int pcie_find_smpss(struct pci_dev *dev, void *data) | ||
1332 | { | ||
1333 | u8 *smpss = data; | ||
1334 | |||
1335 | if (!pci_is_pcie(dev)) | ||
1336 | return 0; | ||
1337 | |||
1338 | /* For PCIE hotplug enabled slots not connected directly to a | ||
1339 | * PCI-E root port, there can be problems when hotplugging | ||
1340 | * devices. This is due to the possibility of hotplugging a | ||
1341 | * device into the fabric with a smaller MPS than the devices | ||
1342 | * currently running have configured. Modifying the MPS on the | ||
1343 | * running devices could cause a fatal bus error due to an | ||
1344 | * incoming frame being larger than the newly configured MPS. | ||
1345 | * To work around this, the MPS for the entire fabric must be | ||
1346 | * set to the minimum size. Any devices hotplugged into this | ||
1347 | * fabric will have the minimum MPS set. If the PCI hotplug | ||
1348 | * slot is directly connected to the root port and there are not | ||
1349 | * other devices on the fabric (which seems to be the most | ||
1350 | * common case), then this is not an issue and MPS discovery | ||
1351 | * will occur as normal. | ||
1352 | */ | ||
1353 | if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) || | ||
1354 | dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT)) | ||
1355 | *smpss = 0; | ||
1356 | |||
1357 | if (*smpss > dev->pcie_mpss) | ||
1358 | *smpss = dev->pcie_mpss; | ||
1359 | |||
1360 | return 0; | ||
1361 | } | ||
1362 | |||
1363 | static void pcie_write_mps(struct pci_dev *dev, int mps) | ||
1364 | { | ||
1365 | int rc, dev_mpss; | ||
1366 | |||
1367 | dev_mpss = 128 << dev->pcie_mpss; | ||
1368 | |||
1369 | if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { | ||
1370 | if (dev->bus->self) { | ||
1371 | dev_dbg(&dev->bus->dev, "Bus MPSS %d\n", | ||
1372 | 128 << dev->bus->self->pcie_mpss); | ||
1373 | |||
1374 | /* For "MPS Force Max", the assumption is made that | ||
1375 | * downstream communication will never be larger than | ||
1376 | * the MRRS. So, the MPS only needs to be configured | ||
1377 | * for the upstream communication. This being the case, | ||
1378 | * walk from the top down and set the MPS of the child | ||
1379 | * to that of the parent bus. | ||
1380 | */ | ||
1381 | mps = 128 << dev->bus->self->pcie_mpss; | ||
1382 | if (mps > dev_mpss) | ||
1383 | dev_warn(&dev->dev, "MPS configured higher than" | ||
1384 | " maximum supported by the device. If" | ||
1385 | " a bus issue occurs, try running with" | ||
1386 | " pci=pcie_bus_safe.\n"); | ||
1387 | } | ||
1388 | |||
1389 | dev->pcie_mpss = ffs(mps) - 8; | ||
1390 | } | ||
1391 | |||
1392 | rc = pcie_set_mps(dev, mps); | ||
1393 | if (rc) | ||
1394 | dev_err(&dev->dev, "Failed attempting to set the MPS\n"); | ||
1395 | } | ||
1396 | |||
1397 | static void pcie_write_mrrs(struct pci_dev *dev, int mps) | ||
1398 | { | ||
1399 | int rc, mrrs; | ||
1400 | |||
1401 | if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { | ||
1402 | int dev_mpss = 128 << dev->pcie_mpss; | ||
1403 | |||
1404 | /* For Max performance, the MRRS must be set to the largest | ||
1405 | * supported value. However, it cannot be configured larger | ||
1406 | * than the MPS the device or the bus can support. This assumes | ||
1407 | * that the largest MRRS available on the device cannot be | ||
1408 | * smaller than the device MPSS. | ||
1409 | */ | ||
1410 | mrrs = mps < dev_mpss ? mps : dev_mpss; | ||
1411 | } else | ||
1412 | /* In the "safe" case, configure the MRRS for fairness on the | ||
1413 | * bus by making all devices have the same size | ||
1414 | */ | ||
1415 | mrrs = mps; | ||
1416 | |||
1417 | |||
1418 | /* MRRS is an R/W register. Invalid values can be written, but a | ||
1419 | * subsequent read will verify if the value is acceptable or not. | ||
1420 | * If the MRRS value provided is not acceptable (e.g., too large), | ||
1421 | * shrink the value until it is acceptable to the HW. | ||
1422 | */ | ||
1423 | while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) { | ||
1424 | rc = pcie_set_readrq(dev, mrrs); | ||
1425 | if (rc) | ||
1426 | dev_err(&dev->dev, "Failed attempting to set the MRRS\n"); | ||
1427 | |||
1428 | mrrs /= 2; | ||
1429 | } | ||
1430 | } | ||
1431 | |||
1432 | static int pcie_bus_configure_set(struct pci_dev *dev, void *data) | ||
1433 | { | ||
1434 | int mps = 128 << *(u8 *)data; | ||
1435 | |||
1436 | if (!pci_is_pcie(dev)) | ||
1437 | return 0; | ||
1438 | |||
1439 | dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", | ||
1440 | pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev)); | ||
1441 | |||
1442 | pcie_write_mps(dev, mps); | ||
1443 | pcie_write_mrrs(dev, mps); | ||
1444 | |||
1445 | dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", | ||
1446 | pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev)); | ||
1447 | |||
1448 | return 0; | ||
1449 | } | ||
1450 | |||
1451 | /* pcie_bus_configure_settings() requires that pci_walk_bus() work in a | ||
1452 | * top-down, parents-then-children fashion. If this changes, then this | ||
1453 | * code will not work as designed. | ||
1454 | */ | ||
1455 | void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss) | ||
1456 | { | ||
1457 | u8 smpss = mpss; | ||
1458 | |||
1459 | if (!bus->self) | ||
1460 | return; | ||
1461 | |||
1462 | if (!pci_is_pcie(bus->self)) | ||
1463 | return; | ||
1464 | |||
1465 | if (pcie_bus_config == PCIE_BUS_SAFE) { | ||
1466 | pcie_find_smpss(bus->self, &smpss); | ||
1467 | pci_walk_bus(bus, pcie_find_smpss, &smpss); | ||
1468 | } | ||
1469 | |||
1470 | pcie_bus_configure_set(bus->self, &smpss); | ||
1471 | pci_walk_bus(bus, pcie_bus_configure_set, &smpss); | ||
1472 | } | ||
1473 | EXPORT_SYMBOL_GPL(pcie_bus_configure_settings); | ||
1474 | |||
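
A worked example of the safe policy: if a root port advertises MPSS 2 (512 bytes) and a hotplugged endpoint below it advertises MPSS 0 (128 bytes), pcie_find_smpss() clamps smpss to 0 while walking the bus, and pcie_bus_configure_set() then programs 128 << 0 = 128 bytes into every function on that fabric, so no device can receive a TLP larger than the weakest link supports. (The MPSS values here are illustrative, not taken from the patch.)
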
1329 | unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) | 1475 | unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) |
1330 | { | 1476 | { |
1331 | unsigned int devfn, pass, max = bus->secondary; | 1477 | unsigned int devfn, pass, max = bus->secondary; |
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 8a1d3c7863a..784da9d3602 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c | |||
@@ -34,6 +34,7 @@ struct resource_list_x { | |||
34 | resource_size_t start; | 34 | resource_size_t start; |
35 | resource_size_t end; | 35 | resource_size_t end; |
36 | resource_size_t add_size; | 36 | resource_size_t add_size; |
37 | resource_size_t min_align; | ||
37 | unsigned long flags; | 38 | unsigned long flags; |
38 | }; | 39 | }; |
39 | 40 | ||
@@ -65,7 +66,7 @@ void pci_realloc(void) | |||
65 | */ | 66 | */ |
66 | static void add_to_list(struct resource_list_x *head, | 67 | static void add_to_list(struct resource_list_x *head, |
67 | struct pci_dev *dev, struct resource *res, | 68 | struct pci_dev *dev, struct resource *res, |
68 | resource_size_t add_size) | 69 | resource_size_t add_size, resource_size_t min_align) |
69 | { | 70 | { |
70 | struct resource_list_x *list = head; | 71 | struct resource_list_x *list = head; |
71 | struct resource_list_x *ln = list->next; | 72 | struct resource_list_x *ln = list->next; |
@@ -84,13 +85,16 @@ static void add_to_list(struct resource_list_x *head, | |||
84 | tmp->end = res->end; | 85 | tmp->end = res->end; |
85 | tmp->flags = res->flags; | 86 | tmp->flags = res->flags; |
86 | tmp->add_size = add_size; | 87 | tmp->add_size = add_size; |
88 | tmp->min_align = min_align; | ||
87 | list->next = tmp; | 89 | list->next = tmp; |
88 | } | 90 | } |
89 | 91 | ||
90 | static void add_to_failed_list(struct resource_list_x *head, | 92 | static void add_to_failed_list(struct resource_list_x *head, |
91 | struct pci_dev *dev, struct resource *res) | 93 | struct pci_dev *dev, struct resource *res) |
92 | { | 94 | { |
93 | add_to_list(head, dev, res, 0); | 95 | add_to_list(head, dev, res, |
96 | 0 /* don't care */, | ||
97 | 0 /* don't care */); | ||
94 | } | 98 | } |
95 | 99 | ||
96 | static void __dev_sort_resources(struct pci_dev *dev, | 100 | static void __dev_sort_resources(struct pci_dev *dev, |
@@ -121,18 +125,18 @@ static inline void reset_resource(struct resource *res) | |||
121 | } | 125 | } |
122 | 126 | ||
123 | /** | 127 | /** |
124 | * adjust_resources_sorted() - satisfy any additional resource requests | 128 | * reassign_resources_sorted() - satisfy any additional resource requests |
125 | * | 129 | * |
126 | * @add_head : head of the list tracking requests requiring additional | 130 | * @realloc_head : head of the list tracking requests requiring additional |
127 | * resources | 131 | * resources |
128 | * @head : head of the list tracking requests with allocated | 132 | * @head : head of the list tracking requests with allocated |
129 | * resources | 133 | * resources |
130 | * | 134 | * |
131 | * Walk through each element of the add_head and try to procure | 135 | * Walk through each element of the realloc_head and try to procure |
132 | * additional resources for the element, provided the element | 136 | * additional resources for the element, provided the element |
133 | * is in the head list. | 137 | * is in the head list. |
134 | */ | 138 | */ |
135 | static void adjust_resources_sorted(struct resource_list_x *add_head, | 139 | static void reassign_resources_sorted(struct resource_list_x *realloc_head, |
136 | struct resource_list *head) | 140 | struct resource_list *head) |
137 | { | 141 | { |
138 | struct resource *res; | 142 | struct resource *res; |
@@ -141,8 +145,8 @@ static void adjust_resources_sorted(struct resource_list_x *add_head, | |||
141 | resource_size_t add_size; | 145 | resource_size_t add_size; |
142 | int idx; | 146 | int idx; |
143 | 147 | ||
144 | prev = add_head; | 148 | prev = realloc_head; |
145 | for (list = add_head->next; list;) { | 149 | for (list = realloc_head->next; list;) { |
146 | res = list->res; | 150 | res = list->res; |
147 | /* skip resource that has been reset */ | 151 | /* skip resource that has been reset */ |
148 | if (!res->flags) | 152 | if (!res->flags) |
@@ -159,13 +163,17 @@ static void adjust_resources_sorted(struct resource_list_x *add_head, | |||
159 | 163 | ||
160 | idx = res - &list->dev->resource[0]; | 164 | idx = res - &list->dev->resource[0]; |
161 | add_size=list->add_size; | 165 | add_size=list->add_size; |
162 | if (!resource_size(res) && add_size) { | 166 | if (!resource_size(res)) { |
163 | res->end = res->start + add_size - 1; | 167 | res->start = list->start; |
164 | if(pci_assign_resource(list->dev, idx)) | 168 | res->end = res->start + add_size - 1; |
169 | if(pci_assign_resource(list->dev, idx)) | ||
165 | reset_resource(res); | 170 | reset_resource(res); |
166 | } else if (add_size) { | 171 | } else { |
167 | adjust_resource(res, res->start, | 172 | resource_size_t align = list->min_align; |
168 | resource_size(res) + add_size); | 173 | res->flags |= list->flags & (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN); |
174 | if (pci_reassign_resource(list->dev, idx, add_size, align)) | ||
175 | dev_printk(KERN_DEBUG, &list->dev->dev, "failed to add optional resources res=%pR\n", | ||
176 | res); | ||
169 | } | 177 | } |
170 | out: | 178 | out: |
171 | tmp = list; | 179 | tmp = list; |
@@ -210,16 +218,16 @@ static void assign_requested_resources_sorted(struct resource_list *head, | |||
210 | } | 218 | } |
211 | 219 | ||
212 | static void __assign_resources_sorted(struct resource_list *head, | 220 | static void __assign_resources_sorted(struct resource_list *head, |
213 | struct resource_list_x *add_head, | 221 | struct resource_list_x *realloc_head, |
214 | struct resource_list_x *fail_head) | 222 | struct resource_list_x *fail_head) |
215 | { | 223 | { |
216 | /* Satisfy the must-have resource requests */ | 224 | /* Satisfy the must-have resource requests */ |
217 | assign_requested_resources_sorted(head, fail_head); | 225 | assign_requested_resources_sorted(head, fail_head); |
218 | 226 | ||
219 | /* Try to satisfy any additional nice-to-have resource | 227 | /* Try to satisfy any additional optional resource |
220 | requests */ | 228 | requests */ |
221 | if (add_head) | 229 | if (realloc_head) |
222 | adjust_resources_sorted(add_head, head); | 230 | reassign_resources_sorted(realloc_head, head); |
223 | free_list(resource_list, head); | 231 | free_list(resource_list, head); |
224 | } | 232 | } |
225 | 233 | ||
@@ -235,7 +243,7 @@ static void pdev_assign_resources_sorted(struct pci_dev *dev, | |||
235 | } | 243 | } |
236 | 244 | ||
237 | static void pbus_assign_resources_sorted(const struct pci_bus *bus, | 245 | static void pbus_assign_resources_sorted(const struct pci_bus *bus, |
238 | struct resource_list_x *add_head, | 246 | struct resource_list_x *realloc_head, |
239 | struct resource_list_x *fail_head) | 247 | struct resource_list_x *fail_head) |
240 | { | 248 | { |
241 | struct pci_dev *dev; | 249 | struct pci_dev *dev; |
@@ -245,7 +253,7 @@ static void pbus_assign_resources_sorted(const struct pci_bus *bus, | |||
245 | list_for_each_entry(dev, &bus->devices, bus_list) | 253 | list_for_each_entry(dev, &bus->devices, bus_list) |
246 | __dev_sort_resources(dev, &head); | 254 | __dev_sort_resources(dev, &head); |
247 | 255 | ||
248 | __assign_resources_sorted(&head, add_head, fail_head); | 256 | __assign_resources_sorted(&head, realloc_head, fail_head); |
249 | } | 257 | } |
250 | 258 | ||
251 | void pci_setup_cardbus(struct pci_bus *bus) | 259 | void pci_setup_cardbus(struct pci_bus *bus) |
@@ -540,13 +548,27 @@ static resource_size_t calculate_memsize(resource_size_t size, | |||
540 | return size; | 548 | return size; |
541 | } | 549 | } |
542 | 550 | ||
551 | static resource_size_t get_res_add_size(struct resource_list_x *realloc_head, | ||
552 | struct resource *res) | ||
553 | { | ||
554 | struct resource_list_x *list; | ||
555 | |||
556 | /* check if it is in the realloc_head list */ | ||
557 | for (list = realloc_head->next; list && list->res != res; | ||
558 | list = list->next); | ||
559 | if (list) | ||
560 | return list->add_size; | ||
561 | |||
562 | return 0; | ||
563 | } | ||
564 | |||
543 | /** | 565 | /** |
544 | * pbus_size_io() - size the io window of a given bus | 566 | * pbus_size_io() - size the io window of a given bus |
545 | * | 567 | * |
546 | * @bus : the bus | 568 | * @bus : the bus |
547 | * @min_size : the minimum io window that must be allocated | 569 |
548 | * @add_size : additional optional io window | 570 | * @add_size : additional optional io window |
549 | * @add_head : track the additional io window on this list | 571 | * @realloc_head : track the additional io window on this list |
550 | * | 572 | * |
551 | * Sizing the IO windows of the PCI-PCI bridge is trivial, | 573 | * Sizing the IO windows of the PCI-PCI bridge is trivial, |
552 | * since these windows have 4K granularity and the IO ranges | 574 | * since these windows have 4K granularity and the IO ranges |
@@ -554,11 +576,12 @@ static resource_size_t calculate_memsize(resource_size_t size, | |||
554 | * We must be careful with the ISA aliasing though. | 576 | * We must be careful with the ISA aliasing though. |
555 | */ | 577 | */ |
556 | static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, | 578 | static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, |
557 | resource_size_t add_size, struct resource_list_x *add_head) | 579 | resource_size_t add_size, struct resource_list_x *realloc_head) |
558 | { | 580 | { |
559 | struct pci_dev *dev; | 581 | struct pci_dev *dev; |
560 | struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO); | 582 | struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO); |
561 | unsigned long size = 0, size0 = 0, size1 = 0; | 583 | unsigned long size = 0, size0 = 0, size1 = 0; |
584 | resource_size_t children_add_size = 0; | ||
562 | 585 | ||
563 | if (!b_res) | 586 | if (!b_res) |
564 | return; | 587 | return; |
@@ -579,11 +602,16 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, | |||
579 | size += r_size; | 602 | size += r_size; |
580 | else | 603 | else |
581 | size1 += r_size; | 604 | size1 += r_size; |
605 | |||
606 | if (realloc_head) | ||
607 | children_add_size += get_res_add_size(realloc_head, r); | ||
582 | } | 608 | } |
583 | } | 609 | } |
584 | size0 = calculate_iosize(size, min_size, size1, | 610 | size0 = calculate_iosize(size, min_size, size1, |
585 | resource_size(b_res), 4096); | 611 | resource_size(b_res), 4096); |
586 | size1 = (!add_head || (add_head && !add_size)) ? size0 : | 612 | if (children_add_size > add_size) |
613 | add_size = children_add_size; | ||
614 | size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 : | ||
587 | calculate_iosize(size, min_size+add_size, size1, | 615 | calculate_iosize(size, min_size+add_size, size1, |
588 | resource_size(b_res), 4096); | 616 | resource_size(b_res), 4096); |
589 | if (!size0 && !size1) { | 617 | if (!size0 && !size1) { |
@@ -598,8 +626,8 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, | |||
598 | b_res->start = 4096; | 626 | b_res->start = 4096; |
599 | b_res->end = b_res->start + size0 - 1; | 627 | b_res->end = b_res->start + size0 - 1; |
600 | b_res->flags |= IORESOURCE_STARTALIGN; | 628 | b_res->flags |= IORESOURCE_STARTALIGN; |
601 | if (size1 > size0 && add_head) | 629 | if (size1 > size0 && realloc_head) |
602 | add_to_list(add_head, bus->self, b_res, size1-size0); | 630 | add_to_list(realloc_head, bus->self, b_res, size1-size0, 4096); |
603 | } | 631 | } |
604 | 632 | ||
605 | /** | 633 | /** |
@@ -608,7 +636,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, | |||
608 | * @bus : the bus | 636 | * @bus : the bus |
609 | * @min_size : the minimum memory window that must be allocated | 637 | * @min_size : the minimum memory window that must be allocated |
610 | * @add_size : additional optional memory window | 638 | * @add_size : additional optional memory window |
611 | * @add_head : track the additional memory window on this list | 639 | * @realloc_head : track the additional memory window on this list |
612 | * | 640 | * |
613 | * Calculate the size of the bus and minimal alignment which | 641 | * Calculate the size of the bus and minimal alignment which |
614 | * guarantees that all child resources fit in this size. | 642 | * guarantees that all child resources fit in this size. |
@@ -616,7 +644,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, | |||
616 | static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, | 644 | static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, |
617 | unsigned long type, resource_size_t min_size, | 645 | unsigned long type, resource_size_t min_size, |
618 | resource_size_t add_size, | 646 | resource_size_t add_size, |
619 | struct resource_list_x *add_head) | 647 | struct resource_list_x *realloc_head) |
620 | { | 648 | { |
621 | struct pci_dev *dev; | 649 | struct pci_dev *dev; |
622 | resource_size_t min_align, align, size, size0, size1; | 650 | resource_size_t min_align, align, size, size0, size1; |
@@ -624,6 +652,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, | |||
624 | int order, max_order; | 652 | int order, max_order; |
625 | struct resource *b_res = find_free_bus_resource(bus, type); | 653 | struct resource *b_res = find_free_bus_resource(bus, type); |
626 | unsigned int mem64_mask = 0; | 654 | unsigned int mem64_mask = 0; |
655 | resource_size_t children_add_size = 0; | ||
627 | 656 | ||
628 | if (!b_res) | 657 | if (!b_res) |
629 | return 0; | 658 | return 0; |
@@ -645,6 +674,16 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, | |||
645 | if (r->parent || (r->flags & mask) != type) | 674 | if (r->parent || (r->flags & mask) != type) |
646 | continue; | 675 | continue; |
647 | r_size = resource_size(r); | 676 | r_size = resource_size(r); |
677 | #ifdef CONFIG_PCI_IOV | ||
678 | /* put SR-IOV requested resources on the optional list */ | ||
679 | if (realloc_head && i >= PCI_IOV_RESOURCES && | ||
680 | i <= PCI_IOV_RESOURCE_END) { | ||
681 | r->end = r->start - 1; | ||
682 | add_to_list(realloc_head, dev, r, r_size, 0 /* don't care */); | ||
683 | children_add_size += r_size; | ||
684 | continue; | ||
685 | } | ||
686 | #endif | ||
648 | /* For bridges size != alignment */ | 687 | /* For bridges size != alignment */ |
649 | align = pci_resource_alignment(dev, r); | 688 | align = pci_resource_alignment(dev, r); |
650 | order = __ffs(align) - 20; | 689 | order = __ffs(align) - 20; |
@@ -665,6 +704,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, | |||
665 | if (order > max_order) | 704 | if (order > max_order) |
666 | max_order = order; | 705 | max_order = order; |
667 | mem64_mask &= r->flags & IORESOURCE_MEM_64; | 706 | mem64_mask &= r->flags & IORESOURCE_MEM_64; |
707 | |||
708 | if (realloc_head) | ||
709 | children_add_size += get_res_add_size(realloc_head, r); | ||
668 | } | 710 | } |
669 | } | 711 | } |
670 | align = 0; | 712 | align = 0; |
@@ -681,7 +723,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, | |||
681 | align += aligns[order]; | 723 | align += aligns[order]; |
682 | } | 724 | } |
683 | size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align); | 725 | size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align); |
684 | size1 = (!add_head || (add_head && !add_size)) ? size0 : | 726 | if (children_add_size > add_size) |
727 | add_size = children_add_size; | ||
728 | size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 : | ||
685 | calculate_memsize(size, min_size+add_size, 0, | 729 | calculate_memsize(size, min_size+add_size, 0, |
686 | resource_size(b_res), min_align); | 730 | resource_size(b_res), min_align); |
687 | if (!size0 && !size1) { | 731 | if (!size0 && !size1) { |
@@ -695,12 +739,22 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, | |||
695 | b_res->start = min_align; | 739 | b_res->start = min_align; |
696 | b_res->end = size0 + min_align - 1; | 740 | b_res->end = size0 + min_align - 1; |
697 | b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask; | 741 | b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask; |
698 | if (size1 > size0 && add_head) | 742 | if (size1 > size0 && realloc_head) |
699 | add_to_list(add_head, bus->self, b_res, size1-size0); | 743 | add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align); |
700 | return 1; | 744 | return 1; |
701 | } | 745 | } |
702 | 746 | ||
703 | static void pci_bus_size_cardbus(struct pci_bus *bus) | 747 | unsigned long pci_cardbus_resource_alignment(struct resource *res) |
748 | { | ||
749 | if (res->flags & IORESOURCE_IO) | ||
750 | return pci_cardbus_io_size; | ||
751 | if (res->flags & IORESOURCE_MEM) | ||
752 | return pci_cardbus_mem_size; | ||
753 | return 0; | ||
754 | } | ||
755 | |||
756 | static void pci_bus_size_cardbus(struct pci_bus *bus, | ||
757 | struct resource_list_x *realloc_head) | ||
704 | { | 758 | { |
705 | struct pci_dev *bridge = bus->self; | 759 | struct pci_dev *bridge = bus->self; |
706 | struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; | 760 | struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; |
@@ -711,12 +765,14 @@ static void pci_bus_size_cardbus(struct pci_bus *bus) | |||
711 | * a fixed amount of bus space for CardBus bridges. | 765 | * a fixed amount of bus space for CardBus bridges. |
712 | */ | 766 | */ |
713 | b_res[0].start = 0; | 767 | b_res[0].start = 0; |
714 | b_res[0].end = pci_cardbus_io_size - 1; | ||
715 | b_res[0].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; | 768 | b_res[0].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; |
769 | if (realloc_head) | ||
770 | add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size, 0 /* don't care */); | ||
716 | 771 | ||
717 | b_res[1].start = 0; | 772 | b_res[1].start = 0; |
718 | b_res[1].end = pci_cardbus_io_size - 1; | ||
719 | b_res[1].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; | 773 | b_res[1].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; |
774 | if (realloc_head) | ||
775 | add_to_list(realloc_head, bridge, b_res+1, pci_cardbus_io_size, 0 /* don't care */); | ||
720 | 776 | ||
721 | /* | 777 | /* |
722 | * Check whether prefetchable memory is supported | 778 | * Check whether prefetchable memory is supported |
@@ -736,21 +792,31 @@ static void pci_bus_size_cardbus(struct pci_bus *bus) | |||
736 | */ | 792 | */ |
737 | if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) { | 793 | if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) { |
738 | b_res[2].start = 0; | 794 | b_res[2].start = 0; |
739 | b_res[2].end = pci_cardbus_mem_size - 1; | ||
740 | b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_SIZEALIGN; | 795 | b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_SIZEALIGN; |
796 | if (realloc_head) | ||
797 | add_to_list(realloc_head, bridge, b_res+2, pci_cardbus_mem_size, 0 /* don't care */); | ||
741 | 798 | ||
742 | b_res[3].start = 0; | 799 | b_res[3].start = 0; |
743 | b_res[3].end = pci_cardbus_mem_size - 1; | ||
744 | b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; | 800 | b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; |
801 | if (realloc_head) | ||
802 | add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size, 0 /* don't care */); | ||
745 | } else { | 803 | } else { |
746 | b_res[3].start = 0; | 804 | b_res[3].start = 0; |
747 | b_res[3].end = pci_cardbus_mem_size * 2 - 1; | ||
748 | b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; | 805 | b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; |
806 | if (realloc_head) | ||
807 | add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size * 2, 0 /* don't care */); | ||
749 | } | 808 | } |
809 | |||
810 | /* set the size of the resource to zero, so that the resource does not | ||
811 | * get assigned during the required-resource allocation cycle but gets | ||
812 | * assigned during the optional-resource allocation cycle. | ||
813 | */ | ||
814 | b_res[0].start = b_res[1].start = b_res[2].start = b_res[3].start = 1; | ||
815 | b_res[0].end = b_res[1].end = b_res[2].end = b_res[3].end = 0; | ||
750 | } | 816 | } |
751 | 817 | ||
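The start = 1, end = 0 assignment at the bottom of pci_bus_size_cardbus() works because of how window sizes are computed. A sketch of the arithmetic, mirroring the resource_size() definition from include/linux/ioport.h:

    typedef unsigned long long resource_size_t;

    struct resource {
        resource_size_t start, end;
    };

    /* A window's size is end - start + 1, exactly as in the kernel. */
    static inline resource_size_t resource_size(const struct resource *res)
    {
        return res->end - res->start + 1;
    }

    /* With start = 1 and end = 0 this yields 0 - 1 + 1 == 0: the
     * required-allocation pass sees nothing to assign, while the real
     * sizes sit on the realloc list for the optional pass. */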
752 | void __ref __pci_bus_size_bridges(struct pci_bus *bus, | 818 | void __ref __pci_bus_size_bridges(struct pci_bus *bus, |
753 | struct resource_list_x *add_head) | 819 | struct resource_list_x *realloc_head) |
754 | { | 820 | { |
755 | struct pci_dev *dev; | 821 | struct pci_dev *dev; |
756 | unsigned long mask, prefmask; | 822 | unsigned long mask, prefmask; |
@@ -763,12 +829,12 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, | |||
763 | 829 | ||
764 | switch (dev->class >> 8) { | 830 | switch (dev->class >> 8) { |
765 | case PCI_CLASS_BRIDGE_CARDBUS: | 831 | case PCI_CLASS_BRIDGE_CARDBUS: |
766 | pci_bus_size_cardbus(b); | 832 | pci_bus_size_cardbus(b, realloc_head); |
767 | break; | 833 | break; |
768 | 834 | ||
769 | case PCI_CLASS_BRIDGE_PCI: | 835 | case PCI_CLASS_BRIDGE_PCI: |
770 | default: | 836 | default: |
771 | __pci_bus_size_bridges(b, add_head); | 837 | __pci_bus_size_bridges(b, realloc_head); |
772 | break; | 838 | break; |
773 | } | 839 | } |
774 | } | 840 | } |
@@ -792,7 +858,7 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, | |||
792 | * Fall through | 858 | * Fall through |
793 | */ | 859 | */ |
794 | default: | 860 | default: |
795 | pbus_size_io(bus, 0, additional_io_size, add_head); | 861 | pbus_size_io(bus, 0, additional_io_size, realloc_head); |
796 | /* If the bridge supports prefetchable range, size it | 862 | /* If the bridge supports prefetchable range, size it |
797 | separately. If it doesn't, or its prefetchable window | 863 | separately. If it doesn't, or its prefetchable window |
798 | has already been allocated by arch code, try | 864 | has already been allocated by arch code, try |
@@ -800,11 +866,11 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, | |||
800 | resources. */ | 866 | resources. */ |
801 | mask = IORESOURCE_MEM; | 867 | mask = IORESOURCE_MEM; |
802 | prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH; | 868 | prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH; |
803 | if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, add_head)) | 869 | if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, realloc_head)) |
804 | mask = prefmask; /* Success, size non-prefetch only. */ | 870 | mask = prefmask; /* Success, size non-prefetch only. */ |
805 | else | 871 | else |
806 | additional_mem_size += additional_mem_size; | 872 | additional_mem_size += additional_mem_size; |
807 | pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, add_head); | 873 | pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, realloc_head); |
808 | break; | 874 | break; |
809 | } | 875 | } |
810 | } | 876 | } |
@@ -816,20 +882,20 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus) | |||
816 | EXPORT_SYMBOL(pci_bus_size_bridges); | 882 | EXPORT_SYMBOL(pci_bus_size_bridges); |
817 | 883 | ||
818 | static void __ref __pci_bus_assign_resources(const struct pci_bus *bus, | 884 | static void __ref __pci_bus_assign_resources(const struct pci_bus *bus, |
819 | struct resource_list_x *add_head, | 885 | struct resource_list_x *realloc_head, |
820 | struct resource_list_x *fail_head) | 886 | struct resource_list_x *fail_head) |
821 | { | 887 | { |
822 | struct pci_bus *b; | 888 | struct pci_bus *b; |
823 | struct pci_dev *dev; | 889 | struct pci_dev *dev; |
824 | 890 | ||
825 | pbus_assign_resources_sorted(bus, add_head, fail_head); | 891 | pbus_assign_resources_sorted(bus, realloc_head, fail_head); |
826 | 892 | ||
827 | list_for_each_entry(dev, &bus->devices, bus_list) { | 893 | list_for_each_entry(dev, &bus->devices, bus_list) { |
828 | b = dev->subordinate; | 894 | b = dev->subordinate; |
829 | if (!b) | 895 | if (!b) |
830 | continue; | 896 | continue; |
831 | 897 | ||
832 | __pci_bus_assign_resources(b, add_head, fail_head); | 898 | __pci_bus_assign_resources(b, realloc_head, fail_head); |
833 | 899 | ||
834 | switch (dev->class >> 8) { | 900 | switch (dev->class >> 8) { |
835 | case PCI_CLASS_BRIDGE_PCI: | 901 | case PCI_CLASS_BRIDGE_PCI: |
@@ -1039,7 +1105,7 @@ void __init | |||
1039 | pci_assign_unassigned_resources(void) | 1105 | pci_assign_unassigned_resources(void) |
1040 | { | 1106 | { |
1041 | struct pci_bus *bus; | 1107 | struct pci_bus *bus; |
1042 | struct resource_list_x add_list; /* list of resources that | 1108 | struct resource_list_x realloc_list; /* list of resources that |
1043 | want additional resources */ | 1109 | want additional resources */ |
1044 | int tried_times = 0; | 1110 | int tried_times = 0; |
1045 | enum release_type rel_type = leaf_only; | 1111 | enum release_type rel_type = leaf_only; |
@@ -1052,7 +1118,7 @@ pci_assign_unassigned_resources(void) | |||
1052 | 1118 | ||
1053 | 1119 | ||
1054 | head.next = NULL; | 1120 | head.next = NULL; |
1055 | add_list.next = NULL; | 1121 | realloc_list.next = NULL; |
1056 | 1122 | ||
1057 | pci_try_num = max_depth + 1; | 1123 | pci_try_num = max_depth + 1; |
1058 | printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n", | 1124 | printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n", |
@@ -1062,12 +1128,12 @@ again: | |||
1062 | /* Depth first, calculate sizes and alignments of all | 1128 | /* Depth first, calculate sizes and alignments of all |
1063 | subordinate buses. */ | 1129 | subordinate buses. */ |
1064 | list_for_each_entry(bus, &pci_root_buses, node) | 1130 | list_for_each_entry(bus, &pci_root_buses, node) |
1065 | __pci_bus_size_bridges(bus, &add_list); | 1131 | __pci_bus_size_bridges(bus, &realloc_list); |
1066 | 1132 | ||
1067 | /* Depth last, allocate resources and update the hardware. */ | 1133 | /* Depth last, allocate resources and update the hardware. */ |
1068 | list_for_each_entry(bus, &pci_root_buses, node) | 1134 | list_for_each_entry(bus, &pci_root_buses, node) |
1069 | __pci_bus_assign_resources(bus, &add_list, &head); | 1135 | __pci_bus_assign_resources(bus, &realloc_list, &head); |
1070 | BUG_ON(add_list.next); | 1136 | BUG_ON(realloc_list.next); |
1071 | tried_times++; | 1137 | tried_times++; |
1072 | 1138 | ||
1073 | /* any device complain? */ | 1139 | /* any device complain? */ |
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 319f359906e..51a9095c7da 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c | |||
@@ -128,16 +128,16 @@ void pci_disable_bridge_window(struct pci_dev *dev) | |||
128 | } | 128 | } |
129 | #endif /* CONFIG_PCI_QUIRKS */ | 129 | #endif /* CONFIG_PCI_QUIRKS */ |
130 | 130 | ||
131 | |||
132 | |||
131 | static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, | 133 | static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, |
132 | int resno) | 134 | int resno, resource_size_t size, resource_size_t align) |
133 | { | 135 | { |
134 | struct resource *res = dev->resource + resno; | 136 | struct resource *res = dev->resource + resno; |
135 | resource_size_t size, min, align; | 137 | resource_size_t min; |
136 | int ret; | 138 | int ret; |
137 | 139 | ||
138 | size = resource_size(res); | ||
139 | min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; | 140 | min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; |
140 | align = pci_resource_alignment(dev, res); | ||
141 | 141 | ||
142 | /* First, try exact prefetching match.. */ | 142 | /* First, try exact prefetching match.. */ |
143 | ret = pci_bus_alloc_resource(bus, res, size, align, min, | 143 | ret = pci_bus_alloc_resource(bus, res, size, align, min, |
@@ -154,56 +154,101 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, | |||
154 | ret = pci_bus_alloc_resource(bus, res, size, align, min, 0, | 154 | ret = pci_bus_alloc_resource(bus, res, size, align, min, 0, |
155 | pcibios_align_resource, dev); | 155 | pcibios_align_resource, dev); |
156 | } | 156 | } |
157 | return ret; | ||
158 | } | ||
157 | 159 | ||
158 | if (ret < 0 && dev->fw_addr[resno]) { | 160 | static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, |
159 | struct resource *root, *conflict; | 161 | int resno, resource_size_t size) |
160 | resource_size_t start, end; | 162 | { |
163 | struct resource *root, *conflict; | ||
164 | resource_size_t start, end; | ||
165 | int ret = 0; | ||
161 | 166 | ||
162 | /* | 167 | if (res->flags & IORESOURCE_IO) |
163 | * If we failed to assign anything, let's try the address | 168 | root = &ioport_resource; |
164 | * where firmware left it. That at least has a chance of | 169 | else |
165 | * working, which is better than just leaving it disabled. | 170 | root = &iomem_resource; |
166 | */ | 171 | |
172 | start = res->start; | ||
173 | end = res->end; | ||
174 | res->start = dev->fw_addr[resno]; | ||
175 | res->end = res->start + size - 1; | ||
176 | dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n", | ||
177 | resno, res); | ||
178 | conflict = request_resource_conflict(root, res); | ||
179 | if (conflict) { | ||
180 | dev_info(&dev->dev, | ||
181 | "BAR %d: %pR conflicts with %s %pR\n", resno, | ||
182 | res, conflict->name, conflict); | ||
183 | res->start = start; | ||
184 | res->end = end; | ||
185 | ret = 1; | ||
186 | } | ||
187 | return ret; | ||
188 | } | ||
189 | |||
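The factored-out pci_revert_fw_address() is a save/try/restore pattern: remember the current range, point the BAR at the address firmware had programmed, and roll back if that range conflicts. A self-contained model; claim_range() is a stand-in for request_resource_conflict() and simply pretends the range is taken:

    #include <stdio.h>

    struct range { unsigned long long start, end; };

    /* Stand-in for request_resource_conflict(); the real function walks
     * the resource tree. Nonzero means a conflict was found. */
    static int claim_range(struct range *r)
    {
        (void)r;
        return 1;
    }

    static int try_fw_address(struct range *res, unsigned long long fw_addr,
                              unsigned long long size)
    {
        struct range saved = *res;

        res->start = fw_addr;
        res->end = fw_addr + size - 1;
        if (claim_range(res)) {
            *res = saved;   /* conflict: restore the original range */
            return 1;       /* mirrors the kernel's nonzero "still failed" */
        }
        return 0;
    }

    int main(void)
    {
        struct range bar = { 0, 0 };
        printf("revert needed: %d\n",
               try_fw_address(&bar, 0xe0000000ULL, 0x1000));
        return 0;
    }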
190 | static int _pci_assign_resource(struct pci_dev *dev, int resno, int size, resource_size_t min_align) | ||
191 | { | ||
192 | struct resource *res = dev->resource + resno; | ||
193 | struct pci_bus *bus; | ||
194 | int ret; | ||
195 | char *type; | ||
167 | 196 | ||
168 | if (res->flags & IORESOURCE_IO) | 197 | bus = dev->bus; |
169 | root = &ioport_resource; | 198 | while ((ret = __pci_assign_resource(bus, dev, resno, size, min_align))) { |
199 | if (!bus->parent || !bus->self->transparent) | ||
200 | break; | ||
201 | bus = bus->parent; | ||
202 | } | ||
203 | |||
204 | if (ret) { | ||
205 | if (res->flags & IORESOURCE_MEM) | ||
206 | if (res->flags & IORESOURCE_PREFETCH) | ||
207 | type = "mem pref"; | ||
208 | else | ||
209 | type = "mem"; | ||
210 | else if (res->flags & IORESOURCE_IO) | ||
211 | type = "io"; | ||
170 | else | 212 | else |
171 | root = &iomem_resource; | 213 | type = "unknown"; |
172 | 214 | dev_info(&dev->dev, | |
173 | start = res->start; | 215 | "BAR %d: can't assign %s (size %#llx)\n", |
174 | end = res->end; | 216 | resno, type, (unsigned long long) resource_size(res)); |
175 | res->start = dev->fw_addr[resno]; | ||
176 | res->end = res->start + size - 1; | ||
177 | dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n", | ||
178 | resno, res); | ||
179 | conflict = request_resource_conflict(root, res); | ||
180 | if (conflict) { | ||
181 | dev_info(&dev->dev, | ||
182 | "BAR %d: %pR conflicts with %s %pR\n", resno, | ||
183 | res, conflict->name, conflict); | ||
184 | res->start = start; | ||
185 | res->end = end; | ||
186 | } else | ||
187 | ret = 0; | ||
188 | } | 217 | } |
189 | 218 | ||
219 | return ret; | ||
220 | } | ||
221 | |||
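The loop now collected in _pci_assign_resource() retries the allocation one bus closer to the root as long as the intervening bridge is transparent (it forwards cycles it does not positively decode). A runnable sketch of just that control flow; bus->self->transparent is flattened into a single flag here, and alloc_on_bus() is a stub that always fails so the climb is visible:

    struct bus {
        struct bus *parent;
        int transparent;    /* stand-in for bus->self->transparent */
    };

    static int alloc_on_bus(struct bus *b)
    {
        (void)b;
        return -1;          /* stub: allocation never succeeds */
    }

    static int assign_walk_up(struct bus *bus)
    {
        int ret;

        while ((ret = alloc_on_bus(bus))) {
            if (!bus->parent || !bus->transparent)
                break;          /* no ancestor left worth trying */
            bus = bus->parent;  /* retry inside the parent's windows */
        }
        return ret;
    }

    int main(void)
    {
        struct bus root = { 0, 0 };
        struct bus child = { &root, 1 };
        return assign_walk_up(&child) ? 1 : 0;
    }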
222 | int pci_reassign_resource(struct pci_dev *dev, int resno, resource_size_t addsize, | ||
223 | resource_size_t min_align) | ||
224 | { | ||
225 | struct resource *res = dev->resource + resno; | ||
226 | resource_size_t new_size; | ||
227 | int ret; | ||
228 | |||
229 | if (!res->parent) { | ||
230 | dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resource %pR\n", | ||
231 | resno, res); | ||
232 | return -EINVAL; | ||
233 | } | ||
234 | |||
235 | new_size = resource_size(res) + addsize + min_align; | ||
236 | ret = _pci_assign_resource(dev, resno, new_size, min_align); | ||
190 | if (!ret) { | 237 | if (!ret) { |
191 | res->flags &= ~IORESOURCE_STARTALIGN; | 238 | res->flags &= ~IORESOURCE_STARTALIGN; |
192 | dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); | 239 | dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); |
193 | if (resno < PCI_BRIDGE_RESOURCES) | 240 | if (resno < PCI_BRIDGE_RESOURCES) |
194 | pci_update_resource(dev, resno); | 241 | pci_update_resource(dev, resno); |
195 | } | 242 | } |
196 | |||
197 | return ret; | 243 | return ret; |
198 | } | 244 | } |
199 | 245 | ||
200 | int pci_assign_resource(struct pci_dev *dev, int resno) | 246 | int pci_assign_resource(struct pci_dev *dev, int resno) |
201 | { | 247 | { |
202 | struct resource *res = dev->resource + resno; | 248 | struct resource *res = dev->resource + resno; |
203 | resource_size_t align; | 249 | resource_size_t align, size; |
204 | struct pci_bus *bus; | 250 | struct pci_bus *bus; |
205 | int ret; | 251 | int ret; |
206 | char *type; | ||
207 | 252 | ||
208 | align = pci_resource_alignment(dev, res); | 253 | align = pci_resource_alignment(dev, res); |
209 | if (!align) { | 254 | if (!align) { |
@@ -213,34 +258,27 @@ int pci_assign_resource(struct pci_dev *dev, int resno) | |||
213 | } | 258 | } |
214 | 259 | ||
215 | bus = dev->bus; | 260 | bus = dev->bus; |
216 | while ((ret = __pci_assign_resource(bus, dev, resno))) { | 261 | size = resource_size(res); |
217 | if (bus->parent && bus->self->transparent) | 262 | ret = _pci_assign_resource(dev, resno, size, align); |
218 | bus = bus->parent; | ||
219 | else | ||
220 | bus = NULL; | ||
221 | if (bus) | ||
222 | continue; | ||
223 | break; | ||
224 | } | ||
225 | 263 | ||
226 | if (ret) { | 264 | /* |
227 | if (res->flags & IORESOURCE_MEM) | 265 | * If we failed to assign anything, let's try the address |
228 | if (res->flags & IORESOURCE_PREFETCH) | 266 | * where firmware left it. That at least has a chance of |
229 | type = "mem pref"; | 267 | * working, which is better than just leaving it disabled. |
230 | else | 268 | */ |
231 | type = "mem"; | 269 | if (ret < 0 && dev->fw_addr[resno]) |
232 | else if (res->flags & IORESOURCE_IO) | 270 | ret = pci_revert_fw_address(res, dev, resno, size); |
233 | type = "io"; | ||
234 | else | ||
235 | type = "unknown"; | ||
236 | dev_info(&dev->dev, | ||
237 | "BAR %d: can't assign %s (size %#llx)\n", | ||
238 | resno, type, (unsigned long long) resource_size(res)); | ||
239 | } | ||
240 | 271 | ||
272 | if (!ret) { | ||
273 | res->flags &= ~IORESOURCE_STARTALIGN; | ||
274 | dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); | ||
275 | if (resno < PCI_BRIDGE_RESOURCES) | ||
276 | pci_update_resource(dev, resno); | ||
277 | } | ||
241 | return ret; | 278 | return ret; |
242 | } | 279 | } |
243 | 280 | ||
281 | |||
244 | /* Sort resources by alignment */ | 282 | /* Sort resources by alignment */ |
245 | void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) | 283 | void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) |
246 | { | 284 | { |
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 3195dbd3ec3..44e91e598f8 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c | |||
@@ -639,7 +639,7 @@ EXPORT_SYMBOL_GPL(rtc_irq_unregister); | |||
639 | static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled) | 639 | static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled) |
640 | { | 640 | { |
641 | /* | 641 | /* |
642 | * We unconditionally cancel the timer here, because otherwise | 642 | * We always cancel the timer here first, because otherwise |
643 | * we could run into BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); | 643 | * we could run into BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); |
644 | * when we manage to start the timer before the callback | 644 | * when we manage to start the timer before the callback |
645 | * returns HRTIMER_RESTART. | 645 | * returns HRTIMER_RESTART. |
@@ -708,7 +708,7 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq) | |||
708 | int err = 0; | 708 | int err = 0; |
709 | unsigned long flags; | 709 | unsigned long flags; |
710 | 710 | ||
711 | if (freq <= 0 || freq > 5000) | 711 | if (freq <= 0 || freq > RTC_MAX_FREQ) |
712 | return -EINVAL; | 712 | return -EINVAL; |
713 | retry: | 713 | retry: |
714 | spin_lock_irqsave(&rtc->irq_task_lock, flags); | 714 | spin_lock_irqsave(&rtc->irq_task_lock, flags); |
diff --git a/drivers/staging/gma500/mdfld_dsi_dbi.c b/drivers/staging/gma500/mdfld_dsi_dbi.c index 02e17c9c863..fd211f3467c 100644 --- a/drivers/staging/gma500/mdfld_dsi_dbi.c +++ b/drivers/staging/gma500/mdfld_dsi_dbi.c | |||
@@ -711,10 +711,11 @@ struct mdfld_dsi_encoder *mdfld_dsi_dbi_init(struct drm_device *dev, | |||
711 | /* Create drm encoder object */ | 711 | /* Create drm encoder object */ |
712 | connector = &dsi_connector->base.base; | 712 | connector = &dsi_connector->base.base; |
713 | encoder = &dbi_output->base.base; | 713 | encoder = &dbi_output->base.base; |
714 | /* Review this if we ever get MIPI-HDMI bridges or similar */ | ||
714 | drm_encoder_init(dev, | 715 | drm_encoder_init(dev, |
715 | encoder, | 716 | encoder, |
716 | p_funcs->encoder_funcs, | 717 | p_funcs->encoder_funcs, |
717 | DRM_MODE_ENCODER_MIPI); | 718 | DRM_MODE_ENCODER_LVDS); |
718 | drm_encoder_helper_add(encoder, p_funcs->encoder_helper_funcs); | 719 | drm_encoder_helper_add(encoder, p_funcs->encoder_helper_funcs); |
719 | 720 | ||
720 | /* Attach to given connector */ | 721 | /* Attach to given connector */ |
diff --git a/drivers/staging/gma500/mdfld_dsi_dbi.h b/drivers/staging/gma500/mdfld_dsi_dbi.h index dc6242c51d0..f0fa986fd93 100644 --- a/drivers/staging/gma500/mdfld_dsi_dbi.h +++ b/drivers/staging/gma500/mdfld_dsi_dbi.h | |||
@@ -42,9 +42,6 @@ | |||
42 | #include "mdfld_dsi_output.h" | 42 | #include "mdfld_dsi_output.h" |
43 | #include "mdfld_output.h" | 43 | #include "mdfld_output.h" |
44 | 44 | ||
45 | #define DRM_MODE_ENCODER_MIPI 5 | ||
46 | |||
47 | |||
48 | /* | 45 | /* |
49 | * DBI encoder which inherits from mdfld_dsi_encoder | 46 | * DBI encoder which inherits from mdfld_dsi_encoder |
50 | */ | 47 | */ |
diff --git a/drivers/staging/gma500/mdfld_dsi_dpi.c b/drivers/staging/gma500/mdfld_dsi_dpi.c index 6e03a91e947..e685f1217ba 100644 --- a/drivers/staging/gma500/mdfld_dsi_dpi.c +++ b/drivers/staging/gma500/mdfld_dsi_dpi.c | |||
@@ -777,10 +777,15 @@ struct mdfld_dsi_encoder *mdfld_dsi_dpi_init(struct drm_device *dev, | |||
777 | /* Create drm encoder object */ | 777 | /* Create drm encoder object */ |
778 | connector = &dsi_connector->base.base; | 778 | connector = &dsi_connector->base.base; |
779 | encoder = &dpi_output->base.base; | 779 | encoder = &dpi_output->base.base; |
780 | /* | ||
781 | * On existing hardware this will be a panel of some form, | ||
782 | * if future devices also have HDMI bridges this will need | ||
783 | * revisiting | ||
784 | */ | ||
780 | drm_encoder_init(dev, | 785 | drm_encoder_init(dev, |
781 | encoder, | 786 | encoder, |
782 | p_funcs->encoder_funcs, | 787 | p_funcs->encoder_funcs, |
783 | DRM_MODE_ENCODER_MIPI); | 788 | DRM_MODE_ENCODER_LVDS); |
784 | drm_encoder_helper_add(encoder, | 789 | drm_encoder_helper_add(encoder, |
785 | p_funcs->encoder_helper_funcs); | 790 | p_funcs->encoder_helper_funcs); |
786 | 791 | ||
diff --git a/drivers/staging/gma500/mdfld_dsi_output.c b/drivers/staging/gma500/mdfld_dsi_output.c index 7536095c30a..9050c0f78b1 100644 --- a/drivers/staging/gma500/mdfld_dsi_output.c +++ b/drivers/staging/gma500/mdfld_dsi_output.c | |||
@@ -955,7 +955,9 @@ void mdfld_dsi_output_init(struct drm_device *dev, | |||
955 | psb_output->type = (pipe == 0) ? INTEL_OUTPUT_MIPI : INTEL_OUTPUT_MIPI2; | 955 | psb_output->type = (pipe == 0) ? INTEL_OUTPUT_MIPI : INTEL_OUTPUT_MIPI2; |
956 | 956 | ||
957 | connector = &psb_output->base; | 957 | connector = &psb_output->base; |
958 | drm_connector_init(dev, connector, &mdfld_dsi_connector_funcs, DRM_MODE_CONNECTOR_MIPI); | 958 | /* Revisit type if MIPI/HDMI bridges ever appear on Medfield */ |
959 | drm_connector_init(dev, connector, &mdfld_dsi_connector_funcs, | ||
960 | DRM_MODE_CONNECTOR_LVDS); | ||
959 | drm_connector_helper_add(connector, &mdfld_dsi_connector_helper_funcs); | 961 | drm_connector_helper_add(connector, &mdfld_dsi_connector_helper_funcs); |
960 | 962 | ||
961 | connector->display_info.subpixel_order = SubPixelHorizontalRGB; | 963 | connector->display_info.subpixel_order = SubPixelHorizontalRGB; |
diff --git a/drivers/staging/gma500/medfield.h b/drivers/staging/gma500/medfield.h index 38165e8367e..09e9687431f 100644 --- a/drivers/staging/gma500/medfield.h +++ b/drivers/staging/gma500/medfield.h | |||
@@ -21,8 +21,6 @@ | |||
21 | * DEALINGS IN THE SOFTWARE. | 21 | * DEALINGS IN THE SOFTWARE. |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #define DRM_MODE_ENCODER_MIPI 5 | ||
25 | |||
26 | /* Medfield DSI controller registers */ | 24 | /* Medfield DSI controller registers */ |
27 | 25 | ||
28 | #define MIPIA_DEVICE_READY_REG 0xb000 | 26 | #define MIPIA_DEVICE_READY_REG 0xb000 |
diff --git a/drivers/staging/gma500/psb_drv.h b/drivers/staging/gma500/psb_drv.h index 72f487a2a1b..fd4732dd783 100644 --- a/drivers/staging/gma500/psb_drv.h +++ b/drivers/staging/gma500/psb_drv.h | |||
@@ -35,7 +35,6 @@ | |||
35 | 35 | ||
36 | /* Append new drm mode definition here, align with libdrm definition */ | 36 | /* Append new drm mode definition here, align with libdrm definition */ |
37 | #define DRM_MODE_SCALE_NO_SCALE 2 | 37 | #define DRM_MODE_SCALE_NO_SCALE 2 |
38 | #define DRM_MODE_CONNECTOR_MIPI 15 | ||
39 | 38 | ||
40 | enum { | 39 | enum { |
41 | CHIP_PSB_8108 = 0, /* Poulsbo */ | 40 | CHIP_PSB_8108 = 0, /* Poulsbo */ |
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 1b4afd81f87..6ea852e2516 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c | |||
@@ -70,6 +70,7 @@ | |||
70 | #include <linux/kernel.h> | 70 | #include <linux/kernel.h> |
71 | #include <linux/mm.h> | 71 | #include <linux/mm.h> |
72 | #include <linux/mman.h> | 72 | #include <linux/mman.h> |
73 | #include <linux/module.h> | ||
73 | #include <linux/workqueue.h> | 74 | #include <linux/workqueue.h> |
74 | #include <xen/balloon.h> | 75 | #include <xen/balloon.h> |
75 | #include <xen/tmem.h> | 76 | #include <xen/tmem.h> |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 54b8c28bebc..720d885e8dc 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -474,17 +474,22 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
474 | befs_data_stream *data = &befs_ino->i_data.ds; | 474 | befs_data_stream *data = &befs_ino->i_data.ds; |
475 | befs_off_t len = data->size; | 475 | befs_off_t len = data->size; |
476 | 476 | ||
477 | befs_debug(sb, "Follow long symlink"); | 477 | if (len == 0) { |
478 | 478 | befs_error(sb, "Long symlink with illegal length"); | |
479 | link = kmalloc(len, GFP_NOFS); | ||
480 | if (!link) { | ||
481 | link = ERR_PTR(-ENOMEM); | ||
482 | } else if (befs_read_lsymlink(sb, data, link, len) != len) { | ||
483 | kfree(link); | ||
484 | befs_error(sb, "Failed to read entire long symlink"); | ||
485 | link = ERR_PTR(-EIO); | 479 | link = ERR_PTR(-EIO); |
486 | } else { | 480 | } else { |
487 | link[len - 1] = '\0'; | 481 | befs_debug(sb, "Follow long symlink"); |
482 | |||
483 | link = kmalloc(len, GFP_NOFS); | ||
484 | if (!link) { | ||
485 | link = ERR_PTR(-ENOMEM); | ||
486 | } else if (befs_read_lsymlink(sb, data, link, len) != len) { | ||
487 | kfree(link); | ||
488 | befs_error(sb, "Failed to read entire long symlink"); | ||
489 | link = ERR_PTR(-EIO); | ||
490 | } else { | ||
491 | link[len - 1] = '\0'; | ||
492 | } | ||
488 | } | 493 | } |
489 | } else { | 494 | } else { |
490 | link = befs_ino->i_data.symlink; | 495 | link = befs_ino->i_data.symlink; |
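The new len == 0 branch guards the allocation path that follows it: kmalloc(0, ...) returns the special ZERO_SIZE_PTR token rather than NULL, the zero-byte read then "succeeds", and the link[len - 1] terminator write indexes before the buffer. A userspace sketch of the fixed flow, with malloc() standing in for kmalloc():

    #include <stdlib.h>
    #include <string.h>

    /* Return a NUL-terminated copy of an unterminated symlink body, or
     * NULL on error; a zero length is rejected up front, as in the fix. */
    static char *read_long_symlink(const char *raw, size_t len)
    {
        char *link;

        if (len == 0)
            return NULL;        /* illegal on-disk length: fail early */

        link = malloc(len);
        if (!link)
            return NULL;
        memcpy(link, raw, len);
        link[len - 1] = '\0';   /* safe now that len >= 1 */
        return link;
    }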
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0469263e327..03912c5c6f4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -1415,17 +1415,15 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); | |||
1415 | #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ | 1415 | #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ |
1416 | static inline u##bits btrfs_##name(struct extent_buffer *eb) \ | 1416 | static inline u##bits btrfs_##name(struct extent_buffer *eb) \ |
1417 | { \ | 1417 | { \ |
1418 | type *p = kmap_atomic(eb->first_page, KM_USER0); \ | 1418 | type *p = page_address(eb->first_page); \ |
1419 | u##bits res = le##bits##_to_cpu(p->member); \ | 1419 | u##bits res = le##bits##_to_cpu(p->member); \ |
1420 | kunmap_atomic(p, KM_USER0); \ | ||
1421 | return res; \ | 1420 | return res; \ |
1422 | } \ | 1421 | } \ |
1423 | static inline void btrfs_set_##name(struct extent_buffer *eb, \ | 1422 | static inline void btrfs_set_##name(struct extent_buffer *eb, \ |
1424 | u##bits val) \ | 1423 | u##bits val) \ |
1425 | { \ | 1424 | { \ |
1426 | type *p = kmap_atomic(eb->first_page, KM_USER0); \ | 1425 | type *p = page_address(eb->first_page); \ |
1427 | p->member = cpu_to_le##bits(val); \ | 1426 | p->member = cpu_to_le##bits(val); \ |
1428 | kunmap_atomic(p, KM_USER0); \ | ||
1429 | } | 1427 | } |
1430 | 1428 | ||
1431 | #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ | 1429 | #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ |
@@ -2367,8 +2365,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
2367 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2365 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2368 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2366 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2369 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 2367 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
2370 | int btrfs_drop_snapshot(struct btrfs_root *root, | 2368 | void btrfs_drop_snapshot(struct btrfs_root *root, |
2371 | struct btrfs_block_rsv *block_rsv, int update_ref); | 2369 | struct btrfs_block_rsv *block_rsv, int update_ref); |
2372 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 2370 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
2373 | struct btrfs_root *root, | 2371 | struct btrfs_root *root, |
2374 | struct extent_buffer *node, | 2372 | struct extent_buffer *node, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 66bac226944..f5be06a2462 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1782,6 +1782,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1782 | 1782 | ||
1783 | 1783 | ||
1784 | for (i = 0; i < multi->num_stripes; i++, stripe++) { | 1784 | for (i = 0; i < multi->num_stripes; i++, stripe++) { |
1785 | if (!stripe->dev->can_discard) | ||
1786 | continue; | ||
1787 | |||
1785 | ret = btrfs_issue_discard(stripe->dev->bdev, | 1788 | ret = btrfs_issue_discard(stripe->dev->bdev, |
1786 | stripe->physical, | 1789 | stripe->physical, |
1787 | stripe->length); | 1790 | stripe->length); |
@@ -1789,11 +1792,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1789 | discarded_bytes += stripe->length; | 1792 | discarded_bytes += stripe->length; |
1790 | else if (ret != -EOPNOTSUPP) | 1793 | else if (ret != -EOPNOTSUPP) |
1791 | break; | 1794 | break; |
1795 | |||
1796 | /* | ||
1797 | * Just in case we get back EOPNOTSUPP for some reason, | ||
1798 | * just ignore the return value so we don't screw up | ||
1799 | * people calling discard_extent. | ||
1800 | */ | ||
1801 | ret = 0; | ||
1792 | } | 1802 | } |
1793 | kfree(multi); | 1803 | kfree(multi); |
1794 | } | 1804 | } |
1795 | if (discarded_bytes && ret == -EOPNOTSUPP) | ||
1796 | ret = 0; | ||
1797 | 1805 | ||
1798 | if (actual_bytes) | 1806 | if (actual_bytes) |
1799 | *actual_bytes = discarded_bytes; | 1807 | *actual_bytes = discarded_bytes; |
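The reshaped loop encodes two per-stripe policies: devices without discard support are skipped outright, and an unexpected EOPNOTSUPP is swallowed for that stripe alone instead of being patched up after the loop. A userspace model with the kernel structures reduced to stubs (issue_discard() stands in for btrfs_issue_discard()):

    #include <errno.h>

    struct stripe { int can_discard; unsigned long long length; };

    static int issue_discard(const struct stripe *s)
    {
        return s->can_discard ? 0 : -EOPNOTSUPP;   /* stub outcome */
    }

    static int discard_stripes(struct stripe *stripes, int n,
                               unsigned long long *actual_bytes)
    {
        unsigned long long discarded = 0;
        int i, ret = 0;

        for (i = 0; i < n; i++) {
            if (!stripes[i].can_discard)
                continue;                 /* no discard support: skip */
            ret = issue_discard(&stripes[i]);
            if (!ret)
                discarded += stripes[i].length;
            else if (ret != -EOPNOTSUPP)
                break;                    /* a real error stops the loop */
            ret = 0;                      /* EOPNOTSUPP: ignore, carry on */
        }
        *actual_bytes = discarded;
        return ret;
    }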
@@ -6269,8 +6277,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
6269 | * also make sure backrefs for the shared block and all lower level | 6277 | * also make sure backrefs for the shared block and all lower level |
6270 | * blocks are properly updated. | 6278 | * blocks are properly updated. |
6271 | */ | 6279 | */ |
6272 | int btrfs_drop_snapshot(struct btrfs_root *root, | 6280 | void btrfs_drop_snapshot(struct btrfs_root *root, |
6273 | struct btrfs_block_rsv *block_rsv, int update_ref) | 6281 | struct btrfs_block_rsv *block_rsv, int update_ref) |
6274 | { | 6282 | { |
6275 | struct btrfs_path *path; | 6283 | struct btrfs_path *path; |
6276 | struct btrfs_trans_handle *trans; | 6284 | struct btrfs_trans_handle *trans; |
@@ -6283,13 +6291,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6283 | int level; | 6291 | int level; |
6284 | 6292 | ||
6285 | path = btrfs_alloc_path(); | 6293 | path = btrfs_alloc_path(); |
6286 | if (!path) | 6294 | if (!path) { |
6287 | return -ENOMEM; | 6295 | err = -ENOMEM; |
6296 | goto out; | ||
6297 | } | ||
6288 | 6298 | ||
6289 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 6299 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
6290 | if (!wc) { | 6300 | if (!wc) { |
6291 | btrfs_free_path(path); | 6301 | btrfs_free_path(path); |
6292 | return -ENOMEM; | 6302 | err = -ENOMEM; |
6303 | goto out; | ||
6293 | } | 6304 | } |
6294 | 6305 | ||
6295 | trans = btrfs_start_transaction(tree_root, 0); | 6306 | trans = btrfs_start_transaction(tree_root, 0); |
@@ -6318,7 +6329,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6318 | path->lowest_level = 0; | 6329 | path->lowest_level = 0; |
6319 | if (ret < 0) { | 6330 | if (ret < 0) { |
6320 | err = ret; | 6331 | err = ret; |
6321 | goto out; | 6332 | goto out_free; |
6322 | } | 6333 | } |
6323 | WARN_ON(ret > 0); | 6334 | WARN_ON(ret > 0); |
6324 | 6335 | ||
@@ -6425,11 +6436,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6425 | free_extent_buffer(root->commit_root); | 6436 | free_extent_buffer(root->commit_root); |
6426 | kfree(root); | 6437 | kfree(root); |
6427 | } | 6438 | } |
6428 | out: | 6439 | out_free: |
6429 | btrfs_end_transaction_throttle(trans, tree_root); | 6440 | btrfs_end_transaction_throttle(trans, tree_root); |
6430 | kfree(wc); | 6441 | kfree(wc); |
6431 | btrfs_free_path(path); | 6442 | btrfs_free_path(path); |
6432 | return err; | 6443 | out: |
6444 | if (err) | ||
6445 | btrfs_std_error(root->fs_info, err); | ||
6446 | return; | ||
6433 | } | 6447 | } |
6434 | 6448 | ||
6435 | /* | 6449 | /* |
@@ -6720,6 +6734,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6720 | struct btrfs_space_info *space_info; | 6734 | struct btrfs_space_info *space_info; |
6721 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 6735 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
6722 | struct btrfs_device *device; | 6736 | struct btrfs_device *device; |
6737 | u64 min_free; | ||
6738 | u64 dev_min = 1; | ||
6739 | u64 dev_nr = 0; | ||
6740 | int index; | ||
6723 | int full = 0; | 6741 | int full = 0; |
6724 | int ret = 0; | 6742 | int ret = 0; |
6725 | 6743 | ||
@@ -6729,8 +6747,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6729 | if (!block_group) | 6747 | if (!block_group) |
6730 | return -1; | 6748 | return -1; |
6731 | 6749 | ||
6750 | min_free = btrfs_block_group_used(&block_group->item); | ||
6751 | |||
6732 | /* no bytes used, we're good */ | 6752 | /* no bytes used, we're good */ |
6733 | if (!btrfs_block_group_used(&block_group->item)) | 6753 | if (!min_free) |
6734 | goto out; | 6754 | goto out; |
6735 | 6755 | ||
6736 | space_info = block_group->space_info; | 6756 | space_info = block_group->space_info; |
@@ -6746,10 +6766,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6746 | * all of the extents from this block group. If we can, we're good | 6766 | * all of the extents from this block group. If we can, we're good |
6747 | */ | 6767 | */ |
6748 | if ((space_info->total_bytes != block_group->key.offset) && | 6768 | if ((space_info->total_bytes != block_group->key.offset) && |
6749 | (space_info->bytes_used + space_info->bytes_reserved + | 6769 | (space_info->bytes_used + space_info->bytes_reserved + |
6750 | space_info->bytes_pinned + space_info->bytes_readonly + | 6770 | space_info->bytes_pinned + space_info->bytes_readonly + |
6751 | btrfs_block_group_used(&block_group->item) < | 6771 | min_free < space_info->total_bytes)) { |
6752 | space_info->total_bytes)) { | ||
6753 | spin_unlock(&space_info->lock); | 6772 | spin_unlock(&space_info->lock); |
6754 | goto out; | 6773 | goto out; |
6755 | } | 6774 | } |
@@ -6766,9 +6785,31 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6766 | if (full) | 6785 | if (full) |
6767 | goto out; | 6786 | goto out; |
6768 | 6787 | ||
6788 | /* | ||
6789 | * index: | ||
6790 | * 0: raid10 | ||
6791 | * 1: raid1 | ||
6792 | * 2: dup | ||
6793 | * 3: raid0 | ||
6794 | * 4: single | ||
6795 | */ | ||
6796 | index = get_block_group_index(block_group); | ||
6797 | if (index == 0) { | ||
6798 | dev_min = 4; | ||
6799 | /* Divide by 2 */ | ||
6800 | min_free >>= 1; | ||
6801 | } else if (index == 1) { | ||
6802 | dev_min = 2; | ||
6803 | } else if (index == 2) { | ||
6804 | /* Multiply by 2 */ | ||
6805 | min_free <<= 1; | ||
6806 | } else if (index == 3) { | ||
6807 | dev_min = fs_devices->rw_devices; | ||
6808 | do_div(min_free, dev_min); | ||
6809 | } | ||
6810 | |||
6769 | mutex_lock(&root->fs_info->chunk_mutex); | 6811 | mutex_lock(&root->fs_info->chunk_mutex); |
6770 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 6812 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
6771 | u64 min_free = btrfs_block_group_used(&block_group->item); | ||
6772 | u64 dev_offset; | 6813 | u64 dev_offset; |
6773 | 6814 | ||
6774 | /* | 6815 | /* |
@@ -6779,7 +6820,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6779 | ret = find_free_dev_extent(NULL, device, min_free, | 6820 | ret = find_free_dev_extent(NULL, device, min_free, |
6780 | &dev_offset, NULL); | 6821 | &dev_offset, NULL); |
6781 | if (!ret) | 6822 | if (!ret) |
6823 | dev_nr++; | ||
6824 | |||
6825 | if (dev_nr >= dev_min) | ||
6782 | break; | 6826 | break; |
6827 | |||
6783 | ret = -1; | 6828 | ret = -1; |
6784 | } | 6829 | } |
6785 | } | 6830 | } |
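The index-based block converts the bytes used by the block group into a per-device free-space requirement (min_free) plus the number of devices that must provide it (dev_min), following the raid table in the comment. A worked, runnable model of just the arithmetic; relocate_needs() is an invented name and plain division replaces do_div():

    #include <stdio.h>

    static void relocate_needs(int index, unsigned long long used,
                               unsigned long long rw_devices,
                               unsigned long long *min_free,
                               unsigned long long *dev_min)
    {
        *min_free = used;
        *dev_min = 1;

        if (index == 0) {              /* raid10: each mirror holds half */
            *dev_min = 4;
            *min_free >>= 1;
        } else if (index == 1) {       /* raid1: full copy on 2 devices */
            *dev_min = 2;
        } else if (index == 2) {       /* dup: both copies on one device */
            *min_free <<= 1;
        } else if (index == 3) {       /* raid0: striped over all devices */
            *dev_min = rw_devices;
            *min_free /= rw_devices;
        }
    }

    int main(void)
    {
        unsigned long long mf, dm;

        relocate_needs(0, 1ULL << 30, 4, &mf, &dm);  /* 1 GiB used, raid10 */
        printf("need %llu bytes free on each of %llu devices\n", mf, dm);
        return 0;
    }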
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 658d66959ab..e7872e485f1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -150,6 +150,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
150 | spin_lock(&root->fs_info->defrag_inodes_lock); | 150 | spin_lock(&root->fs_info->defrag_inodes_lock); |
151 | if (!BTRFS_I(inode)->in_defrag) | 151 | if (!BTRFS_I(inode)->in_defrag) |
152 | __btrfs_add_inode_defrag(inode, defrag); | 152 | __btrfs_add_inode_defrag(inode, defrag); |
153 | else | ||
154 | kfree(defrag); | ||
153 | spin_unlock(&root->fs_info->defrag_inodes_lock); | 155 | spin_unlock(&root->fs_info->defrag_inodes_lock); |
154 | return 0; | 156 | return 0; |
155 | } | 157 | } |
@@ -1638,11 +1640,15 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1638 | 1640 | ||
1639 | cur_offset = alloc_start; | 1641 | cur_offset = alloc_start; |
1640 | while (1) { | 1642 | while (1) { |
1643 | u64 actual_end; | ||
1644 | |||
1641 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | 1645 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, |
1642 | alloc_end - cur_offset, 0); | 1646 | alloc_end - cur_offset, 0); |
1643 | BUG_ON(IS_ERR_OR_NULL(em)); | 1647 | BUG_ON(IS_ERR_OR_NULL(em)); |
1644 | last_byte = min(extent_map_end(em), alloc_end); | 1648 | last_byte = min(extent_map_end(em), alloc_end); |
1649 | actual_end = min_t(u64, extent_map_end(em), offset + len); | ||
1645 | last_byte = (last_byte + mask) & ~mask; | 1650 | last_byte = (last_byte + mask) & ~mask; |
1651 | |||
1646 | if (em->block_start == EXTENT_MAP_HOLE || | 1652 | if (em->block_start == EXTENT_MAP_HOLE || |
1647 | (cur_offset >= inode->i_size && | 1653 | (cur_offset >= inode->i_size && |
1648 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 1654 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
@@ -1655,6 +1661,16 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1655 | free_extent_map(em); | 1661 | free_extent_map(em); |
1656 | break; | 1662 | break; |
1657 | } | 1663 | } |
1664 | } else if (actual_end > inode->i_size && | ||
1665 | !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
1666 | /* | ||
1667 | * We didn't need to allocate any more space, but we | ||
1668 | * still extended the size of the file so we need to | ||
1669 | * update i_size. | ||
1670 | */ | ||
1671 | inode->i_ctime = CURRENT_TIME; | ||
1672 | i_size_write(inode, actual_end); | ||
1673 | btrfs_ordered_update_i_size(inode, actual_end, NULL); | ||
1658 | } | 1674 | } |
1659 | free_extent_map(em); | 1675 | free_extent_map(em); |
1660 | 1676 | ||
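The new branch covers a range that was already preallocated: no extent is inserted, yet offset + len reaches past EOF, so i_size must still grow unless FALLOC_FL_KEEP_SIZE was passed. The size computation in isolation, as a runnable sketch (new_isize() is an invented helper name):

    #include <stdio.h>

    #define FALLOC_FL_KEEP_SIZE 0x01

    static unsigned long long new_isize(unsigned long long isize,
                                        unsigned long long extent_end,
                                        unsigned long long offset,
                                        unsigned long long len, int mode)
    {
        /* actual_end = min(extent_map_end(em), offset + len) */
        unsigned long long actual_end =
            extent_end < offset + len ? extent_end : offset + len;

        if (actual_end > isize && !(mode & FALLOC_FL_KEEP_SIZE))
            return actual_end;          /* the file grew: update i_size */
        return isize;
    }

    int main(void)
    {
        /* 4 KiB file, extent prealloc'd to 16 KiB, fallocate 8 KiB at 4 KiB */
        printf("%llu\n", new_isize(4096, 16384, 4096, 8192, 0));  /* 12288 */
        return 0;
    }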
@@ -1804,10 +1820,14 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) | |||
1804 | } | 1820 | } |
1805 | } | 1821 | } |
1806 | 1822 | ||
1807 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | 1823 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { |
1808 | return -EINVAL; | 1824 | ret = -EINVAL; |
1809 | if (offset > inode->i_sb->s_maxbytes) | 1825 | goto out; |
1810 | return -EINVAL; | 1826 | } |
1827 | if (offset > inode->i_sb->s_maxbytes) { | ||
1828 | ret = -EINVAL; | ||
1829 | goto out; | ||
1830 | } | ||
1811 | 1831 | ||
1812 | /* Special lock needed here? */ | 1832 | /* Special lock needed here? */ |
1813 | if (offset != file->f_pos) { | 1833 | if (offset != file->f_pos) { |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6377713f639..6a265b9f85f 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -1168,9 +1168,9 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) | |||
1168 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); | 1168 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); |
1169 | } | 1169 | } |
1170 | 1170 | ||
1171 | static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | 1171 | static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, |
1172 | struct btrfs_free_space *info, u64 offset, | 1172 | struct btrfs_free_space *info, |
1173 | u64 bytes) | 1173 | u64 offset, u64 bytes) |
1174 | { | 1174 | { |
1175 | unsigned long start, count; | 1175 | unsigned long start, count; |
1176 | 1176 | ||
@@ -1181,6 +1181,13 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | |||
1181 | bitmap_clear(info->bitmap, start, count); | 1181 | bitmap_clear(info->bitmap, start, count); |
1182 | 1182 | ||
1183 | info->bytes -= bytes; | 1183 | info->bytes -= bytes; |
1184 | } | ||
1185 | |||
1186 | static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | ||
1187 | struct btrfs_free_space *info, u64 offset, | ||
1188 | u64 bytes) | ||
1189 | { | ||
1190 | __bitmap_clear_bits(ctl, info, offset, bytes); | ||
1184 | ctl->free_space -= bytes; | 1191 | ctl->free_space -= bytes; |
1185 | } | 1192 | } |
1186 | 1193 | ||
@@ -1984,7 +1991,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, | |||
1984 | return 0; | 1991 | return 0; |
1985 | 1992 | ||
1986 | ret = search_start; | 1993 | ret = search_start; |
1987 | bitmap_clear_bits(ctl, entry, ret, bytes); | 1994 | __bitmap_clear_bits(ctl, entry, ret, bytes); |
1988 | 1995 | ||
1989 | return ret; | 1996 | return ret; |
1990 | } | 1997 | } |
@@ -2039,7 +2046,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
2039 | continue; | 2046 | continue; |
2040 | } | 2047 | } |
2041 | } else { | 2048 | } else { |
2042 | |||
2043 | ret = entry->offset; | 2049 | ret = entry->offset; |
2044 | 2050 | ||
2045 | entry->offset += bytes; | 2051 | entry->offset += bytes; |
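The __bitmap_clear_bits()/bitmap_clear_bits() split earlier in this hunk is a common kernel shape: a bare helper that mutates only the entry, and a wrapper that additionally keeps the aggregate counter in sync. btrfs_alloc_from_bitmap() moves to the bare helper, presumably because cluster space is already accounted against ctl->free_space and deducting it a second time would double-count. A minimal model of the shape:

    struct ctl {
        unsigned long long free_space;  /* aggregate over all entries */
    };

    struct entry {
        unsigned long long bytes;       /* bytes tracked by this bitmap */
    };

    static void __clear_bits(struct entry *e, unsigned long long bytes)
    {
        e->bytes -= bytes;              /* mutate the entry only */
    }

    static void clear_bits(struct ctl *c, struct entry *e,
                           unsigned long long bytes)
    {
        __clear_bits(e, bytes);
        c->free_space -= bytes;         /* keep the aggregate in sync */
    }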
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 15fceefbca0..0ccc7438ad3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -7354,11 +7354,15 @@ static int btrfs_set_page_dirty(struct page *page) | |||
7354 | static int btrfs_permission(struct inode *inode, int mask) | 7354 | static int btrfs_permission(struct inode *inode, int mask) |
7355 | { | 7355 | { |
7356 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7356 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7357 | umode_t mode = inode->i_mode; | ||
7357 | 7358 | ||
7358 | if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) | 7359 | if (mask & MAY_WRITE && |
7359 | return -EROFS; | 7360 | (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { |
7360 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) | 7361 | if (btrfs_root_readonly(root)) |
7361 | return -EACCES; | 7362 | return -EROFS; |
7363 | if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) | ||
7364 | return -EACCES; | ||
7365 | } | ||
7362 | return generic_permission(inode, mask); | 7366 | return generic_permission(inode, mask); |
7363 | } | 7367 | } |
7364 | 7368 | ||
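The reworked btrfs_permission() applies the read-only checks only to regular files, directories and symlinks: a write through a device node, fifo or socket that merely lives on a read-only subvolume does not modify the filesystem, so those cases fall through to generic_permission(). The predicate in isolation, as a runnable sketch (check_ro() is an invented name):

    #include <errno.h>
    #include <stdio.h>
    #include <sys/stat.h>

    #define MAY_WRITE 0x2

    static int check_ro(mode_t mode, int mask, int root_ro, int inode_ro)
    {
        if ((mask & MAY_WRITE) &&
            (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
            if (root_ro)
                return -EROFS;
            if (inode_ro)
                return -EACCES;
        }
        return 0;   /* generic_permission() would run next */
    }

    int main(void)
    {
        printf("%d\n", check_ro(S_IFCHR, MAY_WRITE, 1, 0));  /* 0: chardev ok */
        printf("%d\n", check_ro(S_IFREG, MAY_WRITE, 1, 0));  /* -EROFS */
        return 0;
    }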
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7cf01334994..970977aab22 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -2236,6 +2236,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2236 | btrfs_wait_ordered_range(src, off, len); | 2236 | btrfs_wait_ordered_range(src, off, len); |
2237 | } | 2237 | } |
2238 | 2238 | ||
2239 | /* truncate page cache pages from target inode range */ | ||
2240 | truncate_inode_pages_range(&inode->i_data, off, | ||
2241 | ALIGN(off + len, PAGE_CACHE_SIZE) - 1); | ||
2242 | |||
2239 | /* clone data */ | 2243 | /* clone data */ |
2240 | key.objectid = btrfs_ino(src); | 2244 | key.objectid = btrfs_ino(src); |
2241 | key.type = BTRFS_EXTENT_DATA_KEY; | 2245 | key.type = BTRFS_EXTENT_DATA_KEY; |
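The truncate range is rounded out so that the last page overlapping off + len is dropped in full before cloned extents are wired in, keeping stale cached data from surviving the clone. The rounding on its own, with PAGE_CACHE_SIZE assumed to be 4096:

    #include <stdio.h>

    #define PAGE_CACHE_SIZE 4096ULL
    #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        unsigned long long off = 1000, len = 5000;

        /* off + len = 6000 rounds up to 8192; truncate [1000, 8191] */
        printf("truncate [%llu, %llu]\n",
               off, ALIGN(off + len, PAGE_CACHE_SIZE) - 1);
        return 0;
    }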
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index babee65f8ed..786639fca06 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
799 | struct extent_buffer *eb, int slot, | 799 | struct extent_buffer *eb, int slot, |
800 | struct btrfs_key *key) | 800 | struct btrfs_key *key) |
801 | { | 801 | { |
802 | struct inode *dir; | ||
803 | int ret; | ||
804 | struct btrfs_inode_ref *ref; | 802 | struct btrfs_inode_ref *ref; |
803 | struct btrfs_dir_item *di; | ||
804 | struct inode *dir; | ||
805 | struct inode *inode; | 805 | struct inode *inode; |
806 | char *name; | ||
807 | int namelen; | ||
808 | unsigned long ref_ptr; | 806 | unsigned long ref_ptr; |
809 | unsigned long ref_end; | 807 | unsigned long ref_end; |
808 | char *name; | ||
809 | int namelen; | ||
810 | int ret; | ||
810 | int search_done = 0; | 811 | int search_done = 0; |
811 | 812 | ||
812 | /* | 813 | /* |
@@ -909,6 +910,25 @@ again: | |||
909 | } | 910 | } |
910 | btrfs_release_path(path); | 911 | btrfs_release_path(path); |
911 | 912 | ||
913 | /* look for a conflicting sequence number */ | ||
914 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), | ||
915 | btrfs_inode_ref_index(eb, ref), | ||
916 | name, namelen, 0); | ||
917 | if (di && !IS_ERR(di)) { | ||
918 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
919 | BUG_ON(ret); | ||
920 | } | ||
921 | btrfs_release_path(path); | ||
922 | |||
923 | /* look for a conflicting name */ | ||
924 | di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), | ||
925 | name, namelen, 0); | ||
926 | if (di && !IS_ERR(di)) { | ||
927 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
928 | BUG_ON(ret); | ||
929 | } | ||
930 | btrfs_release_path(path); | ||
931 | |||
912 | insert: | 932 | insert: |
913 | /* insert our name */ | 933 | /* insert our name */ |
914 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | 934 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 53875ae73ad..f2a4cc79da6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -142,6 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
142 | unsigned long limit; | 142 | unsigned long limit; |
143 | unsigned long last_waited = 0; | 143 | unsigned long last_waited = 0; |
144 | int force_reg = 0; | 144 | int force_reg = 0; |
145 | int sync_pending = 0; | ||
145 | struct blk_plug plug; | 146 | struct blk_plug plug; |
146 | 147 | ||
147 | /* | 148 | /* |
@@ -229,6 +230,22 @@ loop_lock: | |||
229 | 230 | ||
230 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 231 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
231 | 232 | ||
233 | /* | ||
234 | * if we're doing the sync list, record that our | ||
235 | * plug has some sync requests on it | ||
236 | * | ||
237 | * If we're doing the regular list and there are | ||
238 | * sync requests sitting around, unplug before | ||
239 | * we add more | ||
240 | */ | ||
241 | if (pending_bios == &device->pending_sync_bios) { | ||
242 | sync_pending = 1; | ||
243 | } else if (sync_pending) { | ||
244 | blk_finish_plug(&plug); | ||
245 | blk_start_plug(&plug); | ||
246 | sync_pending = 0; | ||
247 | } | ||
248 | |||
232 | submit_bio(cur->bi_rw, cur); | 249 | submit_bio(cur->bi_rw, cur); |
233 | num_run++; | 250 | num_run++; |
234 | batch_run++; | 251 | batch_run++; |
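The sync_pending logic says: if the plug already holds sync bios and regular ones are about to be queued, flush the plug first so the sync I/O is dispatched promptly rather than sitting behind a growing async batch. A runnable sketch of the rule; plug_start()/plug_finish() are prints standing in for blk_start_plug()/blk_finish_plug():

    #include <stdio.h>

    static void plug_start(void)  { printf("plug\n"); }
    static void plug_finish(void) { printf("unplug: dispatch queued bios\n"); }

    static void submit_batch(int is_sync_list, int *sync_pending)
    {
        if (is_sync_list) {
            *sync_pending = 1;   /* remember the plug holds sync bios */
        } else if (*sync_pending) {
            plug_finish();       /* push the sync requests out... */
            plug_start();        /* ...and start a fresh plug */
            *sync_pending = 0;
        }
        /* submit_bio(...) would follow here */
    }

    int main(void)
    {
        int sync_pending = 0;

        plug_start();
        submit_batch(1, &sync_pending);  /* sync bios accumulate */
        submit_batch(0, &sync_pending);  /* regular bios: flush first */
        plug_finish();
        return 0;
    }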
@@ -500,6 +517,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
500 | fs_devices->rw_devices--; | 517 | fs_devices->rw_devices--; |
501 | } | 518 | } |
502 | 519 | ||
520 | if (device->can_discard) | ||
521 | fs_devices->num_can_discard--; | ||
522 | |||
503 | new_device = kmalloc(sizeof(*new_device), GFP_NOFS); | 523 | new_device = kmalloc(sizeof(*new_device), GFP_NOFS); |
504 | BUG_ON(!new_device); | 524 | BUG_ON(!new_device); |
505 | memcpy(new_device, device, sizeof(*new_device)); | 525 | memcpy(new_device, device, sizeof(*new_device)); |
@@ -508,6 +528,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
508 | new_device->bdev = NULL; | 528 | new_device->bdev = NULL; |
509 | new_device->writeable = 0; | 529 | new_device->writeable = 0; |
510 | new_device->in_fs_metadata = 0; | 530 | new_device->in_fs_metadata = 0; |
531 | new_device->can_discard = 0; | ||
511 | list_replace_rcu(&device->dev_list, &new_device->dev_list); | 532 | list_replace_rcu(&device->dev_list, &new_device->dev_list); |
512 | 533 | ||
513 | call_rcu(&device->rcu, free_device); | 534 | call_rcu(&device->rcu, free_device); |
@@ -547,6 +568,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
547 | static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | 568 | static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, |
548 | fmode_t flags, void *holder) | 569 | fmode_t flags, void *holder) |
549 | { | 570 | { |
571 | struct request_queue *q; | ||
550 | struct block_device *bdev; | 572 | struct block_device *bdev; |
551 | struct list_head *head = &fs_devices->devices; | 573 | struct list_head *head = &fs_devices->devices; |
552 | struct btrfs_device *device; | 574 | struct btrfs_device *device; |
@@ -603,6 +625,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
603 | seeding = 0; | 625 | seeding = 0; |
604 | } | 626 | } |
605 | 627 | ||
628 | q = bdev_get_queue(bdev); | ||
629 | if (blk_queue_discard(q)) { | ||
630 | device->can_discard = 1; | ||
631 | fs_devices->num_can_discard++; | ||
632 | } | ||
633 | |||
606 | device->bdev = bdev; | 634 | device->bdev = bdev; |
607 | device->in_fs_metadata = 0; | 635 | device->in_fs_metadata = 0; |
608 | device->mode = flags; | 636 | device->mode = flags; |
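
Discard capability is probed per opened device: bdev_get_queue() fetches the request queue and blk_queue_discard() tests whether it advertises discard (TRIM/UNMAP) support. A hedged sketch of the probe on its own (the helper name is illustrative, not from this patch):

#include <linux/blkdev.h>

/* Returns nonzero if the block device supports discard requests. */
static int device_supports_discard(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return q && blk_queue_discard(q);
}

The num_can_discard counter added to btrfs_fs_devices then tracks how many member devices passed this probe, so filesystem-wide discard decisions don't have to re-walk the device list.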
@@ -835,6 +863,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
835 | 863 | ||
836 | max_hole_start = search_start; | 864 | max_hole_start = search_start; |
837 | max_hole_size = 0; | 865 | max_hole_size = 0; |
866 | hole_size = 0; | ||
838 | 867 | ||
839 | if (search_start >= search_end) { | 868 | if (search_start >= search_end) { |
840 | ret = -ENOSPC; | 869 | ret = -ENOSPC; |
@@ -917,7 +946,14 @@ next: | |||
917 | cond_resched(); | 946 | cond_resched(); |
918 | } | 947 | } |
919 | 948 | ||
920 | hole_size = search_end- search_start; | 949 | /* |
950 | * At this point, search_start should be the end of | ||
951 | * allocated dev extents, and when shrinking the device, | ||
952 | * search_end may be smaller than search_start. | ||
953 | */ | ||
954 | if (search_end > search_start) | ||
955 | hole_size = search_end - search_start; | ||
956 | |||
921 | if (hole_size > max_hole_size) { | 957 | if (hole_size > max_hole_size) { |
922 | max_hole_start = search_start; | 958 | max_hole_start = search_start; |
923 | max_hole_size = hole_size; | 959 | max_hole_size = hole_size; |
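
The point of the new guard is unsigned arithmetic: search_start and search_end are u64, so when shrinking the device leaves search_end below search_start, the old unconditional subtraction would wrap around to an enormous "hole". The fix is the usual idiom for unsigned ranges, sketched here:

	u64 hole_size = 0;

	/* u64 subtraction wraps on underflow, so compare first */
	if (search_end > search_start)
		hole_size = search_end - search_start;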
@@ -1543,6 +1579,7 @@ error: | |||
1543 | 1579 | ||
1544 | int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | 1580 | int btrfs_init_new_device(struct btrfs_root *root, char *device_path) |
1545 | { | 1581 | { |
1582 | struct request_queue *q; | ||
1546 | struct btrfs_trans_handle *trans; | 1583 | struct btrfs_trans_handle *trans; |
1547 | struct btrfs_device *device; | 1584 | struct btrfs_device *device; |
1548 | struct block_device *bdev; | 1585 | struct block_device *bdev; |
@@ -1612,6 +1649,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1612 | 1649 | ||
1613 | lock_chunks(root); | 1650 | lock_chunks(root); |
1614 | 1651 | ||
1652 | q = bdev_get_queue(bdev); | ||
1653 | if (blk_queue_discard(q)) | ||
1654 | device->can_discard = 1; | ||
1615 | device->writeable = 1; | 1655 | device->writeable = 1; |
1616 | device->work.func = pending_bios_fn; | 1656 | device->work.func = pending_bios_fn; |
1617 | generate_random_uuid(device->uuid); | 1657 | generate_random_uuid(device->uuid); |
@@ -1647,6 +1687,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1647 | root->fs_info->fs_devices->num_devices++; | 1687 | root->fs_info->fs_devices->num_devices++; |
1648 | root->fs_info->fs_devices->open_devices++; | 1688 | root->fs_info->fs_devices->open_devices++; |
1649 | root->fs_info->fs_devices->rw_devices++; | 1689 | root->fs_info->fs_devices->rw_devices++; |
1690 | if (device->can_discard) | ||
1691 | root->fs_info->fs_devices->num_can_discard++; | ||
1650 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; | 1692 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; |
1651 | 1693 | ||
1652 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) | 1694 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) |
@@ -2413,9 +2455,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2413 | total_avail = device->total_bytes - device->bytes_used; | 2455 | total_avail = device->total_bytes - device->bytes_used; |
2414 | else | 2456 | else |
2415 | total_avail = 0; | 2457 | total_avail = 0; |
2416 | /* avail is off by max(alloc_start, 1MB), but that is the same | 2458 | |
2417 | * for all devices, so it doesn't hurt the sorting later on | 2459 | /* If there is no space on this device, skip it. */ |
2418 | */ | 2460 | if (total_avail == 0) |
2461 | continue; | ||
2419 | 2462 | ||
2420 | ret = find_free_dev_extent(trans, device, | 2463 | ret = find_free_dev_extent(trans, device, |
2421 | max_stripe_size * dev_stripes, | 2464 | max_stripe_size * dev_stripes, |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7c12d61ae7a..6d866db4e17 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -48,6 +48,7 @@ struct btrfs_device { | |||
48 | int writeable; | 48 | int writeable; |
49 | int in_fs_metadata; | 49 | int in_fs_metadata; |
50 | int missing; | 50 | int missing; |
51 | int can_discard; | ||
51 | 52 | ||
52 | spinlock_t io_lock; | 53 | spinlock_t io_lock; |
53 | 54 | ||
@@ -104,6 +105,7 @@ struct btrfs_fs_devices { | |||
104 | u64 rw_devices; | 105 | u64 rw_devices; |
105 | u64 missing_devices; | 106 | u64 missing_devices; |
106 | u64 total_rw_bytes; | 107 | u64 total_rw_bytes; |
108 | u64 num_can_discard; | ||
107 | struct block_device *latest_bdev; | 109 | struct block_device *latest_bdev; |
108 | 110 | ||
109 | /* all of the devices in the FS, protected by a mutex | 111 | /* all of the devices in the FS, protected by a mutex |
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 2fe3cf13b2e..6d40656e1e2 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c | |||
@@ -176,7 +176,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) | |||
176 | 176 | ||
177 | #ifdef CONFIG_CIFS_STATS2 | 177 | #ifdef CONFIG_CIFS_STATS2 |
178 | seq_printf(m, " In Send: %d In MaxReq Wait: %d", | 178 | seq_printf(m, " In Send: %d In MaxReq Wait: %d", |
179 | atomic_read(&server->inSend), | 179 | atomic_read(&server->in_send), |
180 | atomic_read(&server->num_waiters)); | 180 | atomic_read(&server->num_waiters)); |
181 | #endif | 181 | #endif |
182 | 182 | ||
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 21de1d6d584..d0f59faefb7 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -991,24 +991,6 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, | |||
991 | return pntsd; | 991 | return pntsd; |
992 | } | 992 | } |
993 | 993 | ||
994 | static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid, | ||
995 | struct cifs_ntsd *pnntsd, u32 acllen) | ||
996 | { | ||
997 | int xid, rc; | ||
998 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); | ||
999 | |||
1000 | if (IS_ERR(tlink)) | ||
1001 | return PTR_ERR(tlink); | ||
1002 | |||
1003 | xid = GetXid(); | ||
1004 | rc = CIFSSMBSetCIFSACL(xid, tlink_tcon(tlink), fid, pnntsd, acllen); | ||
1005 | FreeXid(xid); | ||
1006 | cifs_put_tlink(tlink); | ||
1007 | |||
1008 | cFYI(DBG2, "SetCIFSACL rc = %d", rc); | ||
1009 | return rc; | ||
1010 | } | ||
1011 | |||
1012 | static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, | 994 | static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, |
1013 | struct cifs_ntsd *pnntsd, u32 acllen) | 995 | struct cifs_ntsd *pnntsd, u32 acllen) |
1014 | { | 996 | { |
@@ -1047,18 +1029,10 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, | |||
1047 | struct inode *inode, const char *path) | 1029 | struct inode *inode, const char *path) |
1048 | { | 1030 | { |
1049 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 1031 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
1050 | struct cifsFileInfo *open_file; | ||
1051 | int rc; | ||
1052 | 1032 | ||
1053 | cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode); | 1033 | cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode); |
1054 | 1034 | ||
1055 | open_file = find_readable_file(CIFS_I(inode), true); | 1035 | return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); |
1056 | if (!open_file) | ||
1057 | return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); | ||
1058 | |||
1059 | rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen); | ||
1060 | cifsFileInfo_put(open_file); | ||
1061 | return rc; | ||
1062 | } | 1036 | } |
1063 | 1037 | ||
1064 | /* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */ | 1038 |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index cb71dc1f94d..95da8027983 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -125,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | |||
125 | extern const struct export_operations cifs_export_ops; | 125 | extern const struct export_operations cifs_export_ops; |
126 | #endif /* CIFS_NFSD_EXPORT */ | 126 | #endif /* CIFS_NFSD_EXPORT */ |
127 | 127 | ||
128 | #define CIFS_VERSION "1.74" | 128 | #define CIFS_VERSION "1.75" |
129 | #endif /* _CIFSFS_H */ | 129 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 38ce6d44b14..95dad9d14cf 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -291,7 +291,7 @@ struct TCP_Server_Info { | |||
291 | struct fscache_cookie *fscache; /* client index cache cookie */ | 291 | struct fscache_cookie *fscache; /* client index cache cookie */ |
292 | #endif | 292 | #endif |
293 | #ifdef CONFIG_CIFS_STATS2 | 293 | #ifdef CONFIG_CIFS_STATS2 |
294 | atomic_t inSend; /* requests trying to send */ | 294 | atomic_t in_send; /* requests trying to send */ |
295 | atomic_t num_waiters; /* blocked waiting to get in sendrecv */ | 295 | atomic_t num_waiters; /* blocked waiting to get in sendrecv */ |
296 | #endif | 296 | #endif |
297 | }; | 297 | }; |
@@ -672,12 +672,54 @@ struct mid_q_entry { | |||
672 | bool multiEnd:1; /* both received */ | 672 | bool multiEnd:1; /* both received */ |
673 | }; | 673 | }; |
674 | 674 | ||
675 | struct oplock_q_entry { | 675 | /* Make code in transport.c a little cleaner by moving |
676 | struct list_head qhead; | 676 | update of optional stats into the functions below */ |
677 | struct inode *pinode; | 677 | #ifdef CONFIG_CIFS_STATS2 |
678 | struct cifs_tcon *tcon; | 678 | |
679 | __u16 netfid; | 679 | static inline void cifs_in_send_inc(struct TCP_Server_Info *server) |
680 | }; | 680 | { |
681 | atomic_inc(&server->in_send); | ||
682 | } | ||
683 | |||
684 | static inline void cifs_in_send_dec(struct TCP_Server_Info *server) | ||
685 | { | ||
686 | atomic_dec(&server->in_send); | ||
687 | } | ||
688 | |||
689 | static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) | ||
690 | { | ||
691 | atomic_inc(&server->num_waiters); | ||
692 | } | ||
693 | |||
694 | static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) | ||
695 | { | ||
696 | atomic_dec(&server->num_waiters); | ||
697 | } | ||
698 | |||
699 | static inline void cifs_save_when_sent(struct mid_q_entry *mid) | ||
700 | { | ||
701 | mid->when_sent = jiffies; | ||
702 | } | ||
703 | #else | ||
704 | static inline void cifs_in_send_inc(struct TCP_Server_Info *server) | ||
705 | { | ||
706 | } | ||
707 | static inline void cifs_in_send_dec(struct TCP_Server_Info *server) | ||
708 | { | ||
709 | } | ||
710 | |||
711 | static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) | ||
712 | { | ||
713 | } | ||
714 | |||
715 | static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) | ||
716 | { | ||
717 | } | ||
718 | |||
719 | static inline void cifs_save_when_sent(struct mid_q_entry *mid) | ||
720 | { | ||
721 | } | ||
722 | #endif | ||
681 | 723 | ||
682 | /* for pending dnotify requests */ | 724 | /* for pending dnotify requests */ |
683 | struct dir_notify_req { | 725 | struct dir_notify_req { |
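
Wrapping the optional counters in static inline helpers that compile to empty bodies when CONFIG_CIFS_STATS2 is off keeps every call site free of #ifdef blocks while costing nothing in the disabled configuration. The shape of the pattern, reduced to one counter (illustrative names, not CIFS code):

#ifdef CONFIG_MY_STATS
static inline void my_stat_inc(atomic_t *counter)
{
	atomic_inc(counter);
}
#else
static inline void my_stat_inc(atomic_t *counter)
{
	/* empty: the compiler discards the call entirely */
}
#endif

Callers invoke my_stat_inc() unconditionally; the transport.c hunks below show the CIFS call sites collapsing in exactly this way.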
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 80c2e3add3a..633c246b677 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -2878,7 +2878,8 @@ cleanup_volume_info_contents(struct smb_vol *volume_info) | |||
2878 | kfree(volume_info->username); | 2878 | kfree(volume_info->username); |
2879 | kzfree(volume_info->password); | 2879 | kzfree(volume_info->password); |
2880 | kfree(volume_info->UNC); | 2880 | kfree(volume_info->UNC); |
2881 | kfree(volume_info->UNCip); | 2881 | if (volume_info->UNCip != volume_info->UNC + 2) |
2882 | kfree(volume_info->UNCip); | ||
2882 | kfree(volume_info->domainname); | 2883 | kfree(volume_info->domainname); |
2883 | kfree(volume_info->iocharset); | 2884 | kfree(volume_info->iocharset); |
2884 | kfree(volume_info->prepath); | 2885 | kfree(volume_info->prepath); |
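
The guard matters because on some mount-option parse paths UNCip is not a separate allocation but a pointer two characters into the UNC string; freeing both would be a double free. The same defensive shape in isolation (hypothetical struct, a sketch only):

struct conn_strings {
	char *unc;	/* always its own allocation */
	char *unc_ip;	/* own allocation, or unc + 2 when parsed in place */
};

static void conn_strings_free(struct conn_strings *s)
{
	/* never free an aliased pointer twice */
	if (s->unc_ip != s->unc + 2)
		kfree(s->unc_ip);
	kfree(s->unc);
}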
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ae576fbb514..72d448bf96c 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -105,8 +105,8 @@ cifs_bp_rename_retry: | |||
105 | } | 105 | } |
106 | rcu_read_unlock(); | 106 | rcu_read_unlock(); |
107 | if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { | 107 | if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { |
108 | cERROR(1, "did not end path lookup where expected namelen is %d", | 108 | cFYI(1, "did not end path lookup where expected. namelen=%d " |
109 | namelen); | 109 | "dfsplen=%d", namelen, dfsplen); |
110 | /* presumably this is only possible if racing with a rename | 110 | /* presumably this is only possible if racing with a rename |
111 | of one of the parent directories (we can not lock the dentries | 111 | of one of the parent directories (we can not lock the dentries |
112 | above us to prevent this, but retrying should be harmless) */ | 112 | above us to prevent this, but retrying should be harmless) */ |
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index c1b9c4b1073..10ca6b2c26b 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -266,15 +266,11 @@ static int wait_for_free_request(struct TCP_Server_Info *server, | |||
266 | while (1) { | 266 | while (1) { |
267 | if (atomic_read(&server->inFlight) >= cifs_max_pending) { | 267 | if (atomic_read(&server->inFlight) >= cifs_max_pending) { |
268 | spin_unlock(&GlobalMid_Lock); | 268 | spin_unlock(&GlobalMid_Lock); |
269 | #ifdef CONFIG_CIFS_STATS2 | 269 | cifs_num_waiters_inc(server); |
270 | atomic_inc(&server->num_waiters); | ||
271 | #endif | ||
272 | wait_event(server->request_q, | 270 | wait_event(server->request_q, |
273 | atomic_read(&server->inFlight) | 271 | atomic_read(&server->inFlight) |
274 | < cifs_max_pending); | 272 | < cifs_max_pending); |
275 | #ifdef CONFIG_CIFS_STATS2 | 273 | cifs_num_waiters_dec(server); |
276 | atomic_dec(&server->num_waiters); | ||
277 | #endif | ||
278 | spin_lock(&GlobalMid_Lock); | 274 | spin_lock(&GlobalMid_Lock); |
279 | } else { | 275 | } else { |
280 | if (server->tcpStatus == CifsExiting) { | 276 | if (server->tcpStatus == CifsExiting) { |
@@ -381,15 +377,13 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | |||
381 | mid->callback = callback; | 377 | mid->callback = callback; |
382 | mid->callback_data = cbdata; | 378 | mid->callback_data = cbdata; |
383 | mid->midState = MID_REQUEST_SUBMITTED; | 379 | mid->midState = MID_REQUEST_SUBMITTED; |
384 | #ifdef CONFIG_CIFS_STATS2 | 380 | |
385 | atomic_inc(&server->inSend); | 381 | cifs_in_send_inc(server); |
386 | #endif | ||
387 | rc = smb_sendv(server, iov, nvec); | 382 | rc = smb_sendv(server, iov, nvec); |
388 | #ifdef CONFIG_CIFS_STATS2 | 383 | cifs_in_send_dec(server); |
389 | atomic_dec(&server->inSend); | 384 | cifs_save_when_sent(mid); |
390 | mid->when_sent = jiffies; | ||
391 | #endif | ||
392 | mutex_unlock(&server->srv_mutex); | 385 | mutex_unlock(&server->srv_mutex); |
386 | |||
393 | if (rc) | 387 | if (rc) |
394 | goto out_err; | 388 | goto out_err; |
395 | 389 | ||
@@ -575,14 +569,10 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, | |||
575 | } | 569 | } |
576 | 570 | ||
577 | midQ->midState = MID_REQUEST_SUBMITTED; | 571 | midQ->midState = MID_REQUEST_SUBMITTED; |
578 | #ifdef CONFIG_CIFS_STATS2 | 572 | cifs_in_send_inc(ses->server); |
579 | atomic_inc(&ses->server->inSend); | ||
580 | #endif | ||
581 | rc = smb_sendv(ses->server, iov, n_vec); | 573 | rc = smb_sendv(ses->server, iov, n_vec); |
582 | #ifdef CONFIG_CIFS_STATS2 | 574 | cifs_in_send_dec(ses->server); |
583 | atomic_dec(&ses->server->inSend); | 575 | cifs_save_when_sent(midQ); |
584 | midQ->when_sent = jiffies; | ||
585 | #endif | ||
586 | 576 | ||
587 | mutex_unlock(&ses->server->srv_mutex); | 577 | mutex_unlock(&ses->server->srv_mutex); |
588 | 578 | ||
@@ -703,14 +693,11 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, | |||
703 | } | 693 | } |
704 | 694 | ||
705 | midQ->midState = MID_REQUEST_SUBMITTED; | 695 | midQ->midState = MID_REQUEST_SUBMITTED; |
706 | #ifdef CONFIG_CIFS_STATS2 | 696 | |
707 | atomic_inc(&ses->server->inSend); | 697 | cifs_in_send_inc(ses->server); |
708 | #endif | ||
709 | rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); | 698 | rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); |
710 | #ifdef CONFIG_CIFS_STATS2 | 699 | cifs_in_send_dec(ses->server); |
711 | atomic_dec(&ses->server->inSend); | 700 | cifs_save_when_sent(midQ); |
712 | midQ->when_sent = jiffies; | ||
713 | #endif | ||
714 | mutex_unlock(&ses->server->srv_mutex); | 701 | mutex_unlock(&ses->server->srv_mutex); |
715 | 702 | ||
716 | if (rc < 0) | 703 | if (rc < 0) |
@@ -843,14 +830,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, | |||
843 | } | 830 | } |
844 | 831 | ||
845 | midQ->midState = MID_REQUEST_SUBMITTED; | 832 | midQ->midState = MID_REQUEST_SUBMITTED; |
846 | #ifdef CONFIG_CIFS_STATS2 | 833 | cifs_in_send_inc(ses->server); |
847 | atomic_inc(&ses->server->inSend); | ||
848 | #endif | ||
849 | rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); | 834 | rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); |
850 | #ifdef CONFIG_CIFS_STATS2 | 835 | cifs_in_send_dec(ses->server); |
851 | atomic_dec(&ses->server->inSend); | 836 | cifs_save_when_sent(midQ); |
852 | midQ->when_sent = jiffies; | ||
853 | #endif | ||
854 | mutex_unlock(&ses->server->srv_mutex); | 837 | mutex_unlock(&ses->server->srv_mutex); |
855 | 838 | ||
856 | if (rc < 0) { | 839 | if (rc < 0) { |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index bb85757689b..5802fa1dab1 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -289,10 +289,10 @@ static inline int ext4_should_order_data(struct inode *inode) | |||
289 | 289 | ||
290 | static inline int ext4_should_writeback_data(struct inode *inode) | 290 | static inline int ext4_should_writeback_data(struct inode *inode) |
291 | { | 291 | { |
292 | if (!S_ISREG(inode->i_mode)) | ||
293 | return 0; | ||
294 | if (EXT4_JOURNAL(inode) == NULL) | 292 | if (EXT4_JOURNAL(inode) == NULL) |
295 | return 1; | 293 | return 1; |
294 | if (!S_ISREG(inode->i_mode)) | ||
295 | return 0; | ||
296 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) | 296 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) |
297 | return 0; | 297 | return 0; |
298 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | 298 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index b8602cde5b5..0962642119c 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -800,12 +800,17 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
800 | } | 800 | } |
801 | 801 | ||
802 | retry: | 802 | retry: |
803 | if (rw == READ && ext4_should_dioread_nolock(inode)) | 803 | if (rw == READ && ext4_should_dioread_nolock(inode)) { |
804 | if (unlikely(!list_empty(&ei->i_completed_io_list))) { | ||
805 | mutex_lock(&inode->i_mutex); | ||
806 | ext4_flush_completed_IO(inode); | ||
807 | mutex_unlock(&inode->i_mutex); | ||
808 | } | ||
804 | ret = __blockdev_direct_IO(rw, iocb, inode, | 809 | ret = __blockdev_direct_IO(rw, iocb, inode, |
805 | inode->i_sb->s_bdev, iov, | 810 | inode->i_sb->s_bdev, iov, |
806 | offset, nr_segs, | 811 | offset, nr_segs, |
807 | ext4_get_block, NULL, NULL, 0); | 812 | ext4_get_block, NULL, NULL, 0); |
808 | else { | 813 | } else { |
809 | ret = blockdev_direct_IO(rw, iocb, inode, iov, | 814 | ret = blockdev_direct_IO(rw, iocb, inode, iov, |
810 | offset, nr_segs, ext4_get_block); | 815 | offset, nr_segs, ext4_get_block); |
811 | 816 | ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d47264cafee..c4da98a959a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -120,6 +120,12 @@ void ext4_evict_inode(struct inode *inode) | |||
120 | int err; | 120 | int err; |
121 | 121 | ||
122 | trace_ext4_evict_inode(inode); | 122 | trace_ext4_evict_inode(inode); |
123 | |||
124 | mutex_lock(&inode->i_mutex); | ||
125 | ext4_flush_completed_IO(inode); | ||
126 | mutex_unlock(&inode->i_mutex); | ||
127 | ext4_ioend_wait(inode); | ||
128 | |||
123 | if (inode->i_nlink) { | 129 | if (inode->i_nlink) { |
124 | /* | 130 | /* |
125 | * When journalling data dirty buffers are tracked only in the | 131 | * When journalling data dirty buffers are tracked only in the |
@@ -983,6 +989,8 @@ static int ext4_journalled_write_end(struct file *file, | |||
983 | from = pos & (PAGE_CACHE_SIZE - 1); | 989 | from = pos & (PAGE_CACHE_SIZE - 1); |
984 | to = from + len; | 990 | to = from + len; |
985 | 991 | ||
992 | BUG_ON(!ext4_handle_valid(handle)); | ||
993 | |||
986 | if (copied < len) { | 994 | if (copied < len) { |
987 | if (!PageUptodate(page)) | 995 | if (!PageUptodate(page)) |
988 | copied = 0; | 996 | copied = 0; |
@@ -1283,7 +1291,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
1283 | else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) | 1291 | else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) |
1284 | err = ext4_bio_write_page(&io_submit, page, | 1292 | err = ext4_bio_write_page(&io_submit, page, |
1285 | len, mpd->wbc); | 1293 | len, mpd->wbc); |
1286 | else | 1294 | else if (buffer_uninit(page_bufs)) { |
1295 | ext4_set_bh_endio(page_bufs, inode); | ||
1296 | err = block_write_full_page_endio(page, | ||
1297 | noalloc_get_block_write, | ||
1298 | mpd->wbc, ext4_end_io_buffer_write); | ||
1299 | } else | ||
1287 | err = block_write_full_page(page, | 1300 | err = block_write_full_page(page, |
1288 | noalloc_get_block_write, mpd->wbc); | 1301 | noalloc_get_block_write, mpd->wbc); |
1289 | 1302 | ||
@@ -1699,6 +1712,8 @@ static int __ext4_journalled_writepage(struct page *page, | |||
1699 | goto out; | 1712 | goto out; |
1700 | } | 1713 | } |
1701 | 1714 | ||
1715 | BUG_ON(!ext4_handle_valid(handle)); | ||
1716 | |||
1702 | ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, | 1717 | ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, |
1703 | do_journal_get_write_access); | 1718 | do_journal_get_write_access); |
1704 | 1719 | ||
@@ -2668,8 +2683,15 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
2668 | goto out; | 2683 | goto out; |
2669 | } | 2684 | } |
2670 | 2685 | ||
2671 | io_end->flag = EXT4_IO_END_UNWRITTEN; | 2686 | /* |
2687 | * It may be over-defensive to check EXT4_IO_END_UNWRITTEN here, | ||
2688 | * but being more careful is always safe for future changes. | ||
2689 | */ | ||
2672 | inode = io_end->inode; | 2690 | inode = io_end->inode; |
2691 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
2692 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | ||
2693 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | ||
2694 | } | ||
2673 | 2695 | ||
2674 | /* Add the io_end to per-inode completed io list*/ | 2696 | /* Add the io_end to per-inode completed io list*/ |
2675 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | 2697 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
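
Here and in the page-io.c hunk below, i_aiodio_unwritten must be bumped exactly once per io_end, so the increment is keyed to the flag transition: only the path that first sets EXT4_IO_END_UNWRITTEN takes the count. The idiom in general form (a sketch; callers are assumed to be serialized on the same object, since the test-and-set below is not atomic by itself):

	/* count an object once, on its 0 -> 1 state transition */
	if (!(obj->flags & OBJ_PENDING)) {
		obj->flags |= OBJ_PENDING;
		atomic_inc(&pending_count);
	}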
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 430c401d089..78839af7ce2 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -334,8 +334,10 @@ submit_and_retry: | |||
334 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && | 334 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && |
335 | (io_end->pages[io_end->num_io_pages-1] != io_page)) | 335 | (io_end->pages[io_end->num_io_pages-1] != io_page)) |
336 | goto submit_and_retry; | 336 | goto submit_and_retry; |
337 | if (buffer_uninit(bh)) | 337 | if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
338 | io->io_end->flag |= EXT4_IO_END_UNWRITTEN; | 338 | io_end->flag |= EXT4_IO_END_UNWRITTEN; |
339 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | ||
340 | } | ||
339 | io->io_end->size += bh->b_size; | 341 | io->io_end->size += bh->b_size; |
340 | io->io_next_block++; | 342 | io->io_next_block++; |
341 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | 343 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4687fea0c00..44d0c8db223 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -919,7 +919,6 @@ static void ext4_i_callback(struct rcu_head *head) | |||
919 | 919 | ||
920 | static void ext4_destroy_inode(struct inode *inode) | 920 | static void ext4_destroy_inode(struct inode *inode) |
921 | { | 921 | { |
922 | ext4_ioend_wait(inode); | ||
923 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { | 922 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { |
924 | ext4_msg(inode->i_sb, KERN_ERR, | 923 | ext4_msg(inode->i_sb, KERN_ERR, |
925 | "Inode %lu (%p): orphan list check failed!", | 924 | "Inode %lu (%p): orphan list check failed!", |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4ad64732cbc..5efbd5d7701 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -1231,7 +1231,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots, | |||
1231 | struct super_block *sb = dir->i_sb; | 1231 | struct super_block *sb = dir->i_sb; |
1232 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | 1232 | struct msdos_sb_info *sbi = MSDOS_SB(sb); |
1233 | struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */ | 1233 | struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */ |
1234 | struct msdos_dir_entry *de; | 1234 | struct msdos_dir_entry *uninitialized_var(de); |
1235 | int err, free_slots, i, nr_bhs; | 1235 | int err, free_slots, i, nr_bhs; |
1236 | loff_t pos, i_pos; | 1236 | loff_t pos, i_pos; |
1237 | 1237 | ||
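
uninitialized_var() suppresses a false-positive "may be used uninitialized" warning on a single variable instead of turning the warning off globally. For gcc it was defined at the time roughly as a self-assignment, which generates no code but convinces the compiler the variable has a value; a sketch of the idea:

/* simplified; the real macro lives in the compiler headers */
#define uninitialized_var(x) x = x

static int example(int flag)
{
	int uninitialized_var(value);

	if (flag)
		value = 42;
	/* gcc can't prove both branches test the same flag */
	return flag ? value : 0;
}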
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5942fec22c6..1726d730304 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -1188,9 +1188,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, | |||
1188 | out: | 1188 | out: |
1189 | /* UTF-8 doesn't provide FAT semantics */ | 1189 | /* UTF-8 doesn't provide FAT semantics */ |
1190 | if (!strcmp(opts->iocharset, "utf8")) { | 1190 | if (!strcmp(opts->iocharset, "utf8")) { |
1191 | fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset" | 1191 | fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset" |
1192 | " for FAT filesystems, filesystem will be " | 1192 | " for FAT filesystems, filesystem will be " |
1193 | "case sensitive!\n"); | 1193 | "case sensitive!"); |
1194 | } | 1194 | } |
1195 | 1195 | ||
1196 | /* If user doesn't specify allow_utime, it's initialized from dmask. */ | 1196 | /* If user doesn't specify allow_utime, it's initialized from dmask. */ |
@@ -1367,6 +1367,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, | |||
1367 | sbi->free_clusters = -1; /* Don't know yet */ | 1367 | sbi->free_clusters = -1; /* Don't know yet */ |
1368 | sbi->free_clus_valid = 0; | 1368 | sbi->free_clus_valid = 0; |
1369 | sbi->prev_free = FAT_START_ENT; | 1369 | sbi->prev_free = FAT_START_ENT; |
1370 | sb->s_maxbytes = 0xffffffff; | ||
1370 | 1371 | ||
1371 | if (!sbi->fat_length && b->fat32_length) { | 1372 | if (!sbi->fat_length && b->fat32_length) { |
1372 | struct fat_boot_fsinfo *fsinfo; | 1373 | struct fat_boot_fsinfo *fsinfo; |
@@ -1377,8 +1378,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, | |||
1377 | sbi->fat_length = le32_to_cpu(b->fat32_length); | 1378 | sbi->fat_length = le32_to_cpu(b->fat32_length); |
1378 | sbi->root_cluster = le32_to_cpu(b->root_cluster); | 1379 | sbi->root_cluster = le32_to_cpu(b->root_cluster); |
1379 | 1380 | ||
1380 | sb->s_maxbytes = 0xffffffff; | ||
1381 | |||
1382 | /* MC - if info_sector is 0, don't multiply by 0 */ | 1381 | /* MC - if info_sector is 0, don't multiply by 0 */ |
1383 | sbi->fsinfo_sector = le16_to_cpu(b->info_sector); | 1382 | sbi->fsinfo_sector = le16_to_cpu(b->info_sector); |
1384 | if (sbi->fsinfo_sector == 0) | 1383 | if (sbi->fsinfo_sector == 0) |
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index adcf92d3b60..7971f37534a 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c | |||
@@ -68,7 +68,7 @@ int jfs_umount(struct super_block *sb) | |||
68 | /* | 68 | /* |
69 | * Wait for outstanding transactions to be written to log: | 69 | * Wait for outstanding transactions to be written to log: |
70 | */ | 70 | */ |
71 | jfs_flush_journal(log, 1); | 71 | jfs_flush_journal(log, 2); |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * close fileset inode allocation map (aka fileset inode) | 74 | * close fileset inode allocation map (aka fileset inode) |
@@ -146,7 +146,7 @@ int jfs_umount_rw(struct super_block *sb) | |||
146 | * | 146 | * |
147 | * remove file system from log active file system list. | 147 | * remove file system from log active file system list. |
148 | */ | 148 | */ |
149 | jfs_flush_journal(log, 1); | 149 | jfs_flush_journal(log, 2); |
150 | 150 | ||
151 | /* | 151 | /* |
152 | * Make sure all metadata makes it to disk | 152 | * Make sure all metadata makes it to disk |
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index e56564d2ef9..9561c8fc8bd 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/namei.h> | 36 | #include <linux/namei.h> |
37 | #include <linux/bio.h> /* struct bio */ | 37 | #include <linux/bio.h> /* struct bio */ |
38 | #include <linux/buffer_head.h> /* various write calls */ | 38 | #include <linux/buffer_head.h> /* various write calls */ |
39 | #include <linux/prefetch.h> | ||
39 | 40 | ||
40 | #include "blocklayout.h" | 41 | #include "blocklayout.h" |
41 | 42 | ||
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index b257383bb56..07df5f1d85e 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h | |||
@@ -38,6 +38,7 @@ enum nfs4_callback_opnum { | |||
38 | struct cb_process_state { | 38 | struct cb_process_state { |
39 | __be32 drc_status; | 39 | __be32 drc_status; |
40 | struct nfs_client *clp; | 40 | struct nfs_client *clp; |
41 | int slotid; | ||
41 | }; | 42 | }; |
42 | 43 | ||
43 | struct cb_compound_hdr_arg { | 44 | struct cb_compound_hdr_arg { |
@@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall( | |||
166 | void *dummy, struct cb_process_state *cps); | 167 | void *dummy, struct cb_process_state *cps); |
167 | 168 | ||
168 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); | 169 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); |
169 | extern void nfs4_cb_take_slot(struct nfs_client *clp); | ||
170 | 170 | ||
171 | struct cb_devicenotifyitem { | 171 | struct cb_devicenotifyitem { |
172 | uint32_t cbd_notify_type; | 172 | uint32_t cbd_notify_type; |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 74780f9f852..43926add945 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -348,7 +348,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) | |||
348 | /* Normal */ | 348 | /* Normal */ |
349 | if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { | 349 | if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { |
350 | slot->seq_nr++; | 350 | slot->seq_nr++; |
351 | return htonl(NFS4_OK); | 351 | goto out_ok; |
352 | } | 352 | } |
353 | 353 | ||
354 | /* Replay */ | 354 | /* Replay */ |
@@ -367,11 +367,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) | |||
367 | /* Wraparound */ | 367 | /* Wraparound */ |
368 | if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { | 368 | if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { |
369 | slot->seq_nr = 1; | 369 | slot->seq_nr = 1; |
370 | return htonl(NFS4_OK); | 370 | goto out_ok; |
371 | } | 371 | } |
372 | 372 | ||
373 | /* Misordered request */ | 373 | /* Misordered request */ |
374 | return htonl(NFS4ERR_SEQ_MISORDERED); | 374 | return htonl(NFS4ERR_SEQ_MISORDERED); |
375 | out_ok: | ||
376 | tbl->highest_used_slotid = args->csa_slotid; | ||
377 | return htonl(NFS4_OK); | ||
375 | } | 378 | } |
376 | 379 | ||
377 | /* | 380 | /* |
@@ -433,26 +436,37 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
433 | struct cb_sequenceres *res, | 436 | struct cb_sequenceres *res, |
434 | struct cb_process_state *cps) | 437 | struct cb_process_state *cps) |
435 | { | 438 | { |
439 | struct nfs4_slot_table *tbl; | ||
436 | struct nfs_client *clp; | 440 | struct nfs_client *clp; |
437 | int i; | 441 | int i; |
438 | __be32 status = htonl(NFS4ERR_BADSESSION); | 442 | __be32 status = htonl(NFS4ERR_BADSESSION); |
439 | 443 | ||
440 | cps->clp = NULL; | ||
441 | |||
442 | clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); | 444 | clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); |
443 | if (clp == NULL) | 445 | if (clp == NULL) |
444 | goto out; | 446 | goto out; |
445 | 447 | ||
448 | tbl = &clp->cl_session->bc_slot_table; | ||
449 | |||
450 | spin_lock(&tbl->slot_tbl_lock); | ||
446 | /* state manager is resetting the session */ | 451 | /* state manager is resetting the session */ |
447 | if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { | 452 | if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { |
448 | status = NFS4ERR_DELAY; | 453 | spin_unlock(&tbl->slot_tbl_lock); |
454 | status = htonl(NFS4ERR_DELAY); | ||
455 | /* Return NFS4ERR_BADSESSION if we're draining the session | ||
456 | * in order to reset it. | ||
457 | */ | ||
458 | if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) | ||
459 | status = htonl(NFS4ERR_BADSESSION); | ||
449 | goto out; | 460 | goto out; |
450 | } | 461 | } |
451 | 462 | ||
452 | status = validate_seqid(&clp->cl_session->bc_slot_table, args); | 463 | status = validate_seqid(&clp->cl_session->bc_slot_table, args); |
464 | spin_unlock(&tbl->slot_tbl_lock); | ||
453 | if (status) | 465 | if (status) |
454 | goto out; | 466 | goto out; |
455 | 467 | ||
468 | cps->slotid = args->csa_slotid; | ||
469 | |||
456 | /* | 470 | /* |
457 | * Check for pending referring calls. If a match is found, a | 471 | * Check for pending referring calls. If a match is found, a |
458 | * related callback was received before the response to the original | 472 | * related callback was received before the response to the original |
@@ -469,7 +483,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
469 | res->csr_slotid = args->csa_slotid; | 483 | res->csr_slotid = args->csa_slotid; |
470 | res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; | 484 | res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; |
471 | res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; | 485 | res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; |
472 | nfs4_cb_take_slot(clp); | ||
473 | 486 | ||
474 | out: | 487 | out: |
475 | cps->clp = clp; /* put in nfs4_callback_compound */ | 488 | cps->clp = clp; /* put in nfs4_callback_compound */ |
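
Taking slot_tbl_lock across the draining check and validate_seqid() closes the window where the state manager drains the backchannel while a callback is still claiming the slot; highest_used_slotid is now set in the same critical section that validated the sequence. Reduced to its locking shape (a sketch, names as in the hunk above):

	spin_lock(&tbl->slot_tbl_lock);
	if (test_bit(NFS4_SESSION_DRAINING, &session->session_state)) {
		spin_unlock(&tbl->slot_tbl_lock);
		return htonl(NFS4ERR_DELAY);
	}
	status = validate_seqid(tbl, args);	/* claims the slot on NFS4_OK */
	spin_unlock(&tbl->slot_tbl_lock);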
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c6c86a77e04..918ad647afe 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c | |||
@@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) | |||
754 | * Let the state manager know callback processing done. | 754 | * Let the state manager know callback processing done. |
755 | * A single slot, so highest used slotid is either 0 or -1 | 755 | * A single slot, so highest used slotid is either 0 or -1 |
756 | */ | 756 | */ |
757 | tbl->highest_used_slotid--; | 757 | tbl->highest_used_slotid = -1; |
758 | nfs4_check_drain_bc_complete(session); | 758 | nfs4_check_drain_bc_complete(session); |
759 | spin_unlock(&tbl->slot_tbl_lock); | 759 | spin_unlock(&tbl->slot_tbl_lock); |
760 | } | 760 | } |
761 | 761 | ||
762 | static void nfs4_cb_free_slot(struct nfs_client *clp) | 762 | static void nfs4_cb_free_slot(struct cb_process_state *cps) |
763 | { | 763 | { |
764 | if (clp && clp->cl_session) | 764 | if (cps->slotid != -1) |
765 | nfs4_callback_free_slot(clp->cl_session); | 765 | nfs4_callback_free_slot(cps->clp->cl_session); |
766 | } | ||
767 | |||
768 | /* A single slot, so highest used slotid is either 0 or -1 */ | ||
769 | void nfs4_cb_take_slot(struct nfs_client *clp) | ||
770 | { | ||
771 | struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table; | ||
772 | |||
773 | spin_lock(&tbl->slot_tbl_lock); | ||
774 | tbl->highest_used_slotid++; | ||
775 | BUG_ON(tbl->highest_used_slotid != 0); | ||
776 | spin_unlock(&tbl->slot_tbl_lock); | ||
777 | } | 766 | } |
778 | 767 | ||
779 | #else /* CONFIG_NFS_V4_1 */ | 768 | #else /* CONFIG_NFS_V4_1 */ |
@@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) | |||
784 | return htonl(NFS4ERR_MINOR_VERS_MISMATCH); | 773 | return htonl(NFS4ERR_MINOR_VERS_MISMATCH); |
785 | } | 774 | } |
786 | 775 | ||
787 | static void nfs4_cb_free_slot(struct nfs_client *clp) | 776 | static void nfs4_cb_free_slot(struct cb_process_state *cps) |
788 | { | 777 | { |
789 | } | 778 | } |
790 | #endif /* CONFIG_NFS_V4_1 */ | 779 | #endif /* CONFIG_NFS_V4_1 */ |
@@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
866 | struct cb_process_state cps = { | 855 | struct cb_process_state cps = { |
867 | .drc_status = 0, | 856 | .drc_status = 0, |
868 | .clp = NULL, | 857 | .clp = NULL, |
858 | .slotid = -1, | ||
869 | }; | 859 | }; |
870 | unsigned int nops = 0; | 860 | unsigned int nops = 0; |
871 | 861 | ||
@@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
906 | 896 | ||
907 | *hdr_res.status = status; | 897 | *hdr_res.status = status; |
908 | *hdr_res.nops = htonl(nops); | 898 | *hdr_res.nops = htonl(nops); |
909 | nfs4_cb_free_slot(cps.clp); | 899 | nfs4_cb_free_slot(&cps); |
910 | nfs_put_client(cps.clp); | 900 | nfs_put_client(cps.clp); |
911 | dprintk("%s: done, status = %u\n", __func__, ntohl(status)); | 901 | dprintk("%s: done, status = %u\n", __func__, ntohl(status)); |
912 | return rpc_success; | 902 | return rpc_success; |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 9383ca7245b..d0cda12fddc 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -479,7 +479,6 @@ static int _io_check(struct objio_state *ios, bool is_write) | |||
479 | for (i = 0; i < ios->numdevs; i++) { | 479 | for (i = 0; i < ios->numdevs; i++) { |
480 | struct osd_sense_info osi; | 480 | struct osd_sense_info osi; |
481 | struct osd_request *or = ios->per_dev[i].or; | 481 | struct osd_request *or = ios->per_dev[i].or; |
482 | unsigned dev; | ||
483 | int ret; | 482 | int ret; |
484 | 483 | ||
485 | if (!or) | 484 | if (!or) |
@@ -500,9 +499,8 @@ static int _io_check(struct objio_state *ios, bool is_write) | |||
500 | 499 | ||
501 | continue; /* we recovered */ | 500 | continue; /* we recovered */ |
502 | } | 501 | } |
503 | dev = ios->per_dev[i].dev; | 502 | objlayout_io_set_result(&ios->ol_state, i, |
504 | objlayout_io_set_result(&ios->ol_state, dev, | 503 | &ios->layout->comps[i].oc_object_id, |
505 | &ios->layout->comps[dev].oc_object_id, | ||
506 | osd_pri_2_pnfs_err(osi.osd_err_pri), | 504 | osd_pri_2_pnfs_err(osi.osd_err_pri), |
507 | ios->per_dev[i].offset, | 505 | ios->per_dev[i].offset, |
508 | ios->per_dev[i].length, | 506 | ios->per_dev[i].length, |
@@ -589,22 +587,19 @@ static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, | |||
589 | } | 587 | } |
590 | 588 | ||
591 | static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, | 589 | static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, |
592 | unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, | 590 | unsigned pgbase, struct _objio_per_comp *per_dev, int len, |
593 | gfp_t gfp_flags) | 591 | gfp_t gfp_flags) |
594 | { | 592 | { |
595 | unsigned pg = *cur_pg; | 593 | unsigned pg = *cur_pg; |
594 | int cur_len = len; | ||
596 | struct request_queue *q = | 595 | struct request_queue *q = |
597 | osd_request_queue(_io_od(ios, per_dev->dev)); | 596 | osd_request_queue(_io_od(ios, per_dev->dev)); |
598 | 597 | ||
599 | per_dev->length += cur_len; | ||
600 | |||
601 | if (per_dev->bio == NULL) { | 598 | if (per_dev->bio == NULL) { |
602 | unsigned stripes = ios->layout->num_comps / | 599 | unsigned pages_in_stripe = ios->layout->group_width * |
603 | ios->layout->mirrors_p1; | ||
604 | unsigned pages_in_stripe = stripes * | ||
605 | (ios->layout->stripe_unit / PAGE_SIZE); | 600 | (ios->layout->stripe_unit / PAGE_SIZE); |
606 | unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / | 601 | unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / |
607 | stripes; | 602 | ios->layout->group_width; |
608 | 603 | ||
609 | if (BIO_MAX_PAGES_KMALLOC < bio_size) | 604 | if (BIO_MAX_PAGES_KMALLOC < bio_size) |
610 | bio_size = BIO_MAX_PAGES_KMALLOC; | 605 | bio_size = BIO_MAX_PAGES_KMALLOC; |
@@ -632,6 +627,7 @@ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, | |||
632 | } | 627 | } |
633 | BUG_ON(cur_len); | 628 | BUG_ON(cur_len); |
634 | 629 | ||
630 | per_dev->length += len; | ||
635 | *cur_pg = pg; | 631 | *cur_pg = pg; |
636 | return 0; | 632 | return 0; |
637 | } | 633 | } |
@@ -650,7 +646,7 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, | |||
650 | int ret = 0; | 646 | int ret = 0; |
651 | 647 | ||
652 | while (length) { | 648 | while (length) { |
653 | struct _objio_per_comp *per_dev = &ios->per_dev[dev]; | 649 | struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev]; |
654 | unsigned cur_len, page_off = 0; | 650 | unsigned cur_len, page_off = 0; |
655 | 651 | ||
656 | if (!per_dev->length) { | 652 | if (!per_dev->length) { |
@@ -670,8 +666,8 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, | |||
670 | cur_len = stripe_unit; | 666 | cur_len = stripe_unit; |
671 | } | 667 | } |
672 | 668 | ||
673 | if (max_comp < dev) | 669 | if (max_comp < dev - first_dev) |
674 | max_comp = dev; | 670 | max_comp = dev - first_dev; |
675 | } else { | 671 | } else { |
676 | cur_len = stripe_unit; | 672 | cur_len = stripe_unit; |
677 | } | 673 | } |
@@ -806,7 +802,7 @@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) | |||
806 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; | 802 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; |
807 | unsigned dev = per_dev->dev; | 803 | unsigned dev = per_dev->dev; |
808 | struct pnfs_osd_object_cred *cred = | 804 | struct pnfs_osd_object_cred *cred = |
809 | &ios->layout->comps[dev]; | 805 | &ios->layout->comps[cur_comp]; |
810 | struct osd_obj_id obj = { | 806 | struct osd_obj_id obj = { |
811 | .partition = cred->oc_object_id.oid_partition_id, | 807 | .partition = cred->oc_object_id.oid_partition_id, |
812 | .id = cred->oc_object_id.oid_object_id, | 808 | .id = cred->oc_object_id.oid_object_id, |
@@ -904,7 +900,7 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) | |||
904 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { | 900 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { |
905 | struct osd_request *or = NULL; | 901 | struct osd_request *or = NULL; |
906 | struct pnfs_osd_object_cred *cred = | 902 | struct pnfs_osd_object_cred *cred = |
907 | &ios->layout->comps[dev]; | 903 | &ios->layout->comps[cur_comp]; |
908 | struct osd_obj_id obj = { | 904 | struct osd_obj_id obj = { |
909 | .partition = cred->oc_object_id.oid_partition_id, | 905 | .partition = cred->oc_object_id.oid_partition_id, |
910 | .id = cred->oc_object_id.oid_object_id, | 906 | .id = cred->oc_object_id.oid_object_id, |
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c index 16fc758e912..b3918f7ac34 100644 --- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c +++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c | |||
@@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, | |||
170 | p = _osd_xdr_decode_data_map(p, &layout->olo_map); | 170 | p = _osd_xdr_decode_data_map(p, &layout->olo_map); |
171 | layout->olo_comps_index = be32_to_cpup(p++); | 171 | layout->olo_comps_index = be32_to_cpup(p++); |
172 | layout->olo_num_comps = be32_to_cpup(p++); | 172 | layout->olo_num_comps = be32_to_cpup(p++); |
173 | dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__, | ||
174 | layout->olo_comps_index, layout->olo_num_comps); | ||
175 | |||
173 | iter->total_comps = layout->olo_num_comps; | 176 | iter->total_comps = layout->olo_num_comps; |
174 | return 0; | 177 | return 0; |
175 | } | 178 | } |
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index fb2d63f13f4..aea9e45efce 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h | |||
@@ -39,7 +39,7 @@ | |||
39 | }) | 39 | }) |
40 | 40 | ||
41 | #define __page_to_pfn(pg) \ | 41 | #define __page_to_pfn(pg) \ |
42 | ({ struct page *__pg = (pg); \ | 42 | ({ const struct page *__pg = (pg); \ |
43 | struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg)); \ | 43 | struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg)); \ |
44 | (unsigned long)(__pg - __pgdat->node_mem_map) + \ | 44 | (unsigned long)(__pg - __pgdat->node_mem_map) + \ |
45 | __pgdat->node_start_pfn; \ | 45 | __pgdat->node_start_pfn; \ |
@@ -57,7 +57,7 @@ | |||
57 | * section[i].section_mem_map == mem_map's address - start_pfn; | 57 | * section[i].section_mem_map == mem_map's address - start_pfn; |
58 | */ | 58 | */ |
59 | #define __page_to_pfn(pg) \ | 59 | #define __page_to_pfn(pg) \ |
60 | ({ struct page *__pg = (pg); \ | 60 | ({ const struct page *__pg = (pg); \ |
61 | int __sec = page_to_section(__pg); \ | 61 | int __sec = page_to_section(__pg); \ |
62 | (unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \ | 62 | (unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \ |
63 | }) | 63 | }) |
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6395692b2e7..32f0076e844 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
@@ -125,7 +125,11 @@ enum rq_flag_bits { | |||
125 | __REQ_SYNC, /* request is sync (sync write or read) */ | 125 | __REQ_SYNC, /* request is sync (sync write or read) */ |
126 | __REQ_META, /* metadata io request */ | 126 | __REQ_META, /* metadata io request */ |
127 | __REQ_DISCARD, /* request to discard sectors */ | 127 | __REQ_DISCARD, /* request to discard sectors */ |
128 | __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ | ||
129 | |||
128 | __REQ_NOIDLE, /* don't anticipate more IO after this one */ | 130 | __REQ_NOIDLE, /* don't anticipate more IO after this one */ |
131 | __REQ_FUA, /* forced unit access */ | ||
132 | __REQ_FLUSH, /* request for cache flush */ | ||
129 | 133 | ||
130 | /* bio only flags */ | 134 | /* bio only flags */ |
131 | __REQ_RAHEAD, /* read ahead, can fail anytime */ | 135 | __REQ_RAHEAD, /* read ahead, can fail anytime */ |
@@ -135,7 +139,6 @@ enum rq_flag_bits { | |||
135 | /* request only flags */ | 139 | /* request only flags */ |
136 | __REQ_SORTED, /* elevator knows about this request */ | 140 | __REQ_SORTED, /* elevator knows about this request */ |
137 | __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ | 141 | __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ |
138 | __REQ_FUA, /* forced unit access */ | ||
139 | __REQ_NOMERGE, /* don't touch this for merging */ | 142 | __REQ_NOMERGE, /* don't touch this for merging */ |
140 | __REQ_STARTED, /* drive already may have started this one */ | 143 | __REQ_STARTED, /* drive already may have started this one */ |
141 | __REQ_DONTPREP, /* don't call prep for this one */ | 144 | __REQ_DONTPREP, /* don't call prep for this one */ |
@@ -146,11 +149,9 @@ enum rq_flag_bits { | |||
146 | __REQ_PREEMPT, /* set for "ide_preempt" requests */ | 149 | __REQ_PREEMPT, /* set for "ide_preempt" requests */ |
147 | __REQ_ALLOCED, /* request came from our alloc pool */ | 150 | __REQ_ALLOCED, /* request came from our alloc pool */ |
148 | __REQ_COPY_USER, /* contains copies of user pages */ | 151 | __REQ_COPY_USER, /* contains copies of user pages */ |
149 | __REQ_FLUSH, /* request for cache flush */ | ||
150 | __REQ_FLUSH_SEQ, /* request for flush sequence */ | 152 | __REQ_FLUSH_SEQ, /* request for flush sequence */ |
151 | __REQ_IO_STAT, /* account I/O stat */ | 153 | __REQ_IO_STAT, /* account I/O stat */ |
152 | __REQ_MIXED_MERGE, /* merge of different types, fail separately */ | 154 | __REQ_MIXED_MERGE, /* merge of different types, fail separately */ |
153 | __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ | ||
154 | __REQ_NR_BITS, /* stops here */ | 155 | __REQ_NR_BITS, /* stops here */ |
155 | }; | 156 | }; |
156 | 157 | ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0e67c45b3bc..84b15d54f8c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -30,6 +30,7 @@ struct request_pm_state; | |||
30 | struct blk_trace; | 30 | struct blk_trace; |
31 | struct request; | 31 | struct request; |
32 | struct sg_io_hdr; | 32 | struct sg_io_hdr; |
33 | struct bsg_job; | ||
33 | 34 | ||
34 | #define BLKDEV_MIN_RQ 4 | 35 | #define BLKDEV_MIN_RQ 4 |
35 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ | 36 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ |
@@ -117,6 +118,7 @@ struct request { | |||
117 | struct { | 118 | struct { |
118 | unsigned int seq; | 119 | unsigned int seq; |
119 | struct list_head list; | 120 | struct list_head list; |
121 | rq_end_io_fn *saved_end_io; | ||
120 | } flush; | 122 | } flush; |
121 | }; | 123 | }; |
122 | 124 | ||
@@ -209,6 +211,7 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, | |||
209 | typedef void (softirq_done_fn)(struct request *); | 211 | typedef void (softirq_done_fn)(struct request *); |
210 | typedef int (dma_drain_needed_fn)(struct request *); | 212 | typedef int (dma_drain_needed_fn)(struct request *); |
211 | typedef int (lld_busy_fn) (struct request_queue *q); | 213 | typedef int (lld_busy_fn) (struct request_queue *q); |
214 | typedef int (bsg_job_fn) (struct bsg_job *); | ||
212 | 215 | ||
213 | enum blk_eh_timer_return { | 216 | enum blk_eh_timer_return { |
214 | BLK_EH_NOT_HANDLED, | 217 | BLK_EH_NOT_HANDLED, |
@@ -375,6 +378,8 @@ struct request_queue { | |||
375 | struct mutex sysfs_lock; | 378 | struct mutex sysfs_lock; |
376 | 379 | ||
377 | #if defined(CONFIG_BLK_DEV_BSG) | 380 | #if defined(CONFIG_BLK_DEV_BSG) |
381 | bsg_job_fn *bsg_job_fn; | ||
382 | int bsg_job_size; | ||
378 | struct bsg_class_device bsg_dev; | 383 | struct bsg_class_device bsg_dev; |
379 | #endif | 384 | #endif |
380 | 385 | ||
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 8c7c2de7631..8e9e4bc6d73 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h | |||
@@ -14,7 +14,7 @@ | |||
14 | enum blktrace_cat { | 14 | enum blktrace_cat { |
15 | BLK_TC_READ = 1 << 0, /* reads */ | 15 | BLK_TC_READ = 1 << 0, /* reads */ |
16 | BLK_TC_WRITE = 1 << 1, /* writes */ | 16 | BLK_TC_WRITE = 1 << 1, /* writes */ |
17 | BLK_TC_BARRIER = 1 << 2, /* barrier */ | 17 | BLK_TC_FLUSH = 1 << 2, /* flush */ |
18 | BLK_TC_SYNC = 1 << 3, /* sync IO */ | 18 | BLK_TC_SYNC = 1 << 3, /* sync IO */ |
19 | BLK_TC_SYNCIO = BLK_TC_SYNC, | 19 | BLK_TC_SYNCIO = BLK_TC_SYNC, |
20 | BLK_TC_QUEUE = 1 << 4, /* queueing/merging */ | 20 | BLK_TC_QUEUE = 1 << 4, /* queueing/merging */ |
@@ -28,8 +28,9 @@ enum blktrace_cat { | |||
28 | BLK_TC_META = 1 << 12, /* metadata */ | 28 | BLK_TC_META = 1 << 12, /* metadata */ |
29 | BLK_TC_DISCARD = 1 << 13, /* discard requests */ | 29 | BLK_TC_DISCARD = 1 << 13, /* discard requests */ |
30 | BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */ | 30 | BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */ |
31 | BLK_TC_FUA = 1 << 15, /* fua requests */ | ||
31 | 32 | ||
32 | BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ | 33 | BLK_TC_END = 1 << 15, /* we've run out of bits! */ |
33 | }; | 34 | }; |
34 | 35 | ||
35 | #define BLK_TC_SHIFT (16) | 36 | #define BLK_TC_SHIFT (16) |
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h new file mode 100644 index 00000000000..f55ab8cdc10 --- /dev/null +++ b/include/linux/bsg-lib.h | |||
@@ -0,0 +1,73 @@ | |||
1 | /* | ||
2 | * BSG helper library | ||
3 | * | ||
4 | * Copyright (C) 2008 James Smart, Emulex Corporation | ||
5 | * Copyright (C) 2011 Red Hat, Inc. All rights reserved. | ||
6 | * Copyright (C) 2011 Mike Christie | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | * | ||
22 | */ | ||
23 | #ifndef _BLK_BSG_ | ||
24 | #define _BLK_BSG_ | ||
25 | |||
26 | #include <linux/blkdev.h> | ||
27 | |||
28 | struct request; | ||
29 | struct device; | ||
30 | struct scatterlist; | ||
31 | struct request_queue; | ||
32 | |||
33 | struct bsg_buffer { | ||
34 | unsigned int payload_len; | ||
35 | int sg_cnt; | ||
36 | struct scatterlist *sg_list; | ||
37 | }; | ||
38 | |||
39 | struct bsg_job { | ||
40 | struct device *dev; | ||
41 | struct request *req; | ||
42 | |||
43 | /* Transport/driver specific request/reply structs */ | ||
44 | void *request; | ||
45 | void *reply; | ||
46 | |||
47 | unsigned int request_len; | ||
48 | unsigned int reply_len; | ||
49 | /* | ||
50 | * On entry: reply_len indicates the buffer size allocated for | ||
51 | * the reply. | ||
52 | * | ||
53 | * Upon completion: the message handler must set reply_len | ||
54 | * to indicate the size of the reply to be returned to the | ||
55 | * caller. | ||
56 | */ | ||
57 | |||
58 | /* DMA payloads for the request/response */ | ||
59 | struct bsg_buffer request_payload; | ||
60 | struct bsg_buffer reply_payload; | ||
61 | |||
62 | void *dd_data; /* Used for driver-specific storage */ | ||
63 | }; | ||
64 | |||
65 | void bsg_job_done(struct bsg_job *job, int result, | ||
66 | unsigned int reply_payload_rcv_len); | ||
67 | int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, | ||
68 | bsg_job_fn *job_fn, int dd_job_size); | ||
69 | void bsg_request_fn(struct request_queue *q); | ||
70 | void bsg_remove_queue(struct request_queue *q); | ||
71 | void bsg_goose_queue(struct request_queue *q); | ||
72 | |||
73 | #endif | ||
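
Using only the API declared above, a transport driver would attach its queue hook and complete jobs roughly as follows (a sketch; the driver names and per-job state are hypothetical, and hardware submission is elided):

#include <linux/bsg-lib.h>

struct my_job_state {		/* lives in job->dd_data */
	int submitted;
};

static int my_bsg_dispatch(struct bsg_job *job)
{
	struct my_job_state *state = job->dd_data;

	state->submitted = 1;
	/* ... hand job->request and job->request_payload to the HW ... */

	/* on completion: set reply_len, then report result and RX bytes */
	job->reply_len = sizeof(u32);
	bsg_job_done(job, 0, job->reply_payload.payload_len);
	return 0;
}

static int my_attach_bsg(struct device *dev, struct request_queue *q)
{
	return bsg_setup_queue(dev, q, "my_transport",
			       my_bsg_dispatch, sizeof(struct my_job_state));
}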
diff --git a/include/linux/hash.h b/include/linux/hash.h index 06d25c189cc..b80506bdd73 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h | |||
@@ -63,7 +63,7 @@ static inline u32 hash_32(u32 val, unsigned int bits) | |||
63 | return hash >> (32 - bits); | 63 | return hash >> (32 - bits); |
64 | } | 64 | } |
65 | 65 | ||
66 | static inline unsigned long hash_ptr(void *ptr, unsigned int bits) | 66 | static inline unsigned long hash_ptr(const void *ptr, unsigned int bits) |
67 | { | 67 | { |
68 | return hash_long((unsigned long)ptr, bits); | 68 | return hash_long((unsigned long)ptr, bits); |
69 | } | 69 | } |
diff --git a/include/linux/irq.h b/include/linux/irq.h index 87a06f345bd..59517300a31 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | #include <linux/topology.h> | 24 | #include <linux/topology.h> |
25 | #include <linux/wait.h> | 25 | #include <linux/wait.h> |
26 | #include <linux/module.h> | ||
26 | 27 | ||
27 | #include <asm/irq.h> | 28 | #include <asm/irq.h> |
28 | #include <asm/ptrace.h> | 29 | #include <asm/ptrace.h> |
@@ -547,7 +548,15 @@ static inline struct msi_desc *irq_data_get_msi(struct irq_data *d) | |||
547 | return d->msi_desc; | 548 | return d->msi_desc; |
548 | } | 549 | } |
549 | 550 | ||
550 | int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node); | 551 | int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, |
552 | struct module *owner); | ||
553 | |||
554 | static inline int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, | ||
555 | int node) | ||
556 | { | ||
557 | return __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE); | ||
558 | } | ||
559 | |||
551 | void irq_free_descs(unsigned int irq, unsigned int cnt); | 560 | void irq_free_descs(unsigned int irq, unsigned int cnt); |
552 | int irq_reserve_irqs(unsigned int from, unsigned int cnt); | 561 | int irq_reserve_irqs(unsigned int from, unsigned int cnt); |
553 | 562 | ||
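irq_alloc_descs() is now a static inline that forwards THIS_MODULE, so every descriptor allocated through it is tagged with the calling module as owner; __setup_irq() in kernel/irq/manage.c (further down) then pins that module for each registered handler. Call sites are unchanged; a hedged example:

    /* Sketch: dynamically allocate 4 consecutive irq descriptors on node 0.
     * irq = -1 requests a dynamic search starting at 'from' (here 0). */
    int irq = irq_alloc_descs(-1, 0, 4, 0);
    if (irq < 0)
            return irq;     /* descriptors are now owned by THIS_MODULE */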
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 2d921b35212..150134ac709 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h | |||
@@ -66,6 +66,7 @@ struct irq_desc { | |||
66 | #ifdef CONFIG_PROC_FS | 66 | #ifdef CONFIG_PROC_FS |
67 | struct proc_dir_entry *dir; | 67 | struct proc_dir_entry *dir; |
68 | #endif | 68 | #endif |
69 | struct module *owner; | ||
69 | const char *name; | 70 | const char *name; |
70 | } ____cacheline_internodealigned_in_smp; | 71 | } ____cacheline_internodealigned_in_smp; |
71 | 72 | ||
diff --git a/include/linux/loop.h b/include/linux/loop.h index 66c194e2d9b..683d6989011 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h | |||
@@ -64,7 +64,6 @@ struct loop_device { | |||
64 | 64 | ||
65 | struct request_queue *lo_queue; | 65 | struct request_queue *lo_queue; |
66 | struct gendisk *lo_disk; | 66 | struct gendisk *lo_disk; |
67 | struct list_head lo_list; | ||
68 | }; | 67 | }; |
69 | 68 | ||
70 | #endif /* __KERNEL__ */ | 69 | #endif /* __KERNEL__ */ |
@@ -161,4 +160,8 @@ int loop_unregister_transfer(int number); | |||
161 | #define LOOP_CHANGE_FD 0x4C06 | 160 | #define LOOP_CHANGE_FD 0x4C06 |
162 | #define LOOP_SET_CAPACITY 0x4C07 | 161 | #define LOOP_SET_CAPACITY 0x4C07 |
163 | 162 | ||
163 | /* /dev/loop-control interface */ | ||
164 | #define LOOP_CTL_ADD 0x4C80 | ||
165 | #define LOOP_CTL_REMOVE 0x4C81 | ||
166 | #define LOOP_CTL_GET_FREE 0x4C82 | ||
164 | #endif | 167 | #endif |
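These ioctls target the new /dev/loop-control node (minor LOOP_CTRL_MINOR, added in miscdevice.h below). A hedged userspace sketch of asking the kernel for a free loop device:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/loop.h>

    int main(void)
    {
            int fd = open("/dev/loop-control", O_RDWR);
            if (fd < 0)
                    return 1;
            int n = ioctl(fd, LOOP_CTL_GET_FREE);   /* e.g. 0 for /dev/loop0 */
            if (n >= 0)
                    printf("free device: /dev/loop%d\n", n);
            return 0;
    }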
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 18fd13028ba..c309b1ecdc1 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h | |||
@@ -40,6 +40,7 @@ | |||
40 | #define BTRFS_MINOR 234 | 40 | #define BTRFS_MINOR 234 |
41 | #define AUTOFS_MINOR 235 | 41 | #define AUTOFS_MINOR 235 |
42 | #define MAPPER_CTRL_MINOR 236 | 42 | #define MAPPER_CTRL_MINOR 236 |
43 | #define LOOP_CTRL_MINOR 237 | ||
43 | #define MISC_DYNAMIC_MINOR 255 | 44 | #define MISC_DYNAMIC_MINOR 255 |
44 | 45 | ||
45 | struct device; | 46 | struct device; |
diff --git a/include/linux/mm.h b/include/linux/mm.h index fd599f4bb84..7438071b44a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -685,7 +685,7 @@ static inline void set_page_section(struct page *page, unsigned long section) | |||
685 | page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; | 685 | page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; |
686 | } | 686 | } |
687 | 687 | ||
688 | static inline unsigned long page_to_section(struct page *page) | 688 | static inline unsigned long page_to_section(const struct page *page) |
689 | { | 689 | { |
690 | return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; | 690 | return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; |
691 | } | 691 | } |
@@ -720,7 +720,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone, | |||
720 | 720 | ||
721 | static __always_inline void *lowmem_page_address(const struct page *page) | 721 | static __always_inline void *lowmem_page_address(const struct page *page) |
722 | { | 722 | { |
723 | return __va(PFN_PHYS(page_to_pfn((struct page *)page))); | 723 | return __va(PFN_PHYS(page_to_pfn(page))); |
724 | } | 724 | } |
725 | 725 | ||
726 | #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) | 726 | #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) |
@@ -737,7 +737,7 @@ static __always_inline void *lowmem_page_address(const struct page *page) | |||
737 | #endif | 737 | #endif |
738 | 738 | ||
739 | #if defined(HASHED_PAGE_VIRTUAL) | 739 | #if defined(HASHED_PAGE_VIRTUAL) |
740 | void *page_address(struct page *page); | 740 | void *page_address(const struct page *page); |
741 | void set_page_address(struct page *page, void *virtual); | 741 | void set_page_address(struct page *page, void *virtual); |
742 | void page_address_init(void); | 742 | void page_address_init(void); |
743 | #endif | 743 | #endif |
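Constifying page_to_section() and page_address() is what lets lowmem_page_address() above drop its (struct page *) cast: the whole chain down to page_to_pfn() now accepts a const struct page *. The general rule, sketched with placeholder types, is that read-only accessors should take const pointers so const-holding callers need no laundering cast:

    struct obj { unsigned long flags; };

    static inline unsigned long obj_flags(const struct obj *o)
    {
            return o->flags;        /* only reads, so const is honest */
    }

    unsigned long f(const struct obj *o)
    {
            return obj_flags(o);    /* no (struct obj *) cast required */
    }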
diff --git a/include/linux/pci.h b/include/linux/pci.h index f27893b3b72..8c230cbcbb4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h | |||
@@ -251,7 +251,8 @@ struct pci_dev { | |||
251 | u8 revision; /* PCI revision, low byte of class word */ | 251 | u8 revision; /* PCI revision, low byte of class word */ |
252 | u8 hdr_type; /* PCI header type (`multi' flag masked out) */ | 252 | u8 hdr_type; /* PCI header type (`multi' flag masked out) */ |
253 | u8 pcie_cap; /* PCI-E capability offset */ | 253 | u8 pcie_cap; /* PCI-E capability offset */ |
254 | u8 pcie_type; /* PCI-E device/port type */ | 254 | u8 pcie_type:4; /* PCI-E device/port type */ |
255 | u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */ | ||
255 | u8 rom_base_reg; /* which config register controls the ROM */ | 256 | u8 rom_base_reg; /* which config register controls the ROM */ |
256 | u8 pin; /* which interrupt pin this device uses */ | 257 | u8 pin; /* which interrupt pin this device uses */ |
257 | 258 | ||
@@ -617,6 +618,16 @@ struct pci_driver { | |||
617 | /* these external functions are only available when PCI support is enabled */ | 618 | /* these external functions are only available when PCI support is enabled */ |
618 | #ifdef CONFIG_PCI | 619 | #ifdef CONFIG_PCI |
619 | 620 | ||
621 | extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss); | ||
622 | |||
623 | enum pcie_bus_config_types { | ||
624 | PCIE_BUS_PERFORMANCE, | ||
625 | PCIE_BUS_SAFE, | ||
626 | PCIE_BUS_PEER2PEER, | ||
627 | }; | ||
628 | |||
629 | extern enum pcie_bus_config_types pcie_bus_config; | ||
630 | |||
620 | extern struct bus_type pci_bus_type; | 631 | extern struct bus_type pci_bus_type; |
621 | 632 | ||
622 | /* Do NOT directly access these two variables, unless you are arch specific pci | 633 | /* Do NOT directly access these two variables, unless you are arch specific pci |
@@ -796,10 +807,13 @@ int pcix_get_mmrbc(struct pci_dev *dev); | |||
796 | int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); | 807 | int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); |
797 | int pcie_get_readrq(struct pci_dev *dev); | 808 | int pcie_get_readrq(struct pci_dev *dev); |
798 | int pcie_set_readrq(struct pci_dev *dev, int rq); | 809 | int pcie_set_readrq(struct pci_dev *dev, int rq); |
810 | int pcie_get_mps(struct pci_dev *dev); | ||
811 | int pcie_set_mps(struct pci_dev *dev, int mps); | ||
799 | int __pci_reset_function(struct pci_dev *dev); | 812 | int __pci_reset_function(struct pci_dev *dev); |
800 | int pci_reset_function(struct pci_dev *dev); | 813 | int pci_reset_function(struct pci_dev *dev); |
801 | void pci_update_resource(struct pci_dev *dev, int resno); | 814 | void pci_update_resource(struct pci_dev *dev, int resno); |
802 | int __must_check pci_assign_resource(struct pci_dev *dev, int i); | 815 | int __must_check pci_assign_resource(struct pci_dev *dev, int i); |
816 | int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); | ||
803 | int pci_select_bars(struct pci_dev *dev, unsigned long flags); | 817 | int pci_select_bars(struct pci_dev *dev, unsigned long flags); |
804 | 818 | ||
805 | /* ROM control related routines */ | 819 | /* ROM control related routines */ |
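pcie_get_mps()/pcie_set_mps() mirror the existing readrq accessors but operate on the Max Payload Size in bytes. A hedged driver-side sketch, where 'pdev' is assumed to be a bound struct pci_dev and 256 is just an example value:

    int mps = pcie_get_mps(pdev);
    if (mps > 256) {
            /* valid MPS values are powers of two in 128..4096 */
            int err = pcie_set_mps(pdev, 256);
            if (err)
                    dev_warn(&pdev->dev, "failed to set MPS: %d\n", err);
    }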
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 21097cb086f..f9ec1736a11 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h | |||
@@ -72,8 +72,6 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, | |||
72 | extern void pm_genpd_init(struct generic_pm_domain *genpd, | 72 | extern void pm_genpd_init(struct generic_pm_domain *genpd, |
73 | struct dev_power_governor *gov, bool is_off); | 73 | struct dev_power_governor *gov, bool is_off); |
74 | extern int pm_genpd_poweron(struct generic_pm_domain *genpd); | 74 | extern int pm_genpd_poweron(struct generic_pm_domain *genpd); |
75 | extern void pm_genpd_poweroff_unused(void); | ||
76 | extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); | ||
77 | #else | 75 | #else |
78 | static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, | 76 | static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, |
79 | struct device *dev) | 77 | struct device *dev) |
@@ -101,8 +99,14 @@ static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) | |||
101 | { | 99 | { |
102 | return -ENOSYS; | 100 | return -ENOSYS; |
103 | } | 101 | } |
104 | static inline void pm_genpd_poweroff_unused(void) {} | 102 | #endif |
103 | |||
104 | #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME | ||
105 | extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); | ||
106 | extern void pm_genpd_poweroff_unused(void); | ||
107 | #else | ||
105 | static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {} | 108 | static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {} |
109 | static inline void pm_genpd_poweroff_unused(void) {} | ||
106 | #endif | 110 | #endif |
107 | 111 | ||
108 | #endif /* _LINUX_PM_DOMAIN_H */ | 112 | #endif /* _LINUX_PM_DOMAIN_H */ |
diff --git a/include/linux/rtc.h b/include/linux/rtc.h index b27ebea2566..93f4d035076 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h | |||
@@ -97,6 +97,9 @@ struct rtc_pll_info { | |||
97 | #define RTC_AF 0x20 /* Alarm interrupt */ | 97 | #define RTC_AF 0x20 /* Alarm interrupt */ |
98 | #define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */ | 98 | #define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */ |
99 | 99 | ||
100 | |||
101 | #define RTC_MAX_FREQ 8192 | ||
102 | |||
100 | #ifdef __KERNEL__ | 103 | #ifdef __KERNEL__ |
101 | 104 | ||
102 | #include <linux/types.h> | 105 | #include <linux/types.h> |
diff --git a/include/trace/events/block.h b/include/trace/events/block.h index bf366547da2..05c5e61f0a7 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h | |||
@@ -8,6 +8,8 @@ | |||
8 | #include <linux/blkdev.h> | 8 | #include <linux/blkdev.h> |
9 | #include <linux/tracepoint.h> | 9 | #include <linux/tracepoint.h> |
10 | 10 | ||
11 | #define RWBS_LEN 8 | ||
12 | |||
11 | DECLARE_EVENT_CLASS(block_rq_with_error, | 13 | DECLARE_EVENT_CLASS(block_rq_with_error, |
12 | 14 | ||
13 | TP_PROTO(struct request_queue *q, struct request *rq), | 15 | TP_PROTO(struct request_queue *q, struct request *rq), |
@@ -19,7 +21,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error, | |||
19 | __field( sector_t, sector ) | 21 | __field( sector_t, sector ) |
20 | __field( unsigned int, nr_sector ) | 22 | __field( unsigned int, nr_sector ) |
21 | __field( int, errors ) | 23 | __field( int, errors ) |
22 | __array( char, rwbs, 6 ) | 24 | __array( char, rwbs, RWBS_LEN ) |
23 | __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) | 25 | __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) |
24 | ), | 26 | ), |
25 | 27 | ||
@@ -104,7 +106,7 @@ DECLARE_EVENT_CLASS(block_rq, | |||
104 | __field( sector_t, sector ) | 106 | __field( sector_t, sector ) |
105 | __field( unsigned int, nr_sector ) | 107 | __field( unsigned int, nr_sector ) |
106 | __field( unsigned int, bytes ) | 108 | __field( unsigned int, bytes ) |
107 | __array( char, rwbs, 6 ) | 109 | __array( char, rwbs, RWBS_LEN ) |
108 | __array( char, comm, TASK_COMM_LEN ) | 110 | __array( char, comm, TASK_COMM_LEN ) |
109 | __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) | 111 | __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) |
110 | ), | 112 | ), |
@@ -183,7 +185,7 @@ TRACE_EVENT(block_bio_bounce, | |||
183 | __field( dev_t, dev ) | 185 | __field( dev_t, dev ) |
184 | __field( sector_t, sector ) | 186 | __field( sector_t, sector ) |
185 | __field( unsigned int, nr_sector ) | 187 | __field( unsigned int, nr_sector ) |
186 | __array( char, rwbs, 6 ) | 188 | __array( char, rwbs, RWBS_LEN ) |
187 | __array( char, comm, TASK_COMM_LEN ) | 189 | __array( char, comm, TASK_COMM_LEN ) |
188 | ), | 190 | ), |
189 | 191 | ||
@@ -222,7 +224,7 @@ TRACE_EVENT(block_bio_complete, | |||
222 | __field( sector_t, sector ) | 224 | __field( sector_t, sector ) |
223 | __field( unsigned, nr_sector ) | 225 | __field( unsigned, nr_sector ) |
224 | __field( int, error ) | 226 | __field( int, error ) |
225 | __array( char, rwbs, 6 ) | 227 | __array( char, rwbs, RWBS_LEN) |
226 | ), | 228 | ), |
227 | 229 | ||
228 | TP_fast_assign( | 230 | TP_fast_assign( |
@@ -249,7 +251,7 @@ DECLARE_EVENT_CLASS(block_bio, | |||
249 | __field( dev_t, dev ) | 251 | __field( dev_t, dev ) |
250 | __field( sector_t, sector ) | 252 | __field( sector_t, sector ) |
251 | __field( unsigned int, nr_sector ) | 253 | __field( unsigned int, nr_sector ) |
252 | __array( char, rwbs, 6 ) | 254 | __array( char, rwbs, RWBS_LEN ) |
253 | __array( char, comm, TASK_COMM_LEN ) | 255 | __array( char, comm, TASK_COMM_LEN ) |
254 | ), | 256 | ), |
255 | 257 | ||
@@ -321,7 +323,7 @@ DECLARE_EVENT_CLASS(block_get_rq, | |||
321 | __field( dev_t, dev ) | 323 | __field( dev_t, dev ) |
322 | __field( sector_t, sector ) | 324 | __field( sector_t, sector ) |
323 | __field( unsigned int, nr_sector ) | 325 | __field( unsigned int, nr_sector ) |
324 | __array( char, rwbs, 6 ) | 326 | __array( char, rwbs, RWBS_LEN ) |
325 | __array( char, comm, TASK_COMM_LEN ) | 327 | __array( char, comm, TASK_COMM_LEN ) |
326 | ), | 328 | ), |
327 | 329 | ||
@@ -456,7 +458,7 @@ TRACE_EVENT(block_split, | |||
456 | __field( dev_t, dev ) | 458 | __field( dev_t, dev ) |
457 | __field( sector_t, sector ) | 459 | __field( sector_t, sector ) |
458 | __field( sector_t, new_sector ) | 460 | __field( sector_t, new_sector ) |
459 | __array( char, rwbs, 6 ) | 461 | __array( char, rwbs, RWBS_LEN ) |
460 | __array( char, comm, TASK_COMM_LEN ) | 462 | __array( char, comm, TASK_COMM_LEN ) |
461 | ), | 463 | ), |
462 | 464 | ||
@@ -498,7 +500,7 @@ TRACE_EVENT(block_bio_remap, | |||
498 | __field( unsigned int, nr_sector ) | 500 | __field( unsigned int, nr_sector ) |
499 | __field( dev_t, old_dev ) | 501 | __field( dev_t, old_dev ) |
500 | __field( sector_t, old_sector ) | 502 | __field( sector_t, old_sector ) |
501 | __array( char, rwbs, 6 ) | 503 | __array( char, rwbs, RWBS_LEN) |
502 | ), | 504 | ), |
503 | 505 | ||
504 | TP_fast_assign( | 506 | TP_fast_assign( |
@@ -542,7 +544,7 @@ TRACE_EVENT(block_rq_remap, | |||
542 | __field( unsigned int, nr_sector ) | 544 | __field( unsigned int, nr_sector ) |
543 | __field( dev_t, old_dev ) | 545 | __field( dev_t, old_dev ) |
544 | __field( sector_t, old_sector ) | 546 | __field( sector_t, old_sector ) |
545 | __array( char, rwbs, 6 ) | 547 | __array( char, rwbs, RWBS_LEN) |
546 | ), | 548 | ), |
547 | 549 | ||
548 | TP_fast_assign( | 550 | TP_fast_assign( |
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 3a2cab407b9..e38544dddb1 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c | |||
@@ -246,7 +246,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, | |||
246 | gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); | 246 | gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); |
247 | 247 | ||
248 | for (i = gc->irq_base; msk; msk >>= 1, i++) { | 248 | for (i = gc->irq_base; msk; msk >>= 1, i++) { |
249 | if (!msk & 0x01) | 249 | if (!(msk & 0x01)) |
250 | continue; | 250 | continue; |
251 | 251 | ||
252 | if (flags & IRQ_GC_INIT_NESTED_LOCK) | 252 | if (flags & IRQ_GC_INIT_NESTED_LOCK) |
@@ -301,7 +301,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, | |||
301 | raw_spin_unlock(&gc_lock); | 301 | raw_spin_unlock(&gc_lock); |
302 | 302 | ||
303 | for (; msk; msk >>= 1, i++) { | 303 | for (; msk; msk >>= 1, i++) { |
304 | if (!msk & 0x01) | 304 | if (!(msk & 0x01)) |
305 | continue; | 305 | continue; |
306 | 306 | ||
307 | /* Remove handler first. That will mask the irq line */ | 307 | /* Remove handler first. That will mask the irq line */ |
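Both hunks fix the same operator-precedence bug: ! binds tighter than &, so the old condition computed (!msk) & 0x01, which is 0 for every non-zero mask and therefore never skipped a cleared bit. In isolation:

    unsigned int msk = 0x2;         /* bit 0 clear, bit 1 set */

    if (!msk & 0x01)                /* (!msk) & 1 == 0 & 1 == 0: never taken */
            ;                       /* old code: cleared bit 0 NOT skipped */

    if (!(msk & 0x01))              /* !(0) == 1: taken as intended */
            ;                       /* fixed code: cleared bit 0 skipped */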
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 4c60a50e66b..039b889ea05 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c | |||
@@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { } | |||
70 | static inline int desc_node(struct irq_desc *desc) { return 0; } | 70 | static inline int desc_node(struct irq_desc *desc) { return 0; } |
71 | #endif | 71 | #endif |
72 | 72 | ||
73 | static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | 73 | static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, |
74 | struct module *owner) | ||
74 | { | 75 | { |
75 | int cpu; | 76 | int cpu; |
76 | 77 | ||
@@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | |||
86 | desc->irq_count = 0; | 87 | desc->irq_count = 0; |
87 | desc->irqs_unhandled = 0; | 88 | desc->irqs_unhandled = 0; |
88 | desc->name = NULL; | 89 | desc->name = NULL; |
90 | desc->owner = owner; | ||
89 | for_each_possible_cpu(cpu) | 91 | for_each_possible_cpu(cpu) |
90 | *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; | 92 | *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; |
91 | desc_smp_init(desc, node); | 93 | desc_smp_init(desc, node); |
@@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc) | |||
128 | static inline void free_masks(struct irq_desc *desc) { } | 130 | static inline void free_masks(struct irq_desc *desc) { } |
129 | #endif | 131 | #endif |
130 | 132 | ||
131 | static struct irq_desc *alloc_desc(int irq, int node) | 133 | static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) |
132 | { | 134 | { |
133 | struct irq_desc *desc; | 135 | struct irq_desc *desc; |
134 | gfp_t gfp = GFP_KERNEL; | 136 | gfp_t gfp = GFP_KERNEL; |
@@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node) | |||
147 | raw_spin_lock_init(&desc->lock); | 149 | raw_spin_lock_init(&desc->lock); |
148 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | 150 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); |
149 | 151 | ||
150 | desc_set_defaults(irq, desc, node); | 152 | desc_set_defaults(irq, desc, node, owner); |
151 | 153 | ||
152 | return desc; | 154 | return desc; |
153 | 155 | ||
@@ -173,13 +175,14 @@ static void free_desc(unsigned int irq) | |||
173 | kfree(desc); | 175 | kfree(desc); |
174 | } | 176 | } |
175 | 177 | ||
176 | static int alloc_descs(unsigned int start, unsigned int cnt, int node) | 178 | static int alloc_descs(unsigned int start, unsigned int cnt, int node, |
179 | struct module *owner) | ||
177 | { | 180 | { |
178 | struct irq_desc *desc; | 181 | struct irq_desc *desc; |
179 | int i; | 182 | int i; |
180 | 183 | ||
181 | for (i = 0; i < cnt; i++) { | 184 | for (i = 0; i < cnt; i++) { |
182 | desc = alloc_desc(start + i, node); | 185 | desc = alloc_desc(start + i, node, owner); |
183 | if (!desc) | 186 | if (!desc) |
184 | goto err; | 187 | goto err; |
185 | mutex_lock(&sparse_irq_lock); | 188 | mutex_lock(&sparse_irq_lock); |
@@ -227,7 +230,7 @@ int __init early_irq_init(void) | |||
227 | nr_irqs = initcnt; | 230 | nr_irqs = initcnt; |
228 | 231 | ||
229 | for (i = 0; i < initcnt; i++) { | 232 | for (i = 0; i < initcnt; i++) { |
230 | desc = alloc_desc(i, node); | 233 | desc = alloc_desc(i, node, NULL); |
231 | set_bit(i, allocated_irqs); | 234 | set_bit(i, allocated_irqs); |
232 | irq_insert_desc(i, desc); | 235 | irq_insert_desc(i, desc); |
233 | } | 236 | } |
@@ -261,7 +264,7 @@ int __init early_irq_init(void) | |||
261 | alloc_masks(&desc[i], GFP_KERNEL, node); | 264 | alloc_masks(&desc[i], GFP_KERNEL, node); |
262 | raw_spin_lock_init(&desc[i].lock); | 265 | raw_spin_lock_init(&desc[i].lock); |
263 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); | 266 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); |
264 | desc_set_defaults(i, &desc[i], node); | 267 | desc_set_defaults(i, &desc[i], node, NULL); |
265 | } | 268 | } |
266 | return arch_early_irq_init(); | 269 | return arch_early_irq_init(); |
267 | } | 270 | } |
@@ -276,8 +279,16 @@ static void free_desc(unsigned int irq) | |||
276 | dynamic_irq_cleanup(irq); | 279 | dynamic_irq_cleanup(irq); |
277 | } | 280 | } |
278 | 281 | ||
279 | static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) | 282 | static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, |
283 | struct module *owner) | ||
280 | { | 284 | { |
285 | u32 i; | ||
286 | |||
287 | for (i = 0; i < cnt; i++) { | ||
288 | struct irq_desc *desc = irq_to_desc(start + i); | ||
289 | |||
290 | desc->owner = owner; | ||
291 | } | ||
281 | return start; | 292 | return start; |
282 | } | 293 | } |
283 | 294 | ||
@@ -333,11 +344,13 @@ EXPORT_SYMBOL_GPL(irq_free_descs); | |||
333 | * @from: Start the search from this irq number | 344 | * @from: Start the search from this irq number |
334 | * @cnt: Number of consecutive irqs to allocate. | 345 | * @cnt: Number of consecutive irqs to allocate. |
335 | * @node: Preferred node on which the irq descriptor should be allocated | 346 | * @node: Preferred node on which the irq descriptor should be allocated |
347 | * @owner: Owning module (can be NULL) | ||
336 | * | 348 | * |
337 | * Returns the first irq number or error code | 349 | * Returns the first irq number or error code |
338 | */ | 350 | */ |
339 | int __ref | 351 | int __ref |
340 | irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) | 352 | __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, |
353 | struct module *owner) | ||
341 | { | 354 | { |
342 | int start, ret; | 355 | int start, ret; |
343 | 356 | ||
@@ -366,13 +379,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) | |||
366 | 379 | ||
367 | bitmap_set(allocated_irqs, start, cnt); | 380 | bitmap_set(allocated_irqs, start, cnt); |
368 | mutex_unlock(&sparse_irq_lock); | 381 | mutex_unlock(&sparse_irq_lock); |
369 | return alloc_descs(start, cnt, node); | 382 | return alloc_descs(start, cnt, node, owner); |
370 | 383 | ||
371 | err: | 384 | err: |
372 | mutex_unlock(&sparse_irq_lock); | 385 | mutex_unlock(&sparse_irq_lock); |
373 | return ret; | 386 | return ret; |
374 | } | 387 | } |
375 | EXPORT_SYMBOL_GPL(irq_alloc_descs); | 388 | EXPORT_SYMBOL_GPL(__irq_alloc_descs); |
376 | 389 | ||
377 | /** | 390 | /** |
378 | * irq_reserve_irqs - mark irqs allocated | 391 | * irq_reserve_irqs - mark irqs allocated |
@@ -440,7 +453,7 @@ void dynamic_irq_cleanup(unsigned int irq) | |||
440 | unsigned long flags; | 453 | unsigned long flags; |
441 | 454 | ||
442 | raw_spin_lock_irqsave(&desc->lock, flags); | 455 | raw_spin_lock_irqsave(&desc->lock, flags); |
443 | desc_set_defaults(irq, desc, desc_node(desc)); | 456 | desc_set_defaults(irq, desc, desc_node(desc), NULL); |
444 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 457 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
445 | } | 458 | } |
446 | 459 | ||
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0a7840aeb0f..2e9425889fa 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -883,6 +883,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
883 | 883 | ||
884 | if (desc->irq_data.chip == &no_irq_chip) | 884 | if (desc->irq_data.chip == &no_irq_chip) |
885 | return -ENOSYS; | 885 | return -ENOSYS; |
886 | if (!try_module_get(desc->owner)) | ||
887 | return -ENODEV; | ||
886 | /* | 888 | /* |
887 | * Some drivers like serial.c use request_irq() heavily, | 889 | * Some drivers like serial.c use request_irq() heavily, |
888 | * so we have to be careful not to interfere with a | 890 | * so we have to be careful not to interfere with a |
@@ -906,8 +908,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
906 | */ | 908 | */ |
907 | nested = irq_settings_is_nested_thread(desc); | 909 | nested = irq_settings_is_nested_thread(desc); |
908 | if (nested) { | 910 | if (nested) { |
909 | if (!new->thread_fn) | 911 | if (!new->thread_fn) { |
910 | return -EINVAL; | 912 | ret = -EINVAL; |
913 | goto out_mput; | ||
914 | } | ||
911 | /* | 915 | /* |
912 | * Replace the primary handler which was provided from | 916 | * Replace the primary handler which was provided from |
913 | * the driver for non nested interrupt handling by the | 917 | * the driver for non nested interrupt handling by the |
@@ -929,8 +933,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
929 | 933 | ||
930 | t = kthread_create(irq_thread, new, "irq/%d-%s", irq, | 934 | t = kthread_create(irq_thread, new, "irq/%d-%s", irq, |
931 | new->name); | 935 | new->name); |
932 | if (IS_ERR(t)) | 936 | if (IS_ERR(t)) { |
933 | return PTR_ERR(t); | 937 | ret = PTR_ERR(t); |
938 | goto out_mput; | ||
939 | } | ||
934 | /* | 940 | /* |
935 | * We keep the reference to the task struct even if | 941 | * We keep the reference to the task struct even if |
936 | * the thread dies to avoid that the interrupt code | 942 | * the thread dies to avoid that the interrupt code |
@@ -1095,6 +1101,8 @@ out_thread: | |||
1095 | kthread_stop(t); | 1101 | kthread_stop(t); |
1096 | put_task_struct(t); | 1102 | put_task_struct(t); |
1097 | } | 1103 | } |
1104 | out_mput: | ||
1105 | module_put(desc->owner); | ||
1098 | return ret; | 1106 | return ret; |
1099 | } | 1107 | } |
1100 | 1108 | ||
@@ -1203,6 +1211,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) | |||
1203 | put_task_struct(action->thread); | 1211 | put_task_struct(action->thread); |
1204 | } | 1212 | } |
1205 | 1213 | ||
1214 | module_put(desc->owner); | ||
1206 | return action; | 1215 | return action; |
1207 | } | 1216 | } |
1208 | 1217 | ||
@@ -1322,6 +1331,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, | |||
1322 | if (!thread_fn) | 1331 | if (!thread_fn) |
1323 | return -EINVAL; | 1332 | return -EINVAL; |
1324 | handler = irq_default_primary_handler; | 1333 | handler = irq_default_primary_handler; |
1334 | irqflags |= IRQF_ONESHOT; | ||
1325 | } | 1335 | } |
1326 | 1336 | ||
1327 | action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); | 1337 | action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); |
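Two behavioural changes fall out of this file: a NULL primary handler now implies IRQF_ONESHOT (the default primary handler merely wakes the thread, so the line must stay masked until the thread completes), and each successful __setup_irq() pins the owning module until free_irq(). A hedged call-site sketch with illustrative names:

    /* Threaded-only handler: after this patch the core behaves as if
     * IRQF_ONESHOT had been passed explicitly. */
    ret = request_threaded_irq(irq, NULL, my_thread_fn,
                               IRQF_TRIGGER_LOW, "mydev", dev);
    if (ret)
            return ret;
    /* ... device operation ... */
    free_irq(irq, dev);     /* also drops the module reference taken above */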
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 8c24294e477..91d67ce3a8d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -3111,7 +3111,13 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) | |||
3111 | if (!class) | 3111 | if (!class) |
3112 | class = look_up_lock_class(lock, 0); | 3112 | class = look_up_lock_class(lock, 0); |
3113 | 3113 | ||
3114 | if (DEBUG_LOCKS_WARN_ON(!class)) | 3114 | /* |
3115 | * If look_up_lock_class() failed to find a class, we're trying | ||
3116 | * to test if we hold a lock that has never yet been acquired. | ||
3117 | * Clearly if the lock hasn't been acquired _ever_, we're not | ||
3118 | * holding it either, so report failure. | ||
3119 | */ | ||
3120 | if (!class) | ||
3115 | return 0; | 3121 | return 0; |
3116 | 3122 | ||
3117 | if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) | 3123 | if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index b1914cb9095..3744c594b19 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -231,3 +231,7 @@ config PM_CLK | |||
231 | config PM_GENERIC_DOMAINS | 231 | config PM_GENERIC_DOMAINS |
232 | bool | 232 | bool |
233 | depends on PM | 233 | depends on PM |
234 | |||
235 | config PM_GENERIC_DOMAINS_RUNTIME | ||
236 | def_bool y | ||
237 | depends on PM_RUNTIME && PM_GENERIC_DOMAINS | ||
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 6957aa298df..7c910a5593a 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
206 | what |= MASK_TC_BIT(rw, RAHEAD); | 206 | what |= MASK_TC_BIT(rw, RAHEAD); |
207 | what |= MASK_TC_BIT(rw, META); | 207 | what |= MASK_TC_BIT(rw, META); |
208 | what |= MASK_TC_BIT(rw, DISCARD); | 208 | what |= MASK_TC_BIT(rw, DISCARD); |
209 | what |= MASK_TC_BIT(rw, FLUSH); | ||
210 | what |= MASK_TC_BIT(rw, FUA); | ||
209 | 211 | ||
210 | pid = tsk->pid; | 212 | pid = tsk->pid; |
211 | if (act_log_check(bt, what, sector, pid)) | 213 | if (act_log_check(bt, what, sector, pid)) |
@@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) | |||
1054 | goto out; | 1056 | goto out; |
1055 | } | 1057 | } |
1056 | 1058 | ||
1059 | if (tc & BLK_TC_FLUSH) | ||
1060 | rwbs[i++] = 'F'; | ||
1061 | |||
1057 | if (tc & BLK_TC_DISCARD) | 1062 | if (tc & BLK_TC_DISCARD) |
1058 | rwbs[i++] = 'D'; | 1063 | rwbs[i++] = 'D'; |
1059 | else if (tc & BLK_TC_WRITE) | 1064 | else if (tc & BLK_TC_WRITE) |
@@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) | |||
1063 | else | 1068 | else |
1064 | rwbs[i++] = 'N'; | 1069 | rwbs[i++] = 'N'; |
1065 | 1070 | ||
1071 | if (tc & BLK_TC_FUA) | ||
1072 | rwbs[i++] = 'F'; | ||
1066 | if (tc & BLK_TC_AHEAD) | 1073 | if (tc & BLK_TC_AHEAD) |
1067 | rwbs[i++] = 'A'; | 1074 | rwbs[i++] = 'A'; |
1068 | if (tc & BLK_TC_BARRIER) | ||
1069 | rwbs[i++] = 'B'; | ||
1070 | if (tc & BLK_TC_SYNC) | 1075 | if (tc & BLK_TC_SYNC) |
1071 | rwbs[i++] = 'S'; | 1076 | rwbs[i++] = 'S'; |
1072 | if (tc & BLK_TC_META) | 1077 | if (tc & BLK_TC_META) |
@@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); | |||
1132 | 1137 | ||
1133 | static int blk_log_action_classic(struct trace_iterator *iter, const char *act) | 1138 | static int blk_log_action_classic(struct trace_iterator *iter, const char *act) |
1134 | { | 1139 | { |
1135 | char rwbs[6]; | 1140 | char rwbs[RWBS_LEN]; |
1136 | unsigned long long ts = iter->ts; | 1141 | unsigned long long ts = iter->ts; |
1137 | unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); | 1142 | unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); |
1138 | unsigned secs = (unsigned long)ts; | 1143 | unsigned secs = (unsigned long)ts; |
@@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act) | |||
1148 | 1153 | ||
1149 | static int blk_log_action(struct trace_iterator *iter, const char *act) | 1154 | static int blk_log_action(struct trace_iterator *iter, const char *act) |
1150 | { | 1155 | { |
1151 | char rwbs[6]; | 1156 | char rwbs[RWBS_LEN]; |
1152 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); | 1157 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); |
1153 | 1158 | ||
1154 | fill_rwbs(rwbs, t); | 1159 | fill_rwbs(rwbs, t); |
@@ -1561,7 +1566,7 @@ static const struct { | |||
1561 | } mask_maps[] = { | 1566 | } mask_maps[] = { |
1562 | { BLK_TC_READ, "read" }, | 1567 | { BLK_TC_READ, "read" }, |
1563 | { BLK_TC_WRITE, "write" }, | 1568 | { BLK_TC_WRITE, "write" }, |
1564 | { BLK_TC_BARRIER, "barrier" }, | 1569 | { BLK_TC_FLUSH, "flush" }, |
1565 | { BLK_TC_SYNC, "sync" }, | 1570 | { BLK_TC_SYNC, "sync" }, |
1566 | { BLK_TC_QUEUE, "queue" }, | 1571 | { BLK_TC_QUEUE, "queue" }, |
1567 | { BLK_TC_REQUEUE, "requeue" }, | 1572 | { BLK_TC_REQUEUE, "requeue" }, |
@@ -1573,6 +1578,7 @@ static const struct { | |||
1573 | { BLK_TC_META, "meta" }, | 1578 | { BLK_TC_META, "meta" }, |
1574 | { BLK_TC_DISCARD, "discard" }, | 1579 | { BLK_TC_DISCARD, "discard" }, |
1575 | { BLK_TC_DRV_DATA, "drv_data" }, | 1580 | { BLK_TC_DRV_DATA, "drv_data" }, |
1581 | { BLK_TC_FUA, "fua" }, | ||
1576 | }; | 1582 | }; |
1577 | 1583 | ||
1578 | static int blk_trace_str2mask(const char *str) | 1584 | static int blk_trace_str2mask(const char *str) |
@@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | |||
1788 | { | 1794 | { |
1789 | int i = 0; | 1795 | int i = 0; |
1790 | 1796 | ||
1797 | if (rw & REQ_FLUSH) | ||
1798 | rwbs[i++] = 'F'; | ||
1799 | |||
1791 | if (rw & WRITE) | 1800 | if (rw & WRITE) |
1792 | rwbs[i++] = 'W'; | 1801 | rwbs[i++] = 'W'; |
1793 | else if (rw & REQ_DISCARD) | 1802 | else if (rw & REQ_DISCARD) |
@@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | |||
1797 | else | 1806 | else |
1798 | rwbs[i++] = 'N'; | 1807 | rwbs[i++] = 'N'; |
1799 | 1808 | ||
1809 | if (rw & REQ_FUA) | ||
1810 | rwbs[i++] = 'F'; | ||
1800 | if (rw & REQ_RAHEAD) | 1811 | if (rw & REQ_RAHEAD) |
1801 | rwbs[i++] = 'A'; | 1812 | rwbs[i++] = 'A'; |
1802 | if (rw & REQ_SYNC) | 1813 | if (rw & REQ_SYNC) |
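With FLUSH and FUA folded in, an rwbs string can now carry up to six flag characters (leading 'F', one of W/D/R/N, trailing 'F', plus A/S/M) and a NUL terminator, hence RWBS_LEN of 8 in include/trace/events/block.h above. A hedged sketch of a near-worst case:

    char rwbs[RWBS_LEN];    /* RWBS_LEN == 8 */

    /* A sync flush+FUA write yields "FWFS":
     * F (flush), W (write), F (fua), S (sync). */
    blk_fill_rwbs(rwbs, REQ_FLUSH | WRITE | REQ_FUA | REQ_SYNC, 512);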
diff --git a/mm/highmem.c b/mm/highmem.c index 693394daa2e..5ef672c07f7 100644 --- a/mm/highmem.c +++ b/mm/highmem.c | |||
@@ -326,7 +326,7 @@ static struct page_address_slot { | |||
326 | spinlock_t lock; /* Protect this bucket's list */ | 326 | spinlock_t lock; /* Protect this bucket's list */ |
327 | } ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER]; | 327 | } ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER]; |
328 | 328 | ||
329 | static struct page_address_slot *page_slot(struct page *page) | 329 | static struct page_address_slot *page_slot(const struct page *page) |
330 | { | 330 | { |
331 | return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)]; | 331 | return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)]; |
332 | } | 332 | } |
@@ -337,7 +337,7 @@ static struct page_address_slot *page_slot(struct page *page) | |||
337 | * | 337 | * |
338 | * Returns the page's virtual address. | 338 | * Returns the page's virtual address. |
339 | */ | 339 | */ |
340 | void *page_address(struct page *page) | 340 | void *page_address(const struct page *page) |
341 | { | 341 | { |
342 | unsigned long flags; | 342 | unsigned long flags; |
343 | void *ret; | 343 | void *ret; |
diff --git a/sound/aoa/fabrics/layout.c b/sound/aoa/fabrics/layout.c index 3fd1a7e2492..552b97afbca 100644 --- a/sound/aoa/fabrics/layout.c +++ b/sound/aoa/fabrics/layout.c | |||
@@ -1073,10 +1073,10 @@ static int aoa_fabric_layout_probe(struct soundbus_dev *sdev) | |||
1073 | sdev->pcmid = -1; | 1073 | sdev->pcmid = -1; |
1074 | list_del(&ldev->list); | 1074 | list_del(&ldev->list); |
1075 | layouts_list_items--; | 1075 | layouts_list_items--; |
1076 | kfree(ldev); | ||
1076 | outnodev: | 1077 | outnodev: |
1077 | of_node_put(sound); | 1078 | of_node_put(sound); |
1078 | layout_device = NULL; | 1079 | layout_device = NULL; |
1079 | kfree(ldev); | ||
1080 | return -ENODEV; | 1080 | return -ENODEV; |
1081 | } | 1081 | } |
1082 | 1082 | ||
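The move of kfree(ldev) is a reminder that goto-style cleanup must unwind in reverse allocation order: freeing ldev on the shared outnodev path was wrong for jumps taken before (or independent of) that allocation. The canonical shape, sketched with illustrative names:

    a = alloc_a();
    if (!a)
            goto out;
    b = alloc_b();
    if (!b)
            goto out_free_a;        /* only unwind what already exists */
    return 0;

    out_free_a:
            free_a(a);
    out:
            return -ENODEV;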
diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 200c9a1d48b..a872d0a8297 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c | |||
@@ -1909,6 +1909,7 @@ static unsigned int ad1981_jacks_whitelist[] = { | |||
1909 | 0x103c0944, /* HP nc6220 */ | 1909 | 0x103c0944, /* HP nc6220 */ |
1910 | 0x103c0934, /* HP nc8220 */ | 1910 | 0x103c0934, /* HP nc8220 */ |
1911 | 0x103c006d, /* HP nx9105 */ | 1911 | 0x103c006d, /* HP nx9105 */ |
1912 | 0x103c300d, /* HP Compaq dc5100 SFF(PT003AW) */ | ||
1912 | 0x17340088, /* FSC Scenic-W */ | 1913 | 0x17340088, /* FSC Scenic-W */ |
1913 | 0 /* end */ | 1914 | 0 /* end */ |
1914 | }; | 1915 | }; |
diff --git a/sound/pci/hda/alc268_quirks.c b/sound/pci/hda/alc268_quirks.c index be58bf2f3ae..2e5876ce71f 100644 --- a/sound/pci/hda/alc268_quirks.c +++ b/sound/pci/hda/alc268_quirks.c | |||
@@ -476,8 +476,8 @@ static const struct snd_pci_quirk alc268_ssid_cfg_tbl[] = { | |||
476 | 476 | ||
477 | static const struct alc_config_preset alc268_presets[] = { | 477 | static const struct alc_config_preset alc268_presets[] = { |
478 | [ALC267_QUANTA_IL1] = { | 478 | [ALC267_QUANTA_IL1] = { |
479 | .mixers = { alc267_quanta_il1_mixer, alc268_beep_mixer, | 479 | .mixers = { alc267_quanta_il1_mixer, alc268_beep_mixer }, |
480 | alc268_capture_nosrc_mixer }, | 480 | .cap_mixer = alc268_capture_nosrc_mixer, |
481 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, | 481 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, |
482 | alc267_quanta_il1_verbs }, | 482 | alc267_quanta_il1_verbs }, |
483 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), | 483 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), |
@@ -492,8 +492,8 @@ static const struct alc_config_preset alc268_presets[] = { | |||
492 | .init_hook = alc_inithook, | 492 | .init_hook = alc_inithook, |
493 | }, | 493 | }, |
494 | [ALC268_3ST] = { | 494 | [ALC268_3ST] = { |
495 | .mixers = { alc268_base_mixer, alc268_capture_alt_mixer, | 495 | .mixers = { alc268_base_mixer, alc268_beep_mixer }, |
496 | alc268_beep_mixer }, | 496 | .cap_mixer = alc268_capture_alt_mixer, |
497 | .init_verbs = { alc268_base_init_verbs }, | 497 | .init_verbs = { alc268_base_init_verbs }, |
498 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), | 498 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), |
499 | .dac_nids = alc268_dac_nids, | 499 | .dac_nids = alc268_dac_nids, |
@@ -507,8 +507,8 @@ static const struct alc_config_preset alc268_presets[] = { | |||
507 | .input_mux = &alc268_capture_source, | 507 | .input_mux = &alc268_capture_source, |
508 | }, | 508 | }, |
509 | [ALC268_TOSHIBA] = { | 509 | [ALC268_TOSHIBA] = { |
510 | .mixers = { alc268_toshiba_mixer, alc268_capture_alt_mixer, | 510 | .mixers = { alc268_toshiba_mixer, alc268_beep_mixer }, |
511 | alc268_beep_mixer }, | 511 | .cap_mixer = alc268_capture_alt_mixer, |
512 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, | 512 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, |
513 | alc268_toshiba_verbs }, | 513 | alc268_toshiba_verbs }, |
514 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), | 514 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), |
@@ -525,8 +525,8 @@ static const struct alc_config_preset alc268_presets[] = { | |||
525 | .init_hook = alc_inithook, | 525 | .init_hook = alc_inithook, |
526 | }, | 526 | }, |
527 | [ALC268_ACER] = { | 527 | [ALC268_ACER] = { |
528 | .mixers = { alc268_acer_mixer, alc268_capture_alt_mixer, | 528 | .mixers = { alc268_acer_mixer, alc268_beep_mixer }, |
529 | alc268_beep_mixer }, | 529 | .cap_mixer = alc268_capture_alt_mixer, |
530 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, | 530 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, |
531 | alc268_acer_verbs }, | 531 | alc268_acer_verbs }, |
532 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), | 532 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), |
@@ -543,8 +543,8 @@ static const struct alc_config_preset alc268_presets[] = { | |||
543 | .init_hook = alc_inithook, | 543 | .init_hook = alc_inithook, |
544 | }, | 544 | }, |
545 | [ALC268_ACER_DMIC] = { | 545 | [ALC268_ACER_DMIC] = { |
546 | .mixers = { alc268_acer_dmic_mixer, alc268_capture_alt_mixer, | 546 | .mixers = { alc268_acer_dmic_mixer, alc268_beep_mixer }, |
547 | alc268_beep_mixer }, | 547 | .cap_mixer = alc268_capture_alt_mixer, |
548 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, | 548 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, |
549 | alc268_acer_verbs }, | 549 | alc268_acer_verbs }, |
550 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), | 550 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), |
@@ -561,9 +561,8 @@ static const struct alc_config_preset alc268_presets[] = { | |||
561 | .init_hook = alc_inithook, | 561 | .init_hook = alc_inithook, |
562 | }, | 562 | }, |
563 | [ALC268_ACER_ASPIRE_ONE] = { | 563 | [ALC268_ACER_ASPIRE_ONE] = { |
564 | .mixers = { alc268_acer_aspire_one_mixer, | 564 | .mixers = { alc268_acer_aspire_one_mixer, alc268_beep_mixer }, |
565 | alc268_beep_mixer, | 565 | .cap_mixer = alc268_capture_nosrc_mixer, |
566 | alc268_capture_nosrc_mixer }, | ||
567 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, | 566 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, |
568 | alc268_acer_aspire_one_verbs }, | 567 | alc268_acer_aspire_one_verbs }, |
569 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), | 568 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), |
@@ -579,8 +578,8 @@ static const struct alc_config_preset alc268_presets[] = { | |||
579 | .init_hook = alc_inithook, | 578 | .init_hook = alc_inithook, |
580 | }, | 579 | }, |
581 | [ALC268_DELL] = { | 580 | [ALC268_DELL] = { |
582 | .mixers = { alc268_dell_mixer, alc268_beep_mixer, | 581 | .mixers = { alc268_dell_mixer, alc268_beep_mixer }, |
583 | alc268_capture_nosrc_mixer }, | 582 | .cap_mixer = alc268_capture_nosrc_mixer, |
584 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, | 583 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, |
585 | alc268_dell_verbs }, | 584 | alc268_dell_verbs }, |
586 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), | 585 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), |
@@ -596,8 +595,8 @@ static const struct alc_config_preset alc268_presets[] = { | |||
596 | .init_hook = alc_inithook, | 595 | .init_hook = alc_inithook, |
597 | }, | 596 | }, |
598 | [ALC268_ZEPTO] = { | 597 | [ALC268_ZEPTO] = { |
599 | .mixers = { alc268_base_mixer, alc268_capture_alt_mixer, | 598 | .mixers = { alc268_base_mixer, alc268_beep_mixer }, |
600 | alc268_beep_mixer }, | 599 | .cap_mixer = alc268_capture_alt_mixer, |
601 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, | 600 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, |
602 | alc268_toshiba_verbs }, | 601 | alc268_toshiba_verbs }, |
603 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), | 602 | .num_dacs = ARRAY_SIZE(alc268_dac_nids), |
@@ -616,7 +615,8 @@ static const struct alc_config_preset alc268_presets[] = { | |||
616 | }, | 615 | }, |
617 | #ifdef CONFIG_SND_DEBUG | 616 | #ifdef CONFIG_SND_DEBUG |
618 | [ALC268_TEST] = { | 617 | [ALC268_TEST] = { |
619 | .mixers = { alc268_test_mixer, alc268_capture_mixer }, | 618 | .mixers = { alc268_test_mixer }, |
619 | .cap_mixer = alc268_capture_mixer, | ||
620 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, | 620 | .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, |
621 | alc268_volume_init_verbs, | 621 | alc268_volume_init_verbs, |
622 | alc268_beep_init_verbs }, | 622 | alc268_beep_init_verbs }, |
diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c index 28ce17d09c3..c34f730f481 100644 --- a/sound/pci/hda/hda_eld.c +++ b/sound/pci/hda/hda_eld.c | |||
@@ -144,25 +144,17 @@ static int cea_sampling_frequencies[8] = { | |||
144 | SNDRV_PCM_RATE_192000, /* 7: 192000Hz */ | 144 | SNDRV_PCM_RATE_192000, /* 7: 192000Hz */ |
145 | }; | 145 | }; |
146 | 146 | ||
147 | static unsigned char hdmi_get_eld_byte(struct hda_codec *codec, hda_nid_t nid, | 147 | static unsigned int hdmi_get_eld_data(struct hda_codec *codec, hda_nid_t nid, |
148 | int byte_index) | 148 | int byte_index) |
149 | { | 149 | { |
150 | unsigned int val; | 150 | unsigned int val; |
151 | 151 | ||
152 | val = snd_hda_codec_read(codec, nid, 0, | 152 | val = snd_hda_codec_read(codec, nid, 0, |
153 | AC_VERB_GET_HDMI_ELDD, byte_index); | 153 | AC_VERB_GET_HDMI_ELDD, byte_index); |
154 | |||
155 | #ifdef BE_PARANOID | 154 | #ifdef BE_PARANOID |
156 | printk(KERN_INFO "HDMI: ELD data byte %d: 0x%x\n", byte_index, val); | 155 | printk(KERN_INFO "HDMI: ELD data byte %d: 0x%x\n", byte_index, val); |
157 | #endif | 156 | #endif |
158 | 157 | return val; | |
159 | if ((val & AC_ELDD_ELD_VALID) == 0) { | ||
160 | snd_printd(KERN_INFO "HDMI: invalid ELD data byte %d\n", | ||
161 | byte_index); | ||
162 | val = 0; | ||
163 | } | ||
164 | |||
165 | return val & AC_ELDD_ELD_DATA; | ||
166 | } | 158 | } |
167 | 159 | ||
168 | #define GRAB_BITS(buf, byte, lowbit, bits) \ | 160 | #define GRAB_BITS(buf, byte, lowbit, bits) \ |
@@ -344,11 +336,26 @@ int snd_hdmi_get_eld(struct hdmi_eld *eld, | |||
344 | if (!buf) | 336 | if (!buf) |
345 | return -ENOMEM; | 337 | return -ENOMEM; |
346 | 338 | ||
347 | for (i = 0; i < size; i++) | 339 | for (i = 0; i < size; i++) { |
348 | buf[i] = hdmi_get_eld_byte(codec, nid, i); | 340 | unsigned int val = hdmi_get_eld_data(codec, nid, i); |
341 | if (!(val & AC_ELDD_ELD_VALID)) { | ||
342 | if (!i) { | ||
343 | snd_printd(KERN_INFO | ||
344 | "HDMI: invalid ELD data\n"); | ||
345 | ret = -EINVAL; | ||
346 | goto error; | ||
347 | } | ||
348 | snd_printd(KERN_INFO | ||
349 | "HDMI: invalid ELD data byte %d\n", i); | ||
350 | val = 0; | ||
351 | } else | ||
352 | val &= AC_ELDD_ELD_DATA; | ||
353 | buf[i] = val; | ||
354 | } | ||
349 | 355 | ||
350 | ret = hdmi_update_eld(eld, buf, size); | 356 | ret = hdmi_update_eld(eld, buf, size); |
351 | 357 | ||
358 | error: | ||
352 | kfree(buf); | 359 | kfree(buf); |
353 | return ret; | 360 | return ret; |
354 | } | 361 | } |
diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 47d6ffc9b5b..d6c93d92b55 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c | |||
@@ -375,7 +375,7 @@ static int is_ext_mic(struct hda_codec *codec, unsigned int idx) | |||
375 | static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin, | 375 | static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin, |
376 | unsigned int *idxp) | 376 | unsigned int *idxp) |
377 | { | 377 | { |
378 | int i; | 378 | int i, idx; |
379 | hda_nid_t nid; | 379 | hda_nid_t nid; |
380 | 380 | ||
381 | nid = codec->start_nid; | 381 | nid = codec->start_nid; |
@@ -384,9 +384,11 @@ static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin, | |||
384 | type = get_wcaps_type(get_wcaps(codec, nid)); | 384 | type = get_wcaps_type(get_wcaps(codec, nid)); |
385 | if (type != AC_WID_AUD_IN) | 385 | if (type != AC_WID_AUD_IN) |
386 | continue; | 386 | continue; |
387 | *idxp = snd_hda_get_conn_index(codec, nid, pin, false); | 387 | idx = snd_hda_get_conn_index(codec, nid, pin, false); |
388 | if (*idxp >= 0) | 388 | if (idx >= 0) { |
389 | *idxp = idx; | ||
389 | return nid; | 390 | return nid; |
391 | } | ||
390 | } | 392 | } |
391 | return 0; | 393 | return 0; |
392 | } | 394 | } |
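This hunk cures a signedness bug: *idxp is an unsigned int, so the negative error return of snd_hda_get_conn_index() wrapped to a huge positive value and '*idxp >= 0' held unconditionally. Comparing in a signed local first, as the patch does, is the general cure:

    unsigned int u;
    int ret = -1;           /* stand-in for a failed lookup */

    u = ret;                /* u wraps to UINT_MAX */
    if (u >= 0)             /* always true: unsigned is never negative */
            ;               /* the old bug: error treated as success */

    if (ret >= 0)           /* correct: test while still signed */
            u = ret;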
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9a1aa09f47f..fcb11af9ad2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c | |||
@@ -1784,6 +1784,7 @@ static const char * const alc_slave_vols[] = { | |||
1784 | "Speaker Playback Volume", | 1784 | "Speaker Playback Volume", |
1785 | "Mono Playback Volume", | 1785 | "Mono Playback Volume", |
1786 | "Line-Out Playback Volume", | 1786 | "Line-Out Playback Volume", |
1787 | "PCM Playback Volume", | ||
1787 | NULL, | 1788 | NULL, |
1788 | }; | 1789 | }; |
1789 | 1790 | ||
@@ -1798,6 +1799,7 @@ static const char * const alc_slave_sws[] = { | |||
1798 | "Mono Playback Switch", | 1799 | "Mono Playback Switch", |
1799 | "IEC958 Playback Switch", | 1800 | "IEC958 Playback Switch", |
1800 | "Line-Out Playback Switch", | 1801 | "Line-Out Playback Switch", |
1802 | "PCM Playback Switch", | ||
1801 | NULL, | 1803 | NULL, |
1802 | }; | 1804 | }; |
1803 | 1805 | ||
diff --git a/sound/usb/caiaq/audio.c b/sound/usb/caiaq/audio.c index aa52b3e13bb..2cf87f5afed 100644 --- a/sound/usb/caiaq/audio.c +++ b/sound/usb/caiaq/audio.c | |||
@@ -139,8 +139,12 @@ static void stream_stop(struct snd_usb_caiaqdev *dev) | |||
139 | 139 | ||
140 | for (i = 0; i < N_URBS; i++) { | 140 | for (i = 0; i < N_URBS; i++) { |
141 | usb_kill_urb(dev->data_urbs_in[i]); | 141 | usb_kill_urb(dev->data_urbs_in[i]); |
142 | usb_kill_urb(dev->data_urbs_out[i]); | 142 | |
143 | if (test_bit(i, &dev->outurb_active_mask)) | ||
144 | usb_kill_urb(dev->data_urbs_out[i]); | ||
143 | } | 145 | } |
146 | |||
147 | dev->outurb_active_mask = 0; | ||
144 | } | 148 | } |
145 | 149 | ||
146 | static int snd_usb_caiaq_substream_open(struct snd_pcm_substream *substream) | 150 | static int snd_usb_caiaq_substream_open(struct snd_pcm_substream *substream) |
@@ -612,8 +616,8 @@ static void read_completed(struct urb *urb) | |||
612 | { | 616 | { |
613 | struct snd_usb_caiaq_cb_info *info = urb->context; | 617 | struct snd_usb_caiaq_cb_info *info = urb->context; |
614 | struct snd_usb_caiaqdev *dev; | 618 | struct snd_usb_caiaqdev *dev; |
615 | struct urb *out; | 619 | struct urb *out = NULL; |
616 | int frame, len, send_it = 0, outframe = 0; | 620 | int i, frame, len, send_it = 0, outframe = 0; |
617 | size_t offset = 0; | 621 | size_t offset = 0; |
618 | 622 | ||
619 | if (urb->status || !info) | 623 | if (urb->status || !info) |
@@ -624,7 +628,17 @@ static void read_completed(struct urb *urb) | |||
624 | if (!dev->streaming) | 628 | if (!dev->streaming) |
625 | return; | 629 | return; |
626 | 630 | ||
627 | out = dev->data_urbs_out[info->index]; | 631 | /* find an unused output urb */ |
632 | for (i = 0; i < N_URBS; i++) | ||
633 | if (test_and_set_bit(i, &dev->outurb_active_mask) == 0) { | ||
634 | out = dev->data_urbs_out[i]; | ||
635 | break; | ||
636 | } | ||
637 | |||
638 | if (!out) { | ||
639 | log("Unable to find an output urb to use\n"); | ||
640 | goto requeue; | ||
641 | } | ||
628 | 642 | ||
629 | /* read the recently received packet and send back one which has | 643 | /* read the recently received packet and send back one which has |
630 | * the same layout */ | 644 | * the same layout */ |
@@ -655,8 +669,12 @@ static void read_completed(struct urb *urb) | |||
655 | out->number_of_packets = outframe; | 669 | out->number_of_packets = outframe; |
656 | out->transfer_flags = URB_ISO_ASAP; | 670 | out->transfer_flags = URB_ISO_ASAP; |
657 | usb_submit_urb(out, GFP_ATOMIC); | 671 | usb_submit_urb(out, GFP_ATOMIC); |
672 | } else { | ||
673 | struct snd_usb_caiaq_cb_info *oinfo = out->context; | ||
674 | clear_bit(oinfo->index, &dev->outurb_active_mask); | ||
658 | } | 675 | } |
659 | 676 | ||
677 | requeue: | ||
660 | /* re-submit inbound urb */ | 678 | /* re-submit inbound urb */ |
661 | for (frame = 0; frame < FRAMES_PER_URB; frame++) { | 679 | for (frame = 0; frame < FRAMES_PER_URB; frame++) { |
662 | urb->iso_frame_desc[frame].offset = BYTES_PER_FRAME * frame; | 680 | urb->iso_frame_desc[frame].offset = BYTES_PER_FRAME * frame; |
@@ -678,6 +696,8 @@ static void write_completed(struct urb *urb) | |||
678 | dev->output_running = 1; | 696 | dev->output_running = 1; |
679 | wake_up(&dev->prepare_wait_queue); | 697 | wake_up(&dev->prepare_wait_queue); |
680 | } | 698 | } |
699 | |||
700 | clear_bit(info->index, &dev->outurb_active_mask); | ||
681 | } | 701 | } |
682 | 702 | ||
683 | static struct urb **alloc_urbs(struct snd_usb_caiaqdev *dev, int dir, int *ret) | 703 | static struct urb **alloc_urbs(struct snd_usb_caiaqdev *dev, int dir, int *ret) |
@@ -829,6 +849,9 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev) | |||
829 | if (!dev->data_cb_info) | 849 | if (!dev->data_cb_info) |
830 | return -ENOMEM; | 850 | return -ENOMEM; |
831 | 851 | ||
852 | dev->outurb_active_mask = 0; | ||
853 | BUILD_BUG_ON(N_URBS > (sizeof(dev->outurb_active_mask) * 8)); | ||
854 | |||
832 | for (i = 0; i < N_URBS; i++) { | 855 | for (i = 0; i < N_URBS; i++) { |
833 | dev->data_cb_info[i].dev = dev; | 856 | dev->data_cb_info[i].dev = dev; |
834 | dev->data_cb_info[i].index = i; | 857 | dev->data_cb_info[i].index = i; |
diff --git a/sound/usb/caiaq/device.h b/sound/usb/caiaq/device.h index b2b310194ff..3f9c6339ae9 100644 --- a/sound/usb/caiaq/device.h +++ b/sound/usb/caiaq/device.h | |||
@@ -96,6 +96,7 @@ struct snd_usb_caiaqdev { | |||
96 | int input_panic, output_panic, warned; | 96 | int input_panic, output_panic, warned; |
97 | char *audio_in_buf, *audio_out_buf; | 97 | char *audio_in_buf, *audio_out_buf; |
98 | unsigned int samplerates, bpp; | 98 | unsigned int samplerates, bpp; |
99 | unsigned long outurb_active_mask; | ||
99 | 100 | ||
100 | struct snd_pcm_substream *sub_playback[MAX_STREAMS]; | 101 | struct snd_pcm_substream *sub_playback[MAX_STREAMS]; |
101 | struct snd_pcm_substream *sub_capture[MAX_STREAMS]; | 102 | struct snd_pcm_substream *sub_capture[MAX_STREAMS]; |
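outurb_active_mask is an unsigned long used as a bitmap of in-flight output URBs: read_completed() claims a free slot with test_and_set_bit(), write_completed() (and the no-send path) releases it with clear_bit(), and the BUILD_BUG_ON in audio.c guarantees N_URBS fits in the word. The claim/release pair in isolation, with illustrative work in the middle:

    if (test_and_set_bit(i, &dev->outurb_active_mask) == 0) {
            /* slot i was free and is now exclusively ours */
            use_slot(i);                            /* hypothetical work */
            clear_bit(i, &dev->outurb_active_mask); /* release when done */
    }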
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index c04d7c71ac8..cdd19d7fe50 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c | |||
@@ -152,6 +152,7 @@ static inline void check_mapped_dB(const struct usbmix_name_map *p, | |||
152 | if (p && p->dB) { | 152 | if (p && p->dB) { |
153 | cval->dBmin = p->dB->min; | 153 | cval->dBmin = p->dB->min; |
154 | cval->dBmax = p->dB->max; | 154 | cval->dBmax = p->dB->max; |
155 | cval->initialized = 1; | ||
155 | } | 156 | } |
156 | } | 157 | } |
157 | 158 | ||
@@ -1092,7 +1093,7 @@ static void build_feature_ctl(struct mixer_build *state, void *raw_desc, | |||
1092 | " Switch" : " Volume"); | 1093 | " Switch" : " Volume"); |
1093 | if (control == UAC_FU_VOLUME) { | 1094 | if (control == UAC_FU_VOLUME) { |
1094 | check_mapped_dB(map, cval); | 1095 | check_mapped_dB(map, cval); |
1095 | if (cval->dBmin < cval->dBmax) { | 1096 | if (cval->dBmin < cval->dBmax || !cval->initialized) { |
1096 | kctl->tlv.c = mixer_vol_tlv; | 1097 | kctl->tlv.c = mixer_vol_tlv; |
1097 | kctl->vd[0].access |= | 1098 | kctl->vd[0].access |= |
1098 | SNDRV_CTL_ELEM_ACCESS_TLV_READ | | 1099 | SNDRV_CTL_ELEM_ACCESS_TLV_READ | |
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 5f2a5c7046d..710ae3d0a48 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c | |||
@@ -134,10 +134,18 @@ static int opt_show_lines(const struct option *opt __used, | |||
134 | { | 134 | { |
135 | int ret = 0; | 135 | int ret = 0; |
136 | 136 | ||
137 | if (str) | 137 | if (!str) |
138 | ret = parse_line_range_desc(str, ¶ms.line_range); | 138 | return 0; |
139 | INIT_LIST_HEAD(¶ms.line_range.line_list); | 139 | |
140 | if (params.show_lines) { | ||
141 | pr_warning("Warning: more than one --line option was" | ||
142 | " detected. Only the first one is valid.\n"); | ||
143 | return 0; | ||
144 | } | ||
145 | |||
140 | params.show_lines = true; | 146 | params.show_lines = true; |
147 | ret = parse_line_range_desc(str, ¶ms.line_range); | ||
148 | INIT_LIST_HEAD(¶ms.line_range.line_list); | ||
141 | 149 | ||
142 | return ret; | 150 | return ret; |
143 | } | 151 | } |
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f6426b496f4..6b0519f885e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -45,7 +45,7 @@ static int freq = 1000; | |||
45 | static int output; | 45 | static int output; |
46 | static int pipe_output = 0; | 46 | static int pipe_output = 0; |
47 | static const char *output_name = NULL; | 47 | static const char *output_name = NULL; |
48 | static int group = 0; | 48 | static bool group = false; |
49 | static int realtime_prio = 0; | 49 | static int realtime_prio = 0; |
50 | static bool nodelay = false; | 50 | static bool nodelay = false; |
51 | static bool raw_samples = false; | 51 | static bool raw_samples = false; |
@@ -753,6 +753,8 @@ const struct option record_options[] = { | |||
753 | "child tasks do not inherit counters"), | 753 | "child tasks do not inherit counters"), |
754 | OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"), | 754 | OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"), |
755 | OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), | 755 | OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), |
756 | OPT_BOOLEAN(0, "group", &group, | ||
757 | "put the counters into a counter group"), | ||
756 | OPT_BOOLEAN('g', "call-graph", &call_graph, | 758 | OPT_BOOLEAN('g', "call-graph", &call_graph, |
757 | "do call-graph (stack chain/backtrace) recording"), | 759 | "do call-graph (stack chain/backtrace) recording"), |
758 | OPT_INCR('v', "verbose", &verbose, | 760 | OPT_INCR('v', "verbose", &verbose, |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1ad04ce29c3..5deb17d9e79 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -193,6 +193,7 @@ static int big_num_opt = -1; | |||
193 | static const char *cpu_list; | 193 | static const char *cpu_list; |
194 | static const char *csv_sep = NULL; | 194 | static const char *csv_sep = NULL; |
195 | static bool csv_output = false; | 195 | static bool csv_output = false; |
196 | static bool group = false; | ||
196 | 197 | ||
197 | static volatile int done = 0; | 198 | static volatile int done = 0; |
198 | 199 | ||
@@ -280,14 +281,14 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) | |||
280 | attr->inherit = !no_inherit; | 281 | attr->inherit = !no_inherit; |
281 | 282 | ||
282 | if (system_wide) | 283 | if (system_wide) |
283 | return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false); | 284 | return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, group); |
284 | 285 | ||
285 | if (target_pid == -1 && target_tid == -1) { | 286 | if (target_pid == -1 && target_tid == -1) { |
286 | attr->disabled = 1; | 287 | attr->disabled = 1; |
287 | attr->enable_on_exec = 1; | 288 | attr->enable_on_exec = 1; |
288 | } | 289 | } |
289 | 290 | ||
290 | return perf_evsel__open_per_thread(evsel, evsel_list->threads, false); | 291 | return perf_evsel__open_per_thread(evsel, evsel_list->threads, group); |
291 | } | 292 | } |
292 | 293 | ||
293 | /* | 294 | /* |
@@ -1043,6 +1044,8 @@ static const struct option options[] = { | |||
1043 | "stat events on existing thread id"), | 1044 | "stat events on existing thread id"), |
1044 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 1045 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
1045 | "system-wide collection from all CPUs"), | 1046 | "system-wide collection from all CPUs"), |
1047 | OPT_BOOLEAN('g', "group", &group, | ||
1048 | "put the counters into a counter group"), | ||
1046 | OPT_BOOLEAN('c', "scale", &scale, | 1049 | OPT_BOOLEAN('c', "scale", &scale, |
1047 | "scale/normalize counters"), | 1050 | "scale/normalize counters"), |
1048 | OPT_INCR('v', "verbose", &verbose, | 1051 | OPT_INCR('v', "verbose", &verbose, |
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index fddf40f30d3..ee51e9b4dc0 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c | |||
@@ -96,6 +96,39 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr, | |||
96 | return *lineno ?: -ENOENT; | 96 | return *lineno ?: -ENOENT; |
97 | } | 97 | } |
98 | 98 | ||
99 | static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data); | ||
100 | |||
101 | /** | ||
102 | * cu_walk_functions_at - Walk on function DIEs at given address | ||
103 | * @cu_die: A CU DIE | ||
104 | * @addr: An address | ||
105 | * @callback: A callback which is called with found DIEs | ||
106 | * @data: A user data pointer | ||
107 | * | ||
108 | * Walk on function DIEs at the given @addr in @cu_die. The DIEs | ||
109 | * passed to @callback are subprograms or inlined subroutines. | ||
110 | */ | ||
111 | int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, | ||
112 | int (*callback)(Dwarf_Die *, void *), void *data) | ||
113 | { | ||
114 | Dwarf_Die die_mem; | ||
115 | Dwarf_Die *sc_die; | ||
116 | int ret = -ENOENT; | ||
117 | |||
118 | /* Inlined functions can be nested recursively. Trace them until the search fails */ | ||
119 | for (sc_die = die_find_realfunc(cu_die, addr, &die_mem); | ||
120 | sc_die != NULL; | ||
121 | sc_die = die_find_child(sc_die, __die_find_inline_cb, &addr, | ||
122 | &die_mem)) { | ||
123 | ret = callback(sc_die, data); | ||
124 | if (ret) | ||
125 | break; | ||
126 | } | ||
127 | |||
128 | return ret; | ||
129 | |||
130 | } | ||
131 | |||
99 | /** | 132 | /** |
100 | * die_compare_name - Compare diename and tname | 133 | * die_compare_name - Compare diename and tname |
101 | * @dw_die: a DIE | 134 | * @dw_die: a DIE |
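Not part of the patch above: a minimal usage sketch for the new cu_walk_functions_at() helper, with dump_scope_cb/dump_scopes as hypothetical names. The walker hands the callback the real subprogram covering the address first, then each nested inlined-subroutine scope:

    static int dump_scope_cb(Dwarf_Die *sc_die, void *data __used)
    {
            /* Called for the subprogram, then each nested inline scope */
            pr_debug("scope: %s\n", dwarf_diename(sc_die));
            return 0;       /* non-zero would stop the walk */
    }

    static int dump_scopes(Dwarf_Die *cu_die, Dwarf_Addr addr)
    {
            /* Returns -ENOENT when no function covers addr */
            return cu_walk_functions_at(cu_die, addr, dump_scope_cb, NULL);
    }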
@@ -198,6 +231,19 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, | |||
198 | return 0; | 231 | return 0; |
199 | } | 232 | } |
200 | 233 | ||
234 | /* Get an attribute and translate it as sdata (signed data) */ | ||
235 | static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, | ||
236 | Dwarf_Sword *result) | ||
237 | { | ||
238 | Dwarf_Attribute attr; | ||
239 | |||
240 | if (dwarf_attr(tp_die, attr_name, &attr) == NULL || | ||
241 | dwarf_formsdata(&attr, result) != 0) | ||
242 | return -ENOENT; | ||
243 | |||
244 | return 0; | ||
245 | } | ||
246 | |||
201 | /** | 247 | /** |
202 | * die_is_signed_type - Check whether a type DIE is signed or not | 248 | * die_is_signed_type - Check whether a type DIE is signed or not |
203 | * @tp_die: a DIE of a type | 249 | * @tp_die: a DIE of a type |
@@ -250,6 +296,50 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs) | |||
250 | return 0; | 296 | return 0; |
251 | } | 297 | } |
252 | 298 | ||
299 | /* Get the call file index number in CU DIE */ | ||
300 | static int die_get_call_fileno(Dwarf_Die *in_die) | ||
301 | { | ||
302 | Dwarf_Sword idx; | ||
303 | |||
304 | if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0) | ||
305 | return (int)idx; | ||
306 | else | ||
307 | return -ENOENT; | ||
308 | } | ||
309 | |||
310 | /* Get the declared file index number in CU DIE */ | ||
311 | static int die_get_decl_fileno(Dwarf_Die *pdie) | ||
312 | { | ||
313 | Dwarf_Sword idx; | ||
314 | |||
315 | if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0) | ||
316 | return (int)idx; | ||
317 | else | ||
318 | return -ENOENT; | ||
319 | } | ||
320 | |||
321 | /** | ||
322 | * die_get_call_file - Get callsite file name of inlined function instance | ||
323 | * @in_die: a DIE of an inlined function instance | ||
324 | * | ||
325 | * Get the call-site file name of @in_die, i.e. the file from which the | ||
326 | * inlined function is called. | ||
327 | */ | ||
328 | const char *die_get_call_file(Dwarf_Die *in_die) | ||
329 | { | ||
330 | Dwarf_Die cu_die; | ||
331 | Dwarf_Files *files; | ||
332 | int idx; | ||
333 | |||
334 | idx = die_get_call_fileno(in_die); | ||
335 | if (idx < 0 || !dwarf_diecu(in_die, &cu_die, NULL, NULL) || | ||
336 | dwarf_getsrcfiles(&cu_die, &files, NULL) != 0) | ||
337 | return NULL; | ||
338 | |||
339 | return dwarf_filesrc(files, idx, NULL, NULL); | ||
340 | } | ||
341 | |||
342 | |||
253 | /** | 343 | /** |
254 | * die_find_child - Generic DIE search function in DIE tree | 344 | * die_find_child - Generic DIE search function in DIE tree |
255 | * @rt_die: a root DIE | 345 | * @rt_die: a root DIE |
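A sketch of how the new accessor pairs with the existing die_get_call_lineno(); in_die is assumed to be a DW_TAG_inlined_subroutine instance obtained from a DIE walk:

    /* in_die: a DW_TAG_inlined_subroutine instance DIE */
    const char *fname = die_get_call_file(in_die);
    int lineno = die_get_call_lineno(in_die);

    if (fname && lineno > 0)
            pr_debug("inlined from %s:%d\n", fname, lineno);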
@@ -374,9 +464,78 @@ Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, | |||
374 | return die_mem; | 464 | return die_mem; |
375 | } | 465 | } |
376 | 466 | ||
467 | struct __instance_walk_param { | ||
468 | void *addr; | ||
469 | int (*callback)(Dwarf_Die *, void *); | ||
470 | void *data; | ||
471 | int retval; | ||
472 | }; | ||
473 | |||
474 | static int __die_walk_instances_cb(Dwarf_Die *inst, void *data) | ||
475 | { | ||
476 | struct __instance_walk_param *iwp = data; | ||
477 | Dwarf_Attribute attr_mem; | ||
478 | Dwarf_Die origin_mem; | ||
479 | Dwarf_Attribute *attr; | ||
480 | Dwarf_Die *origin; | ||
481 | int tmp; | ||
482 | |||
483 | attr = dwarf_attr(inst, DW_AT_abstract_origin, &attr_mem); | ||
484 | if (attr == NULL) | ||
485 | return DIE_FIND_CB_CONTINUE; | ||
486 | |||
487 | origin = dwarf_formref_die(attr, &origin_mem); | ||
488 | if (origin == NULL || origin->addr != iwp->addr) | ||
489 | return DIE_FIND_CB_CONTINUE; | ||
490 | |||
491 | /* Ignore redundant instances */ | ||
492 | if (dwarf_tag(inst) == DW_TAG_inlined_subroutine) { | ||
493 | dwarf_decl_line(origin, &tmp); | ||
494 | if (die_get_call_lineno(inst) == tmp) { | ||
495 | tmp = die_get_decl_fileno(origin); | ||
496 | if (die_get_call_fileno(inst) == tmp) | ||
497 | return DIE_FIND_CB_CONTINUE; | ||
498 | } | ||
499 | } | ||
500 | |||
501 | iwp->retval = iwp->callback(inst, iwp->data); | ||
502 | |||
503 | return (iwp->retval) ? DIE_FIND_CB_END : DIE_FIND_CB_CONTINUE; | ||
504 | } | ||
505 | |||
506 | /** | ||
507 | * die_walk_instances - Walk on instances of given DIE | ||
508 | * @or_die: an abstract original DIE | ||
509 | * @callback: a callback function which is called with each instance DIE | ||
510 | * @data: user data | ||
511 | * | ||
512 | * Walk on the instances of the given @or_die. @or_die must be an inlined | ||
513 | * function declaration. This returns the return value of @callback if it | ||
514 | * returns a non-zero value, or -ENOENT if there is no instance. | ||
515 | */ | ||
516 | int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *), | ||
517 | void *data) | ||
518 | { | ||
519 | Dwarf_Die cu_die; | ||
520 | Dwarf_Die die_mem; | ||
521 | struct __instance_walk_param iwp = { | ||
522 | .addr = or_die->addr, | ||
523 | .callback = callback, | ||
524 | .data = data, | ||
525 | .retval = -ENOENT, | ||
526 | }; | ||
527 | |||
528 | if (dwarf_diecu(or_die, &cu_die, NULL, NULL) == NULL) | ||
529 | return -ENOENT; | ||
530 | |||
531 | die_find_child(&cu_die, __die_walk_instances_cb, &iwp, &die_mem); | ||
532 | |||
533 | return iwp.retval; | ||
534 | } | ||
535 | |||
377 | /* Line walker internal parameters */ | 536 | /* Line walker internal parameters */ |
378 | struct __line_walk_param { | 537 | struct __line_walk_param { |
379 | const char *fname; | 538 | bool recursive; |
380 | line_walk_callback_t callback; | 539 | line_walk_callback_t callback; |
381 | void *data; | 540 | void *data; |
382 | int retval; | 541 | int retval; |
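Again not part of the patch: a sketch of driving die_walk_instances(), with instance_cb/dump_instances as hypothetical names and or_die assumed to be the abstract-origin DIE of an inlined function:

    static int instance_cb(Dwarf_Die *inst, void *data __used)
    {
            Dwarf_Addr entry;

            /* inst is one concrete instance of the abstract origin */
            if (dwarf_entrypc(inst, &entry) == 0)
                    pr_debug("instance entry: 0x%jx\n", (uintmax_t)entry);
            return 0;       /* non-zero stops the walk and is returned */
    }

    static int dump_instances(Dwarf_Die *or_die)
    {
            /* Returns -ENOENT when the function has no instance */
            return die_walk_instances(or_die, instance_cb, NULL);
    }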
@@ -385,39 +544,56 @@ struct __line_walk_param { | |||
385 | static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) | 544 | static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) |
386 | { | 545 | { |
387 | struct __line_walk_param *lw = data; | 546 | struct __line_walk_param *lw = data; |
388 | Dwarf_Addr addr; | 547 | Dwarf_Addr addr = 0; |
548 | const char *fname; | ||
389 | int lineno; | 549 | int lineno; |
390 | 550 | ||
391 | if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) { | 551 | if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) { |
552 | fname = die_get_call_file(in_die); | ||
392 | lineno = die_get_call_lineno(in_die); | 553 | lineno = die_get_call_lineno(in_die); |
393 | if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) { | 554 | if (fname && lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) { |
394 | lw->retval = lw->callback(lw->fname, lineno, addr, | 555 | lw->retval = lw->callback(fname, lineno, addr, lw->data); |
395 | lw->data); | ||
396 | if (lw->retval != 0) | 556 | if (lw->retval != 0) |
397 | return DIE_FIND_CB_END; | 557 | return DIE_FIND_CB_END; |
398 | } | 558 | } |
399 | } | 559 | } |
400 | return DIE_FIND_CB_SIBLING; | 560 | if (!lw->recursive) |
561 | /* Don't need to search recursively */ | ||
562 | return DIE_FIND_CB_SIBLING; | ||
563 | |||
564 | if (addr) { | ||
565 | fname = dwarf_decl_file(in_die); | ||
566 | if (fname && dwarf_decl_line(in_die, &lineno) == 0) { | ||
567 | lw->retval = lw->callback(fname, lineno, addr, lw->data); | ||
568 | if (lw->retval != 0) | ||
569 | return DIE_FIND_CB_END; | ||
570 | } | ||
571 | } | ||
572 | |||
573 | /* Continue to search nested inlined function call-sites */ | ||
574 | return DIE_FIND_CB_CONTINUE; | ||
401 | } | 575 | } |
402 | 576 | ||
403 | /* Walk on lines of blocks included in given DIE */ | 577 | /* Walk on lines of blocks included in given DIE */ |
404 | static int __die_walk_funclines(Dwarf_Die *sp_die, | 578 | static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive, |
405 | line_walk_callback_t callback, void *data) | 579 | line_walk_callback_t callback, void *data) |
406 | { | 580 | { |
407 | struct __line_walk_param lw = { | 581 | struct __line_walk_param lw = { |
582 | .recursive = recursive, | ||
408 | .callback = callback, | 583 | .callback = callback, |
409 | .data = data, | 584 | .data = data, |
410 | .retval = 0, | 585 | .retval = 0, |
411 | }; | 586 | }; |
412 | Dwarf_Die die_mem; | 587 | Dwarf_Die die_mem; |
413 | Dwarf_Addr addr; | 588 | Dwarf_Addr addr; |
589 | const char *fname; | ||
414 | int lineno; | 590 | int lineno; |
415 | 591 | ||
416 | /* Handle function declaration line */ | 592 | /* Handle function declaration line */ |
417 | lw.fname = dwarf_decl_file(sp_die); | 593 | fname = dwarf_decl_file(sp_die); |
418 | if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 && | 594 | if (fname && dwarf_decl_line(sp_die, &lineno) == 0 && |
419 | dwarf_entrypc(sp_die, &addr) == 0) { | 595 | dwarf_entrypc(sp_die, &addr) == 0) { |
420 | lw.retval = callback(lw.fname, lineno, addr, data); | 596 | lw.retval = callback(fname, lineno, addr, data); |
421 | if (lw.retval != 0) | 597 | if (lw.retval != 0) |
422 | goto done; | 598 | goto done; |
423 | } | 599 | } |
@@ -430,7 +606,7 @@ static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data) | |||
430 | { | 606 | { |
431 | struct __line_walk_param *lw = data; | 607 | struct __line_walk_param *lw = data; |
432 | 608 | ||
433 | lw->retval = __die_walk_funclines(sp_die, lw->callback, lw->data); | 609 | lw->retval = __die_walk_funclines(sp_die, true, lw->callback, lw->data); |
434 | if (lw->retval != 0) | 610 | if (lw->retval != 0) |
435 | return DWARF_CB_ABORT; | 611 | return DWARF_CB_ABORT; |
436 | 612 | ||
@@ -439,7 +615,7 @@ static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data) | |||
439 | 615 | ||
440 | /** | 616 | /** |
441 | * die_walk_lines - Walk on lines inside given DIE | 617 | * die_walk_lines - Walk on lines inside given DIE |
442 | * @rt_die: a root DIE (CU or subprogram) | 618 | * @rt_die: a root DIE (CU, subprogram or inlined_subroutine) |
443 | * @callback: callback routine | 619 | * @callback: callback routine |
444 | * @data: user data | 620 | * @data: user data |
445 | * | 621 | * |
@@ -460,12 +636,12 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) | |||
460 | size_t nlines, i; | 636 | size_t nlines, i; |
461 | 637 | ||
462 | /* Get the CU die */ | 638 | /* Get the CU die */ |
463 | if (dwarf_tag(rt_die) == DW_TAG_subprogram) | 639 | if (dwarf_tag(rt_die) != DW_TAG_compile_unit) |
464 | cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); | 640 | cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); |
465 | else | 641 | else |
466 | cu_die = rt_die; | 642 | cu_die = rt_die; |
467 | if (!cu_die) { | 643 | if (!cu_die) { |
468 | pr_debug2("Failed to get CU from subprogram\n"); | 644 | pr_debug2("Failed to get CU from given DIE.\n"); |
469 | return -EINVAL; | 645 | return -EINVAL; |
470 | } | 646 | } |
471 | 647 | ||
@@ -509,7 +685,11 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) | |||
509 | * subroutines. We have to check functions list or given function. | 685 | * subroutines. We have to check functions list or given function. |
510 | */ | 686 | */ |
511 | if (rt_die != cu_die) | 687 | if (rt_die != cu_die) |
512 | ret = __die_walk_funclines(rt_die, callback, data); | 688 | /* |
689 | * No need to walk functions recursively, because nested | ||
690 | * inlined functions don't have lines of the specified DIE. | ||
691 | */ | ||
692 | ret = __die_walk_funclines(rt_die, false, callback, data); | ||
513 | else { | 693 | else { |
514 | struct __line_walk_param param = { | 694 | struct __line_walk_param param = { |
515 | .callback = callback, | 695 | .callback = callback, |
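A last sketch for this file: driving the reworked line walker. print_line_cb/dump_lines are hypothetical, but the callback signature matches line_walk_callback_t from dwarf-aux.h:

    static int print_line_cb(const char *fname, int lineno,
                             Dwarf_Addr addr, void *data __used)
    {
            pr_debug("%s:%d at 0x%jx\n", fname, lineno, (uintmax_t)addr);
            return 0;       /* non-zero aborts the walk */
    }

    static int dump_lines(Dwarf_Die *rt_die)
    {
            /* rt_die may be a CU, a subprogram or an inlined_subroutine */
            return die_walk_lines(rt_die, print_line_cb, NULL);
    }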
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index bc3b21167e7..6ce1717784b 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h | |||
@@ -34,12 +34,19 @@ extern const char *cu_get_comp_dir(Dwarf_Die *cu_die); | |||
34 | extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, | 34 | extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, |
35 | const char **fname, int *lineno); | 35 | const char **fname, int *lineno); |
36 | 36 | ||
37 | /* Walk on functions at given address */ | ||
38 | extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, | ||
39 | int (*callback)(Dwarf_Die *, void *), void *data); | ||
40 | |||
37 | /* Compare diename and tname */ | 41 | /* Compare diename and tname */ |
38 | extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); | 42 | extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); |
39 | 43 | ||
40 | /* Get callsite line number of inline-function instance */ | 44 | /* Get callsite line number of inline-function instance */ |
41 | extern int die_get_call_lineno(Dwarf_Die *in_die); | 45 | extern int die_get_call_lineno(Dwarf_Die *in_die); |
42 | 46 | ||
47 | /* Get callsite file name of inlined function instance */ | ||
48 | extern const char *die_get_call_file(Dwarf_Die *in_die); | ||
49 | |||
43 | /* Get type die */ | 50 | /* Get type die */ |
44 | extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); | 51 | extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); |
45 | 52 | ||
@@ -73,6 +80,10 @@ extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, | |||
73 | extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, | 80 | extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, |
74 | Dwarf_Die *die_mem); | 81 | Dwarf_Die *die_mem); |
75 | 82 | ||
83 | /* Walk on the instances of given DIE */ | ||
84 | extern int die_walk_instances(Dwarf_Die *in_die, | ||
85 | int (*callback)(Dwarf_Die *, void *), void *data); | ||
86 | |||
76 | /* Walker on lines (Note: line number will not be sorted) */ | 87 | /* Walker on lines (Note: line number will not be sorted) */ |
77 | typedef int (* line_walk_callback_t) (const char *fname, int lineno, | 88 | typedef int (* line_walk_callback_t) (const char *fname, int lineno, |
78 | Dwarf_Addr addr, void *data); | 89 | Dwarf_Addr addr, void *data); |
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e03e7bc8205..c12bd476c6f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c | |||
@@ -85,10 +85,19 @@ int perf_evlist__add_default(struct perf_evlist *evlist) | |||
85 | struct perf_evsel *evsel = perf_evsel__new(&attr, 0); | 85 | struct perf_evsel *evsel = perf_evsel__new(&attr, 0); |
86 | 86 | ||
87 | if (evsel == NULL) | 87 | if (evsel == NULL) |
88 | return -ENOMEM; | 88 | goto error; |
89 | |||
90 | /* use strdup() because free(evsel) assumes name is allocated */ | ||
91 | evsel->name = strdup("cycles"); | ||
92 | if (!evsel->name) | ||
93 | goto error_free; | ||
89 | 94 | ||
90 | perf_evlist__add(evlist, evsel); | 95 | perf_evlist__add(evlist, evsel); |
91 | return 0; | 96 | return 0; |
97 | error_free: | ||
98 | perf_evsel__delete(evsel); | ||
99 | error: | ||
100 | return -ENOMEM; | ||
92 | } | 101 | } |
93 | 102 | ||
94 | void perf_evlist__disable(struct perf_evlist *evlist) | 103 | void perf_evlist__disable(struct perf_evlist *evlist) |
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d4f3101773d..b6c1ad123ca 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
@@ -726,7 +726,16 @@ static int perf_header__read_build_ids_abi_quirk(struct perf_header *header, | |||
726 | return -1; | 726 | return -1; |
727 | 727 | ||
728 | bev.header = old_bev.header; | 728 | bev.header = old_bev.header; |
729 | bev.pid = 0; | 729 | |
730 | /* | ||
731 | * As the pid is the missing value, we need to fill | ||
732 | * it in properly. The header.misc value gives us a nice hint. | ||
733 | */ | ||
734 | bev.pid = HOST_KERNEL_ID; | ||
735 | if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER || | ||
736 | bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL) | ||
737 | bev.pid = DEFAULT_GUEST_KERNEL_ID; | ||
738 | |||
730 | memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id)); | 739 | memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id)); |
731 | __event_process_build_id(&bev, filename, session); | 740 | __event_process_build_id(&bev, filename, session); |
732 | 741 | ||
diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h index 791f9dd27eb..547628e97f3 100644 --- a/tools/perf/util/include/linux/compiler.h +++ b/tools/perf/util/include/linux/compiler.h | |||
@@ -5,7 +5,9 @@ | |||
5 | #define __always_inline inline | 5 | #define __always_inline inline |
6 | #endif | 6 | #endif |
7 | #define __user | 7 | #define __user |
8 | #ifndef __attribute_const__ | ||
8 | #define __attribute_const__ | 9 | #define __attribute_const__ |
10 | #endif | ||
9 | 11 | ||
10 | #define __used __attribute__((__unused__)) | 12 | #define __used __attribute__((__unused__)) |
11 | 13 | ||
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4ea7e19f525..928918b796b 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -697,7 +697,11 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr) | |||
697 | return EVT_FAILED; | 697 | return EVT_FAILED; |
698 | n = hex2u64(str + 1, &config); | 698 | n = hex2u64(str + 1, &config); |
699 | if (n > 0) { | 699 | if (n > 0) { |
700 | *strp = str + n + 1; | 700 | const char *end = str + n + 1; |
701 | if (*end != '\0' && *end != ',' && *end != ':') | ||
702 | return EVT_FAILED; | ||
703 | |||
704 | *strp = end; | ||
701 | attr->type = PERF_TYPE_RAW; | 705 | attr->type = PERF_TYPE_RAW; |
702 | attr->config = config; | 706 | attr->config = config; |
703 | return EVT_HANDLED; | 707 | return EVT_HANDLED; |
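With this check, raw events such as 'r1a' or 'r1a:k' still parse (the hex config may be followed by end-of-string, ',' or ':'), while trailing garbage after the hex digits, e.g. 'r1a7x', now fails the whole event instead of leaving the unparsed tail behind.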
@@ -1097,6 +1101,4 @@ void print_events(const char *event_glob) | |||
1097 | printf("\n"); | 1101 | printf("\n"); |
1098 | 1102 | ||
1099 | print_tracepoint_events(NULL, NULL); | 1103 | print_tracepoint_events(NULL, NULL); |
1100 | |||
1101 | exit(129); | ||
1102 | } | 1104 | } |
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 3e44a3e3651..555fc3864b9 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c | |||
@@ -612,12 +612,12 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) | |||
612 | return ret; | 612 | return ret; |
613 | } | 613 | } |
614 | 614 | ||
615 | /* Find a variable in a subprogram die */ | 615 | /* Find a variable in a scope DIE */ |
616 | static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf) | 616 | static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) |
617 | { | 617 | { |
618 | Dwarf_Die vr_die, *scopes; | 618 | Dwarf_Die vr_die; |
619 | char buf[32], *ptr; | 619 | char buf[32], *ptr; |
620 | int ret, nscopes; | 620 | int ret = 0; |
621 | 621 | ||
622 | if (!is_c_varname(pf->pvar->var)) { | 622 | if (!is_c_varname(pf->pvar->var)) { |
623 | /* Copy raw parameters */ | 623 | /* Copy raw parameters */ |
@@ -652,30 +652,16 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf) | |||
652 | if (pf->tvar->name == NULL) | 652 | if (pf->tvar->name == NULL) |
653 | return -ENOMEM; | 653 | return -ENOMEM; |
654 | 654 | ||
655 | pr_debug("Searching '%s' variable in context.\n", | 655 | pr_debug("Searching '%s' variable in context.\n", pf->pvar->var); |
656 | pf->pvar->var); | ||
657 | /* Search child die for local variables and parameters. */ | 656 | /* Search child die for local variables and parameters. */ |
658 | if (die_find_variable_at(sp_die, pf->pvar->var, pf->addr, &vr_die)) | 657 | if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) { |
659 | ret = convert_variable(&vr_die, pf); | 658 | /* Search again in global variables */ |
660 | else { | 659 | if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die)) |
661 | /* Search upper class */ | 660 | ret = -ENOENT; |
662 | nscopes = dwarf_getscopes_die(sp_die, &scopes); | ||
663 | while (nscopes-- > 1) { | ||
664 | pr_debug("Searching variables in %s\n", | ||
665 | dwarf_diename(&scopes[nscopes])); | ||
666 | /* We should check this scope, so give dummy address */ | ||
667 | if (die_find_variable_at(&scopes[nscopes], | ||
668 | pf->pvar->var, 0, | ||
669 | &vr_die)) { | ||
670 | ret = convert_variable(&vr_die, pf); | ||
671 | goto found; | ||
672 | } | ||
673 | } | ||
674 | if (scopes) | ||
675 | free(scopes); | ||
676 | ret = -ENOENT; | ||
677 | } | 661 | } |
678 | found: | 662 | if (ret == 0) |
663 | ret = convert_variable(&vr_die, pf); | ||
664 | |||
679 | if (ret < 0) | 665 | if (ret < 0) |
680 | pr_warning("Failed to find '%s' in this function.\n", | 666 | pr_warning("Failed to find '%s' in this function.\n", |
681 | pf->pvar->var); | 667 | pf->pvar->var); |
@@ -718,26 +704,30 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr, | |||
718 | return 0; | 704 | return 0; |
719 | } | 705 | } |
720 | 706 | ||
721 | /* Call probe_finder callback with real subprogram DIE */ | 707 | /* Call probe_finder callback with scope DIE */ |
722 | static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf) | 708 | static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) |
723 | { | 709 | { |
724 | Dwarf_Die die_mem; | ||
725 | Dwarf_Attribute fb_attr; | 710 | Dwarf_Attribute fb_attr; |
726 | size_t nops; | 711 | size_t nops; |
727 | int ret; | 712 | int ret; |
728 | 713 | ||
729 | /* If no real subprogram, find a real one */ | 714 | if (!sc_die) { |
730 | if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) { | 715 | pr_err("Caller must pass a scope DIE. Program error.\n"); |
731 | sp_die = die_find_realfunc(&pf->cu_die, pf->addr, &die_mem); | 716 | return -EINVAL; |
732 | if (!sp_die) { | 717 | } |
718 | |||
719 | /* If not a real subprogram, find a real one */ | ||
720 | if (dwarf_tag(sc_die) != DW_TAG_subprogram) { | ||
721 | if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) { | ||
733 | pr_warning("Failed to find probe point in any " | 722 | pr_warning("Failed to find probe point in any " |
734 | "functions.\n"); | 723 | "functions.\n"); |
735 | return -ENOENT; | 724 | return -ENOENT; |
736 | } | 725 | } |
737 | } | 726 | } else |
727 | memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die)); | ||
738 | 728 | ||
739 | /* Get the frame base attribute/ops */ | 729 | /* Get the frame base attribute/ops from subprogram */ |
740 | dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr); | 730 | dwarf_attr(&pf->sp_die, DW_AT_frame_base, &fb_attr); |
741 | ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); | 731 | ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); |
742 | if (ret <= 0 || nops == 0) { | 732 | if (ret <= 0 || nops == 0) { |
743 | pf->fb_ops = NULL; | 733 | pf->fb_ops = NULL; |
@@ -755,7 +745,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf) | |||
755 | } | 745 | } |
756 | 746 | ||
757 | /* Call finder's callback handler */ | 747 | /* Call finder's callback handler */ |
758 | ret = pf->callback(sp_die, pf); | 748 | ret = pf->callback(sc_die, pf); |
759 | 749 | ||
760 | /* *pf->fb_ops will be cached in libdw. Don't free it. */ | 750 | /* *pf->fb_ops will be cached in libdw. Don't free it. */ |
761 | pf->fb_ops = NULL; | 751 | pf->fb_ops = NULL; |
@@ -763,17 +753,82 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf) | |||
763 | return ret; | 753 | return ret; |
764 | } | 754 | } |
765 | 755 | ||
756 | struct find_scope_param { | ||
757 | const char *function; | ||
758 | const char *file; | ||
759 | int line; | ||
760 | int diff; | ||
761 | Dwarf_Die *die_mem; | ||
762 | bool found; | ||
763 | }; | ||
764 | |||
765 | static int find_best_scope_cb(Dwarf_Die *fn_die, void *data) | ||
766 | { | ||
767 | struct find_scope_param *fsp = data; | ||
768 | const char *file; | ||
769 | int lno; | ||
770 | |||
771 | /* Skip if declared file name does not match */ | ||
772 | if (fsp->file) { | ||
773 | file = dwarf_decl_file(fn_die); | ||
774 | if (!file || strcmp(fsp->file, file) != 0) | ||
775 | return 0; | ||
776 | } | ||
777 | /* If the function name is given, that's what the user expects */ | ||
778 | if (fsp->function) { | ||
779 | if (die_compare_name(fn_die, fsp->function)) { | ||
780 | memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die)); | ||
781 | fsp->found = true; | ||
782 | return 1; | ||
783 | } | ||
784 | } else { | ||
785 | /* With the line number, find the nearest declared DIE */ | ||
786 | dwarf_decl_line(fn_die, &lno); | ||
787 | if (lno < fsp->line && fsp->diff > fsp->line - lno) { | ||
788 | /* Keep a candidate and continue */ | ||
789 | fsp->diff = fsp->line - lno; | ||
790 | memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die)); | ||
791 | fsp->found = true; | ||
792 | } | ||
793 | } | ||
794 | return 0; | ||
795 | } | ||
796 | |||
797 | /* Find an appropriate scope that fits the given conditions */ | ||
798 | static Dwarf_Die *find_best_scope(struct probe_finder *pf, Dwarf_Die *die_mem) | ||
799 | { | ||
800 | struct find_scope_param fsp = { | ||
801 | .function = pf->pev->point.function, | ||
802 | .file = pf->fname, | ||
803 | .line = pf->lno, | ||
804 | .diff = INT_MAX, | ||
805 | .die_mem = die_mem, | ||
806 | .found = false, | ||
807 | }; | ||
808 | |||
809 | cu_walk_functions_at(&pf->cu_die, pf->addr, find_best_scope_cb, &fsp); | ||
810 | |||
811 | return fsp.found ? die_mem : NULL; | ||
812 | } | ||
813 | |||
766 | static int probe_point_line_walker(const char *fname, int lineno, | 814 | static int probe_point_line_walker(const char *fname, int lineno, |
767 | Dwarf_Addr addr, void *data) | 815 | Dwarf_Addr addr, void *data) |
768 | { | 816 | { |
769 | struct probe_finder *pf = data; | 817 | struct probe_finder *pf = data; |
818 | Dwarf_Die *sc_die, die_mem; | ||
770 | int ret; | 819 | int ret; |
771 | 820 | ||
772 | if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0) | 821 | if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0) |
773 | return 0; | 822 | return 0; |
774 | 823 | ||
775 | pf->addr = addr; | 824 | pf->addr = addr; |
776 | ret = call_probe_finder(NULL, pf); | 825 | sc_die = find_best_scope(pf, &die_mem); |
826 | if (!sc_die) { | ||
827 | pr_warning("Failed to find scope of probe point.\n"); | ||
828 | return -ENOENT; | ||
829 | } | ||
830 | |||
831 | ret = call_probe_finder(sc_die, pf); | ||
777 | 832 | ||
778 | /* Continue if no error, because the line will be in inline function */ | 833 | /* Continue if no error, because the line will be in inline function */ |
779 | return ret < 0 ? ret : 0; | 834 | return ret < 0 ? ret : 0; |
@@ -827,6 +882,7 @@ static int probe_point_lazy_walker(const char *fname, int lineno, | |||
827 | Dwarf_Addr addr, void *data) | 882 | Dwarf_Addr addr, void *data) |
828 | { | 883 | { |
829 | struct probe_finder *pf = data; | 884 | struct probe_finder *pf = data; |
885 | Dwarf_Die *sc_die, die_mem; | ||
830 | int ret; | 886 | int ret; |
831 | 887 | ||
832 | if (!line_list__has_line(&pf->lcache, lineno) || | 888 | if (!line_list__has_line(&pf->lcache, lineno) || |
@@ -836,7 +892,14 @@ static int probe_point_lazy_walker(const char *fname, int lineno, | |||
836 | pr_debug("Probe line found: line:%d addr:0x%llx\n", | 892 | pr_debug("Probe line found: line:%d addr:0x%llx\n", |
837 | lineno, (unsigned long long)addr); | 893 | lineno, (unsigned long long)addr); |
838 | pf->addr = addr; | 894 | pf->addr = addr; |
839 | ret = call_probe_finder(NULL, pf); | 895 | pf->lno = lineno; |
896 | sc_die = find_best_scope(pf, &die_mem); | ||
897 | if (!sc_die) { | ||
898 | pr_warning("Failed to find scope of probe point.\n"); | ||
899 | return -ENOENT; | ||
900 | } | ||
901 | |||
902 | ret = call_probe_finder(sc_die, pf); | ||
840 | 903 | ||
841 | /* | 904 | /* |
842 | * Continue if no error, because the lazy pattern will match | 905 | * Continue if no error, because the lazy pattern will match |
@@ -861,42 +924,39 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) | |||
861 | return die_walk_lines(sp_die, probe_point_lazy_walker, pf); | 924 | return die_walk_lines(sp_die, probe_point_lazy_walker, pf); |
862 | } | 925 | } |
863 | 926 | ||
864 | /* Callback parameter with return value */ | ||
865 | struct dwarf_callback_param { | ||
866 | void *data; | ||
867 | int retval; | ||
868 | }; | ||
869 | |||
870 | static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) | 927 | static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) |
871 | { | 928 | { |
872 | struct dwarf_callback_param *param = data; | 929 | struct probe_finder *pf = data; |
873 | struct probe_finder *pf = param->data; | ||
874 | struct perf_probe_point *pp = &pf->pev->point; | 930 | struct perf_probe_point *pp = &pf->pev->point; |
875 | Dwarf_Addr addr; | 931 | Dwarf_Addr addr; |
932 | int ret; | ||
876 | 933 | ||
877 | if (pp->lazy_line) | 934 | if (pp->lazy_line) |
878 | param->retval = find_probe_point_lazy(in_die, pf); | 935 | ret = find_probe_point_lazy(in_die, pf); |
879 | else { | 936 | else { |
880 | /* Get probe address */ | 937 | /* Get probe address */ |
881 | if (dwarf_entrypc(in_die, &addr) != 0) { | 938 | if (dwarf_entrypc(in_die, &addr) != 0) { |
882 | pr_warning("Failed to get entry address of %s.\n", | 939 | pr_warning("Failed to get entry address of %s.\n", |
883 | dwarf_diename(in_die)); | 940 | dwarf_diename(in_die)); |
884 | param->retval = -ENOENT; | 941 | return -ENOENT; |
885 | return DWARF_CB_ABORT; | ||
886 | } | 942 | } |
887 | pf->addr = addr; | 943 | pf->addr = addr; |
888 | pf->addr += pp->offset; | 944 | pf->addr += pp->offset; |
889 | pr_debug("found inline addr: 0x%jx\n", | 945 | pr_debug("found inline addr: 0x%jx\n", |
890 | (uintmax_t)pf->addr); | 946 | (uintmax_t)pf->addr); |
891 | 947 | ||
892 | param->retval = call_probe_finder(in_die, pf); | 948 | ret = call_probe_finder(in_die, pf); |
893 | if (param->retval < 0) | ||
894 | return DWARF_CB_ABORT; | ||
895 | } | 949 | } |
896 | 950 | ||
897 | return DWARF_CB_OK; | 951 | return ret; |
898 | } | 952 | } |
899 | 953 | ||
954 | /* Callback parameter with return value for libdw */ | ||
955 | struct dwarf_callback_param { | ||
956 | void *data; | ||
957 | int retval; | ||
958 | }; | ||
959 | |||
900 | /* Search function from function name */ | 960 | /* Search function from function name */ |
901 | static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) | 961 | static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) |
902 | { | 962 | { |
@@ -933,14 +993,10 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) | |||
933 | /* TODO: Check the address in this function */ | 993 | /* TODO: Check the address in this function */ |
934 | param->retval = call_probe_finder(sp_die, pf); | 994 | param->retval = call_probe_finder(sp_die, pf); |
935 | } | 995 | } |
936 | } else { | 996 | } else |
937 | struct dwarf_callback_param _param = {.data = (void *)pf, | ||
938 | .retval = 0}; | ||
939 | /* Inlined function: search instances */ | 997 | /* Inlined function: search instances */ |
940 | dwarf_func_inline_instances(sp_die, probe_point_inline_cb, | 998 | param->retval = die_walk_instances(sp_die, |
941 | &_param); | 999 | probe_point_inline_cb, (void *)pf); |
942 | param->retval = _param.retval; | ||
943 | } | ||
944 | 1000 | ||
945 | return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */ | 1001 | return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */ |
946 | } | 1002 | } |
@@ -1060,7 +1116,7 @@ found: | |||
1060 | } | 1116 | } |
1061 | 1117 | ||
1062 | /* Add a found probe point into trace event list */ | 1118 | /* Add a found probe point into trace event list */ |
1063 | static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf) | 1119 | static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) |
1064 | { | 1120 | { |
1065 | struct trace_event_finder *tf = | 1121 | struct trace_event_finder *tf = |
1066 | container_of(pf, struct trace_event_finder, pf); | 1122 | container_of(pf, struct trace_event_finder, pf); |
@@ -1075,8 +1131,9 @@ static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf) | |||
1075 | } | 1131 | } |
1076 | tev = &tf->tevs[tf->ntevs++]; | 1132 | tev = &tf->tevs[tf->ntevs++]; |
1077 | 1133 | ||
1078 | ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe, | 1134 | /* Trace point should be converted from subprogram DIE */ |
1079 | &tev->point); | 1135 | ret = convert_to_trace_point(&pf->sp_die, pf->addr, |
1136 | pf->pev->point.retprobe, &tev->point); | ||
1080 | if (ret < 0) | 1137 | if (ret < 0) |
1081 | return ret; | 1138 | return ret; |
1082 | 1139 | ||
@@ -1091,7 +1148,8 @@ static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf) | |||
1091 | for (i = 0; i < pf->pev->nargs; i++) { | 1148 | for (i = 0; i < pf->pev->nargs; i++) { |
1092 | pf->pvar = &pf->pev->args[i]; | 1149 | pf->pvar = &pf->pev->args[i]; |
1093 | pf->tvar = &tev->args[i]; | 1150 | pf->tvar = &tev->args[i]; |
1094 | ret = find_variable(sp_die, pf); | 1151 | /* Variable should be found from scope DIE */ |
1152 | ret = find_variable(sc_die, pf); | ||
1095 | if (ret != 0) | 1153 | if (ret != 0) |
1096 | return ret; | 1154 | return ret; |
1097 | } | 1155 | } |
@@ -1159,13 +1217,13 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) | |||
1159 | } | 1217 | } |
1160 | 1218 | ||
1161 | /* Add a found vars into available variables list */ | 1219 | /* Add a found vars into available variables list */ |
1162 | static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf) | 1220 | static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) |
1163 | { | 1221 | { |
1164 | struct available_var_finder *af = | 1222 | struct available_var_finder *af = |
1165 | container_of(pf, struct available_var_finder, pf); | 1223 | container_of(pf, struct available_var_finder, pf); |
1166 | struct variable_list *vl; | 1224 | struct variable_list *vl; |
1167 | Dwarf_Die die_mem, *scopes = NULL; | 1225 | Dwarf_Die die_mem; |
1168 | int ret, nscopes; | 1226 | int ret; |
1169 | 1227 | ||
1170 | /* Check number of tevs */ | 1228 | /* Check number of tevs */ |
1171 | if (af->nvls == af->max_vls) { | 1229 | if (af->nvls == af->max_vls) { |
@@ -1174,8 +1232,9 @@ static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf) | |||
1174 | } | 1232 | } |
1175 | vl = &af->vls[af->nvls++]; | 1233 | vl = &af->vls[af->nvls++]; |
1176 | 1234 | ||
1177 | ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe, | 1235 | /* Trace point should be converted from subprogram DIE */ |
1178 | &vl->point); | 1236 | ret = convert_to_trace_point(&pf->sp_die, pf->addr, |
1237 | pf->pev->point.retprobe, &vl->point); | ||
1179 | if (ret < 0) | 1238 | if (ret < 0) |
1180 | return ret; | 1239 | return ret; |
1181 | 1240 | ||
@@ -1187,19 +1246,14 @@ static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf) | |||
1187 | if (vl->vars == NULL) | 1246 | if (vl->vars == NULL) |
1188 | return -ENOMEM; | 1247 | return -ENOMEM; |
1189 | af->child = true; | 1248 | af->child = true; |
1190 | die_find_child(sp_die, collect_variables_cb, (void *)af, &die_mem); | 1249 | die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem); |
1191 | 1250 | ||
1192 | /* Find external variables */ | 1251 | /* Find external variables */ |
1193 | if (!af->externs) | 1252 | if (!af->externs) |
1194 | goto out; | 1253 | goto out; |
1195 | /* Don't need to search child DIE for externs. */ | 1254 | /* Don't need to search child DIE for externs. */ |
1196 | af->child = false; | 1255 | af->child = false; |
1197 | nscopes = dwarf_getscopes_die(sp_die, &scopes); | 1256 | die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem); |
1198 | while (nscopes-- > 1) | ||
1199 | die_find_child(&scopes[nscopes], collect_variables_cb, | ||
1200 | (void *)af, &die_mem); | ||
1201 | if (scopes) | ||
1202 | free(scopes); | ||
1203 | 1257 | ||
1204 | out: | 1258 | out: |
1205 | if (strlist__empty(vl->vars)) { | 1259 | if (strlist__empty(vl->vars)) { |
@@ -1391,10 +1445,14 @@ static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf) | |||
1391 | 1445 | ||
1392 | static int line_range_inline_cb(Dwarf_Die *in_die, void *data) | 1446 | static int line_range_inline_cb(Dwarf_Die *in_die, void *data) |
1393 | { | 1447 | { |
1394 | struct dwarf_callback_param *param = data; | 1448 | find_line_range_by_line(in_die, data); |
1395 | 1449 | ||
1396 | param->retval = find_line_range_by_line(in_die, param->data); | 1450 | /* |
1397 | return DWARF_CB_ABORT; /* No need to find other instances */ | 1451 | * We have to check all instances of inlined function, because |
1452 | * some execution paths can be optimized out depends on the | ||
1453 | * function argument of instances | ||
1454 | */ | ||
1455 | return 0; | ||
1398 | } | 1456 | } |
1399 | 1457 | ||
1400 | /* Search function from function name */ | 1458 | /* Search function from function name */ |
@@ -1422,15 +1480,10 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) | |||
1422 | pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e); | 1480 | pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e); |
1423 | lr->start = lf->lno_s; | 1481 | lr->start = lf->lno_s; |
1424 | lr->end = lf->lno_e; | 1482 | lr->end = lf->lno_e; |
1425 | if (dwarf_func_inline(sp_die)) { | 1483 | if (dwarf_func_inline(sp_die)) |
1426 | struct dwarf_callback_param _param; | 1484 | param->retval = die_walk_instances(sp_die, |
1427 | _param.data = (void *)lf; | 1485 | line_range_inline_cb, lf); |
1428 | _param.retval = 0; | 1486 | else |
1429 | dwarf_func_inline_instances(sp_die, | ||
1430 | line_range_inline_cb, | ||
1431 | &_param); | ||
1432 | param->retval = _param.retval; | ||
1433 | } else | ||
1434 | param->retval = find_line_range_by_line(sp_die, lf); | 1487 | param->retval = find_line_range_by_line(sp_die, lf); |
1435 | return DWARF_CB_ABORT; | 1488 | return DWARF_CB_ABORT; |
1436 | } | 1489 | } |
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index c478b42a247..1132c8f0ce8 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h | |||
@@ -57,7 +57,7 @@ struct probe_finder { | |||
57 | struct perf_probe_event *pev; /* Target probe event */ | 57 | struct perf_probe_event *pev; /* Target probe event */ |
58 | 58 | ||
59 | /* Callback when a probe point is found */ | 59 | /* Callback when a probe point is found */ |
60 | int (*callback)(Dwarf_Die *sp_die, struct probe_finder *pf); | 60 | int (*callback)(Dwarf_Die *sc_die, struct probe_finder *pf); |
61 | 61 | ||
62 | /* For function searching */ | 62 | /* For function searching */ |
63 | int lno; /* Line number */ | 63 | int lno; /* Line number */ |
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a8b53714542..469c0264ed2 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
@@ -1506,7 +1506,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) | |||
1506 | if (strncmp(dso->name, "/tmp/perf-", 10) == 0) { | 1506 | if (strncmp(dso->name, "/tmp/perf-", 10) == 0) { |
1507 | struct stat st; | 1507 | struct stat st; |
1508 | 1508 | ||
1509 | if (stat(dso->name, &st) < 0) | 1509 | if (lstat(dso->name, &st) < 0) |
1510 | return -1; | 1510 | return -1; |
1511 | 1511 | ||
1512 | if (st.st_uid && (st.st_uid != geteuid())) { | 1512 | if (st.st_uid && (st.st_uid != geteuid())) { |
@@ -2181,27 +2181,22 @@ size_t machines__fprintf_dsos_buildid(struct rb_root *machines, | |||
2181 | return ret; | 2181 | return ret; |
2182 | } | 2182 | } |
2183 | 2183 | ||
2184 | struct dso *dso__new_kernel(const char *name) | 2184 | static struct dso* |
2185 | dso__kernel_findnew(struct machine *machine, const char *name, | ||
2186 | const char *short_name, int dso_type) | ||
2185 | { | 2187 | { |
2186 | struct dso *dso = dso__new(name ?: "[kernel.kallsyms]"); | 2188 | /* |
2187 | 2189 | * The kernel dso could be created by build_id processing. | |
2188 | if (dso != NULL) { | 2190 | */ |
2189 | dso__set_short_name(dso, "[kernel]"); | 2191 | struct dso *dso = __dsos__findnew(&machine->kernel_dsos, name); |
2190 | dso->kernel = DSO_TYPE_KERNEL; | ||
2191 | } | ||
2192 | |||
2193 | return dso; | ||
2194 | } | ||
2195 | 2192 | ||
2196 | static struct dso *dso__new_guest_kernel(struct machine *machine, | 2193 | /* |
2197 | const char *name) | 2194 | * We need to run this in all cases, since during the build_id |
2198 | { | 2195 | * processing we had no idea this was the kernel dso. |
2199 | char bf[PATH_MAX]; | 2196 | */ |
2200 | struct dso *dso = dso__new(name ?: machine__mmap_name(machine, bf, | ||
2201 | sizeof(bf))); | ||
2202 | if (dso != NULL) { | 2197 | if (dso != NULL) { |
2203 | dso__set_short_name(dso, "[guest.kernel]"); | 2198 | dso__set_short_name(dso, short_name); |
2204 | dso->kernel = DSO_TYPE_GUEST_KERNEL; | 2199 | dso->kernel = dso_type; |
2205 | } | 2200 | } |
2206 | 2201 | ||
2207 | return dso; | 2202 | return dso; |
@@ -2219,24 +2214,36 @@ void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) | |||
2219 | dso->has_build_id = true; | 2214 | dso->has_build_id = true; |
2220 | } | 2215 | } |
2221 | 2216 | ||
2222 | static struct dso *machine__create_kernel(struct machine *machine) | 2217 | static struct dso *machine__get_kernel(struct machine *machine) |
2223 | { | 2218 | { |
2224 | const char *vmlinux_name = NULL; | 2219 | const char *vmlinux_name = NULL; |
2225 | struct dso *kernel; | 2220 | struct dso *kernel; |
2226 | 2221 | ||
2227 | if (machine__is_host(machine)) { | 2222 | if (machine__is_host(machine)) { |
2228 | vmlinux_name = symbol_conf.vmlinux_name; | 2223 | vmlinux_name = symbol_conf.vmlinux_name; |
2229 | kernel = dso__new_kernel(vmlinux_name); | 2224 | if (!vmlinux_name) |
2225 | vmlinux_name = "[kernel.kallsyms]"; | ||
2226 | |||
2227 | kernel = dso__kernel_findnew(machine, vmlinux_name, | ||
2228 | "[kernel]", | ||
2229 | DSO_TYPE_KERNEL); | ||
2230 | } else { | 2230 | } else { |
2231 | char bf[PATH_MAX]; | ||
2232 | |||
2231 | if (machine__is_default_guest(machine)) | 2233 | if (machine__is_default_guest(machine)) |
2232 | vmlinux_name = symbol_conf.default_guest_vmlinux_name; | 2234 | vmlinux_name = symbol_conf.default_guest_vmlinux_name; |
2233 | kernel = dso__new_guest_kernel(machine, vmlinux_name); | 2235 | if (!vmlinux_name) |
2236 | vmlinux_name = machine__mmap_name(machine, bf, | ||
2237 | sizeof(bf)); | ||
2238 | |||
2239 | kernel = dso__kernel_findnew(machine, vmlinux_name, | ||
2240 | "[guest.kernel]", | ||
2241 | DSO_TYPE_GUEST_KERNEL); | ||
2234 | } | 2242 | } |
2235 | 2243 | ||
2236 | if (kernel != NULL) { | 2244 | if (kernel != NULL && (!kernel->has_build_id)) |
2237 | dso__read_running_kernel_build_id(kernel, machine); | 2245 | dso__read_running_kernel_build_id(kernel, machine); |
2238 | dsos__add(&machine->kernel_dsos, kernel); | 2246 | |
2239 | } | ||
2240 | return kernel; | 2247 | return kernel; |
2241 | } | 2248 | } |
2242 | 2249 | ||
@@ -2340,7 +2347,7 @@ void machine__destroy_kernel_maps(struct machine *machine) | |||
2340 | 2347 | ||
2341 | int machine__create_kernel_maps(struct machine *machine) | 2348 | int machine__create_kernel_maps(struct machine *machine) |
2342 | { | 2349 | { |
2343 | struct dso *kernel = machine__create_kernel(machine); | 2350 | struct dso *kernel = machine__get_kernel(machine); |
2344 | 2351 | ||
2345 | if (kernel == NULL || | 2352 | if (kernel == NULL || |
2346 | __machine__create_kernel_maps(machine, kernel) < 0) | 2353 | __machine__create_kernel_maps(machine, kernel) < 0) |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 325ee36a9d2..4f377d92e75 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -155,7 +155,6 @@ struct dso { | |||
155 | }; | 155 | }; |
156 | 156 | ||
157 | struct dso *dso__new(const char *name); | 157 | struct dso *dso__new(const char *name); |
158 | struct dso *dso__new_kernel(const char *name); | ||
159 | void dso__delete(struct dso *dso); | 158 | void dso__delete(struct dso *dso); |
160 | 159 | ||
161 | int dso__name_len(const struct dso *dso); | 160 | int dso__name_len(const struct dso *dso); |
diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c index 5a06538532a..88403cf8396 100644 --- a/tools/perf/util/ui/browsers/top.c +++ b/tools/perf/util/ui/browsers/top.c | |||
@@ -208,6 +208,5 @@ int perf_top__tui_browser(struct perf_top *top) | |||
208 | }, | 208 | }, |
209 | }; | 209 | }; |
210 | 210 | ||
211 | ui_helpline__push("Press <- or ESC to exit"); | ||
212 | return perf_top_browser__run(&browser); | 211 | return perf_top_browser__run(&browser); |
213 | } | 212 | } |