diff options
-rw-r--r-- | Documentation/kernel-parameters.txt | 17 | ||||
-rw-r--r-- | Documentation/virtual/virtio-spec.txt | 1164 | ||||
-rw-r--r-- | drivers/block/virtio_blk.c | 21 | ||||
-rw-r--r-- | drivers/virtio/Kconfig | 11 | ||||
-rw-r--r-- | drivers/virtio/virtio.c | 11 | ||||
-rw-r--r-- | drivers/virtio/virtio_balloon.c | 33 | ||||
-rw-r--r-- | drivers/virtio/virtio_mmio.c | 163 | ||||
-rw-r--r-- | include/linux/virtio_config.h | 11 | ||||
-rw-r--r-- | net/9p/trans_virtio.c | 3 |
9 files changed, 1314 insertions, 120 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0e90453e4acb..e275432ef2c7 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -110,6 +110,7 @@ parameter is applicable: | |||
110 | USB USB support is enabled. | 110 | USB USB support is enabled. |
111 | USBHID USB Human Interface Device support is enabled. | 111 | USBHID USB Human Interface Device support is enabled. |
112 | V4L Video For Linux support is enabled. | 112 | V4L Video For Linux support is enabled. |
113 | VMMIO Driver for memory mapped virtio devices is enabled. | ||
113 | VGA The VGA console has been enabled. | 114 | VGA The VGA console has been enabled. |
114 | VT Virtual terminal support is enabled. | 115 | VT Virtual terminal support is enabled. |
115 | WDT Watchdog support is enabled. | 116 | WDT Watchdog support is enabled. |
@@ -2932,6 +2933,22 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2932 | video= [FB] Frame buffer configuration | 2933 | video= [FB] Frame buffer configuration |
2933 | See Documentation/fb/modedb.txt. | 2934 | See Documentation/fb/modedb.txt. |
2934 | 2935 | ||
2936 | virtio_mmio.device= | ||
2937 | [VMMIO] Memory mapped virtio (platform) device. | ||
2938 | |||
2939 | <size>@<baseaddr>:<irq>[:<id>] | ||
2940 | where: | ||
2941 | <size> := size (can use standard suffixes | ||
2942 | like K, M and G) | ||
2943 | <baseaddr> := physical base address | ||
2944 | <irq> := interrupt number (as passed to | ||
2945 | request_irq()) | ||
2946 | <id> := (optional) platform device id | ||
2947 | example: | ||
2948 | virtio_mmio.device=1K@0x100b0000:48:7 | ||
2949 | |||
2950 | Can be used multiple times for multiple devices. | ||
2951 | |||
2935 | vga= [BOOT,X86-32] Select a particular video mode | 2952 | vga= [BOOT,X86-32] Select a particular video mode |
2936 | See Documentation/x86/boot.txt and | 2953 | See Documentation/x86/boot.txt and |
2937 | Documentation/svga.txt. | 2954 | Documentation/svga.txt. |
diff --git a/Documentation/virtual/virtio-spec.txt b/Documentation/virtual/virtio-spec.txt index da094737e2f8..0d6ec85481cb 100644 --- a/Documentation/virtual/virtio-spec.txt +++ b/Documentation/virtual/virtio-spec.txt | |||
@@ -1,11 +1,11 @@ | |||
1 | [Generated file: see http://ozlabs.org/~rusty/virtio-spec/] | 1 | [Generated file: see http://ozlabs.org/~rusty/virtio-spec/] |
2 | Virtio PCI Card Specification | 2 | Virtio PCI Card Specification |
3 | v0.9.1 DRAFT | 3 | v0.9.5 DRAFT |
4 | - | 4 | - |
5 | 5 | ||
6 | Rusty Russell <rusty@rustcorp.com.au>IBM Corporation (Editor) | 6 | Rusty Russell <rusty@rustcorp.com.au> IBM Corporation (Editor) |
7 | 7 | ||
8 | 2011 August 1. | 8 | 2012 May 7. |
9 | 9 | ||
10 | Purpose and Description | 10 | Purpose and Description |
11 | 11 | ||
@@ -68,11 +68,11 @@ and consists of three parts: | |||
68 | +-------------------+-----------------------------------+-----------+ | 68 | +-------------------+-----------------------------------+-----------+ |
69 | 69 | ||
70 | 70 | ||
71 | When the driver wants to send buffers to the device, it puts them | 71 | When the driver wants to send a buffer to the device, it fills in |
72 | in one or more slots in the descriptor table, and writes the | 72 | a slot in the descriptor table (or chains several together), and |
73 | descriptor indices into the available ring. It then notifies the | 73 | writes the descriptor index into the available ring. It then |
74 | device. When the device has finished with the buffers, it writes | 74 | notifies the device. When the device has finished a buffer, it |
75 | the descriptors into the used ring, and sends an interrupt. | 75 | writes the descriptor into the used ring, and sends an interrupt. |
76 | 76 | ||
77 | Specification | 77 | Specification |
78 | 78 | ||
@@ -106,8 +106,14 @@ for informational purposes by the guest). | |||
106 | +----------------------+--------------------+---------------+ | 106 | +----------------------+--------------------+---------------+ |
107 | | 6 | ioMemory | - | | 107 | | 6 | ioMemory | - | |
108 | +----------------------+--------------------+---------------+ | 108 | +----------------------+--------------------+---------------+ |
109 | | 7 | rpmsg | Appendix H | | ||
110 | +----------------------+--------------------+---------------+ | ||
111 | | 8 | SCSI host | Appendix I | | ||
112 | +----------------------+--------------------+---------------+ | ||
109 | | 9 | 9P transport | - | | 113 | | 9 | 9P transport | - | |
110 | +----------------------+--------------------+---------------+ | 114 | +----------------------+--------------------+---------------+ |
115 | | 10 | mac80211 wlan | - | | ||
116 | +----------------------+--------------------+---------------+ | ||
111 | 117 | ||
112 | 118 | ||
113 | Device Configuration | 119 | Device Configuration |
@@ -127,7 +133,7 @@ Note that this is possible because while the virtio header is PCI | |||
127 | the native endian of the guest (where such distinction is | 133 | the native endian of the guest (where such distinction is |
128 | applicable). | 134 | applicable). |
129 | 135 | ||
130 | Device Initialization Sequence | 136 | Device Initialization Sequence<sub:Device-Initialization-Sequence> |
131 | 137 | ||
132 | We start with an overview of device initialization, then expand | 138 | We start with an overview of device initialization, then expand |
133 | on the details of the device and how each step is performed. | 139 | on the details of the device and how each step is performed. |
@@ -177,7 +183,10 @@ The virtio header looks as follows: | |||
177 | 183 | ||
178 | 184 | ||
179 | If MSI-X is enabled for the device, two additional fields | 185 | If MSI-X is enabled for the device, two additional fields |
180 | immediately follow this header: | 186 | immediately follow this header:[footnote: |
187 | ie. once you enable MSI-X on the device, the other fields move. | ||
188 | If you turn it off again, they move back! | ||
189 | ] | ||
181 | 190 | ||
182 | 191 | ||
183 | +------------++----------------+--------+ | 192 | +------------++----------------+--------+ |
@@ -191,20 +200,6 @@ immediately follow this header: | |||
191 | +------------++----------------+--------+ | 200 | +------------++----------------+--------+ |
192 | 201 | ||
193 | 202 | ||
194 | Finally, if feature bits (VIRTIO_F_FEATURES_HI) this is | ||
195 | immediately followed by two additional fields: | ||
196 | |||
197 | |||
198 | +------------++----------------------+---------------------- | ||
199 | | Bits || 32 | 32 | ||
200 | +------------++----------------------+---------------------- | ||
201 | | Read/Write || R | R+W | ||
202 | +------------++----------------------+---------------------- | ||
203 | | Purpose || Device | Guest | ||
204 | | || Features bits 32:63 | Features bits 32:63 | ||
205 | +------------++----------------------+---------------------- | ||
206 | |||
207 | |||
208 | Immediately following these general headers, there may be | 203 | Immediately following these general headers, there may be |
209 | device-specific headers: | 204 | device-specific headers: |
210 | 205 | ||
@@ -238,31 +233,25 @@ at least one bit should be set: | |||
238 | may be a significant (or infinite) delay before setting this | 233 | may be a significant (or infinite) delay before setting this |
239 | bit. | 234 | bit. |
240 | 235 | ||
241 | DRIVER_OK (3) Indicates that the driver is set up and ready to | 236 | DRIVER_OK (4) Indicates that the driver is set up and ready to |
242 | drive the device. | 237 | drive the device. |
243 | 238 | ||
244 | FAILED (8) Indicates that something went wrong in the guest, | 239 | FAILED (128) Indicates that something went wrong in the guest, |
245 | and it has given up on the device. This could be an internal | 240 | and it has given up on the device. This could be an internal |
246 | error, or the driver didn't like the device for some reason, or | 241 | error, or the driver didn't like the device for some reason, or |
247 | even a fatal error during device operation. The device must be | 242 | even a fatal error during device operation. The device must be |
248 | reset before attempting to re-initialize. | 243 | reset before attempting to re-initialize. |
249 | 244 | ||
250 | Feature Bits | 245 | Feature Bits<sub:Feature-Bits> |
251 | 246 | ||
252 | The least significant 31 bits of the first configuration field | 247 | The first configuration field indicates the features that the |
253 | indicates the features that the device supports (the high bit is | 248 | device supports. The bits are allocated as follows: |
254 | reserved, and will be used to indicate the presence of future | ||
255 | feature bits elsewhere). If more than 31 feature bits are | ||
256 | supported, the device indicates so by setting feature bit 31 (see | ||
257 | [cha:Reserved-Feature-Bits]). The bits are allocated as follows: | ||
258 | 249 | ||
259 | 0 to 23 Feature bits for the specific device type | 250 | 0 to 23 Feature bits for the specific device type |
260 | 251 | ||
261 | 24 to 40 Feature bits reserved for extensions to the queue and | 252 | 24 to 32 Feature bits reserved for extensions to the queue and |
262 | feature negotiation mechanisms | 253 | feature negotiation mechanisms |
263 | 254 | ||
264 | 41 to 63 Feature bits reserved for future extensions | ||
265 | |||
266 | For example, feature bit 0 for a network device (i.e. Subsystem | 255 | For example, feature bit 0 for a network device (i.e. Subsystem |
267 | Device ID 1) indicates that the device supports checksumming of | 256 | Device ID 1) indicates that the device supports checksumming of |
268 | packets. | 257 | packets. |
@@ -286,10 +275,6 @@ will not see that feature bit in the Device Features field and | |||
286 | can go into backwards compatibility mode (or, for poor | 275 | can go into backwards compatibility mode (or, for poor |
287 | implementations, set the FAILED Device Status bit). | 276 | implementations, set the FAILED Device Status bit). |
288 | 277 | ||
289 | Access to feature bits 32 to 63 is enabled by Guest by setting | ||
290 | feature bit 31. If this bit is unset, Device must assume that all | ||
291 | feature bits > 31 are unset. | ||
292 | |||
293 | Configuration/Queue Vectors | 278 | Configuration/Queue Vectors |
294 | 279 | ||
295 | When MSI-X capability is present and enabled in the device | 280 | When MSI-X capability is present and enabled in the device |
@@ -324,7 +309,7 @@ success, the previously written value is returned, and on | |||
324 | failure, NO_VECTOR is returned. If a mapping failure is detected, | 309 | failure, NO_VECTOR is returned. If a mapping failure is detected, |
325 | the driver can retry mapping with fewer vectors, or disable MSI-X. | 310 | the driver can retry mapping with fewer vectors, or disable MSI-X. |
326 | 311 | ||
327 | Virtqueue Configuration | 312 | Virtqueue Configuration<sec:Virtqueue-Configuration> |
328 | 313 | ||
329 | As a device can have zero or more virtqueues for bulk data | 314 | As a device can have zero or more virtqueues for bulk data |
330 | transport (for example, the network driver has two), the driver | 315 | transport (for example, the network driver has two), the driver |
@@ -587,7 +572,7 @@ and Red Hat under the (3-clause) BSD license so that it can be | |||
587 | freely used by all other projects, and is reproduced (with slight | 572 | freely used by all other projects, and is reproduced (with slight |
588 | variation to remove Linux assumptions) in Appendix A. | 573 | variation to remove Linux assumptions) in Appendix A. |
589 | 574 | ||
590 | Device Operation | 575 | Device Operation<sec:Device-Operation> |
591 | 576 | ||
592 | There are two parts to device operation: supplying new buffers to | 577 | There are two parts to device operation: supplying new buffers to |
593 | the device, and processing used buffers from the device. As an | 578 | the device, and processing used buffers from the device. As an |
@@ -813,7 +798,7 @@ vring.used->ring[vq->last_seen_used%vsz]; | |||
813 | 798 | ||
814 | } | 799 | } |
815 | 800 | ||
816 | Dealing With Configuration Changes | 801 | Dealing With Configuration Changes<sub:Dealing-With-Configuration> |
817 | 802 | ||
818 | Some virtio PCI devices can change the device configuration | 803 | Some virtio PCI devices can change the device configuration |
819 | state, as reflected in the virtio header in the PCI configuration | 804 | state, as reflected in the virtio header in the PCI configuration |
@@ -1260,18 +1245,6 @@ Currently there are five device-independent feature bits defined: | |||
1260 | driver should ignore the used_event field; the device should | 1245 | driver should ignore the used_event field; the device should |
1261 | ignore the avail_event field; the flags field is used | 1246 | ignore the avail_event field; the flags field is used |
1262 | 1247 | ||
1263 | VIRTIO_F_BAD_FEATURE(30) This feature should never be | ||
1264 | negotiated by the guest; doing so is an indication that the | ||
1265 | guest is faulty[footnote: | ||
1266 | An experimental virtio PCI driver contained in Linux version | ||
1267 | 2.6.25 had this problem, and this feature bit can be used to | ||
1268 | detect it. | ||
1269 | ] | ||
1270 | |||
1271 | VIRTIO_F_FEATURES_HIGH(31) This feature indicates that the | ||
1272 | device supports feature bits 32:63. If unset, feature bits | ||
1273 | 32:63 are unset. | ||
1274 | |||
1275 | Appendix C: Network Device | 1248 | Appendix C: Network Device |
1276 | 1249 | ||
1277 | The virtio network device is a virtual ethernet card, and is the | 1250 | The virtio network device is a virtual ethernet card, and is the |
@@ -1335,11 +1308,17 @@ were required. | |||
1335 | 1308 | ||
1336 | VIRTIO_NET_F_CTRL_VLAN (19) Control channel VLAN filtering. | 1309 | VIRTIO_NET_F_CTRL_VLAN (19) Control channel VLAN filtering. |
1337 | 1310 | ||
1311 | VIRTIO_NET_F_GUEST_ANNOUNCE(21) Guest can send gratuitous | ||
1312 | packets. | ||
1313 | |||
1338 | Device configuration layout Two configuration fields are | 1314 | Device configuration layout Two configuration fields are |
1339 | currently defined. The mac address field always exists (though | 1315 | currently defined. The mac address field always exists (though |
1340 | is only valid if VIRTIO_NET_F_MAC is set), and the status field | 1316 | is only valid if VIRTIO_NET_F_MAC is set), and the status field |
1341 | only exists if VIRTIO_NET_F_STATUS is set. Only one bit is | 1317 | only exists if VIRTIO_NET_F_STATUS is set. Two read-only bits |
1342 | currently defined for the status field: VIRTIO_NET_S_LINK_UP. #define VIRTIO_NET_S_LINK_UP 1 | 1318 | are currently defined for the status field: |
1319 | VIRTIO_NET_S_LINK_UP and VIRTIO_NET_S_ANNOUNCE. #define VIRTIO_NET_S_LINK_UP 1 | ||
1320 | |||
1321 | #define VIRTIO_NET_S_ANNOUNCE 2 | ||
1343 | 1322 | ||
1344 | 1323 | ||
1345 | 1324 | ||
@@ -1377,12 +1356,19 @@ struct virtio_net_config { | |||
1377 | packets by negotiating the VIRTIO_NET_F_CSUM feature. This “ | 1356 | packets by negotiating the VIRTIO_NET_F_CSUM feature. This “ |
1378 | checksum offload” is a common feature on modern network cards. | 1357 | checksum offload” is a common feature on modern network cards. |
1379 | 1358 | ||
1380 | If that feature is negotiated, a driver can use TCP or UDP | 1359 | If that feature is negotiated[footnote: |
1381 | segmentation offload by negotiating the VIRTIO_NET_F_HOST_TSO4 | 1360 | ie. VIRTIO_NET_F_HOST_TSO* and VIRTIO_NET_F_HOST_UFO are |
1382 | (IPv4 TCP), VIRTIO_NET_F_HOST_TSO6 (IPv6 TCP) and | 1361 | dependent on VIRTIO_NET_F_CSUM; a device which offers the offload |
1383 | VIRTIO_NET_F_HOST_UFO (UDP fragmentation) features. It should | 1362 | features must offer the checksum feature, and a driver which |
1384 | not send TCP packets requiring segmentation offload which have | 1363 | accepts the offload features must accept the checksum feature. |
1385 | the Explicit Congestion Notification bit set, unless the | 1364 | Similar logic applies to the VIRTIO_NET_F_GUEST_TSO4 features |
1365 | depending on VIRTIO_NET_F_GUEST_CSUM. | ||
1366 | ], a driver can use TCP or UDP segmentation offload by | ||
1367 | negotiating the VIRTIO_NET_F_HOST_TSO4 (IPv4 TCP), | ||
1368 | VIRTIO_NET_F_HOST_TSO6 (IPv6 TCP) and VIRTIO_NET_F_HOST_UFO | ||
1369 | (UDP fragmentation) features. It should not send TCP packets | ||
1370 | requiring segmentation offload which have the Explicit | ||
1371 | Congestion Notification bit set, unless the | ||
1386 | VIRTIO_NET_F_HOST_ECN feature is negotiated.[footnote: | 1372 | VIRTIO_NET_F_HOST_ECN feature is negotiated.[footnote: |
1387 | This is a common restriction in real, older network cards. | 1373 | This is a common restriction in real, older network cards. |
1388 | ] | 1374 | ] |
@@ -1403,7 +1389,7 @@ segmentation, if both guests are amenable. | |||
1403 | 1389 | ||
1404 | Packets are transmitted by placing them in the transmitq, and | 1390 | Packets are transmitted by placing them in the transmitq, and |
1405 | buffers for incoming packets are placed in the receiveq. In each | 1391 | buffers for incoming packets are placed in the receiveq. In each |
1406 | case, the packet itself is preceded by a header: | 1392 | case, the packet itself is preceeded by a header: |
1407 | 1393 | ||
1408 | struct virtio_net_hdr { | 1394 | struct virtio_net_hdr { |
1409 | 1395 | ||
@@ -1462,9 +1448,10 @@ It will have a 14 byte ethernet header and 20 byte IP header | |||
1462 | followed by the TCP header (with the TCP checksum field 16 bytes | 1448 | followed by the TCP header (with the TCP checksum field 16 bytes |
1463 | into that header). csum_start will be 14+20 = 34 (the TCP | 1449 | into that header). csum_start will be 14+20 = 34 (the TCP |
1464 | checksum includes the header), and csum_offset will be 16. The | 1450 | checksum includes the header), and csum_offset will be 16. The |
1465 | value in the TCP checksum field will be the sum of the TCP pseudo | 1451 | value in the TCP checksum field should be initialized to the sum |
1466 | header, so that replacing it by the ones' complement checksum of | 1452 | of the TCP pseudo header, so that replacing it by the ones' |
1467 | the TCP header and body will give the correct result. | 1453 | complement checksum of the TCP header and body will give the |
1454 | correct result. | ||
1468 | ] | 1455 | ] |
1469 | 1456 | ||
1470 | <enu:If-the-driver>If the driver negotiated | 1457 | <enu:If-the-driver>If the driver negotiated |
@@ -1483,8 +1470,8 @@ Due to various bugs in implementations, this field is not useful | |||
1483 | as a guarantee of the transport header size. | 1470 | as a guarantee of the transport header size. |
1484 | ] | 1471 | ] |
1485 | 1472 | ||
1486 | gso_size is the size of the packet beyond that header (ie. | 1473 | gso_size is the maximum size of each packet beyond that header |
1487 | MSS). | 1474 | (ie. MSS). |
1488 | 1475 | ||
1489 | If the driver negotiated the VIRTIO_NET_F_HOST_ECN feature, the | 1476 | If the driver negotiated the VIRTIO_NET_F_HOST_ECN feature, the |
1490 | VIRTIO_NET_HDR_GSO_ECN bit may be set in “gso_type” as well, | 1477 | VIRTIO_NET_HDR_GSO_ECN bit may be set in “gso_type” as well, |
@@ -1567,7 +1554,9 @@ Processing packet involves: | |||
1567 | If the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options were | 1554 | If the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options were |
1568 | negotiated, then the “gso_type” may be something other than | 1555 | negotiated, then the “gso_type” may be something other than |
1569 | VIRTIO_NET_HDR_GSO_NONE, and the “gso_size” field indicates the | 1556 | VIRTIO_NET_HDR_GSO_NONE, and the “gso_size” field indicates the |
1570 | desired MSS (see [enu:If-the-driver]).Control Virtqueue | 1557 | desired MSS (see [enu:If-the-driver]). |
1558 | |||
1559 | Control Virtqueue | ||
1571 | 1560 | ||
1572 | The driver uses the control virtqueue (if VIRTIO_NET_F_CTRL_VQ is | 1561 | The driver uses the control virtqueue (if VIRTIO_NET_F_CTRL_VQ is |
1573 | negotiated) to send commands to manipulate various features of | 1562 | negotiated) to send commands to manipulate various features of |
@@ -1642,7 +1631,7 @@ struct virtio_net_ctrl_mac { | |||
1642 | 1631 | ||
1643 | The device can filter incoming packets by any number of | 1632 | The device can filter incoming packets by any number of |
1644 | destination MAC addresses.[footnote: | 1633 | destination MAC addresses.[footnote: |
1645 | Since there are no guarantees, it can use a hash filter | 1634 | Since there are no guarentees, it can use a hash filter |
1646 | or silently switch to allmulti or promiscuous mode if it is given | 1635 | or silently switch to allmulti or promiscuous mode if it is given |
1647 | too many addresses. | 1636 | too many addresses. |
1648 | ] This table is set using the class VIRTIO_NET_CTRL_MAC and the | 1637 | ] This table is set using the class VIRTIO_NET_CTRL_MAC and the |
@@ -1665,6 +1654,38 @@ can control a VLAN filter table in the device. | |||
1665 | Both the VIRTIO_NET_CTRL_VLAN_ADD and VIRTIO_NET_CTRL_VLAN_DEL | 1654 | Both the VIRTIO_NET_CTRL_VLAN_ADD and VIRTIO_NET_CTRL_VLAN_DEL |
1666 | command take a 16-bit VLAN id as the command-specific-data. | 1655 | command take a 16-bit VLAN id as the command-specific-data. |
1667 | 1656 | ||
1657 | Gratuitous Packet Sending | ||
1658 | |||
1659 | If the driver negotiates the VIRTIO_NET_F_GUEST_ANNOUNCE (depends | ||
1660 | on VIRTIO_NET_F_CTRL_VQ), the device can ask the guest to send gratuitous | ||
1661 | packets; this is usually done after the guest has been physically | ||
1662 | migrated, and needs to announce its presence on the new network | ||
1663 | links. (As hypervisor does not have the knowledge of guest | ||
1664 | network configuration (eg. tagged vlan) it is simplest to prod | ||
1665 | the guest in this way). | ||
1666 | |||
1667 | #define VIRTIO_NET_CTRL_ANNOUNCE 3 | ||
1668 | |||
1669 | #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 | ||
1670 | |||
1671 | The Guest needs to check VIRTIO_NET_S_ANNOUNCE bit in status | ||
1672 | field when it notices the changes of device configuration. The | ||
1673 | command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that | ||
1674 | driver has received the notification and device would clear the | ||
1675 | VIRTIO_NET_S_ANNOUNCE bit in the status field after it received | ||
1676 | this command. | ||
1677 | |||
1678 | Processing this notification involves: | ||
1679 | |||
1680 | Sending the gratuitous packets or marking there are pending | ||
1681 | gratuitous packets to be sent and letting deferred routine to | ||
1682 | send them. | ||
1683 | |||
1684 | Sending VIRTIO_NET_CTRL_ANNOUNCE_ACK command through control | ||
1685 | vq. | ||
1686 | |||
1687 | . | ||
1688 | |||
1668 | Appendix D: Block Device | 1689 | Appendix D: Block Device |
1669 | 1690 | ||
1670 | The virtio block device is a simple virtual block device (ie. | 1691 | The virtio block device is a simple virtual block device (ie. |
@@ -1699,8 +1720,6 @@ device except where noted. | |||
1699 | 1720 | ||
1700 | VIRTIO_BLK_F_FLUSH (9) Cache flush command support. | 1721 | VIRTIO_BLK_F_FLUSH (9) Cache flush command support. |
1701 | 1722 | ||
1702 | |||
1703 | |||
1704 | Device configuration layout The capacity of the device | 1723 | Device configuration layout The capacity of the device |
1705 | (expressed in 512-byte sectors) is always present. The | 1724 | (expressed in 512-byte sectors) is always present. The |
1706 | availability of the others all depend on various feature bits | 1725 | availability of the others all depend on various feature bits |
@@ -1743,8 +1762,6 @@ device except where noted. | |||
1743 | If the VIRTIO_BLK_F_RO feature is set by the device, any write | 1762 | If the VIRTIO_BLK_F_RO feature is set by the device, any write |
1744 | requests will fail. | 1763 | requests will fail. |
1745 | 1764 | ||
1746 | |||
1747 | |||
1748 | Device Operation | 1765 | Device Operation |
1749 | 1766 | ||
1750 | The driver queues requests to the virtqueue, and they are used by | 1767 | The driver queues requests to the virtqueue, and they are used by |
@@ -1805,7 +1822,7 @@ the FLUSH and FLUSH_OUT types are equivalent, the device does not | |||
1805 | distinguish between them | 1822 | distinguish between them |
1806 | ]). If the device has VIRTIO_BLK_F_BARRIER feature the high bit | 1823 | ]). If the device has VIRTIO_BLK_F_BARRIER feature the high bit |
1807 | (VIRTIO_BLK_T_BARRIER) indicates that this request acts as a | 1824 | (VIRTIO_BLK_T_BARRIER) indicates that this request acts as a |
1808 | barrier and that all preceding requests must be complete before | 1825 | barrier and that all preceeding requests must be complete before |
1809 | this one, and all following requests must not be started until | 1826 | this one, and all following requests must not be started until |
1810 | this is complete. Note that a barrier does not flush caches in | 1827 | this is complete. Note that a barrier does not flush caches in |
1811 | the underlying backend device in host, and thus does not serve as | 1828 | the underlying backend device in host, and thus does not serve as |
@@ -2118,7 +2135,7 @@ This is historical, and independent of the guest page size | |||
2118 | 2135 | ||
2119 | Otherwise, the guest may begin to re-use pages previously given | 2136 | Otherwise, the guest may begin to re-use pages previously given |
2120 | to the balloon before the device has acknowledged their | 2137 | to the balloon before the device has acknowledged their |
2121 | withdrawal. [footnote: | 2138 | withdrawl. [footnote: |
2122 | In this case, deflation advice is merely a courtesy | 2139 | In this case, deflation advice is merely a courtesy |
2123 | ] | 2140 | ] |
2124 | 2141 | ||
@@ -2198,3 +2215,996 @@ as follows: | |||
2198 | VIRTIO_BALLOON_S_MEMTOT The total amount of memory available | 2215 | VIRTIO_BALLOON_S_MEMTOT The total amount of memory available |
2199 | (in bytes). | 2216 | (in bytes). |
2200 | 2217 | ||
2218 | Appendix H: Rpmsg: Remote Processor Messaging | ||
2219 | |||
2220 | Virtio rpmsg devices represent remote processors on the system | ||
2221 | which run in asymmetric multi-processing (AMP) configuration, and | ||
2222 | which are usually used to offload cpu-intensive tasks from the | ||
2223 | main application processor (a typical SoC methodology). | ||
2224 | |||
2225 | Virtio is being used to communicate with those remote processors; | ||
2226 | empty buffers are placed in one virtqueue for receiving messages, | ||
2227 | and non-empty buffers, containing outbound messages, are enqueued | ||
2228 | in a second virtqueue for transmission. | ||
2229 | |||
2230 | Numerous communication channels can be multiplexed over those two | ||
2231 | virtqueues, so different entities, running on the application and | ||
2232 | remote processor, can directly communicate in a point-to-point | ||
2233 | fashion. | ||
2234 | |||
2235 | Configuration | ||
2236 | |||
2237 | Subsystem Device ID 7 | ||
2238 | |||
2239 | Virtqueues 0:receiveq. 1:transmitq. | ||
2240 | |||
2241 | Feature bits | ||
2242 | |||
2243 | VIRTIO_RPMSG_F_NS (0) Device sends (and is capable of receiving) | ||
2244 | name service messages announcing the creation (or | ||
2245 | destruction) of a channel:/** | ||
2246 | |||
2247 | * struct rpmsg_ns_msg - dynamic name service announcement | ||
2248 | message | ||
2249 | |||
2250 | * @name: name of remote service that is published | ||
2251 | |||
2252 | * @addr: address of remote service that is published | ||
2253 | |||
2254 | * @flags: indicates whether service is created or destroyed | ||
2255 | |||
2256 | * | ||
2257 | |||
2258 | * This message is sent across to publish a new service (or | ||
2259 | announce | ||
2260 | |||
2261 | * about its removal). When we receive these messages, an | ||
2262 | appropriate | ||
2263 | |||
2264 | * rpmsg channel (i.e device) is created/destroyed. | ||
2265 | |||
2266 | */ | ||
2267 | |||
2268 | struct rpmsg_ns_msg { | ||
2269 | |||
2270 | char name[RPMSG_NAME_SIZE]; | ||
2271 | |||
2272 | u32 addr; | ||
2273 | |||
2274 | u32 flags; | ||
2275 | |||
2276 | } __packed; | ||
2277 | |||
2278 | |||
2279 | |||
2280 | /** | ||
2281 | |||
2282 | * enum rpmsg_ns_flags - dynamic name service announcement flags | ||
2283 | |||
2284 | * | ||
2285 | |||
2286 | * @RPMSG_NS_CREATE: a new remote service was just created | ||
2287 | |||
2288 | * @RPMSG_NS_DESTROY: a remote service was just destroyed | ||
2289 | |||
2290 | */ | ||
2291 | |||
2292 | enum rpmsg_ns_flags { | ||
2293 | |||
2294 | RPMSG_NS_CREATE = 0, | ||
2295 | |||
2296 | RPMSG_NS_DESTROY = 1, | ||
2297 | |||
2298 | }; | ||
2299 | |||
2300 | Device configuration layout | ||
2301 | |||
2302 | At this point none currently defined. | ||
2303 | |||
2304 | Device Initialization | ||
2305 | |||
2306 | The initialization routine should identify the receive and | ||
2307 | transmission virtqueues. | ||
2308 | |||
2309 | The receive virtqueue should be filled with receive buffers. | ||
2310 | |||
2311 | Device Operation | ||
2312 | |||
2313 | Messages are transmitted by placing them in the transmitq, and | ||
2314 | buffers for inbound messages are placed in the receiveq. In any | ||
2315 | case, messages are always preceded by the following header: /** | ||
2316 | |||
2317 | * struct rpmsg_hdr - common header for all rpmsg messages | ||
2318 | |||
2319 | * @src: source address | ||
2320 | |||
2321 | * @dst: destination address | ||
2322 | |||
2323 | * @reserved: reserved for future use | ||
2324 | |||
2325 | * @len: length of payload (in bytes) | ||
2326 | |||
2327 | * @flags: message flags | ||
2328 | |||
2329 | * @data: @len bytes of message payload data | ||
2330 | |||
2331 | * | ||
2332 | |||
2333 | * Every message sent(/received) on the rpmsg bus begins with | ||
2334 | this header. | ||
2335 | |||
2336 | */ | ||
2337 | |||
2338 | struct rpmsg_hdr { | ||
2339 | |||
2340 | u32 src; | ||
2341 | |||
2342 | u32 dst; | ||
2343 | |||
2344 | u32 reserved; | ||
2345 | |||
2346 | u16 len; | ||
2347 | |||
2348 | u16 flags; | ||
2349 | |||
2350 | u8 data[0]; | ||
2351 | |||
2352 | } __packed; | ||
2353 | |||
2354 | Appendix I: SCSI Host Device | ||
2355 | |||
2356 | The virtio SCSI host device groups together one or more virtual | ||
2357 | logical units (such as disks), and allows communicating to them | ||
2358 | using the SCSI protocol. An instance of the device represents a | ||
2359 | SCSI host to which many targets and LUNs are attached. | ||
2360 | |||
2361 | The virtio SCSI device services two kinds of requests: | ||
2362 | |||
2363 | command requests for a logical unit; | ||
2364 | |||
2365 | task management functions related to a logical unit, target or | ||
2366 | command. | ||
2367 | |||
2368 | The device is also able to send out notifications about added and | ||
2369 | removed logical units. Together, these capabilities provide a | ||
2370 | SCSI transport protocol that uses virtqueues as the transfer | ||
2371 | medium. In the transport protocol, the virtio driver acts as the | ||
2372 | initiator, while the virtio SCSI host provides one or more | ||
2373 | targets that receive and process the requests. | ||
2374 | |||
2375 | Configuration | ||
2376 | |||
2377 | Subsystem Device ID 8 | ||
2378 | |||
2379 | Virtqueues 0:controlq; 1:eventq; 2..n:request queues. | ||
2380 | |||
2381 | Feature bits | ||
2382 | |||
2383 | VIRTIO_SCSI_F_INOUT (0) A single request can include both | ||
2384 | read-only and write-only data buffers. | ||
2385 | |||
2386 | VIRTIO_SCSI_F_HOTPLUG (1) The host should enable | ||
2387 | hot-plug/hot-unplug of new LUNs and targets on the SCSI bus. | ||
2388 | |||
2389 | Device configuration layout All fields of this configuration | ||
2390 | are always available. sense_size and cdb_size are writable by | ||
2391 | the guest.struct virtio_scsi_config { | ||
2392 | |||
2393 | u32 num_queues; | ||
2394 | |||
2395 | u32 seg_max; | ||
2396 | |||
2397 | u32 max_sectors; | ||
2398 | |||
2399 | u32 cmd_per_lun; | ||
2400 | |||
2401 | u32 event_info_size; | ||
2402 | |||
2403 | u32 sense_size; | ||
2404 | |||
2405 | u32 cdb_size; | ||
2406 | |||
2407 | u16 max_channel; | ||
2408 | |||
2409 | u16 max_target; | ||
2410 | |||
2411 | u32 max_lun; | ||
2412 | |||
2413 | }; | ||
2414 | |||
2415 | num_queues is the total number of request virtqueues exposed by | ||
2416 | the device. The driver is free to use only one request queue, | ||
2417 | or it can use more to achieve better performance. | ||
2418 | |||
2419 | seg_max is the maximum number of segments that can be in a | ||
2420 | command. A bidirectional command can include seg_max input | ||
2421 | segments and seg_max output segments. | ||
2422 | |||
2423 | max_sectors is a hint to the guest about the maximum transfer | ||
2424 | size it should use. | ||
2425 | |||
2426 | cmd_per_lun is a hint to the guest about the maximum number of | ||
2427 | linked commands it should send to one LUN. The actual value | ||
2428 | to be used is the minimum of cmd_per_lun and the virtqueue | ||
2429 | size. | ||
2430 | |||
2431 | event_info_size is the maximum size that the device will fill | ||
2432 | for buffers that the driver places in the eventq. The driver | ||
2433 | should always put buffers at least of this size. It is | ||
2434 | written by the device depending on the set of negotiated | ||
2435 | features. | ||
2436 | |||
2437 | sense_size is the maximum size of the sense data that the | ||
2438 | device will write. The default value is written by the device | ||
2439 | and will always be 96, but the driver can modify it. It is | ||
2440 | restored to the default when the device is reset. | ||
2441 | |||
2442 | cdb_size is the maximum size of the CDB that the driver will | ||
2443 | write. The default value is written by the device and will | ||
2444 | always be 32, but the driver can likewise modify it. It is | ||
2445 | restored to the default when the device is reset. | ||
2446 | |||
2447 | max_channel, max_target and max_lun can be used by the driver | ||
2448 | as hints to constrain scanning the logical units on the | ||
2449 | host. | ||
2450 | |||
2451 | Device Initialization | ||
2452 | |||
2453 | The initialization routine should first of all discover the | ||
2454 | device's virtqueues. | ||
2455 | |||
2456 | If the driver uses the eventq, it should then place at least a | ||
2457 | buffer in the eventq. | ||
2458 | |||
2459 | The driver can immediately issue requests (for example, INQUIRY | ||
2460 | or REPORT LUNS) or task management functions (for example, I_T | ||
2461 | RESET). | ||
2462 | |||
2463 | Device Operation: request queues | ||
2464 | |||
2465 | The driver queues requests to an arbitrary request queue, and | ||
2466 | they are used by the device on that same queue. It is the | ||
2467 | responsibility of the driver to ensure strict request ordering | ||
2468 | for commands placed on different queues, because they will be | ||
2469 | consumed with no order constraints. | ||
2470 | |||
2471 | Requests have the following format: | ||
2472 | |||
2473 | struct virtio_scsi_req_cmd { | ||
2474 | |||
2475 | // Read-only | ||
2476 | |||
2477 | u8 lun[8]; | ||
2478 | |||
2479 | u64 id; | ||
2480 | |||
2481 | u8 task_attr; | ||
2482 | |||
2483 | u8 prio; | ||
2484 | |||
2485 | u8 crn; | ||
2486 | |||
2487 | char cdb[cdb_size]; | ||
2488 | |||
2489 | char dataout[]; | ||
2490 | |||
2491 | // Write-only part | ||
2492 | |||
2493 | u32 sense_len; | ||
2494 | |||
2495 | u32 residual; | ||
2496 | |||
2497 | u16 status_qualifier; | ||
2498 | |||
2499 | u8 status; | ||
2500 | |||
2501 | u8 response; | ||
2502 | |||
2503 | u8 sense[sense_size]; | ||
2504 | |||
2505 | char datain[]; | ||
2506 | |||
2507 | }; | ||
2508 | |||
2509 | |||
2510 | |||
2511 | /* command-specific response values */ | ||
2512 | |||
2513 | #define VIRTIO_SCSI_S_OK 0 | ||
2514 | |||
2515 | #define VIRTIO_SCSI_S_OVERRUN 1 | ||
2516 | |||
2517 | #define VIRTIO_SCSI_S_ABORTED 2 | ||
2518 | |||
2519 | #define VIRTIO_SCSI_S_BAD_TARGET 3 | ||
2520 | |||
2521 | #define VIRTIO_SCSI_S_RESET 4 | ||
2522 | |||
2523 | #define VIRTIO_SCSI_S_BUSY 5 | ||
2524 | |||
2525 | #define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6 | ||
2526 | |||
2527 | #define VIRTIO_SCSI_S_TARGET_FAILURE 7 | ||
2528 | |||
2529 | #define VIRTIO_SCSI_S_NEXUS_FAILURE 8 | ||
2530 | |||
2531 | #define VIRTIO_SCSI_S_FAILURE 9 | ||
2532 | |||
2533 | |||
2534 | |||
2535 | /* task_attr */ | ||
2536 | |||
2537 | #define VIRTIO_SCSI_S_SIMPLE 0 | ||
2538 | |||
2539 | #define VIRTIO_SCSI_S_ORDERED 1 | ||
2540 | |||
2541 | #define VIRTIO_SCSI_S_HEAD 2 | ||
2542 | |||
2543 | #define VIRTIO_SCSI_S_ACA 3 | ||
2544 | |||
2545 | The lun field addresses a target and logical unit in the | ||
2546 | virtio-scsi device's SCSI domain. The only supported format for | ||
2547 | the LUN field is: first byte set to 1, second byte set to target, | ||
2548 | third and fourth byte representing a single level LUN structure, | ||
2549 | followed by four zero bytes. With this representation, a | ||
2550 | virtio-scsi device can serve up to 256 targets and 16384 LUNs per | ||
2551 | target. | ||
2552 | |||
2553 | The id field is the command identifier (“tag”). | ||
2554 | |||
2555 | task_attr, prio and crn should be left to zero. task_attr defines | ||
2556 | the task attribute as in the table above, but all task attributes | ||
2557 | may be mapped to SIMPLE by the device; crn may also be provided | ||
2558 | by clients, but is generally expected to be 0. The maximum CRN | ||
2559 | value defined by the protocol is 255, since CRN is stored in an | ||
2560 | 8-bit integer. | ||
2561 | |||
2562 | All of these fields are defined in SAM. They are always | ||
2563 | read-only, as are the cdb and dataout field. The cdb_size is | ||
2564 | taken from the configuration space. | ||
2565 | |||
2566 | sense and subsequent fields are always write-only. The sense_len | ||
2567 | field indicates the number of bytes actually written to the sense | ||
2568 | buffer. The residual field indicates the residual size, | ||
2569 | calculated as “data_length - number_of_transferred_bytes”, for | ||
2570 | read or write operations. For bidirectional commands, the | ||
2571 | number_of_transferred_bytes includes both read and written bytes. | ||
2572 | A residual field that is less than the size of datain means that | ||
2573 | the dataout field was processed entirely. A residual field that | ||
2574 | exceeds the size of datain means that the dataout field was | ||
2575 | processed partially and the datain field was not processed at | ||
2576 | all. | ||
2577 | |||
2578 | The status byte is written by the device to be the status code as | ||
2579 | defined in SAM. | ||
2580 | |||
2581 | The response byte is written by the device to be one of the | ||
2582 | following: | ||
2583 | |||
2584 | VIRTIO_SCSI_S_OK when the request was completed and the status | ||
2585 | byte is filled with a SCSI status code (not necessarily | ||
2586 | "GOOD"). | ||
2587 | |||
2588 | VIRTIO_SCSI_S_OVERRUN if the content of the CDB requires | ||
2589 | transferring more data than is available in the data buffers. | ||
2590 | |||
2591 | VIRTIO_SCSI_S_ABORTED if the request was cancelled due to an | ||
2592 | ABORT TASK or ABORT TASK SET task management function. | ||
2593 | |||
2594 | VIRTIO_SCSI_S_BAD_TARGET if the request was never processed | ||
2595 | because the target indicated by the lun field does not exist. | ||
2596 | |||
2597 | VIRTIO_SCSI_S_RESET if the request was cancelled due to a bus | ||
2598 | or device reset (including a task management function). | ||
2599 | |||
2600 | VIRTIO_SCSI_S_TRANSPORT_FAILURE if the request failed due to a | ||
2601 | problem in the connection between the host and the target | ||
2602 | (severed link). | ||
2603 | |||
2604 | VIRTIO_SCSI_S_TARGET_FAILURE if the target is suffering a | ||
2605 | failure and the guest should not retry on other paths. | ||
2606 | |||
2607 | VIRTIO_SCSI_S_NEXUS_FAILURE if the nexus is suffering a failure | ||
2608 | but retrying on other paths might yield a different result. | ||
2609 | |||
2610 | VIRTIO_SCSI_S_BUSY if the request failed but retrying on the | ||
2611 | same path should work. | ||
2612 | |||
2613 | VIRTIO_SCSI_S_FAILURE for other host or guest error. In | ||
2614 | particular, if neither dataout nor datain is empty, and the | ||
2615 | VIRTIO_SCSI_F_INOUT feature has not been negotiated, the | ||
2616 | request will be immediately returned with a response equal to | ||
2617 | VIRTIO_SCSI_S_FAILURE. | ||
2618 | |||
2619 | Device Operation: controlq | ||
2620 | |||
2621 | The controlq is used for other SCSI transport operations. | ||
2622 | Requests have the following format: | ||
2623 | |||
2624 | struct virtio_scsi_ctrl { | ||
2625 | |||
2626 | u32 type; | ||
2627 | |||
2628 | ... | ||
2629 | |||
2630 | u8 response; | ||
2631 | |||
2632 | }; | ||
2633 | |||
2634 | |||
2635 | |||
2636 | /* response values valid for all commands */ | ||
2637 | |||
2638 | #define VIRTIO_SCSI_S_OK 0 | ||
2639 | |||
2640 | #define VIRTIO_SCSI_S_BAD_TARGET 3 | ||
2641 | |||
2642 | #define VIRTIO_SCSI_S_BUSY 5 | ||
2643 | |||
2644 | #define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6 | ||
2645 | |||
2646 | #define VIRTIO_SCSI_S_TARGET_FAILURE 7 | ||
2647 | |||
2648 | #define VIRTIO_SCSI_S_NEXUS_FAILURE 8 | ||
2649 | |||
2650 | #define VIRTIO_SCSI_S_FAILURE 9 | ||
2651 | |||
2652 | #define VIRTIO_SCSI_S_INCORRECT_LUN 12 | ||
2653 | |||
2654 | The type identifies the remaining fields. | ||
2655 | |||
2656 | The following commands are defined: | ||
2657 | |||
2658 | Task management function | ||
2659 | #define VIRTIO_SCSI_T_TMF 0 | ||
2660 | |||
2661 | |||
2662 | |||
2663 | #define VIRTIO_SCSI_T_TMF_ABORT_TASK 0 | ||
2664 | |||
2665 | #define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1 | ||
2666 | |||
2667 | #define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2 | ||
2668 | |||
2669 | #define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3 | ||
2670 | |||
2671 | #define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4 | ||
2672 | |||
2673 | #define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5 | ||
2674 | |||
2675 | #define VIRTIO_SCSI_T_TMF_QUERY_TASK 6 | ||
2676 | |||
2677 | #define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7 | ||
2678 | |||
2679 | |||
2680 | |||
2681 | struct virtio_scsi_ctrl_tmf | ||
2682 | |||
2683 | { | ||
2684 | |||
2685 | // Read-only part | ||
2686 | |||
2687 | u32 type; | ||
2688 | |||
2689 | u32 subtype; | ||
2690 | |||
2691 | u8 lun[8]; | ||
2692 | |||
2693 | u64 id; | ||
2694 | |||
2695 | // Write-only part | ||
2696 | |||
2697 | u8 response; | ||
2698 | |||
2699 | } | ||
2700 | |||
2701 | |||
2702 | |||
2703 | /* command-specific response values */ | ||
2704 | |||
2705 | #define VIRTIO_SCSI_S_FUNCTION_COMPLETE 0 | ||
2706 | |||
2707 | #define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10 | ||
2708 | |||
2709 | #define VIRTIO_SCSI_S_FUNCTION_REJECTED 11 | ||
2710 | |||
2711 | The type is VIRTIO_SCSI_T_TMF; the subtype field defines the | ||
2712 | requested task management function (one of the | ||
2713 | VIRTIO_SCSI_T_TMF_* values above) and must always be specified. | ||
2714 | All fields except response are filled by the driver. | ||
2715 | |||
2716 | Other fields may be irrelevant for the requested TMF; if so, | ||
2717 | they are ignored but they should still be present. The lun | ||
2718 | field is in the same format specified for request queues; the | ||
2719 | single level LUN is ignored when the task management function | ||
2720 | addresses a whole I_T nexus. When relevant, the value of the id | ||
2721 | field is matched against the id values passed on the requestq. | ||
2722 | |||
2723 | The outcome of the task management function is written by the | ||
2724 | device in the response field. The command-specific response | ||
2725 | values map 1-to-1 with those defined in SAM. | ||
2726 | |||
2727 | Asynchronous notification query | ||
2728 | #define VIRTIO_SCSI_T_AN_QUERY 1 | ||
2729 | |||
2730 | |||
2731 | |||
2732 | struct virtio_scsi_ctrl_an { | ||
2733 | |||
2734 | // Read-only part | ||
2735 | |||
2736 | u32 type; | ||
2737 | |||
2738 | u8 lun[8]; | ||
2739 | |||
2740 | u32 event_requested; | ||
2741 | |||
2742 | // Write-only part | ||
2743 | |||
2744 | u32 event_actual; | ||
2745 | |||
2746 | u8 response; | ||
2747 | |||
2748 | } | ||
2749 | |||
2750 | |||
2751 | |||
2752 | #define VIRTIO_SCSI_EVT_ASYNC_OPERATIONAL_CHANGE 2 | ||
2753 | |||
2754 | #define VIRTIO_SCSI_EVT_ASYNC_POWER_MGMT 4 | ||
2755 | |||
2756 | #define VIRTIO_SCSI_EVT_ASYNC_EXTERNAL_REQUEST 8 | ||
2757 | |||
2758 | #define VIRTIO_SCSI_EVT_ASYNC_MEDIA_CHANGE 16 | ||
2759 | |||
2760 | #define VIRTIO_SCSI_EVT_ASYNC_MULTI_HOST 32 | ||
2761 | |||
2762 | #define VIRTIO_SCSI_EVT_ASYNC_DEVICE_BUSY 64 | ||
2763 | |||
2764 | By sending this command, the driver asks the device which | ||
2765 | events the given LUN can report, as described in paragraphs 6.6 | ||
2766 | and A.6 of the SCSI MMC specification. The driver writes the | ||
2767 | events it is interested in into the event_requested; the device | ||
2768 | responds by writing the events that it supports into | ||
2769 | event_actual. | ||
2770 | |||
2771 | The type is VIRTIO_SCSI_T_AN_QUERY. The lun and event_requested | ||
2772 | fields are written by the driver. The event_actual and response | ||
2773 | fields are written by the device. | ||
2774 | |||
2775 | No command-specific values are defined for the response byte. | ||
2776 | |||
2777 | Asynchronous notification subscription | ||
2778 | #define VIRTIO_SCSI_T_AN_SUBSCRIBE 2 | ||
2779 | |||
2780 | |||
2781 | |||
2782 | struct virtio_scsi_ctrl_an { | ||
2783 | |||
2784 | // Read-only part | ||
2785 | |||
2786 | u32 type; | ||
2787 | |||
2788 | u8 lun[8]; | ||
2789 | |||
2790 | u32 event_requested; | ||
2791 | |||
2792 | // Write-only part | ||
2793 | |||
2794 | u32 event_actual; | ||
2795 | |||
2796 | u8 response; | ||
2797 | |||
2798 | } | ||
2799 | |||
2800 | By sending this command, the driver asks the specified LUN to | ||
2801 | report events for its physical interface, again as described in | ||
2802 | the SCSI MMC specification. The driver writes the events it is | ||
2803 | interested in into the event_requested; the device responds by | ||
2804 | writing the events that it supports into event_actual. | ||
2805 | |||
2806 | Event types are the same as for the asynchronous notification | ||
2807 | query message. | ||
2808 | |||
2809 | The type is VIRTIO_SCSI_T_AN_SUBSCRIBE. The lun and | ||
2810 | event_requested fields are written by the driver. The | ||
2811 | event_actual and response fields are written by the device. | ||
2812 | |||
2813 | No command-specific values are defined for the response byte. | ||
2814 | |||
2815 | Device Operation: eventq | ||
2816 | |||
2817 | The eventq is used by the device to report information on logical | ||
2818 | units that are attached to it. The driver should always leave a | ||
2819 | few buffers ready in the eventq. In general, the device will not | ||
2820 | queue events to cope with an empty eventq, and will end up | ||
2821 | dropping events if it finds no buffer ready. However, when | ||
2822 | reporting events for many LUNs (e.g. when a whole target | ||
2823 | disappears), the device can throttle events to avoid dropping | ||
2824 | them. For this reason, placing 10-15 buffers on the event queue | ||
2825 | should be enough. | ||
2826 | |||
2827 | Buffers are placed in the eventq and filled by the device when | ||
2828 | interesting events occur. The buffers should be strictly | ||
2829 | write-only (device-filled) and the size of the buffers should be | ||
2830 | at least the value given in the device's configuration | ||
2831 | information. | ||
2832 | |||
2833 | Buffers returned by the device on the eventq will be referred to | ||
2834 | as "events" in the rest of this section. Events have the | ||
2835 | following format: | ||
2836 | |||
2837 | #define VIRTIO_SCSI_T_EVENTS_MISSED 0x80000000 | ||
2838 | |||
2839 | |||
2840 | |||
2841 | struct virtio_scsi_event { | ||
2842 | |||
2843 | // Write-only part | ||
2844 | |||
2845 | u32 event; | ||
2846 | |||
2847 | ... | ||
2848 | |||
2849 | } | ||
2850 | |||
2851 | If bit 31 is set in the event field, the device failed to report | ||
2852 | an event due to missing buffers. In this case, the driver should | ||
2853 | poll the logical units for unit attention conditions, and/or do | ||
2854 | whatever form of bus scan is appropriate for the guest operating | ||
2855 | system. | ||
2856 | |||
2857 | Other data that the device writes to the buffer depends on the | ||
2858 | contents of the event field. The following events are defined: | ||
2859 | |||
2860 | No event | ||
2861 | #define VIRTIO_SCSI_T_NO_EVENT 0 | ||
2862 | |||
2863 | This event is fired in the following cases: | ||
2864 | |||
2865 | When the device detects in the eventq a buffer that is shorter | ||
2866 | than what is indicated in the configuration field, it might | ||
2867 | use it immediately and put this dummy value in the event | ||
2868 | field. A well-written driver will never observe this | ||
2869 | situation. | ||
2870 | |||
2871 | When events are dropped, the device may signal this event as | ||
2872 | soon as the driver makes a buffer available, in order to | ||
2873 | request action from the driver. In this case, of course, this | ||
2874 | event will be reported with the VIRTIO_SCSI_T_EVENTS_MISSED | ||
2875 | flag. | ||
2876 | |||
2877 | Transport reset | ||
2878 | #define VIRTIO_SCSI_T_TRANSPORT_RESET 1 | ||
2879 | |||
2880 | |||
2881 | |||
2882 | struct virtio_scsi_event_reset { | ||
2883 | |||
2884 | // Write-only part | ||
2885 | |||
2886 | u32 event; | ||
2887 | |||
2888 | u8 lun[8]; | ||
2889 | |||
2890 | u32 reason; | ||
2891 | |||
2892 | } | ||
2893 | |||
2894 | |||
2895 | |||
2896 | #define VIRTIO_SCSI_EVT_RESET_HARD 0 | ||
2897 | |||
2898 | #define VIRTIO_SCSI_EVT_RESET_RESCAN 1 | ||
2899 | |||
2900 | #define VIRTIO_SCSI_EVT_RESET_REMOVED 2 | ||
2901 | |||
2902 | By sending this event, the device signals that a logical unit | ||
2903 | on a target has been reset, including the case of a new device | ||
2904 | appearing or disappearing on the bus. The device fills in all | ||
2905 | fields. The event field is set to | ||
2906 | VIRTIO_SCSI_T_TRANSPORT_RESET. The lun field addresses a | ||
2907 | logical unit in the SCSI host. | ||
2908 | |||
2909 | The reason value is one of the three #define values appearing | ||
2910 | above: | ||
2911 | |||
2912 | VIRTIO_SCSI_EVT_RESET_REMOVED (“LUN/target removed”) is used if | ||
2913 | the target or logical unit is no longer able to receive | ||
2914 | commands. | ||
2915 | |||
2916 | VIRTIO_SCSI_EVT_RESET_HARD (“LUN hard reset”) is used if the | ||
2917 | logical unit has been reset, but is still present. | ||
2918 | |||
2919 | VIRTIO_SCSI_EVT_RESET_RESCAN (“rescan LUN/target”) is used if a | ||
2920 | target or logical unit has just appeared on the device. | ||
2921 | |||
2922 | The “removed” and “rescan” events, when sent for LUN 0, may | ||
2923 | apply to the entire target. After receiving them the driver | ||
2924 | should ask the initiator to rescan the target, in order to | ||
2925 | detect the case when an entire target has appeared or | ||
2926 | disappeared. These two events will never be reported unless the | ||
2927 | VIRTIO_SCSI_F_HOTPLUG feature was negotiated between the host | ||
2928 | and the guest. | ||
2929 | |||
2930 | Events will also be reported via sense codes (this obviously | ||
2931 | does not apply to newly appeared buses or targets, since the | ||
2932 | application has never discovered them): | ||
2933 | |||
2934 | “LUN/target removed” maps to sense key ILLEGAL REQUEST, asc | ||
2935 | 0x25, ascq 0x00 (LOGICAL UNIT NOT SUPPORTED) | ||
2936 | |||
2937 | “LUN hard reset” maps to sense key UNIT ATTENTION, asc 0x29 | ||
2938 | (POWER ON, RESET OR BUS DEVICE RESET OCCURRED) | ||
2939 | |||
2940 | “rescan LUN/target” maps to sense key UNIT ATTENTION, asc 0x3f, | ||
2941 | ascq 0x0e (REPORTED LUNS DATA HAS CHANGED) | ||
2942 | |||
2943 | The preferred way to detect transport reset is always to use | ||
2944 | events, because sense codes are only seen by the driver when it | ||
2945 | sends a SCSI command to the logical unit or target. However, in | ||
2946 | case events are dropped, the initiator will still be able to | ||
2947 | synchronize with the actual state of the controller if the | ||
2948 | driver asks the initiator to rescan the SCSI bus. During the | ||
2949 | rescan, the initiator will be able to observe the above sense | ||
2950 | codes, and it will process them as if the driver had | ||
2951 | received the equivalent event. | ||
2952 | |||
2953 | Asynchronous notification | ||
2954 | #define VIRTIO_SCSI_T_ASYNC_NOTIFY 2 | ||
2955 | |||
2956 | |||
2957 | |||
2958 | struct virtio_scsi_event_an { | ||
2959 | |||
2960 | // Write-only part | ||
2961 | |||
2962 | u32 event; | ||
2963 | |||
2964 | u8 lun[8]; | ||
2965 | |||
2966 | u32 reason; | ||
2967 | |||
2968 | } | ||
2969 | |||
2970 | By sending this event, the device signals that an asynchronous | ||
2971 | event was fired from a physical interface. | ||
2972 | |||
2973 | All fields are written by the device. The event field is set to | ||
2974 | VIRTIO_SCSI_T_ASYNC_NOTIFY. The lun field addresses a logical | ||
2975 | unit in the SCSI host. The reason field is a subset of the | ||
2976 | events that the driver has subscribed to via the "Asynchronous | ||
2977 | notification subscription" command. | ||
2978 | |||
2979 | When dropped events are reported, the driver should poll for | ||
2980 | asynchronous events manually using SCSI commands. | ||
2981 | |||
2982 | Appendix X: virtio-mmio | ||
2983 | |||
2984 | Virtual environments without PCI support (a common situation in | ||
2985 | embedded device models) might use a simple memory mapped device | ||
2986 | (“virtio-mmio”) instead of the PCI device. | ||
2987 | |||
2988 | The memory mapped virtio device behaviour is based on the PCI | ||
2989 | device specification. Therefore most of operations like device | ||
2990 | initialization, queues configuration and buffer transfers are | ||
2991 | nearly identical. Existing differences are described in the | ||
2992 | following sections. | ||
2993 | |||
2994 | Device Initialization | ||
2995 | |||
2996 | Instead of using the PCI IO space for the virtio header, the | ||
2997 | “virtio-mmio” device provides a set of memory mapped control | ||
2998 | registers, all 32 bits wide, followed by device-specific | ||
2999 | configuration space. The following list presents their layout: | ||
3000 | |||
3001 | Offset from the device base address | Direction | Name | ||
3002 | Description | ||
3003 | |||
3004 | 0x000 | R | MagicValue | ||
3005 | “virt” string. | ||
3006 | |||
3007 | 0x004 | R | Version | ||
3008 | Device version number. Currently must be 1. | ||
3009 | |||
3010 | 0x008 | R | DeviceID | ||
3011 | Virtio Subsystem Device ID (ie. 1 for network card). | ||
3012 | |||
3013 | 0x00c | R | VendorID | ||
3014 | Virtio Subsystem Vendor ID. | ||
3015 | |||
3016 | 0x010 | R | HostFeatures | ||
3017 | Flags representing features the device supports. | ||
3018 | Reading from this register returns 32 consecutive flag bits, | ||
3019 | first bit depending on the last value written to | ||
3020 | HostFeaturesSel register. | ||
3021 | Access to this register returns bits HostFeaturesSel*32 | ||
3022 | to (HostFeaturesSel*32)+31, | ||
3023 | eg. feature bits 0 to 31 if | ||
3024 | HostFeaturesSel is set to 0 and features bits 32 to 63 if | ||
3025 | HostFeaturesSel is set to 1. Also see [sub:Feature-Bits] | ||
3026 | |||
3027 | 0x014 | W | HostFeaturesSel | ||
3028 | Device (Host) features word selection. | ||
3029 | Writing to this register selects a set of 32 device feature bits | ||
3030 | accessible by reading from HostFeatures register. Device driver | ||
3031 | must write a value to the HostFeaturesSel register before | ||
3032 | reading from the HostFeatures register. | ||
3033 | |||
3034 | 0x020 | W | GuestFeatures | ||
3035 | Flags representing device features understood and activated by | ||
3036 | the driver. | ||
3037 | Writing to this register sets 32 consecutive flag bits, first | ||
3038 | bit depending on the last value written to GuestFeaturesSel | ||
3039 | register. | ||
3040 | Access to this register sets bits GuestFeaturesSel*32 | ||
3041 | to (GuestFeaturesSel*32)+31, | ||
3042 | eg. feature bits 0 to 31 if | ||
3043 | GuestFeaturesSel is set to 0 and features bits 32 to 63 if | ||
3044 | GuestFeaturesSel is set to 1. Also see [sub:Feature-Bits] | ||
3045 | |||
3046 | 0x024 | W | GuestFeaturesSel | ||
3047 | Activated (Guest) features word selection. | ||
3048 | Writing to this register selects a set of 32 activated feature | ||
3049 | bits accessible by writing to the GuestFeatures register. | ||
3050 | Device driver must write a value to the GuestFeaturesSel | ||
3051 | register before writing to the GuestFeatures register. | ||
3052 | |||
3053 | 0x028 | W | GuestPageSize | ||
3054 | Guest page size. | ||
3055 | Device driver must write the guest page size in bytes to the | ||
3056 | register during initialization, before any queues are used. | ||
3057 | This value must be a power of 2 and is used by the Host to | ||
3058 | calculate Guest address of the first queue page (see QueuePFN). | ||
3059 | |||
3060 | 0x030 | W | QueueSel | ||
3061 | Virtual queue index (first queue is 0). | ||
3062 | Writing to this register selects the virtual queue that the | ||
3063 | following operations on QueueNum, QueueAlign and QueuePFN apply | ||
3064 | to. | ||
3065 | |||
3066 | 0x034 | R | QueueNumMax | ||
3067 | Maximum virtual queue size. | ||
3068 | Reading from the register returns the maximum size of the queue | ||
3069 | the Host is ready to process or zero (0x0) if the queue is not | ||
3070 | available. This applies to the queue selected by writing to | ||
3071 | QueueSel and is allowed only when QueuePFN is set to zero | ||
3072 | (0x0), so when the queue is not actively used. | ||
3073 | |||
3074 | 0x038 | W | QueueNum | ||
3075 | Virtual queue size. | ||
3076 | Queue size is a number of elements in the queue, therefore size | ||
3077 | of the descriptor table and both available and used rings. | ||
3078 | Writing to this register notifies the Host what size of the | ||
3079 | queue the Guest will use. This applies to the queue selected by | ||
3080 | writing to QueueSel. | ||
3081 | |||
3082 | 0x03c | W | QueueAlign | ||
3083 | Used Ring alignment in the virtual queue. | ||
3084 | Writing to this register notifies the Host about alignment | ||
3085 | boundary of the Used Ring in bytes. This value must be a power | ||
3086 | of 2 and applies to the queue selected by writing to QueueSel. | ||
3087 | |||
3088 | 0x040 | RW | QueuePFN | ||
3089 | Guest physical page number of the virtual queue. | ||
3090 | Writing to this register notifies the host about location of the | ||
3091 | virtual queue in the Guest's physical address space. This value | ||
3092 | is the index number of a page starting with the queue | ||
3093 | Descriptor Table. Value zero (0x0) means physical address zero | ||
3094 | (0x00000000) and is illegal. When the Guest stops using the | ||
3095 | queue it must write zero (0x0) to this register. | ||
3096 | Reading from this register returns the currently used page | ||
3097 | number of the queue, therefore a value other than zero (0x0) | ||
3098 | means that the queue is in use. | ||
3099 | Both read and write accesses apply to the queue selected by | ||
3100 | writing to QueueSel. | ||
3101 | |||
3102 | 0x050 | W | QueueNotify | ||
3103 | Queue notifier. | ||
3104 | Writing a queue index to this register notifies the Host that | ||
3105 | there are new buffers to process in the queue. | ||
3106 | |||
3107 | 0x060 | R | InterruptStatus | ||
3108 | Interrupt status. | ||
3109 | Reading from this register returns a bit mask of interrupts | ||
3110 | asserted by the device. An interrupt is asserted if the | ||
3111 | corresponding bit is set, ie. equals one (1). | ||
3112 | |||
3113 | Bit 0 | Used Ring Update | ||
3114 | This interrupt is asserted when the Host has updated the Used | ||
3115 | Ring in at least one of the active virtual queues. | ||
3116 | |||
3117 | Bit 1 | Configuration change | ||
3118 | This interrupt is asserted when configuration of the device has | ||
3119 | changed. | ||
3120 | |||
3121 | 0x064 | W | InterruptACK | ||
3122 | Interrupt acknowledge. | ||
3123 | Writing to this register notifies the Host that the Guest | ||
3124 | finished handling interrupts. Set bits in the value clear the | ||
3125 | corresponding bits of the InterruptStatus register. | ||
3126 | |||
3127 | 0x070 | RW | Status | ||
3128 | Device status. | ||
3129 | Reading from this register returns the current device status | ||
3130 | flags. | ||
3131 | Writing non-zero values to this register sets the status flags, | ||
3132 | indicating the Guest progress. Writing zero (0x0) to this | ||
3133 | register triggers a device reset. | ||
3134 | Also see [sub:Device-Initialization-Sequence] | ||
3135 | |||
3136 | 0x100+ | RW | Config | ||
3137 | Device-specific configuration space starts at an offset 0x100 | ||
3138 | and is accessed with byte alignment. Its meaning and size | ||
3139 | depends on the device and the driver. | ||
3140 | |||
3141 | Virtual queue size is a number of elements in the queue, | ||
3142 | therefore size of the descriptor table and both available and | ||
3143 | used rings. | ||
3144 | |||
3145 | The endianness of the registers follows the native endianness of | ||
3146 | the Guest. Writing to registers described as “R” and reading from | ||
3147 | registers described as “W” is not permitted and can cause | ||
3148 | undefined behavior. | ||
3149 | |||
3150 | The device initialization is performed as described in [sub:Device-Initialization-Sequence] | ||
3151 | with one exception: the Guest must notify the Host about its | ||
3152 | page size, writing the size in bytes to GuestPageSize register | ||
3153 | before the initialization is finished. | ||
3154 | |||
3155 | The memory mapped virtio devices generate a single interrupt only, | ||
3156 | therefore no special configuration is required. | ||
3157 | |||
3158 | Virtqueue Configuration | ||
3159 | |||
3160 | The virtual queue configuration is performed in a similar way to | ||
3161 | the one described in [sec:Virtqueue-Configuration] with a few | ||
3162 | additional operations: | ||
3163 | |||
3164 | Select the queue writing its index (first queue is 0) to the | ||
3165 | QueueSel register. | ||
3166 | |||
3167 | Check if the queue is not already in use: read QueuePFN | ||
3168 | register, returned value should be zero (0x0). | ||
3169 | |||
3170 | Read maximum queue size (number of elements) from the | ||
3171 | QueueNumMax register. If the returned value is zero (0x0) the | ||
3172 | queue is not available. | ||
3173 | |||
3174 | Allocate and zero the queue pages in contiguous virtual memory, | ||
3175 | aligning the Used Ring to an optimal boundary (usually page | ||
3176 | size). Size of the allocated queue may be smaller than or equal | ||
3177 | to the maximum size returned by the Host. | ||
3178 | |||
3179 | Notify the Host about the queue size by writing the size to | ||
3180 | QueueNum register. | ||
3181 | |||
3182 | Notify the Host about the used alignment by writing its value | ||
3183 | in bytes to QueueAlign register. | ||
3184 | |||
3185 | Write the physical number of the first page of the queue to the | ||
3186 | QueuePFN register. | ||
3187 | |||
3188 | The queue and the device are ready to begin normal operations | ||
3189 | now. | ||
3190 | |||
3191 | Device Operation | ||
3192 | |||
3193 | The memory mapped virtio device behaves in the same way as | ||
3194 | described in [sec:Device-Operation], with the following | ||
3195 | exceptions: | ||
3196 | |||
3197 | The device is notified about new buffers available in a queue | ||
3198 | by writing the queue index to register QueueNotify instead of the | ||
3199 | virtio header in PCI I/O space ([sub:Notifying-The-Device]). | ||
3200 | |||
3201 | The memory mapped virtio device uses a single, dedicated | ||
3202 | interrupt signal, which is raised when at least one of the | ||
3203 | interrupts described in the InterruptStatus register | ||
3204 | description is asserted. After receiving an interrupt, the | ||
3205 | driver must read the InterruptStatus register to check what | ||
3206 | caused the interrupt (see the register description). After the | ||
3207 | interrupt is handled, the driver must acknowledge it by writing | ||
3208 | a bit mask corresponding to the serviced interrupt to the | ||
3209 | InterruptACK register. | ||
3210 | |||
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 0d39f2f4294a..693187df7601 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -29,9 +29,6 @@ struct virtio_blk | |||
29 | /* The disk structure for the kernel. */ | 29 | /* The disk structure for the kernel. */ |
30 | struct gendisk *disk; | 30 | struct gendisk *disk; |
31 | 31 | ||
32 | /* Request tracking. */ | ||
33 | struct list_head reqs; | ||
34 | |||
35 | mempool_t *pool; | 32 | mempool_t *pool; |
36 | 33 | ||
37 | /* Process context for config space updates */ | 34 | /* Process context for config space updates */ |
@@ -55,7 +52,6 @@ struct virtio_blk | |||
55 | 52 | ||
56 | struct virtblk_req | 53 | struct virtblk_req |
57 | { | 54 | { |
58 | struct list_head list; | ||
59 | struct request *req; | 55 | struct request *req; |
60 | struct virtio_blk_outhdr out_hdr; | 56 | struct virtio_blk_outhdr out_hdr; |
61 | struct virtio_scsi_inhdr in_hdr; | 57 | struct virtio_scsi_inhdr in_hdr; |
@@ -99,7 +95,6 @@ static void blk_done(struct virtqueue *vq) | |||
99 | } | 95 | } |
100 | 96 | ||
101 | __blk_end_request_all(vbr->req, error); | 97 | __blk_end_request_all(vbr->req, error); |
102 | list_del(&vbr->list); | ||
103 | mempool_free(vbr, vblk->pool); | 98 | mempool_free(vbr, vblk->pool); |
104 | } | 99 | } |
105 | /* In case queue is stopped waiting for more buffers. */ | 100 | /* In case queue is stopped waiting for more buffers. */ |
@@ -184,7 +179,6 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | |||
184 | return false; | 179 | return false; |
185 | } | 180 | } |
186 | 181 | ||
187 | list_add_tail(&vbr->list, &vblk->reqs); | ||
188 | return true; | 182 | return true; |
189 | } | 183 | } |
190 | 184 | ||
@@ -437,7 +431,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
437 | goto out_free_index; | 431 | goto out_free_index; |
438 | } | 432 | } |
439 | 433 | ||
440 | INIT_LIST_HEAD(&vblk->reqs); | ||
441 | spin_lock_init(&vblk->lock); | 434 | spin_lock_init(&vblk->lock); |
442 | vblk->vdev = vdev; | 435 | vblk->vdev = vdev; |
443 | vblk->sg_elems = sg_elems; | 436 | vblk->sg_elems = sg_elems; |
@@ -583,21 +576,29 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) | |||
583 | { | 576 | { |
584 | struct virtio_blk *vblk = vdev->priv; | 577 | struct virtio_blk *vblk = vdev->priv; |
585 | int index = vblk->index; | 578 | int index = vblk->index; |
579 | struct virtblk_req *vbr; | ||
580 | unsigned long flags; | ||
586 | 581 | ||
587 | /* Prevent config work handler from accessing the device. */ | 582 | /* Prevent config work handler from accessing the device. */ |
588 | mutex_lock(&vblk->config_lock); | 583 | mutex_lock(&vblk->config_lock); |
589 | vblk->config_enable = false; | 584 | vblk->config_enable = false; |
590 | mutex_unlock(&vblk->config_lock); | 585 | mutex_unlock(&vblk->config_lock); |
591 | 586 | ||
592 | /* Nothing should be pending. */ | ||
593 | BUG_ON(!list_empty(&vblk->reqs)); | ||
594 | |||
595 | /* Stop all the virtqueues. */ | 587 | /* Stop all the virtqueues. */ |
596 | vdev->config->reset(vdev); | 588 | vdev->config->reset(vdev); |
597 | 589 | ||
598 | flush_work(&vblk->config_work); | 590 | flush_work(&vblk->config_work); |
599 | 591 | ||
600 | del_gendisk(vblk->disk); | 592 | del_gendisk(vblk->disk); |
593 | |||
594 | /* Abort requests dispatched to driver. */ | ||
595 | spin_lock_irqsave(&vblk->lock, flags); | ||
596 | while ((vbr = virtqueue_detach_unused_buf(vblk->vq))) { | ||
597 | __blk_end_request_all(vbr->req, -EIO); | ||
598 | mempool_free(vbr, vblk->pool); | ||
599 | } | ||
600 | spin_unlock_irqrestore(&vblk->lock, flags); | ||
601 | |||
601 | blk_cleanup_queue(vblk->disk->queue); | 602 | blk_cleanup_queue(vblk->disk->queue); |
602 | put_disk(vblk->disk); | 603 | put_disk(vblk->disk); |
603 | mempool_destroy(vblk->pool); | 604 | mempool_destroy(vblk->pool); |
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 1a61939b85fc..f38b17a86c35 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig | |||
@@ -46,4 +46,15 @@ config VIRTIO_BALLOON | |||
46 | 46 | ||
47 | If unsure, say N. | 47 | If unsure, say N. |
48 | 48 | ||
49 | config VIRTIO_MMIO_CMDLINE_DEVICES | ||
50 | bool "Memory mapped virtio devices parameter parsing" | ||
51 | depends on VIRTIO_MMIO | ||
52 | ---help--- | ||
53 | Allow virtio-mmio device instantiation via the kernel command line | ||
54 | or module parameters. Be aware that using incorrect parameters (base | ||
55 | address in particular) can crash your system - you have been warned. | ||
56 | See Documentation/kernel-parameters.txt for details. | ||
57 | |||
58 | If unsure, say 'N'. | ||
59 | |||
49 | endmenu | 60 | endmenu |
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 984c501c258f..f3558070e375 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c | |||
@@ -2,9 +2,10 @@ | |||
2 | #include <linux/spinlock.h> | 2 | #include <linux/spinlock.h> |
3 | #include <linux/virtio_config.h> | 3 | #include <linux/virtio_config.h> |
4 | #include <linux/module.h> | 4 | #include <linux/module.h> |
5 | #include <linux/idr.h> | ||
5 | 6 | ||
6 | /* Unique numbering for virtio devices. */ | 7 | /* Unique numbering for virtio devices. */ |
7 | static unsigned int dev_index; | 8 | static DEFINE_IDA(virtio_index_ida); |
8 | 9 | ||
9 | static ssize_t device_show(struct device *_d, | 10 | static ssize_t device_show(struct device *_d, |
10 | struct device_attribute *attr, char *buf) | 11 | struct device_attribute *attr, char *buf) |
@@ -193,7 +194,11 @@ int register_virtio_device(struct virtio_device *dev) | |||
193 | dev->dev.bus = &virtio_bus; | 194 | dev->dev.bus = &virtio_bus; |
194 | 195 | ||
195 | /* Assign a unique device index and hence name. */ | 196 | /* Assign a unique device index and hence name. */ |
196 | dev->index = dev_index++; | 197 | err = ida_simple_get(&virtio_index_ida, 0, 0, GFP_KERNEL); |
198 | if (err < 0) | ||
199 | goto out; | ||
200 | |||
201 | dev->index = err; | ||
197 | dev_set_name(&dev->dev, "virtio%u", dev->index); | 202 | dev_set_name(&dev->dev, "virtio%u", dev->index); |
198 | 203 | ||
199 | /* We always start by resetting the device, in case a previous | 204 | /* We always start by resetting the device, in case a previous |
@@ -208,6 +213,7 @@ int register_virtio_device(struct virtio_device *dev) | |||
208 | /* device_register() causes the bus infrastructure to look for a | 213 | /* device_register() causes the bus infrastructure to look for a |
209 | * matching driver. */ | 214 | * matching driver. */ |
210 | err = device_register(&dev->dev); | 215 | err = device_register(&dev->dev); |
216 | out: | ||
211 | if (err) | 217 | if (err) |
212 | add_status(dev, VIRTIO_CONFIG_S_FAILED); | 218 | add_status(dev, VIRTIO_CONFIG_S_FAILED); |
213 | return err; | 219 | return err; |
@@ -217,6 +223,7 @@ EXPORT_SYMBOL_GPL(register_virtio_device); | |||
217 | void unregister_virtio_device(struct virtio_device *dev) | 223 | void unregister_virtio_device(struct virtio_device *dev) |
218 | { | 224 | { |
219 | device_unregister(&dev->dev); | 225 | device_unregister(&dev->dev); |
226 | ida_simple_remove(&virtio_index_ida, dev->index); | ||
220 | } | 227 | } |
221 | EXPORT_SYMBOL_GPL(unregister_virtio_device); | 228 | EXPORT_SYMBOL_GPL(unregister_virtio_device); |
222 | 229 | ||
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 8807fe501d20..bfbc15ca38dd 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c | |||
@@ -381,21 +381,25 @@ out: | |||
381 | return err; | 381 | return err; |
382 | } | 382 | } |
383 | 383 | ||
384 | static void __devexit virtballoon_remove(struct virtio_device *vdev) | 384 | static void remove_common(struct virtio_balloon *vb) |
385 | { | 385 | { |
386 | struct virtio_balloon *vb = vdev->priv; | ||
387 | |||
388 | kthread_stop(vb->thread); | ||
389 | |||
390 | /* There might be pages left in the balloon: free them. */ | 386 | /* There might be pages left in the balloon: free them. */ |
391 | while (vb->num_pages) | 387 | while (vb->num_pages) |
392 | leak_balloon(vb, vb->num_pages); | 388 | leak_balloon(vb, vb->num_pages); |
393 | update_balloon_size(vb); | 389 | update_balloon_size(vb); |
394 | 390 | ||
395 | /* Now we reset the device so we can clean up the queues. */ | 391 | /* Now we reset the device so we can clean up the queues. */ |
396 | vdev->config->reset(vdev); | 392 | vb->vdev->config->reset(vb->vdev); |
397 | 393 | ||
398 | vdev->config->del_vqs(vdev); | 394 | vb->vdev->config->del_vqs(vb->vdev); |
395 | } | ||
396 | |||
397 | static void __devexit virtballoon_remove(struct virtio_device *vdev) | ||
398 | { | ||
399 | struct virtio_balloon *vb = vdev->priv; | ||
400 | |||
401 | kthread_stop(vb->thread); | ||
402 | remove_common(vb); | ||
399 | kfree(vb); | 403 | kfree(vb); |
400 | } | 404 | } |
401 | 405 | ||
@@ -409,17 +413,11 @@ static int virtballoon_freeze(struct virtio_device *vdev) | |||
409 | * function is called. | 413 | * function is called. |
410 | */ | 414 | */ |
411 | 415 | ||
412 | while (vb->num_pages) | 416 | remove_common(vb); |
413 | leak_balloon(vb, vb->num_pages); | ||
414 | update_balloon_size(vb); | ||
415 | |||
416 | /* Ensure we don't get any more requests from the host */ | ||
417 | vdev->config->reset(vdev); | ||
418 | vdev->config->del_vqs(vdev); | ||
419 | return 0; | 417 | return 0; |
420 | } | 418 | } |
421 | 419 | ||
422 | static int restore_common(struct virtio_device *vdev) | 420 | static int virtballoon_restore(struct virtio_device *vdev) |
423 | { | 421 | { |
424 | struct virtio_balloon *vb = vdev->priv; | 422 | struct virtio_balloon *vb = vdev->priv; |
425 | int ret; | 423 | int ret; |
@@ -432,11 +430,6 @@ static int restore_common(struct virtio_device *vdev) | |||
432 | update_balloon_size(vb); | 430 | update_balloon_size(vb); |
433 | return 0; | 431 | return 0; |
434 | } | 432 | } |
435 | |||
436 | static int virtballoon_restore(struct virtio_device *vdev) | ||
437 | { | ||
438 | return restore_common(vdev); | ||
439 | } | ||
440 | #endif | 433 | #endif |
441 | 434 | ||
442 | static unsigned int features[] = { | 435 | static unsigned int features[] = { |
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 01d6dc250d5c..453db0c403d8 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c | |||
@@ -6,6 +6,50 @@ | |||
6 | * This module allows virtio devices to be used over a virtual, memory mapped | 6 | * This module allows virtio devices to be used over a virtual, memory mapped |
7 | * platform device. | 7 | * platform device. |
8 | * | 8 | * |
9 | * The guest device(s) may be instantiated in one of three equivalent ways: | ||
10 | * | ||
11 | * 1. Static platform device in board's code, eg.: | ||
12 | * | ||
13 | * static struct platform_device v2m_virtio_device = { | ||
14 | * .name = "virtio-mmio", | ||
15 | * .id = -1, | ||
16 | * .num_resources = 2, | ||
17 | * .resource = (struct resource []) { | ||
18 | * { | ||
19 | * .start = 0x1001e000, | ||
20 | * .end = 0x1001e0ff, | ||
21 | * .flags = IORESOURCE_MEM, | ||
22 | * }, { | ||
23 | * .start = 42 + 32, | ||
24 | * .end = 42 + 32, | ||
25 | * .flags = IORESOURCE_IRQ, | ||
26 | * }, | ||
27 | * } | ||
28 | * }; | ||
29 | * | ||
30 | * 2. Device Tree node, eg.: | ||
31 | * | ||
32 | * virtio_block@1e000 { | ||
33 | * compatible = "virtio,mmio"; | ||
34 | * reg = <0x1e000 0x100>; | ||
35 | * interrupts = <42>; | ||
36 | * } | ||
37 | * | ||
38 | * 3. Kernel module (or command line) parameter. Can be used more than once - | ||
39 | * one device will be created for each one. Syntax: | ||
40 | * | ||
41 | * [virtio_mmio.]device=<size>@<baseaddr>:<irq>[:<id>] | ||
42 | * where: | ||
43 | * <size> := size (can use standard suffixes like K, M or G) | ||
44 | * <baseaddr> := physical base address | ||
45 | * <irq> := interrupt number (as passed to request_irq()) | ||
46 | * <id> := (optional) platform device id | ||
47 | * eg.: | ||
48 | * virtio_mmio.device=0x100@0x100b0000:48 \ | ||
49 | * virtio_mmio.device=1K@0x1001e000:74 | ||
50 | * | ||
51 | * | ||
52 | * | ||
9 | * Registers layout (all 32-bit wide): | 53 | * Registers layout (all 32-bit wide): |
10 | * | 54 | * |
11 | * offset d. name description | 55 | * offset d. name description |
@@ -42,6 +86,8 @@ | |||
42 | * See the COPYING file in the top-level directory. | 86 | * See the COPYING file in the top-level directory. |
43 | */ | 87 | */ |
44 | 88 | ||
89 | #define pr_fmt(fmt) "virtio-mmio: " fmt | ||
90 | |||
45 | #include <linux/highmem.h> | 91 | #include <linux/highmem.h> |
46 | #include <linux/interrupt.h> | 92 | #include <linux/interrupt.h> |
47 | #include <linux/io.h> | 93 | #include <linux/io.h> |
@@ -449,6 +495,122 @@ static int __devexit virtio_mmio_remove(struct platform_device *pdev) | |||
449 | 495 | ||
450 | 496 | ||
451 | 497 | ||
498 | /* Devices list parameter */ | ||
499 | |||
500 | #if defined(CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES) | ||
501 | |||
502 | static struct device vm_cmdline_parent = { | ||
503 | .init_name = "virtio-mmio-cmdline", | ||
504 | }; | ||
505 | |||
506 | static int vm_cmdline_parent_registered; | ||
507 | static int vm_cmdline_id; | ||
508 | |||
509 | static int vm_cmdline_set(const char *device, | ||
510 | const struct kernel_param *kp) | ||
511 | { | ||
512 | int err; | ||
513 | struct resource resources[2] = {}; | ||
514 | char *str; | ||
515 | long long int base; | ||
516 | int processed, consumed = 0; | ||
517 | struct platform_device *pdev; | ||
518 | |||
519 | resources[0].flags = IORESOURCE_MEM; | ||
520 | resources[1].flags = IORESOURCE_IRQ; | ||
521 | |||
522 | resources[0].end = memparse(device, &str) - 1; | ||
523 | |||
524 | processed = sscanf(str, "@%lli:%u%n:%d%n", | ||
525 | &base, &resources[1].start, &consumed, | ||
526 | &vm_cmdline_id, &consumed); | ||
527 | |||
528 | if (processed < 2 || processed > 3 || str[consumed]) | ||
529 | return -EINVAL; | ||
530 | |||
531 | resources[0].start = base; | ||
532 | resources[0].end += base; | ||
533 | resources[1].end = resources[1].start; | ||
534 | |||
535 | if (!vm_cmdline_parent_registered) { | ||
536 | err = device_register(&vm_cmdline_parent); | ||
537 | if (err) { | ||
538 | pr_err("Failed to register parent device!\n"); | ||
539 | return err; | ||
540 | } | ||
541 | vm_cmdline_parent_registered = 1; | ||
542 | } | ||
543 | |||
544 | pr_info("Registering device virtio-mmio.%d at 0x%llx-0x%llx, IRQ %d.\n", | ||
545 | vm_cmdline_id, | ||
546 | (unsigned long long)resources[0].start, | ||
547 | (unsigned long long)resources[0].end, | ||
548 | (int)resources[1].start); | ||
549 | |||
550 | pdev = platform_device_register_resndata(&vm_cmdline_parent, | ||
551 | "virtio-mmio", vm_cmdline_id++, | ||
552 | resources, ARRAY_SIZE(resources), NULL, 0); | ||
553 | if (IS_ERR(pdev)) | ||
554 | return PTR_ERR(pdev); | ||
555 | |||
556 | return 0; | ||
557 | } | ||
558 | |||
559 | static int vm_cmdline_get_device(struct device *dev, void *data) | ||
560 | { | ||
561 | char *buffer = data; | ||
562 | unsigned int len = strlen(buffer); | ||
563 | struct platform_device *pdev = to_platform_device(dev); | ||
564 | |||
565 | snprintf(buffer + len, PAGE_SIZE - len, "0x%llx@0x%llx:%llu:%d\n", | ||
566 | pdev->resource[0].end - pdev->resource[0].start + 1ULL, | ||
567 | (unsigned long long)pdev->resource[0].start, | ||
568 | (unsigned long long)pdev->resource[1].start, | ||
569 | pdev->id); | ||
570 | return 0; | ||
571 | } | ||
572 | |||
573 | static int vm_cmdline_get(char *buffer, const struct kernel_param *kp) | ||
574 | { | ||
575 | buffer[0] = '\0'; | ||
576 | device_for_each_child(&vm_cmdline_parent, buffer, | ||
577 | vm_cmdline_get_device); | ||
578 | return strlen(buffer) + 1; | ||
579 | } | ||
580 | |||
581 | static struct kernel_param_ops vm_cmdline_param_ops = { | ||
582 | .set = vm_cmdline_set, | ||
583 | .get = vm_cmdline_get, | ||
584 | }; | ||
585 | |||
586 | device_param_cb(device, &vm_cmdline_param_ops, NULL, S_IRUSR); | ||
587 | |||
588 | static int vm_unregister_cmdline_device(struct device *dev, | ||
589 | void *data) | ||
590 | { | ||
591 | platform_device_unregister(to_platform_device(dev)); | ||
592 | |||
593 | return 0; | ||
594 | } | ||
595 | |||
596 | static void vm_unregister_cmdline_devices(void) | ||
597 | { | ||
598 | if (vm_cmdline_parent_registered) { | ||
599 | device_for_each_child(&vm_cmdline_parent, NULL, | ||
600 | vm_unregister_cmdline_device); | ||
601 | device_unregister(&vm_cmdline_parent); | ||
602 | vm_cmdline_parent_registered = 0; | ||
603 | } | ||
604 | } | ||
605 | |||
606 | #else | ||
607 | |||
608 | static void vm_unregister_cmdline_devices(void) | ||
609 | { | ||
610 | } | ||
611 | |||
612 | #endif | ||
613 | |||
452 | /* Platform driver */ | 614 | /* Platform driver */ |
453 | 615 | ||
454 | static struct of_device_id virtio_mmio_match[] = { | 616 | static struct of_device_id virtio_mmio_match[] = { |
@@ -475,6 +637,7 @@ static int __init virtio_mmio_init(void) | |||
475 | static void __exit virtio_mmio_exit(void) | 637 | static void __exit virtio_mmio_exit(void) |
476 | { | 638 | { |
477 | platform_driver_unregister(&virtio_mmio_driver); | 639 | platform_driver_unregister(&virtio_mmio_driver); |
640 | vm_unregister_cmdline_devices(); | ||
478 | } | 641 | } |
479 | 642 | ||
480 | module_init(virtio_mmio_init); | 643 | module_init(virtio_mmio_init); |
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 7323a3390206..fc457f452f64 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h | |||
@@ -74,15 +74,6 @@ | |||
74 | * @set_status: write the status byte | 74 | * @set_status: write the status byte |
75 | * vdev: the virtio_device | 75 | * vdev: the virtio_device |
76 | * status: the new status byte | 76 | * status: the new status byte |
77 | * @request_vqs: request the specified number of virtqueues | ||
78 | * vdev: the virtio_device | ||
79 | * max_vqs: the max number of virtqueues we want | ||
80 | * If supplied, must call before any virtqueues are instantiated. | ||
81 | * To modify the max number of virtqueues after request_vqs has been | ||
82 | * called, call free_vqs and then request_vqs with a new value. | ||
83 | * @free_vqs: cleanup resources allocated by request_vqs | ||
84 | * vdev: the virtio_device | ||
85 | * If supplied, must call after all virtqueues have been deleted. | ||
86 | * @reset: reset the device | 77 | * @reset: reset the device |
87 | * vdev: the virtio device | 78 | * vdev: the virtio device |
88 | * After this, status and feature negotiation must be done again | 79 | * After this, status and feature negotiation must be done again |
@@ -156,7 +147,7 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, | |||
156 | * @vdev: the virtio device | 147 | * @vdev: the virtio device |
157 | * @fbit: the feature bit | 148 | * @fbit: the feature bit |
158 | * @offset: the type to search for. | 149 | * @offset: the type to search for. |
159 | * @val: a pointer to the value to fill in. | 150 | * @v: a pointer to the value to fill in. |
160 | * | 151 | * |
161 | * The return value is -ENOENT if the feature doesn't exist. Otherwise | 152 | * The return value is -ENOENT if the feature doesn't exist. Otherwise |
162 | * the config value is copied into whatever is pointed to by v. */ | 153 | * the config value is copied into whatever is pointed to by v. */ |
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 3d432068f627..5af18d11b518 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c | |||
@@ -615,7 +615,8 @@ static void p9_virtio_remove(struct virtio_device *vdev) | |||
615 | { | 615 | { |
616 | struct virtio_chan *chan = vdev->priv; | 616 | struct virtio_chan *chan = vdev->priv; |
617 | 617 | ||
618 | BUG_ON(chan->inuse); | 618 | if (chan->inuse) |
619 | p9_virtio_close(chan->client); | ||
619 | vdev->config->del_vqs(vdev); | 620 | vdev->config->del_vqs(vdev); |
620 | 621 | ||
621 | mutex_lock(&virtio_9p_lock); | 622 | mutex_lock(&virtio_9p_lock); |