1 files changed, 210 insertions, 35 deletions
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index c7a2c208f6ea..79e1b292c030 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -22,23 +22,27 @@
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/hrtimer.h>
 /* virtio guest is communicating with a virtual "device" that actually runs on
 * a host processor.  Memory barriers are used to control SMP effects. */
 #ifdef CONFIG_SMP
 /* Where possible, use SMP barriers which are more lightweight than mandatory
 * barriers, because mandatory barriers control MMIO effects on accesses
- * through relaxed memory I/O windows (which virtio does not use). */
+ * through relaxed memory I/O windows (which virtio-pci does not use). */
-#define virtio_mb() smp_mb()
+#define virtio_mb(vq) \
-#define virtio_rmb() smp_rmb()
+        do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0)
-#define virtio_wmb() smp_wmb()
+#define virtio_rmb(vq) \
+        do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
+#define virtio_wmb(vq) \
+        do { if ((vq)->weak_barriers) smp_wmb(); else wmb(); } while(0)
 #else
 /* We must force memory ordering even if guest is UP since host could be
 * running on another CPU, but SMP barriers are defined to barrier() in that
 * configuration. So fall back to mandatory barriers instead. */
-#define virtio_mb() mb()
+#define virtio_mb(vq) mb()
-#define virtio_rmb() rmb()
+#define virtio_rmb(vq) rmb()
-#define virtio_wmb() wmb()
+#define virtio_wmb(vq) wmb()
 #endif
 #ifdef DEBUG
@@ -77,6 +81,9 @@ struct vring_virtqueue
        /* Actual memory layout for this queue */
        struct vring vring;
+        /* Can we use weak barriers? */
+        bool weak_barriers;
        /* Other side has made a mess, don't try any more. */
        bool broken;
@@ -102,6 +109,10 @@ struct vring_virtqueue
 #ifdef DEBUG
        /* They're supposed to lock for us. */
        unsigned int in_use;
+        /* Figure out if their kicks are too delayed. */
+        bool last_add_time_valid;
+        ktime_t last_add_time;
 #endif
        /* Tokens for callbacks. */
@@ -160,12 +171,29 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
        return head;
 }
-int virtqueue_add_buf_gfp(struct virtqueue *_vq,
+/**
-                          struct scatterlist sg[],
+ * virtqueue_add_buf - expose buffer to other end
-                          unsigned int out,
+ * @vq: the struct virtqueue we're talking about.
-                          unsigned int in,
+ * @sg: the description of the buffer(s).
-                          void *data,
+ * @out: the number of sg readable by other side
-                          gfp_t gfp)
+ * @in: the number of sg which are writable (after readable ones)
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns remaining capacity of queue or a negative error
+ * (ie. ENOSPC).  Note that it only really makes sense to treat all
+ * positive return values as "available": indirect buffers mean that
+ * we can put an entire sg[] array inside a single queue entry.
+ */
+int virtqueue_add_buf(struct virtqueue *_vq,
+                      struct scatterlist sg[],
+                      unsigned int out,
+                      unsigned int in,
+                      void *data,
+                      gfp_t gfp)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
        unsigned int i, avail, uninitialized_var(prev);
@@ -175,6 +203,19 @@ int virtqueue_add_buf_gfp(struct virtqueue *_vq,
        BUG_ON(data == NULL);
+#ifdef DEBUG
+        {
+                ktime_t now = ktime_get();
+                /* No kick or get, with .1 second between?  Warn. */
+                if (vq->last_add_time_valid)
+                        WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
+                                            > 100);
+                vq->last_add_time = now;
+                vq->last_add_time_valid = true;
+        }
+#endif
        /* If the host supports indirect descriptor tables, and we have multiple
         * buffers, then go indirect. FIXME: tune this threshold */
        if (vq->indirect && (out + in) > 1 && vq->num_free) {
@@ -227,40 +268,102 @@ add_head:
        vq->data[head] = data;
        /* Put entry in available array (but don't update avail->idx until they
-         * do sync).  FIXME: avoid modulus here? */
+         * do sync). */
-        avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num;
+        avail = (vq->vring.avail->idx & (vq->vring.num-1));
        vq->vring.avail->ring[avail] = head;
+        /* Descriptors and available array need to be set before we expose the
+         * new available array entries. */
+        virtio_wmb(vq);
+        vq->vring.avail->idx++;
+        vq->num_added++;
+        /* This is very unlikely, but theoretically possible.  Kick
+         * just in case. */
+        if (unlikely(vq->num_added == (1 << 16) - 1))
+                virtqueue_kick(_vq);
        pr_debug("Added buffer head %i to %p\n", head, vq);
        END_USE(vq);
        return vq->num_free;
 }
-EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);
+EXPORT_SYMBOL_GPL(virtqueue_add_buf);
-void virtqueue_kick(struct virtqueue *_vq)
+/**
+ * virtqueue_kick_prepare - first half of split virtqueue_kick call.
+ * @vq: the struct virtqueue
+ *
+ * Instead of virtqueue_kick(), you can do:
+ *      if (virtqueue_kick_prepare(vq))
+ *              virtqueue_notify(vq);
+ *
+ * This is sometimes useful because the virtqueue_kick_prepare() needs
+ * to be serialized, but the actual virtqueue_notify() call does not.
+ */
+bool virtqueue_kick_prepare(struct virtqueue *_vq)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
        u16 new, old;
+        bool needs_kick;
        START_USE(vq);
-        /* Descriptors and available array need to be set before we expose the
-         * new available array entries. */
-        virtio_wmb();
+        /* avail->idx stores from add_buf must be visible before we read the
+         * other side's event index / flags: that needs a full barrier. */
+        virtio_mb(vq);
-        old = vq->vring.avail->idx;
+        old = vq->vring.avail->idx - vq->num_added;
-        new = vq->vring.avail->idx = old + vq->num_added;
+        new = vq->vring.avail->idx;
        vq->num_added = 0;
-        /* Need to update avail index before checking if we should notify */
+#ifdef DEBUG
-        virtio_mb();
+        if (vq->last_add_time_valid) {
+                WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
-        if (vq->event ?
+                                              vq->last_add_time)) > 100);
-            vring_need_event(vring_avail_event(&vq->vring), new, old) :
+        }
-            !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
+        vq->last_add_time_valid = false;
-                /* Prod other side to tell it about changes. */
+#endif
-                vq->notify(&vq->vq);
+        if (vq->event) {
+                needs_kick = vring_need_event(vring_avail_event(&vq->vring),
+                                              new, old);
+        } else {
+                needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
+        }
        END_USE(vq);
+        return needs_kick;
+}
+EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
+/**
+ * virtqueue_notify - second half of split virtqueue_kick call.
+ * @vq: the struct virtqueue
+ *
+ * Calls the queue's notify hook unconditionally; need not be serialized.
+ */
+void virtqueue_notify(struct virtqueue *_vq)
+{
+        struct vring_virtqueue *vq = to_vvq(_vq);
+        /* Prod other side to tell it about changes. */
+        vq->notify(_vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_notify);
+/**
+ * virtqueue_kick - update after add_buf
+ * @vq: the struct virtqueue
+ *
+ * After one or more virtqueue_add_buf calls, invoke this to kick the
+ * other side; it notifies only if virtqueue_kick_prepare() returns true.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
+void virtqueue_kick(struct virtqueue *vq)
+{
+        if (virtqueue_kick_prepare(vq))
+                virtqueue_notify(vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick);
@@ -294,11 +397,28 @@ static inline bool more_used(const struct vring_virtqueue *vq)
        return vq->last_used_idx != vq->vring.used->idx;
 }
+/**
+ * virtqueue_get_buf - get the next used buffer
+ * @vq: the struct virtqueue we're talking about.
+ * @len: the length written into the buffer
+ *
+ * If the other side wrote data into the buffer, @len will be set to the
+ * amount written.  This means you don't need to clear the buffer
+ * beforehand to ensure there's no data leakage in the case of short
+ * writes.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Returns NULL if there are no used buffers, or the "data" token
+ * handed to virtqueue_add_buf().
+ */
 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
        void *ret;
        unsigned int i;
+        u16 last_used;
        START_USE(vq);
@@ -314,10 +434,11 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
        }
        /* Only get used array entries after they have been exposed by host. */
-        virtio_rmb();
+        virtio_rmb(vq);
-        i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
+        last_used = (vq->last_used_idx & (vq->vring.num - 1));
-        *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
+        i = vq->vring.used->ring[last_used].id;
+        *len = vq->vring.used->ring[last_used].len;
        if (unlikely(i >= vq->vring.num)) {
                BAD_RING(vq, "id %u out of range\n", i);
@@ -337,14 +458,27 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
         * the read in the next get_buf call. */
        if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
                vring_used_event(&vq->vring) = vq->last_used_idx;
-                virtio_mb();
+                virtio_mb(vq);
        }
+#ifdef DEBUG
+        vq->last_add_time_valid = false;
+#endif
        END_USE(vq);
        return ret;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
+/**
+ * virtqueue_disable_cb - disable callbacks
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * Note that this is not necessarily synchronous, hence unreliable and only
+ * useful as an optimization.
+ *
+ * Unlike other operations, this need not be serialized.
+ */
 void virtqueue_disable_cb(struct virtqueue *_vq)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
@@ -353,6 +487,17 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
+/**
+ * virtqueue_enable_cb - restart callbacks after disable_cb.
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * This re-enables callbacks; it returns "false" if there are pending
+ * buffers in the queue, to detect a possible race between the driver
+ * checking for more work, and enabling callbacks.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
 bool virtqueue_enable_cb(struct virtqueue *_vq)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
@@ -366,7 +511,7 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
         * entry. Always do both to keep code simple. */
        vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
        vring_used_event(&vq->vring) = vq->last_used_idx;
-        virtio_mb();
+        virtio_mb(vq);
        if (unlikely(more_used(vq))) {
                END_USE(vq);
                return false;
@@ -377,6 +522,19 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
+/**
+ * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * This re-enables callbacks but hints to the other side to delay
+ * interrupts until most of the available buffers have been processed;
+ * it returns "false" if there are many pending buffers in the queue,
+ * to detect a possible race between the driver checking for more work,
+ * and enabling callbacks.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
@@ -393,7 +551,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
        /* TODO: tune this threshold */
        bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
        vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
-        virtio_mb();
+        virtio_mb(vq);
        if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
                END_USE(vq);
                return false;
@@ -404,6 +562,14 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
+/**
+ * virtqueue_detach_unused_buf - detach first unused buffer
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * Returns NULL or the "data" token handed to virtqueue_add_buf().
+ * This is not valid on an active queue; it is useful only for device
+ * shutdown.
+ */
 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
@@ -453,6 +619,7 @@ EXPORT_SYMBOL_GPL(vring_interrupt);
 struct virtqueue *vring_new_virtqueue(unsigned int num,
                                      unsigned int vring_align,
                                      struct virtio_device *vdev,
+                                      bool weak_barriers,
                                      void *pages,
                                      void (*notify)(struct virtqueue *),
                                      void (*callback)(struct virtqueue *),
@@ -476,12 +643,14 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
        vq->vq.vdev = vdev;
        vq->vq.name = name;
        vq->notify = notify;
+        vq->weak_barriers = weak_barriers;
        vq->broken = false;
        vq->last_used_idx = 0;
        vq->num_added = 0;
        list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
        vq->in_use = false;
+        vq->last_add_time_valid = false;
 #endif
        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
@@ -530,7 +699,13 @@ void vring_transport_features(struct virtio_device *vdev)
 }
 EXPORT_SYMBOL_GPL(vring_transport_features);
-/* return the size of the vring within the virtqueue */
+/**
+ * virtqueue_get_vring_size - return the size of the virtqueue's vring
+ * @vq: the struct virtqueue containing the vring of interest.
+ *
+ * Returns the size of the vring.  This is mainly used for boasting to
+ * userspace.  Unlike other operations, this need not be serialized.
+ */
 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
 {

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index c7a2c208f6ea..79e1b292c030 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c
@@ -22,23 +22,27 @@
22	#include <linux/device.h>	22	#include <linux/device.h>
23	#include <linux/slab.h>	23	#include <linux/slab.h>
24	#include <linux/module.h>	24	#include <linux/module.h>
		25	#include <linux/hrtimer.h>
25		26
26	/* virtio guest is communicating with a virtual "device" that actually runs on	27	/* virtio guest is communicating with a virtual "device" that actually runs on
27	* a host processor. Memory barriers are used to control SMP effects. */	28	* a host processor. Memory barriers are used to control SMP effects. */
28	#ifdef CONFIG_SMP	29	#ifdef CONFIG_SMP
29	/* Where possible, use SMP barriers which are more lightweight than mandatory	30	/* Where possible, use SMP barriers which are more lightweight than mandatory
30	* barriers, because mandatory barriers control MMIO effects on accesses	31	* barriers, because mandatory barriers control MMIO effects on accesses
31	* through relaxed memory I/O windows (which virtio does not use). */	32	* through relaxed memory I/O windows (which virtio-pci does not use). */
32	#define virtio_mb() smp_mb()	33	#define virtio_mb(vq) \
33	#define virtio_rmb() smp_rmb()	34	do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0)
34	#define virtio_wmb() smp_wmb()	35	#define virtio_rmb(vq) \
		36	do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
		37	#define virtio_wmb(vq) \
		38	do { if ((vq)->weak_barriers) smp_wmb(); else wmb(); } while(0)
35	#else	39	#else
36	/* We must force memory ordering even if guest is UP since host could be	40	/* We must force memory ordering even if guest is UP since host could be
37	* running on another CPU, but SMP barriers are defined to barrier() in that	41	* running on another CPU, but SMP barriers are defined to barrier() in that
38	* configuration. So fall back to mandatory barriers instead. */	42	* configuration. So fall back to mandatory barriers instead. */
39	#define virtio_mb() mb()	43	#define virtio_mb(vq) mb()
40	#define virtio_rmb() rmb()	44	#define virtio_rmb(vq) rmb()
41	#define virtio_wmb() wmb()	45	#define virtio_wmb(vq) wmb()
42	#endif	46	#endif
43		47
44	#ifdef DEBUG	48	#ifdef DEBUG
@@ -77,6 +81,9 @@ struct vring_virtqueue
77	/* Actual memory layout for this queue */	81	/* Actual memory layout for this queue */
78	struct vring vring;	82	struct vring vring;
79		83
		84	/* Can we use weak barriers? */
		85	bool weak_barriers;
		86
80	/* Other side has made a mess, don't try any more. */	87	/* Other side has made a mess, don't try any more. */
81	bool broken;	88	bool broken;
82		89
@@ -102,6 +109,10 @@ struct vring_virtqueue
102	#ifdef DEBUG	109	#ifdef DEBUG
103	/* They're supposed to lock for us. */	110	/* They're supposed to lock for us. */
104	unsigned int in_use;	111	unsigned int in_use;
		112
		113	/* Figure out if their kicks are too delayed. */
		114	bool last_add_time_valid;
		115	ktime_t last_add_time;
105	#endif	116	#endif
106		117
107	/* Tokens for callbacks. */	118	/* Tokens for callbacks. */
@@ -160,12 +171,29 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
160	return head;	171	return head;
161	}	172	}
162		173
163	int virtqueue_add_buf_gfp(struct virtqueue *_vq,	174	/**
164	struct scatterlist sg[],	175	* virtqueue_add_buf - expose buffer to other end
165	unsigned int out,	176	* @vq: the struct virtqueue we're talking about.
166	unsigned int in,	177	* @sg: the description of the buffer(s).
167	void *data,	178	* @out: the number of sg readable by other side
168	gfp_t gfp)	179	* @in: the number of sg which are writable (after readable ones)
		180	* @data: the token identifying the buffer.
		181	* @gfp: how to do memory allocations (if necessary).
		182	*
		183	* Caller must ensure we don't call this with other virtqueue operations
		184	* at the same time (except where noted).
		185	*
		186	* Returns remaining capacity of queue or a negative error
		187	* (ie. ENOSPC). Note that it only really makes sense to treat all
		188	* positive return values as "available": indirect buffers mean that
		189	* we can put an entire sg[] array inside a single queue entry.
		190	*/
		191	int virtqueue_add_buf(struct virtqueue *_vq,
		192	struct scatterlist sg[],
		193	unsigned int out,
		194	unsigned int in,
		195	void *data,
		196	gfp_t gfp)
169	{	197	{
170	struct vring_virtqueue *vq = to_vvq(_vq);	198	struct vring_virtqueue *vq = to_vvq(_vq);
171	unsigned int i, avail, uninitialized_var(prev);	199	unsigned int i, avail, uninitialized_var(prev);
@@ -175,6 +203,19 @@ int virtqueue_add_buf_gfp(struct virtqueue *_vq,
175		203
176	BUG_ON(data == NULL);	204	BUG_ON(data == NULL);
177		205
		206	#ifdef DEBUG
		207	{
		208	ktime_t now = ktime_get();
		209
		210	/* No kick or get, with .1 second between? Warn. */
		211	if (vq->last_add_time_valid)
		212	WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
		213	> 100);
		214	vq->last_add_time = now;
		215	vq->last_add_time_valid = true;
		216	}
		217	#endif
		218
178	/* If the host supports indirect descriptor tables, and we have multiple	219	/* If the host supports indirect descriptor tables, and we have multiple
179	* buffers, then go indirect. FIXME: tune this threshold */	220	* buffers, then go indirect. FIXME: tune this threshold */
180	if (vq->indirect && (out + in) > 1 && vq->num_free) {	221	if (vq->indirect && (out + in) > 1 && vq->num_free) {
@@ -227,40 +268,102 @@ add_head:
227	vq->data[head] = data;	268	vq->data[head] = data;
228		269
229	/* Put entry in available array (but don't update avail->idx until they	270	/* Put entry in available array (but don't update avail->idx until they
230	* do sync). FIXME: avoid modulus here? */	271	* do sync). */
231	avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num;	272	avail = (vq->vring.avail->idx & (vq->vring.num-1));
232	vq->vring.avail->ring[avail] = head;	273	vq->vring.avail->ring[avail] = head;
233		274
		275	/* Descriptors and available array need to be set before we expose the
		276	* new available array entries. */
		277	virtio_wmb(vq);
		278	vq->vring.avail->idx++;
		279	vq->num_added++;
		280
		281	/* This is very unlikely, but theoretically possible. Kick
		282	* just in case. */
		283	if (unlikely(vq->num_added == (1 << 16) - 1))
		284	virtqueue_kick(_vq);
		285
234	pr_debug("Added buffer head %i to %p\n", head, vq);	286	pr_debug("Added buffer head %i to %p\n", head, vq);
235	END_USE(vq);	287	END_USE(vq);
236		288
237	return vq->num_free;	289	return vq->num_free;
238	}	290	}
239	EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);	291	EXPORT_SYMBOL_GPL(virtqueue_add_buf);
240		292
241	void virtqueue_kick(struct virtqueue *_vq)	293	/**
		294	* virtqueue_kick_prepare - first half of split virtqueue_kick call.
		295	* @vq: the struct virtqueue
		296	*
		297	* Instead of virtqueue_kick(), you can do:
		298	* if (virtqueue_kick_prepare(vq))
		299	* virtqueue_notify(vq);
		300	*
		301	* This is sometimes useful because the virtqueue_kick_prepare() needs
		302	* to be serialized, but the actual virtqueue_notify() call does not.
		303	*/
		304	bool virtqueue_kick_prepare(struct virtqueue *_vq)
242	{	305	{
243	struct vring_virtqueue *vq = to_vvq(_vq);	306	struct vring_virtqueue *vq = to_vvq(_vq);
244	u16 new, old;	307	u16 new, old;
		308	bool needs_kick;
		309
245	START_USE(vq);	310	START_USE(vq);
246	/* Descriptors and available array need to be set before we expose the	311	/* avail->idx stores from add_buf must be visible before we read the
247	* new available array entries. */	312	* other side's event index / flags: that needs a full barrier. */
248	virtio_wmb();	313	virtio_mb(vq);
249		314
250	old = vq->vring.avail->idx;	315	old = vq->vring.avail->idx - vq->num_added;
251	new = vq->vring.avail->idx = old + vq->num_added;	316	new = vq->vring.avail->idx;
252	vq->num_added = 0;	317	vq->num_added = 0;
253		318
254	/* Need to update avail index before checking if we should notify */	319	#ifdef DEBUG
255	virtio_mb();	320	if (vq->last_add_time_valid) {
256		321	WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
257	if (vq->event ?	322	vq->last_add_time)) > 100);
258	vring_need_event(vring_avail_event(&vq->vring), new, old) :	323	}
259	!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))	324	vq->last_add_time_valid = false;
260	/* Prod other side to tell it about changes. */	325	#endif
261	vq->notify(&vq->vq);
262		326
		327	if (vq->event) {
		328	needs_kick = vring_need_event(vring_avail_event(&vq->vring),
		329	new, old);
		330	} else {
		331	needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
		332	}
263	END_USE(vq);	333	END_USE(vq);
		334	return needs_kick;
		335	}
		336	EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
		337
		338	/**
		339	* virtqueue_notify - second half of split virtqueue_kick call.
		340	* @vq: the struct virtqueue
		341	*
		342	* This does not need to be serialized.
		343	*/
		344	void virtqueue_notify(struct virtqueue *_vq)
		345	{
		346	struct vring_virtqueue *vq = to_vvq(_vq);
		347
		348	/* Prod other side to tell it about changes. */
		349	vq->notify(_vq);
		350	}
		351	EXPORT_SYMBOL_GPL(virtqueue_notify);
		352
		353	/**
		354	* virtqueue_kick - update after add_buf
		355	* @vq: the struct virtqueue
		356	*
		357	* After one or more virtqueue_add_buf calls, invoke this to kick
		358	* the other side.
		359	*
		360	* Caller must ensure we don't call this with other virtqueue
		361	* operations at the same time (except where noted).
		362	*/
		363	void virtqueue_kick(struct virtqueue *vq)
		364	{
		365	if (virtqueue_kick_prepare(vq))
		366	virtqueue_notify(vq);
264	}	367	}
265	EXPORT_SYMBOL_GPL(virtqueue_kick);	368	EXPORT_SYMBOL_GPL(virtqueue_kick);
266		369
@@ -294,11 +397,28 @@ static inline bool more_used(const struct vring_virtqueue *vq)
294	return vq->last_used_idx != vq->vring.used->idx;	397	return vq->last_used_idx != vq->vring.used->idx;
295	}	398	}
296		399
		400	/**
		401	* virtqueue_get_buf - get the next used buffer
		402	* @vq: the struct virtqueue we're talking about.
		403	* @len: the length written into the buffer
		404	*
		405	* If the other side wrote data into the buffer, @len will be set to the
		406	* amount written. This means you don't need to clear the buffer
		407	* beforehand to ensure there's no data leakage in the case of short
		408	* writes.
		409	*
		410	* Caller must ensure we don't call this with other virtqueue
		411	* operations at the same time (except where noted).
		412	*
		413	* Returns NULL if there are no used buffers, or the "data" token
		414	* handed to virtqueue_add_buf().
		415	*/
297	void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)	416	void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
298	{	417	{
299	struct vring_virtqueue *vq = to_vvq(_vq);	418	struct vring_virtqueue *vq = to_vvq(_vq);
300	void *ret;	419	void *ret;
301	unsigned int i;	420	unsigned int i;
		421	u16 last_used;
302		422
303	START_USE(vq);	423	START_USE(vq);
304		424
@@ -314,10 +434,11 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
314	}	434	}
315		435
316	/* Only get used array entries after they have been exposed by host. */	436	/* Only get used array entries after they have been exposed by host. */
317	virtio_rmb();	437	virtio_rmb(vq);
318		438
319	i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;	439	last_used = (vq->last_used_idx & (vq->vring.num - 1));
320	*len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;	440	i = vq->vring.used->ring[last_used].id;
		441	*len = vq->vring.used->ring[last_used].len;
321		442
322	if (unlikely(i >= vq->vring.num)) {	443	if (unlikely(i >= vq->vring.num)) {
323	BAD_RING(vq, "id %u out of range\n", i);	444	BAD_RING(vq, "id %u out of range\n", i);
@@ -337,14 +458,27 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
337	* the read in the next get_buf call. */	458	* the read in the next get_buf call. */
338	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {	459	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
339	vring_used_event(&vq->vring) = vq->last_used_idx;	460	vring_used_event(&vq->vring) = vq->last_used_idx;
340	virtio_mb();	461	virtio_mb(vq);
341	}	462	}
342		463
		464	#ifdef DEBUG
		465	vq->last_add_time_valid = false;
		466	#endif
		467
343	END_USE(vq);	468	END_USE(vq);
344	return ret;	469	return ret;
345	}	470	}
346	EXPORT_SYMBOL_GPL(virtqueue_get_buf);	471	EXPORT_SYMBOL_GPL(virtqueue_get_buf);
347		472
		473	/**
		474	* virtqueue_disable_cb - disable callbacks
		475	* @vq: the struct virtqueue we're talking about.
		476	*
		477	* Note that this is not necessarily synchronous, hence unreliable and only
		478	* useful as an optimization.
		479	*
		480	* Unlike other operations, this need not be serialized.
		481	*/
348	void virtqueue_disable_cb(struct virtqueue *_vq)	482	void virtqueue_disable_cb(struct virtqueue *_vq)
349	{	483	{
350	struct vring_virtqueue *vq = to_vvq(_vq);	484	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -353,6 +487,17 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
353	}	487	}
354	EXPORT_SYMBOL_GPL(virtqueue_disable_cb);	488	EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
355		489
		490	/**
		491	* virtqueue_enable_cb - restart callbacks after disable_cb.
		492	* @vq: the struct virtqueue we're talking about.
		493	*
		494	* This re-enables callbacks; it returns "false" if there are pending
		495	* buffers in the queue, to detect a possible race between the driver
		496	* checking for more work, and enabling callbacks.
		497	*
		498	* Caller must ensure we don't call this with other virtqueue
		499	* operations at the same time (except where noted).
		500	*/
356	bool virtqueue_enable_cb(struct virtqueue *_vq)	501	bool virtqueue_enable_cb(struct virtqueue *_vq)
357	{	502	{
358	struct vring_virtqueue *vq = to_vvq(_vq);	503	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -366,7 +511,7 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
366	* entry. Always do both to keep code simple. */	511	* entry. Always do both to keep code simple. */
367	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;	512	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
368	vring_used_event(&vq->vring) = vq->last_used_idx;	513	vring_used_event(&vq->vring) = vq->last_used_idx;
369	virtio_mb();	514	virtio_mb(vq);
370	if (unlikely(more_used(vq))) {	515	if (unlikely(more_used(vq))) {
371	END_USE(vq);	516	END_USE(vq);
372	return false;	517	return false;
@@ -377,6 +522,19 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
377	}	522	}
378	EXPORT_SYMBOL_GPL(virtqueue_enable_cb);	523	EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
379		524
		525	/**
		526	* virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
		527	* @vq: the struct virtqueue we're talking about.
		528	*
		529	* This re-enables callbacks but hints to the other side to delay
		530	* interrupts until most of the available buffers have been processed;
		531	* it returns "false" if there are many pending buffers in the queue,
		532	* to detect a possible race between the driver checking for more work,
		533	* and enabling callbacks.
		534	*
		535	* Caller must ensure we don't call this with other virtqueue
		536	* operations at the same time (except where noted).
		537	*/
380	bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)	538	bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
381	{	539	{
382	struct vring_virtqueue *vq = to_vvq(_vq);	540	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -393,7 +551,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
393	/* TODO: tune this threshold */	551	/* TODO: tune this threshold */
394	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;	552	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
395	vring_used_event(&vq->vring) = vq->last_used_idx + bufs;	553	vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
396	virtio_mb();	554	virtio_mb(vq);
397	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {	555	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
398	END_USE(vq);	556	END_USE(vq);
399	return false;	557	return false;
@@ -404,6 +562,14 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
404	}	562	}
405	EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);	563	EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
406		564
		565	/**
		566	* virtqueue_detach_unused_buf - detach first unused buffer
		567	* @vq: the struct virtqueue we're talking about.
		568	*
		569	* Returns NULL or the "data" token handed to virtqueue_add_buf().
		570	* This is not valid on an active queue; it is useful only for device
		571	* shutdown.
		572	*/
407	void *virtqueue_detach_unused_buf(struct virtqueue *_vq)	573	void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
408	{	574	{
409	struct vring_virtqueue *vq = to_vvq(_vq);	575	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -453,6 +619,7 @@ EXPORT_SYMBOL_GPL(vring_interrupt);
453	struct virtqueue *vring_new_virtqueue(unsigned int num,	619	struct virtqueue *vring_new_virtqueue(unsigned int num,
454	unsigned int vring_align,	620	unsigned int vring_align,
455	struct virtio_device *vdev,	621	struct virtio_device *vdev,
		622	bool weak_barriers,
456	void *pages,	623	void *pages,
457	void (*notify)(struct virtqueue *),	624	void (*notify)(struct virtqueue *),
458	void (*callback)(struct virtqueue *),	625	void (*callback)(struct virtqueue *),
@@ -476,12 +643,14 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
476	vq->vq.vdev = vdev;	643	vq->vq.vdev = vdev;
477	vq->vq.name = name;	644	vq->vq.name = name;
478	vq->notify = notify;	645	vq->notify = notify;
		646	vq->weak_barriers = weak_barriers;
479	vq->broken = false;	647	vq->broken = false;
480	vq->last_used_idx = 0;	648	vq->last_used_idx = 0;
481	vq->num_added = 0;	649	vq->num_added = 0;
482	list_add_tail(&vq->vq.list, &vdev->vqs);	650	list_add_tail(&vq->vq.list, &vdev->vqs);
483	#ifdef DEBUG	651	#ifdef DEBUG
484	vq->in_use = false;	652	vq->in_use = false;
		653	vq->last_add_time_valid = false;
485	#endif	654	#endif
486		655
487	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);	656	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
@@ -530,7 +699,13 @@ void vring_transport_features(struct virtio_device *vdev)
530	}	699	}
531	EXPORT_SYMBOL_GPL(vring_transport_features);	700	EXPORT_SYMBOL_GPL(vring_transport_features);
532		701
533	/* return the size of the vring within the virtqueue */	702	/**
		703	* virtqueue_get_vring_size - return the size of the virtqueue's vring
		704	* @vq: the struct virtqueue containing the vring of interest.
		705	*
		706	* Returns the size of the vring. This is mainly used for boasting to
		707	* userspace. Unlike other operations, this need not be serialized.
		708	*/
534	unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)	709	unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
535	{	710	{
536		711