-rw-r--r--  arch/sh/include/asm/barrier.h               1
-rw-r--r--  drivers/virtio/virtio_pci_common.c          2
-rw-r--r--  tools/virtio/asm/barrier.h                 22
-rw-r--r--  tools/virtio/linux/compiler.h               9
-rw-r--r--  tools/virtio/linux/kernel.h                 1
-rw-r--r--  tools/virtio/ringtest/Makefile             22
-rw-r--r--  tools/virtio/ringtest/README                2
-rw-r--r--  tools/virtio/ringtest/main.c              366
-rw-r--r--  tools/virtio/ringtest/main.h              119
-rw-r--r--  tools/virtio/ringtest/ring.c              272
-rwxr-xr-x  tools/virtio/ringtest/run-on-all.sh        24
-rw-r--r--  tools/virtio/ringtest/virtio_ring_0_9.c   316
-rw-r--r--  tools/virtio/ringtest/virtio_ring_poll.c    2
13 files changed, 1148 insertions(+), 10 deletions(-)
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index f887c6465a82..8a84e05adb2e 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -33,7 +33,6 @@
 #endif
 
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#define smp_store_mb(var, value) __smp_store_mb(var, value)
 
 #include <asm-generic/barrier.h>
 
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 36205c27c4d0..f6bed86c17f9 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -545,6 +545,7 @@ err_enable_device:
 static void virtio_pci_remove(struct pci_dev *pci_dev)
 {
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct device *dev = get_device(&vp_dev->vdev.dev);
 
 	unregister_virtio_device(&vp_dev->vdev);
 
@@ -554,6 +555,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
 		virtio_pci_modern_remove(vp_dev);
 
 	pci_disable_device(pci_dev);
+	put_device(dev);
 }
 
 static struct pci_driver virtio_pci_driver = {
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index 26b7926bda88..ba34f9e96efd 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -1,15 +1,19 @@
 #if defined(__i386__) || defined(__x86_64__)
 #define barrier() asm volatile("" ::: "memory")
-#define mb() __sync_synchronize()
-
-#define smp_mb() mb()
-# define dma_rmb() barrier()
-# define dma_wmb() barrier()
-# define smp_rmb() barrier()
-# define smp_wmb() barrier()
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value) do { \
+	typeof(var) virt_store_mb_value = (value); \
+	__atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+			  __ATOMIC_SEQ_CST); \
+	barrier(); \
+} while (0);
 /* Weak barriers should be used. If not - it's a bug */
+# define mb() abort()
 # define rmb() abort()
 # define wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644
index 000000000000..845960e1cbf2
--- /dev/null
+++ b/tools/virtio/linux/compiler.h
@@ -0,0 +1,9 @@
#ifndef LINUX_COMPILER_H
#define LINUX_COMPILER_H

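/* Force the compiler to emit exactly one access to the variable: the
 * volatile cast prevents caching, merging, or re-reading at the source
 * level. These are compiler-only constructs and imply no CPU barrier.
 */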
#define WRITE_ONCE(var, val) \
	(*((volatile typeof(val) *)(&(var))) = (val))

#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))

#endif
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 4db7d5691ba7..033849948215 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -8,6 +8,7 @@
 #include <assert.h>
 #include <stdarg.h>
 
+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/printk.h>
 #include <linux/bug.h>
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644
index 000000000000..feaa64ac4630
--- /dev/null
+++ b/tools/virtio/ringtest/Makefile
@@ -0,0 +1,22 @@
all:

all: ring virtio_ring_0_9 virtio_ring_poll

CFLAGS += -Wall
CFLAGS += -pthread -O2 -ggdb
LDFLAGS += -pthread -O2 -ggdb

main.o: main.c main.h
ring.o: ring.c main.h
virtio_ring_0_9.o: virtio_ring_0_9.c main.h
virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
ring: ring.o main.o
virtio_ring_0_9: virtio_ring_0_9.o main.o
virtio_ring_poll: virtio_ring_poll.o main.o
clean:
	-rm main.o
	-rm ring.o ring
	-rm virtio_ring_0_9.o virtio_ring_0_9
	-rm virtio_ring_poll.o virtio_ring_poll

.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644
index 000000000000..34e94c46104f
--- /dev/null
+++ b/tools/virtio/ringtest/README
@@ -0,0 +1,2 @@
Partial implementation of various ring layouts, useful to tune virtio design.
Uses shared memory heavily.
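
Example usage (a sketch; CPU numbers and cycle counts are illustrative):

	make
	./ring --host-affinity 0 --guest-affinity 1
	./virtio_ring_0_9 --sleep --run-cycles 1000000
	./run-on-all.sh ./virtio_ring_poll --sleep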
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644
index 000000000000..3a5ff438bd62
--- /dev/null
+++ b/tools/virtio/ringtest/main.c
@@ -0,0 +1,366 @@
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Command line processing and common functions for ring benchmarking.
 */
#define _GNU_SOURCE
#include <getopt.h>
#include <pthread.h>
#include <assert.h>
#include <sched.h>
#include "main.h"
#include <sys/eventfd.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <limits.h>

int runcycles = 10000000;
int max_outstanding = INT_MAX;
int batch = 1;

bool do_sleep = false;
bool do_relax = false;
bool do_exit = true;

unsigned ring_size = 256;

static int kickfd = -1;
static int callfd = -1;

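/* Notifications are modeled with eventfds: a kick or a call is one 8-byte
 * write, waiting for one is a blocking 8-byte read, and vmexit()/vmentry()
 * add the simulated cost of the VM transition on each side.
 */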
void notify(int fd)
{
	unsigned long long v = 1;
	int r;

	vmexit();
	r = write(fd, &v, sizeof v);
	assert(r == sizeof v);
	vmentry();
}

void wait_for_notify(int fd)
{
	unsigned long long v = 1;
	int r;

	vmexit();
	r = read(fd, &v, sizeof v);
	assert(r == sizeof v);
	vmentry();
}

void kick(void)
{
	notify(kickfd);
}

void wait_for_kick(void)
{
	wait_for_notify(kickfd);
}

void call(void)
{
	notify(callfd);
}

void wait_for_call(void)
{
	wait_for_notify(callfd);
}

void set_affinity(const char *arg)
{
	cpu_set_t cpuset;
	int ret;
	pthread_t self;
	long int cpu;
	char *endptr;

	if (!arg)
		return;

	cpu = strtol(arg, &endptr, 0);
	assert(!*endptr);

	assert(cpu >= 0 && cpu < CPU_SETSIZE);

	self = pthread_self();
	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);

	ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
	assert(!ret);
}

static void run_guest(void)
{
	int completed_before;
	int completed = 0;
	int started = 0;
	int bufs = runcycles;
	int spurious = 0;
	int r;
	unsigned len;
	void *buf;
	int tokick = batch;

	for (;;) {
		if (do_sleep)
			disable_call();
		completed_before = completed;
		do {
			if (started < bufs &&
			    started - completed < max_outstanding) {
				r = add_inbuf(0, NULL, "Hello, world!");
				if (__builtin_expect(r == 0, true)) {
					++started;
					if (!--tokick) {
						tokick = batch;
						if (do_sleep)
							kick_available();
					}

				}
			} else
				r = -1;

			/* Flush out completed bufs if any */
			if (get_buf(&len, &buf)) {
				++completed;
				if (__builtin_expect(completed == bufs, false))
					return;
				r = 0;
			}
		} while (r == 0);
		if (completed == completed_before)
			++spurious;
		assert(completed <= bufs);
		assert(started <= bufs);
		if (do_sleep) {
			if (enable_call())
				wait_for_call();
		} else {
			poll_used();
		}
	}
}

static void run_host(void)
{
	int completed_before;
	int completed = 0;
	int spurious = 0;
	int bufs = runcycles;
	unsigned len;
	void *buf;

	for (;;) {
		if (do_sleep) {
			if (enable_kick())
				wait_for_kick();
		} else {
			poll_avail();
		}
		if (do_sleep)
			disable_kick();
		completed_before = completed;
		while (__builtin_expect(use_buf(&len, &buf), true)) {
			if (do_sleep)
				call_used();
			++completed;
			if (__builtin_expect(completed == bufs, false))
				return;
		}
		if (completed == completed_before)
			++spurious;
		assert(completed <= bufs);
		if (completed == bufs)
			break;
	}
}

void *start_guest(void *arg)
{
	set_affinity(arg);
	run_guest();
	pthread_exit(NULL);
}

void *start_host(void *arg)
{
	set_affinity(arg);
	run_host();
	pthread_exit(NULL);
}

static const char optstring[] = "";
static const struct option longopts[] = {
	{
		.name = "help",
		.has_arg = no_argument,
		.val = 'h',
	},
	{
		.name = "host-affinity",
		.has_arg = required_argument,
		.val = 'H',
	},
	{
		.name = "guest-affinity",
		.has_arg = required_argument,
		.val = 'G',
	},
	{
		.name = "ring-size",
		.has_arg = required_argument,
		.val = 'R',
	},
	{
		.name = "run-cycles",
		.has_arg = required_argument,
		.val = 'C',
	},
	{
		.name = "outstanding",
		.has_arg = required_argument,
		.val = 'o',
	},
	{
		.name = "batch",
		.has_arg = required_argument,
		.val = 'b',
	},
	{
		.name = "sleep",
		.has_arg = no_argument,
		.val = 's',
	},
	{
		.name = "relax",
		.has_arg = no_argument,
		.val = 'x',
	},
	{
		.name = "exit",
		.has_arg = no_argument,
		.val = 'e',
	},
	{
	}
};

static void help(void)
{
	fprintf(stderr, "Usage: <test> [--help]"
		" [--host-affinity H]"
		" [--guest-affinity G]"
		" [--ring-size R (default: %d)]"
		" [--run-cycles C (default: %d)]"
		" [--batch b]"
		" [--outstanding o]"
		" [--sleep]"
		" [--relax]"
		" [--exit]"
		"\n",
		ring_size,
		runcycles);
}

int main(int argc, char **argv)
{
	int ret;
	pthread_t host, guest;
	void *tret;
	char *host_arg = NULL;
	char *guest_arg = NULL;
	char *endptr;
	long int c;

	kickfd = eventfd(0, 0);
	assert(kickfd >= 0);
	callfd = eventfd(0, 0);
	assert(callfd >= 0);

	for (;;) {
		int o = getopt_long(argc, argv, optstring, longopts, NULL);
		switch (o) {
		case -1:
			goto done;
		case '?':
			help();
			exit(2);
		case 'H':
			host_arg = optarg;
			break;
		case 'G':
			guest_arg = optarg;
			break;
		case 'R':
			ring_size = strtol(optarg, &endptr, 0);
			assert(ring_size && !(ring_size & (ring_size - 1)));
			assert(!*endptr);
			break;
		case 'C':
			c = strtol(optarg, &endptr, 0);
			assert(!*endptr);
			assert(c > 0 && c < INT_MAX);
			runcycles = c;
			break;
		case 'o':
			c = strtol(optarg, &endptr, 0);
			assert(!*endptr);
			assert(c > 0 && c < INT_MAX);
			max_outstanding = c;
			break;
		case 'b':
			c = strtol(optarg, &endptr, 0);
			assert(!*endptr);
			assert(c > 0 && c < INT_MAX);
			batch = c;
			break;
		case 's':
			do_sleep = true;
			break;
		case 'x':
			do_relax = true;
			break;
		case 'e':
			do_exit = true;
			break;
		default:
			help();
			exit(4);
			break;
		}
	}

	/* does nothing here, used to make sure all smp APIs compile */
	smp_acquire();
	smp_release();
	smp_mb();
done:

	if (batch > max_outstanding)
		batch = max_outstanding;

	if (optind < argc) {
		help();
		exit(4);
	}
	alloc_ring();

	ret = pthread_create(&host, NULL, start_host, host_arg);
	assert(!ret);
	ret = pthread_create(&guest, NULL, start_guest, guest_arg);
	assert(!ret);

	ret = pthread_join(guest, &tret);
	assert(!ret);
	ret = pthread_join(host, &tret);
	assert(!ret);
	return 0;
}
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644
index 000000000000..16917acb0ade
--- /dev/null
+++ b/tools/virtio/ringtest/main.h
@@ -0,0 +1,119 @@
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Common macros and functions for ring benchmarking.
 */
#ifndef MAIN_H
#define MAIN_H

#include <stdbool.h>

extern bool do_exit;

#if defined(__x86_64__) || defined(__i386__)
#include "x86intrin.h"

static inline void wait_cycles(unsigned long long cycles)
{
	unsigned long long t;

	t = __rdtsc();
	while (__rdtsc() - t < cycles) {}
}

#define VMEXIT_CYCLES 500
#define VMENTRY_CYCLES 500

#else
static inline void wait_cycles(unsigned long long cycles)
{
	_Exit(5);
}
#define VMEXIT_CYCLES 0
#define VMENTRY_CYCLES 0
#endif

static inline void vmexit(void)
{
	if (!do_exit)
		return;

	wait_cycles(VMEXIT_CYCLES);
}
static inline void vmentry(void)
{
	if (!do_exit)
		return;

	wait_cycles(VMENTRY_CYCLES);
}

/* implemented by ring */
void alloc_ring(void);
/* guest side */
int add_inbuf(unsigned, void *, void *);
void *get_buf(unsigned *, void **);
void disable_call();
bool enable_call();
void kick_available();
void poll_used();
/* host side */
void disable_kick();
bool enable_kick();
bool use_buf(unsigned *, void **);
void call_used();
void poll_avail();

/* implemented by main */
extern bool do_sleep;
void kick(void);
void wait_for_kick(void);
void call(void);
void wait_for_call(void);

extern unsigned ring_size;

/* Compiler barrier - similar to what Linux uses */
#define barrier() asm volatile("" ::: "memory")

/* Is there a portable way to do this? */
#if defined(__x86_64__) || defined(__i386__)
#define cpu_relax() asm ("rep; nop" ::: "memory")
#else
#define cpu_relax() assert(0)
#endif

extern bool do_relax;

static inline void busy_wait(void)
{
	if (do_relax)
		cpu_relax();
	else
		/* prevent compiler from removing busy loops */
		barrier();
}

/*
 * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
 * with other __ATOMIC_SEQ_CST calls.
 */
#define smp_mb() __sync_synchronize()

/*
 * This abuses the atomic builtins for thread fences, and
 * adds a compiler barrier.
 */
#define smp_release() do { \
	barrier(); \
	__atomic_thread_fence(__ATOMIC_RELEASE); \
} while (0)

#define smp_acquire() do { \
	__atomic_thread_fence(__ATOMIC_ACQUIRE); \
	barrier(); \
} while (0)
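
/*
 * Typical pairing, as used by ring.c (barriers A/B there): the producer
 * fills in a descriptor, does smp_release(), then writes the flag the
 * consumer polls; the consumer reads the flag, does smp_acquire(), then
 * reads the descriptor fields.
 */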

#endif
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644
index 000000000000..c25c8d248b6b
--- /dev/null
+++ b/tools/virtio/ringtest/ring.c
@@ -0,0 +1,272 @@
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - Where next entry will be written.
 * Prev - "Next" value when event triggered previously.
 * Event - Peer requested event after writing this entry.
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
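
/* Worked example (all arithmetic is mod 2^16): the peer asked to be
 * notified after entry 4 (event = 4), we last notified at prev = 3 and
 * are now at next = 5, so entry 4 was just written:
 * (5 - 4 - 1) = 0 < (5 - 3) = 2, notify. With event = 6 nothing new
 * crosses the event point: (unsigned short)(5 - 6 - 1) = 0xfffe >= 2, skip.
 */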

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1

struct desc {
	unsigned short flags;
	unsigned short index;
	unsigned len;
	unsigned long long addr;
};

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
	unsigned short kick_index;
	unsigned char reserved0[HOST_GUEST_PADDING - 2];
	unsigned short call_index;
	unsigned char reserved1[HOST_GUEST_PADDING - 2];
};

struct data {
	void *buf; /* descriptor is writeable, we can't get buf from there */
	void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
	unsigned avail_idx;
	unsigned last_used_idx;
	unsigned num_free;
	unsigned kicked_avail_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
	/* we do not need to track last avail index
	 * unless we have more than one in flight.
	 */
	unsigned used_idx;
	unsigned called_used_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
	int ret;
	int i;

	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
	if (ret) {
		perror("Unable to allocate ring buffer.\n");
		exit(3);
	}
	event = malloc(sizeof *event);
	if (!event) {
		perror("Unable to allocate event buffer.\n");
		exit(3);
	}
	memset(event, 0, sizeof *event);
	guest.avail_idx = 0;
	guest.kicked_avail_idx = -1;
	guest.last_used_idx = 0;
	host.used_idx = 0;
	host.called_used_idx = -1;
	for (i = 0; i < ring_size; ++i) {
		struct desc desc = {
			.index = i,
		};
		ring[i] = desc;
	}
	guest.num_free = ring_size;
	data = malloc(ring_size * sizeof *data);
	if (!data) {
		perror("Unable to allocate data buffer.\n");
		exit(3);
	}
	memset(data, 0, ring_size * sizeof *data);
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
	unsigned head, index;

	if (!guest.num_free)
		return -1;

	guest.num_free--;
	head = (ring_size - 1) & (guest.avail_idx++);

	/* Start with a write. On MESI architectures this helps
	 * avoid a shared state with consumer that is polling this descriptor.
	 */
	ring[head].addr = (unsigned long)(void*)buf;
	ring[head].len = len;
	/* read below might bypass write above. That is OK because it's just an
	 * optimization. If this happens, we will get the cache line in a
	 * shared state which is unfortunate, but probably not worth it to
	 * add an explicit full barrier to avoid this.
	 */
	barrier();
	index = ring[head].index;
	data[index].buf = buf;
	data[index].data = datap;
	/* Barrier A (for pairing) */
	smp_release();
	ring[head].flags = DESC_HW;

	return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;
	unsigned index;
	void *datap;

	if (ring[head].flags & DESC_HW)
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
	*lenp = ring[head].len;
	index = ring[head].index & (ring_size - 1);
	datap = data[index].data;
	*bufp = data[index].buf;
	data[index].buf = NULL;
	data[index].data = NULL;
	guest.num_free++;
	guest.last_used_idx++;
	return datap;
}

void poll_used(void)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	while (ring[head].flags & DESC_HW)
		busy_wait();
}

void disable_call()
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_call()
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	event->call_index = guest.last_used_idx;
	/* Flush call index write */
	/* Barrier D (for pairing) */
	smp_mb();
	return ring[head].flags & DESC_HW;
}

void kick_available(void)
{
	/* Flush in previous flags write */
	/* Barrier C (for pairing) */
	smp_mb();
	if (!need_event(event->kick_index,
			guest.avail_idx,
			guest.kicked_avail_idx))
		return;

	guest.kicked_avail_idx = guest.avail_idx;
	kick();
}

/* host side */
void disable_kick()
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_kick()
{
	unsigned head = (ring_size - 1) & host.used_idx;

	event->kick_index = host.used_idx;
	/* Barrier C (for pairing) */
	smp_mb();
	return !(ring[head].flags & DESC_HW);
}

void poll_avail(void)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	while (!(ring[head].flags & DESC_HW))
		busy_wait();
}

bool use_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	if (!(ring[head].flags & DESC_HW))
		return false;

	/* make sure length read below is not speculated */
	/* Barrier A (for pairing) */
	smp_acquire();

	/* simple in-order completion: we don't need
	 * to touch index at all. This also means we
	 * can just modify the descriptor in-place.
	 */
	ring[head].len--;
	/* Make sure len is valid before flags.
	 * Note: alternative is to write len and flags in one access -
	 * possible on 64 bit architectures but wmb is free on Intel anyway
	 * so I have no way to test whether it's a gain.
	 */
	/* Barrier B (for pairing) */
	smp_release();
	ring[head].flags = 0;
	host.used_idx++;
	return true;
}

void call_used(void)
{
	/* Flush in previous flags write */
	/* Barrier D (for pairing) */
	smp_mb();
	if (!need_event(event->call_index,
			host.used_idx,
			host.called_used_idx))
		return;

	host.called_used_idx = host.used_idx;
	call();
}
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755
index 000000000000..52b0f71ffa8d
--- /dev/null
+++ b/tools/virtio/ringtest/run-on-all.sh
@@ -0,0 +1,24 @@
#!/bin/sh

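#example invocation (binary name illustrative):
#  ./run-on-all.sh ./virtio_ring_0_9 --sleep
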
#use last CPU for host. Why not the first?
#many devices tend to use cpu0 by default so
#it tends to be busier
HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1)

#run command on all cpus
for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n);
do
	#Don't run guest and host on same CPU
	#It actually works ok if using signalling
	if
		(echo "$@" | grep -e "--sleep" > /dev/null) || \
			test $HOST_AFFINITY '!=' $cpu
	then
		echo "GUEST AFFINITY $cpu"
		"$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu
	fi
done
echo "NO GUEST AFFINITY"
"$@" --host-affinity $HOST_AFFINITY
echo "NO AFFINITY"
"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644
index 000000000000..47c9a1a18d36
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_0_9.c
@@ -0,0 +1,316 @@
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Partial implementation of virtio 0.9. event index is used for signalling,
 * unconditionally. Design roughly follows linux kernel implementation in order
 * to be able to judge its performance.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <linux/virtio_ring.h>

struct data {
	void *data;
} *data;

struct vring ring;

/* enabling the below activates experimental ring polling code
 * (which skips index reads on consumer in favor of looking at
 * high bits of ring id ^ 0x8000).
 */
/* #ifdef RING_POLL */
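/* Example of the encoding (assuming ring_size = 256, so the low 8 bits
 * hold the descriptor head): at avail index 0x0102 with head 5 the guest
 * stores (5 | 0x0100) ^ 0x8000 = 0x8105. A host at index 0x0102 checks
 * (0x8105 ^ 0x0102 ^ 0x8000) & ~0xff = 0 and accepts the entry; a value
 * left over from a lap earlier fails the check, so no index read is needed.
 */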

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

struct guest {
	unsigned short avail_idx;
	unsigned short last_used_idx;
	unsigned short num_free;
	unsigned short kicked_avail_idx;
	unsigned short free_head;
	unsigned char reserved[HOST_GUEST_PADDING - 10];
} guest;

struct host {
	/* we do not need to track last avail index
	 * unless we have more than one in flight.
	 */
	unsigned short used_idx;
	unsigned short called_used_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
	int ret;
	int i;
	void *p;

	ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
	if (ret) {
		perror("Unable to allocate ring buffer.\n");
		exit(3);
	}
	memset(p, 0, vring_size(ring_size, 0x1000));
	vring_init(&ring, ring_size, p, 0x1000);

	guest.avail_idx = 0;
	guest.kicked_avail_idx = -1;
	guest.last_used_idx = 0;
	/* Put everything in free lists. */
	guest.free_head = 0;
	for (i = 0; i < ring_size - 1; i++)
		ring.desc[i].next = i + 1;
	host.used_idx = 0;
	host.called_used_idx = -1;
	guest.num_free = ring_size;
	data = malloc(ring_size * sizeof *data);
	if (!data) {
		perror("Unable to allocate data buffer.\n");
		exit(3);
	}
	memset(data, 0, ring_size * sizeof *data);
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
	unsigned head, avail;
	struct vring_desc *desc;

	if (!guest.num_free)
		return -1;

	head = guest.free_head;
	guest.num_free--;

	desc = ring.desc;
	desc[head].flags = VRING_DESC_F_NEXT;
	desc[head].addr = (unsigned long)(void *)buf;
	desc[head].len = len;
	/* We do it like this to simulate the way
	 * we'd have to flip it if we had multiple
	 * descriptors.
	 */
	desc[head].flags &= ~VRING_DESC_F_NEXT;
	guest.free_head = desc[head].next;

	data[head].data = datap;

#ifdef RING_POLL
	/* Barrier A (for pairing) */
	smp_release();
	avail = guest.avail_idx++;
	ring.avail->ring[avail & (ring_size - 1)] =
		(head | (avail & ~(ring_size - 1))) ^ 0x8000;
#else
	avail = (ring_size - 1) & (guest.avail_idx++);
	ring.avail->ring[avail] = head;
	/* Barrier A (for pairing) */
	smp_release();
#endif
	ring.avail->idx = guest.avail_idx;
	return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
	unsigned head;
	unsigned index;
	void *datap;

#ifdef RING_POLL
	head = (ring_size - 1) & guest.last_used_idx;
	index = ring.used->ring[head].id;
	if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
	index &= ring_size - 1;
#else
	if (ring.used->idx == guest.last_used_idx)
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
	head = (ring_size - 1) & guest.last_used_idx;
	index = ring.used->ring[head].id;
#endif
	*lenp = ring.used->ring[head].len;
	datap = data[index].data;
	*bufp = (void*)(unsigned long)ring.desc[index].addr;
	data[index].data = NULL;
	ring.desc[index].next = guest.free_head;
	guest.free_head = index;
	guest.num_free++;
	guest.last_used_idx++;
	return datap;
}

void poll_used(void)
{
#ifdef RING_POLL
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	for (;;) {
		unsigned index = ring.used->ring[head].id;

		if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
			busy_wait();
		else
			break;
	}
#else
	unsigned head = guest.last_used_idx;

	while (ring.used->idx == head)
		busy_wait();
#endif
}

void disable_call()
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_call()
{
	unsigned short last_used_idx;

	vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
	/* Flush call index write */
	/* Barrier D (for pairing) */
	smp_mb();
#ifdef RING_POLL
	{
		unsigned short head = last_used_idx & (ring_size - 1);
		unsigned index = ring.used->ring[head].id;

		return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
	}
#else
	return ring.used->idx == last_used_idx;
#endif
}

void kick_available(void)
{
	/* Flush in previous flags write */
	/* Barrier C (for pairing) */
	smp_mb();
	if (!vring_need_event(vring_avail_event(&ring),
			      guest.avail_idx,
			      guest.kicked_avail_idx))
		return;

	guest.kicked_avail_idx = guest.avail_idx;
	kick();
}

/* host side */
void disable_kick()
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_kick()
{
	unsigned head = host.used_idx;

	vring_avail_event(&ring) = head;
	/* Barrier C (for pairing) */
	smp_mb();
#ifdef RING_POLL
	{
		unsigned index = ring.avail->ring[head & (ring_size - 1)];

		return (index ^ head ^ 0x8000) & ~(ring_size - 1);
	}
#else
	return head == ring.avail->idx;
#endif
}

void poll_avail(void)
{
	unsigned head = host.used_idx;
#ifdef RING_POLL
	for (;;) {
		unsigned index = ring.avail->ring[head & (ring_size - 1)];
		if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
			busy_wait();
		else
			break;
	}
#else
	while (ring.avail->idx == head)
		busy_wait();
#endif
}

bool use_buf(unsigned *lenp, void **bufp)
{
	unsigned used_idx = host.used_idx;
	struct vring_desc *desc;
	unsigned head;

#ifdef RING_POLL
	head = ring.avail->ring[used_idx & (ring_size - 1)];
	if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
		return false;
	/* Barrier A (for pairing) */
	smp_acquire();

	used_idx &= ring_size - 1;
	desc = &ring.desc[head & (ring_size - 1)];
#else
	if (used_idx == ring.avail->idx)
		return false;

	/* Barrier A (for pairing) */
	smp_acquire();

	used_idx &= ring_size - 1;
	head = ring.avail->ring[used_idx];
	desc = &ring.desc[head];
#endif

	*lenp = desc->len;
	*bufp = (void *)(unsigned long)desc->addr;

	/* now update used ring */
	ring.used->ring[used_idx].id = head;
	ring.used->ring[used_idx].len = desc->len - 1;
	/* Barrier B (for pairing) */
	smp_release();
	host.used_idx++;
	ring.used->idx = host.used_idx;

	return true;
}

void call_used(void)
{
	/* Flush in previous flags write */
	/* Barrier D (for pairing) */
	smp_mb();
	if (!vring_need_event(vring_used_event(&ring),
			      host.used_idx,
			      host.called_used_idx))
		return;

	host.called_used_idx = host.used_idx;
	call();
}
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644
index 000000000000..84fc2c557aaa
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_poll.c
@@ -0,0 +1,2 @@
#define RING_POLL 1
#include "virtio_ring_0_9.c"