aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile11
-rw-r--r--tools/lguest/lguest.txt2
-rw-r--r--tools/net/Makefile15
-rw-r--r--tools/net/bpf_jit_disasm.c199
-rw-r--r--tools/testing/selftests/Makefile9
-rw-r--r--tools/testing/selftests/net/.gitignore3
-rw-r--r--tools/testing/selftests/net/Makefile19
-rw-r--r--tools/testing/selftests/net/psock_fanout.c312
-rw-r--r--tools/testing/selftests/net/psock_lib.h127
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c824
-rw-r--r--tools/testing/selftests/net/run_afpackettests26
-rw-r--r--tools/testing/selftests/net/run_netsocktests12
-rw-r--r--tools/testing/selftests/net/socket.c92
-rw-r--r--tools/testing/selftests/ptrace/Makefile10
-rw-r--r--tools/testing/selftests/ptrace/peeksiginfo.c214
-rw-r--r--tools/testing/selftests/soft-dirty/Makefile10
-rw-r--r--tools/testing/selftests/soft-dirty/soft-dirty.c114
-rw-r--r--tools/virtio/Makefile10
-rw-r--r--tools/virtio/asm/barrier.h14
-rw-r--r--tools/virtio/linux/bug.h10
-rw-r--r--tools/virtio/linux/err.h26
-rw-r--r--tools/virtio/linux/export.h5
-rw-r--r--tools/virtio/linux/irqreturn.h1
-rw-r--r--tools/virtio/linux/kernel.h112
-rw-r--r--tools/virtio/linux/module.h1
-rw-r--r--tools/virtio/linux/printk.h4
-rw-r--r--tools/virtio/linux/ratelimit.h4
-rw-r--r--tools/virtio/linux/scatterlist.h189
-rw-r--r--tools/virtio/linux/types.h28
-rw-r--r--tools/virtio/linux/uaccess.h50
-rw-r--r--tools/virtio/linux/uio.h3
-rw-r--r--tools/virtio/linux/virtio.h171
-rw-r--r--tools/virtio/linux/virtio_config.h6
-rw-r--r--tools/virtio/linux/virtio_ring.h1
-rw-r--r--tools/virtio/linux/vringh.h1
-rw-r--r--tools/virtio/uapi/linux/uio.h1
-rw-r--r--tools/virtio/uapi/linux/virtio_config.h1
-rw-r--r--tools/virtio/uapi/linux/virtio_ring.h4
-rw-r--r--tools/virtio/virtio_test.c13
-rw-r--r--tools/virtio/vringh_test.c741
40 files changed, 3228 insertions, 167 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 6aaeb6cd867d..41067f304215 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -12,6 +12,7 @@ help:
12 @echo ' turbostat - Intel CPU idle stats and freq reporting tool' 12 @echo ' turbostat - Intel CPU idle stats and freq reporting tool'
13 @echo ' usb - USB testing tools' 13 @echo ' usb - USB testing tools'
14 @echo ' virtio - vhost test module' 14 @echo ' virtio - vhost test module'
15 @echo ' net - misc networking tools'
15 @echo ' vm - misc vm tools' 16 @echo ' vm - misc vm tools'
16 @echo ' x86_energy_perf_policy - Intel energy policy tool' 17 @echo ' x86_energy_perf_policy - Intel energy policy tool'
17 @echo '' 18 @echo ''
@@ -34,7 +35,7 @@ help:
34cpupower: FORCE 35cpupower: FORCE
35 $(call descend,power/$@) 36 $(call descend,power/$@)
36 37
37cgroup firewire guest usb virtio vm: FORCE 38cgroup firewire guest usb virtio vm net: FORCE
38 $(call descend,$@) 39 $(call descend,$@)
39 40
40liblk: FORCE 41liblk: FORCE
@@ -52,7 +53,7 @@ turbostat x86_energy_perf_policy: FORCE
52cpupower_install: 53cpupower_install:
53 $(call descend,power/$(@:_install=),install) 54 $(call descend,power/$(@:_install=),install)
54 55
55cgroup_install firewire_install lguest_install perf_install usb_install virtio_install vm_install: 56cgroup_install firewire_install lguest_install perf_install usb_install virtio_install vm_install net_install:
56 $(call descend,$(@:_install=),install) 57 $(call descend,$(@:_install=),install)
57 58
58selftests_install: 59selftests_install:
@@ -63,12 +64,12 @@ turbostat_install x86_energy_perf_policy_install:
63 64
64install: cgroup_install cpupower_install firewire_install lguest_install \ 65install: cgroup_install cpupower_install firewire_install lguest_install \
65 perf_install selftests_install turbostat_install usb_install \ 66 perf_install selftests_install turbostat_install usb_install \
66 virtio_install vm_install x86_energy_perf_policy_install 67 virtio_install vm_install net_install x86_energy_perf_policy_install
67 68
68cpupower_clean: 69cpupower_clean:
69 $(call descend,power/cpupower,clean) 70 $(call descend,power/cpupower,clean)
70 71
71cgroup_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean: 72cgroup_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean net_clean:
72 $(call descend,$(@:_clean=),clean) 73 $(call descend,$(@:_clean=),clean)
73 74
74liblk_clean: 75liblk_clean:
@@ -85,6 +86,6 @@ turbostat_clean x86_energy_perf_policy_clean:
85 86
86clean: cgroup_clean cpupower_clean firewire_clean lguest_clean perf_clean \ 87clean: cgroup_clean cpupower_clean firewire_clean lguest_clean perf_clean \
87 selftests_clean turbostat_clean usb_clean virtio_clean \ 88 selftests_clean turbostat_clean usb_clean virtio_clean \
88 vm_clean x86_energy_perf_policy_clean 89 vm_clean net_clean x86_energy_perf_policy_clean
89 90
90.PHONY: FORCE 91.PHONY: FORCE
diff --git a/tools/lguest/lguest.txt b/tools/lguest/lguest.txt
index 7203ace65e83..06e1f4649511 100644
--- a/tools/lguest/lguest.txt
+++ b/tools/lguest/lguest.txt
@@ -70,7 +70,7 @@ Running Lguest:
70 70
71- Run an lguest as root: 71- Run an lguest as root:
72 72
73 Documentation/virtual/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \ 73 tools/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \
74 --block=rootfile root=/dev/vda 74 --block=rootfile root=/dev/vda
75 75
76 Explanation: 76 Explanation:
diff --git a/tools/net/Makefile b/tools/net/Makefile
new file mode 100644
index 000000000000..b4444d53b73f
--- /dev/null
+++ b/tools/net/Makefile
@@ -0,0 +1,15 @@
1prefix = /usr
2
3CC = gcc
4
5all : bpf_jit_disasm
6
7bpf_jit_disasm : CFLAGS = -Wall -O2
8bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
9bpf_jit_disasm : bpf_jit_disasm.o
10
11clean :
12 rm -rf *.o bpf_jit_disasm
13
14install :
15 install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm
diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c
new file mode 100644
index 000000000000..cfe0cdcda3de
--- /dev/null
+++ b/tools/net/bpf_jit_disasm.c
@@ -0,0 +1,199 @@
1/*
2 * Minimal BPF JIT image disassembler
3 *
4 * Disassembles BPF JIT compiler emitted opcodes back to asm insn's for
5 * debugging or verification purposes.
6 *
7 * To get the disassembly of the JIT code, do the following:
8 *
9 * 1) `echo 2 > /proc/sys/net/core/bpf_jit_enable`
10 * 2) Load a BPF filter (e.g. `tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24`)
11 * 3) Run e.g. `bpf_jit_disasm -o` to read out the last JIT code
12 *
13 * Copyright 2013 Daniel Borkmann <borkmann@redhat.com>
14 * Licensed under the GNU General Public License, version 2.0 (GPLv2)
15 */
16
17#include <stdint.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <assert.h>
21#include <unistd.h>
22#include <string.h>
23#include <bfd.h>
24#include <dis-asm.h>
25#include <sys/klog.h>
26#include <sys/types.h>
27#include <regex.h>
28
29static void get_exec_path(char *tpath, size_t size)
30{
31 char *path;
32 ssize_t len;
33
34 snprintf(tpath, size, "/proc/%d/exe", (int) getpid());
35 tpath[size - 1] = 0;
36
37 path = strdup(tpath);
38 assert(path);
39
40 len = readlink(path, tpath, size);
41 tpath[len] = 0;
42
43 free(path);
44}
45
46static void get_asm_insns(uint8_t *image, size_t len, unsigned long base,
47 int opcodes)
48{
49 int count, i, pc = 0;
50 char tpath[256];
51 struct disassemble_info info;
52 disassembler_ftype disassemble;
53 bfd *bfdf;
54
55 memset(tpath, 0, sizeof(tpath));
56 get_exec_path(tpath, sizeof(tpath));
57
58 bfdf = bfd_openr(tpath, NULL);
59 assert(bfdf);
60 assert(bfd_check_format(bfdf, bfd_object));
61
62 init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf);
63 info.arch = bfd_get_arch(bfdf);
64 info.mach = bfd_get_mach(bfdf);
65 info.buffer = image;
66 info.buffer_length = len;
67
68 disassemble_init_for_target(&info);
69
70 disassemble = disassembler(bfdf);
71 assert(disassemble);
72
73 do {
74 printf("%4x:\t", pc);
75
76 count = disassemble(pc, &info);
77
78 if (opcodes) {
79 printf("\n\t");
80 for (i = 0; i < count; ++i)
81 printf("%02x ", (uint8_t) image[pc + i]);
82 }
83 printf("\n");
84
85 pc += count;
86 } while(count > 0 && pc < len);
87
88 bfd_close(bfdf);
89}
90
91static char *get_klog_buff(int *klen)
92{
93 int ret, len = klogctl(10, NULL, 0);
94 char *buff = malloc(len);
95
96 assert(buff && klen);
97 ret = klogctl(3, buff, len);
98 assert(ret >= 0);
99 *klen = ret;
100
101 return buff;
102}
103
104static void put_klog_buff(char *buff)
105{
106 free(buff);
107}
108
109static int get_last_jit_image(char *haystack, size_t hlen,
110 uint8_t *image, size_t ilen,
111 unsigned long *base)
112{
113 char *ptr, *pptr, *tmp;
114 off_t off = 0;
115 int ret, flen, proglen, pass, ulen = 0;
116 regmatch_t pmatch[1];
117 regex_t regex;
118
119 if (hlen == 0)
120 return 0;
121
122 ret = regcomp(&regex, "flen=[[:alnum:]]+ proglen=[[:digit:]]+ "
123 "pass=[[:digit:]]+ image=[[:xdigit:]]+", REG_EXTENDED);
124 assert(ret == 0);
125
126 ptr = haystack;
127 while (1) {
128 ret = regexec(&regex, ptr, 1, pmatch, 0);
129 if (ret == 0) {
130 ptr += pmatch[0].rm_eo;
131 off += pmatch[0].rm_eo;
132 assert(off < hlen);
133 } else
134 break;
135 }
136
137 ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so);
138 ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx",
139 &flen, &proglen, &pass, base);
140 if (ret != 4)
141 return 0;
142
143 tmp = ptr = haystack + off;
144 while ((ptr = strtok(tmp, "\n")) != NULL && ulen < ilen) {
145 tmp = NULL;
146 if (!strstr(ptr, "JIT code"))
147 continue;
148 pptr = ptr;
149 while ((ptr = strstr(pptr, ":")))
150 pptr = ptr + 1;
151 ptr = pptr;
152 do {
153 image[ulen++] = (uint8_t) strtoul(pptr, &pptr, 16);
154 if (ptr == pptr || ulen >= ilen) {
155 ulen--;
156 break;
157 }
158 ptr = pptr;
159 } while (1);
160 }
161
162 assert(ulen == proglen);
163 printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n",
164 proglen, pass, flen);
165 printf("%lx + <x>:\n", *base);
166
167 regfree(&regex);
168 return ulen;
169}
170
171int main(int argc, char **argv)
172{
173 int len, klen, opcodes = 0;
174 char *kbuff;
175 unsigned long base;
176 uint8_t image[4096];
177
178 if (argc > 1) {
179 if (!strncmp("-o", argv[argc - 1], 2)) {
180 opcodes = 1;
181 } else {
182 printf("usage: bpf_jit_disasm [-o: show opcodes]\n");
183 exit(0);
184 }
185 }
186
187 bfd_init();
188 memset(image, 0, sizeof(image));
189
190 kbuff = get_klog_buff(&klen);
191
192 len = get_last_jit_image(kbuff, klen, image, sizeof(image), &base);
193 if (len > 0 && base > 0)
194 get_asm_insns(image, len, base, opcodes);
195
196 put_klog_buff(kbuff);
197
198 return 0;
199}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 3cc0ad7ae863..d4abc59ce1d9 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,10 +1,13 @@
1TARGETS = breakpoints 1TARGETS = breakpoints
2TARGETS += cpu-hotplug
3TARGETS += efivarfs
2TARGETS += kcmp 4TARGETS += kcmp
5TARGETS += memory-hotplug
3TARGETS += mqueue 6TARGETS += mqueue
7TARGETS += net
8TARGETS += ptrace
9TARGETS += soft-dirty
4TARGETS += vm 10TARGETS += vm
5TARGETS += cpu-hotplug
6TARGETS += memory-hotplug
7TARGETS += efivarfs
8 11
9all: 12all:
10 for TARGET in $(TARGETS); do \ 13 for TARGET in $(TARGETS); do \
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
new file mode 100644
index 000000000000..00326629d4af
--- /dev/null
+++ b/tools/testing/selftests/net/.gitignore
@@ -0,0 +1,3 @@
1socket
2psock_fanout
3psock_tpacket
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
new file mode 100644
index 000000000000..750512ba2c88
--- /dev/null
+++ b/tools/testing/selftests/net/Makefile
@@ -0,0 +1,19 @@
1# Makefile for net selftests
2
3CC = $(CROSS_COMPILE)gcc
4CFLAGS = -Wall -O2 -g
5
6CFLAGS += -I../../../../usr/include/
7
8NET_PROGS = socket psock_fanout psock_tpacket
9
10all: $(NET_PROGS)
11%: %.c
12 $(CC) $(CFLAGS) -o $@ $^
13
14run_tests: all
15 @/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]"
16 @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]"
17
18clean:
19 $(RM) $(NET_PROGS)
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
new file mode 100644
index 000000000000..57b9c2b7c4ff
--- /dev/null
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -0,0 +1,312 @@
1/*
2 * Copyright 2013 Google Inc.
3 * Author: Willem de Bruijn (willemb@google.com)
4 *
5 * A basic test of packet socket fanout behavior.
6 *
7 * Control:
8 * - create fanout fails as expected with illegal flag combinations
9 * - join fanout fails as expected with diverging types or flags
10 *
11 * Datapath:
12 * Open a pair of packet sockets and a pair of INET sockets, send a known
13 * number of packets across the two INET sockets and count the number of
14 * packets enqueued onto the two packet sockets.
15 *
16 * The test currently runs for
17 * - PACKET_FANOUT_HASH
18 * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER
19 * - PACKET_FANOUT_LB
20 * - PACKET_FANOUT_CPU
21 * - PACKET_FANOUT_ROLLOVER
22 *
23 * Todo:
24 * - functionality: PACKET_FANOUT_FLAG_DEFRAG
25 *
26 * License (GPLv2):
27 *
28 * This program is free software; you can redistribute it and/or modify it
29 * under the terms and conditions of the GNU General Public License,
30 * version 2, as published by the Free Software Foundation.
31 *
32 * This program is distributed in the hope it will be useful, but WITHOUT
33 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
34 * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
35 * more details.
36 *
37 * You should have received a copy of the GNU General Public License along with
38 * this program; if not, write to the Free Software Foundation, Inc.,
39 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
40 */
41
42#define _GNU_SOURCE /* for sched_setaffinity */
43
44#include <arpa/inet.h>
45#include <errno.h>
46#include <fcntl.h>
47#include <linux/filter.h>
48#include <linux/if_packet.h>
49#include <net/ethernet.h>
50#include <netinet/ip.h>
51#include <netinet/udp.h>
52#include <poll.h>
53#include <sched.h>
54#include <stdint.h>
55#include <stdio.h>
56#include <stdlib.h>
57#include <string.h>
58#include <sys/mman.h>
59#include <sys/socket.h>
60#include <sys/stat.h>
61#include <sys/types.h>
62#include <unistd.h>
63
64#include "psock_lib.h"
65
66#define RING_NUM_FRAMES 20
67
68/* Open a socket in a given fanout mode.
69 * @return -1 if mode is bad, a valid socket otherwise */
70static int sock_fanout_open(uint16_t typeflags, int num_packets)
71{
72 int fd, val;
73
74 fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
75 if (fd < 0) {
76 perror("socket packet");
77 exit(1);
78 }
79
80 /* fanout group ID is always 0: tests whether old groups are deleted */
81 val = ((int) typeflags) << 16;
82 if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
83 if (close(fd)) {
84 perror("close packet");
85 exit(1);
86 }
87 return -1;
88 }
89
90 pair_udp_setfilter(fd);
91 return fd;
92}
93
94static char *sock_fanout_open_ring(int fd)
95{
96 struct tpacket_req req = {
97 .tp_block_size = getpagesize(),
98 .tp_frame_size = getpagesize(),
99 .tp_block_nr = RING_NUM_FRAMES,
100 .tp_frame_nr = RING_NUM_FRAMES,
101 };
102 char *ring;
103 int val = TPACKET_V2;
104
105 if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val,
106 sizeof(val))) {
107 perror("packetsock ring setsockopt version");
108 exit(1);
109 }
110 if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req,
111 sizeof(req))) {
112 perror("packetsock ring setsockopt");
113 exit(1);
114 }
115
116 ring = mmap(0, req.tp_block_size * req.tp_block_nr,
117 PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
118 if (!ring) {
119 fprintf(stderr, "packetsock ring mmap\n");
120 exit(1);
121 }
122
123 return ring;
124}
125
126static int sock_fanout_read_ring(int fd, void *ring)
127{
128 struct tpacket2_hdr *header = ring;
129 int count = 0;
130
131 while (header->tp_status & TP_STATUS_USER && count < RING_NUM_FRAMES) {
132 count++;
133 header = ring + (count * getpagesize());
134 }
135
136 return count;
137}
138
139static int sock_fanout_read(int fds[], char *rings[], const int expect[])
140{
141 int ret[2];
142
143 ret[0] = sock_fanout_read_ring(fds[0], rings[0]);
144 ret[1] = sock_fanout_read_ring(fds[1], rings[1]);
145
146 fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
147 ret[0], ret[1], expect[0], expect[1]);
148
149 if ((!(ret[0] == expect[0] && ret[1] == expect[1])) &&
150 (!(ret[0] == expect[1] && ret[1] == expect[0]))) {
151 fprintf(stderr, "ERROR: incorrect queue lengths\n");
152 return 1;
153 }
154
155 return 0;
156}
157
158/* Test illegal mode + flag combination */
159static void test_control_single(void)
160{
161 fprintf(stderr, "test: control single socket\n");
162
163 if (sock_fanout_open(PACKET_FANOUT_ROLLOVER |
164 PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
165 fprintf(stderr, "ERROR: opened socket with dual rollover\n");
166 exit(1);
167 }
168}
169
170/* Test illegal group with different modes or flags */
171static void test_control_group(void)
172{
173 int fds[2];
174
175 fprintf(stderr, "test: control multiple sockets\n");
176
177 fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 20);
178 if (fds[0] == -1) {
179 fprintf(stderr, "ERROR: failed to open HASH socket\n");
180 exit(1);
181 }
182 if (sock_fanout_open(PACKET_FANOUT_HASH |
183 PACKET_FANOUT_FLAG_DEFRAG, 10) != -1) {
184 fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
185 exit(1);
186 }
187 if (sock_fanout_open(PACKET_FANOUT_HASH |
188 PACKET_FANOUT_FLAG_ROLLOVER, 10) != -1) {
189 fprintf(stderr, "ERROR: joined group with wrong flag ro\n");
190 exit(1);
191 }
192 if (sock_fanout_open(PACKET_FANOUT_CPU, 10) != -1) {
193 fprintf(stderr, "ERROR: joined group with wrong mode\n");
194 exit(1);
195 }
196 fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 20);
197 if (fds[1] == -1) {
198 fprintf(stderr, "ERROR: failed to join group\n");
199 exit(1);
200 }
201 if (close(fds[1]) || close(fds[0])) {
202 fprintf(stderr, "ERROR: closing sockets\n");
203 exit(1);
204 }
205}
206
207static int test_datapath(uint16_t typeflags, int port_off,
208 const int expect1[], const int expect2[])
209{
210 const int expect0[] = { 0, 0 };
211 char *rings[2];
212 int fds[2], fds_udp[2][2], ret;
213
214 fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
215
216 fds[0] = sock_fanout_open(typeflags, 20);
217 fds[1] = sock_fanout_open(typeflags, 20);
218 if (fds[0] == -1 || fds[1] == -1) {
219 fprintf(stderr, "ERROR: failed open\n");
220 exit(1);
221 }
222 rings[0] = sock_fanout_open_ring(fds[0]);
223 rings[1] = sock_fanout_open_ring(fds[1]);
224 pair_udp_open(fds_udp[0], PORT_BASE);
225 pair_udp_open(fds_udp[1], PORT_BASE + port_off);
226 sock_fanout_read(fds, rings, expect0);
227
228 /* Send data, but not enough to overflow a queue */
229 pair_udp_send(fds_udp[0], 15);
230 pair_udp_send(fds_udp[1], 5);
231 ret = sock_fanout_read(fds, rings, expect1);
232
233 /* Send more data, overflow the queue */
234 pair_udp_send(fds_udp[0], 15);
235 /* TODO: ensure consistent order between expect1 and expect2 */
236 ret |= sock_fanout_read(fds, rings, expect2);
237
238 if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) ||
239 munmap(rings[0], RING_NUM_FRAMES * getpagesize())) {
240 fprintf(stderr, "close rings\n");
241 exit(1);
242 }
243 if (close(fds_udp[1][1]) || close(fds_udp[1][0]) ||
244 close(fds_udp[0][1]) || close(fds_udp[0][0]) ||
245 close(fds[1]) || close(fds[0])) {
246 fprintf(stderr, "close datapath\n");
247 exit(1);
248 }
249
250 return ret;
251}
252
253static int set_cpuaffinity(int cpuid)
254{
255 cpu_set_t mask;
256
257 CPU_ZERO(&mask);
258 CPU_SET(cpuid, &mask);
259 if (sched_setaffinity(0, sizeof(mask), &mask)) {
260 if (errno != EINVAL) {
261 fprintf(stderr, "setaffinity %d\n", cpuid);
262 exit(1);
263 }
264 return 1;
265 }
266
267 return 0;
268}
269
270int main(int argc, char **argv)
271{
272 const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } };
273 const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } };
274 const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } };
275 const int expect_rb[2][2] = { { 20, 0 }, { 20, 15 } };
276 const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } };
277 const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } };
278 int port_off = 2, tries = 5, ret;
279
280 test_control_single();
281 test_control_group();
282
283 /* find a set of ports that do not collide onto the same socket */
284 ret = test_datapath(PACKET_FANOUT_HASH, port_off,
285 expect_hash[0], expect_hash[1]);
286 while (ret && tries--) {
287 fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
288 ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
289 expect_hash[0], expect_hash[1]);
290 }
291
292 ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
293 port_off, expect_hash_rb[0], expect_hash_rb[1]);
294 ret |= test_datapath(PACKET_FANOUT_LB,
295 port_off, expect_lb[0], expect_lb[1]);
296 ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
297 port_off, expect_rb[0], expect_rb[1]);
298
299 set_cpuaffinity(0);
300 ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
301 expect_cpu0[0], expect_cpu0[1]);
302 if (!set_cpuaffinity(1))
303 /* TODO: test that choice alternates with previous */
304 ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
305 expect_cpu1[0], expect_cpu1[1]);
306
307 if (ret)
308 return 1;
309
310 printf("OK. All tests passed\n");
311 return 0;
312}
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
new file mode 100644
index 000000000000..37da54ac85a9
--- /dev/null
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -0,0 +1,127 @@
1/*
2 * Copyright 2013 Google Inc.
3 * Author: Willem de Bruijn <willemb@google.com>
4 * Daniel Borkmann <dborkman@redhat.com>
5 *
6 * License (GPLv2):
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#ifndef PSOCK_LIB_H
23#define PSOCK_LIB_H
24
25#include <sys/types.h>
26#include <sys/socket.h>
27#include <string.h>
28#include <arpa/inet.h>
29#include <unistd.h>
30
31#define DATA_LEN 100
32#define DATA_CHAR 'a'
33
34#define PORT_BASE 8000
35
36#ifndef __maybe_unused
37# define __maybe_unused __attribute__ ((__unused__))
38#endif
39
40static __maybe_unused void pair_udp_setfilter(int fd)
41{
42 struct sock_filter bpf_filter[] = {
43 { 0x80, 0, 0, 0x00000000 }, /* LD pktlen */
44 { 0x35, 0, 5, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/
45 { 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */
46 { 0x15, 0, 3, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/
47 { 0x30, 0, 0, 0x00000051 }, /* LD ip[81] */
48 { 0x15, 0, 1, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/
49 { 0x06, 0, 0, 0x00000060 }, /* RET match */
50 { 0x06, 0, 0, 0x00000000 }, /* RET no match */
51 };
52 struct sock_fprog bpf_prog;
53
54 bpf_prog.filter = bpf_filter;
55 bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
56 if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
57 sizeof(bpf_prog))) {
58 perror("setsockopt SO_ATTACH_FILTER");
59 exit(1);
60 }
61}
62
63static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
64{
65 struct sockaddr_in saddr, daddr;
66
67 fds[0] = socket(PF_INET, SOCK_DGRAM, 0);
68 fds[1] = socket(PF_INET, SOCK_DGRAM, 0);
69 if (fds[0] == -1 || fds[1] == -1) {
70 fprintf(stderr, "ERROR: socket dgram\n");
71 exit(1);
72 }
73
74 memset(&saddr, 0, sizeof(saddr));
75 saddr.sin_family = AF_INET;
76 saddr.sin_port = htons(port);
77 saddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
78
79 memset(&daddr, 0, sizeof(daddr));
80 daddr.sin_family = AF_INET;
81 daddr.sin_port = htons(port + 1);
82 daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
83
84 /* must bind both to get consistent hash result */
85 if (bind(fds[1], (void *) &daddr, sizeof(daddr))) {
86 perror("bind");
87 exit(1);
88 }
89 if (bind(fds[0], (void *) &saddr, sizeof(saddr))) {
90 perror("bind");
91 exit(1);
92 }
93 if (connect(fds[0], (void *) &daddr, sizeof(daddr))) {
94 perror("connect");
95 exit(1);
96 }
97}
98
99static __maybe_unused void pair_udp_send(int fds[], int num)
100{
101 char buf[DATA_LEN], rbuf[DATA_LEN];
102
103 memset(buf, DATA_CHAR, sizeof(buf));
104 while (num--) {
105 /* Should really handle EINTR and EAGAIN */
106 if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) {
107 fprintf(stderr, "ERROR: send failed left=%d\n", num);
108 exit(1);
109 }
110 if (read(fds[1], rbuf, sizeof(rbuf)) != sizeof(rbuf)) {
111 fprintf(stderr, "ERROR: recv failed left=%d\n", num);
112 exit(1);
113 }
114 if (memcmp(buf, rbuf, sizeof(buf))) {
115 fprintf(stderr, "ERROR: data failed left=%d\n", num);
116 exit(1);
117 }
118 }
119}
120
121static __maybe_unused void pair_udp_close(int fds[])
122{
123 close(fds[0]);
124 close(fds[1]);
125}
126
127#endif /* PSOCK_LIB_H */
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
new file mode 100644
index 000000000000..c41b58640a05
--- /dev/null
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -0,0 +1,824 @@
1/*
2 * Copyright 2013 Red Hat, Inc.
3 * Author: Daniel Borkmann <dborkman@redhat.com>
4 *
5 * A basic test of packet socket's TPACKET_V1/TPACKET_V2/TPACKET_V3 behavior.
6 *
7 * Control:
8 * Test the setup of the TPACKET socket with different patterns that are
9 * known to fail (TODO) resp. succeed (OK).
10 *
11 * Datapath:
12 * Open a pair of packet sockets and send resp. receive an a priori known
13 * packet pattern accross the sockets and check if it was received resp.
14 * sent correctly. Fanout in combination with RX_RING is currently not
15 * tested here.
16 *
17 * The test currently runs for
18 * - TPACKET_V1: RX_RING, TX_RING
19 * - TPACKET_V2: RX_RING, TX_RING
20 * - TPACKET_V3: RX_RING
21 *
22 * License (GPLv2):
23 *
24 * This program is free software; you can redistribute it and/or modify it
25 * under the terms and conditions of the GNU General Public License,
26 * version 2, as published by the Free Software Foundation.
27 *
28 * This program is distributed in the hope it will be useful, but WITHOUT
29 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
30 * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
31 * more details.
32 *
33 * You should have received a copy of the GNU General Public License along with
34 * this program; if not, write to the Free Software Foundation, Inc.,
35 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
36 */
37
38#include <stdio.h>
39#include <stdlib.h>
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <sys/socket.h>
43#include <sys/mman.h>
44#include <linux/if_packet.h>
45#include <linux/filter.h>
46#include <ctype.h>
47#include <fcntl.h>
48#include <unistd.h>
49#include <bits/wordsize.h>
50#include <net/ethernet.h>
51#include <netinet/ip.h>
52#include <arpa/inet.h>
53#include <stdint.h>
54#include <string.h>
55#include <assert.h>
56#include <net/if.h>
57#include <inttypes.h>
58#include <poll.h>
59
60#include "psock_lib.h"
61
62#ifndef bug_on
63# define bug_on(cond) assert(!(cond))
64#endif
65
66#ifndef __aligned_tpacket
67# define __aligned_tpacket __attribute__((aligned(TPACKET_ALIGNMENT)))
68#endif
69
70#ifndef __align_tpacket
71# define __align_tpacket(x) __attribute__((aligned(TPACKET_ALIGN(x))))
72#endif
73
74#define BLOCK_STATUS(x) ((x)->h1.block_status)
75#define BLOCK_NUM_PKTS(x) ((x)->h1.num_pkts)
76#define BLOCK_O2FP(x) ((x)->h1.offset_to_first_pkt)
77#define BLOCK_LEN(x) ((x)->h1.blk_len)
78#define BLOCK_SNUM(x) ((x)->h1.seq_num)
79#define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
80#define BLOCK_PRIV(x) ((void *) ((uint8_t *) (x) + BLOCK_O2PRIV(x)))
81#define BLOCK_HDR_LEN (ALIGN_8(sizeof(struct block_desc)))
82#define ALIGN_8(x) (((x) + 8 - 1) & ~(8 - 1))
83#define BLOCK_PLUS_PRIV(sz_pri) (BLOCK_HDR_LEN + ALIGN_8((sz_pri)))
84
85#define NUM_PACKETS 100
86
87struct ring {
88 struct iovec *rd;
89 uint8_t *mm_space;
90 size_t mm_len, rd_len;
91 struct sockaddr_ll ll;
92 void (*walk)(int sock, struct ring *ring);
93 int type, rd_num, flen, version;
94 union {
95 struct tpacket_req req;
96 struct tpacket_req3 req3;
97 };
98};
99
100struct block_desc {
101 uint32_t version;
102 uint32_t offset_to_priv;
103 struct tpacket_hdr_v1 h1;
104};
105
106union frame_map {
107 struct {
108 struct tpacket_hdr tp_h __aligned_tpacket;
109 struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket_hdr));
110 } *v1;
111 struct {
112 struct tpacket2_hdr tp_h __aligned_tpacket;
113 struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr));
114 } *v2;
115 void *raw;
116};
117
118static unsigned int total_packets, total_bytes;
119
120static int pfsocket(int ver)
121{
122 int ret, sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
123 if (sock == -1) {
124 perror("socket");
125 exit(1);
126 }
127
128 ret = setsockopt(sock, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
129 if (ret == -1) {
130 perror("setsockopt");
131 exit(1);
132 }
133
134 return sock;
135}
136
137static void status_bar_update(void)
138{
139 if (total_packets % 10 == 0) {
140 fprintf(stderr, ".");
141 fflush(stderr);
142 }
143}
144
145static void test_payload(void *pay, size_t len)
146{
147 struct ethhdr *eth = pay;
148
149 if (len < sizeof(struct ethhdr)) {
150 fprintf(stderr, "test_payload: packet too "
151 "small: %zu bytes!\n", len);
152 exit(1);
153 }
154
155 if (eth->h_proto != htons(ETH_P_IP)) {
156 fprintf(stderr, "test_payload: wrong ethernet "
157 "type: 0x%x!\n", ntohs(eth->h_proto));
158 exit(1);
159 }
160}
161
162static void create_payload(void *pay, size_t *len)
163{
164 int i;
165 struct ethhdr *eth = pay;
166 struct iphdr *ip = pay + sizeof(*eth);
167
168 /* Lets create some broken crap, that still passes
169 * our BPF filter.
170 */
171
172 *len = DATA_LEN + 42;
173
174 memset(pay, 0xff, ETH_ALEN * 2);
175 eth->h_proto = htons(ETH_P_IP);
176
177 for (i = 0; i < sizeof(*ip); ++i)
178 ((uint8_t *) pay)[i + sizeof(*eth)] = (uint8_t) rand();
179
180 ip->ihl = 5;
181 ip->version = 4;
182 ip->protocol = 0x11;
183 ip->frag_off = 0;
184 ip->ttl = 64;
185 ip->tot_len = htons((uint16_t) *len - sizeof(*eth));
186
187 ip->saddr = htonl(INADDR_LOOPBACK);
188 ip->daddr = htonl(INADDR_LOOPBACK);
189
190 memset(pay + sizeof(*eth) + sizeof(*ip),
191 DATA_CHAR, DATA_LEN);
192}
193
194static inline int __v1_rx_kernel_ready(struct tpacket_hdr *hdr)
195{
196 return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER);
197}
198
199static inline void __v1_rx_user_ready(struct tpacket_hdr *hdr)
200{
201 hdr->tp_status = TP_STATUS_KERNEL;
202 __sync_synchronize();
203}
204
205static inline int __v2_rx_kernel_ready(struct tpacket2_hdr *hdr)
206{
207 return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER);
208}
209
210static inline void __v2_rx_user_ready(struct tpacket2_hdr *hdr)
211{
212 hdr->tp_status = TP_STATUS_KERNEL;
213 __sync_synchronize();
214}
215
216static inline int __v1_v2_rx_kernel_ready(void *base, int version)
217{
218 switch (version) {
219 case TPACKET_V1:
220 return __v1_rx_kernel_ready(base);
221 case TPACKET_V2:
222 return __v2_rx_kernel_ready(base);
223 default:
224 bug_on(1);
225 return 0;
226 }
227}
228
229static inline void __v1_v2_rx_user_ready(void *base, int version)
230{
231 switch (version) {
232 case TPACKET_V1:
233 __v1_rx_user_ready(base);
234 break;
235 case TPACKET_V2:
236 __v2_rx_user_ready(base);
237 break;
238 }
239}
240
/* Receive test traffic through a V1/V2 PACKET_RX_RING bound to lo and
 * verify every frame's payload.  The loop expects 2 * NUM_PACKETS --
 * presumably each loopback packet is seen on both the tx and rx path
 * of the packet socket (total_bytes is halved in the summary for the
 * same reason). */
241static void walk_v1_v2_rx(int sock, struct ring *ring)
242{
243	struct pollfd pfd;
244	int udp_sock[2];
245	union frame_map ppd;
246	unsigned int frame_num = 0;
247
248	bug_on(ring->type != PACKET_RX_RING);
249
250	pair_udp_open(udp_sock, PORT_BASE);
251	pair_udp_setfilter(sock);
252
253	memset(&pfd, 0, sizeof(pfd));
254	pfd.fd = sock;
255	pfd.events = POLLIN | POLLERR;
256	pfd.revents = 0;
257
258	pair_udp_send(udp_sock, NUM_PACKETS);
259
	/* Drain every frame the kernel has released to user space,
	 * then poll briefly for more. */
260	while (total_packets < NUM_PACKETS * 2) {
261		while (__v1_v2_rx_kernel_ready(ring->rd[frame_num].iov_base,
262					       ring->version)) {
263			ppd.raw = ring->rd[frame_num].iov_base;
264
265			switch (ring->version) {
266			case TPACKET_V1:
267				test_payload((uint8_t *) ppd.raw + ppd.v1->tp_h.tp_mac,
268					     ppd.v1->tp_h.tp_snaplen);
269				total_bytes += ppd.v1->tp_h.tp_snaplen;
270				break;
271
272			case TPACKET_V2:
273				test_payload((uint8_t *) ppd.raw + ppd.v2->tp_h.tp_mac,
274					     ppd.v2->tp_h.tp_snaplen);
275				total_bytes += ppd.v2->tp_h.tp_snaplen;
276				break;
277			}
278
279			status_bar_update();
280			total_packets++;
281
	/* Return the frame to the kernel before advancing. */
282			__v1_v2_rx_user_ready(ppd.raw, ring->version);
283
284			frame_num = (frame_num + 1) % ring->rd_num;
285		}
286
287		poll(&pfd, 1, 1);
288	}
289
290	pair_udp_close(udp_sock);
291
	/* NOTE(review): the message prints NUM_PACKETS although the
	 * expected count checked above is 2 * NUM_PACKETS. */
292	if (total_packets != 2 * NUM_PACKETS) {
293		fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
294			ring->version, total_packets, NUM_PACKETS);
295		exit(1);
296	}
297
298	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
299}
300
/* TX ring status helpers for TPACKET_V1/V2: a slot may be refilled when
 * it is neither queued for send nor currently being sent; handing it to
 * the kernel means setting TP_STATUS_SEND_REQUEST plus a full barrier. */
301static inline int __v1_tx_kernel_ready(struct tpacket_hdr *hdr)
302{
303	return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
304}
305
306static inline void __v1_tx_user_ready(struct tpacket_hdr *hdr)
307{
308	hdr->tp_status = TP_STATUS_SEND_REQUEST;
309	__sync_synchronize();
310}
311
312static inline int __v2_tx_kernel_ready(struct tpacket2_hdr *hdr)
313{
314	return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
315}
316
317static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
318{
319	hdr->tp_status = TP_STATUS_SEND_REQUEST;
320	__sync_synchronize();
321}
322
/* Version dispatchers; anything but V1/V2 here is a test bug. */
323static inline int __v1_v2_tx_kernel_ready(void *base, int version)
324{
325	switch (version) {
326	case TPACKET_V1:
327		return __v1_tx_kernel_ready(base);
328	case TPACKET_V2:
329		return __v2_tx_kernel_ready(base);
330	default:
331		bug_on(1);
332		return 0;
333	}
334}
335
336static inline void __v1_v2_tx_user_ready(void *base, int version)
337{
338	switch (version) {
339	case TPACKET_V1:
340		__v1_tx_user_ready(base);
341		break;
342	case TPACKET_V2:
343		__v2_tx_user_ready(base);
344		break;
345	}
346}
347
/* Enable PACKET_LOSS on the TX socket so a malformed frame is dropped
 * instead of stalling the ring. */
348static void __v1_v2_set_packet_loss_discard(int sock)
349{
350	int ret, discard = 1;
351
352	ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *) &discard,
353			 sizeof(discard));
354	if (ret == -1) {
355		perror("setsockopt");
356		exit(1);
357	}
358}
359
360static void walk_v1_v2_tx(int sock, struct ring *ring)
361{
362 struct pollfd pfd;
363 int rcv_sock, ret;
364 size_t packet_len;
365 union frame_map ppd;
366 char packet[1024];
367 unsigned int frame_num = 0, got = 0;
368 struct sockaddr_ll ll = {
369 .sll_family = PF_PACKET,
370 .sll_halen = ETH_ALEN,
371 };
372
373 bug_on(ring->type != PACKET_TX_RING);
374 bug_on(ring->rd_num < NUM_PACKETS);
375
376 rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
377 if (rcv_sock == -1) {
378 perror("socket");
379 exit(1);
380 }
381
382 pair_udp_setfilter(rcv_sock);
383
384 ll.sll_ifindex = if_nametoindex("lo");
385 ret = bind(rcv_sock, (struct sockaddr *) &ll, sizeof(ll));
386 if (ret == -1) {
387 perror("bind");
388 exit(1);
389 }
390
391 memset(&pfd, 0, sizeof(pfd));
392 pfd.fd = sock;
393 pfd.events = POLLOUT | POLLERR;
394 pfd.revents = 0;
395
396 total_packets = NUM_PACKETS;
397 create_payload(packet, &packet_len);
398
399 while (total_packets > 0) {
400 while (__v1_v2_tx_kernel_ready(ring->rd[frame_num].iov_base,
401 ring->version) &&
402 total_packets > 0) {
403 ppd.raw = ring->rd[frame_num].iov_base;
404
405 switch (ring->version) {
406 case TPACKET_V1:
407 ppd.v1->tp_h.tp_snaplen = packet_len;
408 ppd.v1->tp_h.tp_len = packet_len;
409
410 memcpy((uint8_t *) ppd.raw + TPACKET_HDRLEN -
411 sizeof(struct sockaddr_ll), packet,
412 packet_len);
413 total_bytes += ppd.v1->tp_h.tp_snaplen;
414 break;
415
416 case TPACKET_V2:
417 ppd.v2->tp_h.tp_snaplen = packet_len;
418 ppd.v2->tp_h.tp_len = packet_len;
419
420 memcpy((uint8_t *) ppd.raw + TPACKET2_HDRLEN -
421 sizeof(struct sockaddr_ll), packet,
422 packet_len);
423 total_bytes += ppd.v2->tp_h.tp_snaplen;
424 break;
425 }
426
427 status_bar_update();
428 total_packets--;
429
430 __v1_v2_tx_user_ready(ppd.raw, ring->version);
431
432 frame_num = (frame_num + 1) % ring->rd_num;
433 }
434
435 poll(&pfd, 1, 1);
436 }
437
438 bug_on(total_packets != 0);
439
440 ret = sendto(sock, NULL, 0, 0, NULL, 0);
441 if (ret == -1) {
442 perror("sendto");
443 exit(1);
444 }
445
446 while ((ret = recvfrom(rcv_sock, packet, sizeof(packet),
447 0, NULL, NULL)) > 0 &&
448 total_packets < NUM_PACKETS) {
449 got += ret;
450 test_payload(packet, ret);
451
452 status_bar_update();
453 total_packets++;
454 }
455
456 close(rcv_sock);
457
458 if (total_packets != NUM_PACKETS) {
459 fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
460 ring->version, total_packets, NUM_PACKETS);
461 exit(1);
462 }
463
464 fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, got);
465}
466
467static void walk_v1_v2(int sock, struct ring *ring)
468{
469 if (ring->type == PACKET_RX_RING)
470 walk_v1_v2_rx(sock, ring);
471 else
472 walk_v1_v2_tx(sock, ring);
473}
474
/* Sequence number of the most recently consumed V3 block; blocks must
 * arrive with strictly consecutive numbers. */
475static uint64_t __v3_prev_block_seq_num = 0;
476
/* Abort the test if @pbd does not carry the next expected sequence
 * number, then record it as the new previous. */
477void __v3_test_block_seq_num(struct block_desc *pbd)
478{
479	if (__v3_prev_block_seq_num + 1 != BLOCK_SNUM(pbd)) {
480		fprintf(stderr, "\nprev_block_seq_num:%"PRIu64", expected "
481			"seq:%"PRIu64" != actual seq:%"PRIu64"\n",
482			__v3_prev_block_seq_num, __v3_prev_block_seq_num + 1,
483			(uint64_t) BLOCK_SNUM(pbd));
484		exit(1);
485	}
486
487	__v3_prev_block_seq_num = BLOCK_SNUM(pbd);
488}
489
/*
 * Verify the kernel's length accounting for a V3 block.  @bytes is our
 * own sum of per-packet footprints (including alignment padding) on top
 * of the block header + 13 bytes of tp_sizeof_priv (see __v3_fill()).
 * An empty block must be exactly BLOCK_PLUS_PRIV(13) long.
 *
 * Fix: the empty-block diagnostic printed BLOCK_HDR_LEN as the
 * "expected len" although the value actually compared against is
 * BLOCK_PLUS_PRIV(13).
 */
static void __v3_test_block_len(struct block_desc *pbd, uint32_t bytes, int block_num)
{
	if (BLOCK_NUM_PKTS(pbd)) {
		if (bytes != BLOCK_LEN(pbd)) {
			fprintf(stderr, "\nblock:%u with %upackets, expected "
				"len:%u != actual len:%u\n", block_num,
				BLOCK_NUM_PKTS(pbd), bytes, BLOCK_LEN(pbd));
			exit(1);
		}
	} else {
		if (BLOCK_LEN(pbd) != BLOCK_PLUS_PRIV(13)) {
			fprintf(stderr, "\nblock:%u, expected len:%lu != "
				"actual len:%u\n", block_num,
				(unsigned long) BLOCK_PLUS_PRIV(13),
				BLOCK_LEN(pbd));
			exit(1);
		}
	}
}
508
/* A block handed to user space must have TP_STATUS_USER set and carry
 * the expected sequence number. */
509static void __v3_test_block_header(struct block_desc *pbd, const int block_num)
510{
511	uint32_t block_status = BLOCK_STATUS(pbd);
512
513	if ((block_status & TP_STATUS_USER) == 0) {
514		fprintf(stderr, "\nblock %u: not in TP_STATUS_USER\n", block_num);
515		exit(1);
516	}
517
518	__v3_test_block_seq_num(pbd);
519}
520
/* Validate every packet in a V3 block: follow the tp_next_offset chain,
 * check each payload, and cross-check our own length accounting against
 * the kernel's via __v3_test_block_len(). */
521static void __v3_walk_block(struct block_desc *pbd, const int block_num)
522{
523	int num_pkts = BLOCK_NUM_PKTS(pbd), i;
524	unsigned long bytes = 0;
	/* Start from block header + the 13-byte private area. */
525	unsigned long bytes_with_padding = BLOCK_PLUS_PRIV(13);
526	struct tpacket3_hdr *ppd;
527
528	__v3_test_block_header(pbd, block_num);
529
530	ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd + BLOCK_O2FP(pbd));
531	for (i = 0; i < num_pkts; ++i) {
532		bytes += ppd->tp_snaplen;
533
	/* The last packet has tp_next_offset == 0; estimate its padded
	 * footprint from snaplen + mac offset instead. */
534		if (ppd->tp_next_offset)
535			bytes_with_padding += ppd->tp_next_offset;
536		else
537			bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac);
538
539		test_payload((uint8_t *) ppd + ppd->tp_mac, ppd->tp_snaplen);
540
541		status_bar_update();
542		total_packets++;
543
544		ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset);
545		__sync_synchronize();
546	}
547
548	__v3_test_block_len(pbd, bytes_with_padding, block_num);
549	total_bytes += bytes;
550}
551
/* Hand a fully consumed block back to the kernel (barrier ensures the
 * status store is globally visible). */
552void __v3_flush_block(struct block_desc *pbd)
553{
554	BLOCK_STATUS(pbd) = TP_STATUS_KERNEL;
555	__sync_synchronize();
556}
557
/* Receive path for TPACKET_V3: whole blocks (not single frames) are
 * handed to user space.  As in the V1/V2 walk, the loop expects
 * 2 * NUM_PACKETS -- presumably each loopback packet is seen twice. */
558static void walk_v3_rx(int sock, struct ring *ring)
559{
560	unsigned int block_num = 0;
561	struct pollfd pfd;
562	struct block_desc *pbd;
563	int udp_sock[2];
564
565	bug_on(ring->type != PACKET_RX_RING);
566
567	pair_udp_open(udp_sock, PORT_BASE);
568	pair_udp_setfilter(sock);
569
570	memset(&pfd, 0, sizeof(pfd));
571	pfd.fd = sock;
572	pfd.events = POLLIN | POLLERR;
573	pfd.revents = 0;
574
575	pair_udp_send(udp_sock, NUM_PACKETS);
576
577	while (total_packets < NUM_PACKETS * 2) {
578		pbd = (struct block_desc *) ring->rd[block_num].iov_base;
579
	/* Wait for the kernel to release this block to user space. */
580		while ((BLOCK_STATUS(pbd) & TP_STATUS_USER) == 0)
581			poll(&pfd, 1, 1);
582
583		__v3_walk_block(pbd, block_num);
584		__v3_flush_block(pbd);
585
586		block_num = (block_num + 1) % ring->rd_num;
587	}
588
589	pair_udp_close(udp_sock);
590
591	if (total_packets != 2 * NUM_PACKETS) {
592		fprintf(stderr, "walk_v3_rx: received %u out of %u pkts\n",
593			total_packets, NUM_PACKETS);
594		exit(1);
595	}
596
597	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
598}
599
/* Only the RX ring is implemented for V3 here; a TX request is a bug. */
600static void walk_v3(int sock, struct ring *ring)
601{
602	if (ring->type == PACKET_RX_RING)
603		walk_v3_rx(sock, ring);
604	else
605		bug_on(1);
606}
607
/* Size a V1/V2 ring request: @blocks blocks of four pages each, frames
 * of TPACKET_ALIGNMENT << 7 bytes; the walker iterates frame by frame
 * (rd_num = frame count). */
608static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
609{
610	ring->req.tp_block_size = getpagesize() << 2;
611	ring->req.tp_frame_size = TPACKET_ALIGNMENT << 7;
612	ring->req.tp_block_nr = blocks;
613
614	ring->req.tp_frame_nr = ring->req.tp_block_size /
615				ring->req.tp_frame_size *
616				ring->req.tp_block_nr;
617
618	ring->mm_len = ring->req.tp_block_size * ring->req.tp_block_nr;
619	ring->walk = walk_v1_v2;
620	ring->rd_num = ring->req.tp_frame_nr;
621	ring->flen = ring->req.tp_frame_size;
622}
623
/* Size a V3 ring request; additionally asks for a 64 ms block retire
 * timeout, 13 bytes of per-block private area (cross-checked in
 * __v3_test_block_len) and RX-hash filling.  The walker iterates block
 * by block (rd_num = block count). */
624static void __v3_fill(struct ring *ring, unsigned int blocks)
625{
626	ring->req3.tp_retire_blk_tov = 64;
627	ring->req3.tp_sizeof_priv = 13;
628	ring->req3.tp_feature_req_word |= TP_FT_REQ_FILL_RXHASH;
629
630	ring->req3.tp_block_size = getpagesize() << 2;
631	ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7;
632	ring->req3.tp_block_nr = blocks;
633
634	ring->req3.tp_frame_nr = ring->req3.tp_block_size /
635				 ring->req3.tp_frame_size *
636				 ring->req3.tp_block_nr;
637
638	ring->mm_len = ring->req3.tp_block_size * ring->req3.tp_block_nr;
639	ring->walk = walk_v3;
640	ring->rd_num = ring->req3.tp_block_nr;
641	ring->flen = ring->req3.tp_block_size;
642}
643
/* Configure the ring on @sock for the requested tpacket @version and
 * @type (RX/TX ring), then allocate the iovec descriptor table that
 * mmap_ring() will point into the mapping.  Resets the global packet
 * and byte counters so each walk starts fresh. */
644static void setup_ring(int sock, struct ring *ring, int version, int type)
645{
646	int ret = 0;
647	unsigned int blocks = 256;
648
649	ring->type = type;
650	ring->version = version;
651
652	switch (version) {
653	case TPACKET_V1:
654	case TPACKET_V2:
	/* TX rings drop malformed frames instead of erroring out. */
655		if (type == PACKET_TX_RING)
656			__v1_v2_set_packet_loss_discard(sock);
657		__v1_v2_fill(ring, blocks);
658		ret = setsockopt(sock, SOL_PACKET, type, &ring->req,
659				 sizeof(ring->req));
660		break;
661
662	case TPACKET_V3:
663		__v3_fill(ring, blocks);
664		ret = setsockopt(sock, SOL_PACKET, type, &ring->req3,
665				 sizeof(ring->req3));
666		break;
667	}
668
669	if (ret == -1) {
670		perror("setsockopt");
671		exit(1);
672	}
673
674	ring->rd_len = ring->rd_num * sizeof(*ring->rd);
675	ring->rd = malloc(ring->rd_len);
676	if (ring->rd == NULL) {
677		perror("malloc");
678		exit(1);
679	}
680
681	total_packets = 0;
682	total_bytes = 0;
683}
684
/* Map the ring into user space and point one iovec at each of the
 * rd_num slots (individual frames for V1/V2, whole blocks for V3). */
685static void mmap_ring(int sock, struct ring *ring)
686{
687	int i;
688
689	ring->mm_space = mmap(0, ring->mm_len, PROT_READ | PROT_WRITE,
690			      MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0);
691	if (ring->mm_space == MAP_FAILED) {
692		perror("mmap");
693		exit(1);
694	}
695
696	memset(ring->rd, 0, ring->rd_len);
697	for (i = 0; i < ring->rd_num; ++i) {
698		ring->rd[i].iov_base = ring->mm_space + (i * ring->flen);
699		ring->rd[i].iov_len = ring->flen;
700	}
701}
702
/* Bind the packet socket to the loopback device for all ethertypes. */
703static void bind_ring(int sock, struct ring *ring)
704{
705	int ret;
706
707	ring->ll.sll_family = PF_PACKET;
708	ring->ll.sll_protocol = htons(ETH_P_ALL);
709	ring->ll.sll_ifindex = if_nametoindex("lo");
710	ring->ll.sll_hatype = 0;
711	ring->ll.sll_pkttype = 0;
712	ring->ll.sll_halen = 0;
713
714	ret = bind(sock, (struct sockaddr *) &ring->ll, sizeof(ring->ll));
715	if (ret == -1) {
716		perror("bind");
717		exit(1);
718	}
719}
720
/* Run the version-specific walker installed by __v1_v2_fill/__v3_fill. */
721static void walk_ring(int sock, struct ring *ring)
722{
723	ring->walk(sock, ring);
724}
725
/* Undo the allocations of mmap_ring()/setup_ring(). */
726static void unmap_ring(int sock, struct ring *ring)
727{
728	munmap(ring->mm_space, ring->mm_len);
729	free(ring->rd);
730}
731
/*
 * Infer the running kernel's bit width from /proc/kallsyms: the first
 * field of each line is a zero-padded hex address, so the number of
 * hex digits before the first whitespace, times 4, gives the kernel
 * word size (e.g. 16 digits -> 64 bit).
 *
 * Fix: the whitespace scan was unbounded; bound it by the number of
 * bytes actually read so a short or whitespace-free read cannot walk
 * past the buffer.  Also feed isspace() an unsigned char as the
 * ctype contract requires.
 */
static int test_kernel_bit_width(void)
{
	char in[512], *ptr;
	int num = 0, fd;
	ssize_t ret;

	fd = open("/proc/kallsyms", O_RDONLY);
	if (fd == -1) {
		perror("open");
		exit(1);
	}

	ret = read(fd, in, sizeof(in));
	if (ret <= 0) {
		perror("read");
		exit(1);
	}

	close(fd);

	ptr = in;
	while (ptr < in + ret && !isspace((unsigned char) *ptr)) {
		num++;
		ptr++;
	}

	return num * 4;
}

/* Bit width this user-space binary was compiled for. */
static int test_user_bit_width(void)
{
	return __WORDSIZE;
}
765
/* Human-readable names indexed by tpacket version / ring type id. */
766static const char *tpacket_str[] = {
767	[TPACKET_V1] = "TPACKET_V1",
768	[TPACKET_V2] = "TPACKET_V2",
769	[TPACKET_V3] = "TPACKET_V3",
770};
771
772static const char *type_str[] = {
773	[PACKET_RX_RING] = "PACKET_RX_RING",
774	[PACKET_TX_RING] = "PACKET_TX_RING",
775};
776
/* One full setup/walk/teardown cycle for a (version, ring type) combo.
 * Returns 0 on success or skip; hard failures exit(1) further down. */
777static int test_tpacket(int version, int type)
778{
779	int sock;
780	struct ring ring;
781
782	fprintf(stderr, "test: %s with %s ", tpacket_str[version],
783		type_str[type]);
784	fflush(stderr);
785
	/* Skip V1 on mixed bitness -- presumably because the V1 header
	 * layout uses native longs, so a 32-bit binary would misread
	 * the ring of a 64-bit kernel.  TODO confirm. */
786	if (version == TPACKET_V1 &&
787	    test_kernel_bit_width() != test_user_bit_width()) {
788		fprintf(stderr, "test: skip %s %s since user and kernel "
789			"space have different bit width\n",
790			tpacket_str[version], type_str[type]);
791		return 0;
792	}
793
794	sock = pfsocket(version);
795	memset(&ring, 0, sizeof(ring));
796	setup_ring(sock, &ring, version, type);
797	mmap_ring(sock, &ring);
798	bind_ring(sock, &ring);
799	walk_ring(sock, &ring);
800	unmap_ring(sock, &ring);
801	close(sock);
802
803	fprintf(stderr, "\n");
804	return 0;
805}
806
/* Run every supported (tpacket version, ring type) combination; any
 * failure makes the process exit non-zero. */
int main(void)
{
	static const struct {
		int version;
		int type;
	} combos[] = {
		{ TPACKET_V1, PACKET_RX_RING },
		{ TPACKET_V1, PACKET_TX_RING },
		{ TPACKET_V2, PACKET_RX_RING },
		{ TPACKET_V2, PACKET_TX_RING },
		{ TPACKET_V3, PACKET_RX_RING },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0; i < sizeof(combos) / sizeof(combos[0]); i++)
		ret |= test_tpacket(combos[i].version, combos[i].type);

	if (ret)
		return 1;

	printf("OK. All tests passed\n");
	return 0;
}
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
new file mode 100644
index 000000000000..5246e782d6e8
--- /dev/null
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -0,0 +1,26 @@
#!/bin/sh
# Runner for the af_packet selftests.  Packet sockets need root, so the
# whole suite is skipped (exit 0) when not run as uid 0.

if [ "$(id -u)" != 0 ]; then
	# Fix: this message referenced an undefined $msg variable.
	echo "must be run as root" >&2
	exit 0
fi

echo "--------------------"
echo "running psock_fanout test"
echo "--------------------"
./psock_fanout
if [ $? -ne 0 ]; then
	echo "[FAIL]"
else
	echo "[PASS]"
fi

echo "--------------------"
echo "running psock_tpacket test"
echo "--------------------"
./psock_tpacket
if [ $? -ne 0 ]; then
	echo "[FAIL]"
else
	echo "[PASS]"
fi
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests
new file mode 100644
index 000000000000..c09a682df56a
--- /dev/null
+++ b/tools/testing/selftests/net/run_netsocktests
@@ -0,0 +1,12 @@
#!/bin/bash
# Runner for the basic socket(2) selftest.

echo "--------------------"
echo "running socket test"
echo "--------------------"
if ./socket; then
	echo "[PASS]"
else
	echo "[FAIL]"
fi
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
new file mode 100644
index 000000000000..0f227f2f9be9
--- /dev/null
+++ b/tools/testing/selftests/net/socket.c
@@ -0,0 +1,92 @@
1#include <stdio.h>
2#include <errno.h>
3#include <unistd.h>
4#include <string.h>
5#include <sys/types.h>
6#include <sys/socket.h>
7#include <netinet/in.h>
8
/* One socket(2) invocation and its expected outcome. */
9struct socket_testcase {
10	int domain;
11	int type;
12	int protocol;
13
14	/* 0 = valid file descriptor
15	 * -foo = error foo
16	 */
17	int expect;
18
19	/* If non-zero, accept EAFNOSUPPORT to handle the case
20	 * of the protocol not being configured into the kernel.
21	 */
22	int nosupport_ok;
23};
24
/* Matrix of (domain, type, protocol) combinations to probe. */
25static struct socket_testcase tests[] = {
26	{ AF_MAX,  0,           0,           -EAFNOSUPPORT,    0 },
27	{ AF_INET, SOCK_STREAM, IPPROTO_TCP, 0,                1  },
28	{ AF_INET, SOCK_DGRAM,  IPPROTO_TCP, -EPROTONOSUPPORT, 1  },
29	{ AF_INET, SOCK_DGRAM,  IPPROTO_UDP, 0,                1  },
30	{ AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1  },
31};
32
33#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
/* Buffer size handed to strerror_r(). */
34#define ERR_STRING_SZ	64
35
36static int run_tests(void)
37{
38 char err_string1[ERR_STRING_SZ];
39 char err_string2[ERR_STRING_SZ];
40 int i, err;
41
42 err = 0;
43 for (i = 0; i < ARRAY_SIZE(tests); i++) {
44 struct socket_testcase *s = &tests[i];
45 int fd;
46
47 fd = socket(s->domain, s->type, s->protocol);
48 if (fd < 0) {
49 if (s->nosupport_ok &&
50 errno == EAFNOSUPPORT)
51 continue;
52
53 if (s->expect < 0 &&
54 errno == -s->expect)
55 continue;
56
57 strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
58 strerror_r(errno, err_string2, ERR_STRING_SZ);
59
60 fprintf(stderr, "socket(%d, %d, %d) expected "
61 "err (%s) got (%s)\n",
62 s->domain, s->type, s->protocol,
63 err_string1, err_string2);
64
65 err = -1;
66 break;
67 } else {
68 close(fd);
69
70 if (s->expect < 0) {
71 strerror_r(errno, err_string1, ERR_STRING_SZ);
72
73 fprintf(stderr, "socket(%d, %d, %d) expected "
74 "success got err (%s)\n",
75 s->domain, s->type, s->protocol,
76 err_string1);
77
78 err = -1;
79 break;
80 }
81 }
82 }
83
84 return err;
85}
86
/* Exit status mirrors run_tests(): 0 on success, non-zero on failure. */
int main(void)
{
	return run_tests();
}
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
new file mode 100644
index 000000000000..47ae2d385ce8
--- /dev/null
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -0,0 +1,10 @@
1CFLAGS += -iquote../../../../include/uapi -Wall
2peeksiginfo: peeksiginfo.c

all: peeksiginfo

clean:
	$(RM) peeksiginfo

run_tests: all
	@./peeksiginfo || echo "peeksiginfo selftests: [FAIL]"

# all/clean/run_tests are commands, not files: without .PHONY, a stray
# file with one of these names would silently disable the target.
# (Declared last so the default goal stays the peeksiginfo rule above.)
.PHONY: all clean run_tests
diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c
new file mode 100644
index 000000000000..d46558b1f58d
--- /dev/null
+++ b/tools/testing/selftests/ptrace/peeksiginfo.c
@@ -0,0 +1,214 @@
1#define _GNU_SOURCE
2#include <stdio.h>
3#include <signal.h>
4#include <unistd.h>
5#include <errno.h>
6#include <linux/types.h>
7#include <sys/wait.h>
8#include <sys/syscall.h>
9#include <sys/user.h>
10#include <sys/mman.h>
11
12#include "linux/ptrace.h"
13
/* Thin raw-syscall wrappers: invoke the kernel entry points directly
 * via syscall(2) instead of relying on libc wrappers. */
14static int sys_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *uinfo)
15{
16	return syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo);
17}
18
19static int sys_rt_tgsigqueueinfo(pid_t tgid, pid_t tid,
20					int sig, siginfo_t *uinfo)
21{
22	return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, uinfo);
23}
24
25static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
26{
27	return syscall(SYS_ptrace, request, pid, addr, data);
28}
29
/* Number of signals queued per test pass. */
30#define SIGNR 10
/* Sentinel si_code values marking per-thread vs process-wide entries. */
31#define TEST_SICODE_PRIV	-1
32#define TEST_SICODE_SHARE	-2
33
/* Error reporting with source location prefix. */
34#define err(fmt, ...)						\
35		fprintf(stderr,					\
36			"Error (%s:%d): " fmt,			\
37			__FILE__, __LINE__, ##__VA_ARGS__)
38
/* Exercise PTRACE_PEEKSIGINFO failure modes against @child: unsupported
 * flags must yield EINVAL, a partially read-only buffer truncates the
 * result, and a fully read-only buffer fails with EFAULT.  Returns 0 on
 * success, non-zero on unexpected kernel behavior. */
39static int check_error_paths(pid_t child)
40{
41	struct ptrace_peeksiginfo_args arg;
42	int ret, exit_code = -1;
43	void *addr_rw, *addr_ro;
44
45	/*
46	 * Allocate two contiguous pages. The first one is for read-write,
47	 * another is for read-only.
48	 */
49	addr_rw = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE,
50		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
51	if (addr_rw == MAP_FAILED) {
52		err("mmap() failed: %m\n");
53		return 1;
54	}
55
56	addr_ro = mmap(addr_rw + PAGE_SIZE, PAGE_SIZE, PROT_READ,
57		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
58	if (addr_ro == MAP_FAILED) {
59		err("mmap() failed: %m\n");
60		goto out;
61	}
62
63	arg.nr = SIGNR;
64	arg.off = 0;
65
66	/* Unsupported flags */
67	arg.flags = ~0;
68	ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_rw);
69	if (ret != -1 || errno != EINVAL) {
70		err("sys_ptrace() returns %d (expected -1),"
71		    " errno %d (expected %d): %m\n",
72		    ret, errno, EINVAL);
73		goto out;
74	}
75	arg.flags = 0;
76
77	/* A part of the buffer is read-only */
	/* Only the two siginfo slots before the RO page are writable,
	 * so the kernel should report exactly 2 entries copied. */
78	ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg,
79			 addr_ro - sizeof(siginfo_t) * 2);
80	if (ret != 2) {
81		err("sys_ptrace() returns %d (expected 2): %m\n", ret);
82		goto out;
83	}
84
85	/* Read-only buffer */
86	ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_ro);
87	if (ret != -1 && errno != EFAULT) {
88		err("sys_ptrace() returns %d (expected -1),"
89		    " errno %d (expected %d): %m\n",
90		    ret, errno, EFAULT);
91		goto out;
92	}
93
94	exit_code = 0;
95out:
96	munmap(addr_rw, 2 * PAGE_SIZE);
97	return exit_code;
98}
99
/* Drain @child's pending signals via PTRACE_PEEKSIGINFO in chunks of up
 * to @nr entries -- from the process-wide queue when @shared is set,
 * otherwise the per-thread queue.  Every entry must carry the matching
 * sentinel si_code and an si_int equal to its queueing order.  Returns
 * 0 when all SIGNR signals were observed, -1 otherwise. */
100int check_direct_path(pid_t child, int shared, int nr)
101{
102	struct ptrace_peeksiginfo_args arg = {.flags = 0, .nr = nr, .off = 0};
103	int i, j, ret, exit_code = -1;
104	siginfo_t siginfo[SIGNR];
105	int si_code;
106
107	if (shared == 1) {
108		arg.flags = PTRACE_PEEKSIGINFO_SHARED;
109		si_code = TEST_SICODE_SHARE;
110	} else {
111		arg.flags = 0;
112		si_code = TEST_SICODE_PRIV;
113	}
114
115	for (i = 0; i < SIGNR; ) {
	/* Resume from the next unread queue position each round. */
116		arg.off = i;
117		ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, siginfo);
118		if (ret == -1) {
119			err("ptrace() failed: %m\n");
120			goto out;
121		}
122
123		if (ret == 0)
124			break;
125
126		for (j = 0; j < ret; j++, i++) {
127			if (siginfo[j].si_code == si_code &&
128			    siginfo[j].si_int == i)
129				continue;
130
131			err("%d: Wrong siginfo i=%d si_code=%d si_int=%d\n",
132			    shared, i, siginfo[j].si_code, siginfo[j].si_int);
133			goto out;
134		}
135	}
136
137	if (i != SIGNR) {
138		err("Only %d signals were read\n", i);
139		goto out;
140	}
141
142	exit_code = 0;
143out:
144	return exit_code;
145}
146
/* Queue SIGNR signals to a forked child on both its process-wide and
 * per-thread queues (SIGRTMIN is blocked so they stay pending), attach
 * with ptrace and verify PTRACE_PEEKSIGINFO's direct and error paths. */
147int main(int argc, char *argv[])
148{
149	siginfo_t siginfo[SIGNR];
150	int i, exit_code = 1;
151	sigset_t blockmask;
152	pid_t child;
153
154	sigemptyset(&blockmask);
155	sigaddset(&blockmask, SIGRTMIN);
156	sigprocmask(SIG_BLOCK, &blockmask, NULL);
157
158	child = fork();
159	if (child == -1) {
160		err("fork() failed: %m");
161		return 1;
162	} else if (child == 0) {
	/* Child: idle until the parent disappears (getppid() changes
	 * on reparenting), keeping the queued signals pending. */
163		pid_t ppid = getppid();
164		while (1) {
165			if (ppid != getppid())
166				break;
167			sleep(1);
168		}
169		return 1;
170	}
171
172	/* Send signals in process-wide and per-thread queues;
	 * only siginfo[0] is used as a scratch template here. */
173	for (i = 0; i < SIGNR; i++) {
174		siginfo->si_code = TEST_SICODE_SHARE;
175		siginfo->si_int = i;
176		sys_rt_sigqueueinfo(child, SIGRTMIN, siginfo);
177
178		siginfo->si_code = TEST_SICODE_PRIV;
179		siginfo->si_int = i;
180		sys_rt_tgsigqueueinfo(child, child, SIGRTMIN, siginfo);
181	}
182
183	if (sys_ptrace(PTRACE_ATTACH, child, NULL, NULL) == -1)
184		return 1;
185
186	waitpid(child, NULL, 0);
187
188	/* Dump signals one by one */
189	if (check_direct_path(child, 0, 1))
190		goto out;
191	/* Dump all signals for one call */
192	if (check_direct_path(child, 0, SIGNR))
193		goto out;
194
195	/*
196	 * Dump signal from the process-wide queue.
197	 * The number of signals is not a multiple of the buffer size.
198	 */
199	if (check_direct_path(child, 1, 3))
200		goto out;
201
202	if (check_error_paths(child))
203		goto out;
204
205	printf("PASS\n");
206	exit_code = 0;
207out:
	/* Always kill and reap the child so it does not linger. */
208	if (sys_ptrace(PTRACE_KILL, child, NULL, NULL) == -1)
209		return 1;
210
211	waitpid(child, NULL, 0);
212
213	return exit_code;
214}
diff --git a/tools/testing/selftests/soft-dirty/Makefile b/tools/testing/selftests/soft-dirty/Makefile
new file mode 100644
index 000000000000..a9cdc823d6e0
--- /dev/null
+++ b/tools/testing/selftests/soft-dirty/Makefile
@@ -0,0 +1,10 @@
1CFLAGS += -iquote../../../../include/uapi -Wall
2soft-dirty: soft-dirty.c

all: soft-dirty

clean:
	$(RM) soft-dirty

run_tests: all
	@./soft-dirty || echo "soft-dirty selftests: [FAIL]"

# all/clean/run_tests are commands, not files: without .PHONY, a stray
# file with one of these names would silently disable the target.
# (Declared last so the default goal stays the soft-dirty rule above.)
.PHONY: all clean run_tests
diff --git a/tools/testing/selftests/soft-dirty/soft-dirty.c b/tools/testing/selftests/soft-dirty/soft-dirty.c
new file mode 100644
index 000000000000..aba4f87f87f0
--- /dev/null
+++ b/tools/testing/selftests/soft-dirty/soft-dirty.c
@@ -0,0 +1,114 @@
1#include <stdlib.h>
2#include <stdio.h>
3#include <sys/mman.h>
4#include <unistd.h>
5#include <fcntl.h>
6#include <sys/types.h>
7
8typedef unsigned long long u64;
9
10#define PME_PRESENT (1ULL << 63)
11#define PME_SOFT_DIRTY (1Ull << 55)
12
13#define PAGES_TO_TEST 3
14#ifndef PAGE_SIZE
15#define PAGE_SIZE 4096
16#endif
17
18static void get_pagemap2(char *mem, u64 *map)
19{
20 int fd;
21
22 fd = open("/proc/self/pagemap2", O_RDONLY);
23 if (fd < 0) {
24 perror("Can't open pagemap2");
25 exit(1);
26 }
27
28 lseek(fd, (unsigned long)mem / PAGE_SIZE * sizeof(u64), SEEK_SET);
29 read(fd, map, sizeof(u64) * PAGES_TO_TEST);
30 close(fd);
31}
32
/* 'p' if the pagemap entry marks the page present, else '-'. */
33static inline char map_p(u64 map)
34{
35	return map & PME_PRESENT ? 'p' : '-';
36}
37
/* 'd' if the entry carries the soft-dirty bit, else '-'. */
38static inline char map_sd(u64 map)
39{
40	return map & PME_SOFT_DIRTY ? 'd' : '-';
41}
42
/* Require that entry @page has at least the bits in @want set; print a
 * decoded mismatch and return 1 otherwise, 0 on success. */
43static int check_pte(int step, int page, u64 *map, u64 want)
44{
45	if ((map[page] & want) != want) {
46		printf("Step %d Page %d has %c%c, want %c%c\n",
47		       step, page,
48		       map_p(map[page]), map_sd(map[page]),
49		       map_p(want), map_sd(want));
50		return 1;
51	}
52
53	return 0;
54}
55
/*
 * Ask the kernel to clear this process's soft-dirty bits by writing
 * "4" to /proc/self/clear_refs.
 *
 * Fixes: write() was passed a length of 3 although the literal "4" is
 * only 2 bytes long (NUL included), reading one byte past its end; the
 * fd returned by open() was also never checked.
 */
static void clear_refs(void)
{
	int fd;

	fd = open("/proc/self/clear_refs", O_WRONLY);
	if (fd < 0) {
		perror("Can't open clear_refs");
		exit(1);
	}
	if (write(fd, "4", 1) != 1) {
		perror("Can't clear soft-dirty bit");
		exit(1);
	}
	close(fd);
}
68
69int main(void)
70{
71 char *mem, x;
72 u64 map[PAGES_TO_TEST];
73
74 mem = mmap(NULL, PAGES_TO_TEST * PAGE_SIZE,
75 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 0, 0);
76
77 x = mem[0];
78 mem[2 * PAGE_SIZE] = 'c';
79 get_pagemap2(mem, map);
80
81 if (check_pte(1, 0, map, PME_PRESENT))
82 return 1;
83 if (check_pte(1, 1, map, 0))
84 return 1;
85 if (check_pte(1, 2, map, PME_PRESENT | PME_SOFT_DIRTY))
86 return 1;
87
88 clear_refs();
89 get_pagemap2(mem, map);
90
91 if (check_pte(2, 0, map, PME_PRESENT))
92 return 1;
93 if (check_pte(2, 1, map, 0))
94 return 1;
95 if (check_pte(2, 2, map, PME_PRESENT))
96 return 1;
97
98 mem[0] = 'a';
99 mem[PAGE_SIZE] = 'b';
100 x = mem[2 * PAGE_SIZE];
101 get_pagemap2(mem, map);
102
103 if (check_pte(3, 0, map, PME_PRESENT | PME_SOFT_DIRTY))
104 return 1;
105 if (check_pte(3, 1, map, PME_PRESENT | PME_SOFT_DIRTY))
106 return 1;
107 if (check_pte(3, 2, map, PME_PRESENT))
108 return 1;
109
110 (void)x; /* gcc warn */
111
112 printf("PASS\n");
113 return 0;
114}
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
index d1d442ed106a..3187c62d9814 100644
--- a/tools/virtio/Makefile
+++ b/tools/virtio/Makefile
@@ -1,12 +1,14 @@
1all: test mod 1all: test mod
2test: virtio_test 2test: virtio_test vringh_test
3virtio_test: virtio_ring.o virtio_test.o 3virtio_test: virtio_ring.o virtio_test.o
4CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -MMD 4vringh_test: vringh_test.o vringh.o virtio_ring.o
5vpath %.c ../../drivers/virtio 5
6CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE
7vpath %.c ../../drivers/virtio ../../drivers/vhost
6mod: 8mod:
7 ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test 9 ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test
8.PHONY: all test mod clean 10.PHONY: all test mod clean
9clean: 11clean:
10 ${RM} *.o vhost_test/*.o vhost_test/.*.cmd \ 12 ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \
11 vhost_test/Module.symvers vhost_test/modules.order *.d 13 vhost_test/Module.symvers vhost_test/modules.order *.d
12-include *.d 14-include *.d
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
new file mode 100644
index 000000000000..aff61e13306c
--- /dev/null
+++ b/tools/virtio/asm/barrier.h
@@ -0,0 +1,14 @@
/* Userspace stand-ins for the kernel's memory barrier macros, x86 only:
 * smp_rmb()/smp_wmb() reduce to compiler barriers here, while mb() is a
 * real fence via __sync_synchronize(). */
1#if defined(__i386__) || defined(__x86_64__)
2#define barrier() asm volatile("" ::: "memory")
3#define mb() __sync_synchronize()
4
5#define smp_mb()	mb()
6# define smp_rmb()	barrier()
7# define smp_wmb()	barrier()
8/* Weak barriers should be used. If not - it's a bug */
9# define rmb()	abort()
10# define wmb()	abort()
11#else
12#error Please fill in barrier macros
13#endif
14
diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h
new file mode 100644
index 000000000000..fb94f0787c47
--- /dev/null
+++ b/tools/virtio/linux/bug.h
@@ -0,0 +1,10 @@
#ifndef BUG_H
#define BUG_H

/*
 * Userspace stand-ins for the kernel's BUG macros, mapped onto
 * assert()/abort().
 *
 * Fix: the header used assert() and abort() without including their
 * declaring headers, relying on every includer having pulled in
 * <assert.h>/<stdlib.h> first; make it self-contained.
 */
#include <assert.h>
#include <stdlib.h>

#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))

/* Compile-time checks are simply dropped in the userspace build. */
#define BUILD_BUG_ON(x)

#define BUG() abort()

#endif /* BUG_H */
diff --git a/tools/virtio/linux/err.h b/tools/virtio/linux/err.h
new file mode 100644
index 000000000000..e32eff8b2a14
--- /dev/null
+++ b/tools/virtio/linux/err.h
@@ -0,0 +1,26 @@
1#ifndef ERR_H
2#define ERR_H
/* Userspace copy of the kernel's ERR_PTR machinery: a small negative
 * errno is encoded directly in a pointer value, with the top MAX_ERRNO
 * addresses reserved as error codes.
 * NOTE(review): relies on unlikely() and __must_check being defined by
 * a previously-included header (e.g. linux/kernel.h) -- confirm the
 * include order when adding new users. */
3#define MAX_ERRNO	4095
4
5#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
6
7static inline void * __must_check ERR_PTR(long error)
8{
9	return (void *) error;
10}
11
12static inline long __must_check PTR_ERR(const void *ptr)
13{
14	return (long) ptr;
15}
16
17static inline long __must_check IS_ERR(const void *ptr)
18{
19	return IS_ERR_VALUE((unsigned long)ptr);
20}
21
22static inline long __must_check IS_ERR_OR_NULL(const void *ptr)
23{
24	return !ptr || IS_ERR_VALUE((unsigned long)ptr);
25}
26#endif /* ERR_H */
diff --git a/tools/virtio/linux/export.h b/tools/virtio/linux/export.h
new file mode 100644
index 000000000000..7311d326894a
--- /dev/null
+++ b/tools/virtio/linux/export.h
@@ -0,0 +1,5 @@
/* Stub out the kernel's symbol-export annotations: in the userspace
 * test build there is no module symbol table to populate. */
1#define EXPORT_SYMBOL(sym)
2#define EXPORT_SYMBOL_GPL(sym)
3#define EXPORT_SYMBOL_GPL_FUTURE(sym)
4#define EXPORT_UNUSED_SYMBOL(sym)
5#define EXPORT_UNUSED_SYMBOL_GPL(sym)
diff --git a/tools/virtio/linux/irqreturn.h b/tools/virtio/linux/irqreturn.h
new file mode 100644
index 000000000000..a3c4e7be7089
--- /dev/null
+++ b/tools/virtio/linux/irqreturn.h
@@ -0,0 +1 @@
#include "../../../include/linux/irqreturn.h"
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
new file mode 100644
index 000000000000..fba705963968
--- /dev/null
+++ b/tools/virtio/linux/kernel.h
@@ -0,0 +1,112 @@
1#ifndef KERNEL_H
2#define KERNEL_H
/* Minimal userspace shim of the kernel environment for the virtio /
 * vringh test harness: identity virt<->phys mapping, malloc-backed
 * "kmalloc" with test hooks, and logging mapped to stderr. */
3#include <stdbool.h>
4#include <stdlib.h>
5#include <stddef.h>
6#include <stdio.h>
7#include <string.h>
8#include <assert.h>
9#include <stdarg.h>
10
11#include <linux/types.h>
12#include <linux/printk.h>
13#include <linux/bug.h>
14#include <errno.h>
15#include <unistd.h>
16#include <asm/barrier.h>
17
18#define CONFIG_SMP
19
20#define PAGE_SIZE getpagesize()
21#define PAGE_MASK (~(PAGE_SIZE-1))
22
23typedef unsigned long long dma_addr_t;
24typedef size_t __kernel_size_t;
25
26struct page {
27	unsigned long long dummy;
28};
29
30/* Physical == Virtual */
31#define virt_to_phys(p) ((unsigned long)p)
32#define phys_to_virt(a) ((void *)(unsigned long)(a))
33/* Page address: Virtual / 4K */
34#define page_to_phys(p) ((dma_addr_t)(unsigned long)(p))
35#define virt_to_page(p) ((struct page *)((unsigned long)p & PAGE_MASK))
36
37#define offset_in_page(p) (((unsigned long)p) % PAGE_SIZE)
38
39#define __printf(a,b) __attribute__((format(printf,a,b)))
40
/* gfp flags are accepted for API compatibility but ignored. */
41typedef enum {
42	GFP_KERNEL,
43	GFP_ATOMIC,
44	__GFP_HIGHMEM,
45	__GFP_HIGH
46} gfp_t;
47
48#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
49
/* Test hooks: a non-NULL __kmalloc_fake redirects every "allocation"
 * to that buffer, and pointers inside [__kfree_ignore_start,
 * __kfree_ignore_end) are never passed to free(). */
50extern void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
51static inline void *kmalloc(size_t s, gfp_t gfp)
52{
53	if (__kmalloc_fake)
54		return __kmalloc_fake;
55	return malloc(s);
56}
57
58static inline void kfree(void *p)
59{
60	if (p >= __kfree_ignore_start && p < __kfree_ignore_end)
61		return;
62	free(p);
63}
64
65static inline void *krealloc(void *p, size_t s, gfp_t gfp)
66{
67	return realloc(p, s);
68}
69
70
71static inline unsigned long __get_free_page(gfp_t gfp)
72{
73	void *p;
74
	/* NOTE(review): posix_memalign's return value is ignored; on
	 * failure p is indeterminate -- acceptable only for test code. */
75	posix_memalign(&p, PAGE_SIZE, PAGE_SIZE);
76	return (unsigned long)p;
77}
78
79static inline void free_page(unsigned long addr)
80{
81	free((void *)addr);
82}
83
84#define container_of(ptr, type, member) ({			\
85	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
86	(type *)( (char *)__mptr - offsetof(type,member) );})
87
88#define uninitialized_var(x) x = x
89
90# ifndef likely
91#  define likely(x)	(__builtin_expect(!!(x), 1))
92# endif
93# ifndef unlikely
94#  define unlikely(x)	(__builtin_expect(!!(x), 0))
95# endif
96
97#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
98#ifdef DEBUG
99#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
100#else
101#define pr_debug(format, ...) do {} while (0)
102#endif
103#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
104#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
105
/* Type-safe min: the (&_min1 == &_min2) comparison forces a warning
 * when x and y have different types. */
106#define min(x, y) ({				\
107	typeof(x) _min1 = (x);			\
108	typeof(y) _min2 = (y);			\
109	(void) (&_min1 == &_min2);		\
110	_min1 < _min2 ? _min1 : _min2; })
111
112#endif /* KERNEL_H */
diff --git a/tools/virtio/linux/module.h b/tools/virtio/linux/module.h
index e69de29bb2d1..3039a7e972b6 100644
--- a/tools/virtio/linux/module.h
+++ b/tools/virtio/linux/module.h
@@ -0,0 +1 @@
#include <linux/export.h>
diff --git a/tools/virtio/linux/printk.h b/tools/virtio/linux/printk.h
new file mode 100644
index 000000000000..9f2423bd89c2
--- /dev/null
+++ b/tools/virtio/linux/printk.h
@@ -0,0 +1,4 @@
1#include "../../../include/linux/kern_levels.h"
2
3#define printk printf
4#define vprintk vprintf
diff --git a/tools/virtio/linux/ratelimit.h b/tools/virtio/linux/ratelimit.h
new file mode 100644
index 000000000000..dcce1725f90d
--- /dev/null
+++ b/tools/virtio/linux/ratelimit.h
@@ -0,0 +1,4 @@
1#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) int name = 0
2
3#define __ratelimit(x) (*(x))
4
diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h
new file mode 100644
index 000000000000..68c9e2adc996
--- /dev/null
+++ b/tools/virtio/linux/scatterlist.h
@@ -0,0 +1,189 @@
1#ifndef SCATTERLIST_H
2#define SCATTERLIST_H
3#include <linux/kernel.h>
4
5struct scatterlist {
6 unsigned long page_link;
7 unsigned int offset;
8 unsigned int length;
9 dma_addr_t dma_address;
10};
11
12/* Scatterlist helpers, stolen from linux/scatterlist.h */
13#define sg_is_chain(sg) ((sg)->page_link & 0x01)
14#define sg_is_last(sg) ((sg)->page_link & 0x02)
15#define sg_chain_ptr(sg) \
16 ((struct scatterlist *) ((sg)->page_link & ~0x03))
17
18/**
19 * sg_assign_page - Assign a given page to an SG entry
20 * @sg: SG entry
21 * @page: The page
22 *
23 * Description:
24 * Assign page to sg entry. Also see sg_set_page(), the most commonly used
25 * variant.
26 *
27 **/
28static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
29{
30 unsigned long page_link = sg->page_link & 0x3;
31
32 /*
33 * In order for the low bit stealing approach to work, pages
34 * must be aligned at a 32-bit boundary as a minimum.
35 */
36 BUG_ON((unsigned long) page & 0x03);
37#ifdef CONFIG_DEBUG_SG
38 BUG_ON(sg->sg_magic != SG_MAGIC);
39 BUG_ON(sg_is_chain(sg));
40#endif
41 sg->page_link = page_link | (unsigned long) page;
42}
43
44/**
45 * sg_set_page - Set sg entry to point at given page
46 * @sg: SG entry
47 * @page: The page
48 * @len: Length of data
49 * @offset: Offset into page
50 *
51 * Description:
52 * Use this function to set an sg entry pointing at a page, never assign
53 * the page directly. We encode sg table information in the lower bits
54 * of the page pointer. See sg_page() for looking up the page belonging
55 * to an sg entry.
56 *
57 **/
58static inline void sg_set_page(struct scatterlist *sg, struct page *page,
59 unsigned int len, unsigned int offset)
60{
61 sg_assign_page(sg, page);
62 sg->offset = offset;
63 sg->length = len;
64}
65
66static inline struct page *sg_page(struct scatterlist *sg)
67{
68#ifdef CONFIG_DEBUG_SG
69 BUG_ON(sg->sg_magic != SG_MAGIC);
70 BUG_ON(sg_is_chain(sg));
71#endif
72 return (struct page *)((sg)->page_link & ~0x3);
73}
74
75/*
76 * Loop over each sg element, following the pointer to a new list if necessary
77 */
78#define for_each_sg(sglist, sg, nr, __i) \
79 for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
80
81/**
82 * sg_chain - Chain two sglists together
83 * @prv: First scatterlist
84 * @prv_nents: Number of entries in prv
85 * @sgl: Second scatterlist
86 *
87 * Description:
88 * Links @prv@ and @sgl@ together, to form a longer scatterlist.
89 *
90 **/
91static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
92 struct scatterlist *sgl)
93{
94 /*
95 * offset and length are unused for chain entry. Clear them.
96 */
97 prv[prv_nents - 1].offset = 0;
98 prv[prv_nents - 1].length = 0;
99
100 /*
101 * Set lowest bit to indicate a link pointer, and make sure to clear
102 * the termination bit if it happens to be set.
103 */
104 prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02;
105}
106
107/**
108 * sg_mark_end - Mark the end of the scatterlist
109 * @sg: SG entryScatterlist
110 *
111 * Description:
112 * Marks the passed in sg entry as the termination point for the sg
113 * table. A call to sg_next() on this entry will return NULL.
114 *
115 **/
116static inline void sg_mark_end(struct scatterlist *sg)
117{
118#ifdef CONFIG_DEBUG_SG
119 BUG_ON(sg->sg_magic != SG_MAGIC);
120#endif
121 /*
122 * Set termination bit, clear potential chain bit
123 */
124 sg->page_link |= 0x02;
125 sg->page_link &= ~0x01;
126}
127
128/**
129 * sg_unmark_end - Undo setting the end of the scatterlist
130 * @sg: SG entryScatterlist
131 *
132 * Description:
133 * Removes the termination marker from the given entry of the scatterlist.
134 *
135 **/
136static inline void sg_unmark_end(struct scatterlist *sg)
137{
138#ifdef CONFIG_DEBUG_SG
139 BUG_ON(sg->sg_magic != SG_MAGIC);
140#endif
141 sg->page_link &= ~0x02;
142}
143
144static inline struct scatterlist *sg_next(struct scatterlist *sg)
145{
146#ifdef CONFIG_DEBUG_SG
147 BUG_ON(sg->sg_magic != SG_MAGIC);
148#endif
149 if (sg_is_last(sg))
150 return NULL;
151
152 sg++;
153 if (unlikely(sg_is_chain(sg)))
154 sg = sg_chain_ptr(sg);
155
156 return sg;
157}
158
159static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
160{
161 memset(sgl, 0, sizeof(*sgl) * nents);
162#ifdef CONFIG_DEBUG_SG
163 {
164 unsigned int i;
165 for (i = 0; i < nents; i++)
166 sgl[i].sg_magic = SG_MAGIC;
167 }
168#endif
169 sg_mark_end(&sgl[nents - 1]);
170}
171
172static inline dma_addr_t sg_phys(struct scatterlist *sg)
173{
174 return page_to_phys(sg_page(sg)) + sg->offset;
175}
176
177static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
178 unsigned int buflen)
179{
180 sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
181}
182
183static inline void sg_init_one(struct scatterlist *sg,
184 const void *buf, unsigned int buflen)
185{
186 sg_init_table(sg, 1);
187 sg_set_buf(sg, buf, buflen);
188}
189#endif /* SCATTERLIST_H */
diff --git a/tools/virtio/linux/types.h b/tools/virtio/linux/types.h
new file mode 100644
index 000000000000..f8ebb9a2b3d6
--- /dev/null
+++ b/tools/virtio/linux/types.h
@@ -0,0 +1,28 @@
1#ifndef TYPES_H
2#define TYPES_H
3#include <stdint.h>
4
5#define __force
6#define __user
7#define __must_check
8#define __cold
9
10typedef uint64_t u64;
11typedef int64_t s64;
12typedef uint32_t u32;
13typedef int32_t s32;
14typedef uint16_t u16;
15typedef int16_t s16;
16typedef uint8_t u8;
17typedef int8_t s8;
18
19typedef uint64_t __u64;
20typedef int64_t __s64;
21typedef uint32_t __u32;
22typedef int32_t __s32;
23typedef uint16_t __u16;
24typedef int16_t __s16;
25typedef uint8_t __u8;
26typedef int8_t __s8;
27
28#endif /* TYPES_H */
diff --git a/tools/virtio/linux/uaccess.h b/tools/virtio/linux/uaccess.h
new file mode 100644
index 000000000000..0a578fe18653
--- /dev/null
+++ b/tools/virtio/linux/uaccess.h
@@ -0,0 +1,50 @@
1#ifndef UACCESS_H
2#define UACCESS_H
3extern void *__user_addr_min, *__user_addr_max;
4
5#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
6
7static inline void __chk_user_ptr(const volatile void *p, size_t size)
8{
9 assert(p >= __user_addr_min && p + size <= __user_addr_max);
10}
11
12#define put_user(x, ptr) \
13({ \
14 typeof(ptr) __pu_ptr = (ptr); \
15 __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \
16 ACCESS_ONCE(*(__pu_ptr)) = x; \
17 0; \
18})
19
20#define get_user(x, ptr) \
21({ \
22 typeof(ptr) __pu_ptr = (ptr); \
23 __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \
24 x = ACCESS_ONCE(*(__pu_ptr)); \
25 0; \
26})
27
28static void volatile_memcpy(volatile char *to, const volatile char *from,
29 unsigned long n)
30{
31 while (n--)
32 *(to++) = *(from++);
33}
34
35static inline int copy_from_user(void *to, const void __user volatile *from,
36 unsigned long n)
37{
38 __chk_user_ptr(from, n);
39 volatile_memcpy(to, from, n);
40 return 0;
41}
42
43static inline int copy_to_user(void __user volatile *to, const void *from,
44 unsigned long n)
45{
46 __chk_user_ptr(to, n);
47 volatile_memcpy(to, from, n);
48 return 0;
49}
50#endif /* UACCESS_H */
diff --git a/tools/virtio/linux/uio.h b/tools/virtio/linux/uio.h
new file mode 100644
index 000000000000..cd20f0ba3081
--- /dev/null
+++ b/tools/virtio/linux/uio.h
@@ -0,0 +1,3 @@
1#include <linux/kernel.h>
2
3#include "../../../include/linux/uio.h"
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 81847dd08bd0..cd801838156f 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -1,127 +1,7 @@
1#ifndef LINUX_VIRTIO_H 1#ifndef LINUX_VIRTIO_H
2#define LINUX_VIRTIO_H 2#define LINUX_VIRTIO_H
3 3#include <linux/scatterlist.h>
4#include <stdbool.h> 4#include <linux/kernel.h>
5#include <stdlib.h>
6#include <stddef.h>
7#include <stdio.h>
8#include <string.h>
9#include <assert.h>
10
11#include <linux/types.h>
12#include <errno.h>
13
14typedef unsigned long long dma_addr_t;
15
16struct scatterlist {
17 unsigned long page_link;
18 unsigned int offset;
19 unsigned int length;
20 dma_addr_t dma_address;
21};
22
23struct page {
24 unsigned long long dummy;
25};
26
27#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
28
29/* Physical == Virtual */
30#define virt_to_phys(p) ((unsigned long)p)
31#define phys_to_virt(a) ((void *)(unsigned long)(a))
32/* Page address: Virtual / 4K */
33#define virt_to_page(p) ((struct page*)((virt_to_phys(p) / 4096) * \
34 sizeof(struct page)))
35#define offset_in_page(p) (((unsigned long)p) % 4096)
36#define sg_phys(sg) ((sg->page_link & ~0x3) / sizeof(struct page) * 4096 + \
37 sg->offset)
38static inline void sg_mark_end(struct scatterlist *sg)
39{
40 /*
41 * Set termination bit, clear potential chain bit
42 */
43 sg->page_link |= 0x02;
44 sg->page_link &= ~0x01;
45}
46static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
47{
48 memset(sgl, 0, sizeof(*sgl) * nents);
49 sg_mark_end(&sgl[nents - 1]);
50}
51static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
52{
53 unsigned long page_link = sg->page_link & 0x3;
54
55 /*
56 * In order for the low bit stealing approach to work, pages
57 * must be aligned at a 32-bit boundary as a minimum.
58 */
59 BUG_ON((unsigned long) page & 0x03);
60 sg->page_link = page_link | (unsigned long) page;
61}
62
63static inline void sg_set_page(struct scatterlist *sg, struct page *page,
64 unsigned int len, unsigned int offset)
65{
66 sg_assign_page(sg, page);
67 sg->offset = offset;
68 sg->length = len;
69}
70
71static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
72 unsigned int buflen)
73{
74 sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
75}
76
77static inline void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
78{
79 sg_init_table(sg, 1);
80 sg_set_buf(sg, buf, buflen);
81}
82
83typedef __u16 u16;
84
85typedef enum {
86 GFP_KERNEL,
87 GFP_ATOMIC,
88} gfp_t;
89typedef enum {
90 IRQ_NONE,
91 IRQ_HANDLED
92} irqreturn_t;
93
94static inline void *kmalloc(size_t s, gfp_t gfp)
95{
96 return malloc(s);
97}
98
99static inline void kfree(void *p)
100{
101 free(p);
102}
103
104#define container_of(ptr, type, member) ({ \
105 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
106 (type *)( (char *)__mptr - offsetof(type,member) );})
107
108#define uninitialized_var(x) x = x
109
110# ifndef likely
111# define likely(x) (__builtin_expect(!!(x), 1))
112# endif
113# ifndef unlikely
114# define unlikely(x) (__builtin_expect(!!(x), 0))
115# endif
116
117#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
118#ifdef DEBUG
119#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
120#else
121#define pr_debug(format, ...) do {} while (0)
122#endif
123#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
124#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
125 5
126/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */ 6/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
127#define list_add_tail(a, b) do {} while (0) 7#define list_add_tail(a, b) do {} while (0)
@@ -131,6 +11,7 @@ static inline void kfree(void *p)
131#define BITS_PER_BYTE 8 11#define BITS_PER_BYTE 8
132#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE) 12#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE)
133#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) 13#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
14
134/* TODO: Not atomic as it should be: 15/* TODO: Not atomic as it should be:
135 * we don't use this for anything important. */ 16 * we don't use this for anything important. */
136static inline void clear_bit(int nr, volatile unsigned long *addr) 17static inline void clear_bit(int nr, volatile unsigned long *addr)
@@ -145,10 +26,6 @@ static inline int test_bit(int nr, const volatile unsigned long *addr)
145{ 26{
146 return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); 27 return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
147} 28}
148
149/* The only feature we care to support */
150#define virtio_has_feature(dev, feature) \
151 test_bit((feature), (dev)->features)
152/* end of stubs */ 29/* end of stubs */
153 30
154struct virtio_device { 31struct virtio_device {
@@ -163,39 +40,32 @@ struct virtqueue {
163 void (*callback)(struct virtqueue *vq); 40 void (*callback)(struct virtqueue *vq);
164 const char *name; 41 const char *name;
165 struct virtio_device *vdev; 42 struct virtio_device *vdev;
43 unsigned int index;
44 unsigned int num_free;
166 void *priv; 45 void *priv;
167}; 46};
168 47
169#define EXPORT_SYMBOL_GPL(__EXPORT_SYMBOL_GPL_name) \
170 void __EXPORT_SYMBOL_GPL##__EXPORT_SYMBOL_GPL_name() { \
171}
172#define MODULE_LICENSE(__MODULE_LICENSE_value) \ 48#define MODULE_LICENSE(__MODULE_LICENSE_value) \
173 const char *__MODULE_LICENSE_name = __MODULE_LICENSE_value 49 const char *__MODULE_LICENSE_name = __MODULE_LICENSE_value
174 50
175#define CONFIG_SMP
176
177#if defined(__i386__) || defined(__x86_64__)
178#define barrier() asm volatile("" ::: "memory")
179#define mb() __sync_synchronize()
180
181#define smp_mb() mb()
182# define smp_rmb() barrier()
183# define smp_wmb() barrier()
184/* Weak barriers should be used. If not - it's a bug */
185# define rmb() abort()
186# define wmb() abort()
187#else
188#error Please fill in barrier macros
189#endif
190
191/* Interfaces exported by virtio_ring. */ 51/* Interfaces exported by virtio_ring. */
192int virtqueue_add_buf(struct virtqueue *vq, 52int virtqueue_add_sgs(struct virtqueue *vq,
193 struct scatterlist sg[], 53 struct scatterlist *sgs[],
194 unsigned int out_num, 54 unsigned int out_sgs,
195 unsigned int in_num, 55 unsigned int in_sgs,
196 void *data, 56 void *data,
197 gfp_t gfp); 57 gfp_t gfp);
198 58
59int virtqueue_add_outbuf(struct virtqueue *vq,
60 struct scatterlist sg[], unsigned int num,
61 void *data,
62 gfp_t gfp);
63
64int virtqueue_add_inbuf(struct virtqueue *vq,
65 struct scatterlist sg[], unsigned int num,
66 void *data,
67 gfp_t gfp);
68
199void virtqueue_kick(struct virtqueue *vq); 69void virtqueue_kick(struct virtqueue *vq);
200 70
201void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); 71void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
@@ -206,7 +76,8 @@ bool virtqueue_enable_cb(struct virtqueue *vq);
206bool virtqueue_enable_cb_delayed(struct virtqueue *vq); 76bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
207 77
208void *virtqueue_detach_unused_buf(struct virtqueue *vq); 78void *virtqueue_detach_unused_buf(struct virtqueue *vq);
209struct virtqueue *vring_new_virtqueue(unsigned int num, 79struct virtqueue *vring_new_virtqueue(unsigned int index,
80 unsigned int num,
210 unsigned int vring_align, 81 unsigned int vring_align,
211 struct virtio_device *vdev, 82 struct virtio_device *vdev,
212 bool weak_barriers, 83 bool weak_barriers,
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
new file mode 100644
index 000000000000..5049967f99f7
--- /dev/null
+++ b/tools/virtio/linux/virtio_config.h
@@ -0,0 +1,6 @@
1#define VIRTIO_TRANSPORT_F_START 28
2#define VIRTIO_TRANSPORT_F_END 32
3
4#define virtio_has_feature(dev, feature) \
5 test_bit((feature), (dev)->features)
6
diff --git a/tools/virtio/linux/virtio_ring.h b/tools/virtio/linux/virtio_ring.h
new file mode 100644
index 000000000000..8949c4e2772c
--- /dev/null
+++ b/tools/virtio/linux/virtio_ring.h
@@ -0,0 +1 @@
#include "../../../include/linux/virtio_ring.h"
diff --git a/tools/virtio/linux/vringh.h b/tools/virtio/linux/vringh.h
new file mode 100644
index 000000000000..9348957be56e
--- /dev/null
+++ b/tools/virtio/linux/vringh.h
@@ -0,0 +1 @@
#include "../../../include/linux/vringh.h"
diff --git a/tools/virtio/uapi/linux/uio.h b/tools/virtio/uapi/linux/uio.h
new file mode 100644
index 000000000000..7230e9002207
--- /dev/null
+++ b/tools/virtio/uapi/linux/uio.h
@@ -0,0 +1 @@
#include <sys/uio.h>
diff --git a/tools/virtio/uapi/linux/virtio_config.h b/tools/virtio/uapi/linux/virtio_config.h
new file mode 100644
index 000000000000..4c86675f0159
--- /dev/null
+++ b/tools/virtio/uapi/linux/virtio_config.h
@@ -0,0 +1 @@
#include "../../../../include/uapi/linux/virtio_config.h"
diff --git a/tools/virtio/uapi/linux/virtio_ring.h b/tools/virtio/uapi/linux/virtio_ring.h
new file mode 100644
index 000000000000..4d99c78234d3
--- /dev/null
+++ b/tools/virtio/uapi/linux/virtio_ring.h
@@ -0,0 +1,4 @@
1#ifndef VIRTIO_RING_H
2#define VIRTIO_RING_H
3#include "../../../../include/uapi/linux/virtio_ring.h"
4#endif /* VIRTIO_RING_H */
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index fcc9aa25fd08..da7a19558281 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -10,11 +10,15 @@
10#include <sys/stat.h> 10#include <sys/stat.h>
11#include <sys/types.h> 11#include <sys/types.h>
12#include <fcntl.h> 12#include <fcntl.h>
13#include <stdbool.h>
13#include <linux/vhost.h> 14#include <linux/vhost.h>
14#include <linux/virtio.h> 15#include <linux/virtio.h>
15#include <linux/virtio_ring.h> 16#include <linux/virtio_ring.h>
16#include "../../drivers/vhost/test.h" 17#include "../../drivers/vhost/test.h"
17 18
19/* Unused */
20void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
21
18struct vq_info { 22struct vq_info {
19 int kick; 23 int kick;
20 int call; 24 int call;
@@ -92,7 +96,8 @@ static void vq_info_add(struct vdev_info *dev, int num)
92 assert(r >= 0); 96 assert(r >= 0);
93 memset(info->ring, 0, vring_size(num, 4096)); 97 memset(info->ring, 0, vring_size(num, 4096));
94 vring_init(&info->vring, num, info->ring, 4096); 98 vring_init(&info->vring, num, info->ring, 4096);
95 info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, 99 info->vq = vring_new_virtqueue(info->idx,
100 info->vring.num, 4096, &dev->vdev,
96 true, info->ring, 101 true, info->ring,
97 vq_notify, vq_callback, "test"); 102 vq_notify, vq_callback, "test");
98 assert(info->vq); 103 assert(info->vq);
@@ -161,9 +166,9 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq,
161 do { 166 do {
162 if (started < bufs) { 167 if (started < bufs) {
163 sg_init_one(&sl, dev->buf, dev->buf_size); 168 sg_init_one(&sl, dev->buf, dev->buf_size);
164 r = virtqueue_add_buf(vq->vq, &sl, 1, 0, 169 r = virtqueue_add_outbuf(vq->vq, &sl, 1,
165 dev->buf + started, 170 dev->buf + started,
166 GFP_ATOMIC); 171 GFP_ATOMIC);
167 if (likely(r == 0)) { 172 if (likely(r == 0)) {
168 ++started; 173 ++started;
169 virtqueue_kick(vq->vq); 174 virtqueue_kick(vq->vq);
diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
new file mode 100644
index 000000000000..d053ea40c001
--- /dev/null
+++ b/tools/virtio/vringh_test.c
@@ -0,0 +1,741 @@
1/* Simple test of virtio code, entirely in userpsace. */
2#define _GNU_SOURCE
3#include <sched.h>
4#include <err.h>
5#include <linux/kernel.h>
6#include <linux/err.h>
7#include <linux/virtio.h>
8#include <linux/vringh.h>
9#include <linux/virtio_ring.h>
10#include <linux/uaccess.h>
11#include <sys/types.h>
12#include <sys/stat.h>
13#include <sys/mman.h>
14#include <sys/wait.h>
15#include <fcntl.h>
16
17#define USER_MEM (1024*1024)
18void *__user_addr_min, *__user_addr_max;
19void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
20static u64 user_addr_offset;
21
22#define RINGSIZE 256
23#define ALIGN 4096
24
25static void never_notify_host(struct virtqueue *vq)
26{
27 abort();
28}
29
30static void never_callback_guest(struct virtqueue *vq)
31{
32 abort();
33}
34
35static bool getrange_iov(struct vringh *vrh, u64 addr, struct vringh_range *r)
36{
37 if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
38 return false;
39 if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
40 return false;
41
42 r->start = (u64)(unsigned long)__user_addr_min - user_addr_offset;
43 r->end_incl = (u64)(unsigned long)__user_addr_max - 1 - user_addr_offset;
44 r->offset = user_addr_offset;
45 return true;
46}
47
48/* We return single byte ranges. */
49static bool getrange_slow(struct vringh *vrh, u64 addr, struct vringh_range *r)
50{
51 if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
52 return false;
53 if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
54 return false;
55
56 r->start = addr;
57 r->end_incl = r->start;
58 r->offset = user_addr_offset;
59 return true;
60}
61
62struct guest_virtio_device {
63 struct virtio_device vdev;
64 int to_host_fd;
65 unsigned long notifies;
66};
67
68static void parallel_notify_host(struct virtqueue *vq)
69{
70 struct guest_virtio_device *gvdev;
71
72 gvdev = container_of(vq->vdev, struct guest_virtio_device, vdev);
73 write(gvdev->to_host_fd, "", 1);
74 gvdev->notifies++;
75}
76
77static void no_notify_host(struct virtqueue *vq)
78{
79}
80
81#define NUM_XFERS (10000000)
82
83/* We aim for two "distant" cpus. */
84static void find_cpus(unsigned int *first, unsigned int *last)
85{
86 unsigned int i;
87
88 *first = -1U;
89 *last = 0;
90 for (i = 0; i < 4096; i++) {
91 cpu_set_t set;
92 CPU_ZERO(&set);
93 CPU_SET(i, &set);
94 if (sched_setaffinity(getpid(), sizeof(set), &set) == 0) {
95 if (i < *first)
96 *first = i;
97 if (i > *last)
98 *last = i;
99 }
100 }
101}
102
103/* Opencoded version for fast mode */
104static inline int vringh_get_head(struct vringh *vrh, u16 *head)
105{
106 u16 avail_idx, i;
107 int err;
108
109 err = get_user(avail_idx, &vrh->vring.avail->idx);
110 if (err)
111 return err;
112
113 if (vrh->last_avail_idx == avail_idx)
114 return 0;
115
116 /* Only get avail ring entries after they have been exposed by guest. */
117 virtio_rmb(vrh->weak_barriers);
118
119 i = vrh->last_avail_idx & (vrh->vring.num - 1);
120
121 err = get_user(*head, &vrh->vring.avail->ring[i]);
122 if (err)
123 return err;
124
125 vrh->last_avail_idx++;
126 return 1;
127}
128
129static int parallel_test(unsigned long features,
130 bool (*getrange)(struct vringh *vrh,
131 u64 addr, struct vringh_range *r),
132 bool fast_vringh)
133{
134 void *host_map, *guest_map;
135 int fd, mapsize, to_guest[2], to_host[2];
136 unsigned long xfers = 0, notifies = 0, receives = 0;
137 unsigned int first_cpu, last_cpu;
138 cpu_set_t cpu_set;
139 char buf[128];
140
141 /* Create real file to mmap. */
142 fd = open("/tmp/vringh_test-file", O_RDWR|O_CREAT|O_TRUNC, 0600);
143 if (fd < 0)
144 err(1, "Opening /tmp/vringh_test-file");
145
146 /* Extra room at the end for some data, and indirects */
147 mapsize = vring_size(RINGSIZE, ALIGN)
148 + RINGSIZE * 2 * sizeof(int)
149 + RINGSIZE * 6 * sizeof(struct vring_desc);
150 mapsize = (mapsize + getpagesize() - 1) & ~(getpagesize() - 1);
151 ftruncate(fd, mapsize);
152
153 /* Parent and child use separate addresses, to check our mapping logic! */
154 host_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
155 guest_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
156
157 pipe(to_guest);
158 pipe(to_host);
159
160 CPU_ZERO(&cpu_set);
161 find_cpus(&first_cpu, &last_cpu);
162 printf("Using CPUS %u and %u\n", first_cpu, last_cpu);
163 fflush(stdout);
164
165 if (fork() != 0) {
166 struct vringh vrh;
167 int status, err, rlen = 0;
168 char rbuf[5];
169
170 /* We are the host: never access guest addresses! */
171 munmap(guest_map, mapsize);
172
173 __user_addr_min = host_map;
174 __user_addr_max = __user_addr_min + mapsize;
175 user_addr_offset = host_map - guest_map;
176 assert(user_addr_offset);
177
178 close(to_guest[0]);
179 close(to_host[1]);
180
181 vring_init(&vrh.vring, RINGSIZE, host_map, ALIGN);
182 vringh_init_user(&vrh, features, RINGSIZE, true,
183 vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
184 CPU_SET(first_cpu, &cpu_set);
185 if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
186 errx(1, "Could not set affinity to cpu %u", first_cpu);
187
188 while (xfers < NUM_XFERS) {
189 struct iovec host_riov[2], host_wiov[2];
190 struct vringh_iov riov, wiov;
191 u16 head, written;
192
193 if (fast_vringh) {
194 for (;;) {
195 err = vringh_get_head(&vrh, &head);
196 if (err != 0)
197 break;
198 err = vringh_need_notify_user(&vrh);
199 if (err < 0)
200 errx(1, "vringh_need_notify_user: %i",
201 err);
202 if (err) {
203 write(to_guest[1], "", 1);
204 notifies++;
205 }
206 }
207 if (err != 1)
208 errx(1, "vringh_get_head");
209 written = 0;
210 goto complete;
211 } else {
212 vringh_iov_init(&riov,
213 host_riov,
214 ARRAY_SIZE(host_riov));
215 vringh_iov_init(&wiov,
216 host_wiov,
217 ARRAY_SIZE(host_wiov));
218
219 err = vringh_getdesc_user(&vrh, &riov, &wiov,
220 getrange, &head);
221 }
222 if (err == 0) {
223 err = vringh_need_notify_user(&vrh);
224 if (err < 0)
225 errx(1, "vringh_need_notify_user: %i",
226 err);
227 if (err) {
228 write(to_guest[1], "", 1);
229 notifies++;
230 }
231
232 if (!vringh_notify_enable_user(&vrh))
233 continue;
234
235 /* Swallow all notifies at once. */
236 if (read(to_host[0], buf, sizeof(buf)) < 1)
237 break;
238
239 vringh_notify_disable_user(&vrh);
240 receives++;
241 continue;
242 }
243 if (err != 1)
244 errx(1, "vringh_getdesc_user: %i", err);
245
246 /* We simply copy bytes. */
247 if (riov.used) {
248 rlen = vringh_iov_pull_user(&riov, rbuf,
249 sizeof(rbuf));
250 if (rlen != 4)
251 errx(1, "vringh_iov_pull_user: %i",
252 rlen);
253 assert(riov.i == riov.used);
254 written = 0;
255 } else {
256 err = vringh_iov_push_user(&wiov, rbuf, rlen);
257 if (err != rlen)
258 errx(1, "vringh_iov_push_user: %i",
259 err);
260 assert(wiov.i == wiov.used);
261 written = err;
262 }
263 complete:
264 xfers++;
265
266 err = vringh_complete_user(&vrh, head, written);
267 if (err != 0)
268 errx(1, "vringh_complete_user: %i", err);
269 }
270
271 err = vringh_need_notify_user(&vrh);
272 if (err < 0)
273 errx(1, "vringh_need_notify_user: %i", err);
274 if (err) {
275 write(to_guest[1], "", 1);
276 notifies++;
277 }
278 wait(&status);
279 if (!WIFEXITED(status))
280 errx(1, "Child died with signal %i?", WTERMSIG(status));
281 if (WEXITSTATUS(status) != 0)
282 errx(1, "Child exited %i?", WEXITSTATUS(status));
283 printf("Host: notified %lu, pinged %lu\n", notifies, receives);
284 return 0;
285 } else {
286 struct guest_virtio_device gvdev;
287 struct virtqueue *vq;
288 unsigned int *data;
289 struct vring_desc *indirects;
290 unsigned int finished = 0;
291
292 /* We pass sg[]s pointing into here, but we need RINGSIZE+1 */
293 data = guest_map + vring_size(RINGSIZE, ALIGN);
294 indirects = (void *)data + (RINGSIZE + 1) * 2 * sizeof(int);
295
296 /* We are the guest. */
297 munmap(host_map, mapsize);
298
299 close(to_guest[1]);
300 close(to_host[0]);
301
302 gvdev.vdev.features[0] = features;
303 gvdev.to_host_fd = to_host[1];
304 gvdev.notifies = 0;
305
306 CPU_SET(first_cpu, &cpu_set);
307 if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
308 err(1, "Could not set affinity to cpu %u", first_cpu);
309
310 vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true,
311 guest_map, fast_vringh ? no_notify_host
312 : parallel_notify_host,
313 never_callback_guest, "guest vq");
314
315 /* Don't kfree indirects. */
316 __kfree_ignore_start = indirects;
317 __kfree_ignore_end = indirects + RINGSIZE * 6;
318
319 while (xfers < NUM_XFERS) {
320 struct scatterlist sg[4];
321 unsigned int num_sg, len;
322 int *dbuf, err;
323 bool output = !(xfers % 2);
324
325 /* Consume bufs. */
326 while ((dbuf = virtqueue_get_buf(vq, &len)) != NULL) {
327 if (len == 4)
328 assert(*dbuf == finished - 1);
329 else if (!fast_vringh)
330 assert(*dbuf == finished);
331 finished++;
332 }
333
334 /* Produce a buffer. */
335 dbuf = data + (xfers % (RINGSIZE + 1));
336
337 if (output)
338 *dbuf = xfers;
339 else
340 *dbuf = -1;
341
342 switch ((xfers / sizeof(*dbuf)) % 4) {
343 case 0:
344 /* Nasty three-element sg list. */
345 sg_init_table(sg, num_sg = 3);
346 sg_set_buf(&sg[0], (void *)dbuf, 1);
347 sg_set_buf(&sg[1], (void *)dbuf + 1, 2);
348 sg_set_buf(&sg[2], (void *)dbuf + 3, 1);
349 break;
350 case 1:
351 sg_init_table(sg, num_sg = 2);
352 sg_set_buf(&sg[0], (void *)dbuf, 1);
353 sg_set_buf(&sg[1], (void *)dbuf + 1, 3);
354 break;
355 case 2:
356 sg_init_table(sg, num_sg = 1);
357 sg_set_buf(&sg[0], (void *)dbuf, 4);
358 break;
359 case 3:
360 sg_init_table(sg, num_sg = 4);
361 sg_set_buf(&sg[0], (void *)dbuf, 1);
362 sg_set_buf(&sg[1], (void *)dbuf + 1, 1);
363 sg_set_buf(&sg[2], (void *)dbuf + 2, 1);
364 sg_set_buf(&sg[3], (void *)dbuf + 3, 1);
365 break;
366 }
367
368 /* May allocate an indirect, so force it to allocate
369 * user addr */
370 __kmalloc_fake = indirects + (xfers % RINGSIZE) * 4;
371 if (output)
372 err = virtqueue_add_outbuf(vq, sg, num_sg, dbuf,
373 GFP_KERNEL);
374 else
375 err = virtqueue_add_inbuf(vq, sg, num_sg,
376 dbuf, GFP_KERNEL);
377
378 if (err == -ENOSPC) {
379 if (!virtqueue_enable_cb_delayed(vq))
380 continue;
381 /* Swallow all notifies at once. */
382 if (read(to_guest[0], buf, sizeof(buf)) < 1)
383 break;
384
385 receives++;
386 virtqueue_disable_cb(vq);
387 continue;
388 }
389
390 if (err)
391 errx(1, "virtqueue_add_in/outbuf: %i", err);
392
393 xfers++;
394 virtqueue_kick(vq);
395 }
396
397 /* Any extra? */
398 while (finished != xfers) {
399 int *dbuf;
400 unsigned int len;
401
402 /* Consume bufs. */
403 dbuf = virtqueue_get_buf(vq, &len);
404 if (dbuf) {
405 if (len == 4)
406 assert(*dbuf == finished - 1);
407 else
408 assert(len == 0);
409 finished++;
410 continue;
411 }
412
413 if (!virtqueue_enable_cb_delayed(vq))
414 continue;
415 if (read(to_guest[0], buf, sizeof(buf)) < 1)
416 break;
417
418 receives++;
419 virtqueue_disable_cb(vq);
420 }
421
422 printf("Guest: notified %lu, pinged %lu\n",
423 gvdev.notifies, receives);
424 vring_del_virtqueue(vq);
425 return 0;
426 }
427}
428
429int main(int argc, char *argv[])
430{
431 struct virtio_device vdev;
432 struct virtqueue *vq;
433 struct vringh vrh;
434 struct scatterlist guest_sg[RINGSIZE], *sgs[2];
435 struct iovec host_riov[2], host_wiov[2];
436 struct vringh_iov riov, wiov;
437 struct vring_used_elem used[RINGSIZE];
438 char buf[28];
439 u16 head;
440 int err;
441 unsigned i;
442 void *ret;
443 bool (*getrange)(struct vringh *vrh, u64 addr, struct vringh_range *r);
444 bool fast_vringh = false, parallel = false;
445
446 getrange = getrange_iov;
447 vdev.features[0] = 0;
448
449 while (argv[1]) {
450 if (strcmp(argv[1], "--indirect") == 0)
451 vdev.features[0] |= (1 << VIRTIO_RING_F_INDIRECT_DESC);
452 else if (strcmp(argv[1], "--eventidx") == 0)
453 vdev.features[0] |= (1 << VIRTIO_RING_F_EVENT_IDX);
454 else if (strcmp(argv[1], "--slow-range") == 0)
455 getrange = getrange_slow;
456 else if (strcmp(argv[1], "--fast-vringh") == 0)
457 fast_vringh = true;
458 else if (strcmp(argv[1], "--parallel") == 0)
459 parallel = true;
460 else
461 errx(1, "Unknown arg %s", argv[1]);
462 argv++;
463 }
464
465 if (parallel)
466 return parallel_test(vdev.features[0], getrange, fast_vringh);
467
468 if (posix_memalign(&__user_addr_min, PAGE_SIZE, USER_MEM) != 0)
469 abort();
470 __user_addr_max = __user_addr_min + USER_MEM;
471 memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN));
472
473 /* Set up guest side. */
474 vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
475 __user_addr_min,
476 never_notify_host, never_callback_guest,
477 "guest vq");
478
479 /* Set up host side. */
480 vring_init(&vrh.vring, RINGSIZE, __user_addr_min, ALIGN);
481 vringh_init_user(&vrh, vdev.features[0], RINGSIZE, true,
482 vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
483
484 /* No descriptor to get yet... */
485 err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
486 if (err != 0)
487 errx(1, "vringh_getdesc_user: %i", err);
488
489 /* Guest puts in a descriptor. */
490 memcpy(__user_addr_max - 1, "a", 1);
491 sg_init_table(guest_sg, 1);
492 sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
493 sg_init_table(guest_sg+1, 1);
494 sg_set_buf(&guest_sg[1], __user_addr_max - 3, 2);
495 sgs[0] = &guest_sg[0];
496 sgs[1] = &guest_sg[1];
497
498 /* May allocate an indirect, so force it to allocate user addr */
499 __kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
500 err = virtqueue_add_sgs(vq, sgs, 1, 1, &err, GFP_KERNEL);
501 if (err)
502 errx(1, "virtqueue_add_sgs: %i", err);
503 __kmalloc_fake = NULL;
504
505 /* Host retreives it. */
506 vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
507 vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
508
509 err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
510 if (err != 1)
511 errx(1, "vringh_getdesc_user: %i", err);
512
513 assert(riov.used == 1);
514 assert(riov.iov[0].iov_base == __user_addr_max - 1);
515 assert(riov.iov[0].iov_len == 1);
516 if (getrange != getrange_slow) {
517 assert(wiov.used == 1);
518 assert(wiov.iov[0].iov_base == __user_addr_max - 3);
519 assert(wiov.iov[0].iov_len == 2);
520 } else {
521 assert(wiov.used == 2);
522 assert(wiov.iov[0].iov_base == __user_addr_max - 3);
523 assert(wiov.iov[0].iov_len == 1);
524 assert(wiov.iov[1].iov_base == __user_addr_max - 2);
525 assert(wiov.iov[1].iov_len == 1);
526 }
527
528 err = vringh_iov_pull_user(&riov, buf, 5);
529 if (err != 1)
530 errx(1, "vringh_iov_pull_user: %i", err);
531 assert(buf[0] == 'a');
532 assert(riov.i == 1);
533 assert(vringh_iov_pull_user(&riov, buf, 5) == 0);
534
535 memcpy(buf, "bcdef", 5);
536 err = vringh_iov_push_user(&wiov, buf, 5);
537 if (err != 2)
538 errx(1, "vringh_iov_push_user: %i", err);
539 assert(memcmp(__user_addr_max - 3, "bc", 2) == 0);
540 assert(wiov.i == wiov.used);
541 assert(vringh_iov_push_user(&wiov, buf, 5) == 0);
542
543 /* Host is done. */
544 err = vringh_complete_user(&vrh, head, err);
545 if (err != 0)
546 errx(1, "vringh_complete_user: %i", err);
547
548 /* Guest should see used token now. */
549 __kfree_ignore_start = __user_addr_min + vring_size(RINGSIZE, ALIGN);
550 __kfree_ignore_end = __kfree_ignore_start + 1;
551 ret = virtqueue_get_buf(vq, &i);
552 if (ret != &err)
553 errx(1, "virtqueue_get_buf: %p", ret);
554 assert(i == 2);
555
556 /* Guest puts in a huge descriptor. */
557 sg_init_table(guest_sg, RINGSIZE);
558 for (i = 0; i < RINGSIZE; i++) {
559 sg_set_buf(&guest_sg[i],
560 __user_addr_max - USER_MEM/4, USER_MEM/4);
561 }
562
563 /* Fill contents with recognisable garbage. */
564 for (i = 0; i < USER_MEM/4; i++)
565 ((char *)__user_addr_max - USER_MEM/4)[i] = i;
566
567 /* This will allocate an indirect, so force it to allocate user addr */
568 __kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
569 err = virtqueue_add_outbuf(vq, guest_sg, RINGSIZE, &err, GFP_KERNEL);
570 if (err)
571 errx(1, "virtqueue_add_outbuf (large): %i", err);
572 __kmalloc_fake = NULL;
573
574 /* Host picks it up (allocates new iov). */
575 vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
576 vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
577
578 err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
579 if (err != 1)
580 errx(1, "vringh_getdesc_user: %i", err);
581
582 assert(riov.max_num & VRINGH_IOV_ALLOCATED);
583 assert(riov.iov != host_riov);
584 if (getrange != getrange_slow)
585 assert(riov.used == RINGSIZE);
586 else
587 assert(riov.used == RINGSIZE * USER_MEM/4);
588
589 assert(!(wiov.max_num & VRINGH_IOV_ALLOCATED));
590 assert(wiov.used == 0);
591
592 /* Pull data back out (in odd chunks), should be as expected. */
593 for (i = 0; i < RINGSIZE * USER_MEM/4; i += 3) {
594 err = vringh_iov_pull_user(&riov, buf, 3);
595 if (err != 3 && i + err != RINGSIZE * USER_MEM/4)
596 errx(1, "vringh_iov_pull_user large: %i", err);
597 assert(buf[0] == (char)i);
598 assert(err < 2 || buf[1] == (char)(i + 1));
599 assert(err < 3 || buf[2] == (char)(i + 2));
600 }
601 assert(riov.i == riov.used);
602 vringh_iov_cleanup(&riov);
603 vringh_iov_cleanup(&wiov);
604
605 /* Complete using multi interface, just because we can. */
606 used[0].id = head;
607 used[0].len = 0;
608 err = vringh_complete_multi_user(&vrh, used, 1);
609 if (err)
610 errx(1, "vringh_complete_multi_user(1): %i", err);
611
612 /* Free up those descriptors. */
613 ret = virtqueue_get_buf(vq, &i);
614 if (ret != &err)
615 errx(1, "virtqueue_get_buf: %p", ret);
616
617 /* Add lots of descriptors. */
618 sg_init_table(guest_sg, 1);
619 sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
620 for (i = 0; i < RINGSIZE; i++) {
621 err = virtqueue_add_outbuf(vq, guest_sg, 1, &err, GFP_KERNEL);
622 if (err)
623 errx(1, "virtqueue_add_outbuf (multiple): %i", err);
624 }
625
626 /* Now get many, and consume them all at once. */
627 vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
628 vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
629
630 for (i = 0; i < RINGSIZE; i++) {
631 err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
632 if (err != 1)
633 errx(1, "vringh_getdesc_user: %i", err);
634 used[i].id = head;
635 used[i].len = 0;
636 }
637 /* Make sure it wraps around ring, to test! */
638 assert(vrh.vring.used->idx % RINGSIZE != 0);
639 err = vringh_complete_multi_user(&vrh, used, RINGSIZE);
640 if (err)
641 errx(1, "vringh_complete_multi_user: %i", err);
642
643 /* Free those buffers. */
644 for (i = 0; i < RINGSIZE; i++) {
645 unsigned len;
646 assert(virtqueue_get_buf(vq, &len) != NULL);
647 }
648
649 /* Test weird (but legal!) indirect. */
650 if (vdev.features[0] & (1 << VIRTIO_RING_F_INDIRECT_DESC)) {
651 char *data = __user_addr_max - USER_MEM/4;
652 struct vring_desc *d = __user_addr_max - USER_MEM/2;
653 struct vring vring;
654
655 /* Force creation of direct, which we modify. */
656 vdev.features[0] &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
657 vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
658 __user_addr_min,
659 never_notify_host,
660 never_callback_guest,
661 "guest vq");
662
663 sg_init_table(guest_sg, 4);
664 sg_set_buf(&guest_sg[0], d, sizeof(*d)*2);
665 sg_set_buf(&guest_sg[1], d + 2, sizeof(*d)*1);
666 sg_set_buf(&guest_sg[2], data + 6, 4);
667 sg_set_buf(&guest_sg[3], d + 3, sizeof(*d)*3);
668
669 err = virtqueue_add_outbuf(vq, guest_sg, 4, &err, GFP_KERNEL);
670 if (err)
671 errx(1, "virtqueue_add_outbuf (indirect): %i", err);
672
673 vring_init(&vring, RINGSIZE, __user_addr_min, ALIGN);
674
675 /* They're used in order, but double-check... */
676 assert(vring.desc[0].addr == (unsigned long)d);
677 assert(vring.desc[1].addr == (unsigned long)(d+2));
678 assert(vring.desc[2].addr == (unsigned long)data + 6);
679 assert(vring.desc[3].addr == (unsigned long)(d+3));
680 vring.desc[0].flags |= VRING_DESC_F_INDIRECT;
681 vring.desc[1].flags |= VRING_DESC_F_INDIRECT;
682 vring.desc[3].flags |= VRING_DESC_F_INDIRECT;
683
684 /* First indirect */
685 d[0].addr = (unsigned long)data;
686 d[0].len = 1;
687 d[0].flags = VRING_DESC_F_NEXT;
688 d[0].next = 1;
689 d[1].addr = (unsigned long)data + 1;
690 d[1].len = 2;
691 d[1].flags = 0;
692
693 /* Second indirect */
694 d[2].addr = (unsigned long)data + 3;
695 d[2].len = 3;
696 d[2].flags = 0;
697
698 /* Third indirect */
699 d[3].addr = (unsigned long)data + 10;
700 d[3].len = 5;
701 d[3].flags = VRING_DESC_F_NEXT;
702 d[3].next = 1;
703 d[4].addr = (unsigned long)data + 15;
704 d[4].len = 6;
705 d[4].flags = VRING_DESC_F_NEXT;
706 d[4].next = 2;
707 d[5].addr = (unsigned long)data + 21;
708 d[5].len = 7;
709 d[5].flags = 0;
710
711 /* Host picks it up (allocates new iov). */
712 vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
713 vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
714
715 err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
716 if (err != 1)
717 errx(1, "vringh_getdesc_user: %i", err);
718
719 if (head != 0)
720 errx(1, "vringh_getdesc_user: head %i not 0", head);
721
722 assert(riov.max_num & VRINGH_IOV_ALLOCATED);
723 if (getrange != getrange_slow)
724 assert(riov.used == 7);
725 else
726 assert(riov.used == 28);
727 err = vringh_iov_pull_user(&riov, buf, 29);
728 assert(err == 28);
729
730 /* Data should be linear. */
731 for (i = 0; i < err; i++)
732 assert(buf[i] == i);
733 vringh_iov_cleanup(&riov);
734 }
735
736 /* Don't leak memory... */
737 vring_del_virtqueue(vq);
738 free(__user_addr_min);
739
740 return 0;
741}