diff options
Diffstat (limited to 'tools')
40 files changed, 3228 insertions, 167 deletions
diff --git a/tools/Makefile b/tools/Makefile index 6aaeb6cd867d..41067f304215 100644 --- a/tools/Makefile +++ b/tools/Makefile | |||
| @@ -12,6 +12,7 @@ help: | |||
| 12 | @echo ' turbostat - Intel CPU idle stats and freq reporting tool' | 12 | @echo ' turbostat - Intel CPU idle stats and freq reporting tool' |
| 13 | @echo ' usb - USB testing tools' | 13 | @echo ' usb - USB testing tools' |
| 14 | @echo ' virtio - vhost test module' | 14 | @echo ' virtio - vhost test module' |
| 15 | @echo ' net - misc networking tools' | ||
| 15 | @echo ' vm - misc vm tools' | 16 | @echo ' vm - misc vm tools' |
| 16 | @echo ' x86_energy_perf_policy - Intel energy policy tool' | 17 | @echo ' x86_energy_perf_policy - Intel energy policy tool' |
| 17 | @echo '' | 18 | @echo '' |
| @@ -34,7 +35,7 @@ help: | |||
| 34 | cpupower: FORCE | 35 | cpupower: FORCE |
| 35 | $(call descend,power/$@) | 36 | $(call descend,power/$@) |
| 36 | 37 | ||
| 37 | cgroup firewire guest usb virtio vm: FORCE | 38 | cgroup firewire guest usb virtio vm net: FORCE |
| 38 | $(call descend,$@) | 39 | $(call descend,$@) |
| 39 | 40 | ||
| 40 | liblk: FORCE | 41 | liblk: FORCE |
| @@ -52,7 +53,7 @@ turbostat x86_energy_perf_policy: FORCE | |||
| 52 | cpupower_install: | 53 | cpupower_install: |
| 53 | $(call descend,power/$(@:_install=),install) | 54 | $(call descend,power/$(@:_install=),install) |
| 54 | 55 | ||
| 55 | cgroup_install firewire_install lguest_install perf_install usb_install virtio_install vm_install: | 56 | cgroup_install firewire_install lguest_install perf_install usb_install virtio_install vm_install net_install: |
| 56 | $(call descend,$(@:_install=),install) | 57 | $(call descend,$(@:_install=),install) |
| 57 | 58 | ||
| 58 | selftests_install: | 59 | selftests_install: |
| @@ -63,12 +64,12 @@ turbostat_install x86_energy_perf_policy_install: | |||
| 63 | 64 | ||
| 64 | install: cgroup_install cpupower_install firewire_install lguest_install \ | 65 | install: cgroup_install cpupower_install firewire_install lguest_install \ |
| 65 | perf_install selftests_install turbostat_install usb_install \ | 66 | perf_install selftests_install turbostat_install usb_install \ |
| 66 | virtio_install vm_install x86_energy_perf_policy_install | 67 | virtio_install vm_install net_install x86_energy_perf_policy_install |
| 67 | 68 | ||
| 68 | cpupower_clean: | 69 | cpupower_clean: |
| 69 | $(call descend,power/cpupower,clean) | 70 | $(call descend,power/cpupower,clean) |
| 70 | 71 | ||
| 71 | cgroup_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean: | 72 | cgroup_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean net_clean: |
| 72 | $(call descend,$(@:_clean=),clean) | 73 | $(call descend,$(@:_clean=),clean) |
| 73 | 74 | ||
| 74 | liblk_clean: | 75 | liblk_clean: |
| @@ -85,6 +86,6 @@ turbostat_clean x86_energy_perf_policy_clean: | |||
| 85 | 86 | ||
| 86 | clean: cgroup_clean cpupower_clean firewire_clean lguest_clean perf_clean \ | 87 | clean: cgroup_clean cpupower_clean firewire_clean lguest_clean perf_clean \ |
| 87 | selftests_clean turbostat_clean usb_clean virtio_clean \ | 88 | selftests_clean turbostat_clean usb_clean virtio_clean \ |
| 88 | vm_clean x86_energy_perf_policy_clean | 89 | vm_clean net_clean x86_energy_perf_policy_clean |
| 89 | 90 | ||
| 90 | .PHONY: FORCE | 91 | .PHONY: FORCE |
diff --git a/tools/lguest/lguest.txt b/tools/lguest/lguest.txt index 7203ace65e83..06e1f4649511 100644 --- a/tools/lguest/lguest.txt +++ b/tools/lguest/lguest.txt | |||
| @@ -70,7 +70,7 @@ Running Lguest: | |||
| 70 | 70 | ||
| 71 | - Run an lguest as root: | 71 | - Run an lguest as root: |
| 72 | 72 | ||
| 73 | Documentation/virtual/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \ | 73 | tools/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \ |
| 74 | --block=rootfile root=/dev/vda | 74 | --block=rootfile root=/dev/vda |
| 75 | 75 | ||
| 76 | Explanation: | 76 | Explanation: |
diff --git a/tools/net/Makefile b/tools/net/Makefile new file mode 100644 index 000000000000..b4444d53b73f --- /dev/null +++ b/tools/net/Makefile | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | prefix = /usr | ||
| 2 | |||
| 3 | CC = gcc | ||
| 4 | |||
| 5 | all : bpf_jit_disasm | ||
| 6 | |||
| 7 | bpf_jit_disasm : CFLAGS = -Wall -O2 | ||
| 8 | bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl | ||
| 9 | bpf_jit_disasm : bpf_jit_disasm.o | ||
| 10 | |||
| 11 | clean : | ||
| 12 | rm -rf *.o bpf_jit_disasm | ||
| 13 | |||
| 14 | install : | ||
| 15 | install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm | ||
diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c new file mode 100644 index 000000000000..cfe0cdcda3de --- /dev/null +++ b/tools/net/bpf_jit_disasm.c | |||
| @@ -0,0 +1,199 @@ | |||
| 1 | /* | ||
| 2 | * Minimal BPF JIT image disassembler | ||
| 3 | * | ||
| 4 | * Disassembles BPF JIT compiler emitted opcodes back to asm insn's for | ||
| 5 | * debugging or verification purposes. | ||
| 6 | * | ||
| 7 | * To get the disassembly of the JIT code, do the following: | ||
| 8 | * | ||
| 9 | * 1) `echo 2 > /proc/sys/net/core/bpf_jit_enable` | ||
| 10 | * 2) Load a BPF filter (e.g. `tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24`) | ||
| 11 | * 3) Run e.g. `bpf_jit_disasm -o` to read out the last JIT code | ||
| 12 | * | ||
| 13 | * Copyright 2013 Daniel Borkmann <borkmann@redhat.com> | ||
| 14 | * Licensed under the GNU General Public License, version 2.0 (GPLv2) | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <stdint.h> | ||
| 18 | #include <stdio.h> | ||
| 19 | #include <stdlib.h> | ||
| 20 | #include <assert.h> | ||
| 21 | #include <unistd.h> | ||
| 22 | #include <string.h> | ||
| 23 | #include <bfd.h> | ||
| 24 | #include <dis-asm.h> | ||
| 25 | #include <sys/klog.h> | ||
| 26 | #include <sys/types.h> | ||
| 27 | #include <regex.h> | ||
| 28 | |||
| 29 | static void get_exec_path(char *tpath, size_t size) | ||
| 30 | { | ||
| 31 | char *path; | ||
| 32 | ssize_t len; | ||
| 33 | |||
| 34 | snprintf(tpath, size, "/proc/%d/exe", (int) getpid()); | ||
| 35 | tpath[size - 1] = 0; | ||
| 36 | |||
| 37 | path = strdup(tpath); | ||
| 38 | assert(path); | ||
| 39 | |||
| 40 | len = readlink(path, tpath, size); | ||
| 41 | tpath[len] = 0; | ||
| 42 | |||
| 43 | free(path); | ||
| 44 | } | ||
| 45 | |||
| 46 | static void get_asm_insns(uint8_t *image, size_t len, unsigned long base, | ||
| 47 | int opcodes) | ||
| 48 | { | ||
| 49 | int count, i, pc = 0; | ||
| 50 | char tpath[256]; | ||
| 51 | struct disassemble_info info; | ||
| 52 | disassembler_ftype disassemble; | ||
| 53 | bfd *bfdf; | ||
| 54 | |||
| 55 | memset(tpath, 0, sizeof(tpath)); | ||
| 56 | get_exec_path(tpath, sizeof(tpath)); | ||
| 57 | |||
| 58 | bfdf = bfd_openr(tpath, NULL); | ||
| 59 | assert(bfdf); | ||
| 60 | assert(bfd_check_format(bfdf, bfd_object)); | ||
| 61 | |||
| 62 | init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf); | ||
| 63 | info.arch = bfd_get_arch(bfdf); | ||
| 64 | info.mach = bfd_get_mach(bfdf); | ||
| 65 | info.buffer = image; | ||
| 66 | info.buffer_length = len; | ||
| 67 | |||
| 68 | disassemble_init_for_target(&info); | ||
| 69 | |||
| 70 | disassemble = disassembler(bfdf); | ||
| 71 | assert(disassemble); | ||
| 72 | |||
| 73 | do { | ||
| 74 | printf("%4x:\t", pc); | ||
| 75 | |||
| 76 | count = disassemble(pc, &info); | ||
| 77 | |||
| 78 | if (opcodes) { | ||
| 79 | printf("\n\t"); | ||
| 80 | for (i = 0; i < count; ++i) | ||
| 81 | printf("%02x ", (uint8_t) image[pc + i]); | ||
| 82 | } | ||
| 83 | printf("\n"); | ||
| 84 | |||
| 85 | pc += count; | ||
| 86 | } while(count > 0 && pc < len); | ||
| 87 | |||
| 88 | bfd_close(bfdf); | ||
| 89 | } | ||
| 90 | |||
| 91 | static char *get_klog_buff(int *klen) | ||
| 92 | { | ||
| 93 | int ret, len = klogctl(10, NULL, 0); | ||
| 94 | char *buff = malloc(len); | ||
| 95 | |||
| 96 | assert(buff && klen); | ||
| 97 | ret = klogctl(3, buff, len); | ||
| 98 | assert(ret >= 0); | ||
| 99 | *klen = ret; | ||
| 100 | |||
| 101 | return buff; | ||
| 102 | } | ||
| 103 | |||
| 104 | static void put_klog_buff(char *buff) | ||
| 105 | { | ||
| 106 | free(buff); | ||
| 107 | } | ||
| 108 | |||
| 109 | static int get_last_jit_image(char *haystack, size_t hlen, | ||
| 110 | uint8_t *image, size_t ilen, | ||
| 111 | unsigned long *base) | ||
| 112 | { | ||
| 113 | char *ptr, *pptr, *tmp; | ||
| 114 | off_t off = 0; | ||
| 115 | int ret, flen, proglen, pass, ulen = 0; | ||
| 116 | regmatch_t pmatch[1]; | ||
| 117 | regex_t regex; | ||
| 118 | |||
| 119 | if (hlen == 0) | ||
| 120 | return 0; | ||
| 121 | |||
| 122 | ret = regcomp(&regex, "flen=[[:alnum:]]+ proglen=[[:digit:]]+ " | ||
| 123 | "pass=[[:digit:]]+ image=[[:xdigit:]]+", REG_EXTENDED); | ||
| 124 | assert(ret == 0); | ||
| 125 | |||
| 126 | ptr = haystack; | ||
| 127 | while (1) { | ||
| 128 | ret = regexec(&regex, ptr, 1, pmatch, 0); | ||
| 129 | if (ret == 0) { | ||
| 130 | ptr += pmatch[0].rm_eo; | ||
| 131 | off += pmatch[0].rm_eo; | ||
| 132 | assert(off < hlen); | ||
| 133 | } else | ||
| 134 | break; | ||
| 135 | } | ||
| 136 | |||
| 137 | ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so); | ||
| 138 | ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx", | ||
| 139 | &flen, &proglen, &pass, base); | ||
| 140 | if (ret != 4) | ||
| 141 | return 0; | ||
| 142 | |||
| 143 | tmp = ptr = haystack + off; | ||
| 144 | while ((ptr = strtok(tmp, "\n")) != NULL && ulen < ilen) { | ||
| 145 | tmp = NULL; | ||
| 146 | if (!strstr(ptr, "JIT code")) | ||
| 147 | continue; | ||
| 148 | pptr = ptr; | ||
| 149 | while ((ptr = strstr(pptr, ":"))) | ||
| 150 | pptr = ptr + 1; | ||
| 151 | ptr = pptr; | ||
| 152 | do { | ||
| 153 | image[ulen++] = (uint8_t) strtoul(pptr, &pptr, 16); | ||
| 154 | if (ptr == pptr || ulen >= ilen) { | ||
| 155 | ulen--; | ||
| 156 | break; | ||
| 157 | } | ||
| 158 | ptr = pptr; | ||
| 159 | } while (1); | ||
| 160 | } | ||
| 161 | |||
| 162 | assert(ulen == proglen); | ||
| 163 | printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n", | ||
| 164 | proglen, pass, flen); | ||
| 165 | printf("%lx + <x>:\n", *base); | ||
| 166 | |||
| 167 | regfree(&regex); | ||
| 168 | return ulen; | ||
| 169 | } | ||
| 170 | |||
| 171 | int main(int argc, char **argv) | ||
| 172 | { | ||
| 173 | int len, klen, opcodes = 0; | ||
| 174 | char *kbuff; | ||
| 175 | unsigned long base; | ||
| 176 | uint8_t image[4096]; | ||
| 177 | |||
| 178 | if (argc > 1) { | ||
| 179 | if (!strncmp("-o", argv[argc - 1], 2)) { | ||
| 180 | opcodes = 1; | ||
| 181 | } else { | ||
| 182 | printf("usage: bpf_jit_disasm [-o: show opcodes]\n"); | ||
| 183 | exit(0); | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | bfd_init(); | ||
| 188 | memset(image, 0, sizeof(image)); | ||
| 189 | |||
| 190 | kbuff = get_klog_buff(&klen); | ||
| 191 | |||
| 192 | len = get_last_jit_image(kbuff, klen, image, sizeof(image), &base); | ||
| 193 | if (len > 0 && base > 0) | ||
| 194 | get_asm_insns(image, len, base, opcodes); | ||
| 195 | |||
| 196 | put_klog_buff(kbuff); | ||
| 197 | |||
| 198 | return 0; | ||
| 199 | } | ||
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 3cc0ad7ae863..d4abc59ce1d9 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile | |||
| @@ -1,10 +1,13 @@ | |||
| 1 | TARGETS = breakpoints | 1 | TARGETS = breakpoints |
| 2 | TARGETS += cpu-hotplug | ||
| 3 | TARGETS += efivarfs | ||
| 2 | TARGETS += kcmp | 4 | TARGETS += kcmp |
| 5 | TARGETS += memory-hotplug | ||
| 3 | TARGETS += mqueue | 6 | TARGETS += mqueue |
| 7 | TARGETS += net | ||
| 8 | TARGETS += ptrace | ||
| 9 | TARGETS += soft-dirty | ||
| 4 | TARGETS += vm | 10 | TARGETS += vm |
| 5 | TARGETS += cpu-hotplug | ||
| 6 | TARGETS += memory-hotplug | ||
| 7 | TARGETS += efivarfs | ||
| 8 | 11 | ||
| 9 | all: | 12 | all: |
| 10 | for TARGET in $(TARGETS); do \ | 13 | for TARGET in $(TARGETS); do \ |
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore new file mode 100644 index 000000000000..00326629d4af --- /dev/null +++ b/tools/testing/selftests/net/.gitignore | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | socket | ||
| 2 | psock_fanout | ||
| 3 | psock_tpacket | ||
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile new file mode 100644 index 000000000000..750512ba2c88 --- /dev/null +++ b/tools/testing/selftests/net/Makefile | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | # Makefile for net selftests | ||
| 2 | |||
| 3 | CC = $(CROSS_COMPILE)gcc | ||
| 4 | CFLAGS = -Wall -O2 -g | ||
| 5 | |||
| 6 | CFLAGS += -I../../../../usr/include/ | ||
| 7 | |||
| 8 | NET_PROGS = socket psock_fanout psock_tpacket | ||
| 9 | |||
| 10 | all: $(NET_PROGS) | ||
| 11 | %: %.c | ||
| 12 | $(CC) $(CFLAGS) -o $@ $^ | ||
| 13 | |||
| 14 | run_tests: all | ||
| 15 | @/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]" | ||
| 16 | @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]" | ||
| 17 | |||
| 18 | clean: | ||
| 19 | $(RM) $(NET_PROGS) | ||
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c new file mode 100644 index 000000000000..57b9c2b7c4ff --- /dev/null +++ b/tools/testing/selftests/net/psock_fanout.c | |||
| @@ -0,0 +1,312 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2013 Google Inc. | ||
| 3 | * Author: Willem de Bruijn (willemb@google.com) | ||
| 4 | * | ||
| 5 | * A basic test of packet socket fanout behavior. | ||
| 6 | * | ||
| 7 | * Control: | ||
| 8 | * - create fanout fails as expected with illegal flag combinations | ||
| 9 | * - join fanout fails as expected with diverging types or flags | ||
| 10 | * | ||
| 11 | * Datapath: | ||
| 12 | * Open a pair of packet sockets and a pair of INET sockets, send a known | ||
| 13 | * number of packets across the two INET sockets and count the number of | ||
| 14 | * packets enqueued onto the two packet sockets. | ||
| 15 | * | ||
| 16 | * The test currently runs for | ||
| 17 | * - PACKET_FANOUT_HASH | ||
| 18 | * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER | ||
| 19 | * - PACKET_FANOUT_LB | ||
| 20 | * - PACKET_FANOUT_CPU | ||
| 21 | * - PACKET_FANOUT_ROLLOVER | ||
| 22 | * | ||
| 23 | * Todo: | ||
| 24 | * - functionality: PACKET_FANOUT_FLAG_DEFRAG | ||
| 25 | * | ||
| 26 | * License (GPLv2): | ||
| 27 | * | ||
| 28 | * This program is free software; you can redistribute it and/or modify it | ||
| 29 | * under the terms and conditions of the GNU General Public License, | ||
| 30 | * version 2, as published by the Free Software Foundation. | ||
| 31 | * | ||
| 32 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 33 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 34 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 35 | * more details. | ||
| 36 | * | ||
| 37 | * You should have received a copy of the GNU General Public License along with | ||
| 38 | * this program; if not, write to the Free Software Foundation, Inc., | ||
| 39 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 40 | */ | ||
| 41 | |||
| 42 | #define _GNU_SOURCE /* for sched_setaffinity */ | ||
| 43 | |||
| 44 | #include <arpa/inet.h> | ||
| 45 | #include <errno.h> | ||
| 46 | #include <fcntl.h> | ||
| 47 | #include <linux/filter.h> | ||
| 48 | #include <linux/if_packet.h> | ||
| 49 | #include <net/ethernet.h> | ||
| 50 | #include <netinet/ip.h> | ||
| 51 | #include <netinet/udp.h> | ||
| 52 | #include <poll.h> | ||
| 53 | #include <sched.h> | ||
| 54 | #include <stdint.h> | ||
| 55 | #include <stdio.h> | ||
| 56 | #include <stdlib.h> | ||
| 57 | #include <string.h> | ||
| 58 | #include <sys/mman.h> | ||
| 59 | #include <sys/socket.h> | ||
| 60 | #include <sys/stat.h> | ||
| 61 | #include <sys/types.h> | ||
| 62 | #include <unistd.h> | ||
| 63 | |||
| 64 | #include "psock_lib.h" | ||
| 65 | |||
| 66 | #define RING_NUM_FRAMES 20 | ||
| 67 | |||
| 68 | /* Open a socket in a given fanout mode. | ||
| 69 | * @return -1 if mode is bad, a valid socket otherwise */ | ||
| 70 | static int sock_fanout_open(uint16_t typeflags, int num_packets) | ||
| 71 | { | ||
| 72 | int fd, val; | ||
| 73 | |||
| 74 | fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP)); | ||
| 75 | if (fd < 0) { | ||
| 76 | perror("socket packet"); | ||
| 77 | exit(1); | ||
| 78 | } | ||
| 79 | |||
| 80 | /* fanout group ID is always 0: tests whether old groups are deleted */ | ||
| 81 | val = ((int) typeflags) << 16; | ||
| 82 | if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) { | ||
| 83 | if (close(fd)) { | ||
| 84 | perror("close packet"); | ||
| 85 | exit(1); | ||
| 86 | } | ||
| 87 | return -1; | ||
| 88 | } | ||
| 89 | |||
| 90 | pair_udp_setfilter(fd); | ||
| 91 | return fd; | ||
| 92 | } | ||
| 93 | |||
| 94 | static char *sock_fanout_open_ring(int fd) | ||
| 95 | { | ||
| 96 | struct tpacket_req req = { | ||
| 97 | .tp_block_size = getpagesize(), | ||
| 98 | .tp_frame_size = getpagesize(), | ||
| 99 | .tp_block_nr = RING_NUM_FRAMES, | ||
| 100 | .tp_frame_nr = RING_NUM_FRAMES, | ||
| 101 | }; | ||
| 102 | char *ring; | ||
| 103 | int val = TPACKET_V2; | ||
| 104 | |||
| 105 | if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val, | ||
| 106 | sizeof(val))) { | ||
| 107 | perror("packetsock ring setsockopt version"); | ||
| 108 | exit(1); | ||
| 109 | } | ||
| 110 | if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, | ||
| 111 | sizeof(req))) { | ||
| 112 | perror("packetsock ring setsockopt"); | ||
| 113 | exit(1); | ||
| 114 | } | ||
| 115 | |||
| 116 | ring = mmap(0, req.tp_block_size * req.tp_block_nr, | ||
| 117 | PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||
| 118 | if (!ring) { | ||
| 119 | fprintf(stderr, "packetsock ring mmap\n"); | ||
| 120 | exit(1); | ||
| 121 | } | ||
| 122 | |||
| 123 | return ring; | ||
| 124 | } | ||
| 125 | |||
| 126 | static int sock_fanout_read_ring(int fd, void *ring) | ||
| 127 | { | ||
| 128 | struct tpacket2_hdr *header = ring; | ||
| 129 | int count = 0; | ||
| 130 | |||
| 131 | while (header->tp_status & TP_STATUS_USER && count < RING_NUM_FRAMES) { | ||
| 132 | count++; | ||
| 133 | header = ring + (count * getpagesize()); | ||
| 134 | } | ||
| 135 | |||
| 136 | return count; | ||
| 137 | } | ||
| 138 | |||
| 139 | static int sock_fanout_read(int fds[], char *rings[], const int expect[]) | ||
| 140 | { | ||
| 141 | int ret[2]; | ||
| 142 | |||
| 143 | ret[0] = sock_fanout_read_ring(fds[0], rings[0]); | ||
| 144 | ret[1] = sock_fanout_read_ring(fds[1], rings[1]); | ||
| 145 | |||
| 146 | fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n", | ||
| 147 | ret[0], ret[1], expect[0], expect[1]); | ||
| 148 | |||
| 149 | if ((!(ret[0] == expect[0] && ret[1] == expect[1])) && | ||
| 150 | (!(ret[0] == expect[1] && ret[1] == expect[0]))) { | ||
| 151 | fprintf(stderr, "ERROR: incorrect queue lengths\n"); | ||
| 152 | return 1; | ||
| 153 | } | ||
| 154 | |||
| 155 | return 0; | ||
| 156 | } | ||
| 157 | |||
| 158 | /* Test illegal mode + flag combination */ | ||
| 159 | static void test_control_single(void) | ||
| 160 | { | ||
| 161 | fprintf(stderr, "test: control single socket\n"); | ||
| 162 | |||
| 163 | if (sock_fanout_open(PACKET_FANOUT_ROLLOVER | | ||
| 164 | PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) { | ||
| 165 | fprintf(stderr, "ERROR: opened socket with dual rollover\n"); | ||
| 166 | exit(1); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | /* Test illegal group with different modes or flags */ | ||
| 171 | static void test_control_group(void) | ||
| 172 | { | ||
| 173 | int fds[2]; | ||
| 174 | |||
| 175 | fprintf(stderr, "test: control multiple sockets\n"); | ||
| 176 | |||
| 177 | fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 20); | ||
| 178 | if (fds[0] == -1) { | ||
| 179 | fprintf(stderr, "ERROR: failed to open HASH socket\n"); | ||
| 180 | exit(1); | ||
| 181 | } | ||
| 182 | if (sock_fanout_open(PACKET_FANOUT_HASH | | ||
| 183 | PACKET_FANOUT_FLAG_DEFRAG, 10) != -1) { | ||
| 184 | fprintf(stderr, "ERROR: joined group with wrong flag defrag\n"); | ||
| 185 | exit(1); | ||
| 186 | } | ||
| 187 | if (sock_fanout_open(PACKET_FANOUT_HASH | | ||
| 188 | PACKET_FANOUT_FLAG_ROLLOVER, 10) != -1) { | ||
| 189 | fprintf(stderr, "ERROR: joined group with wrong flag ro\n"); | ||
| 190 | exit(1); | ||
| 191 | } | ||
| 192 | if (sock_fanout_open(PACKET_FANOUT_CPU, 10) != -1) { | ||
| 193 | fprintf(stderr, "ERROR: joined group with wrong mode\n"); | ||
| 194 | exit(1); | ||
| 195 | } | ||
| 196 | fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 20); | ||
| 197 | if (fds[1] == -1) { | ||
| 198 | fprintf(stderr, "ERROR: failed to join group\n"); | ||
| 199 | exit(1); | ||
| 200 | } | ||
| 201 | if (close(fds[1]) || close(fds[0])) { | ||
| 202 | fprintf(stderr, "ERROR: closing sockets\n"); | ||
| 203 | exit(1); | ||
| 204 | } | ||
| 205 | } | ||
| 206 | |||
| 207 | static int test_datapath(uint16_t typeflags, int port_off, | ||
| 208 | const int expect1[], const int expect2[]) | ||
| 209 | { | ||
| 210 | const int expect0[] = { 0, 0 }; | ||
| 211 | char *rings[2]; | ||
| 212 | int fds[2], fds_udp[2][2], ret; | ||
| 213 | |||
| 214 | fprintf(stderr, "test: datapath 0x%hx\n", typeflags); | ||
| 215 | |||
| 216 | fds[0] = sock_fanout_open(typeflags, 20); | ||
| 217 | fds[1] = sock_fanout_open(typeflags, 20); | ||
| 218 | if (fds[0] == -1 || fds[1] == -1) { | ||
| 219 | fprintf(stderr, "ERROR: failed open\n"); | ||
| 220 | exit(1); | ||
| 221 | } | ||
| 222 | rings[0] = sock_fanout_open_ring(fds[0]); | ||
| 223 | rings[1] = sock_fanout_open_ring(fds[1]); | ||
| 224 | pair_udp_open(fds_udp[0], PORT_BASE); | ||
| 225 | pair_udp_open(fds_udp[1], PORT_BASE + port_off); | ||
| 226 | sock_fanout_read(fds, rings, expect0); | ||
| 227 | |||
| 228 | /* Send data, but not enough to overflow a queue */ | ||
| 229 | pair_udp_send(fds_udp[0], 15); | ||
| 230 | pair_udp_send(fds_udp[1], 5); | ||
| 231 | ret = sock_fanout_read(fds, rings, expect1); | ||
| 232 | |||
| 233 | /* Send more data, overflow the queue */ | ||
| 234 | pair_udp_send(fds_udp[0], 15); | ||
| 235 | /* TODO: ensure consistent order between expect1 and expect2 */ | ||
| 236 | ret |= sock_fanout_read(fds, rings, expect2); | ||
| 237 | |||
| 238 | if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) || | ||
| 239 | munmap(rings[0], RING_NUM_FRAMES * getpagesize())) { | ||
| 240 | fprintf(stderr, "close rings\n"); | ||
| 241 | exit(1); | ||
| 242 | } | ||
| 243 | if (close(fds_udp[1][1]) || close(fds_udp[1][0]) || | ||
| 244 | close(fds_udp[0][1]) || close(fds_udp[0][0]) || | ||
| 245 | close(fds[1]) || close(fds[0])) { | ||
| 246 | fprintf(stderr, "close datapath\n"); | ||
| 247 | exit(1); | ||
| 248 | } | ||
| 249 | |||
| 250 | return ret; | ||
| 251 | } | ||
| 252 | |||
| 253 | static int set_cpuaffinity(int cpuid) | ||
| 254 | { | ||
| 255 | cpu_set_t mask; | ||
| 256 | |||
| 257 | CPU_ZERO(&mask); | ||
| 258 | CPU_SET(cpuid, &mask); | ||
| 259 | if (sched_setaffinity(0, sizeof(mask), &mask)) { | ||
| 260 | if (errno != EINVAL) { | ||
| 261 | fprintf(stderr, "setaffinity %d\n", cpuid); | ||
| 262 | exit(1); | ||
| 263 | } | ||
| 264 | return 1; | ||
| 265 | } | ||
| 266 | |||
| 267 | return 0; | ||
| 268 | } | ||
| 269 | |||
| 270 | int main(int argc, char **argv) | ||
| 271 | { | ||
| 272 | const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } }; | ||
| 273 | const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } }; | ||
| 274 | const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } }; | ||
| 275 | const int expect_rb[2][2] = { { 20, 0 }, { 20, 15 } }; | ||
| 276 | const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } }; | ||
| 277 | const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; | ||
| 278 | int port_off = 2, tries = 5, ret; | ||
| 279 | |||
| 280 | test_control_single(); | ||
| 281 | test_control_group(); | ||
| 282 | |||
| 283 | /* find a set of ports that do not collide onto the same socket */ | ||
| 284 | ret = test_datapath(PACKET_FANOUT_HASH, port_off, | ||
| 285 | expect_hash[0], expect_hash[1]); | ||
| 286 | while (ret && tries--) { | ||
| 287 | fprintf(stderr, "info: trying alternate ports (%d)\n", tries); | ||
| 288 | ret = test_datapath(PACKET_FANOUT_HASH, ++port_off, | ||
| 289 | expect_hash[0], expect_hash[1]); | ||
| 290 | } | ||
| 291 | |||
| 292 | ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER, | ||
| 293 | port_off, expect_hash_rb[0], expect_hash_rb[1]); | ||
| 294 | ret |= test_datapath(PACKET_FANOUT_LB, | ||
| 295 | port_off, expect_lb[0], expect_lb[1]); | ||
| 296 | ret |= test_datapath(PACKET_FANOUT_ROLLOVER, | ||
| 297 | port_off, expect_rb[0], expect_rb[1]); | ||
| 298 | |||
| 299 | set_cpuaffinity(0); | ||
| 300 | ret |= test_datapath(PACKET_FANOUT_CPU, port_off, | ||
| 301 | expect_cpu0[0], expect_cpu0[1]); | ||
| 302 | if (!set_cpuaffinity(1)) | ||
| 303 | /* TODO: test that choice alternates with previous */ | ||
| 304 | ret |= test_datapath(PACKET_FANOUT_CPU, port_off, | ||
| 305 | expect_cpu1[0], expect_cpu1[1]); | ||
| 306 | |||
| 307 | if (ret) | ||
| 308 | return 1; | ||
| 309 | |||
| 310 | printf("OK. All tests passed\n"); | ||
| 311 | return 0; | ||
| 312 | } | ||
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h new file mode 100644 index 000000000000..37da54ac85a9 --- /dev/null +++ b/tools/testing/selftests/net/psock_lib.h | |||
| @@ -0,0 +1,127 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2013 Google Inc. | ||
| 3 | * Author: Willem de Bruijn <willemb@google.com> | ||
| 4 | * Daniel Borkmann <dborkman@redhat.com> | ||
| 5 | * | ||
| 6 | * License (GPLv2): | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify it | ||
| 9 | * under the terms and conditions of the GNU General Public License, | ||
| 10 | * version 2, as published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 15 | * more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License along with | ||
| 18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
| 19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 20 | */ | ||
| 21 | |||
| 22 | #ifndef PSOCK_LIB_H | ||
| 23 | #define PSOCK_LIB_H | ||
| 24 | |||
| 25 | #include <sys/types.h> | ||
| 26 | #include <sys/socket.h> | ||
| 27 | #include <string.h> | ||
| 28 | #include <arpa/inet.h> | ||
| 29 | #include <unistd.h> | ||
| 30 | |||
| 31 | #define DATA_LEN 100 | ||
| 32 | #define DATA_CHAR 'a' | ||
| 33 | |||
| 34 | #define PORT_BASE 8000 | ||
| 35 | |||
| 36 | #ifndef __maybe_unused | ||
| 37 | # define __maybe_unused __attribute__ ((__unused__)) | ||
| 38 | #endif | ||
| 39 | |||
| 40 | static __maybe_unused void pair_udp_setfilter(int fd) | ||
| 41 | { | ||
| 42 | struct sock_filter bpf_filter[] = { | ||
| 43 | { 0x80, 0, 0, 0x00000000 }, /* LD pktlen */ | ||
| 44 | { 0x35, 0, 5, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/ | ||
| 45 | { 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */ | ||
| 46 | { 0x15, 0, 3, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ | ||
| 47 | { 0x30, 0, 0, 0x00000051 }, /* LD ip[81] */ | ||
| 48 | { 0x15, 0, 1, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ | ||
| 49 | { 0x06, 0, 0, 0x00000060 }, /* RET match */ | ||
| 50 | { 0x06, 0, 0, 0x00000000 }, /* RET no match */ | ||
| 51 | }; | ||
| 52 | struct sock_fprog bpf_prog; | ||
| 53 | |||
| 54 | bpf_prog.filter = bpf_filter; | ||
| 55 | bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); | ||
| 56 | if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog, | ||
| 57 | sizeof(bpf_prog))) { | ||
| 58 | perror("setsockopt SO_ATTACH_FILTER"); | ||
| 59 | exit(1); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | static __maybe_unused void pair_udp_open(int fds[], uint16_t port) | ||
| 64 | { | ||
| 65 | struct sockaddr_in saddr, daddr; | ||
| 66 | |||
| 67 | fds[0] = socket(PF_INET, SOCK_DGRAM, 0); | ||
| 68 | fds[1] = socket(PF_INET, SOCK_DGRAM, 0); | ||
| 69 | if (fds[0] == -1 || fds[1] == -1) { | ||
| 70 | fprintf(stderr, "ERROR: socket dgram\n"); | ||
| 71 | exit(1); | ||
| 72 | } | ||
| 73 | |||
| 74 | memset(&saddr, 0, sizeof(saddr)); | ||
| 75 | saddr.sin_family = AF_INET; | ||
| 76 | saddr.sin_port = htons(port); | ||
| 77 | saddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); | ||
| 78 | |||
| 79 | memset(&daddr, 0, sizeof(daddr)); | ||
| 80 | daddr.sin_family = AF_INET; | ||
| 81 | daddr.sin_port = htons(port + 1); | ||
| 82 | daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); | ||
| 83 | |||
| 84 | /* must bind both to get consistent hash result */ | ||
| 85 | if (bind(fds[1], (void *) &daddr, sizeof(daddr))) { | ||
| 86 | perror("bind"); | ||
| 87 | exit(1); | ||
| 88 | } | ||
| 89 | if (bind(fds[0], (void *) &saddr, sizeof(saddr))) { | ||
| 90 | perror("bind"); | ||
| 91 | exit(1); | ||
| 92 | } | ||
| 93 | if (connect(fds[0], (void *) &daddr, sizeof(daddr))) { | ||
| 94 | perror("connect"); | ||
| 95 | exit(1); | ||
| 96 | } | ||
| 97 | } | ||
| 98 | |||
| 99 | static __maybe_unused void pair_udp_send(int fds[], int num) | ||
| 100 | { | ||
| 101 | char buf[DATA_LEN], rbuf[DATA_LEN]; | ||
| 102 | |||
| 103 | memset(buf, DATA_CHAR, sizeof(buf)); | ||
| 104 | while (num--) { | ||
| 105 | /* Should really handle EINTR and EAGAIN */ | ||
| 106 | if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) { | ||
| 107 | fprintf(stderr, "ERROR: send failed left=%d\n", num); | ||
| 108 | exit(1); | ||
| 109 | } | ||
| 110 | if (read(fds[1], rbuf, sizeof(rbuf)) != sizeof(rbuf)) { | ||
| 111 | fprintf(stderr, "ERROR: recv failed left=%d\n", num); | ||
| 112 | exit(1); | ||
| 113 | } | ||
| 114 | if (memcmp(buf, rbuf, sizeof(buf))) { | ||
| 115 | fprintf(stderr, "ERROR: data failed left=%d\n", num); | ||
| 116 | exit(1); | ||
| 117 | } | ||
| 118 | } | ||
| 119 | } | ||
| 120 | |||
| 121 | static __maybe_unused void pair_udp_close(int fds[]) | ||
| 122 | { | ||
| 123 | close(fds[0]); | ||
| 124 | close(fds[1]); | ||
| 125 | } | ||
| 126 | |||
| 127 | #endif /* PSOCK_LIB_H */ | ||
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c new file mode 100644 index 000000000000..c41b58640a05 --- /dev/null +++ b/tools/testing/selftests/net/psock_tpacket.c | |||
| @@ -0,0 +1,824 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2013 Red Hat, Inc. | ||
| 3 | * Author: Daniel Borkmann <dborkman@redhat.com> | ||
| 4 | * | ||
| 5 | * A basic test of packet socket's TPACKET_V1/TPACKET_V2/TPACKET_V3 behavior. | ||
| 6 | * | ||
| 7 | * Control: | ||
| 8 | * Test the setup of the TPACKET socket with different patterns that are | ||
| 9 | * known to fail (TODO) resp. succeed (OK). | ||
| 10 | * | ||
| 11 | * Datapath: | ||
| 12 | * Open a pair of packet sockets and send resp. receive an a priori known | ||
| 13 | * packet pattern across the sockets and check if it was received resp. | ||
| 14 | * sent correctly. Fanout in combination with RX_RING is currently not | ||
| 15 | * tested here. | ||
| 16 | * | ||
| 17 | * The test currently runs for | ||
| 18 | * - TPACKET_V1: RX_RING, TX_RING | ||
| 19 | * - TPACKET_V2: RX_RING, TX_RING | ||
| 20 | * - TPACKET_V3: RX_RING | ||
| 21 | * | ||
| 22 | * License (GPLv2): | ||
| 23 | * | ||
| 24 | * This program is free software; you can redistribute it and/or modify it | ||
| 25 | * under the terms and conditions of the GNU General Public License, | ||
| 26 | * version 2, as published by the Free Software Foundation. | ||
| 27 | * | ||
| 28 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 29 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 30 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 31 | * more details. | ||
| 32 | * | ||
| 33 | * You should have received a copy of the GNU General Public License along with | ||
| 34 | * this program; if not, write to the Free Software Foundation, Inc., | ||
| 35 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 36 | */ | ||
| 37 | |||
| 38 | #include <stdio.h> | ||
| 39 | #include <stdlib.h> | ||
| 40 | #include <sys/types.h> | ||
| 41 | #include <sys/stat.h> | ||
| 42 | #include <sys/socket.h> | ||
| 43 | #include <sys/mman.h> | ||
| 44 | #include <linux/if_packet.h> | ||
| 45 | #include <linux/filter.h> | ||
| 46 | #include <ctype.h> | ||
| 47 | #include <fcntl.h> | ||
| 48 | #include <unistd.h> | ||
| 49 | #include <bits/wordsize.h> | ||
| 50 | #include <net/ethernet.h> | ||
| 51 | #include <netinet/ip.h> | ||
| 52 | #include <arpa/inet.h> | ||
| 53 | #include <stdint.h> | ||
| 54 | #include <string.h> | ||
| 55 | #include <assert.h> | ||
| 56 | #include <net/if.h> | ||
| 57 | #include <inttypes.h> | ||
| 58 | #include <poll.h> | ||
| 59 | |||
| 60 | #include "psock_lib.h" | ||
| 61 | |||
| 62 | #ifndef bug_on | ||
| 63 | # define bug_on(cond) assert(!(cond)) | ||
| 64 | #endif | ||
| 65 | |||
| 66 | #ifndef __aligned_tpacket | ||
| 67 | # define __aligned_tpacket __attribute__((aligned(TPACKET_ALIGNMENT))) | ||
| 68 | #endif | ||
| 69 | |||
| 70 | #ifndef __align_tpacket | ||
| 71 | # define __align_tpacket(x) __attribute__((aligned(TPACKET_ALIGN(x)))) | ||
| 72 | #endif | ||
| 73 | |||
/* Round x up to the next multiple of 8. */
#define ALIGN_8(x)		(((x) + 8 - 1) & ~(8 - 1))

/* Accessors for the fields of a struct block_desc (TPACKET_V3 block). */
#define BLOCK_STATUS(x)		((x)->h1.block_status)
#define BLOCK_NUM_PKTS(x)	((x)->h1.num_pkts)
#define BLOCK_O2FP(x)		((x)->h1.offset_to_first_pkt)
#define BLOCK_LEN(x)		((x)->h1.blk_len)
#define BLOCK_SNUM(x)		((x)->h1.seq_num)
#define BLOCK_O2PRIV(x)		((x)->offset_to_priv)

/* Pointer to the private area located offset_to_priv bytes into the block. */
#define BLOCK_PRIV(x)		((void *) ((uint8_t *) (x) + BLOCK_O2PRIV(x)))

/* Size of the block descriptor, and of descriptor plus private area,
 * each rounded up to a multiple of 8 bytes.
 */
#define BLOCK_HDR_LEN		(ALIGN_8(sizeof(struct block_desc)))
#define BLOCK_PLUS_PRIV(sz_pri)	(BLOCK_HDR_LEN + ALIGN_8((sz_pri)))
| 84 | |||
| 85 | #define NUM_PACKETS 100 | ||
| 86 | |||
/* Bookkeeping for one memory-mapped packet ring under test. */
struct ring {
	/* One entry per ring slot, pointing into mm_space. */
	struct iovec *rd;
	/* Start of the mmap()ed ring memory. */
	uint8_t *mm_space;
	/* Length of the mapping in bytes. */
	size_t mm_len;
	/* Size of the rd array in bytes. */
	size_t rd_len;
	/* Link-layer address the ring socket is bound to. */
	struct sockaddr_ll ll;
	/* Version-specific routine that walks the ring. */
	void (*walk)(int sock, struct ring *ring);
	/* PACKET_RX_RING or PACKET_TX_RING. */
	int type;
	/* Number of entries in rd. */
	int rd_num;
	/* Size in bytes of each rd entry. */
	int flen;
	/* TPACKET_V1, TPACKET_V2 or TPACKET_V3. */
	int version;
	/* Ring request passed to setsockopt(); which member is used
	 * depends on version.
	 */
	union {
		struct tpacket_req req;
		struct tpacket_req3 req3;
	};
};
| 99 | |||
/* Header found at the start of each TPACKET_V3 block; accessed through
 * the BLOCK_* macros.
 * NOTE(review): presumably mirrors the kernel's block descriptor
 * layout - confirm against linux/if_packet.h.
 */
struct block_desc {
	uint32_t		version;
	/* Byte offset from the block start to the private area. */
	uint32_t		offset_to_priv;
	/* Status, packet count, length and sequence number. */
	struct tpacket_hdr_v1	h1;
};
| 105 | |||
| 106 | union frame_map { | ||
| 107 | struct { | ||
| 108 | struct tpacket_hdr tp_h __aligned_tpacket; | ||
| 109 | struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket_hdr)); | ||
| 110 | } *v1; | ||
| 111 | struct { | ||
| 112 | struct tpacket2_hdr tp_h __aligned_tpacket; | ||
| 113 | struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr)); | ||
| 114 | } *v2; | ||
| 115 | void *raw; | ||
| 116 | }; | ||
| 117 | |||
| 118 | static unsigned int total_packets, total_bytes; | ||
| 119 | |||
/*
 * Open a raw packet socket for all protocols and switch it to the
 * TPACKET version given by ver. Exits the process on any failure.
 *
 * Returns the socket descriptor.
 */
static int pfsocket(int ver)
{
	int sock;

	sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (sock == -1) {
		perror("socket");
		exit(1);
	}

	if (setsockopt(sock, SOL_PACKET, PACKET_VERSION,
		       &ver, sizeof(ver)) == -1) {
		perror("setsockopt");
		exit(1);
	}

	return sock;
}
| 136 | |||
| 137 | static void status_bar_update(void) | ||
| 138 | { | ||
| 139 | if (total_packets % 10 == 0) { | ||
| 140 | fprintf(stderr, "."); | ||
| 141 | fflush(stderr); | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
/*
 * Sanity-check a received frame: it must be at least as long as an
 * Ethernet header and carry the IPv4 ethertype. Exits the process with
 * a diagnostic otherwise.
 *
 * @pay: start of the Ethernet frame
 * @len: number of valid bytes at @pay
 */
static void test_payload(void *pay, size_t len)
{
	const struct ethhdr *hdr = pay;

	if (len < sizeof(*hdr)) {
		fprintf(stderr,
			"test_payload: packet too small: %zu bytes!\n", len);
		exit(1);
	}

	if (hdr->h_proto == htons(ETH_P_IP))
		return;

	fprintf(stderr, "test_payload: wrong ethernet type: 0x%x!\n",
		ntohs(hdr->h_proto));
	exit(1);
}
| 161 | |||
| 162 | static void create_payload(void *pay, size_t *len) | ||
| 163 | { | ||
| 164 | int i; | ||
| 165 | struct ethhdr *eth = pay; | ||
| 166 | struct iphdr *ip = pay + sizeof(*eth); | ||
| 167 | |||
| 168 | /* Lets create some broken crap, that still passes | ||
| 169 | * our BPF filter. | ||
| 170 | */ | ||
| 171 | |||
| 172 | *len = DATA_LEN + 42; | ||
| 173 | |||
| 174 | memset(pay, 0xff, ETH_ALEN * 2); | ||
| 175 | eth->h_proto = htons(ETH_P_IP); | ||
| 176 | |||
| 177 | for (i = 0; i < sizeof(*ip); ++i) | ||
| 178 | ((uint8_t *) pay)[i + sizeof(*eth)] = (uint8_t) rand(); | ||
| 179 | |||
| 180 | ip->ihl = 5; | ||
| 181 | ip->version = 4; | ||
| 182 | ip->protocol = 0x11; | ||
| 183 | ip->frag_off = 0; | ||
| 184 | ip->ttl = 64; | ||
| 185 | ip->tot_len = htons((uint16_t) *len - sizeof(*eth)); | ||
| 186 | |||
| 187 | ip->saddr = htonl(INADDR_LOOPBACK); | ||
| 188 | ip->daddr = htonl(INADDR_LOOPBACK); | ||
| 189 | |||
| 190 | memset(pay + sizeof(*eth) + sizeof(*ip), | ||
| 191 | DATA_CHAR, DATA_LEN); | ||
| 192 | } | ||
| 193 | |||
/* Non-zero when the TP_STATUS_USER bit is set in a V1 RX frame header. */
static inline int __v1_rx_kernel_ready(struct tpacket_hdr *hdr)
{
	const unsigned long status = hdr->tp_status;

	return (status & TP_STATUS_USER) == TP_STATUS_USER;
}
| 198 | |||
/* Mark a V1 RX frame as TP_STATUS_KERNEL, then issue a full barrier. */
static inline void __v1_rx_user_ready(struct tpacket_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_KERNEL;

	__sync_synchronize();
}
| 204 | |||
/* Non-zero when the TP_STATUS_USER bit is set in a V2 RX frame header. */
static inline int __v2_rx_kernel_ready(struct tpacket2_hdr *hdr)
{
	const uint32_t status = hdr->tp_status;

	return (status & TP_STATUS_USER) == TP_STATUS_USER;
}
| 209 | |||
/* Mark a V2 RX frame as TP_STATUS_KERNEL, then issue a full barrier. */
static inline void __v2_rx_user_ready(struct tpacket2_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_KERNEL;

	__sync_synchronize();
}
| 215 | |||
/*
 * Version dispatcher for the RX "frame ready for user space" check.
 * @base points at the frame header; any version other than TPACKET_V1
 * or TPACKET_V2 trips bug_on() and yields 0.
 */
static inline int __v1_v2_rx_kernel_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		return __v1_rx_kernel_ready(base);
	if (version == TPACKET_V2)
		return __v2_rx_kernel_ready(base);

	bug_on(1);
	return 0;
}
| 228 | |||
/*
 * Version dispatcher that marks an RX frame as TP_STATUS_KERNEL.
 * Versions other than TPACKET_V1/TPACKET_V2 are silently ignored.
 */
static inline void __v1_v2_rx_user_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		__v1_rx_user_ready(base);
	else if (version == TPACKET_V2)
		__v2_rx_user_ready(base);
}
| 240 | |||
| 241 | static void walk_v1_v2_rx(int sock, struct ring *ring) | ||
| 242 | { | ||
| 243 | struct pollfd pfd; | ||
| 244 | int udp_sock[2]; | ||
| 245 | union frame_map ppd; | ||
| 246 | unsigned int frame_num = 0; | ||
| 247 | |||
| 248 | bug_on(ring->type != PACKET_RX_RING); | ||
| 249 | |||
| 250 | pair_udp_open(udp_sock, PORT_BASE); | ||
| 251 | pair_udp_setfilter(sock); | ||
| 252 | |||
| 253 | memset(&pfd, 0, sizeof(pfd)); | ||
| 254 | pfd.fd = sock; | ||
| 255 | pfd.events = POLLIN | POLLERR; | ||
| 256 | pfd.revents = 0; | ||
| 257 | |||
| 258 | pair_udp_send(udp_sock, NUM_PACKETS); | ||
| 259 | |||
| 260 | while (total_packets < NUM_PACKETS * 2) { | ||
| 261 | while (__v1_v2_rx_kernel_ready(ring->rd[frame_num].iov_base, | ||
| 262 | ring->version)) { | ||
| 263 | ppd.raw = ring->rd[frame_num].iov_base; | ||
| 264 | |||
| 265 | switch (ring->version) { | ||
| 266 | case TPACKET_V1: | ||
| 267 | test_payload((uint8_t *) ppd.raw + ppd.v1->tp_h.tp_mac, | ||
| 268 | ppd.v1->tp_h.tp_snaplen); | ||
| 269 | total_bytes += ppd.v1->tp_h.tp_snaplen; | ||
| 270 | break; | ||
| 271 | |||
| 272 | case TPACKET_V2: | ||
| 273 | test_payload((uint8_t *) ppd.raw + ppd.v2->tp_h.tp_mac, | ||
| 274 | ppd.v2->tp_h.tp_snaplen); | ||
| 275 | total_bytes += ppd.v2->tp_h.tp_snaplen; | ||
| 276 | break; | ||
| 277 | } | ||
| 278 | |||
| 279 | status_bar_update(); | ||
| 280 | total_packets++; | ||
| 281 | |||
| 282 | __v1_v2_rx_user_ready(ppd.raw, ring->version); | ||
| 283 | |||
| 284 | frame_num = (frame_num + 1) % ring->rd_num; | ||
| 285 | } | ||
| 286 | |||
| 287 | poll(&pfd, 1, 1); | ||
| 288 | } | ||
| 289 | |||
| 290 | pair_udp_close(udp_sock); | ||
| 291 | |||
| 292 | if (total_packets != 2 * NUM_PACKETS) { | ||
| 293 | fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n", | ||
| 294 | ring->version, total_packets, NUM_PACKETS); | ||
| 295 | exit(1); | ||
| 296 | } | ||
| 297 | |||
| 298 | fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1); | ||
| 299 | } | ||
| 300 | |||
/* Non-zero when a V1 TX frame is neither queued for sending nor being sent. */
static inline int __v1_tx_kernel_ready(struct tpacket_hdr *hdr)
{
	const unsigned long busy = TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING;

	return (hdr->tp_status & busy) == 0;
}
| 305 | |||
/* Mark a V1 TX frame as TP_STATUS_SEND_REQUEST, then issue a full barrier. */
static inline void __v1_tx_user_ready(struct tpacket_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_SEND_REQUEST;

	__sync_synchronize();
}
| 311 | |||
/* Non-zero when a V2 TX frame is neither queued for sending nor being sent. */
static inline int __v2_tx_kernel_ready(struct tpacket2_hdr *hdr)
{
	const uint32_t busy = TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING;

	return (hdr->tp_status & busy) == 0;
}
| 316 | |||
/* Mark a V2 TX frame as TP_STATUS_SEND_REQUEST, then issue a full barrier. */
static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_SEND_REQUEST;

	__sync_synchronize();
}
| 322 | |||
/*
 * Version dispatcher for the TX "frame free for user space" check.
 * Any version other than TPACKET_V1 or TPACKET_V2 trips bug_on() and
 * yields 0.
 */
static inline int __v1_v2_tx_kernel_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		return __v1_tx_kernel_ready(base);
	if (version == TPACKET_V2)
		return __v2_tx_kernel_ready(base);

	bug_on(1);
	return 0;
}
| 335 | |||
/*
 * Version dispatcher that queues a TX frame for sending.
 * Versions other than TPACKET_V1/TPACKET_V2 are silently ignored.
 */
static inline void __v1_v2_tx_user_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		__v1_tx_user_ready(base);
	else if (version == TPACKET_V2)
		__v2_tx_user_ready(base);
}
| 347 | |||
/* Enable the PACKET_LOSS socket option on sock; exits on failure. */
static void __v1_v2_set_packet_loss_discard(int sock)
{
	int discard = 1;

	if (setsockopt(sock, SOL_PACKET, PACKET_LOSS,
		       (void *) &discard, sizeof(discard)) != -1)
		return;

	perror("setsockopt");
	exit(1);
}
| 359 | |||
| 360 | static void walk_v1_v2_tx(int sock, struct ring *ring) | ||
| 361 | { | ||
| 362 | struct pollfd pfd; | ||
| 363 | int rcv_sock, ret; | ||
| 364 | size_t packet_len; | ||
| 365 | union frame_map ppd; | ||
| 366 | char packet[1024]; | ||
| 367 | unsigned int frame_num = 0, got = 0; | ||
| 368 | struct sockaddr_ll ll = { | ||
| 369 | .sll_family = PF_PACKET, | ||
| 370 | .sll_halen = ETH_ALEN, | ||
| 371 | }; | ||
| 372 | |||
| 373 | bug_on(ring->type != PACKET_TX_RING); | ||
| 374 | bug_on(ring->rd_num < NUM_PACKETS); | ||
| 375 | |||
| 376 | rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); | ||
| 377 | if (rcv_sock == -1) { | ||
| 378 | perror("socket"); | ||
| 379 | exit(1); | ||
| 380 | } | ||
| 381 | |||
| 382 | pair_udp_setfilter(rcv_sock); | ||
| 383 | |||
| 384 | ll.sll_ifindex = if_nametoindex("lo"); | ||
| 385 | ret = bind(rcv_sock, (struct sockaddr *) &ll, sizeof(ll)); | ||
| 386 | if (ret == -1) { | ||
| 387 | perror("bind"); | ||
| 388 | exit(1); | ||
| 389 | } | ||
| 390 | |||
| 391 | memset(&pfd, 0, sizeof(pfd)); | ||
| 392 | pfd.fd = sock; | ||
| 393 | pfd.events = POLLOUT | POLLERR; | ||
| 394 | pfd.revents = 0; | ||
| 395 | |||
| 396 | total_packets = NUM_PACKETS; | ||
| 397 | create_payload(packet, &packet_len); | ||
| 398 | |||
| 399 | while (total_packets > 0) { | ||
| 400 | while (__v1_v2_tx_kernel_ready(ring->rd[frame_num].iov_base, | ||
| 401 | ring->version) && | ||
| 402 | total_packets > 0) { | ||
| 403 | ppd.raw = ring->rd[frame_num].iov_base; | ||
| 404 | |||
| 405 | switch (ring->version) { | ||
| 406 | case TPACKET_V1: | ||
| 407 | ppd.v1->tp_h.tp_snaplen = packet_len; | ||
| 408 | ppd.v1->tp_h.tp_len = packet_len; | ||
| 409 | |||
| 410 | memcpy((uint8_t *) ppd.raw + TPACKET_HDRLEN - | ||
| 411 | sizeof(struct sockaddr_ll), packet, | ||
| 412 | packet_len); | ||
| 413 | total_bytes += ppd.v1->tp_h.tp_snaplen; | ||
| 414 | break; | ||
| 415 | |||
| 416 | case TPACKET_V2: | ||
| 417 | ppd.v2->tp_h.tp_snaplen = packet_len; | ||
| 418 | ppd.v2->tp_h.tp_len = packet_len; | ||
| 419 | |||
| 420 | memcpy((uint8_t *) ppd.raw + TPACKET2_HDRLEN - | ||
| 421 | sizeof(struct sockaddr_ll), packet, | ||
| 422 | packet_len); | ||
| 423 | total_bytes += ppd.v2->tp_h.tp_snaplen; | ||
| 424 | break; | ||
| 425 | } | ||
| 426 | |||
| 427 | status_bar_update(); | ||
| 428 | total_packets--; | ||
| 429 | |||
| 430 | __v1_v2_tx_user_ready(ppd.raw, ring->version); | ||
| 431 | |||
| 432 | frame_num = (frame_num + 1) % ring->rd_num; | ||
| 433 | } | ||
| 434 | |||
| 435 | poll(&pfd, 1, 1); | ||
| 436 | } | ||
| 437 | |||
| 438 | bug_on(total_packets != 0); | ||
| 439 | |||
| 440 | ret = sendto(sock, NULL, 0, 0, NULL, 0); | ||
| 441 | if (ret == -1) { | ||
| 442 | perror("sendto"); | ||
| 443 | exit(1); | ||
| 444 | } | ||
| 445 | |||
| 446 | while ((ret = recvfrom(rcv_sock, packet, sizeof(packet), | ||
| 447 | 0, NULL, NULL)) > 0 && | ||
| 448 | total_packets < NUM_PACKETS) { | ||
| 449 | got += ret; | ||
| 450 | test_payload(packet, ret); | ||
| 451 | |||
| 452 | status_bar_update(); | ||
| 453 | total_packets++; | ||
| 454 | } | ||
| 455 | |||
| 456 | close(rcv_sock); | ||
| 457 | |||
| 458 | if (total_packets != NUM_PACKETS) { | ||
| 459 | fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n", | ||
| 460 | ring->version, total_packets, NUM_PACKETS); | ||
| 461 | exit(1); | ||
| 462 | } | ||
| 463 | |||
| 464 | fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, got); | ||
| 465 | } | ||
| 466 | |||
| 467 | static void walk_v1_v2(int sock, struct ring *ring) | ||
| 468 | { | ||
| 469 | if (ring->type == PACKET_RX_RING) | ||
| 470 | walk_v1_v2_rx(sock, ring); | ||
| 471 | else | ||
| 472 | walk_v1_v2_tx(sock, ring); | ||
| 473 | } | ||
| 474 | |||
| 475 | static uint64_t __v3_prev_block_seq_num = 0; | ||
| 476 | |||
| 477 | void __v3_test_block_seq_num(struct block_desc *pbd) | ||
| 478 | { | ||
| 479 | if (__v3_prev_block_seq_num + 1 != BLOCK_SNUM(pbd)) { | ||
| 480 | fprintf(stderr, "\nprev_block_seq_num:%"PRIu64", expected " | ||
| 481 | "seq:%"PRIu64" != actual seq:%"PRIu64"\n", | ||
| 482 | __v3_prev_block_seq_num, __v3_prev_block_seq_num + 1, | ||
| 483 | (uint64_t) BLOCK_SNUM(pbd)); | ||
| 484 | exit(1); | ||
| 485 | } | ||
| 486 | |||
| 487 | __v3_prev_block_seq_num = BLOCK_SNUM(pbd); | ||
| 488 | } | ||
| 489 | |||
/*
 * Verify the length the kernel recorded for a block.
 *
 * @pbd:       block descriptor
 * @bytes:     length computed by the caller while walking the block
 * @block_num: block index, used in diagnostics only
 *
 * A block with packets must report exactly @bytes. An empty block must
 * report the descriptor plus the 13-byte private area, each rounded up
 * to 8 bytes. Exits the process on a mismatch.
 */
static void __v3_test_block_len(struct block_desc *pbd, uint32_t bytes, int block_num)
{
	if (BLOCK_NUM_PKTS(pbd)) {
		if (bytes != BLOCK_LEN(pbd)) {
			fprintf(stderr, "\nblock:%d with %u packets, expected "
				"len:%u != actual len:%u\n", block_num,
				BLOCK_NUM_PKTS(pbd), bytes, BLOCK_LEN(pbd));
			exit(1);
		}
	} else {
		if (BLOCK_LEN(pbd) != BLOCK_PLUS_PRIV(13)) {
			/* Report the value that was actually compared. */
			fprintf(stderr, "\nblock:%d, expected len:%zu != "
				"actual len:%u\n", block_num,
				(size_t) BLOCK_PLUS_PRIV(13), BLOCK_LEN(pbd));
			exit(1);
		}
	}
}
| 508 | |||
/*
 * Validate a block header: the TP_STATUS_USER bit must be set and the
 * sequence number must follow the previous block's. Exits on failure.
 */
static void __v3_test_block_header(struct block_desc *pbd, const int block_num)
{
	if (!(BLOCK_STATUS(pbd) & TP_STATUS_USER)) {
		fprintf(stderr, "\nblock %u: not in TP_STATUS_USER\n", block_num);
		exit(1);
	}

	__v3_test_block_seq_num(pbd);
}
| 520 | |||
| 521 | static void __v3_walk_block(struct block_desc *pbd, const int block_num) | ||
| 522 | { | ||
| 523 | int num_pkts = BLOCK_NUM_PKTS(pbd), i; | ||
| 524 | unsigned long bytes = 0; | ||
| 525 | unsigned long bytes_with_padding = BLOCK_PLUS_PRIV(13); | ||
| 526 | struct tpacket3_hdr *ppd; | ||
| 527 | |||
| 528 | __v3_test_block_header(pbd, block_num); | ||
| 529 | |||
| 530 | ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd + BLOCK_O2FP(pbd)); | ||
| 531 | for (i = 0; i < num_pkts; ++i) { | ||
| 532 | bytes += ppd->tp_snaplen; | ||
| 533 | |||
| 534 | if (ppd->tp_next_offset) | ||
| 535 | bytes_with_padding += ppd->tp_next_offset; | ||
| 536 | else | ||
| 537 | bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac); | ||
| 538 | |||
| 539 | test_payload((uint8_t *) ppd + ppd->tp_mac, ppd->tp_snaplen); | ||
| 540 | |||
| 541 | status_bar_update(); | ||
| 542 | total_packets++; | ||
| 543 | |||
| 544 | ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset); | ||
| 545 | __sync_synchronize(); | ||
| 546 | } | ||
| 547 | |||
| 548 | __v3_test_block_len(pbd, bytes_with_padding, block_num); | ||
| 549 | total_bytes += bytes; | ||
| 550 | } | ||
| 551 | |||
| 552 | void __v3_flush_block(struct block_desc *pbd) | ||
| 553 | { | ||
| 554 | BLOCK_STATUS(pbd) = TP_STATUS_KERNEL; | ||
| 555 | __sync_synchronize(); | ||
| 556 | } | ||
| 557 | |||
| 558 | static void walk_v3_rx(int sock, struct ring *ring) | ||
| 559 | { | ||
| 560 | unsigned int block_num = 0; | ||
| 561 | struct pollfd pfd; | ||
| 562 | struct block_desc *pbd; | ||
| 563 | int udp_sock[2]; | ||
| 564 | |||
| 565 | bug_on(ring->type != PACKET_RX_RING); | ||
| 566 | |||
| 567 | pair_udp_open(udp_sock, PORT_BASE); | ||
| 568 | pair_udp_setfilter(sock); | ||
| 569 | |||
| 570 | memset(&pfd, 0, sizeof(pfd)); | ||
| 571 | pfd.fd = sock; | ||
| 572 | pfd.events = POLLIN | POLLERR; | ||
| 573 | pfd.revents = 0; | ||
| 574 | |||
| 575 | pair_udp_send(udp_sock, NUM_PACKETS); | ||
| 576 | |||
| 577 | while (total_packets < NUM_PACKETS * 2) { | ||
| 578 | pbd = (struct block_desc *) ring->rd[block_num].iov_base; | ||
| 579 | |||
| 580 | while ((BLOCK_STATUS(pbd) & TP_STATUS_USER) == 0) | ||
| 581 | poll(&pfd, 1, 1); | ||
| 582 | |||
| 583 | __v3_walk_block(pbd, block_num); | ||
| 584 | __v3_flush_block(pbd); | ||
| 585 | |||
| 586 | block_num = (block_num + 1) % ring->rd_num; | ||
| 587 | } | ||
| 588 | |||
| 589 | pair_udp_close(udp_sock); | ||
| 590 | |||
| 591 | if (total_packets != 2 * NUM_PACKETS) { | ||
| 592 | fprintf(stderr, "walk_v3_rx: received %u out of %u pkts\n", | ||
| 593 | total_packets, NUM_PACKETS); | ||
| 594 | exit(1); | ||
| 595 | } | ||
| 596 | |||
| 597 | fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1); | ||
| 598 | } | ||
| 599 | |||
| 600 | static void walk_v3(int sock, struct ring *ring) | ||
| 601 | { | ||
| 602 | if (ring->type == PACKET_RX_RING) | ||
| 603 | walk_v3_rx(sock, ring); | ||
| 604 | else | ||
| 605 | bug_on(1); | ||
| 606 | } | ||
| 607 | |||
| 608 | static void __v1_v2_fill(struct ring *ring, unsigned int blocks) | ||
| 609 | { | ||
| 610 | ring->req.tp_block_size = getpagesize() << 2; | ||
| 611 | ring->req.tp_frame_size = TPACKET_ALIGNMENT << 7; | ||
| 612 | ring->req.tp_block_nr = blocks; | ||
| 613 | |||
| 614 | ring->req.tp_frame_nr = ring->req.tp_block_size / | ||
| 615 | ring->req.tp_frame_size * | ||
| 616 | ring->req.tp_block_nr; | ||
| 617 | |||
| 618 | ring->mm_len = ring->req.tp_block_size * ring->req.tp_block_nr; | ||
| 619 | ring->walk = walk_v1_v2; | ||
| 620 | ring->rd_num = ring->req.tp_frame_nr; | ||
| 621 | ring->flen = ring->req.tp_frame_size; | ||
| 622 | } | ||
| 623 | |||
| 624 | static void __v3_fill(struct ring *ring, unsigned int blocks) | ||
| 625 | { | ||
| 626 | ring->req3.tp_retire_blk_tov = 64; | ||
| 627 | ring->req3.tp_sizeof_priv = 13; | ||
| 628 | ring->req3.tp_feature_req_word |= TP_FT_REQ_FILL_RXHASH; | ||
| 629 | |||
| 630 | ring->req3.tp_block_size = getpagesize() << 2; | ||
| 631 | ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7; | ||
| 632 | ring->req3.tp_block_nr = blocks; | ||
| 633 | |||
| 634 | ring->req3.tp_frame_nr = ring->req3.tp_block_size / | ||
| 635 | ring->req3.tp_frame_size * | ||
| 636 | ring->req3.tp_block_nr; | ||
| 637 | |||
| 638 | ring->mm_len = ring->req3.tp_block_size * ring->req3.tp_block_nr; | ||
| 639 | ring->walk = walk_v3; | ||
| 640 | ring->rd_num = ring->req3.tp_block_nr; | ||
| 641 | ring->flen = ring->req3.tp_block_size; | ||
| 642 | } | ||
| 643 | |||
| 644 | static void setup_ring(int sock, struct ring *ring, int version, int type) | ||
| 645 | { | ||
| 646 | int ret = 0; | ||
| 647 | unsigned int blocks = 256; | ||
| 648 | |||
| 649 | ring->type = type; | ||
| 650 | ring->version = version; | ||
| 651 | |||
| 652 | switch (version) { | ||
| 653 | case TPACKET_V1: | ||
| 654 | case TPACKET_V2: | ||
| 655 | if (type == PACKET_TX_RING) | ||
| 656 | __v1_v2_set_packet_loss_discard(sock); | ||
| 657 | __v1_v2_fill(ring, blocks); | ||
| 658 | ret = setsockopt(sock, SOL_PACKET, type, &ring->req, | ||
| 659 | sizeof(ring->req)); | ||
| 660 | break; | ||
| 661 | |||
| 662 | case TPACKET_V3: | ||
| 663 | __v3_fill(ring, blocks); | ||
| 664 | ret = setsockopt(sock, SOL_PACKET, type, &ring->req3, | ||
| 665 | sizeof(ring->req3)); | ||
| 666 | break; | ||
| 667 | } | ||
| 668 | |||
| 669 | if (ret == -1) { | ||
| 670 | perror("setsockopt"); | ||
| 671 | exit(1); | ||
| 672 | } | ||
| 673 | |||
| 674 | ring->rd_len = ring->rd_num * sizeof(*ring->rd); | ||
| 675 | ring->rd = malloc(ring->rd_len); | ||
| 676 | if (ring->rd == NULL) { | ||
| 677 | perror("malloc"); | ||
| 678 | exit(1); | ||
| 679 | } | ||
| 680 | |||
| 681 | total_packets = 0; | ||
| 682 | total_bytes = 0; | ||
| 683 | } | ||
| 684 | |||
| 685 | static void mmap_ring(int sock, struct ring *ring) | ||
| 686 | { | ||
| 687 | int i; | ||
| 688 | |||
| 689 | ring->mm_space = mmap(0, ring->mm_len, PROT_READ | PROT_WRITE, | ||
| 690 | MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0); | ||
| 691 | if (ring->mm_space == MAP_FAILED) { | ||
| 692 | perror("mmap"); | ||
| 693 | exit(1); | ||
| 694 | } | ||
| 695 | |||
| 696 | memset(ring->rd, 0, ring->rd_len); | ||
| 697 | for (i = 0; i < ring->rd_num; ++i) { | ||
| 698 | ring->rd[i].iov_base = ring->mm_space + (i * ring->flen); | ||
| 699 | ring->rd[i].iov_len = ring->flen; | ||
| 700 | } | ||
| 701 | } | ||
| 702 | |||
| 703 | static void bind_ring(int sock, struct ring *ring) | ||
| 704 | { | ||
| 705 | int ret; | ||
| 706 | |||
| 707 | ring->ll.sll_family = PF_PACKET; | ||
| 708 | ring->ll.sll_protocol = htons(ETH_P_ALL); | ||
| 709 | ring->ll.sll_ifindex = if_nametoindex("lo"); | ||
| 710 | ring->ll.sll_hatype = 0; | ||
| 711 | ring->ll.sll_pkttype = 0; | ||
| 712 | ring->ll.sll_halen = 0; | ||
| 713 | |||
| 714 | ret = bind(sock, (struct sockaddr *) &ring->ll, sizeof(ring->ll)); | ||
| 715 | if (ret == -1) { | ||
| 716 | perror("bind"); | ||
| 717 | exit(1); | ||
| 718 | } | ||
| 719 | } | ||
| 720 | |||
| 721 | static void walk_ring(int sock, struct ring *ring) | ||
| 722 | { | ||
| 723 | ring->walk(sock, ring); | ||
| 724 | } | ||
| 725 | |||
| 726 | static void unmap_ring(int sock, struct ring *ring) | ||
| 727 | { | ||
| 728 | munmap(ring->mm_space, ring->mm_len); | ||
| 729 | free(ring->rd); | ||
| 730 | } | ||
| 731 | |||
/*
 * Derive a kernel bit width from /proc/kallsyms.
 *
 * Counts the characters before the first whitespace in the first chunk
 * of the file and returns four bits per character. Exits the process
 * if the file cannot be opened or read.
 */
static int test_kernel_bit_width(void)
{
	char in[512];
	int num = 0, fd;
	ssize_t ret;

	fd = open("/proc/kallsyms", O_RDONLY);
	if (fd == -1) {
		perror("open");
		exit(1);
	}

	ret = read(fd, in, sizeof(in));
	if (ret <= 0) {
		perror("read");
		close(fd);
		exit(1);
	}

	close(fd);

	/* The buffer is not NUL-terminated: never scan past the bytes
	 * actually read. isspace() needs an unsigned char value.
	 */
	while (num < ret && !isspace((unsigned char) in[num]))
		num++;

	return num * 4;
}
| 760 | |||
/* Return __WORDSIZE, the bit width this program was compiled for. */
static int test_user_bit_width(void)
{
	const int width = __WORDSIZE;

	return width;
}
| 765 | |||
| 766 | static const char *tpacket_str[] = { | ||
| 767 | [TPACKET_V1] = "TPACKET_V1", | ||
| 768 | [TPACKET_V2] = "TPACKET_V2", | ||
| 769 | [TPACKET_V3] = "TPACKET_V3", | ||
| 770 | }; | ||
| 771 | |||
| 772 | static const char *type_str[] = { | ||
| 773 | [PACKET_RX_RING] = "PACKET_RX_RING", | ||
| 774 | [PACKET_TX_RING] = "PACKET_TX_RING", | ||
| 775 | }; | ||
| 776 | |||
| 777 | static int test_tpacket(int version, int type) | ||
| 778 | { | ||
| 779 | int sock; | ||
| 780 | struct ring ring; | ||
| 781 | |||
| 782 | fprintf(stderr, "test: %s with %s ", tpacket_str[version], | ||
| 783 | type_str[type]); | ||
| 784 | fflush(stderr); | ||
| 785 | |||
| 786 | if (version == TPACKET_V1 && | ||
| 787 | test_kernel_bit_width() != test_user_bit_width()) { | ||
| 788 | fprintf(stderr, "test: skip %s %s since user and kernel " | ||
| 789 | "space have different bit width\n", | ||
| 790 | tpacket_str[version], type_str[type]); | ||
| 791 | return 0; | ||
| 792 | } | ||
| 793 | |||
| 794 | sock = pfsocket(version); | ||
| 795 | memset(&ring, 0, sizeof(ring)); | ||
| 796 | setup_ring(sock, &ring, version, type); | ||
| 797 | mmap_ring(sock, &ring); | ||
| 798 | bind_ring(sock, &ring); | ||
| 799 | walk_ring(sock, &ring); | ||
| 800 | unmap_ring(sock, &ring); | ||
| 801 | close(sock); | ||
| 802 | |||
| 803 | fprintf(stderr, "\n"); | ||
| 804 | return 0; | ||
| 805 | } | ||
| 806 | |||
/* Run every supported version/ring-type combination in order. */
int main(void)
{
	static const int cases[][2] = {
		{ TPACKET_V1, PACKET_RX_RING },
		{ TPACKET_V1, PACKET_TX_RING },
		{ TPACKET_V2, PACKET_RX_RING },
		{ TPACKET_V2, PACKET_TX_RING },
		{ TPACKET_V3, PACKET_RX_RING },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		ret |= test_tpacket(cases[i][0], cases[i][1]);

	if (ret)
		return 1;

	printf("OK. All tests passed\n");
	return 0;
}
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests new file mode 100644 index 000000000000..5246e782d6e8 --- /dev/null +++ b/tools/testing/selftests/net/run_afpackettests | |||
| @@ -0,0 +1,26 @@ | |||
#!/bin/sh

# The packet socket tests need root; skip (exit 0) otherwise.
if [ "$(id -u)" != 0 ]; then
	echo "$0 must be run as root" >&2
	exit 0
fi

# Run ./<name> and report [PASS] or [FAIL] based on its exit status.
run_test()
{
	echo "--------------------"
	echo "running $1 test"
	echo "--------------------"
	if "./$1"; then
		echo "[PASS]"
	else
		echo "[FAIL]"
	fi
}

run_test psock_fanout

run_test psock_tpacket
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests new file mode 100644 index 000000000000..c09a682df56a --- /dev/null +++ b/tools/testing/selftests/net/run_netsocktests | |||
| @@ -0,0 +1,12 @@ | |||
#!/bin/bash

# Run the socket() selftest and report [PASS] or [FAIL].
echo "--------------------"
echo "running socket test"
echo "--------------------"
if ./socket; then
	echo "[PASS]"
else
	echo "[FAIL]"
fi
| 12 | |||
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c new file mode 100644 index 000000000000..0f227f2f9be9 --- /dev/null +++ b/tools/testing/selftests/net/socket.c | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | #include <stdio.h> | ||
| 2 | #include <errno.h> | ||
| 3 | #include <unistd.h> | ||
| 4 | #include <string.h> | ||
| 5 | #include <sys/types.h> | ||
| 6 | #include <sys/socket.h> | ||
| 7 | #include <netinet/in.h> | ||
| 8 | |||
| 9 | struct socket_testcase { | ||
| 10 | int domain; | ||
| 11 | int type; | ||
| 12 | int protocol; | ||
| 13 | |||
| 14 | /* 0 = valid file descriptor | ||
| 15 | * -foo = error foo | ||
| 16 | */ | ||
| 17 | int expect; | ||
| 18 | |||
| 19 | /* If non-zero, accept EAFNOSUPPORT to handle the case | ||
| 20 | * of the protocol not being configured into the kernel. | ||
| 21 | */ | ||
| 22 | int nosupport_ok; | ||
| 23 | }; | ||
| 24 | |||
| 25 | static struct socket_testcase tests[] = { | ||
| 26 | { AF_MAX, 0, 0, -EAFNOSUPPORT, 0 }, | ||
| 27 | { AF_INET, SOCK_STREAM, IPPROTO_TCP, 0, 1 }, | ||
| 28 | { AF_INET, SOCK_DGRAM, IPPROTO_TCP, -EPROTONOSUPPORT, 1 }, | ||
| 29 | { AF_INET, SOCK_DGRAM, IPPROTO_UDP, 0, 1 }, | ||
| 30 | { AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1 }, | ||
| 31 | }; | ||
| 32 | |||
| 33 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) | ||
| 34 | #define ERR_STRING_SZ 64 | ||
| 35 | |||
| 36 | static int run_tests(void) | ||
| 37 | { | ||
| 38 | char err_string1[ERR_STRING_SZ]; | ||
| 39 | char err_string2[ERR_STRING_SZ]; | ||
| 40 | int i, err; | ||
| 41 | |||
| 42 | err = 0; | ||
| 43 | for (i = 0; i < ARRAY_SIZE(tests); i++) { | ||
| 44 | struct socket_testcase *s = &tests[i]; | ||
| 45 | int fd; | ||
| 46 | |||
| 47 | fd = socket(s->domain, s->type, s->protocol); | ||
| 48 | if (fd < 0) { | ||
| 49 | if (s->nosupport_ok && | ||
| 50 | errno == EAFNOSUPPORT) | ||
| 51 | continue; | ||
| 52 | |||
| 53 | if (s->expect < 0 && | ||
| 54 | errno == -s->expect) | ||
| 55 | continue; | ||
| 56 | |||
| 57 | strerror_r(-s->expect, err_string1, ERR_STRING_SZ); | ||
| 58 | strerror_r(errno, err_string2, ERR_STRING_SZ); | ||
| 59 | |||
| 60 | fprintf(stderr, "socket(%d, %d, %d) expected " | ||
| 61 | "err (%s) got (%s)\n", | ||
| 62 | s->domain, s->type, s->protocol, | ||
| 63 | err_string1, err_string2); | ||
| 64 | |||
| 65 | err = -1; | ||
| 66 | break; | ||
| 67 | } else { | ||
| 68 | close(fd); | ||
| 69 | |||
| 70 | if (s->expect < 0) { | ||
| 71 | strerror_r(errno, err_string1, ERR_STRING_SZ); | ||
| 72 | |||
| 73 | fprintf(stderr, "socket(%d, %d, %d) expected " | ||
| 74 | "success got err (%s)\n", | ||
| 75 | s->domain, s->type, s->protocol, | ||
| 76 | err_string1); | ||
| 77 | |||
| 78 | err = -1; | ||
| 79 | break; | ||
| 80 | } | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | return err; | ||
| 85 | } | ||
| 86 | |||
/* Exit status is the result of run_tests(). */
int main(void)
{
	return run_tests();
}
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile new file mode 100644 index 000000000000..47ae2d385ce8 --- /dev/null +++ b/tools/testing/selftests/ptrace/Makefile | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | CFLAGS += -iquote../../../../include/uapi -Wall | ||
| 2 | peeksiginfo: peeksiginfo.c | ||
| 3 | |||
| 4 | all: peeksiginfo | ||
| 5 | |||
| 6 | clean: | ||
| 7 | rm -f peeksiginfo | ||
| 8 | |||
| 9 | run_tests: all | ||
| 10 | @./peeksiginfo || echo "peeksiginfo selftests: [FAIL]" | ||
diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c new file mode 100644 index 000000000000..d46558b1f58d --- /dev/null +++ b/tools/testing/selftests/ptrace/peeksiginfo.c | |||
| @@ -0,0 +1,214 @@ | |||
| 1 | #define _GNU_SOURCE | ||
| 2 | #include <stdio.h> | ||
| 3 | #include <signal.h> | ||
| 4 | #include <unistd.h> | ||
| 5 | #include <errno.h> | ||
| 6 | #include <linux/types.h> | ||
| 7 | #include <sys/wait.h> | ||
| 8 | #include <sys/syscall.h> | ||
| 9 | #include <sys/user.h> | ||
| 10 | #include <sys/mman.h> | ||
| 11 | |||
| 12 | #include "linux/ptrace.h" | ||
| 13 | |||
| 14 | static int sys_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *uinfo) | ||
| 15 | { | ||
| 16 | return syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo); | ||
| 17 | } | ||
| 18 | |||
| 19 | static int sys_rt_tgsigqueueinfo(pid_t tgid, pid_t tid, | ||
| 20 | int sig, siginfo_t *uinfo) | ||
| 21 | { | ||
| 22 | return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, uinfo); | ||
| 23 | } | ||
| 24 | |||
/*
 * Issue the ptrace system call directly through syscall(), passing
 * @request, @pid, @addr and @data unchanged.  The syscall() result is
 * returned narrowed to int.
 */
static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
{
	long rc = syscall(SYS_ptrace, request, pid, addr, data);

	return rc;
}
| 29 | |||
| 30 | #define SIGNR 10 | ||
| 31 | #define TEST_SICODE_PRIV -1 | ||
| 32 | #define TEST_SICODE_SHARE -2 | ||
| 33 | |||
| 34 | #define err(fmt, ...) \ | ||
| 35 | fprintf(stderr, \ | ||
| 36 | "Error (%s:%d): " fmt, \ | ||
| 37 | __FILE__, __LINE__, ##__VA_ARGS__) | ||
| 38 | |||
| 39 | static int check_error_paths(pid_t child) | ||
| 40 | { | ||
| 41 | struct ptrace_peeksiginfo_args arg; | ||
| 42 | int ret, exit_code = -1; | ||
| 43 | void *addr_rw, *addr_ro; | ||
| 44 | |||
| 45 | /* | ||
| 46 | * Allocate two contiguous pages. The first one is for read-write, | ||
| 47 | * another is for read-only. | ||
| 48 | */ | ||
| 49 | addr_rw = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE, | ||
| 50 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); | ||
| 51 | if (addr_rw == MAP_FAILED) { | ||
| 52 | err("mmap() failed: %m\n"); | ||
| 53 | return 1; | ||
| 54 | } | ||
| 55 | |||
| 56 | addr_ro = mmap(addr_rw + PAGE_SIZE, PAGE_SIZE, PROT_READ, | ||
| 57 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); | ||
| 58 | if (addr_ro == MAP_FAILED) { | ||
| 59 | err("mmap() failed: %m\n"); | ||
| 60 | goto out; | ||
| 61 | } | ||
| 62 | |||
| 63 | arg.nr = SIGNR; | ||
| 64 | arg.off = 0; | ||
| 65 | |||
| 66 | /* Unsupported flags */ | ||
| 67 | arg.flags = ~0; | ||
| 68 | ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_rw); | ||
| 69 | if (ret != -1 || errno != EINVAL) { | ||
| 70 | err("sys_ptrace() returns %d (expected -1)," | ||
| 71 | " errno %d (expected %d): %m\n", | ||
| 72 | ret, errno, EINVAL); | ||
| 73 | goto out; | ||
| 74 | } | ||
| 75 | arg.flags = 0; | ||
| 76 | |||
| 77 | /* A part of the buffer is read-only */ | ||
| 78 | ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, | ||
| 79 | addr_ro - sizeof(siginfo_t) * 2); | ||
| 80 | if (ret != 2) { | ||
| 81 | err("sys_ptrace() returns %d (expected 2): %m\n", ret); | ||
| 82 | goto out; | ||
| 83 | } | ||
| 84 | |||
| 85 | /* Read-only buffer */ | ||
| 86 | ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_ro); | ||
| 87 | if (ret != -1 && errno != EFAULT) { | ||
| 88 | err("sys_ptrace() returns %d (expected -1)," | ||
| 89 | " errno %d (expected %d): %m\n", | ||
| 90 | ret, errno, EFAULT); | ||
| 91 | goto out; | ||
| 92 | } | ||
| 93 | |||
| 94 | exit_code = 0; | ||
| 95 | out: | ||
| 96 | munmap(addr_rw, 2 * PAGE_SIZE); | ||
| 97 | return exit_code; | ||
| 98 | } | ||
| 99 | |||
| 100 | int check_direct_path(pid_t child, int shared, int nr) | ||
| 101 | { | ||
| 102 | struct ptrace_peeksiginfo_args arg = {.flags = 0, .nr = nr, .off = 0}; | ||
| 103 | int i, j, ret, exit_code = -1; | ||
| 104 | siginfo_t siginfo[SIGNR]; | ||
| 105 | int si_code; | ||
| 106 | |||
| 107 | if (shared == 1) { | ||
| 108 | arg.flags = PTRACE_PEEKSIGINFO_SHARED; | ||
| 109 | si_code = TEST_SICODE_SHARE; | ||
| 110 | } else { | ||
| 111 | arg.flags = 0; | ||
| 112 | si_code = TEST_SICODE_PRIV; | ||
| 113 | } | ||
| 114 | |||
| 115 | for (i = 0; i < SIGNR; ) { | ||
| 116 | arg.off = i; | ||
| 117 | ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, siginfo); | ||
| 118 | if (ret == -1) { | ||
| 119 | err("ptrace() failed: %m\n"); | ||
| 120 | goto out; | ||
| 121 | } | ||
| 122 | |||
| 123 | if (ret == 0) | ||
| 124 | break; | ||
| 125 | |||
| 126 | for (j = 0; j < ret; j++, i++) { | ||
| 127 | if (siginfo[j].si_code == si_code && | ||
| 128 | siginfo[j].si_int == i) | ||
| 129 | continue; | ||
| 130 | |||
| 131 | err("%d: Wrong siginfo i=%d si_code=%d si_int=%d\n", | ||
| 132 | shared, i, siginfo[j].si_code, siginfo[j].si_int); | ||
| 133 | goto out; | ||
| 134 | } | ||
| 135 | } | ||
| 136 | |||
| 137 | if (i != SIGNR) { | ||
| 138 | err("Only %d signals were read\n", i); | ||
| 139 | goto out; | ||
| 140 | } | ||
| 141 | |||
| 142 | exit_code = 0; | ||
| 143 | out: | ||
| 144 | return exit_code; | ||
| 145 | } | ||
| 146 | |||
| 147 | int main(int argc, char *argv[]) | ||
| 148 | { | ||
| 149 | siginfo_t siginfo[SIGNR]; | ||
| 150 | int i, exit_code = 1; | ||
| 151 | sigset_t blockmask; | ||
| 152 | pid_t child; | ||
| 153 | |||
| 154 | sigemptyset(&blockmask); | ||
| 155 | sigaddset(&blockmask, SIGRTMIN); | ||
| 156 | sigprocmask(SIG_BLOCK, &blockmask, NULL); | ||
| 157 | |||
| 158 | child = fork(); | ||
| 159 | if (child == -1) { | ||
| 160 | err("fork() failed: %m"); | ||
| 161 | return 1; | ||
| 162 | } else if (child == 0) { | ||
| 163 | pid_t ppid = getppid(); | ||
| 164 | while (1) { | ||
| 165 | if (ppid != getppid()) | ||
| 166 | break; | ||
| 167 | sleep(1); | ||
| 168 | } | ||
| 169 | return 1; | ||
| 170 | } | ||
| 171 | |||
| 172 | /* Send signals in process-wide and per-thread queues */ | ||
| 173 | for (i = 0; i < SIGNR; i++) { | ||
| 174 | siginfo->si_code = TEST_SICODE_SHARE; | ||
| 175 | siginfo->si_int = i; | ||
| 176 | sys_rt_sigqueueinfo(child, SIGRTMIN, siginfo); | ||
| 177 | |||
| 178 | siginfo->si_code = TEST_SICODE_PRIV; | ||
| 179 | siginfo->si_int = i; | ||
| 180 | sys_rt_tgsigqueueinfo(child, child, SIGRTMIN, siginfo); | ||
| 181 | } | ||
| 182 | |||
| 183 | if (sys_ptrace(PTRACE_ATTACH, child, NULL, NULL) == -1) | ||
| 184 | return 1; | ||
| 185 | |||
| 186 | waitpid(child, NULL, 0); | ||
| 187 | |||
| 188 | /* Dump signals one by one*/ | ||
| 189 | if (check_direct_path(child, 0, 1)) | ||
| 190 | goto out; | ||
| 191 | /* Dump all signals for one call */ | ||
| 192 | if (check_direct_path(child, 0, SIGNR)) | ||
| 193 | goto out; | ||
| 194 | |||
| 195 | /* | ||
| 196 | * Dump signal from the process-wide queue. | ||
| 197 | * The number of signals is not multible to the buffer size | ||
| 198 | */ | ||
| 199 | if (check_direct_path(child, 1, 3)) | ||
| 200 | goto out; | ||
| 201 | |||
| 202 | if (check_error_paths(child)) | ||
| 203 | goto out; | ||
| 204 | |||
| 205 | printf("PASS\n"); | ||
| 206 | exit_code = 0; | ||
| 207 | out: | ||
| 208 | if (sys_ptrace(PTRACE_KILL, child, NULL, NULL) == -1) | ||
| 209 | return 1; | ||
| 210 | |||
| 211 | waitpid(child, NULL, 0); | ||
| 212 | |||
| 213 | return exit_code; | ||
| 214 | } | ||
diff --git a/tools/testing/selftests/soft-dirty/Makefile b/tools/testing/selftests/soft-dirty/Makefile new file mode 100644 index 000000000000..a9cdc823d6e0 --- /dev/null +++ b/tools/testing/selftests/soft-dirty/Makefile | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | CFLAGS += -iquote../../../../include/uapi -Wall | ||
| 2 | soft-dirty: soft-dirty.c | ||
| 3 | |||
| 4 | all: soft-dirty | ||
| 5 | |||
| 6 | clean: | ||
| 7 | rm -f soft-dirty | ||
| 8 | |||
| 9 | run_tests: all | ||
| 10 | @./soft-dirty || echo "soft-dirty selftests: [FAIL]" | ||
diff --git a/tools/testing/selftests/soft-dirty/soft-dirty.c b/tools/testing/selftests/soft-dirty/soft-dirty.c new file mode 100644 index 000000000000..aba4f87f87f0 --- /dev/null +++ b/tools/testing/selftests/soft-dirty/soft-dirty.c | |||
| @@ -0,0 +1,114 @@ | |||
| 1 | #include <stdlib.h> | ||
| 2 | #include <stdio.h> | ||
| 3 | #include <sys/mman.h> | ||
| 4 | #include <unistd.h> | ||
| 5 | #include <fcntl.h> | ||
| 6 | #include <sys/types.h> | ||
| 7 | |||
| 8 | typedef unsigned long long u64; | ||
| 9 | |||
| 10 | #define PME_PRESENT (1ULL << 63) | ||
| 11 | #define PME_SOFT_DIRTY (1Ull << 55) | ||
| 12 | |||
| 13 | #define PAGES_TO_TEST 3 | ||
| 14 | #ifndef PAGE_SIZE | ||
| 15 | #define PAGE_SIZE 4096 | ||
| 16 | #endif | ||
| 17 | |||
| 18 | static void get_pagemap2(char *mem, u64 *map) | ||
| 19 | { | ||
| 20 | int fd; | ||
| 21 | |||
| 22 | fd = open("/proc/self/pagemap2", O_RDONLY); | ||
| 23 | if (fd < 0) { | ||
| 24 | perror("Can't open pagemap2"); | ||
| 25 | exit(1); | ||
| 26 | } | ||
| 27 | |||
| 28 | lseek(fd, (unsigned long)mem / PAGE_SIZE * sizeof(u64), SEEK_SET); | ||
| 29 | read(fd, map, sizeof(u64) * PAGES_TO_TEST); | ||
| 30 | close(fd); | ||
| 31 | } | ||
| 32 | |||
| 33 | static inline char map_p(u64 map) | ||
| 34 | { | ||
| 35 | return map & PME_PRESENT ? 'p' : '-'; | ||
| 36 | } | ||
| 37 | |||
| 38 | static inline char map_sd(u64 map) | ||
| 39 | { | ||
| 40 | return map & PME_SOFT_DIRTY ? 'd' : '-'; | ||
| 41 | } | ||
| 42 | |||
| 43 | static int check_pte(int step, int page, u64 *map, u64 want) | ||
| 44 | { | ||
| 45 | if ((map[page] & want) != want) { | ||
| 46 | printf("Step %d Page %d has %c%c, want %c%c\n", | ||
| 47 | step, page, | ||
| 48 | map_p(map[page]), map_sd(map[page]), | ||
| 49 | map_p(want), map_sd(want)); | ||
| 50 | return 1; | ||
| 51 | } | ||
| 52 | |||
| 53 | return 0; | ||
| 54 | } | ||
| 55 | |||
/*
 * Write "4" to /proc/self/clear_refs, the request this test uses to
 * clear the soft-dirty bits.  Exits with status 1 if the file cannot be
 * opened or the write is incomplete.
 */
static void clear_refs(void)
{
	static const char v[] = "4";
	/* Only the digit itself is written; never read past the literal. */
	const ssize_t len = sizeof(v) - 1;
	int fd;

	fd = open("/proc/self/clear_refs", O_WRONLY);
	if (fd < 0) {
		perror("Can't open clear_refs");
		exit(1);
	}

	if (write(fd, v, len) < len) {
		perror("Can't clear soft-dirty bit");
		exit(1);
	}
	close(fd);
}
| 68 | |||
| 69 | int main(void) | ||
| 70 | { | ||
| 71 | char *mem, x; | ||
| 72 | u64 map[PAGES_TO_TEST]; | ||
| 73 | |||
| 74 | mem = mmap(NULL, PAGES_TO_TEST * PAGE_SIZE, | ||
| 75 | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 0, 0); | ||
| 76 | |||
| 77 | x = mem[0]; | ||
| 78 | mem[2 * PAGE_SIZE] = 'c'; | ||
| 79 | get_pagemap2(mem, map); | ||
| 80 | |||
| 81 | if (check_pte(1, 0, map, PME_PRESENT)) | ||
| 82 | return 1; | ||
| 83 | if (check_pte(1, 1, map, 0)) | ||
| 84 | return 1; | ||
| 85 | if (check_pte(1, 2, map, PME_PRESENT | PME_SOFT_DIRTY)) | ||
| 86 | return 1; | ||
| 87 | |||
| 88 | clear_refs(); | ||
| 89 | get_pagemap2(mem, map); | ||
| 90 | |||
| 91 | if (check_pte(2, 0, map, PME_PRESENT)) | ||
| 92 | return 1; | ||
| 93 | if (check_pte(2, 1, map, 0)) | ||
| 94 | return 1; | ||
| 95 | if (check_pte(2, 2, map, PME_PRESENT)) | ||
| 96 | return 1; | ||
| 97 | |||
| 98 | mem[0] = 'a'; | ||
| 99 | mem[PAGE_SIZE] = 'b'; | ||
| 100 | x = mem[2 * PAGE_SIZE]; | ||
| 101 | get_pagemap2(mem, map); | ||
| 102 | |||
| 103 | if (check_pte(3, 0, map, PME_PRESENT | PME_SOFT_DIRTY)) | ||
| 104 | return 1; | ||
| 105 | if (check_pte(3, 1, map, PME_PRESENT | PME_SOFT_DIRTY)) | ||
| 106 | return 1; | ||
| 107 | if (check_pte(3, 2, map, PME_PRESENT)) | ||
| 108 | return 1; | ||
| 109 | |||
| 110 | (void)x; /* gcc warn */ | ||
| 111 | |||
| 112 | printf("PASS\n"); | ||
| 113 | return 0; | ||
| 114 | } | ||
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index d1d442ed106a..3187c62d9814 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile | |||
| @@ -1,12 +1,14 @@ | |||
| 1 | all: test mod | 1 | all: test mod |
| 2 | test: virtio_test | 2 | test: virtio_test vringh_test |
| 3 | virtio_test: virtio_ring.o virtio_test.o | 3 | virtio_test: virtio_ring.o virtio_test.o |
| 4 | CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -MMD | 4 | vringh_test: vringh_test.o vringh.o virtio_ring.o |
| 5 | vpath %.c ../../drivers/virtio | 5 | |
| 6 | CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE | ||
| 7 | vpath %.c ../../drivers/virtio ../../drivers/vhost | ||
| 6 | mod: | 8 | mod: |
| 7 | ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test | 9 | ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test |
| 8 | .PHONY: all test mod clean | 10 | .PHONY: all test mod clean |
| 9 | clean: | 11 | clean: |
| 10 | ${RM} *.o vhost_test/*.o vhost_test/.*.cmd \ | 12 | ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \ |
| 11 | vhost_test/Module.symvers vhost_test/modules.order *.d | 13 | vhost_test/Module.symvers vhost_test/modules.order *.d |
| 12 | -include *.d | 14 | -include *.d |
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h new file mode 100644 index 000000000000..aff61e13306c --- /dev/null +++ b/tools/virtio/asm/barrier.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | #if defined(__i386__) || defined(__x86_64__) | ||
| 2 | #define barrier() asm volatile("" ::: "memory") | ||
| 3 | #define mb() __sync_synchronize() | ||
| 4 | |||
| 5 | #define smp_mb() mb() | ||
| 6 | # define smp_rmb() barrier() | ||
| 7 | # define smp_wmb() barrier() | ||
| 8 | /* Weak barriers should be used. If not - it's a bug */ | ||
| 9 | # define rmb() abort() | ||
| 10 | # define wmb() abort() | ||
| 11 | #else | ||
| 12 | #error Please fill in barrier macros | ||
| 13 | #endif | ||
| 14 | |||
diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h new file mode 100644 index 000000000000..fb94f0787c47 --- /dev/null +++ b/tools/virtio/linux/bug.h | |||
| @@ -0,0 +1,10 @@ | |||
#ifndef BUG_H
#define BUG_H

/*
 * BUG_ON() expands to assert() and BUG() to abort(); include their
 * declarations here so this header works regardless of include order.
 */
#include <assert.h>
#include <stdlib.h>

#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))

/* Expands to nothing: no compile-time check is performed here. */
#define BUILD_BUG_ON(x)

#define BUG() abort()

#endif /* BUG_H */
diff --git a/tools/virtio/linux/err.h b/tools/virtio/linux/err.h new file mode 100644 index 000000000000..e32eff8b2a14 --- /dev/null +++ b/tools/virtio/linux/err.h | |||
| @@ -0,0 +1,26 @@ | |||
#ifndef ERR_H
#define ERR_H

/*
 * Fallbacks so this header can be included on its own.  Both are guarded
 * and are skipped when an earlier header already defined them.
 */
#ifndef __must_check
#define __must_check
#endif
#ifndef unlikely
#define unlikely(x) (__builtin_expect(!!(x), 0))
#endif

/* Error codes occupy the top MAX_ERRNO values of the address range. */
#define MAX_ERRNO 4095

#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)

/* Encode the (negative) error code @error as a pointer value. */
static inline void * __must_check ERR_PTR(long error)
{
	return (void *) error;
}

/* Recover the integer value stored in @ptr by ERR_PTR(). */
static inline long __must_check PTR_ERR(const void *ptr)
{
	return (long) ptr;
}

/* Non-zero when @ptr lies in the error-code range. */
static inline long __must_check IS_ERR(const void *ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}

/* Non-zero when @ptr is NULL or lies in the error-code range. */
static inline long __must_check IS_ERR_OR_NULL(const void *ptr)
{
	return !ptr || IS_ERR_VALUE((unsigned long)ptr);
}
#endif /* ERR_H */
diff --git a/tools/virtio/linux/export.h b/tools/virtio/linux/export.h new file mode 100644 index 000000000000..7311d326894a --- /dev/null +++ b/tools/virtio/linux/export.h | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | #define EXPORT_SYMBOL(sym) | ||
| 2 | #define EXPORT_SYMBOL_GPL(sym) | ||
| 3 | #define EXPORT_SYMBOL_GPL_FUTURE(sym) | ||
| 4 | #define EXPORT_UNUSED_SYMBOL(sym) | ||
| 5 | #define EXPORT_UNUSED_SYMBOL_GPL(sym) | ||
diff --git a/tools/virtio/linux/irqreturn.h b/tools/virtio/linux/irqreturn.h new file mode 100644 index 000000000000..a3c4e7be7089 --- /dev/null +++ b/tools/virtio/linux/irqreturn.h | |||
| @@ -0,0 +1 @@ | |||
| #include "../../../include/linux/irqreturn.h" | |||
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h new file mode 100644 index 000000000000..fba705963968 --- /dev/null +++ b/tools/virtio/linux/kernel.h | |||
| @@ -0,0 +1,112 @@ | |||
| 1 | #ifndef KERNEL_H | ||
| 2 | #define KERNEL_H | ||
| 3 | #include <stdbool.h> | ||
| 4 | #include <stdlib.h> | ||
| 5 | #include <stddef.h> | ||
| 6 | #include <stdio.h> | ||
| 7 | #include <string.h> | ||
| 8 | #include <assert.h> | ||
| 9 | #include <stdarg.h> | ||
| 10 | |||
| 11 | #include <linux/types.h> | ||
| 12 | #include <linux/printk.h> | ||
| 13 | #include <linux/bug.h> | ||
| 14 | #include <errno.h> | ||
| 15 | #include <unistd.h> | ||
| 16 | #include <asm/barrier.h> | ||
| 17 | |||
| 18 | #define CONFIG_SMP | ||
| 19 | |||
| 20 | #define PAGE_SIZE getpagesize() | ||
| 21 | #define PAGE_MASK (~(PAGE_SIZE-1)) | ||
| 22 | |||
| 23 | typedef unsigned long long dma_addr_t; | ||
| 24 | typedef size_t __kernel_size_t; | ||
| 25 | |||
| 26 | struct page { | ||
| 27 | unsigned long long dummy; | ||
| 28 | }; | ||
| 29 | |||
/*
 * Physical == Virtual.
 * Every macro argument is parenthesised so that an expression such as
 * "buf + 1" is evaluated before the cast is applied.
 */
#define virt_to_phys(p) ((unsigned long)(p))
#define phys_to_virt(a) ((void *)(unsigned long)(a))
/* A "struct page *" here is the address masked with PAGE_MASK. */
#define page_to_phys(p) ((dma_addr_t)(unsigned long)(p))
#define virt_to_page(p) ((struct page *)((unsigned long)(p) & PAGE_MASK))

#define offset_in_page(p) (((unsigned long)(p)) % PAGE_SIZE)
| 38 | |||
| 39 | #define __printf(a,b) __attribute__((format(printf,a,b))) | ||
| 40 | |||
| 41 | typedef enum { | ||
| 42 | GFP_KERNEL, | ||
| 43 | GFP_ATOMIC, | ||
| 44 | __GFP_HIGHMEM, | ||
| 45 | __GFP_HIGH | ||
| 46 | } gfp_t; | ||
| 47 | |||
/* Element count of a true array (not a pointer); argument parenthesised. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
| 49 | |||
| 50 | extern void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; | ||
| 51 | static inline void *kmalloc(size_t s, gfp_t gfp) | ||
| 52 | { | ||
| 53 | if (__kmalloc_fake) | ||
| 54 | return __kmalloc_fake; | ||
| 55 | return malloc(s); | ||
| 56 | } | ||
| 57 | |||
| 58 | static inline void kfree(void *p) | ||
| 59 | { | ||
| 60 | if (p >= __kfree_ignore_start && p < __kfree_ignore_end) | ||
| 61 | return; | ||
| 62 | free(p); | ||
| 63 | } | ||
| 64 | |||
| 65 | static inline void *krealloc(void *p, size_t s, gfp_t gfp) | ||
| 66 | { | ||
| 67 | return realloc(p, s); | ||
| 68 | } | ||
| 69 | |||
| 70 | |||
| 71 | static inline unsigned long __get_free_page(gfp_t gfp) | ||
| 72 | { | ||
| 73 | void *p; | ||
| 74 | |||
| 75 | posix_memalign(&p, PAGE_SIZE, PAGE_SIZE); | ||
| 76 | return (unsigned long)p; | ||
| 77 | } | ||
| 78 | |||
/* Release a page address obtained from __get_free_page() via free(). */
static inline void free_page(unsigned long addr)
{
	void *page = (void *)addr;

	free(page);
}
| 83 | |||
| 84 | #define container_of(ptr, type, member) ({ \ | ||
| 85 | const typeof( ((type *)0)->member ) *__mptr = (ptr); \ | ||
| 86 | (type *)( (char *)__mptr - offsetof(type,member) );}) | ||
| 87 | |||
| 88 | #define uninitialized_var(x) x = x | ||
| 89 | |||
| 90 | # ifndef likely | ||
| 91 | # define likely(x) (__builtin_expect(!!(x), 1)) | ||
| 92 | # endif | ||
| 93 | # ifndef unlikely | ||
| 94 | # define unlikely(x) (__builtin_expect(!!(x), 0)) | ||
| 95 | # endif | ||
| 96 | |||
| 97 | #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) | ||
| 98 | #ifdef DEBUG | ||
| 99 | #define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__) | ||
| 100 | #else | ||
| 101 | #define pr_debug(format, ...) do {} while (0) | ||
| 102 | #endif | ||
| 103 | #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) | ||
| 104 | #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) | ||
| 105 | |||
| 106 | #define min(x, y) ({ \ | ||
| 107 | typeof(x) _min1 = (x); \ | ||
| 108 | typeof(y) _min2 = (y); \ | ||
| 109 | (void) (&_min1 == &_min2); \ | ||
| 110 | _min1 < _min2 ? _min1 : _min2; }) | ||
| 111 | |||
| 112 | #endif /* KERNEL_H */ | ||
diff --git a/tools/virtio/linux/module.h b/tools/virtio/linux/module.h index e69de29bb2d1..3039a7e972b6 100644 --- a/tools/virtio/linux/module.h +++ b/tools/virtio/linux/module.h | |||
| @@ -0,0 +1 @@ | |||
| #include <linux/export.h> | |||
diff --git a/tools/virtio/linux/printk.h b/tools/virtio/linux/printk.h new file mode 100644 index 000000000000..9f2423bd89c2 --- /dev/null +++ b/tools/virtio/linux/printk.h | |||
| @@ -0,0 +1,4 @@ | |||
| 1 | #include "../../../include/linux/kern_levels.h" | ||
| 2 | |||
| 3 | #define printk printf | ||
| 4 | #define vprintk vprintf | ||
diff --git a/tools/virtio/linux/ratelimit.h b/tools/virtio/linux/ratelimit.h new file mode 100644 index 000000000000..dcce1725f90d --- /dev/null +++ b/tools/virtio/linux/ratelimit.h | |||
| @@ -0,0 +1,4 @@ | |||
| 1 | #define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) int name = 0 | ||
| 2 | |||
| 3 | #define __ratelimit(x) (*(x)) | ||
| 4 | |||
diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h new file mode 100644 index 000000000000..68c9e2adc996 --- /dev/null +++ b/tools/virtio/linux/scatterlist.h | |||
| @@ -0,0 +1,189 @@ | |||
| 1 | #ifndef SCATTERLIST_H | ||
| 2 | #define SCATTERLIST_H | ||
| 3 | #include <linux/kernel.h> | ||
| 4 | |||
| 5 | struct scatterlist { | ||
| 6 | unsigned long page_link; | ||
| 7 | unsigned int offset; | ||
| 8 | unsigned int length; | ||
| 9 | dma_addr_t dma_address; | ||
| 10 | }; | ||
| 11 | |||
| 12 | /* Scatterlist helpers, stolen from linux/scatterlist.h */ | ||
| 13 | #define sg_is_chain(sg) ((sg)->page_link & 0x01) | ||
| 14 | #define sg_is_last(sg) ((sg)->page_link & 0x02) | ||
| 15 | #define sg_chain_ptr(sg) \ | ||
| 16 | ((struct scatterlist *) ((sg)->page_link & ~0x03)) | ||
| 17 | |||
| 18 | /** | ||
| 19 | * sg_assign_page - Assign a given page to an SG entry | ||
| 20 | * @sg: SG entry | ||
| 21 | * @page: The page | ||
| 22 | * | ||
| 23 | * Description: | ||
| 24 | * Assign page to sg entry. Also see sg_set_page(), the most commonly used | ||
| 25 | * variant. | ||
| 26 | * | ||
| 27 | **/ | ||
| 28 | static inline void sg_assign_page(struct scatterlist *sg, struct page *page) | ||
| 29 | { | ||
| 30 | unsigned long page_link = sg->page_link & 0x3; | ||
| 31 | |||
| 32 | /* | ||
| 33 | * In order for the low bit stealing approach to work, pages | ||
| 34 | * must be aligned at a 32-bit boundary as a minimum. | ||
| 35 | */ | ||
| 36 | BUG_ON((unsigned long) page & 0x03); | ||
| 37 | #ifdef CONFIG_DEBUG_SG | ||
| 38 | BUG_ON(sg->sg_magic != SG_MAGIC); | ||
| 39 | BUG_ON(sg_is_chain(sg)); | ||
| 40 | #endif | ||
| 41 | sg->page_link = page_link | (unsigned long) page; | ||
| 42 | } | ||
| 43 | |||
| 44 | /** | ||
| 45 | * sg_set_page - Set sg entry to point at given page | ||
| 46 | * @sg: SG entry | ||
| 47 | * @page: The page | ||
| 48 | * @len: Length of data | ||
| 49 | * @offset: Offset into page | ||
| 50 | * | ||
| 51 | * Description: | ||
| 52 | * Use this function to set an sg entry pointing at a page, never assign | ||
| 53 | * the page directly. We encode sg table information in the lower bits | ||
| 54 | * of the page pointer. See sg_page() for looking up the page belonging | ||
| 55 | * to an sg entry. | ||
| 56 | * | ||
| 57 | **/ | ||
| 58 | static inline void sg_set_page(struct scatterlist *sg, struct page *page, | ||
| 59 | unsigned int len, unsigned int offset) | ||
| 60 | { | ||
| 61 | sg_assign_page(sg, page); | ||
| 62 | sg->offset = offset; | ||
| 63 | sg->length = len; | ||
| 64 | } | ||
| 65 | |||
| 66 | static inline struct page *sg_page(struct scatterlist *sg) | ||
| 67 | { | ||
| 68 | #ifdef CONFIG_DEBUG_SG | ||
| 69 | BUG_ON(sg->sg_magic != SG_MAGIC); | ||
| 70 | BUG_ON(sg_is_chain(sg)); | ||
| 71 | #endif | ||
| 72 | return (struct page *)((sg)->page_link & ~0x3); | ||
| 73 | } | ||
| 74 | |||
| 75 | /* | ||
| 76 | * Loop over each sg element, following the pointer to a new list if necessary | ||
| 77 | */ | ||
| 78 | #define for_each_sg(sglist, sg, nr, __i) \ | ||
| 79 | for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg)) | ||
| 80 | |||
| 81 | /** | ||
| 82 | * sg_chain - Chain two sglists together | ||
| 83 | * @prv: First scatterlist | ||
| 84 | * @prv_nents: Number of entries in prv | ||
| 85 | * @sgl: Second scatterlist | ||
| 86 | * | ||
| 87 | * Description: | ||
| 88 | * Links @prv@ and @sgl@ together, to form a longer scatterlist. | ||
| 89 | * | ||
| 90 | **/ | ||
| 91 | static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, | ||
| 92 | struct scatterlist *sgl) | ||
| 93 | { | ||
| 94 | /* | ||
| 95 | * offset and length are unused for chain entry. Clear them. | ||
| 96 | */ | ||
| 97 | prv[prv_nents - 1].offset = 0; | ||
| 98 | prv[prv_nents - 1].length = 0; | ||
| 99 | |||
| 100 | /* | ||
| 101 | * Set lowest bit to indicate a link pointer, and make sure to clear | ||
| 102 | * the termination bit if it happens to be set. | ||
| 103 | */ | ||
| 104 | prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02; | ||
| 105 | } | ||
| 106 | |||
| 107 | /** | ||
| 108 | * sg_mark_end - Mark the end of the scatterlist | ||
| 109 | * @sg: SG entryScatterlist | ||
| 110 | * | ||
| 111 | * Description: | ||
| 112 | * Marks the passed in sg entry as the termination point for the sg | ||
| 113 | * table. A call to sg_next() on this entry will return NULL. | ||
| 114 | * | ||
| 115 | **/ | ||
| 116 | static inline void sg_mark_end(struct scatterlist *sg) | ||
| 117 | { | ||
| 118 | #ifdef CONFIG_DEBUG_SG | ||
| 119 | BUG_ON(sg->sg_magic != SG_MAGIC); | ||
| 120 | #endif | ||
| 121 | /* | ||
| 122 | * Set termination bit, clear potential chain bit | ||
| 123 | */ | ||
| 124 | sg->page_link |= 0x02; | ||
| 125 | sg->page_link &= ~0x01; | ||
| 126 | } | ||
| 127 | |||
| 128 | /** | ||
| 129 | * sg_unmark_end - Undo setting the end of the scatterlist | ||
| 130 | * @sg: SG entryScatterlist | ||
| 131 | * | ||
| 132 | * Description: | ||
| 133 | * Removes the termination marker from the given entry of the scatterlist. | ||
| 134 | * | ||
| 135 | **/ | ||
| 136 | static inline void sg_unmark_end(struct scatterlist *sg) | ||
| 137 | { | ||
| 138 | #ifdef CONFIG_DEBUG_SG | ||
| 139 | BUG_ON(sg->sg_magic != SG_MAGIC); | ||
| 140 | #endif | ||
| 141 | sg->page_link &= ~0x02; | ||
| 142 | } | ||
| 143 | |||
| 144 | static inline struct scatterlist *sg_next(struct scatterlist *sg) | ||
| 145 | { | ||
| 146 | #ifdef CONFIG_DEBUG_SG | ||
| 147 | BUG_ON(sg->sg_magic != SG_MAGIC); | ||
| 148 | #endif | ||
| 149 | if (sg_is_last(sg)) | ||
| 150 | return NULL; | ||
| 151 | |||
| 152 | sg++; | ||
| 153 | if (unlikely(sg_is_chain(sg))) | ||
| 154 | sg = sg_chain_ptr(sg); | ||
| 155 | |||
| 156 | return sg; | ||
| 157 | } | ||
| 158 | |||
| 159 | static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents) | ||
| 160 | { | ||
| 161 | memset(sgl, 0, sizeof(*sgl) * nents); | ||
| 162 | #ifdef CONFIG_DEBUG_SG | ||
| 163 | { | ||
| 164 | unsigned int i; | ||
| 165 | for (i = 0; i < nents; i++) | ||
| 166 | sgl[i].sg_magic = SG_MAGIC; | ||
| 167 | } | ||
| 168 | #endif | ||
| 169 | sg_mark_end(&sgl[nents - 1]); | ||
| 170 | } | ||
| 171 | |||
| 172 | static inline dma_addr_t sg_phys(struct scatterlist *sg) | ||
| 173 | { | ||
| 174 | return page_to_phys(sg_page(sg)) + sg->offset; | ||
| 175 | } | ||
| 176 | |||
/*
 * Describe the @buflen bytes at @buf in @sg, split into the page given
 * by virt_to_page() and the offset given by offset_in_page().
 */
static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
			      unsigned int buflen)
{
	struct page *pg = virt_to_page(buf);
	unsigned int off = offset_in_page(buf);

	sg_set_page(sg, pg, buflen, off);
}
| 182 | |||
/* Initialise @sg as a one-entry table describing the @buflen bytes at @buf. */
static inline void sg_init_one(struct scatterlist *sg,
			       const void *buf, unsigned int buflen)
{
	const unsigned int nents = 1;

	sg_init_table(sg, nents);
	sg_set_buf(sg, buf, buflen);
}
| 189 | #endif /* SCATTERLIST_H */ | ||
diff --git a/tools/virtio/linux/types.h b/tools/virtio/linux/types.h new file mode 100644 index 000000000000..f8ebb9a2b3d6 --- /dev/null +++ b/tools/virtio/linux/types.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | #ifndef TYPES_H | ||
| 2 | #define TYPES_H | ||
| 3 | #include <stdint.h> | ||
| 4 | |||
| 5 | #define __force | ||
| 6 | #define __user | ||
| 7 | #define __must_check | ||
| 8 | #define __cold | ||
| 9 | |||
| 10 | typedef uint64_t u64; | ||
| 11 | typedef int64_t s64; | ||
| 12 | typedef uint32_t u32; | ||
| 13 | typedef int32_t s32; | ||
| 14 | typedef uint16_t u16; | ||
| 15 | typedef int16_t s16; | ||
| 16 | typedef uint8_t u8; | ||
| 17 | typedef int8_t s8; | ||
| 18 | |||
| 19 | typedef uint64_t __u64; | ||
| 20 | typedef int64_t __s64; | ||
| 21 | typedef uint32_t __u32; | ||
| 22 | typedef int32_t __s32; | ||
| 23 | typedef uint16_t __u16; | ||
| 24 | typedef int16_t __s16; | ||
| 25 | typedef uint8_t __u8; | ||
| 26 | typedef int8_t __s8; | ||
| 27 | |||
| 28 | #endif /* TYPES_H */ | ||
diff --git a/tools/virtio/linux/uaccess.h b/tools/virtio/linux/uaccess.h new file mode 100644 index 000000000000..0a578fe18653 --- /dev/null +++ b/tools/virtio/linux/uaccess.h | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | #ifndef UACCESS_H | ||
| 2 | #define UACCESS_H | ||
| 3 | extern void *__user_addr_min, *__user_addr_max; | ||
| 4 | |||
| 5 | #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) | ||
| 6 | |||
| 7 | static inline void __chk_user_ptr(const volatile void *p, size_t size) | ||
| 8 | { | ||
| 9 | assert(p >= __user_addr_min && p + size <= __user_addr_max); | ||
| 10 | } | ||
| 11 | |||
| 12 | #define put_user(x, ptr) \ | ||
| 13 | ({ \ | ||
| 14 | typeof(ptr) __pu_ptr = (ptr); \ | ||
| 15 | __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \ | ||
| 16 | ACCESS_ONCE(*(__pu_ptr)) = x; \ | ||
| 17 | 0; \ | ||
| 18 | }) | ||
| 19 | |||
| 20 | #define get_user(x, ptr) \ | ||
| 21 | ({ \ | ||
| 22 | typeof(ptr) __pu_ptr = (ptr); \ | ||
| 23 | __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \ | ||
| 24 | x = ACCESS_ONCE(*(__pu_ptr)); \ | ||
| 25 | 0; \ | ||
| 26 | }) | ||
| 27 | |||
/*
 * Copy @n bytes from @from to @to one byte at a time, in ascending
 * address order, through volatile accesses.
 *
 * Declared inline because this lives in a header: a plain static
 * definition warns as unused in every file that includes it without
 * calling it.
 */
static inline void volatile_memcpy(volatile char *to, const volatile char *from,
				   unsigned long n)
{
	unsigned long i;

	for (i = 0; i < n; i++)
		to[i] = from[i];
}
| 34 | |||
| 35 | static inline int copy_from_user(void *to, const void __user volatile *from, | ||
| 36 | unsigned long n) | ||
| 37 | { | ||
| 38 | __chk_user_ptr(from, n); | ||
| 39 | volatile_memcpy(to, from, n); | ||
| 40 | return 0; | ||
| 41 | } | ||
| 42 | |||
| 43 | static inline int copy_to_user(void __user volatile *to, const void *from, | ||
| 44 | unsigned long n) | ||
| 45 | { | ||
| 46 | __chk_user_ptr(to, n); | ||
| 47 | volatile_memcpy(to, from, n); | ||
| 48 | return 0; | ||
| 49 | } | ||
| 50 | #endif /* UACCESS_H */ | ||
diff --git a/tools/virtio/linux/uio.h b/tools/virtio/linux/uio.h new file mode 100644 index 000000000000..cd20f0ba3081 --- /dev/null +++ b/tools/virtio/linux/uio.h | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | #include <linux/kernel.h> | ||
| 2 | |||
| 3 | #include "../../../include/linux/uio.h" | ||
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 81847dd08bd0..cd801838156f 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h | |||
| @@ -1,127 +1,7 @@ | |||
| 1 | #ifndef LINUX_VIRTIO_H | 1 | #ifndef LINUX_VIRTIO_H |
| 2 | #define LINUX_VIRTIO_H | 2 | #define LINUX_VIRTIO_H |
| 3 | 3 | #include <linux/scatterlist.h> | |
| 4 | #include <stdbool.h> | 4 | #include <linux/kernel.h> |
| 5 | #include <stdlib.h> | ||
| 6 | #include <stddef.h> | ||
| 7 | #include <stdio.h> | ||
| 8 | #include <string.h> | ||
| 9 | #include <assert.h> | ||
| 10 | |||
| 11 | #include <linux/types.h> | ||
| 12 | #include <errno.h> | ||
| 13 | |||
| 14 | typedef unsigned long long dma_addr_t; | ||
| 15 | |||
| 16 | struct scatterlist { | ||
| 17 | unsigned long page_link; | ||
| 18 | unsigned int offset; | ||
| 19 | unsigned int length; | ||
| 20 | dma_addr_t dma_address; | ||
| 21 | }; | ||
| 22 | |||
| 23 | struct page { | ||
| 24 | unsigned long long dummy; | ||
| 25 | }; | ||
| 26 | |||
| 27 | #define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond)) | ||
| 28 | |||
| 29 | /* Physical == Virtual */ | ||
| 30 | #define virt_to_phys(p) ((unsigned long)p) | ||
| 31 | #define phys_to_virt(a) ((void *)(unsigned long)(a)) | ||
| 32 | /* Page address: Virtual / 4K */ | ||
| 33 | #define virt_to_page(p) ((struct page*)((virt_to_phys(p) / 4096) * \ | ||
| 34 | sizeof(struct page))) | ||
| 35 | #define offset_in_page(p) (((unsigned long)p) % 4096) | ||
| 36 | #define sg_phys(sg) ((sg->page_link & ~0x3) / sizeof(struct page) * 4096 + \ | ||
| 37 | sg->offset) | ||
| 38 | static inline void sg_mark_end(struct scatterlist *sg) | ||
| 39 | { | ||
| 40 | /* | ||
| 41 | * Set termination bit, clear potential chain bit | ||
| 42 | */ | ||
| 43 | sg->page_link |= 0x02; | ||
| 44 | sg->page_link &= ~0x01; | ||
| 45 | } | ||
| 46 | static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents) | ||
| 47 | { | ||
| 48 | memset(sgl, 0, sizeof(*sgl) * nents); | ||
| 49 | sg_mark_end(&sgl[nents - 1]); | ||
| 50 | } | ||
| 51 | static inline void sg_assign_page(struct scatterlist *sg, struct page *page) | ||
| 52 | { | ||
| 53 | unsigned long page_link = sg->page_link & 0x3; | ||
| 54 | |||
| 55 | /* | ||
| 56 | * In order for the low bit stealing approach to work, pages | ||
| 57 | * must be aligned at a 32-bit boundary as a minimum. | ||
| 58 | */ | ||
| 59 | BUG_ON((unsigned long) page & 0x03); | ||
| 60 | sg->page_link = page_link | (unsigned long) page; | ||
| 61 | } | ||
| 62 | |||
| 63 | static inline void sg_set_page(struct scatterlist *sg, struct page *page, | ||
| 64 | unsigned int len, unsigned int offset) | ||
| 65 | { | ||
| 66 | sg_assign_page(sg, page); | ||
| 67 | sg->offset = offset; | ||
| 68 | sg->length = len; | ||
| 69 | } | ||
| 70 | |||
| 71 | static inline void sg_set_buf(struct scatterlist *sg, const void *buf, | ||
| 72 | unsigned int buflen) | ||
| 73 | { | ||
| 74 | sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); | ||
| 75 | } | ||
| 76 | |||
| 77 | static inline void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen) | ||
| 78 | { | ||
| 79 | sg_init_table(sg, 1); | ||
| 80 | sg_set_buf(sg, buf, buflen); | ||
| 81 | } | ||
| 82 | |||
| 83 | typedef __u16 u16; | ||
| 84 | |||
| 85 | typedef enum { | ||
| 86 | GFP_KERNEL, | ||
| 87 | GFP_ATOMIC, | ||
| 88 | } gfp_t; | ||
| 89 | typedef enum { | ||
| 90 | IRQ_NONE, | ||
| 91 | IRQ_HANDLED | ||
| 92 | } irqreturn_t; | ||
| 93 | |||
| 94 | static inline void *kmalloc(size_t s, gfp_t gfp) | ||
| 95 | { | ||
| 96 | return malloc(s); | ||
| 97 | } | ||
| 98 | |||
| 99 | static inline void kfree(void *p) | ||
| 100 | { | ||
| 101 | free(p); | ||
| 102 | } | ||
| 103 | |||
| 104 | #define container_of(ptr, type, member) ({ \ | ||
| 105 | const typeof( ((type *)0)->member ) *__mptr = (ptr); \ | ||
| 106 | (type *)( (char *)__mptr - offsetof(type,member) );}) | ||
| 107 | |||
| 108 | #define uninitialized_var(x) x = x | ||
| 109 | |||
| 110 | # ifndef likely | ||
| 111 | # define likely(x) (__builtin_expect(!!(x), 1)) | ||
| 112 | # endif | ||
| 113 | # ifndef unlikely | ||
| 114 | # define unlikely(x) (__builtin_expect(!!(x), 0)) | ||
| 115 | # endif | ||
| 116 | |||
| 117 | #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) | ||
| 118 | #ifdef DEBUG | ||
| 119 | #define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__) | ||
| 120 | #else | ||
| 121 | #define pr_debug(format, ...) do {} while (0) | ||
| 122 | #endif | ||
| 123 | #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) | ||
| 124 | #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) | ||
| 125 | 5 | ||
| 126 | /* TODO: empty stubs for now. Broken but enough for virtio_ring.c */ | 6 | /* TODO: empty stubs for now. Broken but enough for virtio_ring.c */ |
| 127 | #define list_add_tail(a, b) do {} while (0) | 7 | #define list_add_tail(a, b) do {} while (0) |
| @@ -131,6 +11,7 @@ static inline void kfree(void *p) | |||
| 131 | #define BITS_PER_BYTE 8 | 11 | #define BITS_PER_BYTE 8 |
| 132 | #define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE) | 12 | #define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE) |
| 133 | #define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) | 13 | #define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) |
| 14 | |||
| 134 | /* TODO: Not atomic as it should be: | 15 | /* TODO: Not atomic as it should be: |
| 135 | * we don't use this for anything important. */ | 16 | * we don't use this for anything important. */ |
| 136 | static inline void clear_bit(int nr, volatile unsigned long *addr) | 17 | static inline void clear_bit(int nr, volatile unsigned long *addr) |
| @@ -145,10 +26,6 @@ static inline int test_bit(int nr, const volatile unsigned long *addr) | |||
| 145 | { | 26 | { |
| 146 | return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); | 27 | return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); |
| 147 | } | 28 | } |
| 148 | |||
| 149 | /* The only feature we care to support */ | ||
| 150 | #define virtio_has_feature(dev, feature) \ | ||
| 151 | test_bit((feature), (dev)->features) | ||
| 152 | /* end of stubs */ | 29 | /* end of stubs */ |
| 153 | 30 | ||
| 154 | struct virtio_device { | 31 | struct virtio_device { |
| @@ -163,39 +40,32 @@ struct virtqueue { | |||
| 163 | void (*callback)(struct virtqueue *vq); | 40 | void (*callback)(struct virtqueue *vq); |
| 164 | const char *name; | 41 | const char *name; |
| 165 | struct virtio_device *vdev; | 42 | struct virtio_device *vdev; |
| 43 | unsigned int index; | ||
| 44 | unsigned int num_free; | ||
| 166 | void *priv; | 45 | void *priv; |
| 167 | }; | 46 | }; |
| 168 | 47 | ||
| 169 | #define EXPORT_SYMBOL_GPL(__EXPORT_SYMBOL_GPL_name) \ | ||
| 170 | void __EXPORT_SYMBOL_GPL##__EXPORT_SYMBOL_GPL_name() { \ | ||
| 171 | } | ||
| 172 | #define MODULE_LICENSE(__MODULE_LICENSE_value) \ | 48 | #define MODULE_LICENSE(__MODULE_LICENSE_value) \ |
| 173 | const char *__MODULE_LICENSE_name = __MODULE_LICENSE_value | 49 | const char *__MODULE_LICENSE_name = __MODULE_LICENSE_value |
| 174 | 50 | ||
| 175 | #define CONFIG_SMP | ||
| 176 | |||
| 177 | #if defined(__i386__) || defined(__x86_64__) | ||
| 178 | #define barrier() asm volatile("" ::: "memory") | ||
| 179 | #define mb() __sync_synchronize() | ||
| 180 | |||
| 181 | #define smp_mb() mb() | ||
| 182 | # define smp_rmb() barrier() | ||
| 183 | # define smp_wmb() barrier() | ||
| 184 | /* Weak barriers should be used. If not - it's a bug */ | ||
| 185 | # define rmb() abort() | ||
| 186 | # define wmb() abort() | ||
| 187 | #else | ||
| 188 | #error Please fill in barrier macros | ||
| 189 | #endif | ||
| 190 | |||
| 191 | /* Interfaces exported by virtio_ring. */ | 51 | /* Interfaces exported by virtio_ring. */ |
| 192 | int virtqueue_add_buf(struct virtqueue *vq, | 52 | int virtqueue_add_sgs(struct virtqueue *vq, |
| 193 | struct scatterlist sg[], | 53 | struct scatterlist *sgs[], |
| 194 | unsigned int out_num, | 54 | unsigned int out_sgs, |
| 195 | unsigned int in_num, | 55 | unsigned int in_sgs, |
| 196 | void *data, | 56 | void *data, |
| 197 | gfp_t gfp); | 57 | gfp_t gfp); |
| 198 | 58 | ||
| 59 | int virtqueue_add_outbuf(struct virtqueue *vq, | ||
| 60 | struct scatterlist sg[], unsigned int num, | ||
| 61 | void *data, | ||
| 62 | gfp_t gfp); | ||
| 63 | |||
| 64 | int virtqueue_add_inbuf(struct virtqueue *vq, | ||
| 65 | struct scatterlist sg[], unsigned int num, | ||
| 66 | void *data, | ||
| 67 | gfp_t gfp); | ||
| 68 | |||
| 199 | void virtqueue_kick(struct virtqueue *vq); | 69 | void virtqueue_kick(struct virtqueue *vq); |
| 200 | 70 | ||
| 201 | void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); | 71 | void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); |
| @@ -206,7 +76,8 @@ bool virtqueue_enable_cb(struct virtqueue *vq); | |||
| 206 | bool virtqueue_enable_cb_delayed(struct virtqueue *vq); | 76 | bool virtqueue_enable_cb_delayed(struct virtqueue *vq); |
| 207 | 77 | ||
| 208 | void *virtqueue_detach_unused_buf(struct virtqueue *vq); | 78 | void *virtqueue_detach_unused_buf(struct virtqueue *vq); |
| 209 | struct virtqueue *vring_new_virtqueue(unsigned int num, | 79 | struct virtqueue *vring_new_virtqueue(unsigned int index, |
| 80 | unsigned int num, | ||
| 210 | unsigned int vring_align, | 81 | unsigned int vring_align, |
| 211 | struct virtio_device *vdev, | 82 | struct virtio_device *vdev, |
| 212 | bool weak_barriers, | 83 | bool weak_barriers, |
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h new file mode 100644 index 000000000000..5049967f99f7 --- /dev/null +++ b/tools/virtio/linux/virtio_config.h | |||
| @@ -0,0 +1,6 @@ | |||
| 1 | #define VIRTIO_TRANSPORT_F_START 28 | ||
| 2 | #define VIRTIO_TRANSPORT_F_END 32 | ||
| 3 | |||
| 4 | #define virtio_has_feature(dev, feature) \ | ||
| 5 | test_bit((feature), (dev)->features) | ||
| 6 | |||
diff --git a/tools/virtio/linux/virtio_ring.h b/tools/virtio/linux/virtio_ring.h new file mode 100644 index 000000000000..8949c4e2772c --- /dev/null +++ b/tools/virtio/linux/virtio_ring.h | |||
| @@ -0,0 +1 @@ | |||
| #include "../../../include/linux/virtio_ring.h" | |||
diff --git a/tools/virtio/linux/vringh.h b/tools/virtio/linux/vringh.h new file mode 100644 index 000000000000..9348957be56e --- /dev/null +++ b/tools/virtio/linux/vringh.h | |||
| @@ -0,0 +1 @@ | |||
| #include "../../../include/linux/vringh.h" | |||
diff --git a/tools/virtio/uapi/linux/uio.h b/tools/virtio/uapi/linux/uio.h new file mode 100644 index 000000000000..7230e9002207 --- /dev/null +++ b/tools/virtio/uapi/linux/uio.h | |||
| @@ -0,0 +1 @@ | |||
| #include <sys/uio.h> | |||
diff --git a/tools/virtio/uapi/linux/virtio_config.h b/tools/virtio/uapi/linux/virtio_config.h new file mode 100644 index 000000000000..4c86675f0159 --- /dev/null +++ b/tools/virtio/uapi/linux/virtio_config.h | |||
| @@ -0,0 +1 @@ | |||
| #include "../../../../include/uapi/linux/virtio_config.h" | |||
diff --git a/tools/virtio/uapi/linux/virtio_ring.h b/tools/virtio/uapi/linux/virtio_ring.h new file mode 100644 index 000000000000..4d99c78234d3 --- /dev/null +++ b/tools/virtio/uapi/linux/virtio_ring.h | |||
| @@ -0,0 +1,4 @@ | |||
| 1 | #ifndef VIRTIO_RING_H | ||
| 2 | #define VIRTIO_RING_H | ||
| 3 | #include "../../../../include/uapi/linux/virtio_ring.h" | ||
| 4 | #endif /* VIRTIO_RING_H */ | ||
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index fcc9aa25fd08..da7a19558281 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c | |||
| @@ -10,11 +10,15 @@ | |||
| 10 | #include <sys/stat.h> | 10 | #include <sys/stat.h> |
| 11 | #include <sys/types.h> | 11 | #include <sys/types.h> |
| 12 | #include <fcntl.h> | 12 | #include <fcntl.h> |
| 13 | #include <stdbool.h> | ||
| 13 | #include <linux/vhost.h> | 14 | #include <linux/vhost.h> |
| 14 | #include <linux/virtio.h> | 15 | #include <linux/virtio.h> |
| 15 | #include <linux/virtio_ring.h> | 16 | #include <linux/virtio_ring.h> |
| 16 | #include "../../drivers/vhost/test.h" | 17 | #include "../../drivers/vhost/test.h" |
| 17 | 18 | ||
| 19 | /* Unused */ | ||
| 20 | void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; | ||
| 21 | |||
| 18 | struct vq_info { | 22 | struct vq_info { |
| 19 | int kick; | 23 | int kick; |
| 20 | int call; | 24 | int call; |
| @@ -92,7 +96,8 @@ static void vq_info_add(struct vdev_info *dev, int num) | |||
| 92 | assert(r >= 0); | 96 | assert(r >= 0); |
| 93 | memset(info->ring, 0, vring_size(num, 4096)); | 97 | memset(info->ring, 0, vring_size(num, 4096)); |
| 94 | vring_init(&info->vring, num, info->ring, 4096); | 98 | vring_init(&info->vring, num, info->ring, 4096); |
| 95 | info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, | 99 | info->vq = vring_new_virtqueue(info->idx, |
| 100 | info->vring.num, 4096, &dev->vdev, | ||
| 96 | true, info->ring, | 101 | true, info->ring, |
| 97 | vq_notify, vq_callback, "test"); | 102 | vq_notify, vq_callback, "test"); |
| 98 | assert(info->vq); | 103 | assert(info->vq); |
| @@ -161,9 +166,9 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, | |||
| 161 | do { | 166 | do { |
| 162 | if (started < bufs) { | 167 | if (started < bufs) { |
| 163 | sg_init_one(&sl, dev->buf, dev->buf_size); | 168 | sg_init_one(&sl, dev->buf, dev->buf_size); |
| 164 | r = virtqueue_add_buf(vq->vq, &sl, 1, 0, | 169 | r = virtqueue_add_outbuf(vq->vq, &sl, 1, |
| 165 | dev->buf + started, | 170 | dev->buf + started, |
| 166 | GFP_ATOMIC); | 171 | GFP_ATOMIC); |
| 167 | if (likely(r == 0)) { | 172 | if (likely(r == 0)) { |
| 168 | ++started; | 173 | ++started; |
| 169 | virtqueue_kick(vq->vq); | 174 | virtqueue_kick(vq->vq); |
diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c new file mode 100644 index 000000000000..d053ea40c001 --- /dev/null +++ b/tools/virtio/vringh_test.c | |||
| @@ -0,0 +1,741 @@ | |||
| 1 | /* Simple test of virtio code, entirely in userspace. */ | ||
| 2 | #define _GNU_SOURCE | ||
| 3 | #include <sched.h> | ||
| 4 | #include <err.h> | ||
| 5 | #include <linux/kernel.h> | ||
| 6 | #include <linux/err.h> | ||
| 7 | #include <linux/virtio.h> | ||
| 8 | #include <linux/vringh.h> | ||
| 9 | #include <linux/virtio_ring.h> | ||
| 10 | #include <linux/uaccess.h> | ||
| 11 | #include <sys/types.h> | ||
| 12 | #include <sys/stat.h> | ||
| 13 | #include <sys/mman.h> | ||
| 14 | #include <sys/wait.h> | ||
| 15 | #include <fcntl.h> | ||
| 16 | |||
| 17 | #define USER_MEM (1024*1024) | ||
| 18 | void *__user_addr_min, *__user_addr_max; | ||
| 19 | void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; | ||
| 20 | static u64 user_addr_offset; | ||
| 21 | |||
| 22 | #define RINGSIZE 256 | ||
| 23 | #define ALIGN 4096 | ||
| 24 | |||
/*
 * Guest->host notify hook for the single-process test, where no notification
 * is expected: reaching it aborts the program.
 */
static void never_notify_host(struct virtqueue *vq)
{
	(void)vq;
	abort();
}
| 29 | |||
/*
 * Virtqueue callback installed by both tests; the guest side polls instead,
 * so being called at all aborts the program.
 */
static void never_callback_guest(struct virtqueue *vq)
{
	(void)vq;
	abort();
}
| 34 | |||
| 35 | static bool getrange_iov(struct vringh *vrh, u64 addr, struct vringh_range *r) | ||
| 36 | { | ||
| 37 | if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset) | ||
| 38 | return false; | ||
| 39 | if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset) | ||
| 40 | return false; | ||
| 41 | |||
| 42 | r->start = (u64)(unsigned long)__user_addr_min - user_addr_offset; | ||
| 43 | r->end_incl = (u64)(unsigned long)__user_addr_max - 1 - user_addr_offset; | ||
| 44 | r->offset = user_addr_offset; | ||
| 45 | return true; | ||
| 46 | } | ||
| 47 | |||
| 48 | /* We return single byte ranges. */ | ||
| 49 | static bool getrange_slow(struct vringh *vrh, u64 addr, struct vringh_range *r) | ||
| 50 | { | ||
| 51 | if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset) | ||
| 52 | return false; | ||
| 53 | if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset) | ||
| 54 | return false; | ||
| 55 | |||
| 56 | r->start = addr; | ||
| 57 | r->end_incl = r->start; | ||
| 58 | r->offset = user_addr_offset; | ||
| 59 | return true; | ||
| 60 | } | ||
| 61 | |||
| 62 | struct guest_virtio_device { | ||
| 63 | struct virtio_device vdev; | ||
| 64 | int to_host_fd; | ||
| 65 | unsigned long notifies; | ||
| 66 | }; | ||
| 67 | |||
| 68 | static void parallel_notify_host(struct virtqueue *vq) | ||
| 69 | { | ||
| 70 | struct guest_virtio_device *gvdev; | ||
| 71 | |||
| 72 | gvdev = container_of(vq->vdev, struct guest_virtio_device, vdev); | ||
| 73 | write(gvdev->to_host_fd, "", 1); | ||
| 74 | gvdev->notifies++; | ||
| 75 | } | ||
| 76 | |||
/* Notify hook used in fast mode: deliberately does nothing. */
static void no_notify_host(struct virtqueue *vq)
{
	(void)vq;
}
| 80 | |||
/* Number of buffers parallel_test() pushes through the ring: ten million. */
#define NUM_XFERS (10 * 1000 * 1000)
| 82 | |||
/*
 * We aim for two "distant" cpus: the lowest-numbered and the highest-numbered
 * CPU this process can pin itself to.
 *
 * @first: receives the lowest usable CPU number, or -1U if none was found.
 * @last:  receives the highest usable CPU number, or 0 if none was found.
 *
 * Usability is probed by actually calling sched_setaffinity(), so on return
 * the process is left pinned to the last CPU that accepted the request;
 * callers set the affinity they really want afterwards.
 */
static void find_cpus(unsigned int *first, unsigned int *last)
{
	unsigned int i;

	*first = -1U;
	*last = 0;
	/* A fixed-size cpu_set_t cannot hold CPU numbers >= CPU_SETSIZE. */
	for (i = 0; i < CPU_SETSIZE; i++) {
		cpu_set_t set;

		CPU_ZERO(&set);
		CPU_SET(i, &set);
		if (sched_setaffinity(getpid(), sizeof(set), &set) != 0)
			continue;

		if (i < *first)
			*first = i;
		if (i > *last)
			*last = i;
	}
}
| 102 | |||
| 103 | /* Opencoded version for fast mode */ | ||
| 104 | static inline int vringh_get_head(struct vringh *vrh, u16 *head) | ||
| 105 | { | ||
| 106 | u16 avail_idx, i; | ||
| 107 | int err; | ||
| 108 | |||
| 109 | err = get_user(avail_idx, &vrh->vring.avail->idx); | ||
| 110 | if (err) | ||
| 111 | return err; | ||
| 112 | |||
| 113 | if (vrh->last_avail_idx == avail_idx) | ||
| 114 | return 0; | ||
| 115 | |||
| 116 | /* Only get avail ring entries after they have been exposed by guest. */ | ||
| 117 | virtio_rmb(vrh->weak_barriers); | ||
| 118 | |||
| 119 | i = vrh->last_avail_idx & (vrh->vring.num - 1); | ||
| 120 | |||
| 121 | err = get_user(*head, &vrh->vring.avail->ring[i]); | ||
| 122 | if (err) | ||
| 123 | return err; | ||
| 124 | |||
| 125 | vrh->last_avail_idx++; | ||
| 126 | return 1; | ||
| 127 | } | ||
| 128 | |||
| 129 | static int parallel_test(unsigned long features, | ||
| 130 | bool (*getrange)(struct vringh *vrh, | ||
| 131 | u64 addr, struct vringh_range *r), | ||
| 132 | bool fast_vringh) | ||
| 133 | { | ||
| 134 | void *host_map, *guest_map; | ||
| 135 | int fd, mapsize, to_guest[2], to_host[2]; | ||
| 136 | unsigned long xfers = 0, notifies = 0, receives = 0; | ||
| 137 | unsigned int first_cpu, last_cpu; | ||
| 138 | cpu_set_t cpu_set; | ||
| 139 | char buf[128]; | ||
| 140 | |||
| 141 | /* Create real file to mmap. */ | ||
| 142 | fd = open("/tmp/vringh_test-file", O_RDWR|O_CREAT|O_TRUNC, 0600); | ||
| 143 | if (fd < 0) | ||
| 144 | err(1, "Opening /tmp/vringh_test-file"); | ||
| 145 | |||
| 146 | /* Extra room at the end for some data, and indirects */ | ||
| 147 | mapsize = vring_size(RINGSIZE, ALIGN) | ||
| 148 | + RINGSIZE * 2 * sizeof(int) | ||
| 149 | + RINGSIZE * 6 * sizeof(struct vring_desc); | ||
| 150 | mapsize = (mapsize + getpagesize() - 1) & ~(getpagesize() - 1); | ||
| 151 | ftruncate(fd, mapsize); | ||
| 152 | |||
| 153 | /* Parent and child use separate addresses, to check our mapping logic! */ | ||
| 154 | host_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); | ||
| 155 | guest_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); | ||
| 156 | |||
| 157 | pipe(to_guest); | ||
| 158 | pipe(to_host); | ||
| 159 | |||
| 160 | CPU_ZERO(&cpu_set); | ||
| 161 | find_cpus(&first_cpu, &last_cpu); | ||
| 162 | printf("Using CPUS %u and %u\n", first_cpu, last_cpu); | ||
| 163 | fflush(stdout); | ||
| 164 | |||
| 165 | if (fork() != 0) { | ||
| 166 | struct vringh vrh; | ||
| 167 | int status, err, rlen = 0; | ||
| 168 | char rbuf[5]; | ||
| 169 | |||
| 170 | /* We are the host: never access guest addresses! */ | ||
| 171 | munmap(guest_map, mapsize); | ||
| 172 | |||
| 173 | __user_addr_min = host_map; | ||
| 174 | __user_addr_max = __user_addr_min + mapsize; | ||
| 175 | user_addr_offset = host_map - guest_map; | ||
| 176 | assert(user_addr_offset); | ||
| 177 | |||
| 178 | close(to_guest[0]); | ||
| 179 | close(to_host[1]); | ||
| 180 | |||
| 181 | vring_init(&vrh.vring, RINGSIZE, host_map, ALIGN); | ||
| 182 | vringh_init_user(&vrh, features, RINGSIZE, true, | ||
| 183 | vrh.vring.desc, vrh.vring.avail, vrh.vring.used); | ||
| 184 | CPU_SET(first_cpu, &cpu_set); | ||
| 185 | if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set)) | ||
| 186 | errx(1, "Could not set affinity to cpu %u", first_cpu); | ||
| 187 | |||
| 188 | while (xfers < NUM_XFERS) { | ||
| 189 | struct iovec host_riov[2], host_wiov[2]; | ||
| 190 | struct vringh_iov riov, wiov; | ||
| 191 | u16 head, written; | ||
| 192 | |||
| 193 | if (fast_vringh) { | ||
| 194 | for (;;) { | ||
| 195 | err = vringh_get_head(&vrh, &head); | ||
| 196 | if (err != 0) | ||
| 197 | break; | ||
| 198 | err = vringh_need_notify_user(&vrh); | ||
| 199 | if (err < 0) | ||
| 200 | errx(1, "vringh_need_notify_user: %i", | ||
| 201 | err); | ||
| 202 | if (err) { | ||
| 203 | write(to_guest[1], "", 1); | ||
| 204 | notifies++; | ||
| 205 | } | ||
| 206 | } | ||
| 207 | if (err != 1) | ||
| 208 | errx(1, "vringh_get_head"); | ||
| 209 | written = 0; | ||
| 210 | goto complete; | ||
| 211 | } else { | ||
| 212 | vringh_iov_init(&riov, | ||
| 213 | host_riov, | ||
| 214 | ARRAY_SIZE(host_riov)); | ||
| 215 | vringh_iov_init(&wiov, | ||
| 216 | host_wiov, | ||
| 217 | ARRAY_SIZE(host_wiov)); | ||
| 218 | |||
| 219 | err = vringh_getdesc_user(&vrh, &riov, &wiov, | ||
| 220 | getrange, &head); | ||
| 221 | } | ||
| 222 | if (err == 0) { | ||
| 223 | err = vringh_need_notify_user(&vrh); | ||
| 224 | if (err < 0) | ||
| 225 | errx(1, "vringh_need_notify_user: %i", | ||
| 226 | err); | ||
| 227 | if (err) { | ||
| 228 | write(to_guest[1], "", 1); | ||
| 229 | notifies++; | ||
| 230 | } | ||
| 231 | |||
| 232 | if (!vringh_notify_enable_user(&vrh)) | ||
| 233 | continue; | ||
| 234 | |||
| 235 | /* Swallow all notifies at once. */ | ||
| 236 | if (read(to_host[0], buf, sizeof(buf)) < 1) | ||
| 237 | break; | ||
| 238 | |||
| 239 | vringh_notify_disable_user(&vrh); | ||
| 240 | receives++; | ||
| 241 | continue; | ||
| 242 | } | ||
| 243 | if (err != 1) | ||
| 244 | errx(1, "vringh_getdesc_user: %i", err); | ||
| 245 | |||
| 246 | /* We simply copy bytes. */ | ||
| 247 | if (riov.used) { | ||
| 248 | rlen = vringh_iov_pull_user(&riov, rbuf, | ||
| 249 | sizeof(rbuf)); | ||
| 250 | if (rlen != 4) | ||
| 251 | errx(1, "vringh_iov_pull_user: %i", | ||
| 252 | rlen); | ||
| 253 | assert(riov.i == riov.used); | ||
| 254 | written = 0; | ||
| 255 | } else { | ||
| 256 | err = vringh_iov_push_user(&wiov, rbuf, rlen); | ||
| 257 | if (err != rlen) | ||
| 258 | errx(1, "vringh_iov_push_user: %i", | ||
| 259 | err); | ||
| 260 | assert(wiov.i == wiov.used); | ||
| 261 | written = err; | ||
| 262 | } | ||
| 263 | complete: | ||
| 264 | xfers++; | ||
| 265 | |||
| 266 | err = vringh_complete_user(&vrh, head, written); | ||
| 267 | if (err != 0) | ||
| 268 | errx(1, "vringh_complete_user: %i", err); | ||
| 269 | } | ||
| 270 | |||
| 271 | err = vringh_need_notify_user(&vrh); | ||
| 272 | if (err < 0) | ||
| 273 | errx(1, "vringh_need_notify_user: %i", err); | ||
| 274 | if (err) { | ||
| 275 | write(to_guest[1], "", 1); | ||
| 276 | notifies++; | ||
| 277 | } | ||
| 278 | wait(&status); | ||
| 279 | if (!WIFEXITED(status)) | ||
| 280 | errx(1, "Child died with signal %i?", WTERMSIG(status)); | ||
| 281 | if (WEXITSTATUS(status) != 0) | ||
| 282 | errx(1, "Child exited %i?", WEXITSTATUS(status)); | ||
| 283 | printf("Host: notified %lu, pinged %lu\n", notifies, receives); | ||
| 284 | return 0; | ||
| 285 | } else { | ||
| 286 | struct guest_virtio_device gvdev; | ||
| 287 | struct virtqueue *vq; | ||
| 288 | unsigned int *data; | ||
| 289 | struct vring_desc *indirects; | ||
| 290 | unsigned int finished = 0; | ||
| 291 | |||
| 292 | /* We pass sg[]s pointing into here, but we need RINGSIZE+1 */ | ||
| 293 | data = guest_map + vring_size(RINGSIZE, ALIGN); | ||
| 294 | indirects = (void *)data + (RINGSIZE + 1) * 2 * sizeof(int); | ||
| 295 | |||
| 296 | /* We are the guest. */ | ||
| 297 | munmap(host_map, mapsize); | ||
| 298 | |||
| 299 | close(to_guest[1]); | ||
| 300 | close(to_host[0]); | ||
| 301 | |||
| 302 | gvdev.vdev.features[0] = features; | ||
| 303 | gvdev.to_host_fd = to_host[1]; | ||
| 304 | gvdev.notifies = 0; | ||
| 305 | |||
| 306 | CPU_SET(first_cpu, &cpu_set); | ||
| 307 | if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set)) | ||
| 308 | err(1, "Could not set affinity to cpu %u", first_cpu); | ||
| 309 | |||
| 310 | vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true, | ||
| 311 | guest_map, fast_vringh ? no_notify_host | ||
| 312 | : parallel_notify_host, | ||
| 313 | never_callback_guest, "guest vq"); | ||
| 314 | |||
| 315 | /* Don't kfree indirects. */ | ||
| 316 | __kfree_ignore_start = indirects; | ||
| 317 | __kfree_ignore_end = indirects + RINGSIZE * 6; | ||
| 318 | |||
| 319 | while (xfers < NUM_XFERS) { | ||
| 320 | struct scatterlist sg[4]; | ||
| 321 | unsigned int num_sg, len; | ||
| 322 | int *dbuf, err; | ||
| 323 | bool output = !(xfers % 2); | ||
| 324 | |||
| 325 | /* Consume bufs. */ | ||
| 326 | while ((dbuf = virtqueue_get_buf(vq, &len)) != NULL) { | ||
| 327 | if (len == 4) | ||
| 328 | assert(*dbuf == finished - 1); | ||
| 329 | else if (!fast_vringh) | ||
| 330 | assert(*dbuf == finished); | ||
| 331 | finished++; | ||
| 332 | } | ||
| 333 | |||
| 334 | /* Produce a buffer. */ | ||
| 335 | dbuf = data + (xfers % (RINGSIZE + 1)); | ||
| 336 | |||
| 337 | if (output) | ||
| 338 | *dbuf = xfers; | ||
| 339 | else | ||
| 340 | *dbuf = -1; | ||
| 341 | |||
| 342 | switch ((xfers / sizeof(*dbuf)) % 4) { | ||
| 343 | case 0: | ||
| 344 | /* Nasty three-element sg list. */ | ||
| 345 | sg_init_table(sg, num_sg = 3); | ||
| 346 | sg_set_buf(&sg[0], (void *)dbuf, 1); | ||
| 347 | sg_set_buf(&sg[1], (void *)dbuf + 1, 2); | ||
| 348 | sg_set_buf(&sg[2], (void *)dbuf + 3, 1); | ||
| 349 | break; | ||
| 350 | case 1: | ||
| 351 | sg_init_table(sg, num_sg = 2); | ||
| 352 | sg_set_buf(&sg[0], (void *)dbuf, 1); | ||
| 353 | sg_set_buf(&sg[1], (void *)dbuf + 1, 3); | ||
| 354 | break; | ||
| 355 | case 2: | ||
| 356 | sg_init_table(sg, num_sg = 1); | ||
| 357 | sg_set_buf(&sg[0], (void *)dbuf, 4); | ||
| 358 | break; | ||
| 359 | case 3: | ||
| 360 | sg_init_table(sg, num_sg = 4); | ||
| 361 | sg_set_buf(&sg[0], (void *)dbuf, 1); | ||
| 362 | sg_set_buf(&sg[1], (void *)dbuf + 1, 1); | ||
| 363 | sg_set_buf(&sg[2], (void *)dbuf + 2, 1); | ||
| 364 | sg_set_buf(&sg[3], (void *)dbuf + 3, 1); | ||
| 365 | break; | ||
| 366 | } | ||
| 367 | |||
| 368 | /* May allocate an indirect, so force it to allocate | ||
| 369 | * user addr */ | ||
| 370 | __kmalloc_fake = indirects + (xfers % RINGSIZE) * 4; | ||
| 371 | if (output) | ||
| 372 | err = virtqueue_add_outbuf(vq, sg, num_sg, dbuf, | ||
| 373 | GFP_KERNEL); | ||
| 374 | else | ||
| 375 | err = virtqueue_add_inbuf(vq, sg, num_sg, | ||
| 376 | dbuf, GFP_KERNEL); | ||
| 377 | |||
| 378 | if (err == -ENOSPC) { | ||
| 379 | if (!virtqueue_enable_cb_delayed(vq)) | ||
| 380 | continue; | ||
| 381 | /* Swallow all notifies at once. */ | ||
| 382 | if (read(to_guest[0], buf, sizeof(buf)) < 1) | ||
| 383 | break; | ||
| 384 | |||
| 385 | receives++; | ||
| 386 | virtqueue_disable_cb(vq); | ||
| 387 | continue; | ||
| 388 | } | ||
| 389 | |||
| 390 | if (err) | ||
| 391 | errx(1, "virtqueue_add_in/outbuf: %i", err); | ||
| 392 | |||
| 393 | xfers++; | ||
| 394 | virtqueue_kick(vq); | ||
| 395 | } | ||
| 396 | |||
| 397 | /* Any extra? */ | ||
| 398 | while (finished != xfers) { | ||
| 399 | int *dbuf; | ||
| 400 | unsigned int len; | ||
| 401 | |||
| 402 | /* Consume bufs. */ | ||
| 403 | dbuf = virtqueue_get_buf(vq, &len); | ||
| 404 | if (dbuf) { | ||
| 405 | if (len == 4) | ||
| 406 | assert(*dbuf == finished - 1); | ||
| 407 | else | ||
| 408 | assert(len == 0); | ||
| 409 | finished++; | ||
| 410 | continue; | ||
| 411 | } | ||
| 412 | |||
| 413 | if (!virtqueue_enable_cb_delayed(vq)) | ||
| 414 | continue; | ||
| 415 | if (read(to_guest[0], buf, sizeof(buf)) < 1) | ||
| 416 | break; | ||
| 417 | |||
| 418 | receives++; | ||
| 419 | virtqueue_disable_cb(vq); | ||
| 420 | } | ||
| 421 | |||
| 422 | printf("Guest: notified %lu, pinged %lu\n", | ||
| 423 | gvdev.notifies, receives); | ||
| 424 | vring_del_virtqueue(vq); | ||
| 425 | return 0; | ||
| 426 | } | ||
| 427 | } | ||
| 428 | |||
| 429 | int main(int argc, char *argv[]) | ||
| 430 | { | ||
| 431 | struct virtio_device vdev; | ||
| 432 | struct virtqueue *vq; | ||
| 433 | struct vringh vrh; | ||
| 434 | struct scatterlist guest_sg[RINGSIZE], *sgs[2]; | ||
| 435 | struct iovec host_riov[2], host_wiov[2]; | ||
| 436 | struct vringh_iov riov, wiov; | ||
| 437 | struct vring_used_elem used[RINGSIZE]; | ||
| 438 | char buf[28]; | ||
| 439 | u16 head; | ||
| 440 | int err; | ||
| 441 | unsigned i; | ||
| 442 | void *ret; | ||
| 443 | bool (*getrange)(struct vringh *vrh, u64 addr, struct vringh_range *r); | ||
| 444 | bool fast_vringh = false, parallel = false; | ||
| 445 | |||
| 446 | getrange = getrange_iov; | ||
| 447 | vdev.features[0] = 0; | ||
| 448 | |||
| 449 | while (argv[1]) { | ||
| 450 | if (strcmp(argv[1], "--indirect") == 0) | ||
| 451 | vdev.features[0] |= (1 << VIRTIO_RING_F_INDIRECT_DESC); | ||
| 452 | else if (strcmp(argv[1], "--eventidx") == 0) | ||
| 453 | vdev.features[0] |= (1 << VIRTIO_RING_F_EVENT_IDX); | ||
| 454 | else if (strcmp(argv[1], "--slow-range") == 0) | ||
| 455 | getrange = getrange_slow; | ||
| 456 | else if (strcmp(argv[1], "--fast-vringh") == 0) | ||
| 457 | fast_vringh = true; | ||
| 458 | else if (strcmp(argv[1], "--parallel") == 0) | ||
| 459 | parallel = true; | ||
| 460 | else | ||
| 461 | errx(1, "Unknown arg %s", argv[1]); | ||
| 462 | argv++; | ||
| 463 | } | ||
| 464 | |||
| 465 | if (parallel) | ||
| 466 | return parallel_test(vdev.features[0], getrange, fast_vringh); | ||
| 467 | |||
| 468 | if (posix_memalign(&__user_addr_min, PAGE_SIZE, USER_MEM) != 0) | ||
| 469 | abort(); | ||
| 470 | __user_addr_max = __user_addr_min + USER_MEM; | ||
| 471 | memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN)); | ||
| 472 | |||
| 473 | /* Set up guest side. */ | ||
| 474 | vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, | ||
| 475 | __user_addr_min, | ||
| 476 | never_notify_host, never_callback_guest, | ||
| 477 | "guest vq"); | ||
| 478 | |||
| 479 | /* Set up host side. */ | ||
| 480 | vring_init(&vrh.vring, RINGSIZE, __user_addr_min, ALIGN); | ||
| 481 | vringh_init_user(&vrh, vdev.features[0], RINGSIZE, true, | ||
| 482 | vrh.vring.desc, vrh.vring.avail, vrh.vring.used); | ||
| 483 | |||
| 484 | /* No descriptor to get yet... */ | ||
| 485 | err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head); | ||
| 486 | if (err != 0) | ||
| 487 | errx(1, "vringh_getdesc_user: %i", err); | ||
| 488 | |||
| 489 | /* Guest puts in a descriptor. */ | ||
| 490 | memcpy(__user_addr_max - 1, "a", 1); | ||
| 491 | sg_init_table(guest_sg, 1); | ||
| 492 | sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1); | ||
| 493 | sg_init_table(guest_sg+1, 1); | ||
| 494 | sg_set_buf(&guest_sg[1], __user_addr_max - 3, 2); | ||
| 495 | sgs[0] = &guest_sg[0]; | ||
| 496 | sgs[1] = &guest_sg[1]; | ||
| 497 | |||
| 498 | /* May allocate an indirect, so force it to allocate user addr */ | ||
| 499 | __kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN); | ||
| 500 | err = virtqueue_add_sgs(vq, sgs, 1, 1, &err, GFP_KERNEL); | ||
| 501 | if (err) | ||
| 502 | errx(1, "virtqueue_add_sgs: %i", err); | ||
| 503 | __kmalloc_fake = NULL; | ||
| 504 | |||
| 505 | /* Host retrieves it. */ | ||
| 506 | vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov)); | ||
| 507 | vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov)); | ||
| 508 | |||
| 509 | err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head); | ||
| 510 | if (err != 1) | ||
| 511 | errx(1, "vringh_getdesc_user: %i", err); | ||
| 512 | |||
| 513 | assert(riov.used == 1); | ||
| 514 | assert(riov.iov[0].iov_base == __user_addr_max - 1); | ||
| 515 | assert(riov.iov[0].iov_len == 1); | ||
| 516 | if (getrange != getrange_slow) { | ||
| 517 | assert(wiov.used == 1); | ||
| 518 | assert(wiov.iov[0].iov_base == __user_addr_max - 3); | ||
| 519 | assert(wiov.iov[0].iov_len == 2); | ||
| 520 | } else { | ||
| 521 | assert(wiov.used == 2); | ||
| 522 | assert(wiov.iov[0].iov_base == __user_addr_max - 3); | ||
| 523 | assert(wiov.iov[0].iov_len == 1); | ||
| 524 | assert(wiov.iov[1].iov_base == __user_addr_max - 2); | ||
| 525 | assert(wiov.iov[1].iov_len == 1); | ||
| 526 | } | ||
| 527 | |||
| 528 | err = vringh_iov_pull_user(&riov, buf, 5); | ||
| 529 | if (err != 1) | ||
| 530 | errx(1, "vringh_iov_pull_user: %i", err); | ||
| 531 | assert(buf[0] == 'a'); | ||
| 532 | assert(riov.i == 1); | ||
| 533 | assert(vringh_iov_pull_user(&riov, buf, 5) == 0); | ||
| 534 | |||
| 535 | memcpy(buf, "bcdef", 5); | ||
| 536 | err = vringh_iov_push_user(&wiov, buf, 5); | ||
| 537 | if (err != 2) | ||
| 538 | errx(1, "vringh_iov_push_user: %i", err); | ||
| 539 | assert(memcmp(__user_addr_max - 3, "bc", 2) == 0); | ||
| 540 | assert(wiov.i == wiov.used); | ||
| 541 | assert(vringh_iov_push_user(&wiov, buf, 5) == 0); | ||
| 542 | |||
| 543 | /* Host is done. */ | ||
| 544 | err = vringh_complete_user(&vrh, head, err); | ||
| 545 | if (err != 0) | ||
| 546 | errx(1, "vringh_complete_user: %i", err); | ||
| 547 | |||
| 548 | /* Guest should see used token now. */ | ||
| 549 | __kfree_ignore_start = __user_addr_min + vring_size(RINGSIZE, ALIGN); | ||
| 550 | __kfree_ignore_end = __kfree_ignore_start + 1; | ||
| 551 | ret = virtqueue_get_buf(vq, &i); | ||
| 552 | if (ret != &err) | ||
| 553 | errx(1, "virtqueue_get_buf: %p", ret); | ||
| 554 | assert(i == 2); | ||
| 555 | |||
| 556 | /* Guest puts in a huge descriptor. */ | ||
| 557 | sg_init_table(guest_sg, RINGSIZE); | ||
| 558 | for (i = 0; i < RINGSIZE; i++) { | ||
| 559 | sg_set_buf(&guest_sg[i], | ||
| 560 | __user_addr_max - USER_MEM/4, USER_MEM/4); | ||
| 561 | } | ||
| 562 | |||
| 563 | /* Fill contents with recognisable garbage. */ | ||
| 564 | for (i = 0; i < USER_MEM/4; i++) | ||
| 565 | ((char *)__user_addr_max - USER_MEM/4)[i] = i; | ||
| 566 | |||
| 567 | /* This will allocate an indirect, so force it to allocate user addr */ | ||
| 568 | __kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN); | ||
| 569 | err = virtqueue_add_outbuf(vq, guest_sg, RINGSIZE, &err, GFP_KERNEL); | ||
| 570 | if (err) | ||
| 571 | errx(1, "virtqueue_add_outbuf (large): %i", err); | ||
| 572 | __kmalloc_fake = NULL; | ||
| 573 | |||
| 574 | /* Host picks it up (allocates new iov). */ | ||
| 575 | vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov)); | ||
| 576 | vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov)); | ||
| 577 | |||
| 578 | err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head); | ||
| 579 | if (err != 1) | ||
| 580 | errx(1, "vringh_getdesc_user: %i", err); | ||
| 581 | |||
| 582 | assert(riov.max_num & VRINGH_IOV_ALLOCATED); | ||
| 583 | assert(riov.iov != host_riov); | ||
| 584 | if (getrange != getrange_slow) | ||
| 585 | assert(riov.used == RINGSIZE); | ||
| 586 | else | ||
| 587 | assert(riov.used == RINGSIZE * USER_MEM/4); | ||
| 588 | |||
| 589 | assert(!(wiov.max_num & VRINGH_IOV_ALLOCATED)); | ||
| 590 | assert(wiov.used == 0); | ||
| 591 | |||
| 592 | /* Pull data back out (in odd chunks), should be as expected. */ | ||
| 593 | for (i = 0; i < RINGSIZE * USER_MEM/4; i += 3) { | ||
| 594 | err = vringh_iov_pull_user(&riov, buf, 3); | ||
| 595 | if (err != 3 && i + err != RINGSIZE * USER_MEM/4) | ||
| 596 | errx(1, "vringh_iov_pull_user large: %i", err); | ||
| 597 | assert(buf[0] == (char)i); | ||
| 598 | assert(err < 2 || buf[1] == (char)(i + 1)); | ||
| 599 | assert(err < 3 || buf[2] == (char)(i + 2)); | ||
| 600 | } | ||
| 601 | assert(riov.i == riov.used); | ||
| 602 | vringh_iov_cleanup(&riov); | ||
| 603 | vringh_iov_cleanup(&wiov); | ||
| 604 | |||
| 605 | /* Complete using multi interface, just because we can. */ | ||
| 606 | used[0].id = head; | ||
| 607 | used[0].len = 0; | ||
| 608 | err = vringh_complete_multi_user(&vrh, used, 1); | ||
| 609 | if (err) | ||
| 610 | errx(1, "vringh_complete_multi_user(1): %i", err); | ||
| 611 | |||
| 612 | /* Free up those descriptors. */ | ||
| 613 | ret = virtqueue_get_buf(vq, &i); | ||
| 614 | if (ret != &err) | ||
| 615 | errx(1, "virtqueue_get_buf: %p", ret); | ||
| 616 | |||
| 617 | /* Add lots of descriptors. */ | ||
| 618 | sg_init_table(guest_sg, 1); | ||
| 619 | sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1); | ||
| 620 | for (i = 0; i < RINGSIZE; i++) { | ||
| 621 | err = virtqueue_add_outbuf(vq, guest_sg, 1, &err, GFP_KERNEL); | ||
| 622 | if (err) | ||
| 623 | errx(1, "virtqueue_add_outbuf (multiple): %i", err); | ||
| 624 | } | ||
| 625 | |||
| 626 | /* Now get many, and consume them all at once. */ | ||
| 627 | vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov)); | ||
| 628 | vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov)); | ||
| 629 | |||
| 630 | for (i = 0; i < RINGSIZE; i++) { | ||
| 631 | err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head); | ||
| 632 | if (err != 1) | ||
| 633 | errx(1, "vringh_getdesc_user: %i", err); | ||
| 634 | used[i].id = head; | ||
| 635 | used[i].len = 0; | ||
| 636 | } | ||
| 637 | /* Make sure it wraps around ring, to test! */ | ||
| 638 | assert(vrh.vring.used->idx % RINGSIZE != 0); | ||
| 639 | err = vringh_complete_multi_user(&vrh, used, RINGSIZE); | ||
| 640 | if (err) | ||
| 641 | errx(1, "vringh_complete_multi_user: %i", err); | ||
| 642 | |||
| 643 | /* Free those buffers. */ | ||
| 644 | for (i = 0; i < RINGSIZE; i++) { | ||
| 645 | unsigned len; | ||
| 646 | assert(virtqueue_get_buf(vq, &len) != NULL); | ||
| 647 | } | ||
| 648 | |||
| 649 | /* Test weird (but legal!) indirect. */ | ||
| 650 | if (vdev.features[0] & (1 << VIRTIO_RING_F_INDIRECT_DESC)) { | ||
| 651 | char *data = __user_addr_max - USER_MEM/4; | ||
| 652 | struct vring_desc *d = __user_addr_max - USER_MEM/2; | ||
| 653 | struct vring vring; | ||
| 654 | |||
| 655 | /* Force creation of direct, which we modify. */ | ||
| 656 | vdev.features[0] &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); | ||
| 657 | vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, | ||
| 658 | __user_addr_min, | ||
| 659 | never_notify_host, | ||
| 660 | never_callback_guest, | ||
| 661 | "guest vq"); | ||
| 662 | |||
| 663 | sg_init_table(guest_sg, 4); | ||
| 664 | sg_set_buf(&guest_sg[0], d, sizeof(*d)*2); | ||
| 665 | sg_set_buf(&guest_sg[1], d + 2, sizeof(*d)*1); | ||
| 666 | sg_set_buf(&guest_sg[2], data + 6, 4); | ||
| 667 | sg_set_buf(&guest_sg[3], d + 3, sizeof(*d)*3); | ||
| 668 | |||
| 669 | err = virtqueue_add_outbuf(vq, guest_sg, 4, &err, GFP_KERNEL); | ||
| 670 | if (err) | ||
| 671 | errx(1, "virtqueue_add_outbuf (indirect): %i", err); | ||
| 672 | |||
| 673 | vring_init(&vring, RINGSIZE, __user_addr_min, ALIGN); | ||
| 674 | |||
| 675 | /* They're used in order, but double-check... */ | ||
| 676 | assert(vring.desc[0].addr == (unsigned long)d); | ||
| 677 | assert(vring.desc[1].addr == (unsigned long)(d+2)); | ||
| 678 | assert(vring.desc[2].addr == (unsigned long)data + 6); | ||
| 679 | assert(vring.desc[3].addr == (unsigned long)(d+3)); | ||
| 680 | vring.desc[0].flags |= VRING_DESC_F_INDIRECT; | ||
| 681 | vring.desc[1].flags |= VRING_DESC_F_INDIRECT; | ||
| 682 | vring.desc[3].flags |= VRING_DESC_F_INDIRECT; | ||
| 683 | |||
| 684 | /* First indirect */ | ||
| 685 | d[0].addr = (unsigned long)data; | ||
| 686 | d[0].len = 1; | ||
| 687 | d[0].flags = VRING_DESC_F_NEXT; | ||
| 688 | d[0].next = 1; | ||
| 689 | d[1].addr = (unsigned long)data + 1; | ||
| 690 | d[1].len = 2; | ||
| 691 | d[1].flags = 0; | ||
| 692 | |||
| 693 | /* Second indirect */ | ||
| 694 | d[2].addr = (unsigned long)data + 3; | ||
| 695 | d[2].len = 3; | ||
| 696 | d[2].flags = 0; | ||
| 697 | |||
| 698 | /* Third indirect */ | ||
| 699 | d[3].addr = (unsigned long)data + 10; | ||
| 700 | d[3].len = 5; | ||
| 701 | d[3].flags = VRING_DESC_F_NEXT; | ||
| 702 | d[3].next = 1; | ||
| 703 | d[4].addr = (unsigned long)data + 15; | ||
| 704 | d[4].len = 6; | ||
| 705 | d[4].flags = VRING_DESC_F_NEXT; | ||
| 706 | d[4].next = 2; | ||
| 707 | d[5].addr = (unsigned long)data + 21; | ||
| 708 | d[5].len = 7; | ||
| 709 | d[5].flags = 0; | ||
| 710 | |||
| 711 | /* Host picks it up (allocates new iov). */ | ||
| 712 | vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov)); | ||
| 713 | vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov)); | ||
| 714 | |||
| 715 | err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head); | ||
| 716 | if (err != 1) | ||
| 717 | errx(1, "vringh_getdesc_user: %i", err); | ||
| 718 | |||
| 719 | if (head != 0) | ||
| 720 | errx(1, "vringh_getdesc_user: head %i not 0", head); | ||
| 721 | |||
| 722 | assert(riov.max_num & VRINGH_IOV_ALLOCATED); | ||
| 723 | if (getrange != getrange_slow) | ||
| 724 | assert(riov.used == 7); | ||
| 725 | else | ||
| 726 | assert(riov.used == 28); | ||
| 727 | err = vringh_iov_pull_user(&riov, buf, 29); | ||
| 728 | assert(err == 28); | ||
| 729 | |||
| 730 | /* Data should be linear. */ | ||
| 731 | for (i = 0; i < err; i++) | ||
| 732 | assert(buf[i] == i); | ||
| 733 | vringh_iov_cleanup(&riov); | ||
| 734 | } | ||
| 735 | |||
| 736 | /* Don't leak memory... */ | ||
| 737 | vring_del_virtqueue(vq); | ||
| 738 | free(__user_addr_min); | ||
| 739 | |||
| 740 | return 0; | ||
| 741 | } | ||
