aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-05-01 17:08:52 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-05-01 17:08:52 -0400
commit73287a43cc79ca06629a88d1a199cd283f42456a (patch)
treeacf4456e260115bea77ee31a29f10ce17f0db45c /tools
parent251df49db3327c64bf917bfdba94491fde2b4ee0 (diff)
parent20074f357da4a637430aec2879c9d864c5d2c23c (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights (1721 non-merge commits, this has to be a record of some sort): 1) Add 'random' mode to team driver, from Jiri Pirko and Eric Dumazet. 2) Make it so that any driver that supports configuration of multiple MAC addresses can provide the forwarding database add and del calls by providing a default implementation and hooking that up if the driver doesn't have an explicit set of handlers. From Vlad Yasevich. 3) Support GSO segmentation over tunnels and other encapsulating devices such as VXLAN, from Pravin B Shelar. 4) Support L2 GRE tunnels in the flow dissector, from Michael Dalton. 5) Implement Tail Loss Probe (TLP) detection in TCP, from Nandita Dukkipati. 6) In the PHY layer, allow supporting wake-on-lan in situations where the PHY registers have to be written for it to be configured. Use it to support wake-on-lan in mv643xx_eth. From Michael Stapelberg. 7) Significantly improve firewire IPV6 support, from YOSHIFUJI Hideaki. 8) Allow multiple packets to be sent in a single transmission using network coding in batman-adv, from Martin Hundebøll. 9) Add support for T5 cxgb4 chips, from Santosh Rastapur. 10) Generalize the VXLAN forwarding tables so that there is more flexibility in configuring various aspects of the endpoints. From David Stevens. 11) Support RSS and TSO in hardware over GRE tunnels in bnx2x driver, from Dmitry Kravkov. 12) Zero copy support in nfnetlink_queue, from Eric Dumazet and Pablo Neira Ayuso. 13) Start adding networking selftests. 14) In situations of overload on the same AF_PACKET fanout socket, or per-cpu packet receive queue, minimize drop by distributing the load to other cpus/fanouts. From Willem de Bruijn and Eric Dumazet. 15) Add support for new payload offset BPF instruction, from Daniel Borkmann. 16) Convert several drivers over to module_platform_driver(), from Sachin Kamat. 17) Provide a minimal BPF JIT image disassembler userspace tool, from Daniel Borkmann. 
18) Rewrite F-RTO implementation in TCP to match the final specification of it in RFC4138 and RFC5682. From Yuchung Cheng. 19) Provide netlink socket diag of netlink sockets ("Yo dawg, I hear you like netlink, so I implemented netlink dumping of netlink sockets.") From Andrey Vagin. 20) Remove ugly passing of rtnetlink attributes into rtnl_doit functions, from Thomas Graf. 21) Allow userspace to be able to see if a configuration change occurs in the middle of an address or device list dump, from Nicolas Dichtel. 22) Support RFC3168 ECN protection for ipv6 fragments, from Hannes Frederic Sowa. 23) Increase accuracy of packet length used by packet scheduler, from Jason Wang. 24) Beginning set of changes to make ipv4/ipv6 fragment handling more scalable and less susceptible to overload and locking contention, from Jesper Dangaard Brouer. 25) Get rid of using non-type-safe NLMSG_* macros and use nlmsg_*() instead. From Hong Zhiguo. 26) Optimize route usage in IPVS by avoiding reference counting where possible, from Julian Anastasov. 27) Convert IPVS schedulers to RCU, also from Julian Anastasov. 28) Support cpu fanouts in xt_NFQUEUE netfilter target, from Holger Eitzenberger. 29) Network namespace support for nf_log, ebt_log, xt_LOG, ipt_ULOG, nfnetlink_log, and nfnetlink_queue. From Gao feng. 30) Implement RFC3168 ECN protection, from Hannes Frederic Sowa. 31) Support several new r8169 chips, from Hayes Wang. 32) Support tokenized interface identifiers in ipv6, from Daniel Borkmann. 33) Use usbnet_link_change() helper in USB net driver, from Ming Lei. 34) Add 802.1ad vlan offload support, from Patrick McHardy. 35) Support mmap() based netlink communication, also from Patrick McHardy. 36) Support HW timestamping in mlx4 driver, from Amir Vadai. 37) Rationalize AF_PACKET packet timestamping when transmitting, from Willem de Bruijn and Daniel Borkmann. 
38) Bring parity to what's provided by /proc/net/packet socket dumping and the info provided by netlink socket dumping of AF_PACKET sockets. From Nicolas Dichtel. 39) Fix peeking beyond zero sized SKBs in AF_UNIX, from Benjamin Poirier" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1722 commits) filter: fix va_list build error af_unix: fix a fatal race with bit fields bnx2x: Prevent memory leak when cnic is absent bnx2x: correct reading of speed capabilities net: sctp: attribute printl with __printf for gcc fmt checks netlink: kconfig: move mmap i/o into netlink kconfig netpoll: convert mutex into a semaphore netlink: Fix skb ref counting. net_sched: act_ipt forward compat with xtables mlx4_en: fix a build error on 32bit arches Revert "bnx2x: allow nvram test to run when device is down" bridge: avoid OOPS if root port not found drivers: net: cpsw: fix kernel warn on cpsw irq enable sh_eth: use random MAC address if no valid one supplied 3c509.c: call SET_NETDEV_DEV for all device types (ISA/ISAPnP/EISA) tg3: fix to append hardware time stamping flags unix/stream: fix peeking with an offset larger than data in queue unix/dgram: fix peeking with an offset larger than data in queue unix/dgram: peek beyond 0-sized skbs openvswitch: Remove unneeded ovs_netdev_get_ifindex() ...
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile11
-rw-r--r--tools/net/Makefile15
-rw-r--r--tools/net/bpf_jit_disasm.c199
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/net/.gitignore3
-rw-r--r--tools/testing/selftests/net/Makefile19
-rw-r--r--tools/testing/selftests/net/psock_fanout.c312
-rw-r--r--tools/testing/selftests/net/psock_lib.h127
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c824
-rw-r--r--tools/testing/selftests/net/run_afpackettests26
-rw-r--r--tools/testing/selftests/net/run_netsocktests12
-rw-r--r--tools/testing/selftests/net/socket.c92
12 files changed, 1636 insertions, 5 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 6aaeb6cd867d..41067f304215 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -12,6 +12,7 @@ help:
12 @echo ' turbostat - Intel CPU idle stats and freq reporting tool' 12 @echo ' turbostat - Intel CPU idle stats and freq reporting tool'
13 @echo ' usb - USB testing tools' 13 @echo ' usb - USB testing tools'
14 @echo ' virtio - vhost test module' 14 @echo ' virtio - vhost test module'
15 @echo ' net - misc networking tools'
15 @echo ' vm - misc vm tools' 16 @echo ' vm - misc vm tools'
16 @echo ' x86_energy_perf_policy - Intel energy policy tool' 17 @echo ' x86_energy_perf_policy - Intel energy policy tool'
17 @echo '' 18 @echo ''
@@ -34,7 +35,7 @@ help:
34cpupower: FORCE 35cpupower: FORCE
35 $(call descend,power/$@) 36 $(call descend,power/$@)
36 37
37cgroup firewire guest usb virtio vm: FORCE 38cgroup firewire guest usb virtio vm net: FORCE
38 $(call descend,$@) 39 $(call descend,$@)
39 40
40liblk: FORCE 41liblk: FORCE
@@ -52,7 +53,7 @@ turbostat x86_energy_perf_policy: FORCE
52cpupower_install: 53cpupower_install:
53 $(call descend,power/$(@:_install=),install) 54 $(call descend,power/$(@:_install=),install)
54 55
55cgroup_install firewire_install lguest_install perf_install usb_install virtio_install vm_install: 56cgroup_install firewire_install lguest_install perf_install usb_install virtio_install vm_install net_install:
56 $(call descend,$(@:_install=),install) 57 $(call descend,$(@:_install=),install)
57 58
58selftests_install: 59selftests_install:
@@ -63,12 +64,12 @@ turbostat_install x86_energy_perf_policy_install:
63 64
64install: cgroup_install cpupower_install firewire_install lguest_install \ 65install: cgroup_install cpupower_install firewire_install lguest_install \
65 perf_install selftests_install turbostat_install usb_install \ 66 perf_install selftests_install turbostat_install usb_install \
66 virtio_install vm_install x86_energy_perf_policy_install 67 virtio_install vm_install net_install x86_energy_perf_policy_install
67 68
68cpupower_clean: 69cpupower_clean:
69 $(call descend,power/cpupower,clean) 70 $(call descend,power/cpupower,clean)
70 71
71cgroup_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean: 72cgroup_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean net_clean:
72 $(call descend,$(@:_clean=),clean) 73 $(call descend,$(@:_clean=),clean)
73 74
74liblk_clean: 75liblk_clean:
@@ -85,6 +86,6 @@ turbostat_clean x86_energy_perf_policy_clean:
85 86
86clean: cgroup_clean cpupower_clean firewire_clean lguest_clean perf_clean \ 87clean: cgroup_clean cpupower_clean firewire_clean lguest_clean perf_clean \
87 selftests_clean turbostat_clean usb_clean virtio_clean \ 88 selftests_clean turbostat_clean usb_clean virtio_clean \
88 vm_clean x86_energy_perf_policy_clean 89 vm_clean net_clean x86_energy_perf_policy_clean
89 90
90.PHONY: FORCE 91.PHONY: FORCE
diff --git a/tools/net/Makefile b/tools/net/Makefile
new file mode 100644
index 000000000000..b4444d53b73f
--- /dev/null
+++ b/tools/net/Makefile
@@ -0,0 +1,15 @@
1prefix = /usr
2
3CC = gcc
4
5all : bpf_jit_disasm
6
7bpf_jit_disasm : CFLAGS = -Wall -O2
8bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
9bpf_jit_disasm : bpf_jit_disasm.o
10
11clean :
12 rm -rf *.o bpf_jit_disasm
13
14install :
15 install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm
diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c
new file mode 100644
index 000000000000..cfe0cdcda3de
--- /dev/null
+++ b/tools/net/bpf_jit_disasm.c
@@ -0,0 +1,199 @@
1/*
2 * Minimal BPF JIT image disassembler
3 *
4 * Disassembles BPF JIT compiler emitted opcodes back to asm insn's for
5 * debugging or verification purposes.
6 *
7 * To get the disassembly of the JIT code, do the following:
8 *
9 * 1) `echo 2 > /proc/sys/net/core/bpf_jit_enable`
10 * 2) Load a BPF filter (e.g. `tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24`)
11 * 3) Run e.g. `bpf_jit_disasm -o` to read out the last JIT code
12 *
13 * Copyright 2013 Daniel Borkmann <borkmann@redhat.com>
14 * Licensed under the GNU General Public License, version 2.0 (GPLv2)
15 */
16
17#include <stdint.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <assert.h>
21#include <unistd.h>
22#include <string.h>
23#include <bfd.h>
24#include <dis-asm.h>
25#include <sys/klog.h>
26#include <sys/types.h>
27#include <regex.h>
28
/*
 * Resolve the path of the running executable through the /proc/<pid>/exe
 * symlink and store it, NUL-terminated, in tpath.
 *
 * @tpath: destination buffer
 * @size:  capacity of tpath in bytes, including the terminating NUL
 *
 * A link target longer than size - 1 bytes is truncated. If readlink()
 * fails, tpath is set to the empty string. Nothing is written when size
 * is 0.
 */
static void get_exec_path(char *tpath, size_t size)
{
	char link[64];
	ssize_t len;

	if (size == 0)
		return;

	/* Build the link name in its own buffer so it is never truncated
	 * by a small destination buffer. */
	snprintf(link, sizeof(link), "/proc/%d/exe", (int) getpid());

	/* readlink() does not NUL-terminate and may fill the whole buffer,
	 * so reserve one byte for the terminator. */
	len = readlink(link, tpath, size - 1);
	if (len < 0)
		len = 0;

	tpath[len] = 0;
}
45
46static void get_asm_insns(uint8_t *image, size_t len, unsigned long base,
47 int opcodes)
48{
49 int count, i, pc = 0;
50 char tpath[256];
51 struct disassemble_info info;
52 disassembler_ftype disassemble;
53 bfd *bfdf;
54
55 memset(tpath, 0, sizeof(tpath));
56 get_exec_path(tpath, sizeof(tpath));
57
58 bfdf = bfd_openr(tpath, NULL);
59 assert(bfdf);
60 assert(bfd_check_format(bfdf, bfd_object));
61
62 init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf);
63 info.arch = bfd_get_arch(bfdf);
64 info.mach = bfd_get_mach(bfdf);
65 info.buffer = image;
66 info.buffer_length = len;
67
68 disassemble_init_for_target(&info);
69
70 disassemble = disassembler(bfdf);
71 assert(disassemble);
72
73 do {
74 printf("%4x:\t", pc);
75
76 count = disassemble(pc, &info);
77
78 if (opcodes) {
79 printf("\n\t");
80 for (i = 0; i < count; ++i)
81 printf("%02x ", (uint8_t) image[pc + i]);
82 }
83 printf("\n");
84
85 pc += count;
86 } while(count > 0 && pc < len);
87
88 bfd_close(bfdf);
89}
90
/*
 * Read the kernel log into a freshly allocated buffer.
 *
 * @klen: out parameter, set to the number of bytes read
 *
 * Returns a heap buffer that the caller releases with put_klog_buff().
 * The buffer holds *klen bytes of log data followed by a NUL byte, so it
 * can be handed to string functions. Any failure aborts via assert().
 */
static char *get_klog_buff(int *klen)
{
	/* klogctl() action codes, see syslog(2). */
	enum { KLOG_READ_ALL = 3, KLOG_SIZE_BUFFER = 10 };
	int ret, len;
	char *buff;

	assert(klen);

	len = klogctl(KLOG_SIZE_BUFFER, NULL, 0);
	assert(len >= 0);

	/* One extra byte for the terminating NUL. */
	buff = malloc((size_t) len + 1);
	assert(buff);

	ret = klogctl(KLOG_READ_ALL, buff, len);
	assert(ret >= 0);

	buff[ret] = 0;
	*klen = ret;

	return buff;
}
103
/* Release a kernel log buffer obtained from get_klog_buff(). */
static void put_klog_buff(char *buff)
{
	free(buff);
}
108
/*
 * Extract the most recent BPF JIT image dump from a kernel log.
 *
 * @haystack: kernel log text; modified in place (split with strtok()).
 *            NOTE(review): it is passed to regexec()/sscanf()/strtok(),
 *            so it has to be NUL-terminated -- confirm the caller does so.
 * @hlen:     length of haystack in bytes
 * @image:    output buffer for the decoded opcode bytes
 * @ilen:     capacity of image in bytes
 * @base:     out parameter, set to the "image=" address of the dump
 *
 * The last "flen=.. proglen=.. pass=.. image=.." header in the log is
 * located, then every following "JIT code" line is decoded: the hex bytes
 * after the last ':' on the line are appended to image.
 *
 * Returns the number of bytes stored, or 0 if the log is empty or holds no
 * parsable header. Aborts via assert() if the byte count differs from the
 * announced proglen.
 */
static int get_last_jit_image(char *haystack, size_t hlen,
			      uint8_t *image, size_t ilen,
			      unsigned long *base)
{
	char *ptr, *pptr, *tmp, *colon;
	char *last = NULL;
	off_t off = 0;
	int ret, flen, proglen, pass;
	size_t ulen = 0;
	regmatch_t pmatch[1];
	regex_t regex;

	if (hlen == 0)
		return 0;

	ret = regcomp(&regex, "flen=[[:alnum:]]+ proglen=[[:digit:]]+ "
		      "pass=[[:digit:]]+ image=[[:xdigit:]]+", REG_EXTENDED);
	assert(ret == 0);

	/* Walk all headers; remember where the last one starts and ends. */
	ptr = haystack;
	while (regexec(&regex, ptr, 1, pmatch, 0) == 0) {
		last = ptr + pmatch[0].rm_so;
		ptr += pmatch[0].rm_eo;
		off += pmatch[0].rm_eo;
		assert((size_t) off < hlen);
	}

	/* The compiled regex is not needed past this point. */
	regfree(&regex);

	/* No header at all: nothing to decode. */
	if (!last)
		return 0;

	ret = sscanf(last, "flen=%d proglen=%d pass=%d image=%lx",
		     &flen, &proglen, &pass, base);
	if (ret != 4)
		return 0;

	tmp = haystack + off;
	while (ulen < ilen && (ptr = strtok(tmp, "\n")) != NULL) {
		tmp = NULL;
		if (!strstr(ptr, "JIT code"))
			continue;

		/* The hex bytes follow the last ':' on the line. */
		colon = strrchr(ptr, ':');
		pptr = colon ? colon + 1 : ptr;

		while (ulen < ilen) {
			char *end;
			unsigned long byte = strtoul(pptr, &end, 16);

			/* No progress means no more hex bytes on this line. */
			if (end == pptr)
				break;

			image[ulen++] = (uint8_t) byte;
			pptr = end;
		}
	}

	assert(proglen >= 0 && ulen == (size_t) proglen);
	printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n",
	       proglen, pass, flen);
	printf("%lx + <x>:\n", *base);

	return (int) ulen;
}
170
/*
 * Entry point: read the kernel log, pick the last JIT image dump out of it
 * and print its disassembly. A trailing "-o" argument additionally prints
 * the raw opcode bytes; any other trailing argument prints the usage text.
 */
int main(int argc, char **argv)
{
	uint8_t image[4096];
	unsigned long base;
	char *kbuff;
	int klen, len;
	int opcodes = 0;

	if (argc > 1) {
		/* Only the last argument is inspected. */
		if (strncmp("-o", argv[argc - 1], 2) != 0) {
			printf("usage: bpf_jit_disasm [-o: show opcodes]\n");
			exit(0);
		}
		opcodes = 1;
	}

	bfd_init();
	memset(image, 0, sizeof(image));

	kbuff = get_klog_buff(&klen);

	len = get_last_jit_image(kbuff, klen, image, sizeof(image), &base);
	/* base is only meaningful once an image was found (len > 0). */
	if (len > 0 && base > 0)
		get_asm_insns(image, len, base, opcodes);

	put_klog_buff(kbuff);

	return 0;
}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index fa6ea69f2e48..d4abc59ce1d9 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -4,6 +4,7 @@ TARGETS += efivarfs
4TARGETS += kcmp 4TARGETS += kcmp
5TARGETS += memory-hotplug 5TARGETS += memory-hotplug
6TARGETS += mqueue 6TARGETS += mqueue
7TARGETS += net
7TARGETS += ptrace 8TARGETS += ptrace
8TARGETS += soft-dirty 9TARGETS += soft-dirty
9TARGETS += vm 10TARGETS += vm
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
new file mode 100644
index 000000000000..00326629d4af
--- /dev/null
+++ b/tools/testing/selftests/net/.gitignore
@@ -0,0 +1,3 @@
1socket
2psock_fanout
3psock_tpacket
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
new file mode 100644
index 000000000000..750512ba2c88
--- /dev/null
+++ b/tools/testing/selftests/net/Makefile
@@ -0,0 +1,19 @@
1# Makefile for net selftests
2
3CC = $(CROSS_COMPILE)gcc
4CFLAGS = -Wall -O2 -g
5
6CFLAGS += -I../../../../usr/include/
7
8NET_PROGS = socket psock_fanout psock_tpacket
9
10all: $(NET_PROGS)
11%: %.c
12 $(CC) $(CFLAGS) -o $@ $^
13
14run_tests: all
15 @/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]"
16 @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]"
17
18clean:
19 $(RM) $(NET_PROGS)
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
new file mode 100644
index 000000000000..57b9c2b7c4ff
--- /dev/null
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -0,0 +1,312 @@
1/*
2 * Copyright 2013 Google Inc.
3 * Author: Willem de Bruijn (willemb@google.com)
4 *
5 * A basic test of packet socket fanout behavior.
6 *
7 * Control:
8 * - create fanout fails as expected with illegal flag combinations
9 * - join fanout fails as expected with diverging types or flags
10 *
11 * Datapath:
12 * Open a pair of packet sockets and a pair of INET sockets, send a known
13 * number of packets across the two INET sockets and count the number of
14 * packets enqueued onto the two packet sockets.
15 *
16 * The test currently runs for
17 * - PACKET_FANOUT_HASH
18 * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER
19 * - PACKET_FANOUT_LB
20 * - PACKET_FANOUT_CPU
21 * - PACKET_FANOUT_ROLLOVER
22 *
23 * Todo:
24 * - functionality: PACKET_FANOUT_FLAG_DEFRAG
25 *
26 * License (GPLv2):
27 *
28 * This program is free software; you can redistribute it and/or modify it
29 * under the terms and conditions of the GNU General Public License,
30 * version 2, as published by the Free Software Foundation.
31 *
32 * This program is distributed in the hope it will be useful, but WITHOUT
33 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
34 * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
35 * more details.
36 *
37 * You should have received a copy of the GNU General Public License along with
38 * this program; if not, write to the Free Software Foundation, Inc.,
39 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
40 */
41
42#define _GNU_SOURCE /* for sched_setaffinity */
43
44#include <arpa/inet.h>
45#include <errno.h>
46#include <fcntl.h>
47#include <linux/filter.h>
48#include <linux/if_packet.h>
49#include <net/ethernet.h>
50#include <netinet/ip.h>
51#include <netinet/udp.h>
52#include <poll.h>
53#include <sched.h>
54#include <stdint.h>
55#include <stdio.h>
56#include <stdlib.h>
57#include <string.h>
58#include <sys/mman.h>
59#include <sys/socket.h>
60#include <sys/stat.h>
61#include <sys/types.h>
62#include <unistd.h>
63
64#include "psock_lib.h"
65
66#define RING_NUM_FRAMES 20
67
/* Open a packet socket and join fanout group 0 in the given mode.
 *
 * @typeflags:   fanout type and flags, placed in the upper 16 bits of the
 *               PACKET_FANOUT option value
 * @num_packets: not used by this function
 *
 * @return -1 if the kernel rejects the mode, a valid socket otherwise.
 * Exits the process if the socket cannot be created or closed. */
static int sock_fanout_open(uint16_t typeflags, int num_packets)
{
	unsigned int val;
	int fd;

	fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
	if (fd < 0) {
		perror("socket packet");
		exit(1);
	}

	/* fanout group ID is always 0: tests whether old groups are deleted.
	 * Shift as unsigned: flag bit 0x8000 would otherwise be shifted into
	 * the sign bit of an int, which is undefined behaviour. */
	val = (unsigned int) typeflags << 16;
	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
		if (close(fd)) {
			perror("close packet");
			exit(1);
		}
		return -1;
	}

	pair_udp_setfilter(fd);
	return fd;
}
93
94static char *sock_fanout_open_ring(int fd)
95{
96 struct tpacket_req req = {
97 .tp_block_size = getpagesize(),
98 .tp_frame_size = getpagesize(),
99 .tp_block_nr = RING_NUM_FRAMES,
100 .tp_frame_nr = RING_NUM_FRAMES,
101 };
102 char *ring;
103 int val = TPACKET_V2;
104
105 if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val,
106 sizeof(val))) {
107 perror("packetsock ring setsockopt version");
108 exit(1);
109 }
110 if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req,
111 sizeof(req))) {
112 perror("packetsock ring setsockopt");
113 exit(1);
114 }
115
116 ring = mmap(0, req.tp_block_size * req.tp_block_nr,
117 PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
118 if (!ring) {
119 fprintf(stderr, "packetsock ring mmap\n");
120 exit(1);
121 }
122
123 return ring;
124}
125
126static int sock_fanout_read_ring(int fd, void *ring)
127{
128 struct tpacket2_hdr *header = ring;
129 int count = 0;
130
131 while (header->tp_status & TP_STATUS_USER && count < RING_NUM_FRAMES) {
132 count++;
133 header = ring + (count * getpagesize());
134 }
135
136 return count;
137}
138
/* Compare the fill level of both rings against an expected pair.
 *
 * The pair matches in either order: {a, b} is accepted for rings holding
 * a,b as well as b,a. The observed and expected counts are logged to stderr.
 *
 * @return 0 on match, 1 otherwise. */
static int sock_fanout_read(int fds[], char *rings[], const int expect[])
{
	const int got0 = sock_fanout_read_ring(fds[0], rings[0]);
	const int got1 = sock_fanout_read_ring(fds[1], rings[1]);
	int in_order, swapped;

	fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
		got0, got1, expect[0], expect[1]);

	in_order = (got0 == expect[0] && got1 == expect[1]);
	swapped = (got0 == expect[1] && got1 == expect[0]);
	if (in_order || swapped)
		return 0;

	fprintf(stderr, "ERROR: incorrect queue lengths\n");
	return 1;
}
157
/* Control test: rollover mode combined with the rollover flag is an illegal
 * combination, so opening such a socket has to fail. Exits on success. */
static void test_control_single(void)
{
	const uint16_t dual_rollover = PACKET_FANOUT_ROLLOVER |
				       PACKET_FANOUT_FLAG_ROLLOVER;

	fprintf(stderr, "test: control single socket\n");

	if (sock_fanout_open(dual_rollover, 0) == -1)
		return;

	fprintf(stderr, "ERROR: opened socket with dual rollover\n");
	exit(1);
}
169
/* Control test: once a HASH group exists, joining it with different flags
 * or a different mode has to fail, while joining with the same mode has to
 * succeed. Any deviation exits the process. */
static void test_control_group(void)
{
	int first, second;

	fprintf(stderr, "test: control multiple sockets\n");

	first = sock_fanout_open(PACKET_FANOUT_HASH, 20);
	if (first == -1) {
		fprintf(stderr, "ERROR: failed to open HASH socket\n");
		exit(1);
	}

	/* Same mode, extra defrag flag. */
	if (sock_fanout_open(PACKET_FANOUT_HASH |
			     PACKET_FANOUT_FLAG_DEFRAG, 10) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
		exit(1);
	}

	/* Same mode, extra rollover flag. */
	if (sock_fanout_open(PACKET_FANOUT_HASH |
			     PACKET_FANOUT_FLAG_ROLLOVER, 10) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong flag ro\n");
		exit(1);
	}

	/* Different mode. */
	if (sock_fanout_open(PACKET_FANOUT_CPU, 10) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong mode\n");
		exit(1);
	}

	/* Identical mode and flags: this join must work. */
	second = sock_fanout_open(PACKET_FANOUT_HASH, 20);
	if (second == -1) {
		fprintf(stderr, "ERROR: failed to join group\n");
		exit(1);
	}

	if (close(second) || close(first)) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}
206
207static int test_datapath(uint16_t typeflags, int port_off,
208 const int expect1[], const int expect2[])
209{
210 const int expect0[] = { 0, 0 };
211 char *rings[2];
212 int fds[2], fds_udp[2][2], ret;
213
214 fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
215
216 fds[0] = sock_fanout_open(typeflags, 20);
217 fds[1] = sock_fanout_open(typeflags, 20);
218 if (fds[0] == -1 || fds[1] == -1) {
219 fprintf(stderr, "ERROR: failed open\n");
220 exit(1);
221 }
222 rings[0] = sock_fanout_open_ring(fds[0]);
223 rings[1] = sock_fanout_open_ring(fds[1]);
224 pair_udp_open(fds_udp[0], PORT_BASE);
225 pair_udp_open(fds_udp[1], PORT_BASE + port_off);
226 sock_fanout_read(fds, rings, expect0);
227
228 /* Send data, but not enough to overflow a queue */
229 pair_udp_send(fds_udp[0], 15);
230 pair_udp_send(fds_udp[1], 5);
231 ret = sock_fanout_read(fds, rings, expect1);
232
233 /* Send more data, overflow the queue */
234 pair_udp_send(fds_udp[0], 15);
235 /* TODO: ensure consistent order between expect1 and expect2 */
236 ret |= sock_fanout_read(fds, rings, expect2);
237
238 if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) ||
239 munmap(rings[0], RING_NUM_FRAMES * getpagesize())) {
240 fprintf(stderr, "close rings\n");
241 exit(1);
242 }
243 if (close(fds_udp[1][1]) || close(fds_udp[1][0]) ||
244 close(fds_udp[0][1]) || close(fds_udp[0][0]) ||
245 close(fds[1]) || close(fds[0])) {
246 fprintf(stderr, "close datapath\n");
247 exit(1);
248 }
249
250 return ret;
251}
252
/* Pin the calling process to a single CPU.
 *
 * @return 0 on success, 1 if sched_setaffinity() fails with EINVAL.
 * Any other failure exits the process. */
static int set_cpuaffinity(int cpuid)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpuid, &mask);

	if (sched_setaffinity(0, sizeof(mask), &mask) == 0)
		return 0;

	if (errno == EINVAL)
		return 1;

	fprintf(stderr, "setaffinity %d\n", cpuid);
	exit(1);
}
269
/* Run the control tests, then the datapath test for every fanout mode.
 * Returns 0 and prints a success line if all datapath tests passed,
 * 1 otherwise. */
int main(int argc, char **argv)
{
	/* { counts after first send }, { counts after second send } */
	const int expect_hash[2][2]	= { { 15, 5 },  { 20, 5 } };
	const int expect_hash_rb[2][2]	= { { 15, 5 },  { 20, 15 } };
	const int expect_lb[2][2]	= { { 10, 10 }, { 18, 17 } };
	const int expect_rb[2][2]	= { { 20, 0 },  { 20, 15 } };
	const int expect_cpu0[2][2]	= { { 20, 0 },  { 20, 0 } };
	const int expect_cpu1[2][2]	= { { 0, 20 },  { 0, 20 } };
	int port_off = 2, tries = 5, ret;

	test_control_single();
	test_control_group();

	/* find a set of ports that do not collide onto the same socket */
	for (;;) {
		ret = test_datapath(PACKET_FANOUT_HASH, port_off,
				    expect_hash[0], expect_hash[1]);
		if (!ret || !tries--)
			break;
		fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
		++port_off;
	}

	ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
			     port_off, expect_hash_rb[0], expect_hash_rb[1]);
	ret |= test_datapath(PACKET_FANOUT_LB,
			     port_off, expect_lb[0], expect_lb[1]);
	ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
			     port_off, expect_rb[0], expect_rb[1]);

	set_cpuaffinity(0);
	ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
			     expect_cpu0[0], expect_cpu0[1]);
	/* The second CPU run is skipped when CPU 1 cannot be selected. */
	if (set_cpuaffinity(1) == 0) {
		/* TODO: test that choice alternates with previous */
		ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
				     expect_cpu1[0], expect_cpu1[1]);
	}

	if (ret)
		return 1;

	printf("OK. All tests passed\n");
	return 0;
}
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
new file mode 100644
index 000000000000..37da54ac85a9
--- /dev/null
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -0,0 +1,127 @@
1/*
2 * Copyright 2013 Google Inc.
3 * Author: Willem de Bruijn <willemb@google.com>
4 * Daniel Borkmann <dborkman@redhat.com>
5 *
6 * License (GPLv2):
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#ifndef PSOCK_LIB_H
23#define PSOCK_LIB_H
24
25#include <sys/types.h>
26#include <sys/socket.h>
27#include <string.h>
28#include <arpa/inet.h>
29#include <unistd.h>
30
31#define DATA_LEN 100
32#define DATA_CHAR 'a'
33
34#define PORT_BASE 8000
35
36#ifndef __maybe_unused
37# define __maybe_unused __attribute__ ((__unused__))
38#endif
39
40static __maybe_unused void pair_udp_setfilter(int fd)
41{
42 struct sock_filter bpf_filter[] = {
43 { 0x80, 0, 0, 0x00000000 }, /* LD pktlen */
44 { 0x35, 0, 5, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/
45 { 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */
46 { 0x15, 0, 3, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/
47 { 0x30, 0, 0, 0x00000051 }, /* LD ip[81] */
48 { 0x15, 0, 1, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/
49 { 0x06, 0, 0, 0x00000060 }, /* RET match */
50 { 0x06, 0, 0, 0x00000000 }, /* RET no match */
51 };
52 struct sock_fprog bpf_prog;
53
54 bpf_prog.filter = bpf_filter;
55 bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
56 if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
57 sizeof(bpf_prog))) {
58 perror("setsockopt SO_ATTACH_FILTER");
59 exit(1);
60 }
61}
62
63static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
64{
65 struct sockaddr_in saddr, daddr;
66
67 fds[0] = socket(PF_INET, SOCK_DGRAM, 0);
68 fds[1] = socket(PF_INET, SOCK_DGRAM, 0);
69 if (fds[0] == -1 || fds[1] == -1) {
70 fprintf(stderr, "ERROR: socket dgram\n");
71 exit(1);
72 }
73
74 memset(&saddr, 0, sizeof(saddr));
75 saddr.sin_family = AF_INET;
76 saddr.sin_port = htons(port);
77 saddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
78
79 memset(&daddr, 0, sizeof(daddr));
80 daddr.sin_family = AF_INET;
81 daddr.sin_port = htons(port + 1);
82 daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
83
84 /* must bind both to get consistent hash result */
85 if (bind(fds[1], (void *) &daddr, sizeof(daddr))) {
86 perror("bind");
87 exit(1);
88 }
89 if (bind(fds[0], (void *) &saddr, sizeof(saddr))) {
90 perror("bind");
91 exit(1);
92 }
93 if (connect(fds[0], (void *) &daddr, sizeof(daddr))) {
94 perror("connect");
95 exit(1);
96 }
97}
98
99static __maybe_unused void pair_udp_send(int fds[], int num)
100{
101 char buf[DATA_LEN], rbuf[DATA_LEN];
102
103 memset(buf, DATA_CHAR, sizeof(buf));
104 while (num--) {
105 /* Should really handle EINTR and EAGAIN */
106 if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) {
107 fprintf(stderr, "ERROR: send failed left=%d\n", num);
108 exit(1);
109 }
110 if (read(fds[1], rbuf, sizeof(rbuf)) != sizeof(rbuf)) {
111 fprintf(stderr, "ERROR: recv failed left=%d\n", num);
112 exit(1);
113 }
114 if (memcmp(buf, rbuf, sizeof(buf))) {
115 fprintf(stderr, "ERROR: data failed left=%d\n", num);
116 exit(1);
117 }
118 }
119}
120
121static __maybe_unused void pair_udp_close(int fds[])
122{
123 close(fds[0]);
124 close(fds[1]);
125}
126
127#endif /* PSOCK_LIB_H */
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
new file mode 100644
index 000000000000..c41b58640a05
--- /dev/null
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -0,0 +1,824 @@
1/*
2 * Copyright 2013 Red Hat, Inc.
3 * Author: Daniel Borkmann <dborkman@redhat.com>
4 *
5 * A basic test of packet socket's TPACKET_V1/TPACKET_V2/TPACKET_V3 behavior.
6 *
7 * Control:
8 * Test the setup of the TPACKET socket with different patterns that are
9 * known to fail (TODO) resp. succeed (OK).
10 *
11 * Datapath:
12 * Open a pair of packet sockets and send resp. receive an a priori known
13 * packet pattern across the sockets and check if it was received resp.
14 * sent correctly. Fanout in combination with RX_RING is currently not
15 * tested here.
16 *
17 * The test currently runs for
18 * - TPACKET_V1: RX_RING, TX_RING
19 * - TPACKET_V2: RX_RING, TX_RING
20 * - TPACKET_V3: RX_RING
21 *
22 * License (GPLv2):
23 *
24 * This program is free software; you can redistribute it and/or modify it
25 * under the terms and conditions of the GNU General Public License,
26 * version 2, as published by the Free Software Foundation.
27 *
28 * This program is distributed in the hope it will be useful, but WITHOUT
29 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
30 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
31 * more details.
32 *
33 * You should have received a copy of the GNU General Public License along with
34 * this program; if not, write to the Free Software Foundation, Inc.,
35 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
36 */
37
38#include <stdio.h>
39#include <stdlib.h>
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <sys/socket.h>
43#include <sys/mman.h>
44#include <linux/if_packet.h>
45#include <linux/filter.h>
46#include <ctype.h>
47#include <fcntl.h>
48#include <unistd.h>
49#include <bits/wordsize.h>
50#include <net/ethernet.h>
51#include <netinet/ip.h>
52#include <arpa/inet.h>
53#include <stdint.h>
54#include <string.h>
55#include <assert.h>
56#include <net/if.h>
57#include <inttypes.h>
58#include <poll.h>
59
60#include "psock_lib.h"
61
/* Abort (via assert) when @cond is true. */
#ifndef bug_on
# define bug_on(cond)		assert(!(cond))
#endif

/* Align a member to TPACKET_ALIGNMENT. */
#ifndef __aligned_tpacket
# define __aligned_tpacket	__attribute__((aligned(TPACKET_ALIGNMENT)))
#endif

/* Align a member to TPACKET_ALIGN(x). */
#ifndef __align_tpacket
# define __align_tpacket(x)	__attribute__((aligned(TPACKET_ALIGN(x))))
#endif

/* Round @x up to the next multiple of 8. */
#define ALIGN_8(x)		(((x) + 8 - 1) & ~(8 - 1))

/* Accessors for the fields of a struct block_desc. */
#define BLOCK_STATUS(x)		((x)->h1.block_status)
#define BLOCK_NUM_PKTS(x)	((x)->h1.num_pkts)
#define BLOCK_O2FP(x)		((x)->h1.offset_to_first_pkt)
#define BLOCK_LEN(x)		((x)->h1.blk_len)
#define BLOCK_SNUM(x)		((x)->h1.seq_num)
#define BLOCK_O2PRIV(x)		((x)->offset_to_priv)
#define BLOCK_PRIV(x)		((void *) ((uint8_t *) (x) + BLOCK_O2PRIV(x)))

/* Size of the block header, alone and followed by @sz_pri private bytes. */
#define BLOCK_HDR_LEN		(ALIGN_8(sizeof(struct block_desc)))
#define BLOCK_PLUS_PRIV(sz_pri)	(BLOCK_HDR_LEN + ALIGN_8((sz_pri)))

/* Number of UDP datagrams / ring frames exchanged per test. */
#define NUM_PACKETS		100
86
/* State of one mmap'ed TPACKET ring under test. */
struct ring {
	struct iovec *rd;		/* one descriptor per frame (v1/v2) or block (v3) */
	uint8_t *mm_space;		/* start of the mmap'ed ring */
	size_t mm_len;			/* length of the mapping in bytes */
	size_t rd_len;			/* size of the rd array in bytes */
	struct sockaddr_ll ll;		/* address the packet socket is bound to */
	void (*walk)(int sock, struct ring *ring);	/* version specific walker */
	int type;			/* PACKET_RX_RING or PACKET_TX_RING */
	int rd_num;			/* number of entries in rd */
	int flen;			/* length covered by each rd entry */
	int version;			/* TPACKET_V1, TPACKET_V2 or TPACKET_V3 */
	union {
		struct tpacket_req req;		/* ring request for v1/v2 */
		struct tpacket_req3 req3;	/* ring request for v3 */
	};
};
99
/* Header found at the start of every TPACKET_V3 block in the ring. */
struct block_desc {
	uint32_t version;
	uint32_t offset_to_priv;	/* byte offset from block start to the private area */
	struct tpacket_hdr_v1 h1;	/* status, packet count, length, sequence number */
};
105
106union frame_map {
107 struct {
108 struct tpacket_hdr tp_h __aligned_tpacket;
109 struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket_hdr));
110 } *v1;
111 struct {
112 struct tpacket2_hdr tp_h __aligned_tpacket;
113 struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr));
114 } *v2;
115 void *raw;
116};
117
/* Running counters for the current test; reset by setup_ring(). */
static unsigned int total_packets;
static unsigned int total_bytes;
119
/*
 * Create a raw PF_PACKET socket for all protocols and select TPACKET
 * version @ver on it. Returns the socket; exits on failure.
 */
static int pfsocket(int ver)
{
	int sock;

	sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (sock == -1) {
		perror("socket");
		exit(1);
	}

	if (setsockopt(sock, SOL_PACKET, PACKET_VERSION,
		       &ver, sizeof(ver)) == -1) {
		perror("setsockopt");
		exit(1);
	}

	return sock;
}
136
137static void status_bar_update(void)
138{
139 if (total_packets % 10 == 0) {
140 fprintf(stderr, ".");
141 fflush(stderr);
142 }
143}
144
/*
 * Sanity check a received frame: it must be at least as long as an Ethernet
 * header and carry the ETH_P_IP ethertype. Exits with a diagnostic otherwise.
 */
static void test_payload(void *pay, size_t len)
{
	const struct ethhdr *hdr = pay;

	if (len < sizeof(*hdr)) {
		fprintf(stderr,
			"test_payload: packet too small: %zu bytes!\n", len);
		exit(1);
	}

	if (ntohs(hdr->h_proto) != ETH_P_IP) {
		fprintf(stderr,
			"test_payload: wrong ethernet type: 0x%x!\n",
			ntohs(hdr->h_proto));
		exit(1);
	}
}
161
162static void create_payload(void *pay, size_t *len)
163{
164 int i;
165 struct ethhdr *eth = pay;
166 struct iphdr *ip = pay + sizeof(*eth);
167
168 /* Lets create some broken crap, that still passes
169 * our BPF filter.
170 */
171
172 *len = DATA_LEN + 42;
173
174 memset(pay, 0xff, ETH_ALEN * 2);
175 eth->h_proto = htons(ETH_P_IP);
176
177 for (i = 0; i < sizeof(*ip); ++i)
178 ((uint8_t *) pay)[i + sizeof(*eth)] = (uint8_t) rand();
179
180 ip->ihl = 5;
181 ip->version = 4;
182 ip->protocol = 0x11;
183 ip->frag_off = 0;
184 ip->ttl = 64;
185 ip->tot_len = htons((uint16_t) *len - sizeof(*eth));
186
187 ip->saddr = htonl(INADDR_LOOPBACK);
188 ip->daddr = htonl(INADDR_LOOPBACK);
189
190 memset(pay + sizeof(*eth) + sizeof(*ip),
191 DATA_CHAR, DATA_LEN);
192}
193
/* Non-zero when the TP_STATUS_USER bit is set in the v1 frame status. */
static inline int __v1_rx_kernel_ready(struct tpacket_hdr *hdr)
{
	const unsigned long status = hdr->tp_status;

	return (status & TP_STATUS_USER) == TP_STATUS_USER;
}
198
/*
 * Mark a v1 RX frame as TP_STATUS_KERNEL and issue a full memory barrier
 * after the status store.
 */
static inline void __v1_rx_user_ready(struct tpacket_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_KERNEL;
	__sync_synchronize();
}
204
/* Non-zero when the TP_STATUS_USER bit is set in the v2 frame status. */
static inline int __v2_rx_kernel_ready(struct tpacket2_hdr *hdr)
{
	const uint32_t status = hdr->tp_status;

	return (status & TP_STATUS_USER) == TP_STATUS_USER;
}
209
/*
 * Mark a v2 RX frame as TP_STATUS_KERNEL and issue a full memory barrier
 * after the status store.
 */
static inline void __v2_rx_user_ready(struct tpacket2_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_KERNEL;
	__sync_synchronize();
}
215
/*
 * Version dispatcher for the RX "frame has TP_STATUS_USER set" check.
 * Any version other than TPACKET_V1/TPACKET_V2 trips bug_on().
 */
static inline int __v1_v2_rx_kernel_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		return __v1_rx_kernel_ready(base);
	if (version == TPACKET_V2)
		return __v2_rx_kernel_ready(base);

	bug_on(1);
	return 0;
}
228
/*
 * Version dispatcher for setting an RX frame back to TP_STATUS_KERNEL.
 * Versions other than TPACKET_V1/TPACKET_V2 are silently ignored.
 */
static inline void __v1_v2_rx_user_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		__v1_rx_user_ready(base);
	else if (version == TPACKET_V2)
		__v2_rx_user_ready(base);
}
240
241static void walk_v1_v2_rx(int sock, struct ring *ring)
242{
243 struct pollfd pfd;
244 int udp_sock[2];
245 union frame_map ppd;
246 unsigned int frame_num = 0;
247
248 bug_on(ring->type != PACKET_RX_RING);
249
250 pair_udp_open(udp_sock, PORT_BASE);
251 pair_udp_setfilter(sock);
252
253 memset(&pfd, 0, sizeof(pfd));
254 pfd.fd = sock;
255 pfd.events = POLLIN | POLLERR;
256 pfd.revents = 0;
257
258 pair_udp_send(udp_sock, NUM_PACKETS);
259
260 while (total_packets < NUM_PACKETS * 2) {
261 while (__v1_v2_rx_kernel_ready(ring->rd[frame_num].iov_base,
262 ring->version)) {
263 ppd.raw = ring->rd[frame_num].iov_base;
264
265 switch (ring->version) {
266 case TPACKET_V1:
267 test_payload((uint8_t *) ppd.raw + ppd.v1->tp_h.tp_mac,
268 ppd.v1->tp_h.tp_snaplen);
269 total_bytes += ppd.v1->tp_h.tp_snaplen;
270 break;
271
272 case TPACKET_V2:
273 test_payload((uint8_t *) ppd.raw + ppd.v2->tp_h.tp_mac,
274 ppd.v2->tp_h.tp_snaplen);
275 total_bytes += ppd.v2->tp_h.tp_snaplen;
276 break;
277 }
278
279 status_bar_update();
280 total_packets++;
281
282 __v1_v2_rx_user_ready(ppd.raw, ring->version);
283
284 frame_num = (frame_num + 1) % ring->rd_num;
285 }
286
287 poll(&pfd, 1, 1);
288 }
289
290 pair_udp_close(udp_sock);
291
292 if (total_packets != 2 * NUM_PACKETS) {
293 fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
294 ring->version, total_packets, NUM_PACKETS);
295 exit(1);
296 }
297
298 fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
299}
300
/*
 * Non-zero when neither TP_STATUS_SEND_REQUEST nor TP_STATUS_SENDING is set
 * in the v1 frame status.
 */
static inline int __v1_tx_kernel_ready(struct tpacket_hdr *hdr)
{
	const unsigned long busy = TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING;

	return (hdr->tp_status & busy) == 0;
}
305
/*
 * Mark a v1 TX frame as TP_STATUS_SEND_REQUEST and issue a full memory
 * barrier after the status store.
 */
static inline void __v1_tx_user_ready(struct tpacket_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_SEND_REQUEST;
	__sync_synchronize();
}
311
/*
 * Non-zero when neither TP_STATUS_SEND_REQUEST nor TP_STATUS_SENDING is set
 * in the v2 frame status.
 */
static inline int __v2_tx_kernel_ready(struct tpacket2_hdr *hdr)
{
	const uint32_t busy = TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING;

	return (hdr->tp_status & busy) == 0;
}
316
/*
 * Mark a v2 TX frame as TP_STATUS_SEND_REQUEST and issue a full memory
 * barrier after the status store.
 */
static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_SEND_REQUEST;
	__sync_synchronize();
}
322
/*
 * Version dispatcher for the TX "frame is free to be filled" check.
 * Any version other than TPACKET_V1/TPACKET_V2 trips bug_on().
 */
static inline int __v1_v2_tx_kernel_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		return __v1_tx_kernel_ready(base);
	if (version == TPACKET_V2)
		return __v2_tx_kernel_ready(base);

	bug_on(1);
	return 0;
}
335
/*
 * Version dispatcher for flagging a TX frame as TP_STATUS_SEND_REQUEST.
 * Versions other than TPACKET_V1/TPACKET_V2 are silently ignored.
 */
static inline void __v1_v2_tx_user_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		__v1_tx_user_ready(base);
	else if (version == TPACKET_V2)
		__v2_tx_user_ready(base);
}
347
/* Enable the PACKET_LOSS socket option on @sock; exits on failure. */
static void __v1_v2_set_packet_loss_discard(int sock)
{
	int discard = 1;

	if (setsockopt(sock, SOL_PACKET, PACKET_LOSS,
		       (void *) &discard, sizeof(discard)) == -1) {
		perror("setsockopt");
		exit(1);
	}
}
359
360static void walk_v1_v2_tx(int sock, struct ring *ring)
361{
362 struct pollfd pfd;
363 int rcv_sock, ret;
364 size_t packet_len;
365 union frame_map ppd;
366 char packet[1024];
367 unsigned int frame_num = 0, got = 0;
368 struct sockaddr_ll ll = {
369 .sll_family = PF_PACKET,
370 .sll_halen = ETH_ALEN,
371 };
372
373 bug_on(ring->type != PACKET_TX_RING);
374 bug_on(ring->rd_num < NUM_PACKETS);
375
376 rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
377 if (rcv_sock == -1) {
378 perror("socket");
379 exit(1);
380 }
381
382 pair_udp_setfilter(rcv_sock);
383
384 ll.sll_ifindex = if_nametoindex("lo");
385 ret = bind(rcv_sock, (struct sockaddr *) &ll, sizeof(ll));
386 if (ret == -1) {
387 perror("bind");
388 exit(1);
389 }
390
391 memset(&pfd, 0, sizeof(pfd));
392 pfd.fd = sock;
393 pfd.events = POLLOUT | POLLERR;
394 pfd.revents = 0;
395
396 total_packets = NUM_PACKETS;
397 create_payload(packet, &packet_len);
398
399 while (total_packets > 0) {
400 while (__v1_v2_tx_kernel_ready(ring->rd[frame_num].iov_base,
401 ring->version) &&
402 total_packets > 0) {
403 ppd.raw = ring->rd[frame_num].iov_base;
404
405 switch (ring->version) {
406 case TPACKET_V1:
407 ppd.v1->tp_h.tp_snaplen = packet_len;
408 ppd.v1->tp_h.tp_len = packet_len;
409
410 memcpy((uint8_t *) ppd.raw + TPACKET_HDRLEN -
411 sizeof(struct sockaddr_ll), packet,
412 packet_len);
413 total_bytes += ppd.v1->tp_h.tp_snaplen;
414 break;
415
416 case TPACKET_V2:
417 ppd.v2->tp_h.tp_snaplen = packet_len;
418 ppd.v2->tp_h.tp_len = packet_len;
419
420 memcpy((uint8_t *) ppd.raw + TPACKET2_HDRLEN -
421 sizeof(struct sockaddr_ll), packet,
422 packet_len);
423 total_bytes += ppd.v2->tp_h.tp_snaplen;
424 break;
425 }
426
427 status_bar_update();
428 total_packets--;
429
430 __v1_v2_tx_user_ready(ppd.raw, ring->version);
431
432 frame_num = (frame_num + 1) % ring->rd_num;
433 }
434
435 poll(&pfd, 1, 1);
436 }
437
438 bug_on(total_packets != 0);
439
440 ret = sendto(sock, NULL, 0, 0, NULL, 0);
441 if (ret == -1) {
442 perror("sendto");
443 exit(1);
444 }
445
446 while ((ret = recvfrom(rcv_sock, packet, sizeof(packet),
447 0, NULL, NULL)) > 0 &&
448 total_packets < NUM_PACKETS) {
449 got += ret;
450 test_payload(packet, ret);
451
452 status_bar_update();
453 total_packets++;
454 }
455
456 close(rcv_sock);
457
458 if (total_packets != NUM_PACKETS) {
459 fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
460 ring->version, total_packets, NUM_PACKETS);
461 exit(1);
462 }
463
464 fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, got);
465}
466
467static void walk_v1_v2(int sock, struct ring *ring)
468{
469 if (ring->type == PACKET_RX_RING)
470 walk_v1_v2_rx(sock, ring);
471 else
472 walk_v1_v2_tx(sock, ring);
473}
474
/* Sequence number of the most recently checked v3 block. */
static uint64_t __v3_prev_block_seq_num = 0;

/*
 * Verify that the sequence number of @pbd is exactly one more than that of
 * the previously checked block, then remember it. Exits on mismatch.
 */
void __v3_test_block_seq_num(struct block_desc *pbd)
{
	const uint64_t expected = __v3_prev_block_seq_num + 1;
	const uint64_t actual = BLOCK_SNUM(pbd);

	if (expected != actual) {
		fprintf(stderr, "\nprev_block_seq_num:%"PRIu64", expected "
			"seq:%"PRIu64" != actual seq:%"PRIu64"\n",
			__v3_prev_block_seq_num, expected, actual);
		exit(1);
	}

	__v3_prev_block_seq_num = actual;
}
489
/*
 * Check the length recorded in a v3 block header.
 *
 * A block holding packets must report exactly @bytes. An empty block must
 * report the aligned header size plus the aligned 13-byte private area.
 * Exits with a diagnostic on mismatch.
 */
static void __v3_test_block_len(struct block_desc *pbd, uint32_t bytes, int block_num)
{
	/* expected length of a block that carries no packets */
	const size_t empty_len = BLOCK_PLUS_PRIV(13);

	if (BLOCK_NUM_PKTS(pbd)) {
		if (bytes != BLOCK_LEN(pbd)) {
			fprintf(stderr, "\nblock:%u with %upackets, expected "
				"len:%u != actual len:%u\n", block_num,
				BLOCK_NUM_PKTS(pbd), bytes, BLOCK_LEN(pbd));
			exit(1);
		}
	} else {
		if (BLOCK_LEN(pbd) != empty_len) {
			/* report the value that was actually compared against */
			fprintf(stderr, "\nblock:%u, expected len:%zu != "
				"actual len:%u\n", block_num, empty_len,
				BLOCK_LEN(pbd));
			exit(1);
		}
	}
}
508
/*
 * Validate a v3 block header: TP_STATUS_USER must be set in its status and
 * its sequence number must follow the previous block's. Exits otherwise.
 */
static void __v3_test_block_header(struct block_desc *pbd, const int block_num)
{
	const uint32_t status = BLOCK_STATUS(pbd);

	if (!(status & TP_STATUS_USER)) {
		fprintf(stderr, "\nblock %u: not in TP_STATUS_USER\n", block_num);
		exit(1);
	}

	__v3_test_block_seq_num(pbd);
}
520
521static void __v3_walk_block(struct block_desc *pbd, const int block_num)
522{
523 int num_pkts = BLOCK_NUM_PKTS(pbd), i;
524 unsigned long bytes = 0;
525 unsigned long bytes_with_padding = BLOCK_PLUS_PRIV(13);
526 struct tpacket3_hdr *ppd;
527
528 __v3_test_block_header(pbd, block_num);
529
530 ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd + BLOCK_O2FP(pbd));
531 for (i = 0; i < num_pkts; ++i) {
532 bytes += ppd->tp_snaplen;
533
534 if (ppd->tp_next_offset)
535 bytes_with_padding += ppd->tp_next_offset;
536 else
537 bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac);
538
539 test_payload((uint8_t *) ppd + ppd->tp_mac, ppd->tp_snaplen);
540
541 status_bar_update();
542 total_packets++;
543
544 ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset);
545 __sync_synchronize();
546 }
547
548 __v3_test_block_len(pbd, bytes_with_padding, block_num);
549 total_bytes += bytes;
550}
551
552void __v3_flush_block(struct block_desc *pbd)
553{
554 BLOCK_STATUS(pbd) = TP_STATUS_KERNEL;
555 __sync_synchronize();
556}
557
558static void walk_v3_rx(int sock, struct ring *ring)
559{
560 unsigned int block_num = 0;
561 struct pollfd pfd;
562 struct block_desc *pbd;
563 int udp_sock[2];
564
565 bug_on(ring->type != PACKET_RX_RING);
566
567 pair_udp_open(udp_sock, PORT_BASE);
568 pair_udp_setfilter(sock);
569
570 memset(&pfd, 0, sizeof(pfd));
571 pfd.fd = sock;
572 pfd.events = POLLIN | POLLERR;
573 pfd.revents = 0;
574
575 pair_udp_send(udp_sock, NUM_PACKETS);
576
577 while (total_packets < NUM_PACKETS * 2) {
578 pbd = (struct block_desc *) ring->rd[block_num].iov_base;
579
580 while ((BLOCK_STATUS(pbd) & TP_STATUS_USER) == 0)
581 poll(&pfd, 1, 1);
582
583 __v3_walk_block(pbd, block_num);
584 __v3_flush_block(pbd);
585
586 block_num = (block_num + 1) % ring->rd_num;
587 }
588
589 pair_udp_close(udp_sock);
590
591 if (total_packets != 2 * NUM_PACKETS) {
592 fprintf(stderr, "walk_v3_rx: received %u out of %u pkts\n",
593 total_packets, NUM_PACKETS);
594 exit(1);
595 }
596
597 fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
598}
599
600static void walk_v3(int sock, struct ring *ring)
601{
602 if (ring->type == PACKET_RX_RING)
603 walk_v3_rx(sock, ring);
604 else
605 bug_on(1);
606}
607
608static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
609{
610 ring->req.tp_block_size = getpagesize() << 2;
611 ring->req.tp_frame_size = TPACKET_ALIGNMENT << 7;
612 ring->req.tp_block_nr = blocks;
613
614 ring->req.tp_frame_nr = ring->req.tp_block_size /
615 ring->req.tp_frame_size *
616 ring->req.tp_block_nr;
617
618 ring->mm_len = ring->req.tp_block_size * ring->req.tp_block_nr;
619 ring->walk = walk_v1_v2;
620 ring->rd_num = ring->req.tp_frame_nr;
621 ring->flen = ring->req.tp_frame_size;
622}
623
624static void __v3_fill(struct ring *ring, unsigned int blocks)
625{
626 ring->req3.tp_retire_blk_tov = 64;
627 ring->req3.tp_sizeof_priv = 13;
628 ring->req3.tp_feature_req_word |= TP_FT_REQ_FILL_RXHASH;
629
630 ring->req3.tp_block_size = getpagesize() << 2;
631 ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7;
632 ring->req3.tp_block_nr = blocks;
633
634 ring->req3.tp_frame_nr = ring->req3.tp_block_size /
635 ring->req3.tp_frame_size *
636 ring->req3.tp_block_nr;
637
638 ring->mm_len = ring->req3.tp_block_size * ring->req3.tp_block_nr;
639 ring->walk = walk_v3;
640 ring->rd_num = ring->req3.tp_block_nr;
641 ring->flen = ring->req3.tp_block_size;
642}
643
644static void setup_ring(int sock, struct ring *ring, int version, int type)
645{
646 int ret = 0;
647 unsigned int blocks = 256;
648
649 ring->type = type;
650 ring->version = version;
651
652 switch (version) {
653 case TPACKET_V1:
654 case TPACKET_V2:
655 if (type == PACKET_TX_RING)
656 __v1_v2_set_packet_loss_discard(sock);
657 __v1_v2_fill(ring, blocks);
658 ret = setsockopt(sock, SOL_PACKET, type, &ring->req,
659 sizeof(ring->req));
660 break;
661
662 case TPACKET_V3:
663 __v3_fill(ring, blocks);
664 ret = setsockopt(sock, SOL_PACKET, type, &ring->req3,
665 sizeof(ring->req3));
666 break;
667 }
668
669 if (ret == -1) {
670 perror("setsockopt");
671 exit(1);
672 }
673
674 ring->rd_len = ring->rd_num * sizeof(*ring->rd);
675 ring->rd = malloc(ring->rd_len);
676 if (ring->rd == NULL) {
677 perror("malloc");
678 exit(1);
679 }
680
681 total_packets = 0;
682 total_bytes = 0;
683}
684
685static void mmap_ring(int sock, struct ring *ring)
686{
687 int i;
688
689 ring->mm_space = mmap(0, ring->mm_len, PROT_READ | PROT_WRITE,
690 MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0);
691 if (ring->mm_space == MAP_FAILED) {
692 perror("mmap");
693 exit(1);
694 }
695
696 memset(ring->rd, 0, ring->rd_len);
697 for (i = 0; i < ring->rd_num; ++i) {
698 ring->rd[i].iov_base = ring->mm_space + (i * ring->flen);
699 ring->rd[i].iov_len = ring->flen;
700 }
701}
702
703static void bind_ring(int sock, struct ring *ring)
704{
705 int ret;
706
707 ring->ll.sll_family = PF_PACKET;
708 ring->ll.sll_protocol = htons(ETH_P_ALL);
709 ring->ll.sll_ifindex = if_nametoindex("lo");
710 ring->ll.sll_hatype = 0;
711 ring->ll.sll_pkttype = 0;
712 ring->ll.sll_halen = 0;
713
714 ret = bind(sock, (struct sockaddr *) &ring->ll, sizeof(ring->ll));
715 if (ret == -1) {
716 perror("bind");
717 exit(1);
718 }
719}
720
721static void walk_ring(int sock, struct ring *ring)
722{
723 ring->walk(sock, ring);
724}
725
726static void unmap_ring(int sock, struct ring *ring)
727{
728 munmap(ring->mm_space, ring->mm_len);
729 free(ring->rd);
730}
731
/*
 * Estimate the kernel's word size from /proc/kallsyms.
 *
 * Counts the characters before the first whitespace in the file and returns
 * four bits per character. Exits if the file cannot be opened or read.
 */
static int test_kernel_bit_width(void)
{
	char in[512];
	int num = 0, fd;
	ssize_t ret;

	fd = open("/proc/kallsyms", O_RDONLY);
	if (fd == -1) {
		perror("open");
		exit(1);
	}

	ret = read(fd, in, sizeof(in));
	if (ret <= 0) {
		perror("read");
		exit(1);
	}

	close(fd);

	/*
	 * Only look at the bytes actually read: the buffer is not
	 * NUL-terminated and need not contain any whitespace.
	 */
	while (num < ret && !isspace((unsigned char) in[num]))
		num++;

	return num * 4;
}
760
/* Word size of this user-space build, as given by __WORDSIZE. */
static int test_user_bit_width(void)
{
	const int width = __WORDSIZE;

	return width;
}
765
/* Printable names of the TPACKET versions, indexed by version constant. */
static const char *tpacket_str[] = {
	[TPACKET_V3] = "TPACKET_V3",
	[TPACKET_V2] = "TPACKET_V2",
	[TPACKET_V1] = "TPACKET_V1",
};
771
/* Printable names of the ring types, indexed by socket option constant. */
static const char *type_str[] = {
	[PACKET_TX_RING] = "PACKET_TX_RING",
	[PACKET_RX_RING] = "PACKET_RX_RING",
};
776
777static int test_tpacket(int version, int type)
778{
779 int sock;
780 struct ring ring;
781
782 fprintf(stderr, "test: %s with %s ", tpacket_str[version],
783 type_str[type]);
784 fflush(stderr);
785
786 if (version == TPACKET_V1 &&
787 test_kernel_bit_width() != test_user_bit_width()) {
788 fprintf(stderr, "test: skip %s %s since user and kernel "
789 "space have different bit width\n",
790 tpacket_str[version], type_str[type]);
791 return 0;
792 }
793
794 sock = pfsocket(version);
795 memset(&ring, 0, sizeof(ring));
796 setup_ring(sock, &ring, version, type);
797 mmap_ring(sock, &ring);
798 bind_ring(sock, &ring);
799 walk_ring(sock, &ring);
800 unmap_ring(sock, &ring);
801 close(sock);
802
803 fprintf(stderr, "\n");
804 return 0;
805}
806
/*
 * Run every supported version/ring combination in order and report overall
 * success on stdout.
 */
int main(void)
{
	static const struct {
		int version;
		int type;
	} runs[] = {
		{ TPACKET_V1, PACKET_RX_RING },
		{ TPACKET_V1, PACKET_TX_RING },
		{ TPACKET_V2, PACKET_RX_RING },
		{ TPACKET_V2, PACKET_TX_RING },
		{ TPACKET_V3, PACKET_RX_RING },
	};
	int ret = 0;
	size_t i;

	for (i = 0; i < sizeof(runs) / sizeof(runs[0]); i++)
		ret |= test_tpacket(runs[i].version, runs[i].type);

	if (ret)
		return 1;

	printf("OK. All tests passed\n");
	return 0;
}
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
new file mode 100644
index 000000000000..5246e782d6e8
--- /dev/null
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -0,0 +1,26 @@
#!/bin/sh
#
# Run the AF_PACKET selftests (psock_fanout, psock_tpacket) from the current
# directory and print [PASS] or [FAIL] for each. The exit status is non-zero
# if any of the test programs failed.

if [ "$(id -u)" != 0 ]; then
	# Not an error: the tests are skipped when not run as root.
	echo "$0 must be run as root" >&2
	exit 0
fi

ret=0

echo "--------------------"
echo "running psock_fanout test"
echo "--------------------"
./psock_fanout
if [ $? -ne 0 ]; then
	echo "[FAIL]"
	ret=1
else
	echo "[PASS]"
fi

echo "--------------------"
echo "running psock_tpacket test"
echo "--------------------"
./psock_tpacket
if [ $? -ne 0 ]; then
	echo "[FAIL]"
	ret=1
else
	echo "[PASS]"
fi

exit $ret
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests
new file mode 100644
index 000000000000..c09a682df56a
--- /dev/null
+++ b/tools/testing/selftests/net/run_netsocktests
@@ -0,0 +1,12 @@
#!/bin/bash
#
# Run the socket selftest from the current directory and print [PASS] or
# [FAIL]. The exit status is non-zero if the test program failed.

ret=0

echo "--------------------"
echo "running socket test"
echo "--------------------"
./socket
if [ $? -ne 0 ]; then
	echo "[FAIL]"
	ret=1
else
	echo "[PASS]"
fi

exit $ret
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
new file mode 100644
index 000000000000..0f227f2f9be9
--- /dev/null
+++ b/tools/testing/selftests/net/socket.c
@@ -0,0 +1,92 @@
1#include <stdio.h>
2#include <errno.h>
3#include <unistd.h>
4#include <string.h>
5#include <sys/types.h>
6#include <sys/socket.h>
7#include <netinet/in.h>
8
/* One socket(domain, type, protocol) call and its expected outcome. */
struct socket_testcase {
	/* arguments passed to socket() */
	int	domain;
	int	type;
	int	protocol;

	/* expected result: 0 for a valid file descriptor, -errno for an error */
	int	expect;

	/* non-zero: also accept EAFNOSUPPORT, for the case of the protocol
	 * not being configured into the kernel
	 */
	int	nosupport_ok;
};
24
25static struct socket_testcase tests[] = {
26 { AF_MAX, 0, 0, -EAFNOSUPPORT, 0 },
27 { AF_INET, SOCK_STREAM, IPPROTO_TCP, 0, 1 },
28 { AF_INET, SOCK_DGRAM, IPPROTO_TCP, -EPROTONOSUPPORT, 1 },
29 { AF_INET, SOCK_DGRAM, IPPROTO_UDP, 0, 1 },
30 { AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1 },
31};
32
/* Number of elements in a true array (not a pointer). */
#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof(*(arr)))
/* Size of the buffers handed to strerror_r(). */
#define ERR_STRING_SZ	64
35
36static int run_tests(void)
37{
38 char err_string1[ERR_STRING_SZ];
39 char err_string2[ERR_STRING_SZ];
40 int i, err;
41
42 err = 0;
43 for (i = 0; i < ARRAY_SIZE(tests); i++) {
44 struct socket_testcase *s = &tests[i];
45 int fd;
46
47 fd = socket(s->domain, s->type, s->protocol);
48 if (fd < 0) {
49 if (s->nosupport_ok &&
50 errno == EAFNOSUPPORT)
51 continue;
52
53 if (s->expect < 0 &&
54 errno == -s->expect)
55 continue;
56
57 strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
58 strerror_r(errno, err_string2, ERR_STRING_SZ);
59
60 fprintf(stderr, "socket(%d, %d, %d) expected "
61 "err (%s) got (%s)\n",
62 s->domain, s->type, s->protocol,
63 err_string1, err_string2);
64
65 err = -1;
66 break;
67 } else {
68 close(fd);
69
70 if (s->expect < 0) {
71 strerror_r(errno, err_string1, ERR_STRING_SZ);
72
73 fprintf(stderr, "socket(%d, %d, %d) expected "
74 "success got err (%s)\n",
75 s->domain, s->type, s->protocol,
76 err_string1);
77
78 err = -1;
79 break;
80 }
81 }
82 }
83
84 return err;
85}
86
/* Exit status is the result of run_tests(): 0 on success, -1 on failure. */
int main(void)
{
	return run_tests();
}