aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/networking
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/networking')
-rw-r--r--Documentation/networking/timestamping.txt368
-rw-r--r--Documentation/networking/timestamping/Makefile10
-rw-r--r--Documentation/networking/timestamping/txtimestamp.c470
3 files changed, 764 insertions, 84 deletions
diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index 897f942b976b..412f45ca2d73 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -1,102 +1,307 @@
1The existing interfaces for getting network packages time stamped are: 1
21. Control Interfaces
3
4The interfaces for receiving network packet timestamps are:
2 5
3* SO_TIMESTAMP 6* SO_TIMESTAMP
4 Generate time stamp for each incoming packet using the (not necessarily 7 Generates a timestamp for each incoming packet in (not necessarily
5 monotonous!) system time. Result is returned via recv_msg() in a 8 monotonic) system time. Reports the timestamp via recvmsg() in a
6 control message as timeval (usec resolution). 9 control message as struct timeval (usec resolution).
7 10
8* SO_TIMESTAMPNS 11* SO_TIMESTAMPNS
9 Same time stamping mechanism as SO_TIMESTAMP, but returns result as 12 Same timestamping mechanism as SO_TIMESTAMP, but reports the
10 timespec (nsec resolution). 13 timestamp as struct timespec (nsec resolution).
11 14
12* IP_MULTICAST_LOOP + SO_TIMESTAMP[NS] 15* IP_MULTICAST_LOOP + SO_TIMESTAMP[NS]
13 Only for multicasts: approximate send time stamp by receiving the looped 16 Only for multicast: approximate transmit timestamp obtained by
14 packet and using its receive time stamp. 17 reading the looped packet receive timestamp.
15 18
16The following interface complements the existing ones: receive time 19* SO_TIMESTAMPING
17stamps can be generated and returned for arbitrary packets and much 20 Generates timestamps on reception, transmission or both. Supports
18closer to the point where the packet is really sent. Time stamps can 21 multiple timestamp sources, including hardware. Supports generating
19be generated in software (as before) or in hardware (if the hardware 22 timestamps for stream sockets.
20has such a feature).
21 23
22SO_TIMESTAMPING:
23 24
24Instructs the socket layer which kind of information should be collected 251.1 SO_TIMESTAMP:
25and/or reported. The parameter is an integer with some of the following
26bits set. Setting other bits is an error and doesn't change the current
27state.
28 26
29Four of the bits are requests to the stack to try to generate 27This socket option enables timestamping of datagrams on the reception
30timestamps. Any combination of them is valid. 28path. Because the destination socket, if any, is not known early in
29the network stack, the feature has to be enabled for all packets. The
30same is true for all early receive timestamp options.
31 31
32SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamps in hardware 32For interface details, see `man 7 socket`.
33SOF_TIMESTAMPING_TX_SOFTWARE: try to obtain send time stamps in software 33
34SOF_TIMESTAMPING_RX_HARDWARE: try to obtain receive time stamps in hardware 34
35SOF_TIMESTAMPING_RX_SOFTWARE: try to obtain receive time stamps in software 351.2 SO_TIMESTAMPNS:
36
37This option is identical to SO_TIMESTAMP except for the returned data type.
38Its struct timespec allows for higher resolution (ns) timestamps than the
39timeval of SO_TIMESTAMP (us).
40
41
421.3 SO_TIMESTAMPING:
43
44Supports multiple types of timestamp requests. As a result, this
45socket option takes a bitmap of flags, not a boolean. In
46
47 err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (void *) &val, sizeof(val));
48
49val is an integer with any of the following bits set. Setting other
50bits returns EINVAL and does not change the current state.
36 51
37The other three bits control which timestamps will be reported in a
38generated control message. If none of these bits are set or if none of
39the set bits correspond to data that is available, then the control
40message will not be generated:
41 52
42SOF_TIMESTAMPING_SOFTWARE: report systime if available 531.3.1 Timestamp Generation
43SOF_TIMESTAMPING_SYS_HARDWARE: report hwtimetrans if available (deprecated)
44SOF_TIMESTAMPING_RAW_HARDWARE: report hwtimeraw if available
45 54
46It is worth noting that timestamps may be collected for reasons other 55Some bits are requests to the stack to try to generate timestamps. Any
47than being requested by a particular socket with 56combination of them is valid. Changes to these bits apply to newly
48SOF_TIMESTAMPING_[TR]X_(HARD|SOFT)WARE. For example, most drivers that 57created packets, not to packets already in the stack. As a result, it
49can generate hardware receive timestamps ignore 58is possible to selectively request timestamps for a subset of packets
50SOF_TIMESTAMPING_RX_HARDWARE. It is still a good idea to set that flag 59(e.g., for sampling) by embedding a send() call within two setsockopt
51in case future drivers pay attention. 60calls, one to enable timestamp generation and one to disable it.
61Timestamps may also be generated for reasons other than being
62requested by a particular socket, such as when receive timestamping is
63enabled system wide, as explained earlier.
52 64
53If timestamps are reported, they will appear in a control message with 65SOF_TIMESTAMPING_RX_HARDWARE:
54cmsg_level==SOL_SOCKET, cmsg_type==SO_TIMESTAMPING, and a payload like 66 Request rx timestamps generated by the network adapter.
55this: 67
68SOF_TIMESTAMPING_RX_SOFTWARE:
69 Request rx timestamps when data enters the kernel. These timestamps
70 are generated just after a device driver hands a packet to the
71 kernel receive stack.
72
73SOF_TIMESTAMPING_TX_HARDWARE:
74 Request tx timestamps generated by the network adapter.
75
76SOF_TIMESTAMPING_TX_SOFTWARE:
77 Request tx timestamps when data leaves the kernel. These timestamps
78 are generated in the device driver as close as possible, but always
79 prior to, passing the packet to the network interface. Hence, they
80 require driver support and may not be available for all devices.
81
82SOF_TIMESTAMPING_TX_SCHED:
83 Request tx timestamps prior to entering the packet scheduler. Kernel
84 transmit latency is, if long, often dominated by queuing delay. The
85 difference between this timestamp and one taken at
86 SOF_TIMESTAMPING_TX_SOFTWARE will expose this latency independent
87 of protocol processing. The latency incurred in protocol
88 processing, if any, can be computed by subtracting a userspace
89 timestamp taken immediately before send() from this timestamp. On
90 machines with virtual devices where a transmitted packet travels
91 through multiple devices and, hence, multiple packet schedulers,
92 a timestamp is generated at each layer. This allows for fine
93 grained measurement of queuing delay.
94
95SOF_TIMESTAMPING_TX_ACK:
96 Request tx timestamps when all data in the send buffer has been
97 acknowledged. This only makes sense for reliable protocols. It is
98 currently only implemented for TCP. For that protocol, it may
99 over-report measurement, because the timestamp is generated when all
100 data up to and including the buffer at send() was acknowledged: the
101 cumulative acknowledgment. The mechanism ignores SACK and FACK.
102
103
1041.3.2 Timestamp Reporting
105
106The other three bits control which timestamps will be reported in a
107generated control message. Changes to the bits take immediate
108effect at the timestamp reporting locations in the stack. Timestamps
109are only reported for packets that also have the relevant timestamp
110generation request set.
111
112SOF_TIMESTAMPING_SOFTWARE:
113 Report any software timestamps when available.
114
115SOF_TIMESTAMPING_SYS_HARDWARE:
116 This option is deprecated and ignored.
117
118SOF_TIMESTAMPING_RAW_HARDWARE:
119 Report hardware timestamps as generated by
120 SOF_TIMESTAMPING_TX_HARDWARE when available.
121
122
1231.3.3 Timestamp Options
124
125The interface supports one option
126
127SOF_TIMESTAMPING_OPT_ID:
128
129 Generate a unique identifier along with each packet. A process can
130 have multiple concurrent timestamping requests outstanding. Packets
131 can be reordered in the transmit path, for instance in the packet
132 scheduler. In that case timestamps will be queued onto the error
133 queue out of order from the original send() calls. This option
134 embeds a counter that is incremented at send() time, to order
135 timestamps within a flow.
136
137 This option is implemented only for transmit timestamps. There, the
138 timestamp is always looped along with a struct sock_extended_err.
139 The option modifies field ee_info to pass an id that is unique
140 among all possibly concurrently outstanding timestamp requests for
141 that socket. In practice, it is a monotonically increasing u32
142 (that wraps).
143
144 In datagram sockets, the counter increments on each send call. In
145 stream sockets, it increments with every byte.
146
147
1481.4 Bytestream Timestamps
149
150The SO_TIMESTAMPING interface supports timestamping of bytes in a
151bytestream. Each request is interpreted as a request for when the
152entire contents of the buffer has passed a timestamping point. That
153is, for streams option SOF_TIMESTAMPING_TX_SOFTWARE will record
154when all bytes have reached the device driver, regardless of how
155many packets the data has been converted into.
156
157In general, bytestreams have no natural delimiters and therefore
158correlating a timestamp with data is non-trivial. A range of bytes
159may be split across segments, any segments may be merged (possibly
160coalescing sections of previously segmented buffers associated with
161independent send() calls). Segments can be reordered and the same
162byte range can coexist in multiple segments for protocols that
163implement retransmissions.
164
165It is essential that all timestamps implement the same semantics,
166regardless of these possible transformations, as otherwise they are
167incomparable. Handling "rare" corner cases differently from the
168simple case (a 1:1 mapping from buffer to skb) is insufficient
169because performance debugging often needs to focus on such outliers.
170
171In practice, timestamps can be correlated with segments of a
172bytestream consistently, if both semantics of the timestamp and the
173timing of measurement are chosen correctly. This challenge is no
174different from deciding on a strategy for IP fragmentation. There, the
175definition is that only the first fragment is timestamped. For
176bytestreams, we chose that a timestamp is generated only when all
177bytes have passed a point. SOF_TIMESTAMPING_TX_ACK as defined is easy to
178implement and reason about. An implementation that has to take into
179account SACK would be more complex due to possible transmission holes
180and out of order arrival.
181
182On the host, TCP can also break the simple 1:1 mapping from buffer to
183skbuff as a result of Nagle, cork, autocork, segmentation and GSO. The
184implementation ensures correctness in all cases by tracking the
185individual last byte passed to send(), even if it is no longer the
186last byte after an skbuff extend or merge operation. It stores the
187relevant sequence number in skb_shinfo(skb)->tskey. Because an skbuff
188has only one such field, only one timestamp can be generated.
189
190In rare cases, a timestamp request can be missed if two requests are
191collapsed onto the same skb. A process can detect this situation by
192enabling SOF_TIMESTAMPING_OPT_ID and comparing the byte offset at
193send time with the value returned for each timestamp. It can prevent
194the situation by always flushing the TCP stack in between requests,
195for instance by enabling TCP_NODELAY and disabling TCP_CORK and
196autocork.
197
198These precautions ensure that the timestamp is generated only when all
199bytes have passed a timestamp point, assuming that the network stack
200itself does not reorder the segments. The stack indeed tries to avoid
201reordering. The one exception is under administrator control: it is
202possible to construct a packet scheduler configuration that delays
203segments from the same stream differently. Such a setup would be
204unusual.
205
206
2072 Data Interfaces
208
209Timestamps are read using the ancillary data feature of recvmsg().
210See `man 3 cmsg` for details of this interface. The socket manual
211page (`man 7 socket`) describes how timestamps generated with
212SO_TIMESTAMP and SO_TIMESTAMPNS records can be retrieved.
213
214
2152.1 SCM_TIMESTAMPING records
216
217These timestamps are returned in a control message with cmsg_level
218SOL_SOCKET, cmsg_type SCM_TIMESTAMPING, and payload of type
56 219
57struct scm_timestamping { 220struct scm_timestamping {
58 struct timespec systime; 221 struct timespec ts[3];
59 struct timespec hwtimetrans;
60 struct timespec hwtimeraw;
61}; 222};
62 223
63recvmsg() can be used to get this control message for regular incoming 224The structure can return up to three timestamps. This is a legacy
64packets. For send time stamps the outgoing packet is looped back to 225feature. Only one field is non-zero at any time. Most timestamps
65the socket's error queue with the send time stamp(s) attached. It can 226are passed in ts[0]. Hardware timestamps are passed in ts[2].
66be received with recvmsg(flags=MSG_ERRQUEUE). The call returns the 227
67original outgoing packet data including all headers preprended down to 228ts[1] used to hold hardware timestamps converted to system time.
68and including the link layer, the scm_timestamping control message and 229Instead, expose the hardware clock device on the NIC directly as
69a sock_extended_err control message with ee_errno==ENOMSG and 230a HW PTP clock source, to allow time conversion in userspace and
70ee_origin==SO_EE_ORIGIN_TIMESTAMPING. A socket with such a pending 231optionally synchronize system time with a userspace PTP stack such
71bounced packet is ready for reading as far as select() is concerned. 232as linuxptp. For the PTP clock API, see Documentation/ptp/ptp.txt.
72If the outgoing packet has to be fragmented, then only the first 233
73fragment is time stamped and returned to the sending socket. 2342.1.1 Transmit timestamps with MSG_ERRQUEUE
74 235
75All three values correspond to the same event in time, but were 236For transmit timestamps the outgoing packet is looped back to the
76generated in different ways. Each of these values may be empty (= all 237socket's error queue with the send timestamp(s) attached. A process
77zero), in which case no such value was available. If the application 238receives the timestamps by calling recvmsg() with flag MSG_ERRQUEUE
78is not interested in some of these values, they can be left blank to 239set and with a msg_control buffer sufficiently large to receive the
79avoid the potential overhead of calculating them. 240relevant metadata structures. The recvmsg call returns the original
80 241outgoing data packet with two ancillary messages attached.
81systime is the value of the system time at that moment. This 242
82corresponds to the value also returned via SO_TIMESTAMP[NS]. If the 243A message of cm_level SOL_IP(V6) and cm_type IP(V6)_RECVERR
83time stamp was generated by hardware, then this field is 244embeds a struct sock_extended_err. This defines the error type. For
84empty. Otherwise it is filled in if SOF_TIMESTAMPING_SOFTWARE is 245timestamps, the ee_errno field is ENOMSG. The other ancillary message
85set. 246will have cm_level SOL_SOCKET and cm_type SCM_TIMESTAMPING. This
86 247embeds the struct scm_timestamping.
87hwtimeraw is the original hardware time stamp. Filled in if 248
88SOF_TIMESTAMPING_RAW_HARDWARE is set. No assumptions about its 249
89relation to system time should be made. 2502.1.1.2 Timestamp types
90 251
91hwtimetrans is always zero. This field is deprecated. It used to hold 252The semantics of the three struct timespec are defined by field
92hw timestamps converted to system time. Instead, expose the hardware 253ee_info in the extended error structure. It contains a value of
93clock device on the NIC directly as a HW PTP clock source, to allow 254type SCM_TSTAMP_* to define the actual timestamp passed in
94time conversion in userspace and optionally synchronize system time 255scm_timestamping.
95with a userspace PTP stack such as linuxptp. For the PTP clock API, 256
96see Documentation/ptp/ptp.txt. 257The SCM_TSTAMP_* types are 1:1 matches to the SOF_TIMESTAMPING_*
97 258control fields discussed previously, with one exception. For legacy
98 259reasons, SCM_TSTAMP_SND is equal to zero and can be set for both
99SIOCSHWTSTAMP, SIOCGHWTSTAMP: 260SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE. It
261is the first if ts[2] is non-zero, the second otherwise, in which
262case the timestamp is stored in ts[0].
263
264
2652.1.1.3 Fragmentation
266
267Fragmentation of outgoing datagrams is rare, but is possible, e.g., by
268explicitly disabling PMTU discovery. If an outgoing packet is fragmented,
269then only the first fragment is timestamped and returned to the sending
270socket.
271
272
2732.1.1.4 Packet Payload
274
275The calling application is often not interested in receiving the whole
276packet payload that it passed to the stack originally: the socket
277error queue mechanism is just a method to piggyback the timestamp on.
278In this case, the application can choose to read datagrams with a
279smaller buffer, possibly even of length 0. The payload is truncated
280accordingly. Until the process calls recvmsg() on the error queue,
281however, the full packet is queued, taking up budget from SO_RCVBUF.
282
283
2842.1.1.5 Blocking Read
285
286Reading from the error queue is always a non-blocking operation. To
287block waiting on a timestamp, use poll or select. poll() will return
288POLLERR in pollfd.revents if any data is ready on the error queue.
289There is no need to pass this flag in pollfd.events. This flag is
290ignored on request. See also `man 2 poll`.
291
292
2932.1.2 Receive timestamps
294
295On reception, there is no reason to read from the socket error queue.
296The SCM_TIMESTAMPING ancillary data is sent along with the packet data
297on a normal recvmsg(). Since this is not a socket error, it is not
298accompanied by a message SOL_IP(V6)/IP(V6)_RECVERR. In this case,
299the meaning of the three fields in struct scm_timestamping is
300implicitly defined. ts[0] holds a software timestamp if set, ts[1]
301is again deprecated and ts[2] holds a hardware timestamp if set.
302
303
3043. Hardware Timestamping configuration: SIOCSHWTSTAMP and SIOCGHWTSTAMP
100 305
101Hardware time stamping must also be initialized for each device driver 306Hardware time stamping must also be initialized for each device driver
102that is expected to do hardware time stamping. The parameter is defined in 307that is expected to do hardware time stamping. The parameter is defined in
@@ -167,8 +372,7 @@ enum {
167 */ 372 */
168}; 373};
169 374
170 3753.1 Hardware Timestamping Implementation: Device Drivers
171DEVICE IMPLEMENTATION
172 376
173A driver which supports hardware time stamping must support the 377A driver which supports hardware time stamping must support the
174SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with 378SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with
diff --git a/Documentation/networking/timestamping/Makefile b/Documentation/networking/timestamping/Makefile
index d934afc8306a..95e239c70076 100644
--- a/Documentation/networking/timestamping/Makefile
+++ b/Documentation/networking/timestamping/Makefile
@@ -1,14 +1,20 @@
1# To compile, from the source root
2#
3# make headers_install
4# make M=documentation
5
1# kbuild trick to avoid linker error. Can be omitted if a module is built. 6# kbuild trick to avoid linker error. Can be omitted if a module is built.
2obj- := dummy.o 7obj- := dummy.o
3 8
4# List of programs to build 9# List of programs to build
5hostprogs-y := timestamping hwtstamp_config 10hostprogs-y := timestamping txtimestamp hwtstamp_config
6 11
7# Tell kbuild to always build the programs 12# Tell kbuild to always build the programs
8always := $(hostprogs-y) 13always := $(hostprogs-y)
9 14
10HOSTCFLAGS_timestamping.o += -I$(objtree)/usr/include 15HOSTCFLAGS_timestamping.o += -I$(objtree)/usr/include
16HOSTCFLAGS_txtimestamp.o += -I$(objtree)/usr/include
11HOSTCFLAGS_hwtstamp_config.o += -I$(objtree)/usr/include 17HOSTCFLAGS_hwtstamp_config.o += -I$(objtree)/usr/include
12 18
13clean: 19clean:
14 rm -f timestamping hwtstamp_config 20 rm -f timestamping txtimestamp hwtstamp_config
diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c
new file mode 100644
index 000000000000..e5b0b98a89af
--- /dev/null
+++ b/Documentation/networking/timestamping/txtimestamp.c
@@ -0,0 +1,470 @@
1/*
2 * Copyright 2014 Google Inc.
3 * Author: willemb@google.com (Willem de Bruijn)
4 *
5 * Test software tx timestamping, including
6 *
7 * - SCHED, SND and ACK timestamps
8 * - RAW, UDP and TCP
9 * - IPv4 and IPv6
10 * - various packet sizes (to test GSO and TSO)
11 *
12 * Consult the command line arguments for help on running
13 * the various testcases.
14 *
15 * This test requires a dummy TCP server.
16 * A simple `nc6 [-u] -l -p $DESTPORT` will do
17 *
18 *
19 * This program is free software; you can redistribute it and/or modify it
20 * under the terms and conditions of the GNU General Public License,
21 * version 2, as published by the Free Software Foundation.
22 *
23 * This program is distributed in the hope it will be useful, but WITHOUT
24 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
25 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
26 * more details.
27 *
28 * You should have received a copy of the GNU General Public License along with
29 * this program; if not, write to the Free Software Foundation, Inc.,
30 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
31 */
32
33#include <arpa/inet.h>
34#include <asm/types.h>
35#include <error.h>
36#include <errno.h>
37#include <linux/errqueue.h>
38#include <linux/if_ether.h>
39#include <linux/net_tstamp.h>
40#include <netdb.h>
41#include <net/if.h>
42#include <netinet/in.h>
43#include <netinet/ip.h>
44#include <netinet/udp.h>
45#include <netinet/tcp.h>
46#include <netpacket/packet.h>
47#include <poll.h>
48#include <stdarg.h>
49#include <stdint.h>
50#include <stdio.h>
51#include <stdlib.h>
52#include <string.h>
53#include <sys/ioctl.h>
54#include <sys/select.h>
55#include <sys/socket.h>
56#include <sys/time.h>
57#include <sys/types.h>
58#include <time.h>
59#include <unistd.h>
60
61/* command line parameters */
62static int cfg_proto = SOCK_STREAM; /* socket type; parse_opt() switches it to SOCK_RAW (-r, -R) or SOCK_DGRAM (-u) */
63static int cfg_ipproto = IPPROTO_TCP; /* protocol argument to socket(); set together with cfg_proto in parse_opt() */
64static int cfg_num_pkts = 4; /* number of send iterations per do_test() call; no option changes it */
65static int do_ipv4 = 1; /* cleared by -6, and by resolve_hostname() when no IPv4 address was found */
66static int do_ipv6 = 1; /* cleared by -4, and by resolve_hostname() when no IPv6 address was found */
67static int cfg_payload_len = 10; /* bytes of payload per send; set with -l */
68static uint16_t dest_port = 9000; /* destination port in host byte order; set with -p */
69
70static struct sockaddr_in daddr; /* IPv4 destination, filled in by resolve_hostname() */
71static struct sockaddr_in6 daddr6; /* IPv6 destination, filled in by resolve_hostname() */
72static struct timespec ts_prev; /* last timestamp printed by __print_timestamp(); zeroed before each send in do_test() */
73
74static void __print_timestamp(const char *name, struct timespec *cur,
75 uint32_t key, int payload_len)
76{
77 if (!(cur->tv_sec | cur->tv_nsec))
78 return;
79
80 fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)",
81 name, cur->tv_sec, cur->tv_nsec / 1000,
82 key, payload_len);
83
84 if ((ts_prev.tv_sec | ts_prev.tv_nsec)) {
85 int64_t cur_ms, prev_ms;
86
87 cur_ms = (long) cur->tv_sec * 1000 * 1000;
88 cur_ms += cur->tv_nsec / 1000;
89
90 prev_ms = (long) ts_prev.tv_sec * 1000 * 1000;
91 prev_ms += ts_prev.tv_nsec / 1000;
92
93 fprintf(stderr, " (%+ld us)", cur_ms - prev_ms);
94 }
95
96 ts_prev = *cur;
97 fprintf(stderr, "\n");
98}
99
/*
 * Take a userspace timestamp with gettimeofday() and print it under the
 * " USR" label, with seq and len both reported as 0.
 */
static void print_timestamp_usr(void)
{
	struct timeval now;	/* avoid dependency on -lrt */
	struct timespec converted;

	gettimeofday(&now, NULL);

	/* microseconds -> nanoseconds so it fits the common print helper */
	converted.tv_nsec = now.tv_usec * 1000;
	converted.tv_sec = now.tv_sec;

	__print_timestamp(" USR", &converted, 0, 0);
}
111
/*
 * Print the first timestamp slot (ts[0]) of a struct scm_timestamping.
 *
 * @tss:         timestamp record taken from an SCM_TIMESTAMPING cmsg
 * @tstype:      SCM_TSTAMP_* value; selects the label (ENQ, SND or ACK)
 * @tskey:       value printed as "seq="
 * @payload_len: value printed as "len="
 *
 * Exits through error() on a tstype that is none of the three known values.
 */
static void print_timestamp(struct scm_timestamping *tss, int tstype,
			    int tskey, int payload_len)
{
	const char *label;

	if (tstype == SCM_TSTAMP_SCHED)
		label = " ENQ";
	else if (tstype == SCM_TSTAMP_SND)
		label = " SND";
	else if (tstype == SCM_TSTAMP_ACK)
		label = " ACK";
	else
		error(1, 0, "unknown timestamp type: %u",
		      tstype);

	__print_timestamp(label, tss->ts, tskey, payload_len);
}
133
/*
 * Wait up to 100 ms for an event on @fd.
 *
 * No bits are requested in pollfd.events, so only the conditions poll()
 * always reports can end the wait. Exits through error() when the wait
 * times out or poll() fails.
 */
static void __poll(int fd)
{
	struct pollfd pollfd;
	int ret;

	memset(&pollfd, 0, sizeof(pollfd));
	pollfd.fd = fd;
	ret = poll(&pollfd, 1, 100);

	/* a timeout does not set errno: report it separately */
	if (ret == 0)
		error(1, 0, "poll: timeout");
	if (ret != 1)
		error(1, errno, "poll");
}
145
146static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
147{
148 struct sock_extended_err *serr = NULL;
149 struct scm_timestamping *tss = NULL;
150 struct cmsghdr *cm;
151
152 for (cm = CMSG_FIRSTHDR(msg);
153 cm && cm->cmsg_len;
154 cm = CMSG_NXTHDR(msg, cm)) {
155 if (cm->cmsg_level == SOL_SOCKET &&
156 cm->cmsg_type == SCM_TIMESTAMPING) {
157 tss = (void *) CMSG_DATA(cm);
158 } else if ((cm->cmsg_level == SOL_IP &&
159 cm->cmsg_type == IP_RECVERR) ||
160 (cm->cmsg_level == SOL_IPV6 &&
161 cm->cmsg_type == IPV6_RECVERR)) {
162
163 serr = (void *) CMSG_DATA(cm);
164 if (serr->ee_errno != ENOMSG ||
165 serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
166 fprintf(stderr, "unknown ip error %d %d\n",
167 serr->ee_errno,
168 serr->ee_origin);
169 serr = NULL;
170 }
171 } else
172 fprintf(stderr, "unknown cmsg %d,%d\n",
173 cm->cmsg_level, cm->cmsg_type);
174 }
175
176 if (serr && tss)
177 print_timestamp(tss, serr->ee_info, serr->ee_data, payload_len);
178}
179
180static int recv_errmsg(int fd)
181{
182 static char ctrl[1024 /* overprovision*/];
183 static struct msghdr msg;
184 struct iovec entry;
185 static char *data;
186 int ret = 0;
187
188 data = malloc(cfg_payload_len);
189 if (!data)
190 error(1, 0, "malloc");
191
192 memset(&msg, 0, sizeof(msg));
193 memset(&entry, 0, sizeof(entry));
194 memset(ctrl, 0, sizeof(ctrl));
195 memset(data, 0, sizeof(data));
196
197 entry.iov_base = data;
198 entry.iov_len = cfg_payload_len;
199 msg.msg_iov = &entry;
200 msg.msg_iovlen = 1;
201 msg.msg_name = NULL;
202 msg.msg_namelen = 0;
203 msg.msg_control = ctrl;
204 msg.msg_controllen = sizeof(ctrl);
205
206 ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
207 if (ret == -1 && errno != EAGAIN)
208 error(1, errno, "recvmsg");
209
210 __recv_errmsg_cmsg(&msg, ret);
211
212 free(data);
213 return ret == -1;
214}
215
216static void do_test(int family, unsigned int opt)
217{
218 char *buf;
219 int fd, i, val, total_len;
220
221 if (family == IPPROTO_IPV6 && cfg_proto != SOCK_STREAM) {
222 /* due to lack of checksum generation code */
223 fprintf(stderr, "test: skipping datagram over IPv6\n");
224 return;
225 }
226
227 total_len = cfg_payload_len;
228 if (cfg_proto == SOCK_RAW) {
229 total_len += sizeof(struct udphdr);
230 if (cfg_ipproto == IPPROTO_RAW)
231 total_len += sizeof(struct iphdr);
232 }
233
234 buf = malloc(total_len);
235 if (!buf)
236 error(1, 0, "malloc");
237
238 fd = socket(family, cfg_proto, cfg_ipproto);
239 if (fd < 0)
240 error(1, errno, "socket");
241
242 if (cfg_proto == SOCK_STREAM) {
243 val = 1;
244 if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
245 (char*) &val, sizeof(val)))
246 error(1, 0, "setsockopt no nagle");
247
248 if (family == PF_INET) {
249 if (connect(fd, (void *) &daddr, sizeof(daddr)))
250 error(1, errno, "connect ipv4");
251 } else {
252 if (connect(fd, (void *) &daddr6, sizeof(daddr6)))
253 error(1, errno, "connect ipv6");
254 }
255 }
256
257 opt |= SOF_TIMESTAMPING_SOFTWARE |
258 SOF_TIMESTAMPING_OPT_ID;
259 if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
260 (char *) &opt, sizeof(opt)))
261 error(1, 0, "setsockopt timestamping");
262
263 for (i = 0; i < cfg_num_pkts; i++) {
264 memset(&ts_prev, 0, sizeof(ts_prev));
265 memset(buf, 'a' + i, total_len);
266 buf[total_len - 2] = '\n';
267 buf[total_len - 1] = '\0';
268
269 if (cfg_proto == SOCK_RAW) {
270 struct udphdr *udph;
271 int off = 0;
272
273 if (cfg_ipproto == IPPROTO_RAW) {
274 struct iphdr *iph = (void *) buf;
275
276 memset(iph, 0, sizeof(*iph));
277 iph->ihl = 5;
278 iph->version = 4;
279 iph->ttl = 2;
280 iph->daddr = daddr.sin_addr.s_addr;
281 iph->protocol = IPPROTO_UDP;
282 /* kernel writes saddr, csum, len */
283
284 off = sizeof(*iph);
285 }
286
287 udph = (void *) buf + off;
288 udph->source = ntohs(9000); /* random spoof */
289 udph->dest = ntohs(dest_port);
290 udph->len = ntohs(sizeof(*udph) + cfg_payload_len);
291 udph->check = 0; /* not allowed for IPv6 */
292 }
293
294 print_timestamp_usr();
295 if (cfg_proto != SOCK_STREAM) {
296 if (family == PF_INET)
297 val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr));
298 else
299 val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6));
300 } else {
301 val = send(fd, buf, cfg_payload_len, 0);
302 }
303 if (val != total_len)
304 error(1, errno, "send");
305
306 /* wait for all errors to be queued, else ACKs arrive OOO */
307 usleep(50 * 1000);
308
309 __poll(fd);
310
311 while (!recv_errmsg(fd)) {}
312 }
313
314 if (close(fd))
315 error(1, errno, "close");
316
317 free(buf);
318 usleep(400 * 1000);
319}
320
/*
 * Print the command line help to stderr and exit with status 1.
 *
 * @filepath: program name shown on the "Usage:" line
 */
static void __attribute__((noreturn)) usage(const char *filepath)
{
	fprintf(stderr, "\nUsage: %s [options] hostname\n", filepath);

	/* the option list has no conversions, so emit it verbatim */
	fputs("\nwhere options are:\n"
	      " -4: only IPv4\n"
	      " -6: only IPv6\n"
	      " -h: show this message\n"
	      " -l N: send N bytes at a time\n"
	      " -r: use raw\n"
	      " -R: use raw (IP_HDRINCL)\n"
	      " -p N: connect to port N\n"
	      " -u: use udp\n",
	      stderr);

	exit(1);
}
336
337static void parse_opt(int argc, char **argv)
338{
339 int proto_count = 0;
340 char c;
341
342 while ((c = getopt(argc, argv, "46hl:p:rRu")) != -1) {
343 switch (c) {
344 case '4':
345 do_ipv6 = 0;
346 break;
347 case '6':
348 do_ipv4 = 0;
349 break;
350 case 'r':
351 proto_count++;
352 cfg_proto = SOCK_RAW;
353 cfg_ipproto = IPPROTO_UDP;
354 break;
355 case 'R':
356 proto_count++;
357 cfg_proto = SOCK_RAW;
358 cfg_ipproto = IPPROTO_RAW;
359 break;
360 case 'u':
361 proto_count++;
362 cfg_proto = SOCK_DGRAM;
363 cfg_ipproto = IPPROTO_UDP;
364 break;
365 case 'l':
366 cfg_payload_len = strtoul(optarg, NULL, 10);
367 break;
368 case 'p':
369 dest_port = strtoul(optarg, NULL, 10);
370 break;
371 case 'h':
372 default:
373 usage(argv[0]);
374 }
375 }
376
377 if (!cfg_payload_len)
378 error(1, 0, "payload may not be nonzero");
379 if (cfg_proto != SOCK_STREAM && cfg_payload_len > 1472)
380 error(1, 0, "udp packet might exceed expected MTU");
381 if (!do_ipv4 && !do_ipv6)
382 error(1, 0, "pass -4 or -6, not both");
383 if (proto_count > 1)
384 error(1, 0, "pass -r, -R or -u, not multiple");
385
386 if (optind != argc - 1)
387 error(1, 0, "missing required hostname argument");
388}
389
/*
 * Resolve @hostname and store the first IPv4 and the first IPv6 address
 * found in daddr and daddr6, each with dest_port as its port.
 *
 * A family for which no address was found is disabled by clearing do_ipv4
 * or do_ipv6. Exits through error() when resolution fails.
 */
static void resolve_hostname(const char *hostname)
{
	struct addrinfo *addrs, *cur;
	int have_ipv4 = 0, have_ipv6 = 0;
	int ret;

	/* getaddrinfo() reports failure in its return value, not in errno */
	ret = getaddrinfo(hostname, NULL, NULL, &addrs);
	if (ret)
		error(1, 0, "getaddrinfo: %s", gai_strerror(ret));

	/* keep going until one address of each family has been stored */
	for (cur = addrs; cur && !(have_ipv4 && have_ipv6);
	     cur = cur->ai_next) {
		if (!have_ipv4 && cur->ai_family == AF_INET) {
			memcpy(&daddr, cur->ai_addr, sizeof(daddr));
			daddr.sin_port = htons(dest_port);
			have_ipv4 = 1;
		} else if (!have_ipv6 && cur->ai_family == AF_INET6) {
			memcpy(&daddr6, cur->ai_addr, sizeof(daddr6));
			daddr6.sin6_port = htons(dest_port);
			have_ipv6 = 1;
		}
	}
	if (addrs)
		freeaddrinfo(addrs);

	do_ipv4 &= have_ipv4;
	do_ipv6 &= have_ipv6;
}
418
419static void do_main(int family)
420{
421 fprintf(stderr, "family: %s\n",
422 family == PF_INET ? "INET" : "INET6");
423
424 fprintf(stderr, "test SND\n");
425 do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE);
426
427 fprintf(stderr, "test ENQ\n");
428 do_test(family, SOF_TIMESTAMPING_TX_SCHED);
429
430 fprintf(stderr, "test ENQ + SND\n");
431 do_test(family, SOF_TIMESTAMPING_TX_SCHED |
432 SOF_TIMESTAMPING_TX_SOFTWARE);
433
434 if (cfg_proto == SOCK_STREAM) {
435 fprintf(stderr, "\ntest ACK\n");
436 do_test(family, SOF_TIMESTAMPING_TX_ACK);
437
438 fprintf(stderr, "\ntest SND + ACK\n");
439 do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE |
440 SOF_TIMESTAMPING_TX_ACK);
441
442 fprintf(stderr, "\ntest ENQ + SND + ACK\n");
443 do_test(family, SOF_TIMESTAMPING_TX_SCHED |
444 SOF_TIMESTAMPING_TX_SOFTWARE |
445 SOF_TIMESTAMPING_TX_ACK);
446 }
447}
448
449const char *sock_names[] = { NULL, "TCP", "UDP", "RAW" };
450
451int main(int argc, char **argv)
452{
453 if (argc == 1)
454 usage(argv[0]);
455
456 parse_opt(argc, argv);
457 resolve_hostname(argv[argc - 1]);
458
459 fprintf(stderr, "protocol: %s\n", sock_names[cfg_proto]);
460 fprintf(stderr, "payload: %u\n", cfg_payload_len);
461 fprintf(stderr, "server port: %u\n", dest_port);
462 fprintf(stderr, "\n");
463
464 if (do_ipv4)
465 do_main(PF_INET);
466 if (do_ipv6)
467 do_main(PF_INET6);
468
469 return 0;
470}