diff options
Diffstat (limited to 'Documentation/networking')
-rw-r--r-- | Documentation/networking/timestamping.txt | 368 | ||||
-rw-r--r-- | Documentation/networking/timestamping/Makefile | 10 | ||||
-rw-r--r-- | Documentation/networking/timestamping/txtimestamp.c | 470 |
3 files changed, 764 insertions, 84 deletions
diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 897f942b976b..412f45ca2d73 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt | |||
@@ -1,102 +1,307 @@ | |||
1 | The existing interfaces for getting network packages time stamped are: | 1 | |
2 | 1. Control Interfaces | ||
3 | |||
4 | The interfaces for receiving network packet timestamps are: | ||
2 | 5 | ||
3 | * SO_TIMESTAMP | 6 | * SO_TIMESTAMP |
4 | Generate time stamp for each incoming packet using the (not necessarily | 7 | Generates a timestamp for each incoming packet in (not necessarily |
5 | monotonous!) system time. Result is returned via recv_msg() in a | 8 | monotonic) system time. Reports the timestamp via recvmsg() in a |
6 | control message as timeval (usec resolution). | 9 | control message as struct timeval (usec resolution). |
7 | 10 | ||
8 | * SO_TIMESTAMPNS | 11 | * SO_TIMESTAMPNS |
9 | Same time stamping mechanism as SO_TIMESTAMP, but returns result as | 12 | Same timestamping mechanism as SO_TIMESTAMP, but reports the |
10 | timespec (nsec resolution). | 13 | timestamp as struct timespec (nsec resolution). |
11 | 14 | ||
12 | * IP_MULTICAST_LOOP + SO_TIMESTAMP[NS] | 15 | * IP_MULTICAST_LOOP + SO_TIMESTAMP[NS] |
13 | Only for multicasts: approximate send time stamp by receiving the looped | 16 | Only for multicast: approximate transmit timestamp obtained by |
14 | packet and using its receive time stamp. | 17 | reading the looped packet receive timestamp. |
15 | 18 | ||
16 | The following interface complements the existing ones: receive time | 19 | * SO_TIMESTAMPING |
17 | stamps can be generated and returned for arbitrary packets and much | 20 | Generates timestamps on reception, transmission or both. Supports |
18 | closer to the point where the packet is really sent. Time stamps can | 21 | multiple timestamp sources, including hardware. Supports generating |
19 | be generated in software (as before) or in hardware (if the hardware | 22 | timestamps for stream sockets. |
20 | has such a feature). | ||
21 | 23 | ||
22 | SO_TIMESTAMPING: | ||
23 | 24 | ||
24 | Instructs the socket layer which kind of information should be collected | 25 | 1.1 SO_TIMESTAMP: |
25 | and/or reported. The parameter is an integer with some of the following | ||
26 | bits set. Setting other bits is an error and doesn't change the current | ||
27 | state. | ||
28 | 26 | ||
29 | Four of the bits are requests to the stack to try to generate | 27 | This socket option enables timestamping of datagrams on the reception |
30 | timestamps. Any combination of them is valid. | 28 | path. Because the destination socket, if any, is not known early in |
29 | the network stack, the feature has to be enabled for all packets. The | ||
30 | same is true for all early receive timestamp options. | ||
31 | 31 | ||
32 | SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamps in hardware | 32 | For interface details, see `man 7 socket`. |
33 | SOF_TIMESTAMPING_TX_SOFTWARE: try to obtain send time stamps in software | 33 | |
34 | SOF_TIMESTAMPING_RX_HARDWARE: try to obtain receive time stamps in hardware | 34 | |
35 | SOF_TIMESTAMPING_RX_SOFTWARE: try to obtain receive time stamps in software | 35 | 1.2 SO_TIMESTAMPNS: |
36 | |||
37 | This option is identical to SO_TIMESTAMP except for the returned data type. | ||
38 | Its struct timespec allows for higher resolution (ns) timestamps than the | ||
39 | timeval of SO_TIMESTAMP (us). | ||
40 | |||
41 | |||
42 | 1.3 SO_TIMESTAMPING: | ||
43 | |||
44 | Supports multiple types of timestamp requests. As a result, this | ||
45 | socket option takes a bitmap of flags, not a boolean. In | ||
46 | |||
47 | err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (void *) &val, sizeof(val)); | ||
48 | |||
49 | val is an integer with any of the following bits set. Setting other | ||
50 | bits returns EINVAL and does not change the current state. | ||
36 | 51 | ||
37 | The other three bits control which timestamps will be reported in a | ||
38 | generated control message. If none of these bits are set or if none of | ||
39 | the set bits correspond to data that is available, then the control | ||
40 | message will not be generated: | ||
41 | 52 | ||
42 | SOF_TIMESTAMPING_SOFTWARE: report systime if available | 53 | 1.3.1 Timestamp Generation |
43 | SOF_TIMESTAMPING_SYS_HARDWARE: report hwtimetrans if available (deprecated) | ||
44 | SOF_TIMESTAMPING_RAW_HARDWARE: report hwtimeraw if available | ||
45 | 54 | ||
46 | It is worth noting that timestamps may be collected for reasons other | 55 | Some bits are requests to the stack to try to generate timestamps. Any |
47 | than being requested by a particular socket with | 56 | combination of them is valid. Changes to these bits apply to newly |
48 | SOF_TIMESTAMPING_[TR]X_(HARD|SOFT)WARE. For example, most drivers that | 57 | created packets, not to packets already in the stack. As a result, it |
49 | can generate hardware receive timestamps ignore | 58 | is possible to selectively request timestamps for a subset of packets |
50 | SOF_TIMESTAMPING_RX_HARDWARE. It is still a good idea to set that flag | 59 | (e.g., for sampling) by embedding a send() call within two setsockopt |
51 | in case future drivers pay attention. | 60 | calls, one to enable timestamp generation and one to disable it. |
61 | Timestamps may also be generated for reasons other than being | ||
62 | requested by a particular socket, such as when receive timestamping is | ||
63 | enabled system wide, as explained earlier. | ||
52 | 64 | ||
53 | If timestamps are reported, they will appear in a control message with | 65 | SOF_TIMESTAMPING_RX_HARDWARE: |
54 | cmsg_level==SOL_SOCKET, cmsg_type==SO_TIMESTAMPING, and a payload like | 66 | Request rx timestamps generated by the network adapter. |
55 | this: | 67 | |
68 | SOF_TIMESTAMPING_RX_SOFTWARE: | ||
69 | Request rx timestamps when data enters the kernel. These timestamps | ||
70 | are generated just after a device driver hands a packet to the | ||
71 | kernel receive stack. | ||
72 | |||
73 | SOF_TIMESTAMPING_TX_HARDWARE: | ||
74 | Request tx timestamps generated by the network adapter. | ||
75 | |||
76 | SOF_TIMESTAMPING_TX_SOFTWARE: | ||
77 | Request tx timestamps when data leaves the kernel. These timestamps | ||
78 | are generated in the device driver as close as possible, but always | ||
79 | prior to, passing the packet to the network interface. Hence, they | ||
80 | require driver support and may not be available for all devices. | ||
81 | |||
82 | SOF_TIMESTAMPING_TX_SCHED: | ||
83 | Request tx timestamps prior to entering the packet scheduler. Kernel | ||
84 | transmit latency is, if long, often dominated by queuing delay. The | ||
85 | difference between this timestamp and one taken at | ||
86 | SOF_TIMESTAMPING_TX_SOFTWARE will expose this latency independent | ||
87 | of protocol processing. The latency incurred in protocol | ||
88 | processing, if any, can be computed by subtracting a userspace | ||
89 | timestamp taken immediately before send() from this timestamp. On | ||
90 | machines with virtual devices where a transmitted packet travels | ||
91 | through multiple devices and, hence, multiple packet schedulers, | ||
92 | a timestamp is generated at each layer. This allows for fine | ||
93 | grained measurement of queuing delay. | ||
94 | |||
95 | SOF_TIMESTAMPING_TX_ACK: | ||
96 | Request tx timestamps when all data in the send buffer has been | ||
97 | acknowledged. This only makes sense for reliable protocols. It is | ||
98 | currently only implemented for TCP. For that protocol, it may | ||
99 | over-report measurement, because the timestamp is generated when all | ||
100 | data up to and including the buffer at send() was acknowledged: the | ||
101 | cumulative acknowledgment. The mechanism ignores SACK and FACK. | ||
102 | |||
103 | |||
104 | 1.3.2 Timestamp Reporting | ||
105 | |||
106 | The other three bits control which timestamps will be reported in a | ||
107 | generated control message. Changes to the bits take immediate | ||
108 | effect at the timestamp reporting locations in the stack. Timestamps | ||
109 | are only reported for packets that also have the relevant timestamp | ||
110 | generation request set. | ||
111 | |||
112 | SOF_TIMESTAMPING_SOFTWARE: | ||
113 | Report any software timestamps when available. | ||
114 | |||
115 | SOF_TIMESTAMPING_SYS_HARDWARE: | ||
116 | This option is deprecated and ignored. | ||
117 | |||
118 | SOF_TIMESTAMPING_RAW_HARDWARE: | ||
119 | Report hardware timestamps as generated by | ||
120 | SOF_TIMESTAMPING_TX_HARDWARE when available. | ||
121 | |||
122 | |||
123 | 1.3.3 Timestamp Options | ||
124 | |||
125 | The interface supports one option | ||
126 | |||
127 | SOF_TIMESTAMPING_OPT_ID: | ||
128 | |||
129 | Generate a unique identifier along with each packet. A process can | ||
130 | have multiple concurrent timestamping requests outstanding. Packets | ||
131 | can be reordered in the transmit path, for instance in the packet | ||
132 | scheduler. In that case timestamps will be queued onto the error | ||
133 | queue out of order from the original send() calls. This option | ||
134 | embeds a counter that is incremented at send() time, to order | ||
135 | timestamps within a flow. | ||
136 | |||
137 | This option is implemented only for transmit timestamps. There, the | ||
138 | timestamp is always looped along with a struct sock_extended_err. | ||
139 | The option modifies field ee_info to pass an id that is unique | ||
140 | among all possibly concurrently outstanding timestamp requests for | ||
141 | that socket. In practice, it is a monotonically increasing u32 | ||
142 | (that wraps). | ||
143 | |||
144 | In datagram sockets, the counter increments on each send call. In | ||
145 | stream sockets, it increments with every byte. | ||
146 | |||
147 | |||
148 | 1.4 Bytestream Timestamps | ||
149 | |||
150 | The SO_TIMESTAMPING interface supports timestamping of bytes in a | ||
151 | bytestream. Each request is interpreted as a request for when the | ||
152 | entire contents of the buffer has passed a timestamping point. That | ||
153 | is, for streams option SOF_TIMESTAMPING_TX_SOFTWARE will record | ||
154 | when all bytes have reached the device driver, regardless of how | ||
155 | many packets the data has been converted into. | ||
156 | |||
157 | In general, bytestreams have no natural delimiters and therefore | ||
158 | correlating a timestamp with data is non-trivial. A range of bytes | ||
159 | may be split across segments, any segments may be merged (possibly | ||
160 | coalescing sections of previously segmented buffers associated with | ||
161 | independent send() calls). Segments can be reordered and the same | ||
162 | byte range can coexist in multiple segments for protocols that | ||
163 | implement retransmissions. | ||
164 | |||
165 | It is essential that all timestamps implement the same semantics, | ||
166 | regardless of these possible transformations, as otherwise they are | ||
167 | incomparable. Handling "rare" corner cases differently from the | ||
168 | simple case (a 1:1 mapping from buffer to skb) is insufficient | ||
169 | because performance debugging often needs to focus on such outliers. | ||
170 | |||
171 | In practice, timestamps can be correlated with segments of a | ||
172 | bytestream consistently, if both semantics of the timestamp and the | ||
173 | timing of measurement are chosen correctly. This challenge is no | ||
174 | different from deciding on a strategy for IP fragmentation. There, the | ||
175 | definition is that only the first fragment is timestamped. For | ||
176 | bytestreams, we chose that a timestamp is generated only when all | ||
177 | bytes have passed a point. SOF_TIMESTAMPING_TX_ACK as defined is easy to | ||
178 | implement and reason about. An implementation that has to take into | ||
179 | account SACK would be more complex due to possible transmission holes | ||
180 | and out of order arrival. | ||
181 | |||
182 | On the host, TCP can also break the simple 1:1 mapping from buffer to | ||
183 | skbuff as a result of Nagle, cork, autocork, segmentation and GSO. The | ||
184 | implementation ensures correctness in all cases by tracking the | ||
185 | individual last byte passed to send(), even if it is no longer the | ||
186 | last byte after an skbuff extend or merge operation. It stores the | ||
187 | relevant sequence number in skb_shinfo(skb)->tskey. Because an skbuff | ||
188 | has only one such field, only one timestamp can be generated. | ||
189 | |||
190 | In rare cases, a timestamp request can be missed if two requests are | ||
191 | collapsed onto the same skb. A process can detect this situation by | ||
192 | enabling SOF_TIMESTAMPING_OPT_ID and comparing the byte offset at | ||
193 | send time with the value returned for each timestamp. It can prevent | ||
194 | the situation by always flushing the TCP stack in between requests, | ||
195 | for instance by enabling TCP_NODELAY and disabling TCP_CORK and | ||
196 | autocork. | ||
197 | |||
198 | These precautions ensure that the timestamp is generated only when all | ||
199 | bytes have passed a timestamp point, assuming that the network stack | ||
200 | itself does not reorder the segments. The stack indeed tries to avoid | ||
201 | reordering. The one exception is under administrator control: it is | ||
202 | possible to construct a packet scheduler configuration that delays | ||
203 | segments from the same stream differently. Such a setup would be | ||
204 | unusual. | ||
205 | |||
206 | |||
207 | 2. Data Interfaces | ||
208 | |||
209 | Timestamps are read using the ancillary data feature of recvmsg(). | ||
210 | See `man 3 cmsg` for details of this interface. The socket manual | ||
211 | page (`man 7 socket`) describes how timestamps generated with | ||
212 | SO_TIMESTAMP and SO_TIMESTAMPNS records can be retrieved. | ||
213 | |||
214 | |||
215 | 2.1 SCM_TIMESTAMPING records | ||
216 | |||
217 | These timestamps are returned in a control message with cmsg_level | ||
218 | SOL_SOCKET, cmsg_type SCM_TIMESTAMPING, and payload of type | ||
56 | 219 | ||
57 | struct scm_timestamping { | 220 | struct scm_timestamping { |
58 | struct timespec systime; | 221 | struct timespec ts[3]; |
59 | struct timespec hwtimetrans; | ||
60 | struct timespec hwtimeraw; | ||
61 | }; | 222 | }; |
62 | 223 | ||
63 | recvmsg() can be used to get this control message for regular incoming | 224 | The structure can return up to three timestamps. This is a legacy |
64 | packets. For send time stamps the outgoing packet is looped back to | 225 | feature. Only one field is non-zero at any time. Most timestamps |
65 | the socket's error queue with the send time stamp(s) attached. It can | 226 | are passed in ts[0]. Hardware timestamps are passed in ts[2]. |
66 | be received with recvmsg(flags=MSG_ERRQUEUE). The call returns the | 227 | |
67 | original outgoing packet data including all headers preprended down to | 228 | ts[1] used to hold hardware timestamps converted to system time. |
68 | and including the link layer, the scm_timestamping control message and | 229 | Instead, expose the hardware clock device on the NIC directly as |
69 | a sock_extended_err control message with ee_errno==ENOMSG and | 230 | a HW PTP clock source, to allow time conversion in userspace and |
70 | ee_origin==SO_EE_ORIGIN_TIMESTAMPING. A socket with such a pending | 231 | optionally synchronize system time with a userspace PTP stack such |
71 | bounced packet is ready for reading as far as select() is concerned. | 232 | as linuxptp. For the PTP clock API, see Documentation/ptp/ptp.txt. |
72 | If the outgoing packet has to be fragmented, then only the first | 233 | |
73 | fragment is time stamped and returned to the sending socket. | 234 | 2.1.1 Transmit timestamps with MSG_ERRQUEUE |
74 | 235 | ||
75 | All three values correspond to the same event in time, but were | 236 | For transmit timestamps the outgoing packet is looped back to the |
76 | generated in different ways. Each of these values may be empty (= all | 237 | socket's error queue with the send timestamp(s) attached. A process |
77 | zero), in which case no such value was available. If the application | 238 | receives the timestamps by calling recvmsg() with flag MSG_ERRQUEUE |
78 | is not interested in some of these values, they can be left blank to | 239 | set and with a msg_control buffer sufficiently large to receive the |
79 | avoid the potential overhead of calculating them. | 240 | relevant metadata structures. The recvmsg call returns the original |
80 | 241 | outgoing data packet with two ancillary messages attached. | |
81 | systime is the value of the system time at that moment. This | 242 | |
82 | corresponds to the value also returned via SO_TIMESTAMP[NS]. If the | 243 | A message of cmsg_level SOL_IP(V6) and cmsg_type IP(V6)_RECVERR |
83 | time stamp was generated by hardware, then this field is | 244 | embeds a struct sock_extended_err. This defines the error type. For |
84 | empty. Otherwise it is filled in if SOF_TIMESTAMPING_SOFTWARE is | 245 | timestamps, the ee_errno field is ENOMSG. The other ancillary message |
85 | set. | 246 | will have cmsg_level SOL_SOCKET and cmsg_type SCM_TIMESTAMPING. This |
86 | 247 | embeds the struct scm_timestamping. | |
87 | hwtimeraw is the original hardware time stamp. Filled in if | 248 | |
88 | SOF_TIMESTAMPING_RAW_HARDWARE is set. No assumptions about its | 249 | |
89 | relation to system time should be made. | 250 | 2.1.1.2 Timestamp types |
90 | 251 | ||
91 | hwtimetrans is always zero. This field is deprecated. It used to hold | 252 | The semantics of the three struct timespec are defined by field |
92 | hw timestamps converted to system time. Instead, expose the hardware | 253 | ee_info in the extended error structure. It contains a value of |
93 | clock device on the NIC directly as a HW PTP clock source, to allow | 254 | type SCM_TSTAMP_* to define the actual timestamp passed in |
94 | time conversion in userspace and optionally synchronize system time | 255 | scm_timestamping. |
95 | with a userspace PTP stack such as linuxptp. For the PTP clock API, | 256 | |
96 | see Documentation/ptp/ptp.txt. | 257 | The SCM_TSTAMP_* types are 1:1 matches to the SOF_TIMESTAMPING_* |
97 | 258 | control fields discussed previously, with one exception. For legacy | |
98 | 259 | reasons, SCM_TSTAMP_SND is equal to zero and can be set for both | |
99 | SIOCSHWTSTAMP, SIOCGHWTSTAMP: | 260 | SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE. It |
261 | is the first if ts[2] is non-zero, the second otherwise, in which | ||
262 | case the timestamp is stored in ts[0]. | ||
263 | |||
264 | |||
265 | 2.1.1.3 Fragmentation | ||
266 | |||
267 | Fragmentation of outgoing datagrams is rare, but is possible, e.g., by | ||
268 | explicitly disabling PMTU discovery. If an outgoing packet is fragmented, | ||
269 | then only the first fragment is timestamped and returned to the sending | ||
270 | socket. | ||
271 | |||
272 | |||
273 | 2.1.1.4 Packet Payload | ||
274 | |||
275 | The calling application is often not interested in receiving the whole | ||
276 | packet payload that it passed to the stack originally: the socket | ||
277 | error queue mechanism is just a method to piggyback the timestamp on. | ||
278 | In this case, the application can choose to read datagrams with a | ||
279 | smaller buffer, possibly even of length 0. The payload is truncated | ||
280 | accordingly. Until the process calls recvmsg() on the error queue, | ||
281 | however, the full packet is queued, taking up budget from SO_RCVBUF. | ||
282 | |||
283 | |||
284 | 2.1.1.5 Blocking Read | ||
285 | |||
286 | Reading from the error queue is always a non-blocking operation. To | ||
287 | block waiting on a timestamp, use poll or select. poll() will return | ||
288 | POLLERR in pollfd.revents if any data is ready on the error queue. | ||
289 | There is no need to pass this flag in pollfd.events. This flag is | ||
290 | ignored on request. See also `man 2 poll`. | ||
291 | |||
292 | |||
293 | 2.1.2 Receive timestamps | ||
294 | |||
295 | On reception, there is no reason to read from the socket error queue. | ||
296 | The SCM_TIMESTAMPING ancillary data is sent along with the packet data | ||
297 | on a normal recvmsg(). Since this is not a socket error, it is not | ||
298 | accompanied by a message SOL_IP(V6)/IP(V6)_RECVERR. In this case, | ||
299 | the meaning of the three fields in struct scm_timestamping is | ||
300 | implicitly defined. ts[0] holds a software timestamp if set, ts[1] | ||
301 | is again deprecated and ts[2] holds a hardware timestamp if set. | ||
302 | |||
303 | |||
304 | 3. Hardware Timestamping configuration: SIOCSHWTSTAMP and SIOCGHWTSTAMP | ||
100 | 305 | ||
101 | Hardware time stamping must also be initialized for each device driver | 306 | Hardware time stamping must also be initialized for each device driver |
102 | that is expected to do hardware time stamping. The parameter is defined in | 307 | that is expected to do hardware time stamping. The parameter is defined in |
@@ -167,8 +372,7 @@ enum { | |||
167 | */ | 372 | */ |
168 | }; | 373 | }; |
169 | 374 | ||
170 | 375 | 3.1 Hardware Timestamping Implementation: Device Drivers | |
171 | DEVICE IMPLEMENTATION | ||
172 | 376 | ||
173 | A driver which supports hardware time stamping must support the | 377 | A driver which supports hardware time stamping must support the |
174 | SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with | 378 | SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with |
diff --git a/Documentation/networking/timestamping/Makefile b/Documentation/networking/timestamping/Makefile index d934afc8306a..95e239c70076 100644 --- a/Documentation/networking/timestamping/Makefile +++ b/Documentation/networking/timestamping/Makefile | |||
@@ -1,14 +1,20 @@ | |||
1 | # To compile, from the source root | ||
2 | # | ||
3 | # make headers_install | ||
4 | # make M=Documentation | ||
5 | |||
1 | # kbuild trick to avoid linker error. Can be omitted if a module is built. | 6 | # kbuild trick to avoid linker error. Can be omitted if a module is built. |
2 | obj- := dummy.o | 7 | obj- := dummy.o |
3 | 8 | ||
4 | # List of programs to build | 9 | # List of programs to build |
5 | hostprogs-y := timestamping hwtstamp_config | 10 | hostprogs-y := timestamping txtimestamp hwtstamp_config |
6 | 11 | ||
7 | # Tell kbuild to always build the programs | 12 | # Tell kbuild to always build the programs |
8 | always := $(hostprogs-y) | 13 | always := $(hostprogs-y) |
9 | 14 | ||
10 | HOSTCFLAGS_timestamping.o += -I$(objtree)/usr/include | 15 | HOSTCFLAGS_timestamping.o += -I$(objtree)/usr/include |
16 | HOSTCFLAGS_txtimestamp.o += -I$(objtree)/usr/include | ||
11 | HOSTCFLAGS_hwtstamp_config.o += -I$(objtree)/usr/include | 17 | HOSTCFLAGS_hwtstamp_config.o += -I$(objtree)/usr/include |
12 | 18 | ||
13 | clean: | 19 | clean: |
14 | rm -f timestamping hwtstamp_config | 20 | rm -f timestamping txtimestamp hwtstamp_config |
diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c new file mode 100644 index 000000000000..e5b0b98a89af --- /dev/null +++ b/Documentation/networking/timestamping/txtimestamp.c | |||
@@ -0,0 +1,470 @@ | |||
1 | /* | ||
2 | * Copyright 2014 Google Inc. | ||
3 | * Author: willemb@google.com (Willem de Bruijn) | ||
4 | * | ||
5 | * Test software tx timestamping, including | ||
6 | * | ||
7 | * - SCHED, SND and ACK timestamps | ||
8 | * - RAW, UDP and TCP | ||
9 | * - IPv4 and IPv6 | ||
10 | * - various packet sizes (to test GSO and TSO) | ||
11 | * | ||
12 | * Consult the command line arguments for help on running | ||
13 | * the various testcases. | ||
14 | * | ||
15 | * This test requires a dummy TCP server. | ||
16 | * A simple `nc6 [-u] -l -p $DESTPORT` will do | ||
17 | * | ||
18 | * | ||
19 | * This program is free software; you can redistribute it and/or modify it | ||
20 | * under the terms and conditions of the GNU General Public License, | ||
21 | * version 2, as published by the Free Software Foundation. | ||
22 | * | ||
23 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
24 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
25 | * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for | ||
26 | * more details. | ||
27 | * | ||
28 | * You should have received a copy of the GNU General Public License along with | ||
29 | * this program; if not, write to the Free Software Foundation, Inc., | ||
30 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
31 | */ | ||
32 | |||
33 | #include <arpa/inet.h> | ||
34 | #include <asm/types.h> | ||
35 | #include <error.h> | ||
36 | #include <errno.h> | ||
37 | #include <linux/errqueue.h> | ||
38 | #include <linux/if_ether.h> | ||
39 | #include <linux/net_tstamp.h> | ||
40 | #include <netdb.h> | ||
41 | #include <net/if.h> | ||
42 | #include <netinet/in.h> | ||
43 | #include <netinet/ip.h> | ||
44 | #include <netinet/udp.h> | ||
45 | #include <netinet/tcp.h> | ||
46 | #include <netpacket/packet.h> | ||
47 | #include <poll.h> | ||
48 | #include <stdarg.h> | ||
49 | #include <stdint.h> | ||
50 | #include <stdio.h> | ||
51 | #include <stdlib.h> | ||
52 | #include <string.h> | ||
53 | #include <sys/ioctl.h> | ||
54 | #include <sys/select.h> | ||
55 | #include <sys/socket.h> | ||
56 | #include <sys/time.h> | ||
57 | #include <sys/types.h> | ||
58 | #include <time.h> | ||
59 | #include <unistd.h> | ||
60 | |||
/* command line parameters */
static int cfg_proto = SOCK_STREAM;	/* socket type handed to socket() */
static int cfg_ipproto = IPPROTO_TCP;	/* protocol handed to socket() */
static int cfg_payload_len = 10;	/* payload bytes per send */
static int cfg_num_pkts = 4;		/* number of sends per test run */
static uint16_t dest_port = 9000;	/* destination port, host byte order */
static int do_ipv4 = 1;			/* presumably: run the IPv4 tests */
static int do_ipv6 = 1;			/* presumably: run the IPv6 tests */

/* destination addresses used for connect()/sendto() */
static struct sockaddr_in daddr;
static struct sockaddr_in6 daddr6;

/* last timestamp printed, used to report the delta to the next one */
static struct timespec ts_prev;
73 | |||
74 | static void __print_timestamp(const char *name, struct timespec *cur, | ||
75 | uint32_t key, int payload_len) | ||
76 | { | ||
77 | if (!(cur->tv_sec | cur->tv_nsec)) | ||
78 | return; | ||
79 | |||
80 | fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)", | ||
81 | name, cur->tv_sec, cur->tv_nsec / 1000, | ||
82 | key, payload_len); | ||
83 | |||
84 | if ((ts_prev.tv_sec | ts_prev.tv_nsec)) { | ||
85 | int64_t cur_ms, prev_ms; | ||
86 | |||
87 | cur_ms = (long) cur->tv_sec * 1000 * 1000; | ||
88 | cur_ms += cur->tv_nsec / 1000; | ||
89 | |||
90 | prev_ms = (long) ts_prev.tv_sec * 1000 * 1000; | ||
91 | prev_ms += ts_prev.tv_nsec / 1000; | ||
92 | |||
93 | fprintf(stderr, " (%+ld us)", cur_ms - prev_ms); | ||
94 | } | ||
95 | |||
96 | ts_prev = *cur; | ||
97 | fprintf(stderr, "\n"); | ||
98 | } | ||
99 | |||
/*
 * Print the current wall-clock time, labelled " USR", through
 * __print_timestamp() with seq and len both 0.
 */
static void print_timestamp_usr(void)
{
	struct timeval now;	/* gettimeofday(): avoid dependency on -lrt */
	struct timespec stamp;

	gettimeofday(&now, NULL);

	/* convert usec resolution to the timespec the printer expects */
	stamp.tv_nsec = now.tv_usec * 1000;
	stamp.tv_sec = now.tv_sec;

	__print_timestamp(" USR", &stamp, 0, 0);
}
111 | |||
/*
 * Print the timestamp held in tss->ts[0] with a label derived from
 * tstype (one of the SCM_TSTAMP_* values).
 *
 * tskey and payload_len are forwarded unchanged to __print_timestamp().
 * An unrecognised tstype terminates the program through error().
 */
static void print_timestamp(struct scm_timestamping *tss, int tstype,
			    int tskey, int payload_len)
{
	const char *label;

	if (tstype == SCM_TSTAMP_SCHED)
		label = " ENQ";
	else if (tstype == SCM_TSTAMP_SND)
		label = " SND";
	else if (tstype == SCM_TSTAMP_ACK)
		label = " ACK";
	else
		error(1, 0, "unknown timestamp type: %u",
		      tstype);

	__print_timestamp(label, &tss->ts[0], tskey, payload_len);
}
133 | |||
/*
 * Wait up to 100 ms for an event on fd.
 *
 * No bits are requested in pollfd.events, so poll() only returns early
 * for the conditions it reports unconditionally, such as POLLERR when
 * data is ready on the socket error queue.
 *
 * Terminates the program if poll() fails or if the timeout expires.
 */
static void __poll(int fd)
{
	struct pollfd pollfd;
	int ret;

	memset(&pollfd, 0, sizeof(pollfd));
	pollfd.fd = fd;
	ret = poll(&pollfd, 1, 100);
	if (ret < 0)
		error(1, errno, "poll");
	/* a timeout does not set errno: report it without a stale value */
	if (ret == 0)
		error(1, 0, "poll: timeout");
}
145 | |||
146 | static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) | ||
147 | { | ||
148 | struct sock_extended_err *serr = NULL; | ||
149 | struct scm_timestamping *tss = NULL; | ||
150 | struct cmsghdr *cm; | ||
151 | |||
152 | for (cm = CMSG_FIRSTHDR(msg); | ||
153 | cm && cm->cmsg_len; | ||
154 | cm = CMSG_NXTHDR(msg, cm)) { | ||
155 | if (cm->cmsg_level == SOL_SOCKET && | ||
156 | cm->cmsg_type == SCM_TIMESTAMPING) { | ||
157 | tss = (void *) CMSG_DATA(cm); | ||
158 | } else if ((cm->cmsg_level == SOL_IP && | ||
159 | cm->cmsg_type == IP_RECVERR) || | ||
160 | (cm->cmsg_level == SOL_IPV6 && | ||
161 | cm->cmsg_type == IPV6_RECVERR)) { | ||
162 | |||
163 | serr = (void *) CMSG_DATA(cm); | ||
164 | if (serr->ee_errno != ENOMSG || | ||
165 | serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { | ||
166 | fprintf(stderr, "unknown ip error %d %d\n", | ||
167 | serr->ee_errno, | ||
168 | serr->ee_origin); | ||
169 | serr = NULL; | ||
170 | } | ||
171 | } else | ||
172 | fprintf(stderr, "unknown cmsg %d,%d\n", | ||
173 | cm->cmsg_level, cm->cmsg_type); | ||
174 | } | ||
175 | |||
176 | if (serr && tss) | ||
177 | print_timestamp(tss, serr->ee_info, serr->ee_data, payload_len); | ||
178 | } | ||
179 | |||
180 | static int recv_errmsg(int fd) | ||
181 | { | ||
182 | static char ctrl[1024 /* overprovision*/]; | ||
183 | static struct msghdr msg; | ||
184 | struct iovec entry; | ||
185 | static char *data; | ||
186 | int ret = 0; | ||
187 | |||
188 | data = malloc(cfg_payload_len); | ||
189 | if (!data) | ||
190 | error(1, 0, "malloc"); | ||
191 | |||
192 | memset(&msg, 0, sizeof(msg)); | ||
193 | memset(&entry, 0, sizeof(entry)); | ||
194 | memset(ctrl, 0, sizeof(ctrl)); | ||
195 | memset(data, 0, sizeof(data)); | ||
196 | |||
197 | entry.iov_base = data; | ||
198 | entry.iov_len = cfg_payload_len; | ||
199 | msg.msg_iov = &entry; | ||
200 | msg.msg_iovlen = 1; | ||
201 | msg.msg_name = NULL; | ||
202 | msg.msg_namelen = 0; | ||
203 | msg.msg_control = ctrl; | ||
204 | msg.msg_controllen = sizeof(ctrl); | ||
205 | |||
206 | ret = recvmsg(fd, &msg, MSG_ERRQUEUE); | ||
207 | if (ret == -1 && errno != EAGAIN) | ||
208 | error(1, errno, "recvmsg"); | ||
209 | |||
210 | __recv_errmsg_cmsg(&msg, ret); | ||
211 | |||
212 | free(data); | ||
213 | return ret == -1; | ||
214 | } | ||
215 | |||
216 | static void do_test(int family, unsigned int opt) | ||
217 | { | ||
218 | char *buf; | ||
219 | int fd, i, val, total_len; | ||
220 | |||
221 | if (family == IPPROTO_IPV6 && cfg_proto != SOCK_STREAM) { | ||
222 | /* due to lack of checksum generation code */ | ||
223 | fprintf(stderr, "test: skipping datagram over IPv6\n"); | ||
224 | return; | ||
225 | } | ||
226 | |||
227 | total_len = cfg_payload_len; | ||
228 | if (cfg_proto == SOCK_RAW) { | ||
229 | total_len += sizeof(struct udphdr); | ||
230 | if (cfg_ipproto == IPPROTO_RAW) | ||
231 | total_len += sizeof(struct iphdr); | ||
232 | } | ||
233 | |||
234 | buf = malloc(total_len); | ||
235 | if (!buf) | ||
236 | error(1, 0, "malloc"); | ||
237 | |||
238 | fd = socket(family, cfg_proto, cfg_ipproto); | ||
239 | if (fd < 0) | ||
240 | error(1, errno, "socket"); | ||
241 | |||
242 | if (cfg_proto == SOCK_STREAM) { | ||
243 | val = 1; | ||
244 | if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, | ||
245 | (char*) &val, sizeof(val))) | ||
246 | error(1, 0, "setsockopt no nagle"); | ||
247 | |||
248 | if (family == PF_INET) { | ||
249 | if (connect(fd, (void *) &daddr, sizeof(daddr))) | ||
250 | error(1, errno, "connect ipv4"); | ||
251 | } else { | ||
252 | if (connect(fd, (void *) &daddr6, sizeof(daddr6))) | ||
253 | error(1, errno, "connect ipv6"); | ||
254 | } | ||
255 | } | ||
256 | |||
257 | opt |= SOF_TIMESTAMPING_SOFTWARE | | ||
258 | SOF_TIMESTAMPING_OPT_ID; | ||
259 | if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, | ||
260 | (char *) &opt, sizeof(opt))) | ||
261 | error(1, 0, "setsockopt timestamping"); | ||
262 | |||
263 | for (i = 0; i < cfg_num_pkts; i++) { | ||
264 | memset(&ts_prev, 0, sizeof(ts_prev)); | ||
265 | memset(buf, 'a' + i, total_len); | ||
266 | buf[total_len - 2] = '\n'; | ||
267 | buf[total_len - 1] = '\0'; | ||
268 | |||
269 | if (cfg_proto == SOCK_RAW) { | ||
270 | struct udphdr *udph; | ||
271 | int off = 0; | ||
272 | |||
273 | if (cfg_ipproto == IPPROTO_RAW) { | ||
274 | struct iphdr *iph = (void *) buf; | ||
275 | |||
276 | memset(iph, 0, sizeof(*iph)); | ||
277 | iph->ihl = 5; | ||
278 | iph->version = 4; | ||
279 | iph->ttl = 2; | ||
280 | iph->daddr = daddr.sin_addr.s_addr; | ||
281 | iph->protocol = IPPROTO_UDP; | ||
282 | /* kernel writes saddr, csum, len */ | ||
283 | |||
284 | off = sizeof(*iph); | ||
285 | } | ||
286 | |||
287 | udph = (void *) buf + off; | ||
288 | udph->source = ntohs(9000); /* random spoof */ | ||
289 | udph->dest = ntohs(dest_port); | ||
290 | udph->len = ntohs(sizeof(*udph) + cfg_payload_len); | ||
291 | udph->check = 0; /* not allowed for IPv6 */ | ||
292 | } | ||
293 | |||
294 | print_timestamp_usr(); | ||
295 | if (cfg_proto != SOCK_STREAM) { | ||
296 | if (family == PF_INET) | ||
297 | val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr)); | ||
298 | else | ||
299 | val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6)); | ||
300 | } else { | ||
301 | val = send(fd, buf, cfg_payload_len, 0); | ||
302 | } | ||
303 | if (val != total_len) | ||
304 | error(1, errno, "send"); | ||
305 | |||
306 | /* wait for all errors to be queued, else ACKs arrive OOO */ | ||
307 | usleep(50 * 1000); | ||
308 | |||
309 | __poll(fd); | ||
310 | |||
311 | while (!recv_errmsg(fd)) {} | ||
312 | } | ||
313 | |||
314 | if (close(fd)) | ||
315 | error(1, errno, "close"); | ||
316 | |||
317 | free(buf); | ||
318 | usleep(400 * 1000); | ||
319 | } | ||
320 | |||
/*
 * Print the command line synopsis to stderr and terminate with exit
 * status 1. Never returns.
 *
 * @filepath: program name shown in the "Usage:" line
 */
static void __attribute__((noreturn)) usage(const char *filepath)
{
	static const char option_help[] =
		"\nwhere options are:\n"
		" -4: only IPv4\n"
		" -6: only IPv6\n"
		" -h: show this message\n"
		" -l N: send N bytes at a time\n"
		" -r: use raw\n"
		" -R: use raw (IP_HDRINCL)\n"
		" -p N: connect to port N\n"
		" -u: use udp\n";

	fprintf(stderr, "\nUsage: %s [options] hostname\n%s",
		filepath, option_help);
	exit(1);
}
336 | |||
337 | static void parse_opt(int argc, char **argv) | ||
338 | { | ||
339 | int proto_count = 0; | ||
340 | char c; | ||
341 | |||
342 | while ((c = getopt(argc, argv, "46hl:p:rRu")) != -1) { | ||
343 | switch (c) { | ||
344 | case '4': | ||
345 | do_ipv6 = 0; | ||
346 | break; | ||
347 | case '6': | ||
348 | do_ipv4 = 0; | ||
349 | break; | ||
350 | case 'r': | ||
351 | proto_count++; | ||
352 | cfg_proto = SOCK_RAW; | ||
353 | cfg_ipproto = IPPROTO_UDP; | ||
354 | break; | ||
355 | case 'R': | ||
356 | proto_count++; | ||
357 | cfg_proto = SOCK_RAW; | ||
358 | cfg_ipproto = IPPROTO_RAW; | ||
359 | break; | ||
360 | case 'u': | ||
361 | proto_count++; | ||
362 | cfg_proto = SOCK_DGRAM; | ||
363 | cfg_ipproto = IPPROTO_UDP; | ||
364 | break; | ||
365 | case 'l': | ||
366 | cfg_payload_len = strtoul(optarg, NULL, 10); | ||
367 | break; | ||
368 | case 'p': | ||
369 | dest_port = strtoul(optarg, NULL, 10); | ||
370 | break; | ||
371 | case 'h': | ||
372 | default: | ||
373 | usage(argv[0]); | ||
374 | } | ||
375 | } | ||
376 | |||
377 | if (!cfg_payload_len) | ||
378 | error(1, 0, "payload may not be nonzero"); | ||
379 | if (cfg_proto != SOCK_STREAM && cfg_payload_len > 1472) | ||
380 | error(1, 0, "udp packet might exceed expected MTU"); | ||
381 | if (!do_ipv4 && !do_ipv6) | ||
382 | error(1, 0, "pass -4 or -6, not both"); | ||
383 | if (proto_count > 1) | ||
384 | error(1, 0, "pass -r, -R or -u, not multiple"); | ||
385 | |||
386 | if (optind != argc - 1) | ||
387 | error(1, 0, "missing required hostname argument"); | ||
388 | } | ||
389 | |||
390 | static void resolve_hostname(const char *hostname) | ||
391 | { | ||
392 | struct addrinfo *addrs, *cur; | ||
393 | int have_ipv4 = 0, have_ipv6 = 0; | ||
394 | |||
395 | if (getaddrinfo(hostname, NULL, NULL, &addrs)) | ||
396 | error(1, errno, "getaddrinfo"); | ||
397 | |||
398 | cur = addrs; | ||
399 | while (cur && !have_ipv4 && !have_ipv6) { | ||
400 | if (!have_ipv4 && cur->ai_family == AF_INET) { | ||
401 | memcpy(&daddr, cur->ai_addr, sizeof(daddr)); | ||
402 | daddr.sin_port = htons(dest_port); | ||
403 | have_ipv4 = 1; | ||
404 | } | ||
405 | else if (!have_ipv6 && cur->ai_family == AF_INET6) { | ||
406 | memcpy(&daddr6, cur->ai_addr, sizeof(daddr6)); | ||
407 | daddr6.sin6_port = htons(dest_port); | ||
408 | have_ipv6 = 1; | ||
409 | } | ||
410 | cur = cur->ai_next; | ||
411 | } | ||
412 | if (addrs) | ||
413 | freeaddrinfo(addrs); | ||
414 | |||
415 | do_ipv4 &= have_ipv4; | ||
416 | do_ipv6 &= have_ipv6; | ||
417 | } | ||
418 | |||
419 | static void do_main(int family) | ||
420 | { | ||
421 | fprintf(stderr, "family: %s\n", | ||
422 | family == PF_INET ? "INET" : "INET6"); | ||
423 | |||
424 | fprintf(stderr, "test SND\n"); | ||
425 | do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE); | ||
426 | |||
427 | fprintf(stderr, "test ENQ\n"); | ||
428 | do_test(family, SOF_TIMESTAMPING_TX_SCHED); | ||
429 | |||
430 | fprintf(stderr, "test ENQ + SND\n"); | ||
431 | do_test(family, SOF_TIMESTAMPING_TX_SCHED | | ||
432 | SOF_TIMESTAMPING_TX_SOFTWARE); | ||
433 | |||
434 | if (cfg_proto == SOCK_STREAM) { | ||
435 | fprintf(stderr, "\ntest ACK\n"); | ||
436 | do_test(family, SOF_TIMESTAMPING_TX_ACK); | ||
437 | |||
438 | fprintf(stderr, "\ntest SND + ACK\n"); | ||
439 | do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE | | ||
440 | SOF_TIMESTAMPING_TX_ACK); | ||
441 | |||
442 | fprintf(stderr, "\ntest ENQ + SND + ACK\n"); | ||
443 | do_test(family, SOF_TIMESTAMPING_TX_SCHED | | ||
444 | SOF_TIMESTAMPING_TX_SOFTWARE | | ||
445 | SOF_TIMESTAMPING_TX_ACK); | ||
446 | } | ||
447 | } | ||
448 | |||
/*
 * Printable protocol name for each supported socket type. Entries are
 * indexed by the SOCK_* constant itself, so the table stays correct on
 * platforms where those constants are not numbered 1, 2, 3.
 */
const char *sock_names[] = {
	[SOCK_STREAM]	= "TCP",
	[SOCK_DGRAM]	= "UDP",
	[SOCK_RAW]	= "RAW",
};
450 | |||
451 | int main(int argc, char **argv) | ||
452 | { | ||
453 | if (argc == 1) | ||
454 | usage(argv[0]); | ||
455 | |||
456 | parse_opt(argc, argv); | ||
457 | resolve_hostname(argv[argc - 1]); | ||
458 | |||
459 | fprintf(stderr, "protocol: %s\n", sock_names[cfg_proto]); | ||
460 | fprintf(stderr, "payload: %u\n", cfg_payload_len); | ||
461 | fprintf(stderr, "server port: %u\n", dest_port); | ||
462 | fprintf(stderr, "\n"); | ||
463 | |||
464 | if (do_ipv4) | ||
465 | do_main(PF_INET); | ||
466 | if (do_ipv6) | ||
467 | do_main(PF_INET6); | ||
468 | |||
469 | return 0; | ||
470 | } | ||