aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2014-08-05 19:36:01 -0400
committer David S. Miller <davem@davemloft.net> 2014-08-05 19:36:01 -0400
commit 618896e6d00773d6d50e0b19f660af22fa26cd61 (patch)
tree 83f4b5e777a13535434be99bee2e764a6677e647
parent a2b81b35f9e5ade210e4df2001f7a30ac390114d (diff)
parent e1c8a607b28190cd09a271508aa3025d3c2f312e (diff)
Merge branch 'net-timestamp-next'

Willem de Bruijn says:

====================
net-timestamp: new tx tstamps and tcp

Extend socket tx timestamping:
- allow multiple types of software timestamps aside from send (1)
- add software timestamp on enter packet scheduling (4)
- add software timestamp for TCP (5)
- add software timestamp for TCP on ACK (6)

The sk_flags option space is nearly exhausted. Also move the
many timestamp options to a new sk->sk_tsflags (2).

To disambiguate data when tstamps may arrive out of order,
optionally return a sequential ID assigned at send (3).

Extend Linux tx timestamping to monitoring of latency
incurred within the kernel stack and to protocols embedded in TCP.
Complex kernel setups may have multiple layers of queueing, including
multiple instances of packet scheduling, and many classes per layer.
Many applications embed discrete payloads into TCP bytestreams for
reliability, flow control, et cetera. Detecting application tail
latency in such scenarios relies on identifying the exact queue
responsible if on the host, or the network latency if otherwise.

Changelog:
v4->v5
  - define SCM_TSTAMP_SND == 0, for legacy behavior
  - add TCP tstamps without changing the generated byte stream
    - modify GSO and ACK to find offset:
      slightly more complex than previous invariant that it is
      the last byte
  - consistent naming of packet scheduling
    - rename SCM_TSTAMP_ENQ to SCM_TSTAMP_SCHED
  - add unique key in ee_data
  - add id field in ee_info to disambiguate tstamps
    - optional, only on new flag SOF_TIMESTAMPING_OPT_ID
    - for bytestream, in bytes

v3->v4
  - (v3 review comment) removed skb->mark packet identification (*A)
  - (v3 review comment) fixed indentation
  - tcp: fixed poll() to return POLLERR on non-zero queue
  - rebased to work without syststamp
  - comments: removed all traces of MSG_TSTAMP_.. (*B)

v2->v3
  - extend the SO_TIMESTAMPING API, instead of defining a new one.
  - add protocol independent support to correlate tstamps with
    data, based on returning skb->mark.
  - removed no-payload optimization and documentation (for now):
    I have a follow-on patch that reintroduces MSG_TSTAMP along with a
    new socket option SOF_TIMESTAMPING_OPT_ONFLAG. This is equivalent
    to sequence setsockopt(<enable>); send(..); setsockopt(<disable>),
    but avoids the need to define a MSG_TSTAMP_<TYPE> for each type.
    I will leave these three patches as follow-on, as this patchset is
    large enough as is.

v1->v2
  - expand timestamping (existing and new) to SOCK_RAW and ping sockets
  - rename sock_errqueue_timestamping to scm_timestamping
  - change timestamp data format: do not add fields to scm_timestamping.
    Doing so could break legacy applications. Instead, communicate
    through an existing, but unused, field in the error message.
  - rename SOF_.._OPT_TX_NO_PAYLOAD to shorter SOF_.._OPT_TSONLY
  - move msg_tstamp test app out of patchset and to github
    git://github.com/wdebruij/kerneltools.git
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

-rw-r--r--  include/linux/skbuff.h           18
-rw-r--r--  include/net/sock.h               33
-rw-r--r--  include/uapi/linux/errqueue.h    20
-rw-r--r--  include/uapi/linux/net_tstamp.h  10
-rw-r--r--  net/core/dev.c                    4
-rw-r--r--  net/core/skbuff.c                20
-rw-r--r--  net/core/sock.c                  37
-rw-r--r--  net/ipv4/ip_output.c              6
-rw-r--r--  net/ipv4/tcp.c                   22
-rw-r--r--  net/ipv4/tcp_input.c              6
-rw-r--r--  net/ipv4/tcp_offload.c           18
-rw-r--r--  net/ipv6/ip6_output.c             9
-rw-r--r--  net/socket.c                     31
13 files changed, 170 insertions, 64 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 281deced7469..11c270551d25 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -229,7 +229,7 @@ enum {
229 /* generate hardware time stamp */ 229 /* generate hardware time stamp */
230 SKBTX_HW_TSTAMP = 1 << 0, 230 SKBTX_HW_TSTAMP = 1 << 0,
231 231
232 /* generate software time stamp */ 232 /* generate software time stamp when queueing packet to NIC */
233 SKBTX_SW_TSTAMP = 1 << 1, 233 SKBTX_SW_TSTAMP = 1 << 1,
234 234
235 /* device driver is going to provide hardware time stamp */ 235 /* device driver is going to provide hardware time stamp */
@@ -247,8 +247,19 @@ enum {
247 * all frags to avoid possible bad checksum 247 * all frags to avoid possible bad checksum
248 */ 248 */
249 SKBTX_SHARED_FRAG = 1 << 5, 249 SKBTX_SHARED_FRAG = 1 << 5,
250
251 /* generate software time stamp when entering packet scheduling */
252 SKBTX_SCHED_TSTAMP = 1 << 6,
253
254 /* generate software timestamp on peer data acknowledgment */
255 SKBTX_ACK_TSTAMP = 1 << 7,
250}; 256};
251 257
258#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \
259 SKBTX_SCHED_TSTAMP | \
260 SKBTX_ACK_TSTAMP)
261#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
262
252/* 263/*
253 * The callback notifies userspace to release buffers when skb DMA is done in 264 * The callback notifies userspace to release buffers when skb DMA is done in
254 * lower device, the skb last reference should be 0 when calling this. 265 * lower device, the skb last reference should be 0 when calling this.
@@ -275,6 +286,7 @@ struct skb_shared_info {
275 unsigned short gso_type; 286 unsigned short gso_type;
276 struct sk_buff *frag_list; 287 struct sk_buff *frag_list;
277 struct skb_shared_hwtstamps hwtstamps; 288 struct skb_shared_hwtstamps hwtstamps;
289 u32 tskey;
278 __be32 ip6_frag_id; 290 __be32 ip6_frag_id;
279 291
280 /* 292 /*
@@ -2691,6 +2703,10 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
2691void skb_complete_tx_timestamp(struct sk_buff *skb, 2703void skb_complete_tx_timestamp(struct sk_buff *skb,
2692 struct skb_shared_hwtstamps *hwtstamps); 2704 struct skb_shared_hwtstamps *hwtstamps);
2693 2705
2706void __skb_tstamp_tx(struct sk_buff *orig_skb,
2707 struct skb_shared_hwtstamps *hwtstamps,
2708 struct sock *sk, int tstype);
2709
2694/** 2710/**
2695 * skb_tstamp_tx - queue clone of skb with send time stamps 2711 * skb_tstamp_tx - queue clone of skb with send time stamps
2696 * @orig_skb: the original outgoing packet 2712 * @orig_skb: the original outgoing packet
diff --git a/include/net/sock.h b/include/net/sock.h
index b91c8868ab8d..52fe0bc5598a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -67,6 +67,7 @@
67#include <linux/atomic.h> 67#include <linux/atomic.h>
68#include <net/dst.h> 68#include <net/dst.h>
69#include <net/checksum.h> 69#include <net/checksum.h>
70#include <linux/net_tstamp.h>
70 71
71struct cgroup; 72struct cgroup;
72struct cgroup_subsys; 73struct cgroup_subsys;
@@ -278,6 +279,8 @@ struct cg_proto;
278 * @sk_protinfo: private area, net family specific, when not using slab 279 * @sk_protinfo: private area, net family specific, when not using slab
279 * @sk_timer: sock cleanup timer 280 * @sk_timer: sock cleanup timer
280 * @sk_stamp: time stamp of last packet received 281 * @sk_stamp: time stamp of last packet received
282 * @sk_tsflags: SO_TIMESTAMPING socket options
283 * @sk_tskey: counter to disambiguate concurrent tstamp requests
281 * @sk_socket: Identd and reporting IO signals 284 * @sk_socket: Identd and reporting IO signals
282 * @sk_user_data: RPC layer private data 285 * @sk_user_data: RPC layer private data
283 * @sk_frag: cached page frag 286 * @sk_frag: cached page frag
@@ -411,6 +414,8 @@ struct sock {
411 void *sk_protinfo; 414 void *sk_protinfo;
412 struct timer_list sk_timer; 415 struct timer_list sk_timer;
413 ktime_t sk_stamp; 416 ktime_t sk_stamp;
417 u16 sk_tsflags;
418 u32 sk_tskey;
414 struct socket *sk_socket; 419 struct socket *sk_socket;
415 void *sk_user_data; 420 void *sk_user_data;
416 struct page_frag sk_frag; 421 struct page_frag sk_frag;
@@ -701,12 +706,7 @@ enum sock_flags {
701 SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ 706 SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
702 SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ 707 SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
703 SOCK_MEMALLOC, /* VM depends on this socket for swapping */ 708 SOCK_MEMALLOC, /* VM depends on this socket for swapping */
704 SOCK_TIMESTAMPING_TX_HARDWARE, /* %SOF_TIMESTAMPING_TX_HARDWARE */
705 SOCK_TIMESTAMPING_TX_SOFTWARE, /* %SOF_TIMESTAMPING_TX_SOFTWARE */
706 SOCK_TIMESTAMPING_RX_HARDWARE, /* %SOF_TIMESTAMPING_RX_HARDWARE */
707 SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */ 709 SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */
708 SOCK_TIMESTAMPING_SOFTWARE, /* %SOF_TIMESTAMPING_SOFTWARE */
709 SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */
710 SOCK_FASYNC, /* fasync() active */ 710 SOCK_FASYNC, /* fasync() active */
711 SOCK_RXQ_OVFL, 711 SOCK_RXQ_OVFL,
712 SOCK_ZEROCOPY, /* buffers from userspace */ 712 SOCK_ZEROCOPY, /* buffers from userspace */
@@ -2160,18 +2160,17 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
2160 2160
2161 /* 2161 /*
2162 * generate control messages if 2162 * generate control messages if
2163 * - receive time stamping in software requested (SOCK_RCVTSTAMP 2163 * - receive time stamping in software requested
2164 * or SOCK_TIMESTAMPING_RX_SOFTWARE)
2165 * - software time stamp available and wanted 2164 * - software time stamp available and wanted
2166 * (SOCK_TIMESTAMPING_SOFTWARE)
2167 * - hardware time stamps available and wanted 2165 * - hardware time stamps available and wanted
2168 * SOCK_TIMESTAMPING_RAW_HARDWARE
2169 */ 2166 */
2170 if (sock_flag(sk, SOCK_RCVTSTAMP) || 2167 if (sock_flag(sk, SOCK_RCVTSTAMP) ||
2171 sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) || 2168 (sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
2172 (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) || 2169 (kt.tv64 &&
2170 (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE ||
2171 skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP)) ||
2173 (hwtstamps->hwtstamp.tv64 && 2172 (hwtstamps->hwtstamp.tv64 &&
2174 sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))) 2173 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
2175 __sock_recv_timestamp(msg, sk, skb); 2174 __sock_recv_timestamp(msg, sk, skb);
2176 else 2175 else
2177 sk->sk_stamp = kt; 2176 sk->sk_stamp = kt;
@@ -2187,11 +2186,11 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
2187 struct sk_buff *skb) 2186 struct sk_buff *skb)
2188{ 2187{
2189#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \ 2188#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \
2190 (1UL << SOCK_RCVTSTAMP) | \ 2189 (1UL << SOCK_RCVTSTAMP))
2191 (1UL << SOCK_TIMESTAMPING_SOFTWARE) | \ 2190#define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
2192 (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE)) 2191 SOF_TIMESTAMPING_RAW_HARDWARE)
2193 2192
2194 if (sk->sk_flags & FLAGS_TS_OR_DROPS) 2193 if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY)
2195 __sock_recv_ts_and_drops(msg, sk, skb); 2194 __sock_recv_ts_and_drops(msg, sk, skb);
2196 else 2195 else
2197 sk->sk_stamp = skb->tstamp; 2196 sk->sk_stamp = skb->tstamp;
@@ -2201,8 +2200,6 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
2201 * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped 2200 * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
2202 * @sk: socket sending this packet 2201 * @sk: socket sending this packet
2203 * @tx_flags: filled with instructions for time stamping 2202 * @tx_flags: filled with instructions for time stamping
2204 *
2205 * Currently only depends on SOCK_TIMESTAMPING* flags.
2206 */ 2203 */
2207void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); 2204void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags);
2208 2205
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index aacd4fb7102a..07bdce1f444a 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -22,5 +22,25 @@ struct sock_extended_err {
22 22
23#define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) 23#define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1))
24 24
25/**
26 * struct scm_timestamping - timestamps exposed through cmsg
27 *
28 * The timestamping interfaces SO_TIMESTAMPING, MSG_TSTAMP_*
29 * communicate network timestamps by passing this struct in a cmsg with
30 * recvmsg(). See Documentation/networking/timestamping.txt for details.
31 */
32struct scm_timestamping {
33 struct timespec ts[3];
34};
35
36/* The type of scm_timestamping, passed in sock_extended_err ee_info.
37 * This defines the type of ts[0]. For SCM_TSTAMP_SND only, if ts[0]
38 * is zero, then this is a hardware timestamp and recorded in ts[2].
39 */
40enum {
41 SCM_TSTAMP_SND, /* driver passed skb to NIC, or HW */
42 SCM_TSTAMP_SCHED, /* data entered the packet scheduler */
43 SCM_TSTAMP_ACK, /* data acknowledged by peer */
44};
25 45
26#endif /* _UAPI_LINUX_ERRQUEUE_H */ 46#endif /* _UAPI_LINUX_ERRQUEUE_H */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index f53879c0f590..ff354021bb69 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -20,9 +20,13 @@ enum {
20 SOF_TIMESTAMPING_SOFTWARE = (1<<4), 20 SOF_TIMESTAMPING_SOFTWARE = (1<<4),
21 SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5), 21 SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5),
22 SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6), 22 SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6),
23 SOF_TIMESTAMPING_MASK = 23 SOF_TIMESTAMPING_OPT_ID = (1<<7),
24 (SOF_TIMESTAMPING_RAW_HARDWARE - 1) | 24 SOF_TIMESTAMPING_TX_SCHED = (1<<8),
25 SOF_TIMESTAMPING_RAW_HARDWARE 25 SOF_TIMESTAMPING_TX_ACK = (1<<9),
26
27 SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK,
28 SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
29 SOF_TIMESTAMPING_LAST
26}; 30};
27 31
28/** 32/**
diff --git a/net/core/dev.c b/net/core/dev.c
index b370230fe1d3..1c15b189c52b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
132#include <linux/hashtable.h> 132#include <linux/hashtable.h>
133#include <linux/vmalloc.h> 133#include <linux/vmalloc.h>
134#include <linux/if_macvlan.h> 134#include <linux/if_macvlan.h>
135#include <linux/errqueue.h>
135 136
136#include "net-sysfs.h" 137#include "net-sysfs.h"
137 138
@@ -2876,6 +2877,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
2876 2877
2877 skb_reset_mac_header(skb); 2878 skb_reset_mac_header(skb);
2878 2879
2880 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
2881 __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
2882
2879 /* Disable soft irqs for various locks below. Also 2883 /* Disable soft irqs for various locks below. Also
2880 * stops preemption for RCU. 2884 * stops preemption for RCU.
2881 */ 2885 */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c1a33033cbe2..3dec0293a7c5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3490,10 +3490,10 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
3490} 3490}
3491EXPORT_SYMBOL(sock_queue_err_skb); 3491EXPORT_SYMBOL(sock_queue_err_skb);
3492 3492
3493void skb_tstamp_tx(struct sk_buff *orig_skb, 3493void __skb_tstamp_tx(struct sk_buff *orig_skb,
3494 struct skb_shared_hwtstamps *hwtstamps) 3494 struct skb_shared_hwtstamps *hwtstamps,
3495 struct sock *sk, int tstype)
3495{ 3496{
3496 struct sock *sk = orig_skb->sk;
3497 struct sock_exterr_skb *serr; 3497 struct sock_exterr_skb *serr;
3498 struct sk_buff *skb; 3498 struct sk_buff *skb;
3499 int err; 3499 int err;
@@ -3521,12 +3521,26 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
3521 memset(serr, 0, sizeof(*serr)); 3521 memset(serr, 0, sizeof(*serr));
3522 serr->ee.ee_errno = ENOMSG; 3522 serr->ee.ee_errno = ENOMSG;
3523 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; 3523 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
3524 serr->ee.ee_info = tstype;
3525 if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
3526 serr->ee.ee_data = skb_shinfo(skb)->tskey;
3527 if (sk->sk_protocol == IPPROTO_TCP)
3528 serr->ee.ee_data -= sk->sk_tskey;
3529 }
3524 3530
3525 err = sock_queue_err_skb(sk, skb); 3531 err = sock_queue_err_skb(sk, skb);
3526 3532
3527 if (err) 3533 if (err)
3528 kfree_skb(skb); 3534 kfree_skb(skb);
3529} 3535}
3536EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
3537
3538void skb_tstamp_tx(struct sk_buff *orig_skb,
3539 struct skb_shared_hwtstamps *hwtstamps)
3540{
3541 return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk,
3542 SCM_TSTAMP_SND);
3543}
3530EXPORT_SYMBOL_GPL(skb_tstamp_tx); 3544EXPORT_SYMBOL_GPL(skb_tstamp_tx);
3531 3545
3532void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) 3546void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
diff --git a/net/core/sock.c b/net/core/sock.c
index a741163568fa..2714811afbd8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -848,22 +848,25 @@ set_rcvbuf:
848 ret = -EINVAL; 848 ret = -EINVAL;
849 break; 849 break;
850 } 850 }
851 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE, 851 if (val & SOF_TIMESTAMPING_OPT_ID &&
852 val & SOF_TIMESTAMPING_TX_HARDWARE); 852 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
853 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE, 853 if (sk->sk_protocol == IPPROTO_TCP) {
854 val & SOF_TIMESTAMPING_TX_SOFTWARE); 854 if (sk->sk_state != TCP_ESTABLISHED) {
855 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE, 855 ret = -EINVAL;
856 val & SOF_TIMESTAMPING_RX_HARDWARE); 856 break;
857 }
858 sk->sk_tskey = tcp_sk(sk)->snd_una;
859 } else {
860 sk->sk_tskey = 0;
861 }
862 }
863 sk->sk_tsflags = val;
857 if (val & SOF_TIMESTAMPING_RX_SOFTWARE) 864 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
858 sock_enable_timestamp(sk, 865 sock_enable_timestamp(sk,
859 SOCK_TIMESTAMPING_RX_SOFTWARE); 866 SOCK_TIMESTAMPING_RX_SOFTWARE);
860 else 867 else
861 sock_disable_timestamp(sk, 868 sock_disable_timestamp(sk,
862 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); 869 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
863 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
864 val & SOF_TIMESTAMPING_SOFTWARE);
865 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
866 val & SOF_TIMESTAMPING_RAW_HARDWARE);
867 break; 870 break;
868 871
869 case SO_RCVLOWAT: 872 case SO_RCVLOWAT:
@@ -1089,19 +1092,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1089 break; 1092 break;
1090 1093
1091 case SO_TIMESTAMPING: 1094 case SO_TIMESTAMPING:
1092 v.val = 0; 1095 v.val = sk->sk_tsflags;
1093 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
1094 v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
1095 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
1096 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
1097 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
1098 v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
1099 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
1100 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
1101 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
1102 v.val |= SOF_TIMESTAMPING_SOFTWARE;
1103 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
1104 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
1105 break; 1096 break;
1106 1097
1107 case SO_RCVTIMEO: 1098 case SO_RCVTIMEO:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b16556836d66..215af2b155cb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -855,11 +855,15 @@ static int __ip_append_data(struct sock *sk,
855 unsigned int maxfraglen, fragheaderlen, maxnonfragsize; 855 unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
856 int csummode = CHECKSUM_NONE; 856 int csummode = CHECKSUM_NONE;
857 struct rtable *rt = (struct rtable *)cork->dst; 857 struct rtable *rt = (struct rtable *)cork->dst;
858 u32 tskey = 0;
858 859
859 skb = skb_peek_tail(queue); 860 skb = skb_peek_tail(queue);
860 861
861 exthdrlen = !skb ? rt->dst.header_len : 0; 862 exthdrlen = !skb ? rt->dst.header_len : 0;
862 mtu = cork->fragsize; 863 mtu = cork->fragsize;
864 if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
865 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
866 tskey = sk->sk_tskey++;
863 867
864 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 868 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
865 869
@@ -976,6 +980,8 @@ alloc_new_skb:
976 /* only the initial fragment is time stamped */ 980 /* only the initial fragment is time stamped */
977 skb_shinfo(skb)->tx_flags = cork->tx_flags; 981 skb_shinfo(skb)->tx_flags = cork->tx_flags;
978 cork->tx_flags = 0; 982 cork->tx_flags = 0;
983 skb_shinfo(skb)->tskey = tskey;
984 tskey = 0;
979 985
980 /* 986 /*
981 * Find where to start putting bytes. 987 * Find where to start putting bytes.
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9d2118e5fbc7..744af67a5989 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -426,6 +426,15 @@ void tcp_init_sock(struct sock *sk)
426} 426}
427EXPORT_SYMBOL(tcp_init_sock); 427EXPORT_SYMBOL(tcp_init_sock);
428 428
429void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb)
430{
431 struct skb_shared_info *shinfo = skb_shinfo(skb);
432
433 sock_tx_timestamp(sk, &shinfo->tx_flags);
434 if (shinfo->tx_flags & SKBTX_ANY_SW_TSTAMP)
435 shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
436}
437
429/* 438/*
430 * Wait for a TCP event. 439 * Wait for a TCP event.
431 * 440 *
@@ -523,7 +532,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
523 } 532 }
524 /* This barrier is coupled with smp_wmb() in tcp_reset() */ 533 /* This barrier is coupled with smp_wmb() in tcp_reset() */
525 smp_rmb(); 534 smp_rmb();
526 if (sk->sk_err) 535 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
527 mask |= POLLERR; 536 mask |= POLLERR;
528 537
529 return mask; 538 return mask;
@@ -959,8 +968,10 @@ new_segment:
959 968
960 copied += copy; 969 copied += copy;
961 offset += copy; 970 offset += copy;
962 if (!(size -= copy)) 971 if (!(size -= copy)) {
972 tcp_tx_timestamp(sk, skb);
963 goto out; 973 goto out;
974 }
964 975
965 if (skb->len < size_goal || (flags & MSG_OOB)) 976 if (skb->len < size_goal || (flags & MSG_OOB))
966 continue; 977 continue;
@@ -1252,8 +1263,10 @@ new_segment:
1252 1263
1253 from += copy; 1264 from += copy;
1254 copied += copy; 1265 copied += copy;
1255 if ((seglen -= copy) == 0 && iovlen == 0) 1266 if ((seglen -= copy) == 0 && iovlen == 0) {
1267 tcp_tx_timestamp(sk, skb);
1256 goto out; 1268 goto out;
1269 }
1257 1270
1258 if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) 1271 if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
1259 continue; 1272 continue;
@@ -1617,6 +1630,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1617 struct sk_buff *skb; 1630 struct sk_buff *skb;
1618 u32 urg_hole = 0; 1631 u32 urg_hole = 0;
1619 1632
1633 if (unlikely(flags & MSG_ERRQUEUE))
1634 return ip_recv_error(sk, msg, len, addr_len);
1635
1620 if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && 1636 if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
1621 (sk->sk_state == TCP_ESTABLISHED)) 1637 (sk->sk_state == TCP_ESTABLISHED))
1622 sk_busy_loop(sk, nonblock); 1638 sk_busy_loop(sk, nonblock);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6a2984507755..a3d47af01906 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -74,6 +74,7 @@
74#include <linux/ipsec.h> 74#include <linux/ipsec.h>
75#include <asm/unaligned.h> 75#include <asm/unaligned.h>
76#include <net/netdma.h> 76#include <net/netdma.h>
77#include <linux/errqueue.h>
77 78
78int sysctl_tcp_timestamps __read_mostly = 1; 79int sysctl_tcp_timestamps __read_mostly = 1;
79int sysctl_tcp_window_scaling __read_mostly = 1; 80int sysctl_tcp_window_scaling __read_mostly = 1;
@@ -3106,6 +3107,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3106 tp->retrans_stamp = 0; 3107 tp->retrans_stamp = 0;
3107 } 3108 }
3108 3109
3110 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_ACK_TSTAMP) &&
3111 between(skb_shinfo(skb)->tskey, prior_snd_una,
3112 tp->snd_una + 1))
3113 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3114
3109 if (!fully_acked) 3115 if (!fully_acked)
3110 break; 3116 break;
3111 3117
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 55046ecd083e..f597119fc4e7 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -14,6 +14,21 @@
14#include <net/tcp.h> 14#include <net/tcp.h>
15#include <net/protocol.h> 15#include <net/protocol.h>
16 16
17void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, unsigned int seq,
18 unsigned int mss)
19{
20 while (skb) {
21 if (ts_seq < (__u64) seq + mss) {
22 skb_shinfo(skb)->tx_flags = SKBTX_SW_TSTAMP;
23 skb_shinfo(skb)->tskey = ts_seq;
24 return;
25 }
26
27 skb = skb->next;
28 seq += mss;
29 }
30}
31
17struct sk_buff *tcp_gso_segment(struct sk_buff *skb, 32struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
18 netdev_features_t features) 33 netdev_features_t features)
19{ 34{
@@ -91,6 +106,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
91 th = tcp_hdr(skb); 106 th = tcp_hdr(skb);
92 seq = ntohl(th->seq); 107 seq = ntohl(th->seq);
93 108
109 if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP))
110 tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss);
111
94 newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + 112 newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
95 (__force u32)delta)); 113 (__force u32)delta));
96 114
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f5dafe609f8b..315a55d66079 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1157,6 +1157,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1157 int err; 1157 int err;
1158 int offset = 0; 1158 int offset = 0;
1159 __u8 tx_flags = 0; 1159 __u8 tx_flags = 0;
1160 u32 tskey = 0;
1160 1161
1161 if (flags&MSG_PROBE) 1162 if (flags&MSG_PROBE)
1162 return 0; 1163 return 0;
@@ -1272,8 +1273,12 @@ emsgsize:
1272 } 1273 }
1273 } 1274 }
1274 1275
1275 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) 1276 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1276 sock_tx_timestamp(sk, &tx_flags); 1277 sock_tx_timestamp(sk, &tx_flags);
1278 if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1279 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1280 tskey = sk->sk_tskey++;
1281 }
1277 1282
1278 /* 1283 /*
1279 * Let's try using as much space as possible. 1284 * Let's try using as much space as possible.
@@ -1397,6 +1402,8 @@ alloc_new_skb:
1397 /* Only the initial fragment is time stamped */ 1402 /* Only the initial fragment is time stamped */
1398 skb_shinfo(skb)->tx_flags = tx_flags; 1403 skb_shinfo(skb)->tx_flags = tx_flags;
1399 tx_flags = 0; 1404 tx_flags = 0;
1405 skb_shinfo(skb)->tskey = tskey;
1406 tskey = 0;
1400 1407
1401 /* 1408 /*
1402 * Find where to start putting bytes 1409 * Find where to start putting bytes
diff --git a/net/socket.c b/net/socket.c
index d8222c025061..ae89569a2db5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -106,6 +106,7 @@
106#include <linux/sockios.h> 106#include <linux/sockios.h>
107#include <linux/atalk.h> 107#include <linux/atalk.h>
108#include <net/busy_poll.h> 108#include <net/busy_poll.h>
109#include <linux/errqueue.h>
109 110
110#ifdef CONFIG_NET_RX_BUSY_POLL 111#ifdef CONFIG_NET_RX_BUSY_POLL
111unsigned int sysctl_net_busy_read __read_mostly; 112unsigned int sysctl_net_busy_read __read_mostly;
@@ -612,10 +613,15 @@ EXPORT_SYMBOL(sock_release);
612void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) 613void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
613{ 614{
614 *tx_flags = 0; 615 *tx_flags = 0;
615 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 616 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
616 *tx_flags |= SKBTX_HW_TSTAMP; 617 *tx_flags |= SKBTX_HW_TSTAMP;
617 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 618 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
618 *tx_flags |= SKBTX_SW_TSTAMP; 619 *tx_flags |= SKBTX_SW_TSTAMP;
620 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
621 *tx_flags |= SKBTX_SCHED_TSTAMP;
622 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
623 *tx_flags |= SKBTX_ACK_TSTAMP;
624
619 if (sock_flag(sk, SOCK_WIFI_STATUS)) 625 if (sock_flag(sk, SOCK_WIFI_STATUS))
620 *tx_flags |= SKBTX_WIFI_STATUS; 626 *tx_flags |= SKBTX_WIFI_STATUS;
621} 627}
@@ -697,7 +703,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
697 struct sk_buff *skb) 703 struct sk_buff *skb)
698{ 704{
699 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); 705 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
700 struct timespec ts[3]; 706 struct scm_timestamping tss;
701 int empty = 1; 707 int empty = 1;
702 struct skb_shared_hwtstamps *shhwtstamps = 708 struct skb_shared_hwtstamps *shhwtstamps =
703 skb_hwtstamps(skb); 709 skb_hwtstamps(skb);
@@ -714,24 +720,25 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
714 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, 720 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
715 sizeof(tv), &tv); 721 sizeof(tv), &tv);
716 } else { 722 } else {
717 skb_get_timestampns(skb, &ts[0]); 723 struct timespec ts;
724 skb_get_timestampns(skb, &ts);
718 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, 725 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
719 sizeof(ts[0]), &ts[0]); 726 sizeof(ts), &ts);
720 } 727 }
721 } 728 }
722 729
723 730 memset(&tss, 0, sizeof(tss));
724 memset(ts, 0, sizeof(ts)); 731 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE ||
725 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && 732 skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP) &&
726 ktime_to_timespec_cond(skb->tstamp, ts + 0)) 733 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
727 empty = 0; 734 empty = 0;
728 if (shhwtstamps && 735 if (shhwtstamps &&
729 sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && 736 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
730 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) 737 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
731 empty = 0; 738 empty = 0;
732 if (!empty) 739 if (!empty)
733 put_cmsg(msg, SOL_SOCKET, 740 put_cmsg(msg, SOL_SOCKET,
734 SCM_TIMESTAMPING, sizeof(ts), &ts); 741 SCM_TIMESTAMPING, sizeof(tss), &tss);
735} 742}
736EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 743EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
737 744