aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDeepa Dinamani <deepa.kernel@gmail.com>2018-12-27 21:55:09 -0500
committerDavid S. Miller <davem@davemloft.net>2019-01-01 12:47:59 -0500
commit3a0ed3e9619738067214871e9cb826fa23b2ddb9 (patch)
treef27788a1eb07823c642631cb4d27aa0c052e3095
parent756af9c642329d54f048bac2a62f829b391f6944 (diff)
sock: Make sock->sk_stamp thread-safe
Al Viro mentioned (Message-ID <20170626041334.GZ10672@ZenIV.linux.org.uk>) that there is probably a race condition lurking in accesses of sk_stamp on 32-bit machines. sock->sk_stamp is of type ktime_t, which is always an s64. On a 32-bit architecture, we might run into situations of unsafe access as the access to the field becomes non-atomic. Use seqlocks for synchronization. This allows us to avoid using spinlocks for readers as readers do not need mutual exclusion. Another approach to solve this is to require sk_lock for all modifications of the timestamps. The current approach allows for timestamps to have their own lock: sk_stamp_seq. This allows for the patch to not compete with already existing critical sections, and side effects are limited to the paths in the patch. The addition of the new field maintains the data locality optimizations from commit 9115e8cd2a0c ("net: reorganize struct sock for better data locality"). Note that all the instances of the sk_stamp accesses are either through the ioctl or the syscall recvmsg. Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/sock.h38
-rw-r--r--net/compat.c15
-rw-r--r--net/core/sock.c15
-rw-r--r--net/sunrpc/svcsock.c2
4 files changed, 55 insertions, 15 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index a6235c286ef9..2b229f7be8eb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -298,6 +298,7 @@ struct sock_common {
298 * @sk_filter: socket filtering instructions 298 * @sk_filter: socket filtering instructions
299 * @sk_timer: sock cleanup timer 299 * @sk_timer: sock cleanup timer
300 * @sk_stamp: time stamp of last packet received 300 * @sk_stamp: time stamp of last packet received
301 * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only
301 * @sk_tsflags: SO_TIMESTAMPING socket options 302 * @sk_tsflags: SO_TIMESTAMPING socket options
302 * @sk_tskey: counter to disambiguate concurrent tstamp requests 303 * @sk_tskey: counter to disambiguate concurrent tstamp requests
303 * @sk_zckey: counter to order MSG_ZEROCOPY notifications 304 * @sk_zckey: counter to order MSG_ZEROCOPY notifications
@@ -474,6 +475,9 @@ struct sock {
474 const struct cred *sk_peer_cred; 475 const struct cred *sk_peer_cred;
475 long sk_rcvtimeo; 476 long sk_rcvtimeo;
476 ktime_t sk_stamp; 477 ktime_t sk_stamp;
478#if BITS_PER_LONG==32
479 seqlock_t sk_stamp_seq;
480#endif
477 u16 sk_tsflags; 481 u16 sk_tsflags;
478 u8 sk_shutdown; 482 u8 sk_shutdown;
479 u32 sk_tskey; 483 u32 sk_tskey;
@@ -2297,6 +2301,34 @@ static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
2297 atomic_add(segs, &sk->sk_drops); 2301 atomic_add(segs, &sk->sk_drops);
2298} 2302}
2299 2303
2304static inline ktime_t sock_read_timestamp(struct sock *sk)
2305{
2306#if BITS_PER_LONG==32
2307 unsigned int seq;
2308 ktime_t kt;
2309
2310 do {
2311 seq = read_seqbegin(&sk->sk_stamp_seq);
2312 kt = sk->sk_stamp;
2313 } while (read_seqretry(&sk->sk_stamp_seq, seq));
2314
2315 return kt;
2316#else
2317 return sk->sk_stamp;
2318#endif
2319}
2320
2321static inline void sock_write_timestamp(struct sock *sk, ktime_t kt)
2322{
2323#if BITS_PER_LONG==32
2324 write_seqlock(&sk->sk_stamp_seq);
2325 sk->sk_stamp = kt;
2326 write_sequnlock(&sk->sk_stamp_seq);
2327#else
2328 sk->sk_stamp = kt;
2329#endif
2330}
2331
2300void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, 2332void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
2301 struct sk_buff *skb); 2333 struct sk_buff *skb);
2302void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, 2334void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
@@ -2321,7 +2353,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
2321 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) 2353 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
2322 __sock_recv_timestamp(msg, sk, skb); 2354 __sock_recv_timestamp(msg, sk, skb);
2323 else 2355 else
2324 sk->sk_stamp = kt; 2356 sock_write_timestamp(sk, kt);
2325 2357
2326 if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid) 2358 if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid)
2327 __sock_recv_wifi_status(msg, sk, skb); 2359 __sock_recv_wifi_status(msg, sk, skb);
@@ -2342,9 +2374,9 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
2342 if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) 2374 if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY)
2343 __sock_recv_ts_and_drops(msg, sk, skb); 2375 __sock_recv_ts_and_drops(msg, sk, skb);
2344 else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) 2376 else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
2345 sk->sk_stamp = skb->tstamp; 2377 sock_write_timestamp(sk, skb->tstamp);
2346 else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) 2378 else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP))
2347 sk->sk_stamp = 0; 2379 sock_write_timestamp(sk, 0);
2348} 2380}
2349 2381
2350void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); 2382void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags);
diff --git a/net/compat.c b/net/compat.c
index 47a614b370cd..d1f3a8a0b3ef 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -467,12 +467,14 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
467 ctv = (struct compat_timeval __user *) userstamp; 467 ctv = (struct compat_timeval __user *) userstamp;
468 err = -ENOENT; 468 err = -ENOENT;
469 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 469 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
470 tv = ktime_to_timeval(sk->sk_stamp); 470 tv = ktime_to_timeval(sock_read_timestamp(sk));
471
471 if (tv.tv_sec == -1) 472 if (tv.tv_sec == -1)
472 return err; 473 return err;
473 if (tv.tv_sec == 0) { 474 if (tv.tv_sec == 0) {
474 sk->sk_stamp = ktime_get_real(); 475 ktime_t kt = ktime_get_real();
475 tv = ktime_to_timeval(sk->sk_stamp); 476 sock_write_timestamp(sk, kt);
477 tv = ktime_to_timeval(kt);
476 } 478 }
477 err = 0; 479 err = 0;
478 if (put_user(tv.tv_sec, &ctv->tv_sec) || 480 if (put_user(tv.tv_sec, &ctv->tv_sec) ||
@@ -494,12 +496,13 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta
494 ctv = (struct compat_timespec __user *) userstamp; 496 ctv = (struct compat_timespec __user *) userstamp;
495 err = -ENOENT; 497 err = -ENOENT;
496 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 498 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
497 ts = ktime_to_timespec(sk->sk_stamp); 499 ts = ktime_to_timespec(sock_read_timestamp(sk));
498 if (ts.tv_sec == -1) 500 if (ts.tv_sec == -1)
499 return err; 501 return err;
500 if (ts.tv_sec == 0) { 502 if (ts.tv_sec == 0) {
501 sk->sk_stamp = ktime_get_real(); 503 ktime_t kt = ktime_get_real();
502 ts = ktime_to_timespec(sk->sk_stamp); 504 sock_write_timestamp(sk, kt);
505 ts = ktime_to_timespec(kt);
503 } 506 }
504 err = 0; 507 err = 0;
505 if (put_user(ts.tv_sec, &ctv->tv_sec) || 508 if (put_user(ts.tv_sec, &ctv->tv_sec) ||
diff --git a/net/core/sock.c b/net/core/sock.c
index f00902c532cc..6aa2e7e0b4fb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2751,6 +2751,9 @@ void sock_init_data(struct socket *sock, struct sock *sk)
2751 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; 2751 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
2752 2752
2753 sk->sk_stamp = SK_DEFAULT_STAMP; 2753 sk->sk_stamp = SK_DEFAULT_STAMP;
2754#if BITS_PER_LONG==32
2755 seqlock_init(&sk->sk_stamp_seq);
2756#endif
2754 atomic_set(&sk->sk_zckey, 0); 2757 atomic_set(&sk->sk_zckey, 0);
2755 2758
2756#ifdef CONFIG_NET_RX_BUSY_POLL 2759#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -2850,12 +2853,13 @@ int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2850 struct timeval tv; 2853 struct timeval tv;
2851 2854
2852 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 2855 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2853 tv = ktime_to_timeval(sk->sk_stamp); 2856 tv = ktime_to_timeval(sock_read_timestamp(sk));
2854 if (tv.tv_sec == -1) 2857 if (tv.tv_sec == -1)
2855 return -ENOENT; 2858 return -ENOENT;
2856 if (tv.tv_sec == 0) { 2859 if (tv.tv_sec == 0) {
2857 sk->sk_stamp = ktime_get_real(); 2860 ktime_t kt = ktime_get_real();
2858 tv = ktime_to_timeval(sk->sk_stamp); 2861 sock_write_timestamp(sk, kt);
2862 tv = ktime_to_timeval(kt);
2859 } 2863 }
2860 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0; 2864 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2861} 2865}
@@ -2866,11 +2870,12 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2866 struct timespec ts; 2870 struct timespec ts;
2867 2871
2868 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 2872 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2869 ts = ktime_to_timespec(sk->sk_stamp); 2873 ts = ktime_to_timespec(sock_read_timestamp(sk));
2870 if (ts.tv_sec == -1) 2874 if (ts.tv_sec == -1)
2871 return -ENOENT; 2875 return -ENOENT;
2872 if (ts.tv_sec == 0) { 2876 if (ts.tv_sec == 0) {
2873 sk->sk_stamp = ktime_get_real(); 2877 ktime_t kt = ktime_get_real();
2878 sock_write_timestamp(sk, kt);
2874 ts = ktime_to_timespec(sk->sk_stamp); 2879 ts = ktime_to_timespec(sk->sk_stamp);
2875 } 2880 }
2876 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0; 2881 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 986f3ed7d1a2..b7e67310ec37 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -549,7 +549,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
549 /* Don't enable netstamp, sunrpc doesn't 549 /* Don't enable netstamp, sunrpc doesn't
550 need that much accuracy */ 550 need that much accuracy */
551 } 551 }
552 svsk->sk_sk->sk_stamp = skb->tstamp; 552 sock_write_timestamp(svsk->sk_sk, skb->tstamp);
553 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ 553 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
554 554
555 len = skb->len; 555 len = skb->len;