Diffstat (limited to 'include/net/sock.h')
-rw-r--r--  include/net/sock.h  132
1 file changed, 107 insertions(+), 25 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 56df440a950b..328e03f47dd1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -74,7 +74,7 @@
 					printk(KERN_DEBUG msg); } while (0)
 #else
 /* Validate arguments and do nothing */
-static void inline int __attribute__ ((format (printf, 2, 3)))
+static inline void __attribute__ ((format (printf, 2, 3)))
 SOCK_DEBUG(struct sock *sk, const char *msg, ...)
 {
 }
@@ -159,7 +159,7 @@ struct sock_common {
  *	@sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
  *	@sk_lock: synchronizer
  *	@sk_rcvbuf: size of receive buffer in bytes
- *	@sk_sleep: sock wait queue
+ *	@sk_wq: sock wait queue and async head
  *	@sk_dst_cache: destination cache
  *	@sk_dst_lock: destination cache lock
  *	@sk_policy: flow policy
@@ -198,6 +198,7 @@ struct sock_common {
  *	@sk_rcvlowat: %SO_RCVLOWAT setting
  *	@sk_rcvtimeo: %SO_RCVTIMEO setting
  *	@sk_sndtimeo: %SO_SNDTIMEO setting
+ *	@sk_rxhash: flow hash received from netif layer
  *	@sk_filter: socket filtering instructions
  *	@sk_protinfo: private area, net family specific, when not using slab
  *	@sk_timer: sock cleanup timer
@@ -255,9 +256,8 @@ struct sock {
 		struct sk_buff	*head;
 		struct sk_buff	*tail;
 		int		len;
-		int		limit;
 	} sk_backlog;
-	wait_queue_head_t	*sk_sleep;
+	struct socket_wq	*sk_wq;
 	struct dst_entry	*sk_dst_cache;
 #ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
@@ -279,6 +279,9 @@ struct sock {
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	int			sk_rcvlowat;
+#ifdef CONFIG_RPS
+	__u32			sk_rxhash;
+#endif
 	unsigned long		sk_flags;
 	unsigned long		sk_lingertime;
 	struct sk_buff_head	sk_error_queue;
@@ -604,10 +607,20 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 	skb->next = NULL;
 }
 
+/*
+ * Take into account size of receive queue and backlog queue
+ */
+static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
+{
+	unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
+
+	return qsize + skb->truesize > sk->sk_rcvbuf;
+}
+
 /* The per-socket spinlock must be held here. */
 static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
+	if (sk_rcvqueues_full(sk, skb))
 		return -ENOBUFS;
 
 	__sk_add_backlog(sk, skb);
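The hunk above splits the admission check out of sk_add_backlog(): sk_rcvqueues_full() now compares the receive queue plus the backlog against sk_rcvbuf before a packet is queued. A minimal sketch of how a protocol receive path might combine these helpers follows; my_proto_rcv() and my_proto_do_rcv() are hypothetical names, while sk_rcvqueues_full(), sk_add_backlog(), bh_lock_sock()/bh_unlock_sock() and sock_owned_by_user() come from this header.

/* Sketch only, not part of the diff: hypothetical protocol receive path. */
static int my_proto_do_rcv(struct sock *sk, struct sk_buff *skb);

static int my_proto_rcv(struct sock *sk, struct sk_buff *skb)
{
	int rc = 0;

	/* Cheap early drop: receive queue + backlog already exceed sk_rcvbuf. */
	if (sk_rcvqueues_full(sk, skb)) {
		kfree_skb(skb);
		return -ENOBUFS;
	}

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		rc = my_proto_do_rcv(sk, skb);	/* process immediately */
	} else if (sk_add_backlog(sk, skb)) {
		/* Backlog filled up between the early check and here. */
		kfree_skb(skb);
		rc = -ENOBUFS;
	}
	bh_unlock_sock(sk);
	return rc;
}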
@@ -620,6 +633,40 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 	return sk->sk_backlog_rcv(sk, skb);
 }
 
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_reset_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_save_rxhash(struct sock *sk, u32 rxhash)
+{
+#ifdef CONFIG_RPS
+	if (unlikely(sk->sk_rxhash != rxhash)) {
+		sock_rps_reset_flow(sk);
+		sk->sk_rxhash = rxhash;
+	}
+#endif
+}
+
 #define sk_wait_event(__sk, __timeo, __condition)			\
 	({	int __rc;						\
 		release_sock(__sk);					\
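The sock_rps_*() helpers added above tie a socket to the RPS/RFS flow tables: the receive path stores the flow hash the netif layer computed for the packet, and the system-call path records which CPU the application runs on. A rough sketch of the two call sites; the my_proto_* names are hypothetical, and the hash is assumed to come from skb->rxhash as in this kernel series.

/* Sketch only; my_proto_* are hypothetical call sites. */
static void my_proto_note_rx(struct sock *sk, struct sk_buff *skb)
{
	/* Softirq receive path: remember the flow hash so Receive Flow
	 * Steering can later steer this flow toward the consuming CPU. */
	sock_rps_save_rxhash(sk, skb->rxhash);
}

static void my_proto_note_syscall(struct sock *sk)
{
	/* recvmsg()/sendmsg() path: record the current CPU for this flow
	 * (keyed by sk->sk_rxhash) in rps_sock_flow_table. */
	sock_rps_record_flow(sk);
}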
@@ -974,6 +1021,16 @@ extern void release_sock(struct sock *sk);
 				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
 
+static inline void lock_sock_bh(struct sock *sk)
+{
+	spin_lock_bh(&sk->sk_lock.slock);
+}
+
+static inline void unlock_sock_bh(struct sock *sk)
+{
+	spin_unlock_bh(&sk->sk_lock.slock);
+}
+
 extern struct sock		*sk_alloc(struct net *net, int family,
 					  gfp_t priority,
 					  struct proto *prot);
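lock_sock_bh()/unlock_sock_bh() take the per-socket spinlock with BHs disabled: a lighter alternative to lock_sock()/release_sock() for short process-context sections that only need to exclude the softirq path and do not want backlog processing. A hedged sketch, with my_proto_reclaim() as a hypothetical caller; sk_mem_reclaim_partial() is an existing sock.h helper.

/* Sketch: short process-context critical section against the BH receive path. */
static void my_proto_reclaim(struct sock *sk)
{
	lock_sock_bh(sk);
	sk_mem_reclaim_partial(sk);	/* touches accounting the BH side also updates */
	unlock_sock_bh(sk);
}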
@@ -1160,6 +1217,10 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 	sk->sk_socket = sock;
 }
 
+static inline wait_queue_head_t *sk_sleep(struct sock *sk)
+{
+	return &sk->sk_wq->wait;
+}
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
  * Note that parent inode held reference count on this struct sock,
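With the sk_sleep pointer gone from struct sock, callers now reach the wait queue through the sk_sleep() accessor, which returns the wait head embedded in the socket's socket_wq. A sketch of what a converted sleeper might look like; my_proto_wait_for_data() is hypothetical, DEFINE_WAIT/prepare_to_wait/finish_wait are the ordinary wait-queue API.

/* Sketch of a converted sleeper (it used to dereference sk->sk_sleep directly). */
static long my_proto_wait_for_data(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	if (skb_queue_empty(&sk->sk_receive_queue))
		timeo = schedule_timeout(timeo);
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}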
@@ -1172,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk)
 	write_lock_bh(&sk->sk_callback_lock);
 	sock_set_flag(sk, SOCK_DEAD);
 	sk_set_socket(sk, NULL);
-	sk->sk_sleep = NULL;
+	sk->sk_wq = NULL;
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_sleep = &parent->wait;
+	rcu_assign_pointer(sk->sk_wq, parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	security_sock_graft(sk, parent);
@@ -1193,7 +1254,8 @@ static inline struct dst_entry *
 __sk_dst_get(struct sock *sk)
 {
 	return rcu_dereference_check(sk->sk_dst_cache, rcu_read_lock_held() ||
-						       sock_owned_by_user(sk));
+						       sock_owned_by_user(sk) ||
+						       lockdep_is_held(&sk->sk_lock.slock));
 }
 
 static inline struct dst_entry *
@@ -1231,8 +1293,11 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst)
 	struct dst_entry *old_dst;
 
 	sk_tx_queue_clear(sk);
-	old_dst = rcu_dereference_check(sk->sk_dst_cache,
-					lockdep_is_held(&sk->sk_dst_lock));
+	/*
+	 * This can be called while sk is owned by the caller only,
+	 * with no state that can be checked in a rcu_dereference_check() cond
+	 */
+	old_dst = rcu_dereference_raw(sk->sk_dst_cache);
 	rcu_assign_pointer(sk->sk_dst_cache, dst);
 	dst_release(old_dst);
 }
@@ -1327,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk)
 }
 
 /**
- * sk_has_sleeper - check if there are any waiting processes
- * @sk: socket
+ * wq_has_sleeper - check if there are any waiting processes
+ * @sk: struct socket_wq
  *
- * Returns true if socket has waiting processes
+ * Returns true if socket_wq has waiting processes
  *
- * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
+ * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
  * barrier call. They were added due to the race found within the tcp code.
  *
  * Consider following tcp code paths:
@@ -1345,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk)
  *   ...                 ...
  *   tp->rcv_nxt check   sock_def_readable
  *   ...                 {
- *   schedule               ...
- *                           if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- *                              wake_up_interruptible(sk->sk_sleep)
+ *   schedule               rcu_read_lock();
+ *                           wq = rcu_dereference(sk->sk_wq);
+ *                           if (wq && waitqueue_active(&wq->wait))
+ *                              wake_up_interruptible(&wq->wait)
  *   ...
  *   }
  *
@@ -1356,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk)
  * could then endup calling schedule and sleep forever if there are no more
  * data on the socket.
  *
- * The sk_has_sleeper is always called right after a call to read_lock, so we
- * can use smp_mb__after_lock barrier.
  */
-static inline int sk_has_sleeper(struct sock *sk)
+static inline bool wq_has_sleeper(struct socket_wq *wq)
 {
+
 	/*
 	 * We need to be sure we are in sync with the
 	 * add_wait_queue modifications to the wait queue.
 	 *
 	 * This memory barrier is paired in the sock_poll_wait.
 	 */
-	smp_mb__after_lock();
-	return sk->sk_sleep && waitqueue_active(sk->sk_sleep);
+	smp_mb();
+	return wq && waitqueue_active(&wq->wait);
 }
 
 /**
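wq_has_sleeper() now takes the socket_wq directly, so a wake-up callback is expected to look it up under rcu_read_lock() first, exactly as the CPU2 column in the comment above shows. A hedged sketch of such a callback; my_proto_data_ready() is hypothetical, while the in-tree default callbacks follow the same pattern with poll-aware wake-up variants.

/* Sketch of the wake-up side that pairs with sock_poll_wait(). */
static void my_proto_data_ready(struct sock *sk, int len)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))		/* smp_mb() inside orders against add_wait_queue */
		wake_up_interruptible(&wq->wait);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}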
@@ -1377,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk)
  * @wait_address:   socket wait queue
  * @p:              poll_table
  *
- * See the comments in the sk_has_sleeper function.
+ * See the comments in the wq_has_sleeper function.
  */
 static inline void sock_poll_wait(struct file *filp,
 		wait_queue_head_t *wait_address, poll_table *p)
@@ -1388,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp,
 		 * We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
-		 * This memory barrier is paired in the sk_has_sleeper.
+		 * This memory barrier is paired in the wq_has_sleeper.
 		 */
 		smp_mb();
 	}
@@ -1570,7 +1635,24 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 	sk->sk_stamp = kt;
 }
 
-extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb);
+extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+				     struct sk_buff *skb);
+
+static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+					  struct sk_buff *skb)
+{
+#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL)			| \
+			   (1UL << SOCK_RCVTSTAMP)			| \
+			   (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)	| \
+			   (1UL << SOCK_TIMESTAMPING_SOFTWARE)		| \
+			   (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE)	| \
+			   (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE))
+
+	if (sk->sk_flags & FLAGS_TS_OR_DROPS)
+		__sock_recv_ts_and_drops(msg, sk, skb);
+	else
+		sk->sk_stamp = skb->tstamp;
+}
 
 /**
  * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
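The last hunk turns sock_recv_ts_and_drops() into an inline fast path: a single sk_flags test, with only sockets that enabled timestamping or SO_RXQ_OVFL falling through to the out-of-line __sock_recv_ts_and_drops(). A sketch of the typical call site at the end of a datagram recvmsg(); my_proto_recvmsg_finish() is hypothetical, the skb helpers are the ordinary datagram API.

/* Sketch: tail of a datagram recvmsg() implementation. */
static int my_proto_recvmsg_finish(struct sock *sk, struct msghdr *msg,
				   struct sk_buff *skb, int copied)
{
	int err;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	/* One flags test in the common case; timestamping/drop-count users
	 * take the __sock_recv_ts_and_drops() slow path. */
	sock_recv_ts_and_drops(msg, sk, skb);
	err = copied;

out_free:
	skb_free_datagram(sk, skb);
	return err;
}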