Diffstat (limited to 'include/net/sock.h')
 include/net/sock.h | 132 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 107 insertions(+), 25 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 56df440a950b..328e03f47dd1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -74,7 +74,7 @@
 					printk(KERN_DEBUG msg); } while (0)
 #else
 /* Validate arguments and do nothing */
-static void inline int __attribute__ ((format (printf, 2, 3)))
+static inline void __attribute__ ((format (printf, 2, 3)))
 SOCK_DEBUG(struct sock *sk, const char *msg, ...)
 {
 }
@@ -159,7 +159,7 @@ struct sock_common {
  *	@sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
  *	@sk_lock:	synchronizer
  *	@sk_rcvbuf: size of receive buffer in bytes
- *	@sk_sleep: sock wait queue
+ *	@sk_wq: sock wait queue and async head
  *	@sk_dst_cache: destination cache
  *	@sk_dst_lock: destination cache lock
  *	@sk_policy: flow policy
@@ -198,6 +198,7 @@ struct sock_common {
  *	@sk_rcvlowat: %SO_RCVLOWAT setting
  *	@sk_rcvtimeo: %SO_RCVTIMEO setting
  *	@sk_sndtimeo: %SO_SNDTIMEO setting
+ *	@sk_rxhash: flow hash received from netif layer
  *	@sk_filter: socket filtering instructions
  *	@sk_protinfo: private area, net family specific, when not using slab
  *	@sk_timer: sock cleanup timer
@@ -255,9 +256,8 @@ struct sock {
 		struct sk_buff	*head;
 		struct sk_buff	*tail;
 		int		len;
-		int		limit;
 	} sk_backlog;
-	wait_queue_head_t	*sk_sleep;
+	struct socket_wq	*sk_wq;
 	struct dst_entry	*sk_dst_cache;
 #ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
@@ -279,6 +279,9 @@ struct sock {
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	int			sk_rcvlowat;
+#ifdef CONFIG_RPS
+	__u32			sk_rxhash;
+#endif
 	unsigned long		sk_flags;
 	unsigned long		sk_lingertime;
 	struct sk_buff_head	sk_error_queue;
@@ -604,10 +607,20 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 	skb->next = NULL;
 }
 
+/*
+ * Take into account size of receive queue and backlog queue
+ */
+static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
+{
+	unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
+
+	return qsize + skb->truesize > sk->sk_rcvbuf;
+}
+
 /* The per-socket spinlock must be held here. */
 static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
+	if (sk_rcvqueues_full(sk, skb))
 		return -ENOBUFS;
 
 	__sk_add_backlog(sk, skb);
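For context (not part of the patch): a minimal sketch of how a protocol receive path is expected to use the new helpers, modeled on the UDP/TCP input paths touched by this series. The example_proto_rcv name is made up for illustration.

static int example_proto_rcv(struct sock *sk, struct sk_buff *skb)
{
	int rc = 0;

	/* cheap early drop: receive queue + backlog already exceed sk_rcvbuf */
	if (sk_rcvqueues_full(sk, skb)) {
		kfree_skb(skb);
		return -ENOBUFS;
	}

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		rc = sk_backlog_rcv(sk, skb);	/* process immediately */
	} else if (sk_add_backlog(sk, skb)) {
		/* owner is busy and the backlog is full: drop */
		bh_unlock_sock(sk);
		kfree_skb(skb);
		return -ENOBUFS;
	}
	bh_unlock_sock(sk);
	return rc;
}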
@@ -620,6 +633,40 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 	return sk->sk_backlog_rcv(sk, skb);
 }
 
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_reset_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_save_rxhash(struct sock *sk, u32 rxhash)
+{
+#ifdef CONFIG_RPS
+	if (unlikely(sk->sk_rxhash != rxhash)) {
+		sock_rps_reset_flow(sk);
+		sk->sk_rxhash = rxhash;
+	}
+#endif
+}
+
 #define sk_wait_event(__sk, __timeo, __condition)			\
 	({	int __rc;						\
 		release_sock(__sk);					\
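Illustrative only (not in the patch): the RPS helpers are meant to be called from two places. The protocol receive path saves the flow hash computed by the netif layer, and the recvmsg path records the CPU the consumer runs on so later packets of the flow can be steered to it. The example_* names below are invented; the bodies mirror the inet/udp call sites of this series.

static int example_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	/* remember the flow hash delivered by the NIC/netif layer */
	sock_rps_save_rxhash(sk, skb->rxhash);
	return sock_queue_rcv_skb(sk, skb);
}

static int example_recvmsg(struct kiocb *iocb, struct socket *sock,
			   struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;

	/* record the current CPU in the global rps_sock_flow_table */
	sock_rps_record_flow(sk);

	return sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				    flags & ~MSG_DONTWAIT, &addr_len);
}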
@@ -974,6 +1021,16 @@ extern void release_sock(struct sock *sk);
 				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock))
 
+static inline void lock_sock_bh(struct sock *sk)
+{
+	spin_lock_bh(&sk->sk_lock.slock);
+}
+
+static inline void unlock_sock_bh(struct sock *sk)
+{
+	spin_unlock_bh(&sk->sk_lock.slock);
+}
+
 extern struct sock		*sk_alloc(struct net *net, int family,
 					  gfp_t priority,
 					  struct proto *prot);
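Sketch (not from the patch): lock_sock_bh()/unlock_sock_bh() give process-context code a short critical section that excludes the softirq receive path without taking the full socket lock, e.g. to fix up receive-memory accounting the way the UDP recvmsg path does elsewhere in this series. The example_ name is made up.

static void example_reclaim_rmem(struct sock *sk)
{
	lock_sock_bh(sk);		/* keep BH users of sk_lock.slock out */
	sk_mem_reclaim_partial(sk);
	unlock_sock_bh(sk);
}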
@@ -1160,6 +1217,10 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 	sk->sk_socket = sock;
 }
 
+static inline wait_queue_head_t *sk_sleep(struct sock *sk)
+{
+	return &sk->sk_wq->wait;
+}
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
  * Note that parent inode held reference count on this struct sock,
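Sketch (illustrative, made-up helper): code that used to dereference sk->sk_sleep directly now goes through sk_sleep(sk). A simplified wait-for-data loop; real callers such as sk_wait_data() also drop and re-take the socket lock around schedule_timeout().

static long example_wait_for_data(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	if (skb_queue_empty(&sk->sk_receive_queue))
		timeo = schedule_timeout(timeo);
	finish_wait(sk_sleep(sk), &wait);

	if (signal_pending(current))
		return sock_intr_errno(timeo);
	return timeo;
}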
@@ -1172,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk)
 	write_lock_bh(&sk->sk_callback_lock);
 	sock_set_flag(sk, SOCK_DEAD);
 	sk_set_socket(sk, NULL);
-	sk->sk_sleep  = NULL;
+	sk->sk_wq  = NULL;
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_sleep = &parent->wait;
+	rcu_assign_pointer(sk->sk_wq, parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	security_sock_graft(sk, parent);
@@ -1193,7 +1254,8 @@ static inline struct dst_entry *
 __sk_dst_get(struct sock *sk)
 {
 	return rcu_dereference_check(sk->sk_dst_cache, rcu_read_lock_held() ||
-						       sock_owned_by_user(sk));
+						       sock_owned_by_user(sk) ||
+						       lockdep_is_held(&sk->sk_lock.slock));
 }
 
 static inline struct dst_entry *
@@ -1231,8 +1293,11 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst)
 	struct dst_entry *old_dst;
 
 	sk_tx_queue_clear(sk);
-	old_dst = rcu_dereference_check(sk->sk_dst_cache,
-					lockdep_is_held(&sk->sk_dst_lock));
+	/*
+	 * This can be called while sk is owned by the caller only,
+	 * with no state that can be checked in a rcu_dereference_check() cond
+	 */
+	old_dst = rcu_dereference_raw(sk->sk_dst_cache);
 	rcu_assign_pointer(sk->sk_dst_cache, dst);
 	dst_release(old_dst);
 }
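Sketch (not part of the patch): with the extra lockdep condition above, __sk_dst_get() may now also be called by code that only holds sk_lock.slock (e.g. timer or softirq paths), in addition to RCU readers and lock_sock() owners. The helper below is invented purely to show the locking pattern.

static struct dst_entry *example_peek_dst(struct sock *sk)
{
	struct dst_entry *dst;

	bh_lock_sock(sk);		/* acquires sk->sk_lock.slock */
	dst = __sk_dst_get(sk);		/* satisfies the new lockdep condition */
	if (dst)
		dst_hold(dst);
	bh_unlock_sock(sk);
	return dst;
}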
@@ -1327,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk)
 }
 
 /**
- * sk_has_sleeper - check if there are any waiting processes
- * @sk: socket
+ * wq_has_sleeper - check if there are any waiting processes
+ * @sk: struct socket_wq
  *
- * Returns true if socket has waiting processes
+ * Returns true if socket_wq has waiting processes
  *
- * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
+ * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
  * barrier call. They were added due to the race found within the tcp code.
  *
  * Consider following tcp code paths:
@@ -1345,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk)
  *   ...                 ...
  *   tp->rcv_nxt check   sock_def_readable
  *   ...                 {
- *   schedule               ...
- *                          if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- *                             wake_up_interruptible(sk->sk_sleep)
+ *   schedule               rcu_read_lock();
+ *                          wq = rcu_dereference(sk->sk_wq);
+ *                          if (wq && waitqueue_active(&wq->wait))
+ *                             wake_up_interruptible(&wq->wait)
  *                          ...
  *                       }
  *
@@ -1356,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk)
  * could then endup calling schedule and sleep forever if there are no more
  * data on the socket.
  *
- * The sk_has_sleeper is always called right after a call to read_lock, so we
- * can use smp_mb__after_lock barrier.
  */
-static inline int sk_has_sleeper(struct sock *sk)
+static inline bool wq_has_sleeper(struct socket_wq *wq)
 {
+
 	/*
 	 * We need to be sure we are in sync with the
 	 * add_wait_queue modifications to the wait queue.
 	 *
 	 * This memory barrier is paired in the sock_poll_wait.
 	 */
-	smp_mb__after_lock();
-	return sk->sk_sleep && waitqueue_active(sk->sk_sleep);
+	smp_mb();
+	return wq && waitqueue_active(&wq->wait);
 }
 
 /**
@@ -1377,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk)
  * @wait_address:   socket wait queue
  * @p:              poll_table
  *
- * See the comments in the sk_has_sleeper function.
+ * See the comments in the wq_has_sleeper function.
  */
 static inline void sock_poll_wait(struct file *filp,
 		wait_queue_head_t *wait_address, poll_table *p)
@@ -1388,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp,
 		 * We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
-		 * This memory barrier is paired in the sk_has_sleeper.
+		 * This memory barrier is paired in the wq_has_sleeper.
 		 */
 		smp_mb();
 	}
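Sketch (modeled on sock_def_readable() and the datagram poll code; not part of these hunks): the wake-up side now dereferences sk->sk_wq under RCU and tests wq_has_sleeper(), while the poll side keeps the paired barrier by calling sock_poll_wait(file, sk_sleep(sk), wait). The example_* names are invented.

static void example_data_ready(struct sock *sk, int len)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait,
						POLLIN | POLLRDNORM);
	rcu_read_unlock();
}

static unsigned int example_poll(struct file *file, struct socket *sock,
				 poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask = 0;

	sock_poll_wait(file, sk_sleep(sk), wait);	/* smp_mb() inside */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;
	return mask;
}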
@@ -1570,7 +1635,24 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 		sk->sk_stamp = kt;
 }
 
-extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb);
+extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+				     struct sk_buff *skb);
+
+static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+					  struct sk_buff *skb)
+{
+#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL)			| \
+			   (1UL << SOCK_RCVTSTAMP)			| \
+			   (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)	| \
+			   (1UL << SOCK_TIMESTAMPING_SOFTWARE)		| \
+			   (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE)	| \
+			   (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE))
+
+	if (sk->sk_flags & FLAGS_TS_OR_DROPS)
+		__sock_recv_ts_and_drops(msg, sk, skb);
+	else
+		sk->sk_stamp = skb->tstamp;
+}
 
 /**
  * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
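Sketch (illustrative, made-up datagram recvmsg tail): callers keep using sock_recv_ts_and_drops() exactly as before; the change only means that when none of the timestamping/SOCK_RXQ_OVFL flags are set, the inline reduces to a single sk->sk_stamp assignment instead of an out-of-line call.

static int example_dgram_recvmsg_tail(struct sock *sk, struct msghdr *msg,
				      struct sk_buff *skb)
{
	int err;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len);
	if (err)
		return err;

	sock_recv_ts_and_drops(msg, sk, skb);	/* cmsg timestamps / drop count */
	return skb->len;
}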