Diffstat (limited to 'include/net/sock.h')
-rw-r--r--   include/net/sock.h | 284
1 file changed, 208 insertions(+), 76 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 3f1a4804bb3f..adab9dc58183 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -51,6 +51,7 @@
 #include <linux/skbuff.h> /* struct sk_buff */
 #include <linux/mm.h>
 #include <linux/security.h>
+#include <linux/slab.h>
 
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
@@ -73,7 +74,7 @@
 printk(KERN_DEBUG msg); } while (0)
 #else
 /* Validate arguments and do nothing */
-static void inline int __attribute__ ((format (printf, 2, 3)))
+static inline void __attribute__ ((format (printf, 2, 3)))
 SOCK_DEBUG(struct sock *sk, const char *msg, ...)
 {
 }
@@ -158,7 +159,7 @@ struct sock_common {
 * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
 * @sk_lock: synchronizer
 * @sk_rcvbuf: size of receive buffer in bytes
- * @sk_sleep: sock wait queue
+ * @sk_wq: sock wait queue and async head
 * @sk_dst_cache: destination cache
 * @sk_dst_lock: destination cache lock
 * @sk_policy: flow policy
@@ -176,6 +177,7 @@ struct sock_common {
 * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
 * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
 * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
+ * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
 * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
 * @sk_gso_max_size: Maximum GSO segment size to build
 * @sk_lingertime: %SO_LINGER l_linger setting
@@ -193,10 +195,12 @@ struct sock_common {
 * @sk_priority: %SO_PRIORITY setting
 * @sk_type: socket type (%SOCK_STREAM, etc)
 * @sk_protocol: which protocol this socket belongs in this network family
- * @sk_peercred: %SO_PEERCRED setting
+ * @sk_peer_pid: &struct pid for this socket's peer
+ * @sk_peer_cred: %SO_PEERCRED setting
 * @sk_rcvlowat: %SO_RCVLOWAT setting
 * @sk_rcvtimeo: %SO_RCVTIMEO setting
 * @sk_sndtimeo: %SO_SNDTIMEO setting
+ * @sk_rxhash: flow hash received from netif layer
 * @sk_filter: socket filtering instructions
 * @sk_protinfo: private area, net family specific, when not using slab
 * @sk_timer: sock cleanup timer
@@ -208,6 +212,7 @@ struct sock_common {
 * @sk_send_head: front of stuff to transmit
 * @sk_security: used by security modules
 * @sk_mark: generic packet mark
+ * @sk_classid: this socket's cgroup classid
 * @sk_write_pending: a write to stream socket waits to start
 * @sk_state_change: callback to indicate change in the state of the sock
 * @sk_data_ready: callback to indicate there is data to be processed
@@ -253,13 +258,14 @@ struct sock {
 struct {
 struct sk_buff *head;
 struct sk_buff *tail;
+int len;
 } sk_backlog;
-wait_queue_head_t *sk_sleep;
+struct socket_wq *sk_wq;
 struct dst_entry *sk_dst_cache;
 #ifdef CONFIG_XFRM
 struct xfrm_policy *sk_policy[2];
 #endif
-rwlock_t sk_dst_lock;
+spinlock_t sk_dst_lock;
 atomic_t sk_rmem_alloc;
 atomic_t sk_wmem_alloc;
 atomic_t sk_omem_alloc;
@@ -273,9 +279,13 @@ struct sock {
 int sk_forward_alloc;
 gfp_t sk_allocation;
 int sk_route_caps;
+int sk_route_nocaps;
 int sk_gso_type;
 unsigned int sk_gso_max_size;
 int sk_rcvlowat;
+#ifdef CONFIG_RPS
+__u32 sk_rxhash;
+#endif
 unsigned long sk_flags;
 unsigned long sk_lingertime;
 struct sk_buff_head sk_error_queue;
@@ -287,7 +297,8 @@ struct sock {
 unsigned short sk_ack_backlog;
 unsigned short sk_max_ack_backlog;
 __u32 sk_priority;
-struct ucred sk_peercred;
+struct pid *sk_peer_pid;
+const struct cred *sk_peer_cred;
 long sk_rcvtimeo;
 long sk_sndtimeo;
 struct sk_filter *sk_filter;
@@ -304,7 +315,7 @@ struct sock {
 void *sk_security;
 #endif
 __u32 sk_mark;
-/* XXX 4 bytes hole on 64 bit */
+u32 sk_classid;
 void (*sk_state_change)(struct sock *sk);
 void (*sk_data_ready)(struct sock *sk, int bytes);
 void (*sk_write_space)(struct sock *sk);
@@ -317,6 +328,11 @@ struct sock {
 /*
 * Hashed lists helper routines
 */
+static inline struct sock *sk_entry(const struct hlist_node *node)
+{
+return hlist_entry(node, struct sock, sk_node);
+}
+
 static inline struct sock *__sk_head(const struct hlist_head *head)
 {
 return hlist_entry(head->first, struct sock, sk_node);
@@ -376,6 +392,7 @@ static __inline__ void __sk_del_node(struct sock *sk)
 __hlist_del(&sk->sk_node);
 }
 
+/* NB: equivalent to hlist_del_init_rcu */
 static __inline__ int __sk_del_node_init(struct sock *sk)
 {
 if (sk_hashed(sk)) {
@@ -416,6 +433,7 @@ static __inline__ int sk_del_node_init(struct sock *sk)
 }
 return rc;
 }
+#define sk_del_node_init_rcu(sk) sk_del_node_init(sk)
 
 static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk)
 {
@@ -449,6 +467,12 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list)
 __sk_add_node(sk, list);
 }
 
+static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
+{
+sock_hold(sk);
+hlist_add_head_rcu(&sk->sk_node, list);
+}
+
 static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
 {
 hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
@@ -473,6 +497,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk,
 
 #define sk_for_each(__sk, node, list) \
 hlist_for_each_entry(__sk, node, list, sk_node)
+#define sk_for_each_rcu(__sk, node, list) \
+hlist_for_each_entry_rcu(__sk, node, list, sk_node)
 #define sk_nulls_for_each(__sk, node, list) \
 hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
 #define sk_nulls_for_each_rcu(__sk, node, list) \
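
For context, the RCU list helpers added above (sk_add_node_rcu, sk_del_node_init_rcu, sk_for_each_rcu) let lookups walk a socket hash chain without taking the chain lock, while writers still add and remove entries under it. A minimal, hypothetical read-side sketch, not part of this patch (the helper and the match predicate are invented names):

    /* Illustrative lockless lookup over a protocol's hash chain. */
    static struct sock *my_lookup_rcu(struct hlist_head *chain,
                                      bool (*match)(const struct sock *sk, void *key),
                                      void *key)
    {
        struct sock *sk;
        struct hlist_node *node;

        rcu_read_lock();
        sk_for_each_rcu(sk, node, chain) {
            if (match(sk, key) && atomic_inc_not_zero(&sk->sk_refcnt))
                goto found;
        }
        sk = NULL;
    found:
        rcu_read_unlock();
        return sk;
    }
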
@@ -574,23 +600,81 @@ static inline int sk_stream_memory_free(struct sock *sk)
 return sk->sk_wmem_queued < sk->sk_sndbuf;
 }
 
-/* The per-socket spinlock must be held here. */
-static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
+/* OOB backlog add */
+static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-if (!sk->sk_backlog.tail) {
-sk->sk_backlog.head = sk->sk_backlog.tail = skb;
-} else {
+/* dont let skb dst not refcounted, we are going to leave rcu lock */
+skb_dst_force(skb);
+
+if (!sk->sk_backlog.tail)
+sk->sk_backlog.head = skb;
+else
 sk->sk_backlog.tail->next = skb;
-sk->sk_backlog.tail = skb;
-}
+
+sk->sk_backlog.tail = skb;
 skb->next = NULL;
 }
 
+/*
+ * Take into account size of receive queue and backlog queue
+ */
+static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
+{
+unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
+
+return qsize + skb->truesize > sk->sk_rcvbuf;
+}
+
+/* The per-socket spinlock must be held here. */
+static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
+{
+if (sk_rcvqueues_full(sk, skb))
+return -ENOBUFS;
+
+__sk_add_backlog(sk, skb);
+sk->sk_backlog.len += skb->truesize;
+return 0;
+}
+
 static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 {
 return sk->sk_backlog_rcv(sk, skb);
 }
 
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+struct rps_sock_flow_table *sock_flow_table;
+
+rcu_read_lock();
+sock_flow_table = rcu_dereference(rps_sock_flow_table);
+rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
+rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_reset_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+struct rps_sock_flow_table *sock_flow_table;
+
+rcu_read_lock();
+sock_flow_table = rcu_dereference(rps_sock_flow_table);
+rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash);
+rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_save_rxhash(struct sock *sk, u32 rxhash)
+{
+#ifdef CONFIG_RPS
+if (unlikely(sk->sk_rxhash != rxhash)) {
+sock_rps_reset_flow(sk);
+sk->sk_rxhash = rxhash;
+}
+#endif
+}
+
 #define sk_wait_event(__sk, __timeo, __condition) \
 ({ int __rc; \
 release_sock(__sk); \
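
The reworked backlog API above now accounts for queued bytes via sk_backlog.len and can reject packets, and the RPS helpers record the flow hash for Receive Flow Steering. A hedged sketch of how a protocol's packet-receive path might combine them (illustrative only, loosely modelled on the UDP/TCP pattern; my_proto_do_rcv is a hypothetical stand-in for the protocol's real handler):

    int my_proto_do_rcv(struct sock *sk, struct sk_buff *skb); /* hypothetical, defined elsewhere */

    static int my_proto_rcv(struct sock *sk, struct sk_buff *skb)
    {
        if (sk_rcvqueues_full(sk, skb))
            goto drop;

        sock_rps_save_rxhash(sk, skb->rxhash);      /* feed RFS with the rx hash */

        bh_lock_sock(sk);
        if (!sock_owned_by_user(sk)) {
            my_proto_do_rcv(sk, skb);               /* owner not busy: process now */
        } else if (sk_add_backlog(sk, skb)) {       /* -ENOBUFS when queues are full */
            bh_unlock_sock(sk);
            goto drop;
        }
        bh_unlock_sock(sk);
        return 0;
    drop:
        kfree_skb(skb);
        return -1;
    }
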
@@ -668,6 +752,7 @@ struct proto {
 /* Keeping track of sk's, looking them up, and port selection methods. */
 void (*hash)(struct sock *sk);
 void (*unhash)(struct sock *sk);
+void (*rehash)(struct sock *sk);
 int (*get_port)(struct sock *sk, unsigned short snum);
 
 /* Keeping track of sockets in use */
@@ -690,6 +775,7 @@ struct proto {
 int *sysctl_wmem;
 int *sysctl_rmem;
 int max_header;
+bool no_autobind;
 
 struct kmem_cache *slab;
 unsigned int obj_size;
@@ -945,6 +1031,24 @@ extern void release_sock(struct sock *sk);
 SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock))
 
+extern bool lock_sock_fast(struct sock *sk);
+/**
+ * unlock_sock_fast - complement of lock_sock_fast
+ * @sk: socket
+ * @slow: slow mode
+ *
+ * fast unlock socket for user context.
+ * If slow mode is on, we call regular release_sock()
+ */
+static inline void unlock_sock_fast(struct sock *sk, bool slow)
+{
+if (slow)
+release_sock(sk);
+else
+spin_unlock_bh(&sk->sk_lock.slock);
+}
+
+
 extern struct sock *sk_alloc(struct net *net, int family,
 gfp_t priority,
 struct proto *prot);
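
lock_sock_fast() is expected to take only the bottom-half spinlock on the common path and fall back to the full lock_sock() otherwise, reporting through its return value which one it did; unlock_sock_fast() then picks the matching release. A hedged usage sketch (not part of this patch, function name invented):

    /* Illustrative: drain the receive queue with the cheapest lock available. */
    static void my_flush_receive_queue(struct sock *sk)
    {
        struct sk_buff *skb;
        bool slow;

        slow = lock_sock_fast(sk);
        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL)
            kfree_skb(skb);
        unlock_sock_fast(sk, slow);
    }
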
@@ -983,6 +1087,14 @@ extern void *sock_kmalloc(struct sock *sk, int size,
 extern void sock_kfree_s(struct sock *sk, void *mem, int size);
 extern void sk_send_sigurg(struct sock *sk);
 
+#ifdef CONFIG_CGROUPS
+extern void sock_update_classid(struct sock *sk);
+#else
+static inline void sock_update_classid(struct sock *sk)
+{
+}
+#endif
+
 /*
 * Functions to fill in entries in struct proto_ops when a protocol
 * does not implement a particular function.
@@ -1044,7 +1156,7 @@ extern void sk_common_release(struct sock *sk);
 extern void sock_init_data(struct socket *sock, struct sock *sk);
 
 /**
- * sk_filter_release: Release a socket filter
+ * sk_filter_release - release a socket filter
 * @fp: filter to remove
 *
 * Remove a filter from a socket and release its resources.
@@ -1117,12 +1229,7 @@ static inline void sk_tx_queue_clear(struct sock *sk)
 
 static inline int sk_tx_queue_get(const struct sock *sk)
 {
-return sk->sk_tx_queue_mapping;
-}
-
-static inline bool sk_tx_queue_recorded(const struct sock *sk)
-{
-return (sk && sk->sk_tx_queue_mapping >= 0);
+return sk ? sk->sk_tx_queue_mapping : -1;
 }
 
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
@@ -1131,6 +1238,10 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 sk->sk_socket = sock;
 }
 
+static inline wait_queue_head_t *sk_sleep(struct sock *sk)
+{
+return &sk->sk_wq->wait;
+}
 /* Detach socket from process context.
 * Announce socket dead, detach it from wait queue and inode.
 * Note that parent inode held reference count on this struct sock,
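
Code that used to dereference sk->sk_sleep directly is now expected to go through the sk_sleep() accessor above. A hedged sketch of a simple wait (illustrative only; real callers usually also juggle the socket lock around schedule_timeout()):

    /* Illustrative: sleep until the socket leaves the given state or we time out. */
    static long my_wait_for_state_change(struct sock *sk, unsigned char state, long timeo)
    {
        DEFINE_WAIT(wait);

        prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
        if (sk->sk_state == state)
            timeo = schedule_timeout(timeo);
        finish_wait(sk_sleep(sk), &wait);
        return timeo;
    }
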
@@ -1143,14 +1254,14 @@ static inline void sock_orphan(struct sock *sk)
 write_lock_bh(&sk->sk_callback_lock);
 sock_set_flag(sk, SOCK_DEAD);
 sk_set_socket(sk, NULL);
-sk->sk_sleep = NULL;
+sk->sk_wq = NULL;
 write_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 write_lock_bh(&sk->sk_callback_lock);
-sk->sk_sleep = &parent->wait;
+rcu_assign_pointer(sk->sk_wq, parent->wq);
 parent->sk = sk;
 sk_set_socket(sk, parent);
 security_sock_graft(sk, parent);
@@ -1163,7 +1274,9 @@ extern unsigned long sock_i_ino(struct sock *sk);
 static inline struct dst_entry *
 __sk_dst_get(struct sock *sk)
 {
-return sk->sk_dst_cache;
+return rcu_dereference_check(sk->sk_dst_cache, rcu_read_lock_held() ||
+sock_owned_by_user(sk) ||
+lockdep_is_held(&sk->sk_lock.slock));
 }
 
 static inline struct dst_entry *
@@ -1171,50 +1284,65 @@ sk_dst_get(struct sock *sk)
 {
 struct dst_entry *dst;
 
-read_lock(&sk->sk_dst_lock);
-dst = sk->sk_dst_cache;
+rcu_read_lock();
+dst = rcu_dereference(sk->sk_dst_cache);
 if (dst)
 dst_hold(dst);
-read_unlock(&sk->sk_dst_lock);
+rcu_read_unlock();
 return dst;
 }
 
+extern void sk_reset_txq(struct sock *sk);
+
+static inline void dst_negative_advice(struct sock *sk)
+{
+struct dst_entry *ndst, *dst = __sk_dst_get(sk);
+
+if (dst && dst->ops->negative_advice) {
+ndst = dst->ops->negative_advice(dst);
+
+if (ndst != dst) {
+rcu_assign_pointer(sk->sk_dst_cache, ndst);
+sk_reset_txq(sk);
+}
+}
+}
+
 static inline void
 __sk_dst_set(struct sock *sk, struct dst_entry *dst)
 {
 struct dst_entry *old_dst;
 
 sk_tx_queue_clear(sk);
-old_dst = sk->sk_dst_cache;
-sk->sk_dst_cache = dst;
+/*
+ * This can be called while sk is owned by the caller only,
+ * with no state that can be checked in a rcu_dereference_check() cond
+ */
+old_dst = rcu_dereference_raw(sk->sk_dst_cache);
+rcu_assign_pointer(sk->sk_dst_cache, dst);
 dst_release(old_dst);
 }
 
 static inline void
 sk_dst_set(struct sock *sk, struct dst_entry *dst)
 {
-write_lock(&sk->sk_dst_lock);
+spin_lock(&sk->sk_dst_lock);
 __sk_dst_set(sk, dst);
-write_unlock(&sk->sk_dst_lock);
+spin_unlock(&sk->sk_dst_lock);
 }
 
 static inline void
 __sk_dst_reset(struct sock *sk)
 {
-struct dst_entry *old_dst;
-
-sk_tx_queue_clear(sk);
-old_dst = sk->sk_dst_cache;
-sk->sk_dst_cache = NULL;
-dst_release(old_dst);
+__sk_dst_set(sk, NULL);
 }
 
 static inline void
 sk_dst_reset(struct sock *sk)
 {
-write_lock(&sk->sk_dst_lock);
+spin_lock(&sk->sk_dst_lock);
 __sk_dst_reset(sk);
-write_unlock(&sk->sk_dst_lock);
+spin_unlock(&sk->sk_dst_lock);
 }
 
 extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
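
With sk_dst_lock now a plain spinlock for writers and sk_dst_cache published via RCU, readers either rely on owning the socket (__sk_dst_get) or take their own reference (sk_dst_get). A hedged read-side sketch, not part of this patch (function name invented):

    /* Illustrative: sample the cached route's MTU without holding the socket lock. */
    static u32 my_cached_path_mtu(struct sock *sk)
    {
        struct dst_entry *dst = sk_dst_get(sk);    /* dst_hold() taken under RCU */
        u32 mtu = 0;

        if (dst) {
            mtu = dst_mtu(dst);
            dst_release(dst);
        }
        return mtu;
    }
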
@@ -1228,6 +1356,12 @@ static inline int sk_can_gso(const struct sock *sk)
 
 extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
 
+static inline void sk_nocaps_add(struct sock *sk, int flags)
+{
+sk->sk_route_nocaps |= flags;
+sk->sk_route_caps &= ~flags;
+}
+
 static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 struct sk_buff *skb, struct page *page,
 int off, int copy)
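
sk_nocaps_add() pairs with the new sk_route_nocaps field: a feature that cannot tolerate a given offload masks it out once, and the mask is meant to keep that capability suppressed when the route (and hence sk_route_caps) is recomputed later. A hedged example of the intended call site (illustrative; the in-tree motivation is disabling segmentation offload for TCP MD5-signed connections):

    /* Illustrative: this socket must never use GSO, whatever the route offers. */
    static void my_disable_gso(struct sock *sk)
    {
        sk_nocaps_add(sk, NETIF_F_GSO_MASK);
    }
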
@@ -1285,12 +1419,12 @@ static inline int sk_has_allocations(const struct sock *sk)
 }
 
 /**
- * sk_has_sleeper - check if there are any waiting processes
- * @sk: socket
+ * wq_has_sleeper - check if there are any waiting processes
+ * @wq: struct socket_wq
 *
- * Returns true if socket has waiting processes
+ * Returns true if socket_wq has waiting processes
 *
- * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
+ * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
 * barrier call. They were added due to the race found within the tcp code.
 *
 * Consider following tcp code paths:
@@ -1303,9 +1437,10 @@ static inline int sk_has_allocations(const struct sock *sk)
 * ... ...
 * tp->rcv_nxt check sock_def_readable
 * ... {
- * schedule ...
- * if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- * wake_up_interruptible(sk->sk_sleep)
+ * schedule rcu_read_lock();
+ * wq = rcu_dereference(sk->sk_wq);
+ * if (wq && waitqueue_active(&wq->wait))
+ * wake_up_interruptible(&wq->wait)
 * ...
 * }
 *
@@ -1314,19 +1449,18 @@ static inline int sk_has_allocations(const struct sock *sk)
 * could then endup calling schedule and sleep forever if there are no more
 * data on the socket.
 *
- * The sk_has_sleeper is always called right after a call to read_lock, so we
- * can use smp_mb__after_lock barrier.
 */
-static inline int sk_has_sleeper(struct sock *sk)
+static inline bool wq_has_sleeper(struct socket_wq *wq)
 {
+
 /*
 * We need to be sure we are in sync with the
 * add_wait_queue modifications to the wait queue.
 *
 * This memory barrier is paired in the sock_poll_wait.
 */
-smp_mb__after_lock();
-return sk->sk_sleep && waitqueue_active(sk->sk_sleep);
+smp_mb();
+return wq && waitqueue_active(&wq->wait);
 }
 
 /**
@@ -1335,7 +1469,7 @@ static inline int sk_has_sleeper(struct sock *sk)
 * @wait_address: socket wait queue
 * @p: poll_table
 *
- * See the comments in the sk_has_sleeper function.
+ * See the comments in the wq_has_sleeper function.
 */
 static inline void sock_poll_wait(struct file *filp,
 wait_queue_head_t *wait_address, poll_table *p)
@@ -1346,7 +1480,7 @@ static inline void sock_poll_wait(struct file *filp,
 * We need to be sure we are in sync with the
 * socket flags modification.
 *
- * This memory barrier is paired in the sk_has_sleeper.
+ * This memory barrier is paired in the wq_has_sleeper.
 */
 smp_mb();
 }
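
For the wake-up side of the race described in the comments above, a hedged sketch of the pattern wq_has_sleeper() is designed for (modelled on sock_def_readable-style callbacks; not part of these hunks, callback name invented):

    /* Illustrative sk_data_ready callback: the smp_mb() inside wq_has_sleeper()
     * pairs with the barrier issued by sock_poll_wait() on the sleeping side. */
    static void my_data_ready(struct sock *sk, int len)
    {
        struct socket_wq *wq;

        rcu_read_lock();
        wq = rcu_dereference(sk->sk_wq);
        if (wq_has_sleeper(wq))
            wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM);
        rcu_read_unlock();
    }
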
@@ -1390,20 +1524,7 @@ extern void sk_stop_timer(struct sock *sk, struct timer_list* timer);
 
 extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 
-static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
-{
-/* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
-number of warnings when compiling with -W --ANK
- */
-if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-(unsigned)sk->sk_rcvbuf)
-return -ENOMEM;
-skb_set_owner_r(skb, sk);
-skb_queue_tail(&sk->sk_error_queue, skb);
-if (!sock_flag(sk, SOCK_DEAD))
-sk->sk_data_ready(sk, skb->len);
-return 0;
-}
+extern int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
 
 /*
 * Recover an error report and clear atomically
@@ -1528,7 +1649,24 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 sk->sk_stamp = kt;
 }
 
-extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb);
+extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+struct sk_buff *skb);
+
+static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+struct sk_buff *skb)
+{
+#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \
+(1UL << SOCK_RCVTSTAMP) | \
+(1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
+(1UL << SOCK_TIMESTAMPING_SOFTWARE) | \
+(1UL << SOCK_TIMESTAMPING_RAW_HARDWARE) | \
+(1UL << SOCK_TIMESTAMPING_SYS_HARDWARE))
+
+if (sk->sk_flags & FLAGS_TS_OR_DROPS)
+__sock_recv_ts_and_drops(msg, sk, skb);
+else
+sk->sk_stamp = skb->tstamp;
+}
 
 /**
 * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
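
sock_recv_ts_and_drops() is now an inline fast path: it only calls out to __sock_recv_ts_and_drops() when one of the timestamping or RXQ-overflow flags is set, and otherwise just copies the skb timestamp. Protocol recvmsg paths keep calling it as before; a hedged sketch (illustrative only, function name invented):

    /* Illustrative recvmsg tail: emit timestamp/drop cmsgs if requested, then free. */
    static void my_recvmsg_finish(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
    {
        sock_recv_ts_and_drops(msg, sk, skb);
        skb_free_datagram(sk, skb);
    }
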
@@ -1573,19 +1711,13 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e
 static inline
 struct net *sock_net(const struct sock *sk)
 {
-#ifdef CONFIG_NET_NS
-return sk->sk_net;
-#else
-return &init_net;
-#endif
+return read_pnet(&sk->sk_net);
 }
 
 static inline
 void sock_net_set(struct sock *sk, struct net *net)
 {
-#ifdef CONFIG_NET_NS
-sk->sk_net = net;
-#endif
+write_pnet(&sk->sk_net, net);
 }
 
 /*
