Diffstat (limited to 'include/net/sock.h')

 include/net/sock.h | 122 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 88 insertions(+), 34 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 7aa78440559a..7f89e4ba18d1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -150,6 +150,10 @@ typedef __u64 __bitwise __addrpair;
  *	@skc_node: main hash linkage for various protocol lookup tables
  *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
  *	@skc_tx_queue_mapping: tx queue number for this connection
+ *	@skc_flags: place holder for sk_flags
+ *		%SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
+ *		%SO_OOBINLINE settings, %SO_TIMESTAMPING settings
+ *	@skc_incoming_cpu: record/match cpu processing incoming packets
  *	@skc_refcnt: reference count
  *
  * This is the minimal network layer representation of sockets, the header
@@ -200,6 +204,16 @@ struct sock_common {
 
 	atomic64_t		skc_cookie;
 
+	/* following fields are padding to force
+	 * offset(struct sock, sk_refcnt) == 128 on 64bit arches
+	 * assuming IPV6 is enabled. We use this padding differently
+	 * for different kind of 'sockets'
+	 */
+	union {
+		unsigned long	skc_flags;
+		struct sock	*skc_listener; /* request_sock */
+		struct inet_timewait_death_row *skc_tw_dr; /* inet_timewait_sock */
+	};
 	/*
	 * fields between dontcopy_begin/dontcopy_end
	 * are not copied in sock_copy()
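
The comment above encodes a layout contract: with IPv6 enabled on 64-bit
architectures, sk_refcnt must land at byte offset 128, the start of the
third cache line, and the padding word in front of it is recycled per
socket flavour. As a hedged illustration (the struct and field names
below are made up, not kernel code), such a contract can be pinned down
at compile time with a C11 static assertion:

	#include <stddef.h>
	#include <assert.h>

	struct demo_common {
		char		pad[120];	/* stand-in for the fields above the union */
		union {				/* the repurposed padding word */
			unsigned long	flags;
			void		*listener;
		};
	};

	struct demo_sock {
		struct demo_common	common;
		int			refcnt;		/* analogous to sk_refcnt */
	};

	/* On an LP64 target the union word ends at byte 128, so refcnt
	 * starts there; the build breaks if the layout ever drifts. */
	static_assert(offsetof(struct demo_sock, refcnt) == 128,
		      "refcnt must start at offset 128");
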
@@ -212,9 +226,20 @@ struct sock_common {
 		struct hlist_nulls_node skc_nulls_node;
 	};
 	int			skc_tx_queue_mapping;
+	union {
+		int		skc_incoming_cpu;
+		u32		skc_rcv_wnd;
+		u32		skc_tw_rcv_nxt; /* struct tcp_timewait_sock */
+	};
+
 	atomic_t		skc_refcnt;
 	/* private: */
 	int			skc_dontcopy_end[0];
+	union {
+		u32		skc_rxhash;
+		u32		skc_window_clamp;
+		u32		skc_tw_snd_nxt; /* struct tcp_timewait_sock */
+	};
 	/* public: */
 };
 
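
These unions overlay storage that is only meaningful for one socket
flavour at a time: a full socket records skc_incoming_cpu, a request
socket its receive window, a timewait socket the next expected sequence
number. The discriminant is the socket state. A hedged, simplified
sketch of how a reader might select the live member (demo_read_slot is
hypothetical; real code goes through the per-type wrapper structs):

	static u32 demo_read_slot(const struct sock *sk)
	{
		if (sk->sk_state == TCP_TIME_WAIT)	/* tcp_timewait_sock */
			return sk->__sk_common.skc_tw_rcv_nxt;
		if (sk->sk_state == TCP_NEW_SYN_RECV)	/* request_sock */
			return sk->__sk_common.skc_rcv_wnd;
		return (u32)sk->sk_incoming_cpu;	/* full socket */
	}
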
@@ -243,8 +268,6 @@ struct cg_proto;
  *	@sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler)
  *	@sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE)
  *	@sk_sndbuf: size of send buffer in bytes
- *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
- *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
  *	@sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
  *	@sk_no_check_rx: allow zero checksum in RX packets
  *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
@@ -273,8 +296,6 @@ struct cg_proto;
  *	@sk_rcvlowat: %SO_RCVLOWAT setting
  *	@sk_rcvtimeo: %SO_RCVTIMEO setting
  *	@sk_sndtimeo: %SO_SNDTIMEO setting
- *	@sk_rxhash: flow hash received from netif layer
- *	@sk_incoming_cpu: record cpu processing incoming packets
  *	@sk_txhash: computed flow hash for use on transmit
  *	@sk_filter: socket filtering instructions
  *	@sk_timer: sock cleanup timer
@@ -331,6 +352,9 @@ struct sock {
 #define sk_v6_daddr		__sk_common.skc_v6_daddr
 #define sk_v6_rcv_saddr	__sk_common.skc_v6_rcv_saddr
 #define sk_cookie		__sk_common.skc_cookie
+#define sk_incoming_cpu		__sk_common.skc_incoming_cpu
+#define sk_flags		__sk_common.skc_flags
+#define sk_rxhash		__sk_common.skc_rxhash
 
 	socket_lock_t		sk_lock;
 	struct sk_buff_head	sk_receive_queue;
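
With these aliases, code that dereferences sk->sk_flags, sk->sk_rxhash
or sk->sk_incoming_cpu keeps compiling unchanged; the macros simply
redirect into sock_common. For example, the existing sock_flag() helper
still reads as before:

	/* unchanged caller: sk_flags now expands to __sk_common.skc_flags */
	if (sock_flag(sk, SOCK_DEAD))
		return;
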
@@ -350,14 +374,6 @@ struct sock {
 	} sk_backlog;
 #define sk_rmem_alloc sk_backlog.rmem_alloc
 	int			sk_forward_alloc;
-#ifdef CONFIG_RPS
-	__u32			sk_rxhash;
-#endif
-	u16			sk_incoming_cpu;
-	/* 16bit hole
-	 * Warned : sk_incoming_cpu can be set from softirq,
-	 * Do not use this hole without fully understanding possible issues.
-	 */
 
 	__u32			sk_txhash;
 #ifdef CONFIG_NET_RX_BUSY_POLL
@@ -373,7 +389,6 @@ struct sock {
 #ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
 #endif
-	unsigned long		sk_flags;
 	struct dst_entry	*sk_rx_dst;
 	struct dst_entry __rcu	*sk_dst_cache;
 	spinlock_t		sk_dst_lock;
@@ -759,7 +774,7 @@ static inline int sk_memalloc_socks(void)
 
 #endif
 
-static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask)
+static inline gfp_t sk_gfp_atomic(const struct sock *sk, gfp_t gfp_mask)
 {
 	return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC);
 }
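
sk_gfp_atomic() folds the socket's __GFP_MEMALLOC bit into an atomic
allocation, so a SOCK_MEMALLOC socket (e.g. one used for swap over
network) extends its access to the emergency reserves to allocations
made on its behalf. A hedged usage sketch (size and skb are assumed to
come from the caller):

	/* allocate in atomic context, inheriting the socket's memalloc rights */
	skb = alloc_skb(size, sk_gfp_atomic(sk, GFP_ATOMIC));
	if (!skb)
		return -ENOMEM;
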
@@ -828,6 +843,14 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s
 	if (sk_rcvqueues_full(sk, limit))
 		return -ENOBUFS;
 
+	/*
+	 * If the skb was allocated from pfmemalloc reserves, only
+	 * allow SOCK_MEMALLOC sockets to use it as this socket is
+	 * helping free memory
+	 */
+	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
+		return -ENOMEM;
+
 	__sk_add_backlog(sk, skb);
 	sk->sk_backlog.len += skb->truesize;
 	return 0;
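
The new check means sk_add_backlog() can now also fail with -ENOMEM: an
skb carved out of the pfmemalloc emergency reserves may only be queued
to a socket that is itself helping to free memory. Callers already
treat any non-zero return as a drop, as in this hedged sketch modeled
on a protocol receive path (demo_do_rcv is hypothetical):

	int rc = 0;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk))
		rc = demo_do_rcv(sk, skb);	/* process directly */
	else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
		kfree_skb(skb);			/* -ENOBUFS or -ENOMEM: drop */
	bh_unlock_sock(sk);
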
@@ -1514,6 +1537,13 @@ void sock_kfree_s(struct sock *sk, void *mem, int size);
 void sock_kzfree_s(struct sock *sk, void *mem, int size);
 void sk_send_sigurg(struct sock *sk);
 
+struct sockcm_cookie {
+	u32 mark;
+};
+
+int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
+		   struct sockcm_cookie *sockc);
+
 /*
  * Functions to fill in entries in struct proto_ops when a protocol
  * does not implement a particular function.
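
sock_cmsg_send() parses SOL_SOCKET control messages into a
sockcm_cookie, which for now carries only the packet mark. A hedged
sketch of the intended sendmsg-side usage: seed the cookie with the
socket's defaults, then let control messages override them.

	struct sockcm_cookie sockc = { .mark = sk->sk_mark };
	int err;

	if (msg->msg_controllen) {
		err = sock_cmsg_send(sk, msg, &sockc);
		if (err)
			return err;
	}
	/* ... pass sockc.mark down when building the packet ... */
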
@@ -1654,12 +1684,16 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 kuid_t sock_i_uid(struct sock *sk);
 unsigned long sock_i_ino(struct sock *sk);
 
-static inline void sk_set_txhash(struct sock *sk)
+static inline u32 net_tx_rndhash(void)
 {
-	sk->sk_txhash = prandom_u32();
+	u32 v = prandom_u32();
 
-	if (unlikely(!sk->sk_txhash))
-		sk->sk_txhash = 1;
+	return v ?: 1;
+}
+
+static inline void sk_set_txhash(struct sock *sk)
+{
+	sk->sk_txhash = net_tx_rndhash();
 }
 
 static inline void sk_rethink_txhash(struct sock *sk)
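
Factoring the random draw out into net_tx_rndhash() keeps the invariant
in one place: a tx hash of 0 means "no hash", so the GNU ?: shorthand
remaps a zero draw to 1. The equivalent long form, for clarity:

	u32 v = prandom_u32();

	return v ? v : 1;	/* what "v ?: 1" expands to: never return 0 */
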
@@ -1917,6 +1951,8 @@ static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
 	}
 }
 
+void skb_set_owner_w(struct sk_buff *skb, struct sock *sk);
+
 /*
  * Queue a received datagram if it will fit. Stream and sequenced
  * protocols can't normally use this as they need to fit buffers in
@@ -1925,21 +1961,6 @@ static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
  * Inlined as it's very short and called for pretty much every
  * packet ever received.
  */
-
-static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
-	skb_orphan(skb);
-	skb->sk = sk;
-	skb->destructor = sock_wfree;
-	skb_set_hash_from_sk(skb, sk);
-	/*
-	 * We used to take a refcount on sk, but following operation
-	 * is enough to guarantee sk_free() wont free this sock until
-	 * all in-flight packets are completed
-	 */
-	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
-}
-
 static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 {
 	skb_orphan(skb);
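
Only the inline definition moves out of the header (the declaration was
added above); the mechanism itself is unchanged: instead of holding a
refcount on the socket, each in-flight skb charges its truesize to
sk_wmem_alloc, and destruction cannot proceed until that accounting
drains. A toy userspace model of the scheme (explicitly not kernel
code, and omitting the kernel's +1 bias on the counter):

	#include <stdatomic.h>
	#include <stdlib.h>

	struct toy_sock { atomic_int wmem_alloc; };

	/* charge side, like skb_set_owner_w() adding skb->truesize */
	static void toy_charge(struct toy_sock *sk, int truesize)
	{
		atomic_fetch_add(&sk->wmem_alloc, truesize);
	}

	/* completion side, like sock_wfree(): last uncharge frees the sock */
	static void toy_uncharge(struct toy_sock *sk, int truesize)
	{
		if (atomic_fetch_sub(&sk->wmem_alloc, truesize) == truesize)
			free(sk);
	}
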
@@ -2020,7 +2041,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
  */
 static inline struct page_frag *sk_page_frag(struct sock *sk)
 {
-	if (sk->sk_allocation & __GFP_WAIT)
+	if (gfpflags_allow_blocking(sk->sk_allocation))
 		return &current->task_frag;
 
 	return &sk->sk_frag;
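
gfpflags_allow_blocking() is the newer spelling of the __GFP_WAIT test:
if the socket's allocations may sleep, the caller is in process context
and can safely use the per-task page frag; otherwise the per-socket one
is used. A hedged usage sketch modeled on stream transmit paths:

	struct page_frag *pfrag = sk_page_frag(sk);

	if (!sk_page_frag_refill(sk, pfrag))
		goto wait_for_memory;	/* hypothetical label in the caller */
	/* copy user data into pfrag->page at pfrag->offset, then advance it */
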
@@ -2197,6 +2218,39 @@ static inline bool sk_fullsock(const struct sock *sk)
 	return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
 }
 
+/* This helper checks if a socket is a LISTEN or NEW_SYN_RECV
+ * SYNACK messages can be attached to either ones (depending on SYNCOOKIE)
+ */
+static inline bool sk_listener(const struct sock *sk)
+{
+	return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
+}
+
+/**
+ * sk_state_load - read sk->sk_state for lockless contexts
+ * @sk: socket pointer
+ *
+ * Paired with sk_state_store(). Used in places we do not hold socket lock :
+ * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
+ */
+static inline int sk_state_load(const struct sock *sk)
+{
+	return smp_load_acquire(&sk->sk_state);
+}
+
+/**
+ * sk_state_store - update sk->sk_state
+ * @sk: socket pointer
+ * @newstate: new state
+ *
+ * Paired with sk_state_load(). Should be used in contexts where
+ * state change might impact lockless readers.
+ */
+static inline void sk_state_store(struct sock *sk, int newstate)
+{
+	smp_store_release(&sk->sk_state, newstate);
+}
+
 void sock_enable_timestamp(struct sock *sk, int flag);
 int sock_get_timestamp(struct sock *, struct timeval __user *);
 int sock_get_timestampns(struct sock *, struct timespec __user *);
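
sk_state_load()/sk_state_store() give lockless sk_state readers
acquire/release ordering instead of a plain racy load: everything the
writer stored before publishing the state is guaranteed visible to a
reader that observes that state. A hedged sketch of the pairing
(demo_report is hypothetical):

	/* writer side, e.g. a connect path holding the socket lock */
	sk_state_store(sk, TCP_ESTABLISHED);

	/* lockless reader, e.g. a diag/poll style path */
	if (sk_state_load(sk) == TCP_ESTABLISHED)
		demo_report(sk);
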