Diffstat (limited to 'include/net/sock.h')
-rw-r--r--  include/net/sock.h | 192 +++++++++++++++++++++++++++++-----------
1 file changed, 136 insertions(+), 56 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index adab9dc58183..c0b938cb4b1a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -52,12 +52,13 @@
 #include <linux/mm.h>
 #include <linux/security.h>
 #include <linux/slab.h>
+#include <linux/uaccess.h>
 
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
 #include <linux/poll.h>
 
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <net/dst.h>
 #include <net/checksum.h>
 
@@ -105,10 +106,8 @@ struct net;
 
 /**
  * struct sock_common - minimal network layer representation of sockets
- * @skc_node: main hash linkage for various protocol lookup tables
- * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
- * @skc_refcnt: reference count
- * @skc_tx_queue_mapping: tx queue number for this connection
+ * @skc_daddr: Foreign IPv4 addr
+ * @skc_rcv_saddr: Bound local IPv4 addr
  * @skc_hash: hash value used with various protocol lookup tables
  * @skc_u16hashes: two u16 hash values used by UDP lookup tables
  * @skc_family: network address family
@@ -119,20 +118,20 @@ struct net;
  * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
  * @skc_prot: protocol handlers inside a network family
  * @skc_net: reference to the network namespace of this socket
+ * @skc_node: main hash linkage for various protocol lookup tables
+ * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
+ * @skc_tx_queue_mapping: tx queue number for this connection
+ * @skc_refcnt: reference count
  *
  * This is the minimal network layer representation of sockets, the header
  * for struct sock and struct inet_timewait_sock.
  */
 struct sock_common {
-	/*
-	 * first fields are not copied in sock_copy()
+	/* skc_daddr and skc_rcv_saddr must be grouped :
+	 * cf INET_MATCH() and INET_TW_MATCH()
 	 */
-	union {
-		struct hlist_node	skc_node;
-		struct hlist_nulls_node skc_nulls_node;
-	};
-	atomic_t		skc_refcnt;
-	int			skc_tx_queue_mapping;
+	__be32			skc_daddr;
+	__be32			skc_rcv_saddr;
 
 	union  {
 		unsigned int	skc_hash;
@@ -150,6 +149,22 @@ struct sock_common {
 #ifdef CONFIG_NET_NS
 	struct net	 	*skc_net;
 #endif
+	/*
+	 * fields between dontcopy_begin/dontcopy_end
+	 * are not copied in sock_copy()
+	 */
+	/* private: */
+	int			skc_dontcopy_begin[0];
+	/* public: */
+	union {
+		struct hlist_node	skc_node;
+		struct hlist_nulls_node skc_nulls_node;
+	};
+	int			skc_tx_queue_mapping;
+	atomic_t		skc_refcnt;
+	/* private: */
+	int			skc_dontcopy_end[0];
+	/* public: */
 };
 
 /**
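
The zero-size skc_dontcopy_begin[]/skc_dontcopy_end[] arrays carry no data; they only mark the byte range that cloning must leave alone. A minimal sketch of how a copy routine can use the markers (the real sock_copy() lives in net/core/sock.c; this illustration is not its exact code):

	static void sock_copy_sketch(struct sock *nsk, const struct sock *osk)
	{
		/* head of the object, up to the first marker */
		memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
		/* skip the dontcopy window, copy the tail from the second marker */
		memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
		       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
	}
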
@@ -163,7 +178,6 @@ struct sock_common {
  * @sk_dst_cache: destination cache
  * @sk_dst_lock: destination cache lock
  * @sk_policy: flow policy
- * @sk_rmem_alloc: receive queue bytes committed
  * @sk_receive_queue: incoming packets
  * @sk_wmem_alloc: transmit queue bytes committed
  * @sk_write_queue: Packet sending queue
@@ -232,7 +246,8 @@ struct sock {
 #define sk_refcnt		__sk_common.skc_refcnt
 #define sk_tx_queue_mapping	__sk_common.skc_tx_queue_mapping
 
-#define sk_copy_start		__sk_common.skc_hash
+#define sk_dontcopy_begin	__sk_common.skc_dontcopy_begin
+#define sk_dontcopy_end		__sk_common.skc_dontcopy_end
 #define sk_hash			__sk_common.skc_hash
 #define sk_family		__sk_common.skc_family
 #define sk_state		__sk_common.skc_state
@@ -241,59 +256,67 @@ struct sock {
 #define sk_bind_node		__sk_common.skc_bind_node
 #define sk_prot			__sk_common.skc_prot
 #define sk_net			__sk_common.skc_net
-	kmemcheck_bitfield_begin(flags);
-	unsigned int		sk_shutdown  : 2,
-				sk_no_check  : 2,
-				sk_userlocks : 4,
-				sk_protocol  : 8,
-				sk_type      : 16;
-	kmemcheck_bitfield_end(flags);
-	int			sk_rcvbuf;
 	socket_lock_t		sk_lock;
+	struct sk_buff_head	sk_receive_queue;
 	/*
 	 * The backlog queue is special, it is always used with
 	 * the per-socket spinlock held and requires low latency
 	 * access. Therefore we special case it's implementation.
+	 * Note : rmem_alloc is in this structure to fill a hole
+	 * on 64bit arches, not because its logically part of
+	 * backlog.
 	 */
 	struct {
-		struct sk_buff *head;
-		struct sk_buff *tail;
-		int len;
+		atomic_t	rmem_alloc;
+		int		len;
+		struct sk_buff	*head;
+		struct sk_buff	*tail;
 	} sk_backlog;
-	struct socket_wq	*sk_wq;
-	struct dst_entry	*sk_dst_cache;
+#define sk_rmem_alloc sk_backlog.rmem_alloc
+	int			sk_forward_alloc;
+#ifdef CONFIG_RPS
+	__u32			sk_rxhash;
+#endif
+	atomic_t		sk_drops;
+	int			sk_rcvbuf;
+
+	struct sk_filter __rcu	*sk_filter;
+	struct socket_wq __rcu	*sk_wq;
+
+#ifdef CONFIG_NET_DMA
+	struct sk_buff_head	sk_async_wait_queue;
+#endif
+
 #ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
 #endif
+	unsigned long 		sk_flags;
+	struct dst_entry	*sk_dst_cache;
 	spinlock_t		sk_dst_lock;
-	atomic_t		sk_rmem_alloc;
 	atomic_t		sk_wmem_alloc;
 	atomic_t		sk_omem_alloc;
 	int			sk_sndbuf;
-	struct sk_buff_head	sk_receive_queue;
 	struct sk_buff_head	sk_write_queue;
-#ifdef CONFIG_NET_DMA
-	struct sk_buff_head	sk_async_wait_queue;
-#endif
+	kmemcheck_bitfield_begin(flags);
+	unsigned int		sk_shutdown  : 2,
+				sk_no_check  : 2,
+				sk_userlocks : 4,
+				sk_protocol  : 8,
+				sk_type      : 16;
+	kmemcheck_bitfield_end(flags);
 	int			sk_wmem_queued;
-	int			sk_forward_alloc;
 	gfp_t			sk_allocation;
 	int			sk_route_caps;
 	int			sk_route_nocaps;
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	int			sk_rcvlowat;
-#ifdef CONFIG_RPS
-	__u32			sk_rxhash;
-#endif
-	unsigned long 		sk_flags;
 	unsigned long		sk_lingertime;
 	struct sk_buff_head	sk_error_queue;
 	struct proto		*sk_prot_creator;
 	rwlock_t		sk_callback_lock;
 	int			sk_err,
 				sk_err_soft;
-	atomic_t		sk_drops;
 	unsigned short		sk_ack_backlog;
 	unsigned short		sk_max_ack_backlog;
 	__u32			sk_priority;
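
The "fill a hole" note above is about alignment padding. An illustrative comparison (not kernel code) of the backlog layout on LP64, where pointers are 8-byte aligned:

	struct backlog_before {		/* hypothetical: the old field order */
		struct sk_buff *head;	/* offset  0 */
		struct sk_buff *tail;	/* offset  8 */
		int len;		/* offset 16, then 4 bytes of padding */
	};				/* sizeof == 24 */

	struct backlog_after {		/* the patched order, rmem_alloc added */
		atomic_t rmem_alloc;	/* offset  0 */
		int len;		/* offset  4, pairs with rmem_alloc */
		struct sk_buff *head;	/* offset  8 */
		struct sk_buff *tail;	/* offset 16 */
	};				/* sizeof == 24: an extra field for free */
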
@@ -301,7 +324,6 @@ struct sock {
 	const struct cred	*sk_peer_cred;
 	long			sk_rcvtimeo;
 	long			sk_sndtimeo;
-	struct sk_filter      	*sk_filter;
 	void			*sk_protinfo;
 	struct timer_list	sk_timer;
 	ktime_t			sk_stamp;
@@ -509,9 +531,6 @@ static __inline__ void sk_add_bind_node(struct sock *sk,
 #define sk_nulls_for_each_from(__sk, node) \
 	if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \
 		hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node)
-#define sk_for_each_continue(__sk, node) \
-	if (__sk && ({ node = &(__sk)->sk_node; 1; })) \
-		hlist_for_each_entry_continue(__sk, node, sk_node)
 #define sk_for_each_safe(__sk, node, tmp, list) \
 	hlist_for_each_entry_safe(__sk, node, tmp, list, sk_node)
 #define sk_for_each_bound(__sk, node, list) \
@@ -734,6 +753,8 @@ struct proto {
 					int level,
 					int optname, char __user *optval,
 					int __user *option);
+	int			(*compat_ioctl)(struct sock *sk,
+					unsigned int cmd, unsigned long arg);
 #endif
 	int			(*sendmsg)(struct kiocb *iocb, struct sock *sk,
 					   struct msghdr *msg, size_t len);
@@ -754,6 +775,7 @@ struct proto {
 	void			(*unhash)(struct sock *sk);
 	void			(*rehash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
+	void			(*clear_sk)(struct sock *sk, int size);
 
 	/* Keeping track of sockets in use */
 #ifdef CONFIG_PROC_FS
@@ -762,7 +784,7 @@ struct proto {
 
 	/* Memory pressure */
 	void			(*enter_memory_pressure)(struct sock *sk);
-	atomic_t		*memory_allocated;	/* Current allocated memory. */
+	atomic_long_t		*memory_allocated;	/* Current allocated memory. */
 	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
 	/*
 	 * Pressure flag: try to collapse.
@@ -771,7 +793,7 @@ struct proto {
 	 * is strict, actions are advisory and have some latency.
 	 */
 	int			*memory_pressure;
-	int			*sysctl_mem;
+	long			*sysctl_mem;
 	int			*sysctl_wmem;
 	int			*sysctl_rmem;
 	int			max_header;
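
Widening memory_allocated to atomic_long_t and sysctl_mem to long keeps the page-count accounting from wrapping once total socket memory exceeds what an int can describe. A hedged sketch of how a protocol might wire these fields up (the names below are invented for illustration; real instances are tcp_prot and udp_prot):

	static atomic_long_t example_memory_allocated;
	static long example_sysctl_mem[3];	/* min, pressure, max (pages) */

	static struct proto example_proto = {
		.name			= "EXAMPLE",
		.memory_allocated	= &example_memory_allocated,
		.sysctl_mem		= example_sysctl_mem,
	};
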
@@ -852,6 +874,8 @@ static inline void __sk_prot_rehash(struct sock *sk)
 	sk->sk_prot->hash(sk);
 }
 
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
+
 /* About 10 seconds */
 #define SOCK_DESTROY_TIME (10*HZ)
 
@@ -1155,6 +1179,8 @@ extern void sk_common_release(struct sock *sk);
 /* Initialise core socket variables */
 extern void sock_init_data(struct socket *sock, struct sock *sk);
 
+extern void sk_filter_release_rcu(struct rcu_head *rcu);
+
 /**
  *	sk_filter_release - release a socket filter
  *	@fp: filter to remove
@@ -1165,7 +1191,7 @@ extern void sock_init_data(struct socket *sock, struct sock *sk);
 static inline void sk_filter_release(struct sk_filter *fp)
 {
 	if (atomic_dec_and_test(&fp->refcnt))
-		kfree(fp);
+		call_rcu(&fp->rcu, sk_filter_release_rcu);
 }
 
 static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
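
Deferring the free through call_rcu() lets the receive fast path run the filter under rcu_read_lock() without taking a reference. The callback itself, defined in net/core/filter.c, presumably amounts to something like this sketch:

	void sk_filter_release_rcu(struct rcu_head *rcu)
	{
		struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

		kfree(fp);	/* safe now: every RCU reader has finished */
	}
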
@@ -1240,7 +1266,8 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 
 static inline wait_queue_head_t *sk_sleep(struct sock *sk)
 {
-	return &sk->sk_wq->wait;
+	BUILD_BUG_ON(offsetof(struct socket_wq, wait) != 0);
+	return &rcu_dereference_raw(sk->sk_wq)->wait;
 }
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
@@ -1261,7 +1288,7 @@ static inline void sock_orphan(struct sock *sk)
 static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	write_lock_bh(&sk->sk_callback_lock);
-	rcu_assign_pointer(sk->sk_wq, parent->wq);
+	sk->sk_wq = parent->wq;
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	security_sock_graft(sk, parent);
@@ -1362,6 +1389,59 @@ static inline void sk_nocaps_add(struct sock *sk, int flags)
 	sk->sk_route_caps &= ~flags;
 }
 
+static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
+					   char __user *from, char *to,
+					   int copy, int offset)
+{
+	if (skb->ip_summed == CHECKSUM_NONE) {
+		int err = 0;
+		__wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err);
+		if (err)
+			return err;
+		skb->csum = csum_block_add(skb->csum, csum, offset);
+	} else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
+		if (!access_ok(VERIFY_READ, from, copy) ||
+		    __copy_from_user_nocache(to, from, copy))
+			return -EFAULT;
+	} else if (copy_from_user(to, from, copy))
+		return -EFAULT;
+
+	return 0;
+}
+
+static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
+				       char __user *from, int copy)
+{
+	int err, offset = skb->len;
+
+	err = skb_do_copy_data_nocache(sk, skb, from, skb_put(skb, copy),
+				       copy, offset);
+	if (err)
+		__skb_trim(skb, offset);
+
+	return err;
+}
+
+static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from,
+					   struct sk_buff *skb,
+					   struct page *page,
+					   int off, int copy)
+{
+	int err;
+
+	err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + off,
+				       copy, skb->len);
+	if (err)
+		return err;
+
+	skb->len	     += copy;
+	skb->data_len	     += copy;
+	skb->truesize	     += copy;
+	sk->sk_wmem_queued   += copy;
+	sk_mem_charge(sk, copy);
+	return 0;
+}
+
 static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 				   struct sk_buff *skb, struct page *page,
 				   int off, int copy)
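
Callers on the sendmsg() path would switch from skb_add_data() to skb_add_data_nocache() so that sockets whose route advertises NETIF_F_NOCACHE_COPY get the cache-bypassing copy; the checksumming branch keeps the cached copy because it reads the data right back. A hedged sketch of such a call site (simplified, not verbatim kernel code):

	/* Append 'copy' bytes of user data to skb, letting
	 * sk->sk_route_caps pick the copy strategy; on error the skb
	 * has already been trimmed back by skb_add_data_nocache().
	 */
	static int append_user_data(struct sock *sk, struct sk_buff *skb,
				    char __user *from, int copy)
	{
		int err = skb_add_data_nocache(sk, skb, from, copy);

		return err ? err : copy;
	}
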
@@ -1558,7 +1638,11 @@ static inline void sk_wake_async(struct sock *sk, int how, int band)
 }
 
 #define SOCK_MIN_SNDBUF 2048
-#define SOCK_MIN_RCVBUF 256
+/*
+ * Since sk_rmem_alloc sums skb->truesize, even a small frame might need
+ * sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak
+ */
+#define SOCK_MIN_RCVBUF (2048 + sizeof(struct sk_buff))
 
 static inline void sk_stream_moderate_sndbuf(struct sock *sk)
 {
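
The arithmetic behind the new minimum: receive accounting charges skb->truesize, which counts struct sk_buff itself (a couple of hundred bytes, depending on config) plus the full buffer allocation, so the old 256-byte floor could reject even a one-byte datagram. A simplified sketch of the admission check this minimum must satisfy (modeled loosely on the receive path; not the exact kernel code):

	static inline bool rcvbuf_has_room(const struct sock *sk,
					   const struct sk_buff *skb)
	{
		return atomic_read(&sk->sk_rmem_alloc) + skb->truesize <=
		       (unsigned int)sk->sk_rcvbuf;
	}
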
@@ -1670,17 +1754,13 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
 
 /**
  * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
- * @msg:	outgoing packet
  * @sk:		socket sending this packet
- * @shtx:	filled with instructions for time stamping
+ * @tx_flags:	filled with instructions for time stamping
  *
  * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if
  * parameters are invalid.
  */
-extern int sock_tx_timestamp(struct msghdr *msg,
-			     struct sock *sk,
-			     union skb_shared_tx *shtx);
-
+extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags);
 
 /**
  * sk_eat_skb - Release a skb if it is no longer needed
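
With union skb_shared_tx gone, a sender collects the timestamping instructions into a plain __u8 before it has even built the skb, then applies them to the skb's shared info. A hedged usage sketch modeled on the datagram send path (not verbatim kernel code):

	static int tag_tx_timestamp(struct sock *sk, struct sk_buff *skb)
	{
		__u8 tx_flags = 0;
		int err = sock_tx_timestamp(sk, &tx_flags);

		if (err)
			return err;
		skb_shinfo(skb)->tx_flags = tx_flags;	/* assumes the era's __u8 field */
		return 0;
	}
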
@@ -1722,7 +1802,7 @@ void sock_net_set(struct sock *sk, struct net *net)
 
 /*
  * Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace.
- * They should not hold a referrence to a namespace in order to allow
+ * They should not hold a reference to a namespace in order to allow
  * to stop it.
  * Sockets after sk_change_net should be released using sk_release_kernel
  */