Diffstat (limited to 'include/net/sock.h')
-rw-r--r--  include/net/sock.h  284
1 file changed, 208 insertions(+), 76 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 3f1a4804bb3f..adab9dc58183 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -51,6 +51,7 @@
 #include <linux/skbuff.h>	/* struct sk_buff */
 #include <linux/mm.h>
 #include <linux/security.h>
+#include <linux/slab.h>
 
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
@@ -73,7 +74,7 @@
 				printk(KERN_DEBUG msg); } while (0)
 #else
 /* Validate arguments and do nothing */
-static void inline int __attribute__ ((format (printf, 2, 3)))
+static inline void __attribute__ ((format (printf, 2, 3)))
 SOCK_DEBUG(struct sock *sk, const char *msg, ...)
 {
 }
@@ -158,7 +159,7 @@ struct sock_common {
  * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
  * @sk_lock: synchronizer
  * @sk_rcvbuf: size of receive buffer in bytes
- * @sk_sleep: sock wait queue
+ * @sk_wq: sock wait queue and async head
  * @sk_dst_cache: destination cache
  * @sk_dst_lock: destination cache lock
  * @sk_policy: flow policy
@@ -176,6 +177,7 @@ struct sock_common {
  *            %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
  * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
  * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
+ * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
  * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
  * @sk_gso_max_size: Maximum GSO segment size to build
  * @sk_lingertime: %SO_LINGER l_linger setting
@@ -193,10 +195,12 @@ struct sock_common {
  * @sk_priority: %SO_PRIORITY setting
  * @sk_type: socket type (%SOCK_STREAM, etc)
  * @sk_protocol: which protocol this socket belongs in this network family
- * @sk_peercred: %SO_PEERCRED setting
+ * @sk_peer_pid: &struct pid for this socket's peer
+ * @sk_peer_cred: %SO_PEERCRED setting
  * @sk_rcvlowat: %SO_RCVLOWAT setting
  * @sk_rcvtimeo: %SO_RCVTIMEO setting
  * @sk_sndtimeo: %SO_SNDTIMEO setting
+ * @sk_rxhash: flow hash received from netif layer
  * @sk_filter: socket filtering instructions
  * @sk_protinfo: private area, net family specific, when not using slab
  * @sk_timer: sock cleanup timer
@@ -208,6 +212,7 @@ struct sock_common {
  * @sk_send_head: front of stuff to transmit
  * @sk_security: used by security modules
  * @sk_mark: generic packet mark
+ * @sk_classid: this socket's cgroup classid
  * @sk_write_pending: a write to stream socket waits to start
  * @sk_state_change: callback to indicate change in the state of the sock
  * @sk_data_ready: callback to indicate there is data to be processed
@@ -253,13 +258,14 @@ struct sock {
 	struct {
 		struct sk_buff *head;
 		struct sk_buff *tail;
+		int len;
 	} sk_backlog;
-	wait_queue_head_t *sk_sleep;
+	struct socket_wq *sk_wq;
 	struct dst_entry *sk_dst_cache;
 #ifdef CONFIG_XFRM
 	struct xfrm_policy *sk_policy[2];
 #endif
-	rwlock_t sk_dst_lock;
+	spinlock_t sk_dst_lock;
 	atomic_t sk_rmem_alloc;
 	atomic_t sk_wmem_alloc;
 	atomic_t sk_omem_alloc;
@@ -273,9 +279,13 @@ struct sock {
 	int sk_forward_alloc;
 	gfp_t sk_allocation;
 	int sk_route_caps;
+	int sk_route_nocaps;
 	int sk_gso_type;
 	unsigned int sk_gso_max_size;
 	int sk_rcvlowat;
+#ifdef CONFIG_RPS
+	__u32 sk_rxhash;
+#endif
 	unsigned long sk_flags;
 	unsigned long sk_lingertime;
 	struct sk_buff_head sk_error_queue;
@@ -287,7 +297,8 @@ struct sock {
 	unsigned short sk_ack_backlog;
 	unsigned short sk_max_ack_backlog;
 	__u32 sk_priority;
-	struct ucred sk_peercred;
+	struct pid *sk_peer_pid;
+	const struct cred *sk_peer_cred;
 	long sk_rcvtimeo;
 	long sk_sndtimeo;
 	struct sk_filter *sk_filter;
@@ -304,7 +315,7 @@ struct sock {
 	void *sk_security;
 #endif
 	__u32 sk_mark;
-	/* XXX 4 bytes hole on 64 bit */
+	u32 sk_classid;
 	void (*sk_state_change)(struct sock *sk);
 	void (*sk_data_ready)(struct sock *sk, int bytes);
 	void (*sk_write_space)(struct sock *sk);
@@ -317,6 +328,11 @@ struct sock {
 /*
  * Hashed lists helper routines
  */
+static inline struct sock *sk_entry(const struct hlist_node *node)
+{
+	return hlist_entry(node, struct sock, sk_node);
+}
+
 static inline struct sock *__sk_head(const struct hlist_head *head)
 {
 	return hlist_entry(head->first, struct sock, sk_node);
@@ -376,6 +392,7 @@ static __inline__ void __sk_del_node(struct sock *sk)
 	__hlist_del(&sk->sk_node);
 }
 
+/* NB: equivalent to hlist_del_init_rcu */
 static __inline__ int __sk_del_node_init(struct sock *sk)
 {
 	if (sk_hashed(sk)) {
@@ -416,6 +433,7 @@ static __inline__ int sk_del_node_init(struct sock *sk)
 	}
 	return rc;
 }
+#define sk_del_node_init_rcu(sk) sk_del_node_init(sk)
 
 static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk)
 {
@@ -449,6 +467,12 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list)
 	__sk_add_node(sk, list);
 }
 
+static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
+{
+	sock_hold(sk);
+	hlist_add_head_rcu(&sk->sk_node, list);
+}
+
 static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
 {
 	hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
@@ -473,6 +497,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk,
 
 #define sk_for_each(__sk, node, list) \
 	hlist_for_each_entry(__sk, node, list, sk_node)
+#define sk_for_each_rcu(__sk, node, list) \
+	hlist_for_each_entry_rcu(__sk, node, list, sk_node)
 #define sk_nulls_for_each(__sk, node, list) \
 	hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
 #define sk_nulls_for_each_rcu(__sk, node, list) \
@@ -574,23 +600,81 @@ static inline int sk_stream_memory_free(struct sock *sk)
 	return sk->sk_wmem_queued < sk->sk_sndbuf;
 }
 
-/* The per-socket spinlock must be held here. */
-static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
+/* OOB backlog add */
+static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	if (!sk->sk_backlog.tail) {
-		sk->sk_backlog.head = sk->sk_backlog.tail = skb;
-	} else {
+	/* dont let skb dst not refcounted, we are going to leave rcu lock */
+	skb_dst_force(skb);
+
+	if (!sk->sk_backlog.tail)
+		sk->sk_backlog.head = skb;
+	else
 		sk->sk_backlog.tail->next = skb;
-		sk->sk_backlog.tail = skb;
-	}
+
+	sk->sk_backlog.tail = skb;
 	skb->next = NULL;
 }
 
+/*
+ * Take into account size of receive queue and backlog queue
+ */
+static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
+{
+	unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
+
+	return qsize + skb->truesize > sk->sk_rcvbuf;
+}
+
+/* The per-socket spinlock must be held here. */
+static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
+{
+	if (sk_rcvqueues_full(sk, skb))
+		return -ENOBUFS;
+
+	__sk_add_backlog(sk, skb);
+	sk->sk_backlog.len += skb->truesize;
+	return 0;
+}
+
 static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	return sk->sk_backlog_rcv(sk, skb);
 }
 
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_reset_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_save_rxhash(struct sock *sk, u32 rxhash)
+{
+#ifdef CONFIG_RPS
+	if (unlikely(sk->sk_rxhash != rxhash)) {
+		sock_rps_reset_flow(sk);
+		sk->sk_rxhash = rxhash;
+	}
+#endif
+}
+
 #define sk_wait_event(__sk, __timeo, __condition) \
 	({	int __rc; \
 		release_sock(__sk); \
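
The bounded backlog above changes the calling convention: sk_add_backlog() can now fail, so a protocol's receive path has to check the return value and drop the packet itself. A minimal sketch of such a caller follows; my_proto_rcv() is a hypothetical name, and it assumes the driver-provided flow hash is available in skb->rxhash for the RPS helper also added in this hunk.

/* Sketch only; not part of this patch. */
static int my_proto_rcv(struct sock *sk, struct sk_buff *skb)
{
	int rc = 0;

	sock_rps_save_rxhash(sk, skb->rxhash);	/* remember flow hash for flow steering */

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		rc = sk_backlog_rcv(sk, skb);	/* process directly in softirq context */
	} else if (sk_add_backlog(sk, skb)) {
		/* receive queue + backlog would exceed sk_rcvbuf: drop */
		bh_unlock_sock(sk);
		kfree_skb(skb);
		return -ENOBUFS;
	}
	bh_unlock_sock(sk);
	return rc;
}
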
@@ -668,6 +752,7 @@ struct proto {
 	/* Keeping track of sk's, looking them up, and port selection methods. */
 	void (*hash)(struct sock *sk);
 	void (*unhash)(struct sock *sk);
+	void (*rehash)(struct sock *sk);
 	int (*get_port)(struct sock *sk, unsigned short snum);
 
 	/* Keeping track of sockets in use */
@@ -690,6 +775,7 @@ struct proto {
 	int *sysctl_wmem;
 	int *sysctl_rmem;
 	int max_header;
+	bool no_autobind;
 
 	struct kmem_cache *slab;
 	unsigned int obj_size;
@@ -945,6 +1031,24 @@ extern void release_sock(struct sock *sk);
 				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock))
 
+extern bool lock_sock_fast(struct sock *sk);
+/**
+ * unlock_sock_fast - complement of lock_sock_fast
+ * @sk: socket
+ * @slow: slow mode
+ *
+ * fast unlock socket for user context.
+ * If slow mode is on, we call regular release_sock()
+ */
+static inline void unlock_sock_fast(struct sock *sk, bool slow)
+{
+	if (slow)
+		release_sock(sk);
+	else
+		spin_unlock_bh(&sk->sk_lock.slock);
+}
+
+
 extern struct sock *sk_alloc(struct net *net, int family,
 			     gfp_t priority,
 			     struct proto *prot);
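
As a hedged usage sketch (the caller below is hypothetical), the intended pairing is that lock_sock_fast() reports whether it had to fall back to the slow, process-context lock, and that answer is handed back to unlock_sock_fast() so the matching unlock path is taken.

/* Sketch only: a short critical section on the socket. */
static void my_short_critical_section(struct sock *sk)
{
	bool slow = lock_sock_fast(sk);

	/* ... touch state that requires owning the socket lock ... */

	unlock_sock_fast(sk, slow);
}
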
@@ -983,6 +1087,14 @@ extern void *sock_kmalloc(struct sock *sk, int size,
 extern void sock_kfree_s(struct sock *sk, void *mem, int size);
 extern void sk_send_sigurg(struct sock *sk);
 
+#ifdef CONFIG_CGROUPS
+extern void sock_update_classid(struct sock *sk);
+#else
+static inline void sock_update_classid(struct sock *sk)
+{
+}
+#endif
+
 /*
  * Functions to fill in entries in struct proto_ops when a protocol
  * does not implement a particular function.
@@ -1044,7 +1156,7 @@ extern void sk_common_release(struct sock *sk);
 extern void sock_init_data(struct socket *sock, struct sock *sk);
 
 /**
- * sk_filter_release: Release a socket filter
+ * sk_filter_release - release a socket filter
  * @fp: filter to remove
  *
  * Remove a filter from a socket and release its resources.
@@ -1117,12 +1229,7 @@ static inline void sk_tx_queue_clear(struct sock *sk)
 
 static inline int sk_tx_queue_get(const struct sock *sk)
 {
-	return sk->sk_tx_queue_mapping;
-}
-
-static inline bool sk_tx_queue_recorded(const struct sock *sk)
-{
-	return (sk && sk->sk_tx_queue_mapping >= 0);
+	return sk ? sk->sk_tx_queue_mapping : -1;
 }
 
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
@@ -1131,6 +1238,10 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 	sk->sk_socket = sock;
 }
 
+static inline wait_queue_head_t *sk_sleep(struct sock *sk)
+{
+	return &sk->sk_wq->wait;
+}
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
  * Note that parent inode held reference count on this struct sock,
@@ -1143,14 +1254,14 @@ static inline void sock_orphan(struct sock *sk)
 	write_lock_bh(&sk->sk_callback_lock);
 	sock_set_flag(sk, SOCK_DEAD);
 	sk_set_socket(sk, NULL);
-	sk->sk_sleep = NULL;
+	sk->sk_wq = NULL;
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_sleep = &parent->wait;
+	rcu_assign_pointer(sk->sk_wq, parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	security_sock_graft(sk, parent);
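
Illustrative only: with sk->sk_sleep gone, sleepers reach the wait queue through the sk_sleep() accessor introduced above. A rough sketch, where my_wait_for_condition() and my_condition() are placeholders for the caller's real wait loop and wakeup condition:

/* Sketch: sleep until a (placeholder) condition holds or the timeout expires. */
static long my_wait_for_condition(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	if (!my_condition(sk))
		timeo = schedule_timeout(timeo);
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}
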
@@ -1163,7 +1274,9 @@ extern unsigned long sock_i_ino(struct sock *sk);
 static inline struct dst_entry *
 __sk_dst_get(struct sock *sk)
 {
-	return sk->sk_dst_cache;
+	return rcu_dereference_check(sk->sk_dst_cache, rcu_read_lock_held() ||
+						       sock_owned_by_user(sk) ||
+						       lockdep_is_held(&sk->sk_lock.slock));
 }
 
 static inline struct dst_entry *
@@ -1171,50 +1284,65 @@ sk_dst_get(struct sock *sk)
 {
 	struct dst_entry *dst;
 
-	read_lock(&sk->sk_dst_lock);
-	dst = sk->sk_dst_cache;
+	rcu_read_lock();
+	dst = rcu_dereference(sk->sk_dst_cache);
 	if (dst)
 		dst_hold(dst);
-	read_unlock(&sk->sk_dst_lock);
+	rcu_read_unlock();
 	return dst;
 }
 
+extern void sk_reset_txq(struct sock *sk);
+
+static inline void dst_negative_advice(struct sock *sk)
+{
+	struct dst_entry *ndst, *dst = __sk_dst_get(sk);
+
+	if (dst && dst->ops->negative_advice) {
+		ndst = dst->ops->negative_advice(dst);
+
+		if (ndst != dst) {
+			rcu_assign_pointer(sk->sk_dst_cache, ndst);
+			sk_reset_txq(sk);
+		}
+	}
+}
+
 static inline void
 __sk_dst_set(struct sock *sk, struct dst_entry *dst)
 {
 	struct dst_entry *old_dst;
 
 	sk_tx_queue_clear(sk);
-	old_dst = sk->sk_dst_cache;
-	sk->sk_dst_cache = dst;
+	/*
+	 * This can be called while sk is owned by the caller only,
+	 * with no state that can be checked in a rcu_dereference_check() cond
+	 */
+	old_dst = rcu_dereference_raw(sk->sk_dst_cache);
+	rcu_assign_pointer(sk->sk_dst_cache, dst);
 	dst_release(old_dst);
 }
 
 static inline void
 sk_dst_set(struct sock *sk, struct dst_entry *dst)
 {
-	write_lock(&sk->sk_dst_lock);
+	spin_lock(&sk->sk_dst_lock);
 	__sk_dst_set(sk, dst);
-	write_unlock(&sk->sk_dst_lock);
+	spin_unlock(&sk->sk_dst_lock);
 }
 
 static inline void
 __sk_dst_reset(struct sock *sk)
 {
-	struct dst_entry *old_dst;
-
-	sk_tx_queue_clear(sk);
-	old_dst = sk->sk_dst_cache;
-	sk->sk_dst_cache = NULL;
-	dst_release(old_dst);
+	__sk_dst_set(sk, NULL);
 }
 
 static inline void
 sk_dst_reset(struct sock *sk)
 {
-	write_lock(&sk->sk_dst_lock);
+	spin_lock(&sk->sk_dst_lock);
 	__sk_dst_reset(sk);
-	write_unlock(&sk->sk_dst_lock);
+	spin_unlock(&sk->sk_dst_lock);
 }
 
 extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
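
To make the new locking rules concrete, here is an illustrative reader (the function name is made up): lockless readers either stay inside rcu_read_lock()/rcu_read_unlock() around __sk_dst_get(), or take their own reference with sk_dst_get() and drop it when done.

/* Sketch: a transmit-side reader that keeps the route beyond the RCU section. */
static void my_xmit_helper(struct sock *sk)
{
	struct dst_entry *dst = sk_dst_get(sk);	/* takes a reference if non-NULL */

	if (dst) {
		/* ... use dst for routing/feature decisions ... */
		dst_release(dst);
	}
}
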
@@ -1228,6 +1356,12 @@ static inline int sk_can_gso(const struct sock *sk)
 
 extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
 
+static inline void sk_nocaps_add(struct sock *sk, int flags)
+{
+	sk->sk_route_nocaps |= flags;
+	sk->sk_route_caps &= ~flags;
+}
+
 static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 				   struct sk_buff *skb, struct page *page,
 				   int off, int copy)
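
A hedged example of the intent behind sk_route_nocaps (the caller below is hypothetical): a protocol that must not use certain offloads on one socket masks them once, and the mask is meant to keep those bits out of sk_route_caps even when capabilities are later recomputed for a new route.

/* Sketch: permanently forbid segmentation offloads on this socket. */
static void my_disable_gso(struct sock *sk)
{
	sk_nocaps_add(sk, NETIF_F_GSO_MASK);
}
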
@@ -1285,12 +1419,12 @@ static inline int sk_has_allocations(const struct sock *sk)
 }
 
 /**
- * sk_has_sleeper - check if there are any waiting processes
- * @sk: socket
+ * wq_has_sleeper - check if there are any waiting processes
+ * @wq: struct socket_wq
  *
- * Returns true if socket has waiting processes
+ * Returns true if socket_wq has waiting processes
  *
- * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
+ * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
  * barrier call. They were added due to the race found within the tcp code.
  *
  * Consider following tcp code paths:
@@ -1303,9 +1437,10 @@ static inline int sk_has_allocations(const struct sock *sk)
  *   ...                        ...
  *   tp->rcv_nxt check          sock_def_readable
  *   ...                        {
- *   schedule                   ...
- *                              if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- *                                  wake_up_interruptible(sk->sk_sleep)
+ *   schedule                   rcu_read_lock();
+ *                              wq = rcu_dereference(sk->sk_wq);
+ *                              if (wq && waitqueue_active(&wq->wait))
+ *                                  wake_up_interruptible(&wq->wait)
  *   ...
  *   }
  *
@@ -1314,19 +1449,18 @@ static inline int sk_has_allocations(const struct sock *sk)
  * could then endup calling schedule and sleep forever if there are no more
  * data on the socket.
  *
- * The sk_has_sleeper is always called right after a call to read_lock, so we
- * can use smp_mb__after_lock barrier.
  */
-static inline int sk_has_sleeper(struct sock *sk)
+static inline bool wq_has_sleeper(struct socket_wq *wq)
 {
+
 	/*
 	 * We need to be sure we are in sync with the
 	 * add_wait_queue modifications to the wait queue.
 	 *
 	 * This memory barrier is paired in the sock_poll_wait.
 	 */
-	smp_mb__after_lock();
-	return sk->sk_sleep && waitqueue_active(sk->sk_sleep);
+	smp_mb();
+	return wq && waitqueue_active(&wq->wait);
 }
 
 /**
@@ -1335,7 +1469,7 @@ static inline int sk_has_sleeper(struct sock *sk)
  * @wait_address: socket wait queue
  * @p: poll_table
 *
- * See the comments in the sk_has_sleeper function.
+ * See the comments in the wq_has_sleeper function.
  */
 static inline void sock_poll_wait(struct file *filp,
 		wait_queue_head_t *wait_address, poll_table *p)
@@ -1346,7 +1480,7 @@ static inline void sock_poll_wait(struct file *filp,
 		 * We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
-		 * This memory barrier is paired in the sk_has_sleeper.
+		 * This memory barrier is paired in the wq_has_sleeper.
 		 */
 		smp_mb();
 	}
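
The comment block above describes the wake-up side of the race; a sketch of such a callback follows. The name my_data_ready is illustrative, though sock_def_readable() in net/core/sock.c is of this general shape.

/* Sketch: wake readers only when wq_has_sleeper() says someone is waiting;
 * its smp_mb() pairs with the barrier in sock_poll_wait().
 */
static void my_data_ready(struct sock *sk, int len)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible(&wq->wait);
	rcu_read_unlock();
}
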
@@ -1390,20 +1524,7 @@ extern void sk_stop_timer(struct sock *sk, struct timer_list* timer);
 
 extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 
-static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
-{
-	/* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
-	   number of warnings when compiling with -W --ANK
-	 */
-	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-	    (unsigned)sk->sk_rcvbuf)
-		return -ENOMEM;
-	skb_set_owner_r(skb, sk);
-	skb_queue_tail(&sk->sk_error_queue, skb);
-	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_data_ready(sk, skb->len);
-	return 0;
-}
+extern int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
 
 /*
  * Recover an error report and clear atomically
@@ -1528,7 +1649,24 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 		sk->sk_stamp = kt;
 }
 
-extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb);
+extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+				     struct sk_buff *skb);
+
+static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+					  struct sk_buff *skb)
+{
+#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL)			| \
+			   (1UL << SOCK_RCVTSTAMP)			| \
+			   (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)	| \
+			   (1UL << SOCK_TIMESTAMPING_SOFTWARE)		| \
+			   (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE)	| \
+			   (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE))
+
+	if (sk->sk_flags & FLAGS_TS_OR_DROPS)
+		__sock_recv_ts_and_drops(msg, sk, skb);
+	else
+		sk->sk_stamp = skb->tstamp;
+}
 
 /**
  * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
@@ -1573,19 +1711,13 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e
 static inline
 struct net *sock_net(const struct sock *sk)
 {
-#ifdef CONFIG_NET_NS
-	return sk->sk_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&sk->sk_net);
 }
 
 static inline
 void sock_net_set(struct sock *sk, struct net *net)
 {
-#ifdef CONFIG_NET_NS
-	sk->sk_net = net;
-#endif
+	write_pnet(&sk->sk_net, net);
 }
 
 /*