aboutsummaryrefslogtreecommitdiffstats
path: root/include/net/sock.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/sock.h')
-rw-r--r-- include/net/sock.h 101
1 files changed, 88 insertions, 13 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 352f06bbd7a9..950409dcec3d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -54,6 +54,7 @@
54 54
55#include <linux/filter.h> 55#include <linux/filter.h>
56#include <linux/rculist_nulls.h> 56#include <linux/rculist_nulls.h>
57#include <linux/poll.h>
57 58
58#include <asm/atomic.h> 59#include <asm/atomic.h>
59#include <net/dst.h> 60#include <net/dst.h>
@@ -103,15 +104,15 @@ struct net;
103 104
104/** 105/**
105 * struct sock_common - minimal network layer representation of sockets 106 * struct sock_common - minimal network layer representation of sockets
107 * @skc_node: main hash linkage for various protocol lookup tables
108 * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
109 * @skc_refcnt: reference count
110 * @skc_hash: hash value used with various protocol lookup tables
106 * @skc_family: network address family 111 * @skc_family: network address family
107 * @skc_state: Connection state 112 * @skc_state: Connection state
108 * @skc_reuse: %SO_REUSEADDR setting 113 * @skc_reuse: %SO_REUSEADDR setting
109 * @skc_bound_dev_if: bound device index if != 0 114 * @skc_bound_dev_if: bound device index if != 0
110 * @skc_node: main hash linkage for various protocol lookup tables
111 * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
112 * @skc_bind_node: bind hash linkage for various protocol lookup tables 115 * @skc_bind_node: bind hash linkage for various protocol lookup tables
113 * @skc_refcnt: reference count
114 * @skc_hash: hash value used with various protocol lookup tables
115 * @skc_prot: protocol handlers inside a network family 116 * @skc_prot: protocol handlers inside a network family
116 * @skc_net: reference to the network namespace of this socket 117 * @skc_net: reference to the network namespace of this socket
117 * 118 *
@@ -119,17 +120,21 @@ struct net;
119 * for struct sock and struct inet_timewait_sock. 120 * for struct sock and struct inet_timewait_sock.
120 */ 121 */
121struct sock_common { 122struct sock_common {
122 unsigned short skc_family; 123 /*
123 volatile unsigned char skc_state; 124 * first fields are not copied in sock_copy()
124 unsigned char skc_reuse; 125 */
125 int skc_bound_dev_if;
126 union { 126 union {
127 struct hlist_node skc_node; 127 struct hlist_node skc_node;
128 struct hlist_nulls_node skc_nulls_node; 128 struct hlist_nulls_node skc_nulls_node;
129 }; 129 };
130 struct hlist_node skc_bind_node;
131 atomic_t skc_refcnt; 130 atomic_t skc_refcnt;
131
132 unsigned int skc_hash; 132 unsigned int skc_hash;
133 unsigned short skc_family;
134 volatile unsigned char skc_state;
135 unsigned char skc_reuse;
136 int skc_bound_dev_if;
137 struct hlist_node skc_bind_node;
133 struct proto *skc_prot; 138 struct proto *skc_prot;
134#ifdef CONFIG_NET_NS 139#ifdef CONFIG_NET_NS
135 struct net *skc_net; 140 struct net *skc_net;
@@ -207,15 +212,17 @@ struct sock {
207 * don't add nothing before this first member (__sk_common) --acme 212 * don't add nothing before this first member (__sk_common) --acme
208 */ 213 */
209 struct sock_common __sk_common; 214 struct sock_common __sk_common;
215#define sk_node __sk_common.skc_node
216#define sk_nulls_node __sk_common.skc_nulls_node
217#define sk_refcnt __sk_common.skc_refcnt
218
219#define sk_copy_start __sk_common.skc_hash
220#define sk_hash __sk_common.skc_hash
210#define sk_family __sk_common.skc_family 221#define sk_family __sk_common.skc_family
211#define sk_state __sk_common.skc_state 222#define sk_state __sk_common.skc_state
212#define sk_reuse __sk_common.skc_reuse 223#define sk_reuse __sk_common.skc_reuse
213#define sk_bound_dev_if __sk_common.skc_bound_dev_if 224#define sk_bound_dev_if __sk_common.skc_bound_dev_if
214#define sk_node __sk_common.skc_node
215#define sk_nulls_node __sk_common.skc_nulls_node
216#define sk_bind_node __sk_common.skc_bind_node 225#define sk_bind_node __sk_common.skc_bind_node
217#define sk_refcnt __sk_common.skc_refcnt
218#define sk_hash __sk_common.skc_hash
219#define sk_prot __sk_common.skc_prot 226#define sk_prot __sk_common.skc_prot
220#define sk_net __sk_common.skc_net 227#define sk_net __sk_common.skc_net
221 kmemcheck_bitfield_begin(flags); 228 kmemcheck_bitfield_begin(flags);
@@ -1241,6 +1248,74 @@ static inline int sk_has_allocations(const struct sock *sk)
1241 return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk); 1248 return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk);
1242} 1249}
1243 1250
1251/**
1252 * sk_has_sleeper - check if there are any waiting processes
1253 * @sk: socket
1254 *
1255 * Returns true if socket has waiting processes
1256 *
1257 * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
1258 * barrier call. They were added due to the race found within the tcp code.
1259 *
1260 * Consider the following tcp code paths:
1261 *
1262 * CPU1 CPU2
1263 *
1264 * sys_select receive packet
1265 * ... ...
1266 * __add_wait_queue update tp->rcv_nxt
1267 * ... ...
1268 * tp->rcv_nxt check sock_def_readable
1269 * ... {
1270 * schedule ...
1271 * if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1272 * wake_up_interruptible(sk->sk_sleep)
1273 * ...
1274 * }
1275 *
1276 * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay
1277 * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1
1278 * could then end up calling schedule and sleep forever if there is no more
1279 * data on the socket.
1280 *
1281 * The sk_has_sleeper is always called right after a call to read_lock, so we
1282 * can use smp_mb__after_lock barrier.
1283 */
1284static inline int sk_has_sleeper(struct sock *sk)
1285{
1286 /*
1287 * We need to be sure we are in sync with the
1288 * add_wait_queue modifications to the wait queue.
1289 *
1290 * This memory barrier is paired with the one in sock_poll_wait().
1291 */
1292 smp_mb__after_lock();
1293 return sk->sk_sleep && waitqueue_active(sk->sk_sleep);
1294}
1295
1296/**
1297 * sock_poll_wait - place memory barrier behind the poll_wait call.
1298 * @filp: file
1299 * @wait_address: socket wait queue
1300 * @p: poll_table
1301 *
1302 * See the comments in the sk_has_sleeper function.
1303 */
1304static inline void sock_poll_wait(struct file *filp,
1305 wait_queue_head_t *wait_address, poll_table *p)
1306{
1307 if (p && wait_address) {
1308 poll_wait(filp, wait_address, p);
1309 /*
1310 * We need to be sure we are in sync with the
1311 * socket flags modification.
1312 *
1313 * This memory barrier is paired with the one in sk_has_sleeper().
1314 */
1315 smp_mb();
1316 }
1317}
1318
1244/* 1319/*
1245 * Queue a received datagram if it will fit. Stream and sequenced 1320 * Queue a received datagram if it will fit. Stream and sequenced
1246 * protocols can't normally use this as they need to fit buffers in 1321 * protocols can't normally use this as they need to fit buffers in