diff options
Diffstat (limited to 'include/net/sock.h')
| -rw-r--r-- | include/net/sock.h | 101 |
1 files changed, 88 insertions, 13 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index 352f06bbd7a9..950409dcec3d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
| @@ -54,6 +54,7 @@ | |||
| 54 | 54 | ||
| 55 | #include <linux/filter.h> | 55 | #include <linux/filter.h> |
| 56 | #include <linux/rculist_nulls.h> | 56 | #include <linux/rculist_nulls.h> |
| 57 | #include <linux/poll.h> | ||
| 57 | 58 | ||
| 58 | #include <asm/atomic.h> | 59 | #include <asm/atomic.h> |
| 59 | #include <net/dst.h> | 60 | #include <net/dst.h> |
| @@ -103,15 +104,15 @@ struct net; | |||
| 103 | 104 | ||
| 104 | /** | 105 | /** |
| 105 | * struct sock_common - minimal network layer representation of sockets | 106 | * struct sock_common - minimal network layer representation of sockets |
| 107 | * @skc_node: main hash linkage for various protocol lookup tables | ||
| 108 | * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol | ||
| 109 | * @skc_refcnt: reference count | ||
| 110 | * @skc_hash: hash value used with various protocol lookup tables | ||
| 106 | * @skc_family: network address family | 111 | * @skc_family: network address family |
| 107 | * @skc_state: Connection state | 112 | * @skc_state: Connection state |
| 108 | * @skc_reuse: %SO_REUSEADDR setting | 113 | * @skc_reuse: %SO_REUSEADDR setting |
| 109 | * @skc_bound_dev_if: bound device index if != 0 | 114 | * @skc_bound_dev_if: bound device index if != 0 |
| 110 | * @skc_node: main hash linkage for various protocol lookup tables | ||
| 111 | * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol | ||
| 112 | * @skc_bind_node: bind hash linkage for various protocol lookup tables | 115 | * @skc_bind_node: bind hash linkage for various protocol lookup tables |
| 113 | * @skc_refcnt: reference count | ||
| 114 | * @skc_hash: hash value used with various protocol lookup tables | ||
| 115 | * @skc_prot: protocol handlers inside a network family | 116 | * @skc_prot: protocol handlers inside a network family |
| 116 | * @skc_net: reference to the network namespace of this socket | 117 | * @skc_net: reference to the network namespace of this socket |
| 117 | * | 118 | * |
| @@ -119,17 +120,21 @@ struct net; | |||
| 119 | * for struct sock and struct inet_timewait_sock. | 120 | * for struct sock and struct inet_timewait_sock. |
| 120 | */ | 121 | */ |
| 121 | struct sock_common { | 122 | struct sock_common { |
| 122 | unsigned short skc_family; | 123 | /* |
| 123 | volatile unsigned char skc_state; | 124 | * first fields are not copied in sock_copy() |
| 124 | unsigned char skc_reuse; | 125 | */ |
| 125 | int skc_bound_dev_if; | ||
| 126 | union { | 126 | union { |
| 127 | struct hlist_node skc_node; | 127 | struct hlist_node skc_node; |
| 128 | struct hlist_nulls_node skc_nulls_node; | 128 | struct hlist_nulls_node skc_nulls_node; |
| 129 | }; | 129 | }; |
| 130 | struct hlist_node skc_bind_node; | ||
| 131 | atomic_t skc_refcnt; | 130 | atomic_t skc_refcnt; |
| 131 | |||
| 132 | unsigned int skc_hash; | 132 | unsigned int skc_hash; |
| 133 | unsigned short skc_family; | ||
| 134 | volatile unsigned char skc_state; | ||
| 135 | unsigned char skc_reuse; | ||
| 136 | int skc_bound_dev_if; | ||
| 137 | struct hlist_node skc_bind_node; | ||
| 133 | struct proto *skc_prot; | 138 | struct proto *skc_prot; |
| 134 | #ifdef CONFIG_NET_NS | 139 | #ifdef CONFIG_NET_NS |
| 135 | struct net *skc_net; | 140 | struct net *skc_net; |
| @@ -207,15 +212,17 @@ struct sock { | |||
| 207 | * don't add anything before this first member (__sk_common) --acme | 212 | * don't add anything before this first member (__sk_common) --acme |
| 208 | */ | 213 | */ |
| 209 | struct sock_common __sk_common; | 214 | struct sock_common __sk_common; |
| 215 | #define sk_node __sk_common.skc_node | ||
| 216 | #define sk_nulls_node __sk_common.skc_nulls_node | ||
| 217 | #define sk_refcnt __sk_common.skc_refcnt | ||
| 218 | |||
| 219 | #define sk_copy_start __sk_common.skc_hash | ||
| 220 | #define sk_hash __sk_common.skc_hash | ||
| 210 | #define sk_family __sk_common.skc_family | 221 | #define sk_family __sk_common.skc_family |
| 211 | #define sk_state __sk_common.skc_state | 222 | #define sk_state __sk_common.skc_state |
| 212 | #define sk_reuse __sk_common.skc_reuse | 223 | #define sk_reuse __sk_common.skc_reuse |
| 213 | #define sk_bound_dev_if __sk_common.skc_bound_dev_if | 224 | #define sk_bound_dev_if __sk_common.skc_bound_dev_if |
| 214 | #define sk_node __sk_common.skc_node | ||
| 215 | #define sk_nulls_node __sk_common.skc_nulls_node | ||
| 216 | #define sk_bind_node __sk_common.skc_bind_node | 225 | #define sk_bind_node __sk_common.skc_bind_node |
| 217 | #define sk_refcnt __sk_common.skc_refcnt | ||
| 218 | #define sk_hash __sk_common.skc_hash | ||
| 219 | #define sk_prot __sk_common.skc_prot | 226 | #define sk_prot __sk_common.skc_prot |
| 220 | #define sk_net __sk_common.skc_net | 227 | #define sk_net __sk_common.skc_net |
| 221 | kmemcheck_bitfield_begin(flags); | 228 | kmemcheck_bitfield_begin(flags); |
| @@ -1241,6 +1248,74 @@ static inline int sk_has_allocations(const struct sock *sk) | |||
| 1241 | return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk); | 1248 | return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk); |
| 1242 | } | 1249 | } |
| 1243 | 1250 | ||
| 1251 | /** | ||
| 1252 | * sk_has_sleeper - check if there are any waiting processes | ||
| 1253 | * @sk: socket | ||
| 1254 | * | ||
| 1255 | * Returns true if socket has waiting processes | ||
| 1256 | * | ||
| 1257 | * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory | ||
| 1258 | * barrier call. They were added due to the race found within the tcp code. | ||
| 1259 | * | ||
| 1260 | * Consider following tcp code paths: | ||
| 1261 | * | ||
| 1262 | * CPU1 CPU2 | ||
| 1263 | * | ||
| 1264 | * sys_select receive packet | ||
| 1265 | * ... ... | ||
| 1266 | * __add_wait_queue update tp->rcv_nxt | ||
| 1267 | * ... ... | ||
| 1268 | * tp->rcv_nxt check sock_def_readable | ||
| 1269 | * ... { | ||
| 1270 | * schedule ... | ||
| 1271 | * if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | ||
| 1272 | * wake_up_interruptible(sk->sk_sleep) | ||
| 1273 | * ... | ||
| 1274 | * } | ||
| 1275 | * | ||
| 1276 | * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay | ||
| 1277 | * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 | ||
| 1278 | * could then end up calling schedule and sleep forever if there is no more | ||
| 1279 | * data on the socket. | ||
| 1280 | * | ||
| 1281 | * The sk_has_sleeper is always called right after a call to read_lock, so we | ||
| 1282 | * can use smp_mb__after_lock barrier. | ||
| 1283 | */ | ||
| 1284 | static inline int sk_has_sleeper(struct sock *sk) | ||
| 1285 | { | ||
| 1286 | /* | ||
| 1287 | * We need to be sure we are in sync with the | ||
| 1288 | * add_wait_queue modifications to the wait queue. | ||
| 1289 | * | ||
| 1290 | * This memory barrier is paired with the one in sock_poll_wait. | ||
| 1291 | */ | ||
| 1292 | smp_mb__after_lock(); | ||
| 1293 | return sk->sk_sleep && waitqueue_active(sk->sk_sleep); | ||
| 1294 | } | ||
| 1295 | |||
| 1296 | /** | ||
| 1297 | * sock_poll_wait - place a memory barrier after the poll_wait call. | ||
| 1298 | * @filp: file | ||
| 1299 | * @wait_address: socket wait queue | ||
| 1300 | * @p: poll_table | ||
| 1301 | * | ||
| 1302 | * See the comments in the sk_has_sleeper function. | ||
| 1303 | */ | ||
| 1304 | static inline void sock_poll_wait(struct file *filp, | ||
| 1305 | wait_queue_head_t *wait_address, poll_table *p) | ||
| 1306 | { | ||
| 1307 | if (p && wait_address) { | ||
| 1308 | poll_wait(filp, wait_address, p); | ||
| 1309 | /* | ||
| 1310 | * We need to be sure we are in sync with the | ||
| 1311 | * socket flags modification. | ||
| 1312 | * | ||
| 1313 | * This memory barrier is paired with the one in sk_has_sleeper. | ||
| 1314 | */ | ||
| 1315 | smp_mb(); | ||
| 1316 | } | ||
| 1317 | } | ||
| 1318 | |||
| 1244 | /* | 1319 | /* |
| 1245 | * Queue a received datagram if it will fit. Stream and sequenced | 1320 | * Queue a received datagram if it will fit. Stream and sequenced |
| 1246 | * protocols can't normally use this as they need to fit buffers in | 1321 | * protocols can't normally use this as they need to fit buffers in |
