Diffstat (limited to 'include/net/sock.h')
 -rw-r--r--   include/net/sock.h | 144
 1 file changed, 130 insertions, 14 deletions

diff --git a/include/net/sock.h b/include/net/sock.h
index 4bb1ff9fd15b..950409dcec3d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -54,6 +54,7 @@
 
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
+#include <linux/poll.h>
 
 #include <asm/atomic.h>
 #include <net/dst.h>
@@ -103,15 +104,15 @@ struct net;
 
 /**
  * struct sock_common - minimal network layer representation of sockets
+ * @skc_node: main hash linkage for various protocol lookup tables
+ * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
+ * @skc_refcnt: reference count
+ * @skc_hash: hash value used with various protocol lookup tables
  * @skc_family: network address family
  * @skc_state: Connection state
  * @skc_reuse: %SO_REUSEADDR setting
  * @skc_bound_dev_if: bound device index if != 0
- * @skc_node: main hash linkage for various protocol lookup tables
- * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
  * @skc_bind_node: bind hash linkage for various protocol lookup tables
- * @skc_refcnt: reference count
- * @skc_hash: hash value used with various protocol lookup tables
  * @skc_prot: protocol handlers inside a network family
  * @skc_net: reference to the network namespace of this socket
  *
@@ -119,17 +120,21 @@ struct net;
  * for struct sock and struct inet_timewait_sock.
  */
 struct sock_common {
-	unsigned short		skc_family;
-	volatile unsigned char	skc_state;
-	unsigned char		skc_reuse;
-	int			skc_bound_dev_if;
+	/*
+	 * first fields are not copied in sock_copy()
+	 */
 	union {
 		struct hlist_node	skc_node;
 		struct hlist_nulls_node	skc_nulls_node;
 	};
-	struct hlist_node	skc_bind_node;
 	atomic_t		skc_refcnt;
+
 	unsigned int		skc_hash;
+	unsigned short		skc_family;
+	volatile unsigned char	skc_state;
+	unsigned char		skc_reuse;
+	int			skc_bound_dev_if;
+	struct hlist_node	skc_bind_node;
 	struct proto		*skc_prot;
 #ifdef CONFIG_NET_NS
 	struct net		*skc_net;
@@ -207,20 +212,24 @@ struct sock {
 	 * don't add nothing before this first member (__sk_common) --acme
 	 */
 	struct sock_common	__sk_common;
+#define sk_node			__sk_common.skc_node
+#define sk_nulls_node		__sk_common.skc_nulls_node
+#define sk_refcnt		__sk_common.skc_refcnt
+
+#define sk_copy_start		__sk_common.skc_hash
+#define sk_hash			__sk_common.skc_hash
 #define sk_family		__sk_common.skc_family
 #define sk_state		__sk_common.skc_state
 #define sk_reuse		__sk_common.skc_reuse
 #define sk_bound_dev_if		__sk_common.skc_bound_dev_if
-#define sk_node			__sk_common.skc_node
-#define sk_nulls_node		__sk_common.skc_nulls_node
 #define sk_bind_node		__sk_common.skc_bind_node
-#define sk_refcnt		__sk_common.skc_refcnt
-#define sk_hash			__sk_common.skc_hash
 #define sk_prot			__sk_common.skc_prot
 #define sk_net			__sk_common.skc_net
+	kmemcheck_bitfield_begin(flags);
 	unsigned char		sk_shutdown : 2,
 				sk_no_check : 2,
 				sk_userlocks : 4;
+	kmemcheck_bitfield_end(flags);
 	unsigned char		sk_protocol;
 	unsigned short		sk_type;
 	int			sk_rcvbuf;
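
Note: the new sk_copy_start alias marks where sock_copy() is allowed to begin copying; the skc_node/skc_nulls_node union and skc_refcnt sit in front of it and are left alone, as the struct comment says. The real sock_copy() lives in net/core/sock.c and is not part of this hunk, so the following is only an assumed sketch of how the marker is presumably used:

	#include <linux/stddef.h>	/* offsetof */
	#include <linux/string.h>	/* memcpy */
	#include <net/sock.h>

	/*
	 * Sketch only -- an assumed shape for sock_copy(), not the literal
	 * kernel implementation.  Everything before sk_copy_start (the
	 * hash-node union and skc_refcnt) is deliberately not overwritten,
	 * presumably so a lockless reader still walking the old hash chain
	 * never sees the linkage or refcount of a recycled socket change
	 * under it.
	 */
	static void sock_copy_sketch(struct sock *nsk, const struct sock *osk)
	{
		memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
		       osk->sk_prot->obj_size -
				offsetof(struct sock, sk_copy_start));
	}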
@@ -1206,6 +1215,107 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 	return 0;
 }
 
+/**
+ * sk_wmem_alloc_get - returns write allocations
+ * @sk: socket
+ *
+ * Returns sk_wmem_alloc minus initial offset of one
+ */
+static inline int sk_wmem_alloc_get(const struct sock *sk)
+{
+	return atomic_read(&sk->sk_wmem_alloc) - 1;
+}
+
+/**
+ * sk_rmem_alloc_get - returns read allocations
+ * @sk: socket
+ *
+ * Returns sk_rmem_alloc
+ */
+static inline int sk_rmem_alloc_get(const struct sock *sk)
+{
+	return atomic_read(&sk->sk_rmem_alloc);
+}
+
+/**
+ * sk_has_allocations - check if allocations are outstanding
+ * @sk: socket
+ *
+ * Returns true if socket has write or read allocations
+ */
+static inline int sk_has_allocations(const struct sock *sk)
+{
+	return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk);
+}
+
+/**
+ * sk_has_sleeper - check if there are any waiting processes
+ * @sk: socket
+ *
+ * Returns true if socket has waiting processes
+ *
+ * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
+ * barrier call. They were added due to the race found within the tcp code.
+ *
+ * Consider following tcp code paths:
+ *
+ * CPU1                     CPU2
+ *
+ * sys_select               receive packet
+ *   ...                    ...
+ *   __add_wait_queue       update tp->rcv_nxt
+ *   ...                    ...
+ *   tp->rcv_nxt check      sock_def_readable
+ *   ...                    {
+ *   schedule                 ...
+ *                            if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+ *                                wake_up_interruptible(sk->sk_sleep)
+ *                            ...
+ *                          }
+ *
+ * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay
+ * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1
+ * could then endup calling schedule and sleep forever if there are no more
+ * data on the socket.
+ *
+ * The sk_has_sleeper is always called right after a call to read_lock, so we
+ * can use smp_mb__after_lock barrier.
+ */
+static inline int sk_has_sleeper(struct sock *sk)
+{
+	/*
+	 * We need to be sure we are in sync with the
+	 * add_wait_queue modifications to the wait queue.
+	 *
+	 * This memory barrier is paired in the sock_poll_wait.
+	 */
+	smp_mb__after_lock();
+	return sk->sk_sleep && waitqueue_active(sk->sk_sleep);
+}
+
+/**
+ * sock_poll_wait - place memory barrier behind the poll_wait call.
+ * @filp:          file
+ * @wait_address:  socket wait queue
+ * @p:             poll_table
+ *
+ * See the comments in the sk_has_sleeper function.
+ */
+static inline void sock_poll_wait(struct file *filp,
+		wait_queue_head_t *wait_address, poll_table *p)
+{
+	if (p && wait_address) {
+		poll_wait(filp, wait_address, p);
+		/*
+		 * We need to be sure we are in sync with the
+		 * socket flags modification.
+		 *
+		 * This memory barrier is paired in the sk_has_sleeper.
+		 */
+		smp_mb();
+	}
+}
+
 /*
  * Queue a received datagram if it will fit. Stream and sequenced
  * protocols can't normally use this as they need to fit buffers in
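
Note: to see how the two new helpers are meant to pair, here is an illustrative sketch. The function names are invented; the real callers touched by this series are helpers like datagram_poll() and sock_def_readable(). The ->poll() side registers on sk->sk_sleep through sock_poll_wait() before testing the condition, the wake-up side checks for sleepers through sk_has_sleeper(), and the paired barriers guarantee that at least one side observes the other's write:

	#include <linux/net.h>
	#include <linux/poll.h>
	#include <net/sock.h>

	/* Hypothetical ->poll() implementation: add ourselves to the wait
	 * queue first, then test the condition.  sock_poll_wait() places an
	 * smp_mb() between those two steps. */
	static unsigned int example_poll(struct file *file, struct socket *sock,
					 poll_table *wait)
	{
		struct sock *sk = sock->sk;
		unsigned int mask = 0;

		sock_poll_wait(file, sk->sk_sleep, wait);
		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= POLLIN | POLLRDNORM;
		return mask;
	}

	/* Hypothetical data-ready callback: the caller publishes the new
	 * state (e.g. tp->rcv_nxt) before invoking this, then we look for
	 * sleepers.  The smp_mb__after_lock() inside sk_has_sleeper() pairs
	 * with the barrier in sock_poll_wait(), so either the sleeper is
	 * seen here or the new data is seen by the poller -- never neither. */
	static void example_data_ready(struct sock *sk, int len)
	{
		read_lock(&sk->sk_callback_lock);
		if (sk_has_sleeper(sk))
			wake_up_interruptible(sk->sk_sleep);
		read_unlock(&sk->sk_callback_lock);
	}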
@@ -1217,14 +1327,20 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 
 static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 {
-	sock_hold(sk);
+	skb_orphan(skb);
 	skb->sk = sk;
 	skb->destructor = sock_wfree;
+	/*
+	 * We used to take a refcount on sk, but following operation
+	 * is enough to guarantee sk_free() wont free this sock until
+	 * all in-flight packets are completed
+	 */
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 }
 
 static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 {
+	skb_orphan(skb);
 	skb->sk = sk;
 	skb->destructor = sock_rfree;
 	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
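
Note: the skb_set_owner_w() change removes the per-packet sock_hold()/sock_put() pair because sk_wmem_alloc itself now keeps the socket alive. The "-1" in sk_wmem_alloc_get() suggests sk_wmem_alloc is charged with one extra reference when the socket is created; under that assumption, sk_free() merely drops the initial reference and the destructor of the last in-flight skb performs the real free. A simplified sketch of that scheme (names are invented; the literal code lives in net/core/sock.c, outside this header):

	#include <linux/kernel.h>
	#include <net/sock.h>

	/* Assumed: sk_wmem_alloc starts at 1, so a value of 1 means
	 * "no bytes in flight" (hence sk_wmem_alloc_get() subtracting one). */

	static void example_sk_free(struct sock *sk)
	{
		/* Drop the initial reference.  If packets are still queued
		 * for transmit, their destructor does the final free; the
		 * real code would call its private __sk_free() here. */
		if (atomic_dec_and_test(&sk->sk_wmem_alloc))
			pr_debug("no packets in flight, socket can be freed\n");
	}

	static void example_wfree(struct sk_buff *skb)
	{
		struct sock *sk = skb->sk;

		sk->sk_write_space(sk);
		/* Uncharge this packet's bytes.  Reaching zero means
		 * sk_free() already dropped the initial reference, so the
		 * real code would destroy the socket at this point. */
		if (atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc) == 0)
			pr_debug("last in-flight packet gone, socket can be freed\n");
	}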
