diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2009-07-15 19:13:10 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-07-16 21:05:26 -0400 |
commit | 4dc6dc7162c08b9965163c9ab3f9375d4adff2c7 (patch) | |
tree | f4b9959e9ca5e5b8bdb6301663a15e0f67d4c68c | |
parent | 303d67c288319768b19ed8dbed429fef7eb7c275 (diff) |
net: sock_copy() fixes
Commit e912b1142be8f1e2c71c71001dc992c6e5eb2ec1
(net: sk_prot_alloc() should not blindly overwrite memory)
took care of not zeroing whole new socket at allocation time.
sock_copy() is another spot where we should be very careful.
We should not set refcnt to a non null value, until
we are sure other fields are correctly setup, or
a lockless reader could catch this socket by mistake,
while not fully (re)initialized.
This patch puts sk_node & sk_refcnt to the very beginning
of struct sock to ease sock_copy() & sk_prot_alloc() job.
We add appropriate smp_wmb() before sk_refcnt initializations
to match our RCU requirements (changes to sock keys should
be committed to memory before sk_refcnt setting)
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/sock.h | 32 | ||||
-rw-r--r-- | net/core/sock.c | 20 |
2 files changed, 37 insertions, 15 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index 2c0da9239b95..950409dcec3d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -104,15 +104,15 @@ struct net; | |||
104 | 104 | ||
105 | /** | 105 | /** |
106 | * struct sock_common - minimal network layer representation of sockets | 106 | * struct sock_common - minimal network layer representation of sockets |
107 | * @skc_node: main hash linkage for various protocol lookup tables | ||
108 | * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol | ||
109 | * @skc_refcnt: reference count | ||
110 | * @skc_hash: hash value used with various protocol lookup tables | ||
107 | * @skc_family: network address family | 111 | * @skc_family: network address family |
108 | * @skc_state: Connection state | 112 | * @skc_state: Connection state |
109 | * @skc_reuse: %SO_REUSEADDR setting | 113 | * @skc_reuse: %SO_REUSEADDR setting |
110 | * @skc_bound_dev_if: bound device index if != 0 | 114 | * @skc_bound_dev_if: bound device index if != 0 |
111 | * @skc_node: main hash linkage for various protocol lookup tables | ||
112 | * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol | ||
113 | * @skc_bind_node: bind hash linkage for various protocol lookup tables | 115 | * @skc_bind_node: bind hash linkage for various protocol lookup tables |
114 | * @skc_refcnt: reference count | ||
115 | * @skc_hash: hash value used with various protocol lookup tables | ||
116 | * @skc_prot: protocol handlers inside a network family | 116 | * @skc_prot: protocol handlers inside a network family |
117 | * @skc_net: reference to the network namespace of this socket | 117 | * @skc_net: reference to the network namespace of this socket |
118 | * | 118 | * |
@@ -120,17 +120,21 @@ struct net; | |||
120 | * for struct sock and struct inet_timewait_sock. | 120 | * for struct sock and struct inet_timewait_sock. |
121 | */ | 121 | */ |
122 | struct sock_common { | 122 | struct sock_common { |
123 | unsigned short skc_family; | 123 | /* |
124 | volatile unsigned char skc_state; | 124 | * first fields are not copied in sock_copy() |
125 | unsigned char skc_reuse; | 125 | */ |
126 | int skc_bound_dev_if; | ||
127 | union { | 126 | union { |
128 | struct hlist_node skc_node; | 127 | struct hlist_node skc_node; |
129 | struct hlist_nulls_node skc_nulls_node; | 128 | struct hlist_nulls_node skc_nulls_node; |
130 | }; | 129 | }; |
131 | struct hlist_node skc_bind_node; | ||
132 | atomic_t skc_refcnt; | 130 | atomic_t skc_refcnt; |
131 | |||
133 | unsigned int skc_hash; | 132 | unsigned int skc_hash; |
133 | unsigned short skc_family; | ||
134 | volatile unsigned char skc_state; | ||
135 | unsigned char skc_reuse; | ||
136 | int skc_bound_dev_if; | ||
137 | struct hlist_node skc_bind_node; | ||
134 | struct proto *skc_prot; | 138 | struct proto *skc_prot; |
135 | #ifdef CONFIG_NET_NS | 139 | #ifdef CONFIG_NET_NS |
136 | struct net *skc_net; | 140 | struct net *skc_net; |
@@ -208,15 +212,17 @@ struct sock { | |||
208 | * don't add nothing before this first member (__sk_common) --acme | 212 | * don't add nothing before this first member (__sk_common) --acme |
209 | */ | 213 | */ |
210 | struct sock_common __sk_common; | 214 | struct sock_common __sk_common; |
215 | #define sk_node __sk_common.skc_node | ||
216 | #define sk_nulls_node __sk_common.skc_nulls_node | ||
217 | #define sk_refcnt __sk_common.skc_refcnt | ||
218 | |||
219 | #define sk_copy_start __sk_common.skc_hash | ||
220 | #define sk_hash __sk_common.skc_hash | ||
211 | #define sk_family __sk_common.skc_family | 221 | #define sk_family __sk_common.skc_family |
212 | #define sk_state __sk_common.skc_state | 222 | #define sk_state __sk_common.skc_state |
213 | #define sk_reuse __sk_common.skc_reuse | 223 | #define sk_reuse __sk_common.skc_reuse |
214 | #define sk_bound_dev_if __sk_common.skc_bound_dev_if | 224 | #define sk_bound_dev_if __sk_common.skc_bound_dev_if |
215 | #define sk_node __sk_common.skc_node | ||
216 | #define sk_nulls_node __sk_common.skc_nulls_node | ||
217 | #define sk_bind_node __sk_common.skc_bind_node | 225 | #define sk_bind_node __sk_common.skc_bind_node |
218 | #define sk_refcnt __sk_common.skc_refcnt | ||
219 | #define sk_hash __sk_common.skc_hash | ||
220 | #define sk_prot __sk_common.skc_prot | 226 | #define sk_prot __sk_common.skc_prot |
221 | #define sk_net __sk_common.skc_net | 227 | #define sk_net __sk_common.skc_net |
222 | kmemcheck_bitfield_begin(flags); | 228 | kmemcheck_bitfield_begin(flags); |
diff --git a/net/core/sock.c b/net/core/sock.c index ba5d2116aea1..d9eec153d531 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -919,13 +919,19 @@ static inline void sock_lock_init(struct sock *sk) | |||
919 | af_family_keys + sk->sk_family); | 919 | af_family_keys + sk->sk_family); |
920 | } | 920 | } |
921 | 921 | ||
922 | /* | ||
923 | * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, | ||
924 | * even temporarly, because of RCU lookups. sk_node should also be left as is. | ||
925 | */ | ||
922 | static void sock_copy(struct sock *nsk, const struct sock *osk) | 926 | static void sock_copy(struct sock *nsk, const struct sock *osk) |
923 | { | 927 | { |
924 | #ifdef CONFIG_SECURITY_NETWORK | 928 | #ifdef CONFIG_SECURITY_NETWORK |
925 | void *sptr = nsk->sk_security; | 929 | void *sptr = nsk->sk_security; |
926 | #endif | 930 | #endif |
927 | 931 | BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != | |
928 | memcpy(nsk, osk, osk->sk_prot->obj_size); | 932 | sizeof(osk->sk_node) + sizeof(osk->sk_refcnt)); |
933 | memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, | ||
934 | osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); | ||
929 | #ifdef CONFIG_SECURITY_NETWORK | 935 | #ifdef CONFIG_SECURITY_NETWORK |
930 | nsk->sk_security = sptr; | 936 | nsk->sk_security = sptr; |
931 | security_sk_clone(osk, nsk); | 937 | security_sk_clone(osk, nsk); |
@@ -1140,6 +1146,11 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
1140 | 1146 | ||
1141 | newsk->sk_err = 0; | 1147 | newsk->sk_err = 0; |
1142 | newsk->sk_priority = 0; | 1148 | newsk->sk_priority = 0; |
1149 | /* | ||
1150 | * Before updating sk_refcnt, we must commit prior changes to memory | ||
1151 | * (Documentation/RCU/rculist_nulls.txt for details) | ||
1152 | */ | ||
1153 | smp_wmb(); | ||
1143 | atomic_set(&newsk->sk_refcnt, 2); | 1154 | atomic_set(&newsk->sk_refcnt, 2); |
1144 | 1155 | ||
1145 | /* | 1156 | /* |
@@ -1855,6 +1866,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) | |||
1855 | 1866 | ||
1856 | sk->sk_stamp = ktime_set(-1L, 0); | 1867 | sk->sk_stamp = ktime_set(-1L, 0); |
1857 | 1868 | ||
1869 | /* | ||
1870 | * Before updating sk_refcnt, we must commit prior changes to memory | ||
1871 | * (Documentation/RCU/rculist_nulls.txt for details) | ||
1872 | */ | ||
1873 | smp_wmb(); | ||
1858 | atomic_set(&sk->sk_refcnt, 1); | 1874 | atomic_set(&sk->sk_refcnt, 1); |
1859 | atomic_set(&sk->sk_wmem_alloc, 1); | 1875 | atomic_set(&sk->sk_wmem_alloc, 1); |
1860 | atomic_set(&sk->sk_drops, 0); | 1876 | atomic_set(&sk->sk_drops, 0); |