aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2009-07-15 19:13:10 -0400
committerDavid S. Miller <davem@davemloft.net>2009-07-16 21:05:26 -0400
commit4dc6dc7162c08b9965163c9ab3f9375d4adff2c7 (patch)
treef4b9959e9ca5e5b8bdb6301663a15e0f67d4c68c
parent303d67c288319768b19ed8dbed429fef7eb7c275 (diff)
net: sock_copy() fixes
Commit e912b1142be8f1e2c71c71001dc992c6e5eb2ec1 (net: sk_prot_alloc() should not blindly overwrite memory) took care of not zeroing the whole new socket at allocation time.

sock_copy() is another spot where we should be very careful. We should not set refcnt to a non-zero value until we are sure the other fields are correctly set up, or a lockless reader could catch this socket by mistake while it is not fully (re)initialized.

This patch moves sk_node & sk_refcnt to the very beginning of struct sock to ease the job of sock_copy() & sk_prot_alloc().

We add an appropriate smp_wmb() before each sk_refcnt initialization to match our RCU requirements (changes to sock keys should be committed to memory before sk_refcnt is set).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/sock.h32
-rw-r--r--net/core/sock.c20
2 files changed, 37 insertions, 15 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 2c0da9239b95..950409dcec3d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -104,15 +104,15 @@ struct net;
104 104
105/** 105/**
106 * struct sock_common - minimal network layer representation of sockets 106 * struct sock_common - minimal network layer representation of sockets
107 * @skc_node: main hash linkage for various protocol lookup tables
108 * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
109 * @skc_refcnt: reference count
110 * @skc_hash: hash value used with various protocol lookup tables
107 * @skc_family: network address family 111 * @skc_family: network address family
108 * @skc_state: Connection state 112 * @skc_state: Connection state
109 * @skc_reuse: %SO_REUSEADDR setting 113 * @skc_reuse: %SO_REUSEADDR setting
110 * @skc_bound_dev_if: bound device index if != 0 114 * @skc_bound_dev_if: bound device index if != 0
111 * @skc_node: main hash linkage for various protocol lookup tables
112 * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
113 * @skc_bind_node: bind hash linkage for various protocol lookup tables 115 * @skc_bind_node: bind hash linkage for various protocol lookup tables
114 * @skc_refcnt: reference count
115 * @skc_hash: hash value used with various protocol lookup tables
116 * @skc_prot: protocol handlers inside a network family 116 * @skc_prot: protocol handlers inside a network family
117 * @skc_net: reference to the network namespace of this socket 117 * @skc_net: reference to the network namespace of this socket
118 * 118 *
@@ -120,17 +120,21 @@ struct net;
120 * for struct sock and struct inet_timewait_sock. 120 * for struct sock and struct inet_timewait_sock.
121 */ 121 */
122struct sock_common { 122struct sock_common {
123 unsigned short skc_family; 123 /*
124 volatile unsigned char skc_state; 124 * first fields are not copied in sock_copy()
125 unsigned char skc_reuse; 125 */
126 int skc_bound_dev_if;
127 union { 126 union {
128 struct hlist_node skc_node; 127 struct hlist_node skc_node;
129 struct hlist_nulls_node skc_nulls_node; 128 struct hlist_nulls_node skc_nulls_node;
130 }; 129 };
131 struct hlist_node skc_bind_node;
132 atomic_t skc_refcnt; 130 atomic_t skc_refcnt;
131
133 unsigned int skc_hash; 132 unsigned int skc_hash;
133 unsigned short skc_family;
134 volatile unsigned char skc_state;
135 unsigned char skc_reuse;
136 int skc_bound_dev_if;
137 struct hlist_node skc_bind_node;
134 struct proto *skc_prot; 138 struct proto *skc_prot;
135#ifdef CONFIG_NET_NS 139#ifdef CONFIG_NET_NS
136 struct net *skc_net; 140 struct net *skc_net;
@@ -208,15 +212,17 @@ struct sock {
208 * don't add nothing before this first member (__sk_common) --acme 212 * don't add nothing before this first member (__sk_common) --acme
209 */ 213 */
210 struct sock_common __sk_common; 214 struct sock_common __sk_common;
215#define sk_node __sk_common.skc_node
216#define sk_nulls_node __sk_common.skc_nulls_node
217#define sk_refcnt __sk_common.skc_refcnt
218
219#define sk_copy_start __sk_common.skc_hash
220#define sk_hash __sk_common.skc_hash
211#define sk_family __sk_common.skc_family 221#define sk_family __sk_common.skc_family
212#define sk_state __sk_common.skc_state 222#define sk_state __sk_common.skc_state
213#define sk_reuse __sk_common.skc_reuse 223#define sk_reuse __sk_common.skc_reuse
214#define sk_bound_dev_if __sk_common.skc_bound_dev_if 224#define sk_bound_dev_if __sk_common.skc_bound_dev_if
215#define sk_node __sk_common.skc_node
216#define sk_nulls_node __sk_common.skc_nulls_node
217#define sk_bind_node __sk_common.skc_bind_node 225#define sk_bind_node __sk_common.skc_bind_node
218#define sk_refcnt __sk_common.skc_refcnt
219#define sk_hash __sk_common.skc_hash
220#define sk_prot __sk_common.skc_prot 226#define sk_prot __sk_common.skc_prot
221#define sk_net __sk_common.skc_net 227#define sk_net __sk_common.skc_net
222 kmemcheck_bitfield_begin(flags); 228 kmemcheck_bitfield_begin(flags);
diff --git a/net/core/sock.c b/net/core/sock.c
index ba5d2116aea1..d9eec153d531 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -919,13 +919,19 @@ static inline void sock_lock_init(struct sock *sk)
919 af_family_keys + sk->sk_family); 919 af_family_keys + sk->sk_family);
920} 920}
921 921
922/*
923 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
924 * even temporarily, because of RCU lookups. sk_node should also be left as is.
925 */
922static void sock_copy(struct sock *nsk, const struct sock *osk) 926static void sock_copy(struct sock *nsk, const struct sock *osk)
923{ 927{
924#ifdef CONFIG_SECURITY_NETWORK 928#ifdef CONFIG_SECURITY_NETWORK
925 void *sptr = nsk->sk_security; 929 void *sptr = nsk->sk_security;
926#endif 930#endif
927 931 BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
928 memcpy(nsk, osk, osk->sk_prot->obj_size); 932 sizeof(osk->sk_node) + sizeof(osk->sk_refcnt));
933 memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
934 osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
929#ifdef CONFIG_SECURITY_NETWORK 935#ifdef CONFIG_SECURITY_NETWORK
930 nsk->sk_security = sptr; 936 nsk->sk_security = sptr;
931 security_sk_clone(osk, nsk); 937 security_sk_clone(osk, nsk);
@@ -1140,6 +1146,11 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1140 1146
1141 newsk->sk_err = 0; 1147 newsk->sk_err = 0;
1142 newsk->sk_priority = 0; 1148 newsk->sk_priority = 0;
1149 /*
1150 * Before updating sk_refcnt, we must commit prior changes to memory
1151 * (Documentation/RCU/rculist_nulls.txt for details)
1152 */
1153 smp_wmb();
1143 atomic_set(&newsk->sk_refcnt, 2); 1154 atomic_set(&newsk->sk_refcnt, 2);
1144 1155
1145 /* 1156 /*
@@ -1855,6 +1866,11 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1855 1866
1856 sk->sk_stamp = ktime_set(-1L, 0); 1867 sk->sk_stamp = ktime_set(-1L, 0);
1857 1868
1869 /*
1870 * Before updating sk_refcnt, we must commit prior changes to memory
1871 * (Documentation/RCU/rculist_nulls.txt for details)
1872 */
1873 smp_wmb();
1858 atomic_set(&sk->sk_refcnt, 1); 1874 atomic_set(&sk->sk_refcnt, 1);
1859 atomic_set(&sk->sk_wmem_alloc, 1); 1875 atomic_set(&sk->sk_wmem_alloc, 1);
1860 atomic_set(&sk->sk_drops, 0); 1876 atomic_set(&sk->sk_drops, 0);