diff options
-rw-r--r-- | include/net/sock.h | 37 | ||||
-rw-r--r-- | net/core/sock.c | 3 | ||||
-rw-r--r-- | net/ipv4/udp.c | 35 | ||||
-rw-r--r-- | net/ipv4/udplite.c | 1 | ||||
-rw-r--r-- | net/ipv6/udp.c | 31 | ||||
-rw-r--r-- | net/ipv6/udplite.c | 1 |
6 files changed, 90 insertions, 18 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index d200dfbe1ef6..0bea25db5471 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -363,6 +363,27 @@ static __inline__ int sk_del_node_init(struct sock *sk) | |||
363 | return rc; | 363 | return rc; |
364 | } | 364 | } |
365 | 365 | ||
366 | static __inline__ int __sk_del_node_init_rcu(struct sock *sk) | ||
367 | { | ||
368 | if (sk_hashed(sk)) { | ||
369 | hlist_del_init_rcu(&sk->sk_node); | ||
370 | return 1; | ||
371 | } | ||
372 | return 0; | ||
373 | } | ||
374 | |||
375 | static __inline__ int sk_del_node_init_rcu(struct sock *sk) | ||
376 | { | ||
377 | int rc = __sk_del_node_init_rcu(sk); | ||
378 | |||
379 | if (rc) { | ||
380 | /* paranoid for a while -acme */ | ||
381 | WARN_ON(atomic_read(&sk->sk_refcnt) == 1); | ||
382 | __sock_put(sk); | ||
383 | } | ||
384 | return rc; | ||
385 | } | ||
386 | |||
366 | static __inline__ void __sk_add_node(struct sock *sk, struct hlist_head *list) | 387 | static __inline__ void __sk_add_node(struct sock *sk, struct hlist_head *list) |
367 | { | 388 | { |
368 | hlist_add_head(&sk->sk_node, list); | 389 | hlist_add_head(&sk->sk_node, list); |
@@ -374,6 +395,17 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) | |||
374 | __sk_add_node(sk, list); | 395 | __sk_add_node(sk, list); |
375 | } | 396 | } |
376 | 397 | ||
398 | static __inline__ void __sk_add_node_rcu(struct sock *sk, struct hlist_head *list) | ||
399 | { | ||
400 | hlist_add_head_rcu(&sk->sk_node, list); | ||
401 | } | ||
402 | |||
403 | static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) | ||
404 | { | ||
405 | sock_hold(sk); | ||
406 | __sk_add_node_rcu(sk, list); | ||
407 | } | ||
408 | |||
377 | static __inline__ void __sk_del_bind_node(struct sock *sk) | 409 | static __inline__ void __sk_del_bind_node(struct sock *sk) |
378 | { | 410 | { |
379 | __hlist_del(&sk->sk_bind_node); | 411 | __hlist_del(&sk->sk_bind_node); |
@@ -387,6 +419,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk, | |||
387 | 419 | ||
388 | #define sk_for_each(__sk, node, list) \ | 420 | #define sk_for_each(__sk, node, list) \ |
389 | hlist_for_each_entry(__sk, node, list, sk_node) | 421 | hlist_for_each_entry(__sk, node, list, sk_node) |
422 | #define sk_for_each_rcu(__sk, node, list) \ | ||
423 | hlist_for_each_entry_rcu(__sk, node, list, sk_node) | ||
390 | #define sk_for_each_from(__sk, node) \ | 424 | #define sk_for_each_from(__sk, node) \ |
391 | if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ | 425 | if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ |
392 | hlist_for_each_entry_from(__sk, node, sk_node) | 426 | hlist_for_each_entry_from(__sk, node, sk_node) |
@@ -589,8 +623,9 @@ struct proto { | |||
589 | int *sysctl_rmem; | 623 | int *sysctl_rmem; |
590 | int max_header; | 624 | int max_header; |
591 | 625 | ||
592 | struct kmem_cache *slab; | 626 | struct kmem_cache *slab; |
593 | unsigned int obj_size; | 627 | unsigned int obj_size; |
628 | int slab_flags; | ||
594 | 629 | ||
595 | atomic_t *orphan_count; | 630 | atomic_t *orphan_count; |
596 | 631 | ||
diff --git a/net/core/sock.c b/net/core/sock.c
index 5e2a3132a8c9..ded1eb5d2fd4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2042,7 +2042,8 @@ int proto_register(struct proto *prot, int alloc_slab) | |||
2042 | 2042 | ||
2043 | if (alloc_slab) { | 2043 | if (alloc_slab) { |
2044 | prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, | 2044 | prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, |
2045 | SLAB_HWCACHE_ALIGN, NULL); | 2045 | SLAB_HWCACHE_ALIGN | prot->slab_flags, |
2046 | NULL); | ||
2046 | 2047 | ||
2047 | if (prot->slab == NULL) { | 2048 | if (prot->slab == NULL) { |
2048 | printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", | 2049 | printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2a6c491f97d7..0ea974bf7962 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -187,7 +187,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
187 | inet_sk(sk)->num = snum; | 187 | inet_sk(sk)->num = snum; |
188 | sk->sk_hash = snum; | 188 | sk->sk_hash = snum; |
189 | if (sk_unhashed(sk)) { | 189 | if (sk_unhashed(sk)) { |
190 | sk_add_node(sk, &hslot->head); | 190 | sk_add_node_rcu(sk, &hslot->head); |
191 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 191 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
192 | } | 192 | } |
193 | error = 0; | 193 | error = 0; |
@@ -253,15 +253,24 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
253 | __be16 sport, __be32 daddr, __be16 dport, | 253 | __be16 sport, __be32 daddr, __be16 dport, |
254 | int dif, struct udp_table *udptable) | 254 | int dif, struct udp_table *udptable) |
255 | { | 255 | { |
256 | struct sock *sk, *result = NULL; | 256 | struct sock *sk, *result; |
257 | struct hlist_node *node; | 257 | struct hlist_node *node; |
258 | unsigned short hnum = ntohs(dport); | 258 | unsigned short hnum = ntohs(dport); |
259 | unsigned int hash = udp_hashfn(net, hnum); | 259 | unsigned int hash = udp_hashfn(net, hnum); |
260 | struct udp_hslot *hslot = &udptable->hash[hash]; | 260 | struct udp_hslot *hslot = &udptable->hash[hash]; |
261 | int score, badness = -1; | 261 | int score, badness; |
262 | 262 | ||
263 | spin_lock(&hslot->lock); | 263 | rcu_read_lock(); |
264 | sk_for_each(sk, node, &hslot->head) { | 264 | begin: |
265 | result = NULL; | ||
266 | badness = -1; | ||
267 | sk_for_each_rcu(sk, node, &hslot->head) { | ||
268 | /* | ||
269 | * lockless reader, and SLAB_DESTROY_BY_RCU items: | ||
270 | * We must check this item was not moved to another chain | ||
271 | */ | ||
272 | if (udp_hashfn(net, sk->sk_hash) != hash) | ||
273 | goto begin; | ||
265 | score = compute_score(sk, net, saddr, hnum, sport, | 274 | score = compute_score(sk, net, saddr, hnum, sport, |
266 | daddr, dport, dif); | 275 | daddr, dport, dif); |
267 | if (score > badness) { | 276 | if (score > badness) { |
@@ -269,9 +278,16 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
269 | badness = score; | 278 | badness = score; |
270 | } | 279 | } |
271 | } | 280 | } |
272 | if (result) | 281 | if (result) { |
273 | sock_hold(result); | 282 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) |
274 | spin_unlock(&hslot->lock); | 283 | result = NULL; |
284 | else if (unlikely(compute_score(result, net, saddr, hnum, sport, | ||
285 | daddr, dport, dif) < badness)) { | ||
286 | sock_put(result); | ||
287 | goto begin; | ||
288 | } | ||
289 | } | ||
290 | rcu_read_unlock(); | ||
275 | return result; | 291 | return result; |
276 | } | 292 | } |
277 | 293 | ||
@@ -953,7 +969,7 @@ void udp_lib_unhash(struct sock *sk) | |||
953 | struct udp_hslot *hslot = &udptable->hash[hash]; | 969 | struct udp_hslot *hslot = &udptable->hash[hash]; |
954 | 970 | ||
955 | spin_lock(&hslot->lock); | 971 | spin_lock(&hslot->lock); |
956 | if (sk_del_node_init(sk)) { | 972 | if (sk_del_node_init_rcu(sk)) { |
957 | inet_sk(sk)->num = 0; | 973 | inet_sk(sk)->num = 0; |
958 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 974 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
959 | } | 975 | } |
@@ -1517,6 +1533,7 @@ struct proto udp_prot = { | |||
1517 | .sysctl_wmem = &sysctl_udp_wmem_min, | 1533 | .sysctl_wmem = &sysctl_udp_wmem_min, |
1518 | .sysctl_rmem = &sysctl_udp_rmem_min, | 1534 | .sysctl_rmem = &sysctl_udp_rmem_min, |
1519 | .obj_size = sizeof(struct udp_sock), | 1535 | .obj_size = sizeof(struct udp_sock), |
1536 | .slab_flags = SLAB_DESTROY_BY_RCU, | ||
1520 | .h.udp_table = &udp_table, | 1537 | .h.udp_table = &udp_table, |
1521 | #ifdef CONFIG_COMPAT | 1538 | #ifdef CONFIG_COMPAT |
1522 | .compat_setsockopt = compat_udp_setsockopt, | 1539 | .compat_setsockopt = compat_udp_setsockopt, |
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index d8ea8e5f5ea3..c784891cb7e5 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -51,6 +51,7 @@ struct proto udplite_prot = { | |||
51 | .unhash = udp_lib_unhash, | 51 | .unhash = udp_lib_unhash, |
52 | .get_port = udp_v4_get_port, | 52 | .get_port = udp_v4_get_port, |
53 | .obj_size = sizeof(struct udp_sock), | 53 | .obj_size = sizeof(struct udp_sock), |
54 | .slab_flags = SLAB_DESTROY_BY_RCU, | ||
54 | .h.udp_table = &udplite_table, | 55 | .h.udp_table = &udplite_table, |
55 | #ifdef CONFIG_COMPAT | 56 | #ifdef CONFIG_COMPAT |
56 | .compat_setsockopt = compat_udp_setsockopt, | 57 | .compat_setsockopt = compat_udp_setsockopt, |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ccee7244ca0f..1d9790e43dfc 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -97,24 +97,40 @@ static struct sock *__udp6_lib_lookup(struct net *net, | |||
97 | struct in6_addr *daddr, __be16 dport, | 97 | struct in6_addr *daddr, __be16 dport, |
98 | int dif, struct udp_table *udptable) | 98 | int dif, struct udp_table *udptable) |
99 | { | 99 | { |
100 | struct sock *sk, *result = NULL; | 100 | struct sock *sk, *result; |
101 | struct hlist_node *node; | 101 | struct hlist_node *node; |
102 | unsigned short hnum = ntohs(dport); | 102 | unsigned short hnum = ntohs(dport); |
103 | unsigned int hash = udp_hashfn(net, hnum); | 103 | unsigned int hash = udp_hashfn(net, hnum); |
104 | struct udp_hslot *hslot = &udptable->hash[hash]; | 104 | struct udp_hslot *hslot = &udptable->hash[hash]; |
105 | int score, badness = -1; | 105 | int score, badness; |
106 | 106 | ||
107 | spin_lock(&hslot->lock); | 107 | rcu_read_lock(); |
108 | sk_for_each(sk, node, &hslot->head) { | 108 | begin: |
109 | result = NULL; | ||
110 | badness = -1; | ||
111 | sk_for_each_rcu(sk, node, &hslot->head) { | ||
112 | /* | ||
113 | * lockless reader, and SLAB_DESTROY_BY_RCU items: | ||
114 | * We must check this item was not moved to another chain | ||
115 | */ | ||
116 | if (udp_hashfn(net, sk->sk_hash) != hash) | ||
117 | goto begin; | ||
109 | score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); | 118 | score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); |
110 | if (score > badness) { | 119 | if (score > badness) { |
111 | result = sk; | 120 | result = sk; |
112 | badness = score; | 121 | badness = score; |
113 | } | 122 | } |
114 | } | 123 | } |
115 | if (result) | 124 | if (result) { |
116 | sock_hold(result); | 125 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) |
117 | spin_unlock(&hslot->lock); | 126 | result = NULL; |
127 | else if (unlikely(compute_score(result, net, hnum, saddr, sport, | ||
128 | daddr, dport, dif) < badness)) { | ||
129 | sock_put(result); | ||
130 | goto begin; | ||
131 | } | ||
132 | } | ||
133 | rcu_read_unlock(); | ||
118 | return result; | 134 | return result; |
119 | } | 135 | } |
120 | 136 | ||
@@ -1062,6 +1078,7 @@ struct proto udpv6_prot = { | |||
1062 | .sysctl_wmem = &sysctl_udp_wmem_min, | 1078 | .sysctl_wmem = &sysctl_udp_wmem_min, |
1063 | .sysctl_rmem = &sysctl_udp_rmem_min, | 1079 | .sysctl_rmem = &sysctl_udp_rmem_min, |
1064 | .obj_size = sizeof(struct udp6_sock), | 1080 | .obj_size = sizeof(struct udp6_sock), |
1081 | .slab_flags = SLAB_DESTROY_BY_RCU, | ||
1065 | .h.udp_table = &udp_table, | 1082 | .h.udp_table = &udp_table, |
1066 | #ifdef CONFIG_COMPAT | 1083 | #ifdef CONFIG_COMPAT |
1067 | .compat_setsockopt = compat_udpv6_setsockopt, | 1084 | .compat_setsockopt = compat_udpv6_setsockopt, |
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index f1e892a99e05..ba162a824585 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -49,6 +49,7 @@ struct proto udplitev6_prot = { | |||
49 | .unhash = udp_lib_unhash, | 49 | .unhash = udp_lib_unhash, |
50 | .get_port = udp_v6_get_port, | 50 | .get_port = udp_v6_get_port, |
51 | .obj_size = sizeof(struct udp6_sock), | 51 | .obj_size = sizeof(struct udp6_sock), |
52 | .slab_flags = SLAB_DESTROY_BY_RCU, | ||
52 | .h.udp_table = &udplite_table, | 53 | .h.udp_table = &udplite_table, |
53 | #ifdef CONFIG_COMPAT | 54 | #ifdef CONFIG_COMPAT |
54 | .compat_setsockopt = compat_udpv6_setsockopt, | 55 | .compat_setsockopt = compat_udpv6_setsockopt, |