diff options
author | stephen hemminger <shemminger@vyatta.com> | 2010-02-22 02:57:18 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-02-22 18:45:56 -0500 |
commit | 808f5114a9206fee855117d416440e1071ab375c (patch) | |
tree | cec3f04220909b77c0880029b63862553ad5161c | |
parent | 1cc523271ef0b6305c565a143e3d48f6fff826dd (diff) |
packet: convert socket list to RCU (v3)
Convert AF_PACKET to use RCU, eliminating one more reader/writer lock.
There is no need for a real sk_del_node_init_rcu(), because sk_del_node_init()
already does the equivalent of hlist_del_init_rcu(); some comments were
added to try to make that obvious.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/netns/packet.h | 4 | ||||
-rw-r--r-- | include/net/sock.h | 10 | ||||
-rw-r--r-- | net/packet/af_packet.c | 62 |
3 files changed, 43 insertions, 33 deletions
diff --git a/include/net/netns/packet.h b/include/net/netns/packet.h index 637daf698884..cb4e894c0f8d 100644 --- a/include/net/netns/packet.h +++ b/include/net/netns/packet.h | |||
@@ -4,11 +4,11 @@ | |||
4 | #ifndef __NETNS_PACKET_H__ | 4 | #ifndef __NETNS_PACKET_H__ |
5 | #define __NETNS_PACKET_H__ | 5 | #define __NETNS_PACKET_H__ |
6 | 6 | ||
7 | #include <linux/list.h> | 7 | #include <linux/rculist.h> |
8 | #include <linux/spinlock.h> | 8 | #include <linux/spinlock.h> |
9 | 9 | ||
10 | struct netns_packet { | 10 | struct netns_packet { |
11 | rwlock_t sklist_lock; | 11 | spinlock_t sklist_lock; |
12 | struct hlist_head sklist; | 12 | struct hlist_head sklist; |
13 | }; | 13 | }; |
14 | 14 | ||
diff --git a/include/net/sock.h b/include/net/sock.h index 580d51fa28e9..6cb1676e409a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -381,6 +381,7 @@ static __inline__ void __sk_del_node(struct sock *sk) | |||
381 | __hlist_del(&sk->sk_node); | 381 | __hlist_del(&sk->sk_node); |
382 | } | 382 | } |
383 | 383 | ||
384 | /* NB: equivalent to hlist_del_init_rcu */ | ||
384 | static __inline__ int __sk_del_node_init(struct sock *sk) | 385 | static __inline__ int __sk_del_node_init(struct sock *sk) |
385 | { | 386 | { |
386 | if (sk_hashed(sk)) { | 387 | if (sk_hashed(sk)) { |
@@ -421,6 +422,7 @@ static __inline__ int sk_del_node_init(struct sock *sk) | |||
421 | } | 422 | } |
422 | return rc; | 423 | return rc; |
423 | } | 424 | } |
425 | #define sk_del_node_init_rcu(sk) sk_del_node_init(sk) | ||
424 | 426 | ||
425 | static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk) | 427 | static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk) |
426 | { | 428 | { |
@@ -454,6 +456,12 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) | |||
454 | __sk_add_node(sk, list); | 456 | __sk_add_node(sk, list); |
455 | } | 457 | } |
456 | 458 | ||
459 | static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) | ||
460 | { | ||
461 | sock_hold(sk); | ||
462 | hlist_add_head_rcu(&sk->sk_node, list); | ||
463 | } | ||
464 | |||
457 | static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) | 465 | static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) |
458 | { | 466 | { |
459 | hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); | 467 | hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); |
@@ -478,6 +486,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk, | |||
478 | 486 | ||
479 | #define sk_for_each(__sk, node, list) \ | 487 | #define sk_for_each(__sk, node, list) \ |
480 | hlist_for_each_entry(__sk, node, list, sk_node) | 488 | hlist_for_each_entry(__sk, node, list, sk_node) |
489 | #define sk_for_each_rcu(__sk, node, list) \ | ||
490 | hlist_for_each_entry_rcu(__sk, node, list, sk_node) | ||
481 | #define sk_nulls_for_each(__sk, node, list) \ | 491 | #define sk_nulls_for_each(__sk, node, list) \ |
482 | hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) | 492 | hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) |
483 | #define sk_nulls_for_each_rcu(__sk, node, list) \ | 493 | #define sk_nulls_for_each_rcu(__sk, node, list) \ |
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 10f7295bcefb..2f0369367ee0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -1262,24 +1262,22 @@ static int packet_release(struct socket *sock) | |||
1262 | net = sock_net(sk); | 1262 | net = sock_net(sk); |
1263 | po = pkt_sk(sk); | 1263 | po = pkt_sk(sk); |
1264 | 1264 | ||
1265 | write_lock_bh(&net->packet.sklist_lock); | 1265 | spin_lock_bh(&net->packet.sklist_lock); |
1266 | sk_del_node_init(sk); | 1266 | sk_del_node_init_rcu(sk); |
1267 | sock_prot_inuse_add(net, sk->sk_prot, -1); | 1267 | sock_prot_inuse_add(net, sk->sk_prot, -1); |
1268 | write_unlock_bh(&net->packet.sklist_lock); | 1268 | spin_unlock_bh(&net->packet.sklist_lock); |
1269 | |||
1270 | /* | ||
1271 | * Unhook packet receive handler. | ||
1272 | */ | ||
1273 | 1269 | ||
1270 | spin_lock(&po->bind_lock); | ||
1274 | if (po->running) { | 1271 | if (po->running) { |
1275 | /* | 1272 | /* |
1276 | * Remove the protocol hook | 1273 | * Remove from protocol table |
1277 | */ | 1274 | */ |
1278 | dev_remove_pack(&po->prot_hook); | ||
1279 | po->running = 0; | 1275 | po->running = 0; |
1280 | po->num = 0; | 1276 | po->num = 0; |
1277 | __dev_remove_pack(&po->prot_hook); | ||
1281 | __sock_put(sk); | 1278 | __sock_put(sk); |
1282 | } | 1279 | } |
1280 | spin_unlock(&po->bind_lock); | ||
1283 | 1281 | ||
1284 | packet_flush_mclist(sk); | 1282 | packet_flush_mclist(sk); |
1285 | 1283 | ||
@@ -1291,10 +1289,10 @@ static int packet_release(struct socket *sock) | |||
1291 | if (po->tx_ring.pg_vec) | 1289 | if (po->tx_ring.pg_vec) |
1292 | packet_set_ring(sk, &req, 1, 1); | 1290 | packet_set_ring(sk, &req, 1, 1); |
1293 | 1291 | ||
1292 | synchronize_net(); | ||
1294 | /* | 1293 | /* |
1295 | * Now the socket is dead. No more input will appear. | 1294 | * Now the socket is dead. No more input will appear. |
1296 | */ | 1295 | */ |
1297 | |||
1298 | sock_orphan(sk); | 1296 | sock_orphan(sk); |
1299 | sock->sk = NULL; | 1297 | sock->sk = NULL; |
1300 | 1298 | ||
@@ -1478,10 +1476,11 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, | |||
1478 | po->running = 1; | 1476 | po->running = 1; |
1479 | } | 1477 | } |
1480 | 1478 | ||
1481 | write_lock_bh(&net->packet.sklist_lock); | 1479 | spin_lock_bh(&net->packet.sklist_lock); |
1482 | sk_add_node(sk, &net->packet.sklist); | 1480 | sk_add_node_rcu(sk, &net->packet.sklist); |
1483 | sock_prot_inuse_add(net, &packet_proto, 1); | 1481 | sock_prot_inuse_add(net, &packet_proto, 1); |
1484 | write_unlock_bh(&net->packet.sklist_lock); | 1482 | spin_unlock_bh(&net->packet.sklist_lock); |
1483 | |||
1485 | return 0; | 1484 | return 0; |
1486 | out: | 1485 | out: |
1487 | return err; | 1486 | return err; |
@@ -2075,8 +2074,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void | |||
2075 | struct net_device *dev = data; | 2074 | struct net_device *dev = data; |
2076 | struct net *net = dev_net(dev); | 2075 | struct net *net = dev_net(dev); |
2077 | 2076 | ||
2078 | read_lock(&net->packet.sklist_lock); | 2077 | rcu_read_lock(); |
2079 | sk_for_each(sk, node, &net->packet.sklist) { | 2078 | sk_for_each_rcu(sk, node, &net->packet.sklist) { |
2080 | struct packet_sock *po = pkt_sk(sk); | 2079 | struct packet_sock *po = pkt_sk(sk); |
2081 | 2080 | ||
2082 | switch (msg) { | 2081 | switch (msg) { |
@@ -2104,18 +2103,19 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void | |||
2104 | } | 2103 | } |
2105 | break; | 2104 | break; |
2106 | case NETDEV_UP: | 2105 | case NETDEV_UP: |
2107 | spin_lock(&po->bind_lock); | 2106 | if (dev->ifindex == po->ifindex) { |
2108 | if (dev->ifindex == po->ifindex && po->num && | 2107 | spin_lock(&po->bind_lock); |
2109 | !po->running) { | 2108 | if (po->num && !po->running) { |
2110 | dev_add_pack(&po->prot_hook); | 2109 | dev_add_pack(&po->prot_hook); |
2111 | sock_hold(sk); | 2110 | sock_hold(sk); |
2112 | po->running = 1; | 2111 | po->running = 1; |
2112 | } | ||
2113 | spin_unlock(&po->bind_lock); | ||
2113 | } | 2114 | } |
2114 | spin_unlock(&po->bind_lock); | ||
2115 | break; | 2115 | break; |
2116 | } | 2116 | } |
2117 | } | 2117 | } |
2118 | read_unlock(&net->packet.sklist_lock); | 2118 | rcu_read_unlock(); |
2119 | return NOTIFY_DONE; | 2119 | return NOTIFY_DONE; |
2120 | } | 2120 | } |
2121 | 2121 | ||
@@ -2512,24 +2512,24 @@ static struct notifier_block packet_netdev_notifier = { | |||
2512 | #ifdef CONFIG_PROC_FS | 2512 | #ifdef CONFIG_PROC_FS |
2513 | 2513 | ||
2514 | static void *packet_seq_start(struct seq_file *seq, loff_t *pos) | 2514 | static void *packet_seq_start(struct seq_file *seq, loff_t *pos) |
2515 | __acquires(seq_file_net(seq)->packet.sklist_lock) | 2515 | __acquires(RCU) |
2516 | { | 2516 | { |
2517 | struct net *net = seq_file_net(seq); | 2517 | struct net *net = seq_file_net(seq); |
2518 | read_lock(&net->packet.sklist_lock); | 2518 | |
2519 | return seq_hlist_start_head(&net->packet.sklist, *pos); | 2519 | rcu_read_lock(); |
2520 | return seq_hlist_start_head_rcu(&net->packet.sklist, *pos); | ||
2520 | } | 2521 | } |
2521 | 2522 | ||
2522 | static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2523 | static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2523 | { | 2524 | { |
2524 | struct net *net = seq_file_net(seq); | 2525 | struct net *net = seq_file_net(seq); |
2525 | return seq_hlist_next(v, &net->packet.sklist, pos); | 2526 | return seq_hlist_next_rcu(v, &net->packet.sklist, pos); |
2526 | } | 2527 | } |
2527 | 2528 | ||
2528 | static void packet_seq_stop(struct seq_file *seq, void *v) | 2529 | static void packet_seq_stop(struct seq_file *seq, void *v) |
2529 | __releases(seq_file_net(seq)->packet.sklist_lock) | 2530 | __releases(RCU) |
2530 | { | 2531 | { |
2531 | struct net *net = seq_file_net(seq); | 2532 | rcu_read_unlock(); |
2532 | read_unlock(&net->packet.sklist_lock); | ||
2533 | } | 2533 | } |
2534 | 2534 | ||
2535 | static int packet_seq_show(struct seq_file *seq, void *v) | 2535 | static int packet_seq_show(struct seq_file *seq, void *v) |
@@ -2581,7 +2581,7 @@ static const struct file_operations packet_seq_fops = { | |||
2581 | 2581 | ||
2582 | static int __net_init packet_net_init(struct net *net) | 2582 | static int __net_init packet_net_init(struct net *net) |
2583 | { | 2583 | { |
2584 | rwlock_init(&net->packet.sklist_lock); | 2584 | spin_lock_init(&net->packet.sklist_lock); |
2585 | INIT_HLIST_HEAD(&net->packet.sklist); | 2585 | INIT_HLIST_HEAD(&net->packet.sklist); |
2586 | 2586 | ||
2587 | if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) | 2587 | if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) |