aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-10-01 12:15:01 -0400
committerDavid S. Miller <davem@davemloft.net>2010-10-04 00:50:52 -0400
commit4c9687098f245601e9d94178715ee03afbcc6f80 (patch)
tree00e08d50538ffbf8a85a481cef351034c5a2174a
parent55747a0a73ea74a25fcebb0731e8d3f13fe8c09d (diff)
ipmr: RCU conversion of mroute_sk
Use RCU and RTNL to protect (struct mr_table)->mroute_sk. Readers use RCU, writers use RTNL. ip_ra_control() already uses an RCU grace period before ip_ra_destroy_rcu(), so we don't need synchronize_rcu() in mrtsock_destruct(). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/ipmr.c91
1 file changed, 49 insertions, 42 deletions
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 1a92ebd85196..e2db2ea616ff 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -75,7 +75,7 @@ struct mr_table {
75 struct net *net; 75 struct net *net;
76#endif 76#endif
77 u32 id; 77 u32 id;
78 struct sock *mroute_sk; 78 struct sock __rcu *mroute_sk;
79 struct timer_list ipmr_expire_timer; 79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue; 80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES]; 81 struct list_head mfc_cache_array[MFC_LINES];
@@ -867,6 +867,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
867 const int ihl = ip_hdrlen(pkt); 867 const int ihl = ip_hdrlen(pkt);
868 struct igmphdr *igmp; 868 struct igmphdr *igmp;
869 struct igmpmsg *msg; 869 struct igmpmsg *msg;
870 struct sock *mroute_sk;
870 int ret; 871 int ret;
871 872
872#ifdef CONFIG_IP_PIMSM 873#ifdef CONFIG_IP_PIMSM
@@ -925,7 +926,10 @@ static int ipmr_cache_report(struct mr_table *mrt,
925 skb->transport_header = skb->network_header; 926 skb->transport_header = skb->network_header;
926 } 927 }
927 928
928 if (mrt->mroute_sk == NULL) { 929 rcu_read_lock();
930 mroute_sk = rcu_dereference(mrt->mroute_sk);
931 if (mroute_sk == NULL) {
932 rcu_read_unlock();
929 kfree_skb(skb); 933 kfree_skb(skb);
930 return -EINVAL; 934 return -EINVAL;
931 } 935 }
@@ -933,7 +937,8 @@ static int ipmr_cache_report(struct mr_table *mrt,
933 /* 937 /*
934 * Deliver to mrouted 938 * Deliver to mrouted
935 */ 939 */
936 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb); 940 ret = sock_queue_rcv_skb(mroute_sk, skb);
941 rcu_read_unlock();
937 if (ret < 0) { 942 if (ret < 0) {
938 if (net_ratelimit()) 943 if (net_ratelimit())
939 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); 944 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
@@ -1164,6 +1169,9 @@ static void mroute_clean_tables(struct mr_table *mrt)
1164 } 1169 }
1165} 1170}
1166 1171
1172/* called from ip_ra_control(), before an RCU grace period,
1173 * we dont need to call synchronize_rcu() here
1174 */
1167static void mrtsock_destruct(struct sock *sk) 1175static void mrtsock_destruct(struct sock *sk)
1168{ 1176{
1169 struct net *net = sock_net(sk); 1177 struct net *net = sock_net(sk);
@@ -1171,13 +1179,9 @@ static void mrtsock_destruct(struct sock *sk)
1171 1179
1172 rtnl_lock(); 1180 rtnl_lock();
1173 ipmr_for_each_table(mrt, net) { 1181 ipmr_for_each_table(mrt, net) {
1174 if (sk == mrt->mroute_sk) { 1182 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1175 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1183 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1176 1184 rcu_assign_pointer(mrt->mroute_sk, NULL);
1177 write_lock_bh(&mrt_lock);
1178 mrt->mroute_sk = NULL;
1179 write_unlock_bh(&mrt_lock);
1180
1181 mroute_clean_tables(mrt); 1185 mroute_clean_tables(mrt);
1182 } 1186 }
1183 } 1187 }
@@ -1204,7 +1208,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1204 return -ENOENT; 1208 return -ENOENT;
1205 1209
1206 if (optname != MRT_INIT) { 1210 if (optname != MRT_INIT) {
1207 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN)) 1211 if (sk != rcu_dereference_raw(mrt->mroute_sk) &&
1212 !capable(CAP_NET_ADMIN))
1208 return -EACCES; 1213 return -EACCES;
1209 } 1214 }
1210 1215
@@ -1217,23 +1222,20 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1217 return -ENOPROTOOPT; 1222 return -ENOPROTOOPT;
1218 1223
1219 rtnl_lock(); 1224 rtnl_lock();
1220 if (mrt->mroute_sk) { 1225 if (rtnl_dereference(mrt->mroute_sk)) {
1221 rtnl_unlock(); 1226 rtnl_unlock();
1222 return -EADDRINUSE; 1227 return -EADDRINUSE;
1223 } 1228 }
1224 1229
1225 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1230 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1226 if (ret == 0) { 1231 if (ret == 0) {
1227 write_lock_bh(&mrt_lock); 1232 rcu_assign_pointer(mrt->mroute_sk, sk);
1228 mrt->mroute_sk = sk;
1229 write_unlock_bh(&mrt_lock);
1230
1231 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1233 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1232 } 1234 }
1233 rtnl_unlock(); 1235 rtnl_unlock();
1234 return ret; 1236 return ret;
1235 case MRT_DONE: 1237 case MRT_DONE:
1236 if (sk != mrt->mroute_sk) 1238 if (sk != rcu_dereference_raw(mrt->mroute_sk))
1237 return -EACCES; 1239 return -EACCES;
1238 return ip_ra_control(sk, 0, NULL); 1240 return ip_ra_control(sk, 0, NULL);
1239 case MRT_ADD_VIF: 1241 case MRT_ADD_VIF:
@@ -1246,7 +1248,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1246 return -ENFILE; 1248 return -ENFILE;
1247 rtnl_lock(); 1249 rtnl_lock();
1248 if (optname == MRT_ADD_VIF) { 1250 if (optname == MRT_ADD_VIF) {
1249 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk); 1251 ret = vif_add(net, mrt, &vif,
1252 sk == rtnl_dereference(mrt->mroute_sk));
1250 } else { 1253 } else {
1251 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1254 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1252 } 1255 }
@@ -1267,7 +1270,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1267 if (optname == MRT_DEL_MFC) 1270 if (optname == MRT_DEL_MFC)
1268 ret = ipmr_mfc_delete(mrt, &mfc); 1271 ret = ipmr_mfc_delete(mrt, &mfc);
1269 else 1272 else
1270 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk); 1273 ret = ipmr_mfc_add(net, mrt, &mfc,
1274 sk == rtnl_dereference(mrt->mroute_sk));
1271 rtnl_unlock(); 1275 rtnl_unlock();
1272 return ret; 1276 return ret;
1273 /* 1277 /*
@@ -1309,14 +1313,16 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1309 return -EINVAL; 1313 return -EINVAL;
1310 if (get_user(v, (u32 __user *)optval)) 1314 if (get_user(v, (u32 __user *)optval))
1311 return -EFAULT; 1315 return -EFAULT;
1312 if (sk == mrt->mroute_sk)
1313 return -EBUSY;
1314 1316
1315 rtnl_lock(); 1317 rtnl_lock();
1316 ret = 0; 1318 ret = 0;
1317 if (!ipmr_new_table(net, v)) 1319 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1318 ret = -ENOMEM; 1320 ret = -EBUSY;
1319 raw_sk(sk)->ipmr_table = v; 1321 } else {
1322 if (!ipmr_new_table(net, v))
1323 ret = -ENOMEM;
1324 raw_sk(sk)->ipmr_table = v;
1325 }
1320 rtnl_unlock(); 1326 rtnl_unlock();
1321 return ret; 1327 return ret;
1322 } 1328 }
@@ -1713,6 +1719,7 @@ dont_forward:
1713 1719
1714/* 1720/*
1715 * Multicast packets for forwarding arrive here 1721 * Multicast packets for forwarding arrive here
1722 * Called with rcu_read_lock();
1716 */ 1723 */
1717 1724
1718int ip_mr_input(struct sk_buff *skb) 1725int ip_mr_input(struct sk_buff *skb)
@@ -1726,7 +1733,7 @@ int ip_mr_input(struct sk_buff *skb)
1726 /* Packet is looped back after forward, it should not be 1733 /* Packet is looped back after forward, it should not be
1727 forwarded second time, but still can be delivered locally. 1734 forwarded second time, but still can be delivered locally.
1728 */ 1735 */
1729 if (IPCB(skb)->flags&IPSKB_FORWARDED) 1736 if (IPCB(skb)->flags & IPSKB_FORWARDED)
1730 goto dont_forward; 1737 goto dont_forward;
1731 1738
1732 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); 1739 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
@@ -1736,24 +1743,24 @@ int ip_mr_input(struct sk_buff *skb)
1736 } 1743 }
1737 1744
1738 if (!local) { 1745 if (!local) {
1739 if (IPCB(skb)->opt.router_alert) { 1746 if (IPCB(skb)->opt.router_alert) {
1740 if (ip_call_ra_chain(skb)) 1747 if (ip_call_ra_chain(skb))
1741 return 0; 1748 return 0;
1742 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){ 1749 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
1743 /* IGMPv1 (and broken IGMPv2 implementations sort of 1750 /* IGMPv1 (and broken IGMPv2 implementations sort of
1744 Cisco IOS <= 11.2(8)) do not put router alert 1751 * Cisco IOS <= 11.2(8)) do not put router alert
1745 option to IGMP packets destined to routable 1752 * option to IGMP packets destined to routable
1746 groups. It is very bad, because it means 1753 * groups. It is very bad, because it means
1747 that we can forward NO IGMP messages. 1754 * that we can forward NO IGMP messages.
1748 */ 1755 */
1749 read_lock(&mrt_lock); 1756 struct sock *mroute_sk;
1750 if (mrt->mroute_sk) { 1757
1751 nf_reset(skb); 1758 mroute_sk = rcu_dereference(mrt->mroute_sk);
1752 raw_rcv(mrt->mroute_sk, skb); 1759 if (mroute_sk) {
1753 read_unlock(&mrt_lock); 1760 nf_reset(skb);
1754 return 0; 1761 raw_rcv(mroute_sk, skb);
1755 } 1762 return 0;
1756 read_unlock(&mrt_lock); 1763 }
1757 } 1764 }
1758 } 1765 }
1759 1766