aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
author Alan Stern <stern@rowland.harvard.edu> 2006-03-27 04:16:30 -0500
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-03-27 11:44:50 -0500
commit e041c683412d5bf44dc2b109053e3b837b71742d (patch)
tree 9d271066ef379da0c0fb3b8cb4137abd5d2ebba0 /net/core
parent 76b81e2b0e2241accebcc68e126bc5ab958661b9 (diff)
[PATCH] Notifier chain update: API changes
The kernel's implementation of notifier chains is unsafe. There is no protection against entries being added to or removed from a chain while the chain is in use. The issues were discussed in this thread: http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2 We noticed that notifier chains in the kernel fall into two basic usage classes: "Blocking" chains are always called from a process context and the callout routines are allowed to sleep; "Atomic" chains can be called from an atomic context and the callout routines are not allowed to sleep. We decided to codify this distinction and make it part of the API. Therefore this set of patches introduces three new, parallel APIs: one for blocking notifiers, one for atomic notifiers, and one for "raw" notifiers (which is really just the old API under a new name). New kinds of data structures are used for the heads of the chains, and new routines are defined for registration, unregistration, and calling a chain. The three APIs are explained in include/linux/notifier.h and their implementation is in kernel/sys.c. With atomic and blocking chains, the implementation guarantees that the chain links will not be corrupted and that chain callers will not get messed up by entries being added or removed. For raw chains the implementation provides no guarantees at all; users of this API must provide their own protections. (The idea was that situations may come up where the assumptions of the atomic and blocking APIs are not appropriate, so it should be possible for users to handle these things in their own way.) There are some limitations, which should not be too hard to live with. For atomic/blocking chains, registration and unregistration must always be done in a process context since the chain is protected by a mutex/rwsem. Also, a callout routine for a non-raw chain must not try to register or unregister entries on its own chain. (This did happen in a couple of places and the code had to be changed to avoid it.) 
Since atomic chains may be called from within an NMI handler, they cannot use spinlocks for synchronization. Instead we use RCU. The overhead falls almost entirely in the unregister routine, which is okay since unregistration is much less frequent than calling a chain. Here is the list of chains that we adjusted and their classifications. None of them use the raw API, so for the moment it is only a placeholder. ATOMIC CHAINS ------------- arch/i386/kernel/traps.c: i386die_chain arch/ia64/kernel/traps.c: ia64die_chain arch/powerpc/kernel/traps.c: powerpc_die_chain arch/sparc64/kernel/traps.c: sparc64die_chain arch/x86_64/kernel/traps.c: die_chain drivers/char/ipmi/ipmi_si_intf.c: xaction_notifier_list kernel/panic.c: panic_notifier_list kernel/profile.c: task_free_notifier net/bluetooth/hci_core.c: hci_notifier net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_chain net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_expect_chain net/ipv6/addrconf.c: inet6addr_chain net/netfilter/nf_conntrack_core.c: nf_conntrack_chain net/netfilter/nf_conntrack_core.c: nf_conntrack_expect_chain net/netlink/af_netlink.c: netlink_chain BLOCKING CHAINS --------------- arch/powerpc/platforms/pseries/reconfig.c: pSeries_reconfig_chain arch/s390/kernel/process.c: idle_chain arch/x86_64/kernel/process.c: idle_notifier drivers/base/memory.c: memory_chain drivers/cpufreq/cpufreq.c: cpufreq_policy_notifier_list drivers/cpufreq/cpufreq.c: cpufreq_transition_notifier_list drivers/macintosh/adb.c: adb_client_list drivers/macintosh/via-pmu.c: sleep_notifier_list drivers/macintosh/via-pmu68k.c: sleep_notifier_list drivers/macintosh/windfarm_core.c: wf_client_list drivers/usb/core/notify.c: usb_notifier_list drivers/video/fbmem.c: fb_notifier_list kernel/cpu.c: cpu_chain kernel/module.c: module_notify_list kernel/profile.c: munmap_notifier kernel/profile.c: task_exit_notifier kernel/sys.c: reboot_notifier_list net/core/dev.c: netdev_chain net/decnet/dn_dev.c: dnaddr_chain net/ipv4/devinet.c: inetaddr_chain 
It's possible that some of these classifications are wrong. If they are, please let us know or submit a patch to fix them. Note that any chain that gets called very frequently should be atomic, because the rwsem read-locking used for blocking chains is very likely to incur cache misses on SMP systems. (However, if the chain's callout routines may sleep then the chain cannot be atomic.) The patch set was written by Alan Stern and Chandra Seetharaman, incorporating material written by Keith Owens and suggestions from Paul McKenney and Andrew Morton. [jes@sgi.com: restructure the notifier chain initialization macros] Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com> Signed-off-by: Jes Sorensen <jes@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/dev.c 42
1 files changed, 23 insertions, 19 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 8e1dc3051222..a3ab11f34153 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -193,7 +193,7 @@ static inline struct hlist_head *dev_index_hash(int ifindex)
193 * Our notifier list 193 * Our notifier list
194 */ 194 */
195 195
196static struct notifier_block *netdev_chain; 196static BLOCKING_NOTIFIER_HEAD(netdev_chain);
197 197
198/* 198/*
199 * Device drivers call our routines to queue packets here. We empty the 199 * Device drivers call our routines to queue packets here. We empty the
@@ -736,7 +736,8 @@ int dev_change_name(struct net_device *dev, char *newname)
736 if (!err) { 736 if (!err) {
737 hlist_del(&dev->name_hlist); 737 hlist_del(&dev->name_hlist);
738 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); 738 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
739 notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); 739 blocking_notifier_call_chain(&netdev_chain,
740 NETDEV_CHANGENAME, dev);
740 } 741 }
741 742
742 return err; 743 return err;
@@ -750,7 +751,7 @@ int dev_change_name(struct net_device *dev, char *newname)
750 */ 751 */
751void netdev_features_change(struct net_device *dev) 752void netdev_features_change(struct net_device *dev)
752{ 753{
753 notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); 754 blocking_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
754} 755}
755EXPORT_SYMBOL(netdev_features_change); 756EXPORT_SYMBOL(netdev_features_change);
756 757
@@ -765,7 +766,8 @@ EXPORT_SYMBOL(netdev_features_change);
765void netdev_state_change(struct net_device *dev) 766void netdev_state_change(struct net_device *dev)
766{ 767{
767 if (dev->flags & IFF_UP) { 768 if (dev->flags & IFF_UP) {
768 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); 769 blocking_notifier_call_chain(&netdev_chain,
770 NETDEV_CHANGE, dev);
769 rtmsg_ifinfo(RTM_NEWLINK, dev, 0); 771 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
770 } 772 }
771} 773}
@@ -862,7 +864,7 @@ int dev_open(struct net_device *dev)
862 /* 864 /*
863 * ... and announce new interface. 865 * ... and announce new interface.
864 */ 866 */
865 notifier_call_chain(&netdev_chain, NETDEV_UP, dev); 867 blocking_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
866 } 868 }
867 return ret; 869 return ret;
868} 870}
@@ -885,7 +887,7 @@ int dev_close(struct net_device *dev)
885 * Tell people we are going down, so that they can 887 * Tell people we are going down, so that they can
886 * prepare to death, when device is still operating. 888 * prepare to death, when device is still operating.
887 */ 889 */
888 notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); 890 blocking_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
889 891
890 dev_deactivate(dev); 892 dev_deactivate(dev);
891 893
@@ -922,7 +924,7 @@ int dev_close(struct net_device *dev)
922 /* 924 /*
923 * Tell people we are down 925 * Tell people we are down
924 */ 926 */
925 notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); 927 blocking_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
926 928
927 return 0; 929 return 0;
928} 930}
@@ -953,7 +955,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
953 int err; 955 int err;
954 956
955 rtnl_lock(); 957 rtnl_lock();
956 err = notifier_chain_register(&netdev_chain, nb); 958 err = blocking_notifier_chain_register(&netdev_chain, nb);
957 if (!err) { 959 if (!err) {
958 for (dev = dev_base; dev; dev = dev->next) { 960 for (dev = dev_base; dev; dev = dev->next) {
959 nb->notifier_call(nb, NETDEV_REGISTER, dev); 961 nb->notifier_call(nb, NETDEV_REGISTER, dev);
@@ -981,7 +983,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
981 int err; 983 int err;
982 984
983 rtnl_lock(); 985 rtnl_lock();
984 err = notifier_chain_unregister(&netdev_chain, nb); 986 err = blocking_notifier_chain_unregister(&netdev_chain, nb);
985 rtnl_unlock(); 987 rtnl_unlock();
986 return err; 988 return err;
987} 989}
@@ -992,12 +994,12 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
992 * @v: pointer passed unmodified to notifier function 994 * @v: pointer passed unmodified to notifier function
993 * 995 *
994 * Call all network notifier blocks. Parameters and return value 996 * Call all network notifier blocks. Parameters and return value
995 * are as for notifier_call_chain(). 997 * are as for blocking_notifier_call_chain().
996 */ 998 */
997 999
998int call_netdevice_notifiers(unsigned long val, void *v) 1000int call_netdevice_notifiers(unsigned long val, void *v)
999{ 1001{
1000 return notifier_call_chain(&netdev_chain, val, v); 1002 return blocking_notifier_call_chain(&netdev_chain, val, v);
1001} 1003}
1002 1004
1003/* When > 0 there are consumers of rx skb time stamps */ 1005/* When > 0 there are consumers of rx skb time stamps */
@@ -2242,7 +2244,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
2242 if (dev->flags & IFF_UP && 2244 if (dev->flags & IFF_UP &&
2243 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | 2245 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2244 IFF_VOLATILE))) 2246 IFF_VOLATILE)))
2245 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); 2247 blocking_notifier_call_chain(&netdev_chain,
2248 NETDEV_CHANGE, dev);
2246 2249
2247 if ((flags ^ dev->gflags) & IFF_PROMISC) { 2250 if ((flags ^ dev->gflags) & IFF_PROMISC) {
2248 int inc = (flags & IFF_PROMISC) ? +1 : -1; 2251 int inc = (flags & IFF_PROMISC) ? +1 : -1;
@@ -2286,8 +2289,8 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
2286 else 2289 else
2287 dev->mtu = new_mtu; 2290 dev->mtu = new_mtu;
2288 if (!err && dev->flags & IFF_UP) 2291 if (!err && dev->flags & IFF_UP)
2289 notifier_call_chain(&netdev_chain, 2292 blocking_notifier_call_chain(&netdev_chain,
2290 NETDEV_CHANGEMTU, dev); 2293 NETDEV_CHANGEMTU, dev);
2291 return err; 2294 return err;
2292} 2295}
2293 2296
@@ -2303,7 +2306,8 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2303 return -ENODEV; 2306 return -ENODEV;
2304 err = dev->set_mac_address(dev, sa); 2307 err = dev->set_mac_address(dev, sa);
2305 if (!err) 2308 if (!err)
2306 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); 2309 blocking_notifier_call_chain(&netdev_chain,
2310 NETDEV_CHANGEADDR, dev);
2307 return err; 2311 return err;
2308} 2312}
2309 2313
@@ -2359,7 +2363,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2359 return -EINVAL; 2363 return -EINVAL;
2360 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, 2364 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2361 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); 2365 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2362 notifier_call_chain(&netdev_chain, 2366 blocking_notifier_call_chain(&netdev_chain,
2363 NETDEV_CHANGEADDR, dev); 2367 NETDEV_CHANGEADDR, dev);
2364 return 0; 2368 return 0;
2365 2369
@@ -2813,7 +2817,7 @@ int register_netdevice(struct net_device *dev)
2813 write_unlock_bh(&dev_base_lock); 2817 write_unlock_bh(&dev_base_lock);
2814 2818
2815 /* Notify protocols, that a new device appeared. */ 2819 /* Notify protocols, that a new device appeared. */
2816 notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); 2820 blocking_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2817 2821
2818 /* Finish registration after unlock */ 2822 /* Finish registration after unlock */
2819 net_set_todo(dev); 2823 net_set_todo(dev);
@@ -2892,7 +2896,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
2892 rtnl_lock(); 2896 rtnl_lock();
2893 2897
2894 /* Rebroadcast unregister notification */ 2898 /* Rebroadcast unregister notification */
2895 notifier_call_chain(&netdev_chain, 2899 blocking_notifier_call_chain(&netdev_chain,
2896 NETDEV_UNREGISTER, dev); 2900 NETDEV_UNREGISTER, dev);
2897 2901
2898 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 2902 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
@@ -3148,7 +3152,7 @@ int unregister_netdevice(struct net_device *dev)
3148 /* Notify protocols, that we are about to destroy 3152 /* Notify protocols, that we are about to destroy
3149 this device. They should clean all the things. 3153 this device. They should clean all the things.
3150 */ 3154 */
3151 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); 3155 blocking_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3152 3156
3153 /* 3157 /*
3154 * Flush the multicast chain 3158 * Flush the multicast chain