diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2011-06-08 21:13:01 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2011-06-12 17:45:41 -0400 |
commit | a685e08987d1edf1995b76511d4c98ea0e905377 (patch) | |
tree | 1d42593e2bc320f8d93b98851b2df0fd432e3859 | |
parent | dde194a64bb5c3fd05d965775dc92e8a4920a53a (diff) |
Delay struct net freeing while there's a sysfs instance referring to it
* new refcount in struct net, controlling actual freeing of the memory
* new method in kobj_ns_type_operations (->drop_ns())
* ->current_ns() semantics change - it's supposed to be followed by
corresponding ->drop_ns(). For struct net in case of CONFIG_NET_NS it bumps
the new refcount; net_drop_ns() decrements it and calls net_free() if the
last reference has been dropped. Method renamed to ->grab_current_ns().
* old net_free() callers call net_drop_ns() instead.
* sysfs_exit_ns() is gone, along with a large part of callchain
leading to it; now that the references stored in ->ns[...] stay valid we
do not need to hunt them down and replace them with NULL. That fixes
problems in sysfs_lookup() and sysfs_readdir(), along with getting rid
of sb->s_instances abuse.
Note that struct net *shutdown* logic has not changed - cleanup_net()
is called exactly when it used to be called. The only thing postponed by
having a sysfs instance referring to that struct net is actual freeing of
memory occupied by struct net.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/sysfs/mount.c | 37 | ||||
-rw-r--r-- | fs/sysfs/sysfs.h | 2 | ||||
-rw-r--r-- | include/linux/kobject_ns.h | 10 | ||||
-rw-r--r-- | include/linux/sysfs.h | 7 | ||||
-rw-r--r-- | include/net/net_namespace.h | 10 | ||||
-rw-r--r-- | lib/kobject.c | 26 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 23 | ||||
-rw-r--r-- | net/core/net_namespace.c | 12 |
8 files changed, 55 insertions, 72 deletions
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 266895783b47..e34f0d99ea4e 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
@@ -95,6 +95,14 @@ static int sysfs_set_super(struct super_block *sb, void *data) | |||
95 | return error; | 95 | return error; |
96 | } | 96 | } |
97 | 97 | ||
98 | static void free_sysfs_super_info(struct sysfs_super_info *info) | ||
99 | { | ||
100 | int type; | ||
101 | for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) | ||
102 | kobj_ns_drop(type, info->ns[type]); | ||
103 | kfree(info); | ||
104 | } | ||
105 | |||
98 | static struct dentry *sysfs_mount(struct file_system_type *fs_type, | 106 | static struct dentry *sysfs_mount(struct file_system_type *fs_type, |
99 | int flags, const char *dev_name, void *data) | 107 | int flags, const char *dev_name, void *data) |
100 | { | 108 | { |
@@ -108,11 +116,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, | |||
108 | return ERR_PTR(-ENOMEM); | 116 | return ERR_PTR(-ENOMEM); |
109 | 117 | ||
110 | for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) | 118 | for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) |
111 | info->ns[type] = kobj_ns_current(type); | 119 | info->ns[type] = kobj_ns_grab_current(type); |
112 | 120 | ||
113 | sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); | 121 | sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); |
114 | if (IS_ERR(sb) || sb->s_fs_info != info) | 122 | if (IS_ERR(sb) || sb->s_fs_info != info) |
115 | kfree(info); | 123 | free_sysfs_super_info(info); |
116 | if (IS_ERR(sb)) | 124 | if (IS_ERR(sb)) |
117 | return ERR_CAST(sb); | 125 | return ERR_CAST(sb); |
118 | if (!sb->s_root) { | 126 | if (!sb->s_root) { |
@@ -131,12 +139,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, | |||
131 | static void sysfs_kill_sb(struct super_block *sb) | 139 | static void sysfs_kill_sb(struct super_block *sb) |
132 | { | 140 | { |
133 | struct sysfs_super_info *info = sysfs_info(sb); | 141 | struct sysfs_super_info *info = sysfs_info(sb); |
134 | |||
135 | /* Remove the superblock from fs_supers/s_instances | 142 | /* Remove the superblock from fs_supers/s_instances |
136 | * so we can't find it, before freeing sysfs_super_info. | 143 | * so we can't find it, before freeing sysfs_super_info. |
137 | */ | 144 | */ |
138 | kill_anon_super(sb); | 145 | kill_anon_super(sb); |
139 | kfree(info); | 146 | free_sysfs_super_info(info); |
140 | } | 147 | } |
141 | 148 | ||
142 | static struct file_system_type sysfs_fs_type = { | 149 | static struct file_system_type sysfs_fs_type = { |
@@ -145,28 +152,6 @@ static struct file_system_type sysfs_fs_type = { | |||
145 | .kill_sb = sysfs_kill_sb, | 152 | .kill_sb = sysfs_kill_sb, |
146 | }; | 153 | }; |
147 | 154 | ||
148 | void sysfs_exit_ns(enum kobj_ns_type type, const void *ns) | ||
149 | { | ||
150 | struct super_block *sb; | ||
151 | |||
152 | mutex_lock(&sysfs_mutex); | ||
153 | spin_lock(&sb_lock); | ||
154 | list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) { | ||
155 | struct sysfs_super_info *info = sysfs_info(sb); | ||
156 | /* | ||
157 | * If we see a superblock on the fs_supers/s_instances | ||
158 | * list the unmount has not completed and sb->s_fs_info | ||
159 | * points to a valid struct sysfs_super_info. | ||
160 | */ | ||
161 | /* Ignore superblocks with the wrong ns */ | ||
162 | if (info->ns[type] != ns) | ||
163 | continue; | ||
164 | info->ns[type] = NULL; | ||
165 | } | ||
166 | spin_unlock(&sb_lock); | ||
167 | mutex_unlock(&sysfs_mutex); | ||
168 | } | ||
169 | |||
170 | int __init sysfs_init(void) | 155 | int __init sysfs_init(void) |
171 | { | 156 | { |
172 | int err = -ENOMEM; | 157 | int err = -ENOMEM; |
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3d28af31d863..2ed2404f3113 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h | |||
@@ -136,7 +136,7 @@ struct sysfs_addrm_cxt { | |||
136 | * instance). | 136 | * instance). |
137 | */ | 137 | */ |
138 | struct sysfs_super_info { | 138 | struct sysfs_super_info { |
139 | const void *ns[KOBJ_NS_TYPES]; | 139 | void *ns[KOBJ_NS_TYPES]; |
140 | }; | 140 | }; |
141 | #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) | 141 | #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) |
142 | extern struct sysfs_dirent sysfs_root; | 142 | extern struct sysfs_dirent sysfs_root; |
diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 82cb5bf461fb..f66b065a8b5f 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h | |||
@@ -32,15 +32,17 @@ enum kobj_ns_type { | |||
32 | 32 | ||
33 | /* | 33 | /* |
34 | * Callbacks so sysfs can determine namespaces | 34 | * Callbacks so sysfs can determine namespaces |
35 | * @current_ns: return calling task's namespace | 35 | * @grab_current_ns: return a new reference to calling task's namespace |
36 | * @netlink_ns: return namespace to which a sock belongs (right?) | 36 | * @netlink_ns: return namespace to which a sock belongs (right?) |
37 | * @initial_ns: return the initial namespace (i.e. init_net_ns) | 37 | * @initial_ns: return the initial namespace (i.e. init_net_ns) |
38 | * @drop_ns: drops a reference to namespace | ||
38 | */ | 39 | */ |
39 | struct kobj_ns_type_operations { | 40 | struct kobj_ns_type_operations { |
40 | enum kobj_ns_type type; | 41 | enum kobj_ns_type type; |
41 | const void *(*current_ns)(void); | 42 | void *(*grab_current_ns)(void); |
42 | const void *(*netlink_ns)(struct sock *sk); | 43 | const void *(*netlink_ns)(struct sock *sk); |
43 | const void *(*initial_ns)(void); | 44 | const void *(*initial_ns)(void); |
45 | void (*drop_ns)(void *); | ||
44 | }; | 46 | }; |
45 | 47 | ||
46 | int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); | 48 | int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); |
@@ -48,9 +50,9 @@ int kobj_ns_type_registered(enum kobj_ns_type type); | |||
48 | const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); | 50 | const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); |
49 | const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); | 51 | const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); |
50 | 52 | ||
51 | const void *kobj_ns_current(enum kobj_ns_type type); | 53 | void *kobj_ns_grab_current(enum kobj_ns_type type); |
52 | const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); | 54 | const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); |
53 | const void *kobj_ns_initial(enum kobj_ns_type type); | 55 | const void *kobj_ns_initial(enum kobj_ns_type type); |
54 | void kobj_ns_exit(enum kobj_ns_type type, const void *ns); | 56 | void kobj_ns_drop(enum kobj_ns_type type, void *ns); |
55 | 57 | ||
56 | #endif /* _LINUX_KOBJECT_NS_H */ | 58 | #endif /* _LINUX_KOBJECT_NS_H */ |
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c3acda60eee0..e2696d76a599 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h | |||
@@ -177,9 +177,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, | |||
177 | struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); | 177 | struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); |
178 | void sysfs_put(struct sysfs_dirent *sd); | 178 | void sysfs_put(struct sysfs_dirent *sd); |
179 | 179 | ||
180 | /* Called to clear a ns tag when it is no longer valid */ | ||
181 | void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); | ||
182 | |||
183 | int __must_check sysfs_init(void); | 180 | int __must_check sysfs_init(void); |
184 | 181 | ||
185 | #else /* CONFIG_SYSFS */ | 182 | #else /* CONFIG_SYSFS */ |
@@ -338,10 +335,6 @@ static inline void sysfs_put(struct sysfs_dirent *sd) | |||
338 | { | 335 | { |
339 | } | 336 | } |
340 | 337 | ||
341 | static inline void sysfs_exit_ns(int type, const void *tag) | ||
342 | { | ||
343 | } | ||
344 | |||
345 | static inline int __must_check sysfs_init(void) | 338 | static inline int __must_check sysfs_init(void) |
346 | { | 339 | { |
347 | return 0; | 340 | return 0; |
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2bf9ed9ef26b..aef430d779bd 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h | |||
@@ -35,8 +35,11 @@ struct netns_ipvs; | |||
35 | #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) | 35 | #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) |
36 | 36 | ||
37 | struct net { | 37 | struct net { |
38 | atomic_t passive; /* To decided when the network | ||
39 | * namespace should be freed. | ||
40 | */ | ||
38 | atomic_t count; /* To decided when the network | 41 | atomic_t count; /* To decided when the network |
39 | * namespace should be freed. | 42 | * namespace should be shut down. |
40 | */ | 43 | */ |
41 | #ifdef NETNS_REFCNT_DEBUG | 44 | #ifdef NETNS_REFCNT_DEBUG |
42 | atomic_t use_count; /* To track references we | 45 | atomic_t use_count; /* To track references we |
@@ -154,6 +157,9 @@ int net_eq(const struct net *net1, const struct net *net2) | |||
154 | { | 157 | { |
155 | return net1 == net2; | 158 | return net1 == net2; |
156 | } | 159 | } |
160 | |||
161 | extern void net_drop_ns(void *); | ||
162 | |||
157 | #else | 163 | #else |
158 | 164 | ||
159 | static inline struct net *get_net(struct net *net) | 165 | static inline struct net *get_net(struct net *net) |
@@ -175,6 +181,8 @@ int net_eq(const struct net *net1, const struct net *net2) | |||
175 | { | 181 | { |
176 | return 1; | 182 | return 1; |
177 | } | 183 | } |
184 | |||
185 | #define net_drop_ns NULL | ||
178 | #endif | 186 | #endif |
179 | 187 | ||
180 | 188 | ||
diff --git a/lib/kobject.c b/lib/kobject.c index 82dc34c095c2..640bd98a4c8a 100644 --- a/lib/kobject.c +++ b/lib/kobject.c | |||
@@ -948,14 +948,14 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) | |||
948 | } | 948 | } |
949 | 949 | ||
950 | 950 | ||
951 | const void *kobj_ns_current(enum kobj_ns_type type) | 951 | void *kobj_ns_grab_current(enum kobj_ns_type type) |
952 | { | 952 | { |
953 | const void *ns = NULL; | 953 | void *ns = NULL; |
954 | 954 | ||
955 | spin_lock(&kobj_ns_type_lock); | 955 | spin_lock(&kobj_ns_type_lock); |
956 | if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && | 956 | if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && |
957 | kobj_ns_ops_tbl[type]) | 957 | kobj_ns_ops_tbl[type]) |
958 | ns = kobj_ns_ops_tbl[type]->current_ns(); | 958 | ns = kobj_ns_ops_tbl[type]->grab_current_ns(); |
959 | spin_unlock(&kobj_ns_type_lock); | 959 | spin_unlock(&kobj_ns_type_lock); |
960 | 960 | ||
961 | return ns; | 961 | return ns; |
@@ -987,23 +987,15 @@ const void *kobj_ns_initial(enum kobj_ns_type type) | |||
987 | return ns; | 987 | return ns; |
988 | } | 988 | } |
989 | 989 | ||
990 | /* | 990 | void kobj_ns_drop(enum kobj_ns_type type, void *ns) |
991 | * kobj_ns_exit - invalidate a namespace tag | ||
992 | * | ||
993 | * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) | ||
994 | * @ns: the actual namespace being invalidated | ||
995 | * | ||
996 | * This is called when a tag is no longer valid. For instance, | ||
997 | * when a network namespace exits, it uses this helper to | ||
998 | * make sure no sb's sysfs_info points to the now-invalidated | ||
999 | * netns. | ||
1000 | */ | ||
1001 | void kobj_ns_exit(enum kobj_ns_type type, const void *ns) | ||
1002 | { | 991 | { |
1003 | sysfs_exit_ns(type, ns); | 992 | spin_lock(&kobj_ns_type_lock); |
993 | if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && | ||
994 | kobj_ns_ops_tbl[type] && kobj_ns_ops_tbl[type]->drop_ns) | ||
995 | kobj_ns_ops_tbl[type]->drop_ns(ns); | ||
996 | spin_unlock(&kobj_ns_type_lock); | ||
1004 | } | 997 | } |
1005 | 998 | ||
1006 | |||
1007 | EXPORT_SYMBOL(kobject_get); | 999 | EXPORT_SYMBOL(kobject_get); |
1008 | EXPORT_SYMBOL(kobject_put); | 1000 | EXPORT_SYMBOL(kobject_put); |
1009 | EXPORT_SYMBOL(kobject_del); | 1001 | EXPORT_SYMBOL(kobject_del); |
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 11b98bc2aa8f..33d2a1fba131 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
@@ -1179,9 +1179,14 @@ static void remove_queue_kobjects(struct net_device *net) | |||
1179 | #endif | 1179 | #endif |
1180 | } | 1180 | } |
1181 | 1181 | ||
1182 | static const void *net_current_ns(void) | 1182 | static void *net_grab_current_ns(void) |
1183 | { | 1183 | { |
1184 | return current->nsproxy->net_ns; | 1184 | struct net *ns = current->nsproxy->net_ns; |
1185 | #ifdef CONFIG_NET_NS | ||
1186 | if (ns) | ||
1187 | atomic_inc(&ns->passive); | ||
1188 | #endif | ||
1189 | return ns; | ||
1185 | } | 1190 | } |
1186 | 1191 | ||
1187 | static const void *net_initial_ns(void) | 1192 | static const void *net_initial_ns(void) |
@@ -1196,22 +1201,13 @@ static const void *net_netlink_ns(struct sock *sk) | |||
1196 | 1201 | ||
1197 | struct kobj_ns_type_operations net_ns_type_operations = { | 1202 | struct kobj_ns_type_operations net_ns_type_operations = { |
1198 | .type = KOBJ_NS_TYPE_NET, | 1203 | .type = KOBJ_NS_TYPE_NET, |
1199 | .current_ns = net_current_ns, | 1204 | .grab_current_ns = net_grab_current_ns, |
1200 | .netlink_ns = net_netlink_ns, | 1205 | .netlink_ns = net_netlink_ns, |
1201 | .initial_ns = net_initial_ns, | 1206 | .initial_ns = net_initial_ns, |
1207 | .drop_ns = net_drop_ns, | ||
1202 | }; | 1208 | }; |
1203 | EXPORT_SYMBOL_GPL(net_ns_type_operations); | 1209 | EXPORT_SYMBOL_GPL(net_ns_type_operations); |
1204 | 1210 | ||
1205 | static void net_kobj_ns_exit(struct net *net) | ||
1206 | { | ||
1207 | kobj_ns_exit(KOBJ_NS_TYPE_NET, net); | ||
1208 | } | ||
1209 | |||
1210 | static struct pernet_operations kobj_net_ops = { | ||
1211 | .exit = net_kobj_ns_exit, | ||
1212 | }; | ||
1213 | |||
1214 | |||
1215 | #ifdef CONFIG_HOTPLUG | 1211 | #ifdef CONFIG_HOTPLUG |
1216 | static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) | 1212 | static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) |
1217 | { | 1213 | { |
@@ -1339,6 +1335,5 @@ EXPORT_SYMBOL(netdev_class_remove_file); | |||
1339 | int netdev_kobject_init(void) | 1335 | int netdev_kobject_init(void) |
1340 | { | 1336 | { |
1341 | kobj_ns_type_register(&net_ns_type_operations); | 1337 | kobj_ns_type_register(&net_ns_type_operations); |
1342 | register_pernet_subsys(&kobj_net_ops); | ||
1343 | return class_register(&net_class); | 1338 | return class_register(&net_class); |
1344 | } | 1339 | } |
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6c6b86d0da15..cdcbc3cb00a9 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
@@ -128,6 +128,7 @@ static __net_init int setup_net(struct net *net) | |||
128 | LIST_HEAD(net_exit_list); | 128 | LIST_HEAD(net_exit_list); |
129 | 129 | ||
130 | atomic_set(&net->count, 1); | 130 | atomic_set(&net->count, 1); |
131 | atomic_set(&net->passive, 1); | ||
131 | 132 | ||
132 | #ifdef NETNS_REFCNT_DEBUG | 133 | #ifdef NETNS_REFCNT_DEBUG |
133 | atomic_set(&net->use_count, 0); | 134 | atomic_set(&net->use_count, 0); |
@@ -210,6 +211,13 @@ static void net_free(struct net *net) | |||
210 | kmem_cache_free(net_cachep, net); | 211 | kmem_cache_free(net_cachep, net); |
211 | } | 212 | } |
212 | 213 | ||
214 | void net_drop_ns(void *p) | ||
215 | { | ||
216 | struct net *ns = p; | ||
217 | if (ns && atomic_dec_and_test(&ns->passive)) | ||
218 | net_free(ns); | ||
219 | } | ||
220 | |||
213 | struct net *copy_net_ns(unsigned long flags, struct net *old_net) | 221 | struct net *copy_net_ns(unsigned long flags, struct net *old_net) |
214 | { | 222 | { |
215 | struct net *net; | 223 | struct net *net; |
@@ -230,7 +238,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) | |||
230 | } | 238 | } |
231 | mutex_unlock(&net_mutex); | 239 | mutex_unlock(&net_mutex); |
232 | if (rv < 0) { | 240 | if (rv < 0) { |
233 | net_free(net); | 241 | net_drop_ns(net); |
234 | return ERR_PTR(rv); | 242 | return ERR_PTR(rv); |
235 | } | 243 | } |
236 | return net; | 244 | return net; |
@@ -286,7 +294,7 @@ static void cleanup_net(struct work_struct *work) | |||
286 | /* Finally it is safe to free my network namespace structure */ | 294 | /* Finally it is safe to free my network namespace structure */ |
287 | list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { | 295 | list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { |
288 | list_del_init(&net->exit_list); | 296 | list_del_init(&net->exit_list); |
289 | net_free(net); | 297 | net_drop_ns(net); |
290 | } | 298 | } |
291 | } | 299 | } |
292 | static DECLARE_WORK(net_cleanup_work, cleanup_net); | 300 | static DECLARE_WORK(net_cleanup_work, cleanup_net); |