diff options
author | Pavel Emelyanov <xemul@openvz.org> | 2008-03-07 14:08:40 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-03-07 14:08:40 -0500 |
commit | e9720acd728a46cb40daa52c99a979f7c4ff195c (patch) | |
tree | 01380f601384cf93f30dedb64afe80359fecb807 /fs/proc | |
parent | 1ff82fe0024e8070c38346b8abc1ff09612dea4c (diff) |
[NET]: Make /proc/net a symlink on /proc/self/net (v3)
Current /proc/net is done with so called "shadows", but current
implementation is broken and has little chances to get fixed.
The problem is that dentries subtree of /proc/net directory has
fancy revalidation rules to make processes living in different
net namespaces see different entries in /proc/net subtree, but
currently, tasks see in the /proc/net subdir the contents of any
other namespace, depending on who opened the file first.
The proposed fix is to turn /proc/net into a symlink, which points
to /proc/self/net, which in turn shows what previously was in
/proc/net - the network-related info, from the net namespace the
appropriate task lives in.
# ls -l /proc/net
lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net
In other words - this behaves like /proc/mounts, but unlike
"mounts", "net" is not a file, but a directory.
Changes from v2:
* Fixed discrepancy of /proc/net nlink count and selinux labeling
screwup pointed out by Stephen.
To get the correct nlink count the ->getattr callback for /proc/net
is overridden to read one from the net->proc_net entry.
To make selinux still work the net->proc_net entry is initialized
properly, i.e. with the "net" name and the proc_net parent.
Selinux fixes are
Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
Changes from v1:
* Fixed a task_struct leak in get_proc_task_net, pointed out by Paul.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/base.c | 1 | ||||
-rw-r--r-- | fs/proc/generic.c | 26 | ||||
-rw-r--r-- | fs/proc/internal.h | 7 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 117 |
4 files changed, 114 insertions, 37 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 96ee899d6502..cc43cf0c1fa5 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -2274,6 +2274,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2274 | DIR("task", S_IRUGO|S_IXUGO, task), | 2274 | DIR("task", S_IRUGO|S_IXUGO, task), |
2275 | DIR("fd", S_IRUSR|S_IXUSR, fd), | 2275 | DIR("fd", S_IRUSR|S_IXUSR, fd), |
2276 | DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), | 2276 | DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), |
2277 | DIR("net", S_IRUGO|S_IXUSR, net), | ||
2277 | REG("environ", S_IRUSR, environ), | 2278 | REG("environ", S_IRUSR, environ), |
2278 | INF("auxv", S_IRUSR, pid_auxv), | 2279 | INF("auxv", S_IRUSR, pid_auxv), |
2279 | ONE("status", S_IRUGO, pid_status), | 2280 | ONE("status", S_IRUGO, pid_status), |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 68971e66cd41..a36ad3c75cf4 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -377,15 +377,14 @@ static struct dentry_operations proc_dentry_operations = | |||
377 | * Don't create negative dentries here, return -ENOENT by hand | 377 | * Don't create negative dentries here, return -ENOENT by hand |
378 | * instead. | 378 | * instead. |
379 | */ | 379 | */ |
380 | struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) | 380 | struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, |
381 | struct dentry *dentry) | ||
381 | { | 382 | { |
382 | struct inode *inode = NULL; | 383 | struct inode *inode = NULL; |
383 | struct proc_dir_entry * de; | ||
384 | int error = -ENOENT; | 384 | int error = -ENOENT; |
385 | 385 | ||
386 | lock_kernel(); | 386 | lock_kernel(); |
387 | spin_lock(&proc_subdir_lock); | 387 | spin_lock(&proc_subdir_lock); |
388 | de = PDE(dir); | ||
389 | if (de) { | 388 | if (de) { |
390 | for (de = de->subdir; de ; de = de->next) { | 389 | for (de = de->subdir; de ; de = de->next) { |
391 | if (de->namelen != dentry->d_name.len) | 390 | if (de->namelen != dentry->d_name.len) |
@@ -393,8 +392,6 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam | |||
393 | if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { | 392 | if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { |
394 | unsigned int ino; | 393 | unsigned int ino; |
395 | 394 | ||
396 | if (de->shadow_proc) | ||
397 | de = de->shadow_proc(current, de); | ||
398 | ino = de->low_ino; | 395 | ino = de->low_ino; |
399 | de_get(de); | 396 | de_get(de); |
400 | spin_unlock(&proc_subdir_lock); | 397 | spin_unlock(&proc_subdir_lock); |
@@ -417,6 +414,12 @@ out_unlock: | |||
417 | return ERR_PTR(error); | 414 | return ERR_PTR(error); |
418 | } | 415 | } |
419 | 416 | ||
417 | struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, | ||
418 | struct nameidata *nd) | ||
419 | { | ||
420 | return proc_lookup_de(PDE(dir), dir, dentry); | ||
421 | } | ||
422 | |||
420 | /* | 423 | /* |
421 | * This returns non-zero if at EOF, so that the /proc | 424 | * This returns non-zero if at EOF, so that the /proc |
422 | * root directory can use this and check if it should | 425 | * root directory can use this and check if it should |
@@ -426,10 +429,9 @@ out_unlock: | |||
426 | * value of the readdir() call, as long as it's non-negative | 429 | * value of the readdir() call, as long as it's non-negative |
427 | * for success.. | 430 | * for success.. |
428 | */ | 431 | */ |
429 | int proc_readdir(struct file * filp, | 432 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, |
430 | void * dirent, filldir_t filldir) | 433 | filldir_t filldir) |
431 | { | 434 | { |
432 | struct proc_dir_entry * de; | ||
433 | unsigned int ino; | 435 | unsigned int ino; |
434 | int i; | 436 | int i; |
435 | struct inode *inode = filp->f_path.dentry->d_inode; | 437 | struct inode *inode = filp->f_path.dentry->d_inode; |
@@ -438,7 +440,6 @@ int proc_readdir(struct file * filp, | |||
438 | lock_kernel(); | 440 | lock_kernel(); |
439 | 441 | ||
440 | ino = inode->i_ino; | 442 | ino = inode->i_ino; |
441 | de = PDE(inode); | ||
442 | if (!de) { | 443 | if (!de) { |
443 | ret = -EINVAL; | 444 | ret = -EINVAL; |
444 | goto out; | 445 | goto out; |
@@ -499,6 +500,13 @@ out: unlock_kernel(); | |||
499 | return ret; | 500 | return ret; |
500 | } | 501 | } |
501 | 502 | ||
503 | int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
504 | { | ||
505 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
506 | |||
507 | return proc_readdir_de(PDE(inode), filp, dirent, filldir); | ||
508 | } | ||
509 | |||
502 | /* | 510 | /* |
503 | * These are the generic /proc directory operations. They | 511 | * These are the generic /proc directory operations. They |
504 | * use the in-memory "struct proc_dir_entry" tree to parse | 512 | * use the in-memory "struct proc_dir_entry" tree to parse |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 1c81c8f1aeed..bc72f5c8c47d 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -64,6 +64,8 @@ extern const struct file_operations proc_numa_maps_operations; | |||
64 | extern const struct file_operations proc_smaps_operations; | 64 | extern const struct file_operations proc_smaps_operations; |
65 | extern const struct file_operations proc_clear_refs_operations; | 65 | extern const struct file_operations proc_clear_refs_operations; |
66 | extern const struct file_operations proc_pagemap_operations; | 66 | extern const struct file_operations proc_pagemap_operations; |
67 | extern const struct file_operations proc_net_operations; | ||
68 | extern const struct inode_operations proc_net_inode_operations; | ||
67 | 69 | ||
68 | void free_proc_entry(struct proc_dir_entry *de); | 70 | void free_proc_entry(struct proc_dir_entry *de); |
69 | 71 | ||
@@ -83,3 +85,8 @@ static inline int proc_fd(struct inode *inode) | |||
83 | { | 85 | { |
84 | return PROC_I(inode)->fd; | 86 | return PROC_I(inode)->fd; |
85 | } | 87 | } |
88 | |||
89 | struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, | ||
90 | struct dentry *dentry); | ||
91 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, | ||
92 | filldir_t filldir); | ||
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 14e9b5aaf863..4caa5f774fb7 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c | |||
@@ -63,6 +63,82 @@ int seq_release_net(struct inode *ino, struct file *f) | |||
63 | } | 63 | } |
64 | EXPORT_SYMBOL_GPL(seq_release_net); | 64 | EXPORT_SYMBOL_GPL(seq_release_net); |
65 | 65 | ||
66 | static struct net *get_proc_task_net(struct inode *dir) | ||
67 | { | ||
68 | struct task_struct *task; | ||
69 | struct nsproxy *ns; | ||
70 | struct net *net = NULL; | ||
71 | |||
72 | rcu_read_lock(); | ||
73 | task = pid_task(proc_pid(dir), PIDTYPE_PID); | ||
74 | if (task != NULL) { | ||
75 | ns = task_nsproxy(task); | ||
76 | if (ns != NULL) | ||
77 | net = get_net(ns->net_ns); | ||
78 | } | ||
79 | rcu_read_unlock(); | ||
80 | |||
81 | return net; | ||
82 | } | ||
83 | |||
84 | static struct dentry *proc_tgid_net_lookup(struct inode *dir, | ||
85 | struct dentry *dentry, struct nameidata *nd) | ||
86 | { | ||
87 | struct dentry *de; | ||
88 | struct net *net; | ||
89 | |||
90 | de = ERR_PTR(-ENOENT); | ||
91 | net = get_proc_task_net(dir); | ||
92 | if (net != NULL) { | ||
93 | de = proc_lookup_de(net->proc_net, dir, dentry); | ||
94 | put_net(net); | ||
95 | } | ||
96 | return de; | ||
97 | } | ||
98 | |||
99 | static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
100 | struct kstat *stat) | ||
101 | { | ||
102 | struct inode *inode = dentry->d_inode; | ||
103 | struct net *net; | ||
104 | |||
105 | net = get_proc_task_net(inode); | ||
106 | |||
107 | generic_fillattr(inode, stat); | ||
108 | |||
109 | if (net != NULL) { | ||
110 | stat->nlink = net->proc_net->nlink; | ||
111 | put_net(net); | ||
112 | } | ||
113 | |||
114 | return 0; | ||
115 | } | ||
116 | |||
117 | const struct inode_operations proc_net_inode_operations = { | ||
118 | .lookup = proc_tgid_net_lookup, | ||
119 | .getattr = proc_tgid_net_getattr, | ||
120 | }; | ||
121 | |||
122 | static int proc_tgid_net_readdir(struct file *filp, void *dirent, | ||
123 | filldir_t filldir) | ||
124 | { | ||
125 | int ret; | ||
126 | struct net *net; | ||
127 | |||
128 | ret = -EINVAL; | ||
129 | net = get_proc_task_net(filp->f_path.dentry->d_inode); | ||
130 | if (net != NULL) { | ||
131 | ret = proc_readdir_de(net->proc_net, filp, dirent, filldir); | ||
132 | put_net(net); | ||
133 | } | ||
134 | return ret; | ||
135 | } | ||
136 | |||
137 | const struct file_operations proc_net_operations = { | ||
138 | .read = generic_read_dir, | ||
139 | .readdir = proc_tgid_net_readdir, | ||
140 | }; | ||
141 | |||
66 | 142 | ||
67 | struct proc_dir_entry *proc_net_fops_create(struct net *net, | 143 | struct proc_dir_entry *proc_net_fops_create(struct net *net, |
68 | const char *name, mode_t mode, const struct file_operations *fops) | 144 | const char *name, mode_t mode, const struct file_operations *fops) |
@@ -83,14 +159,6 @@ struct net *get_proc_net(const struct inode *inode) | |||
83 | } | 159 | } |
84 | EXPORT_SYMBOL_GPL(get_proc_net); | 160 | EXPORT_SYMBOL_GPL(get_proc_net); |
85 | 161 | ||
86 | static struct proc_dir_entry *shadow_pde; | ||
87 | |||
88 | static struct proc_dir_entry *proc_net_shadow(struct task_struct *task, | ||
89 | struct proc_dir_entry *de) | ||
90 | { | ||
91 | return task->nsproxy->net_ns->proc_net; | ||
92 | } | ||
93 | |||
94 | struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, | 162 | struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, |
95 | struct proc_dir_entry *parent) | 163 | struct proc_dir_entry *parent) |
96 | { | 164 | { |
@@ -104,45 +172,39 @@ EXPORT_SYMBOL_GPL(proc_net_mkdir); | |||
104 | 172 | ||
105 | static __net_init int proc_net_ns_init(struct net *net) | 173 | static __net_init int proc_net_ns_init(struct net *net) |
106 | { | 174 | { |
107 | struct proc_dir_entry *root, *netd, *net_statd; | 175 | struct proc_dir_entry *netd, *net_statd; |
108 | int err; | 176 | int err; |
109 | 177 | ||
110 | err = -ENOMEM; | 178 | err = -ENOMEM; |
111 | root = kzalloc(sizeof(*root), GFP_KERNEL); | 179 | netd = kzalloc(sizeof(*netd), GFP_KERNEL); |
112 | if (!root) | 180 | if (!netd) |
113 | goto out; | 181 | goto out; |
114 | 182 | ||
115 | err = -EEXIST; | 183 | netd->data = net; |
116 | netd = proc_net_mkdir(net, "net", root); | 184 | netd->nlink = 2; |
117 | if (!netd) | 185 | netd->name = "net"; |
118 | goto free_root; | 186 | netd->namelen = 3; |
187 | netd->parent = &proc_root; | ||
119 | 188 | ||
120 | err = -EEXIST; | 189 | err = -EEXIST; |
121 | net_statd = proc_net_mkdir(net, "stat", netd); | 190 | net_statd = proc_net_mkdir(net, "stat", netd); |
122 | if (!net_statd) | 191 | if (!net_statd) |
123 | goto free_net; | 192 | goto free_net; |
124 | 193 | ||
125 | root->data = net; | ||
126 | |||
127 | net->proc_net_root = root; | ||
128 | net->proc_net = netd; | 194 | net->proc_net = netd; |
129 | net->proc_net_stat = net_statd; | 195 | net->proc_net_stat = net_statd; |
130 | err = 0; | 196 | return 0; |
131 | 197 | ||
198 | free_net: | ||
199 | kfree(netd); | ||
132 | out: | 200 | out: |
133 | return err; | 201 | return err; |
134 | free_net: | ||
135 | remove_proc_entry("net", root); | ||
136 | free_root: | ||
137 | kfree(root); | ||
138 | goto out; | ||
139 | } | 202 | } |
140 | 203 | ||
141 | static __net_exit void proc_net_ns_exit(struct net *net) | 204 | static __net_exit void proc_net_ns_exit(struct net *net) |
142 | { | 205 | { |
143 | remove_proc_entry("stat", net->proc_net); | 206 | remove_proc_entry("stat", net->proc_net); |
144 | remove_proc_entry("net", net->proc_net_root); | 207 | kfree(net->proc_net); |
145 | kfree(net->proc_net_root); | ||
146 | } | 208 | } |
147 | 209 | ||
148 | static struct pernet_operations __net_initdata proc_net_ns_ops = { | 210 | static struct pernet_operations __net_initdata proc_net_ns_ops = { |
@@ -152,8 +214,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = { | |||
152 | 214 | ||
153 | int __init proc_net_init(void) | 215 | int __init proc_net_init(void) |
154 | { | 216 | { |
155 | shadow_pde = proc_mkdir("net", NULL); | 217 | proc_symlink("net", NULL, "self/net"); |
156 | shadow_pde->shadow_proc = proc_net_shadow; | ||
157 | 218 | ||
158 | return register_pernet_subsys(&proc_net_ns_ops); | 219 | return register_pernet_subsys(&proc_net_ns_ops); |
159 | } | 220 | } |