aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2007-02-14 03:34:12 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-02-14 11:10:00 -0500
commit77b14db502cb85a031fe8fde6c85d52f3e0acb63 (patch)
tree4201f6a4dfe1062d1dc00659c403d630401b87cc /fs/proc
parent1ff007eb8e8c7c44e9a384a67d0fdd0fd06ba811 (diff)
[PATCH] sysctl: reimplement the sysctl proc support
With this change the sysctl inodes can be cached and nothing needs to be done when removing a sysctl table. For a cost of 2K code we will save about 4K of static tables (when we remove de from ctl_table) and 70K in proc_dir_entries that we will not allocate, or about half that on a 32bit arch. The speed feels about the same, even though we can now cache the sysctl dentries :( We get the core advantage that we don't need to have a 1 to 1 mapping between ctl table entries and proc files. Making it possible to have /proc/sys vary depending on the namespace you are in. The currently merged namespaces don't have an issue here but the network namespace under /proc/sys/net needs to have different directories depending on which network adapters are visible. By simply being a cache different directories being visible depending on who you are is trivial to implement. [akpm@osdl.org: fix uninitialised var] [akpm@osdl.org: fix ARM build] [bunk@stusta.de: make things static] Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Cc: Russell King <rmk@arm.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/generic.c2
-rw-r--r--fs/proc/inode.c1
-rw-r--r--fs/proc/internal.h2
-rw-r--r--fs/proc/proc_sysctl.c478
-rw-r--r--fs/proc/root.c10
6 files changed, 486 insertions, 9 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index f6c776272572..a6b3a8f878f0 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o
8proc-$(CONFIG_MMU) := mmu.o task_mmu.o 8proc-$(CONFIG_MMU) := mmu.o task_mmu.o
9 9
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o proc_misc.o 11 proc_tty.o proc_misc.o proc_sysctl.o
12 12
13proc-$(CONFIG_PROC_KCORE) += kcore.o 13proc-$(CONFIG_PROC_KCORE) += kcore.o
14proc-$(CONFIG_PROC_VMCORE) += vmcore.o 14proc-$(CONFIG_PROC_VMCORE) += vmcore.o
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 0cdc00d9d97e..775fb21294d8 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -32,7 +32,7 @@ static loff_t proc_file_lseek(struct file *, loff_t, int);
32 32
33DEFINE_SPINLOCK(proc_subdir_lock); 33DEFINE_SPINLOCK(proc_subdir_lock);
34 34
35int proc_match(int len, const char *name, struct proc_dir_entry *de) 35static int proc_match(int len, const char *name, struct proc_dir_entry *de)
36{ 36{
37 if (de->namelen != len) 37 if (de->namelen != len)
38 return 0; 38 return 0;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index f6722be37dde..c372eb151a3a 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -161,6 +161,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
161 if (!inode) 161 if (!inode)
162 goto out_ino; 162 goto out_ino;
163 163
164 PROC_I(inode)->fd = 0;
164 PROC_I(inode)->pde = de; 165 PROC_I(inode)->pde = de;
165 if (de) { 166 if (de) {
166 if (de->mode) { 167 if (de->mode) {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 277dcd66ebe2..c932aa65e198 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,8 @@
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13 13
14extern int proc_sys_init(void);
15
14struct vmalloc_info { 16struct vmalloc_info {
15 unsigned long used; 17 unsigned long used;
16 unsigned long largest_chunk; 18 unsigned long largest_chunk;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
new file mode 100644
index 000000000000..bb16a1e78826
--- /dev/null
+++ b/fs/proc/proc_sysctl.c
@@ -0,0 +1,478 @@
1/*
2 * /proc/sys support
3 */
4
5#include <linux/sysctl.h>
6#include <linux/proc_fs.h>
7#include <linux/security.h>
8#include "internal.h"
9
10static struct dentry_operations proc_sys_dentry_operations;
11static const struct file_operations proc_sys_file_operations;
12static struct inode_operations proc_sys_inode_operations;
13
14static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table)
15{
16 /* Refresh the cached information bits in the inode */
17 if (table) {
18 inode->i_uid = 0;
19 inode->i_gid = 0;
20 inode->i_mode = table->mode;
21 if (table->proc_handler) {
22 inode->i_mode |= S_IFREG;
23 inode->i_nlink = 1;
24 } else {
25 inode->i_mode |= S_IFDIR;
26 inode->i_nlink = 0; /* It is too hard to figure out */
27 }
28 }
29}
30
31static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
32{
33 struct inode *inode;
34 struct proc_inode *dir_ei, *ei;
35 int depth;
36
37 inode = new_inode(dir->i_sb);
38 if (!inode)
39 goto out;
40
41 /* A directory is always one deeper than it's parent */
42 dir_ei = PROC_I(dir);
43 depth = dir_ei->fd + 1;
44
45 ei = PROC_I(inode);
46 ei->fd = depth;
47 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
48 inode->i_op = &proc_sys_inode_operations;
49 inode->i_fop = &proc_sys_file_operations;
50 proc_sys_refresh_inode(inode, table);
51out:
52 return inode;
53}
54
55static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth)
56{
57 for (;;) {
58 struct proc_inode *ei;
59
60 ei = PROC_I(dentry->d_inode);
61 if (ei->fd == depth)
62 break; /* found */
63
64 dentry = dentry->d_parent;
65 }
66 return dentry;
67}
68
69static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
70 struct qstr *name)
71{
72 int len;
73 for ( ; table->ctl_name || table->procname; table++) {
74
75 if (!table->procname)
76 continue;
77
78 len = strlen(table->procname);
79 if (len != name->len)
80 continue;
81
82 if (memcmp(table->procname, name->name, len) != 0)
83 continue;
84
85 /* I have a match */
86 return table;
87 }
88 return NULL;
89}
90
91static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry,
92 struct ctl_table *table)
93{
94 struct dentry *ancestor;
95 struct proc_inode *ei;
96 int depth, i;
97
98 ei = PROC_I(dentry->d_inode);
99 depth = ei->fd;
100
101 if (depth == 0)
102 return table;
103
104 for (i = 1; table && (i <= depth); i++) {
105 ancestor = proc_sys_ancestor(dentry, i);
106 table = proc_sys_lookup_table_one(table, &ancestor->d_name);
107 if (table)
108 table = table->child;
109 }
110 return table;
111
112}
113static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
114 struct qstr *name,
115 struct ctl_table *table)
116{
117 table = proc_sys_lookup_table(dparent, table);
118 if (table)
119 table = proc_sys_lookup_table_one(table, name);
120 return table;
121}
122
123static struct ctl_table *do_proc_sys_lookup(struct dentry *parent,
124 struct qstr *name,
125 struct ctl_table_header **ptr)
126{
127 struct ctl_table_header *head;
128 struct ctl_table *table = NULL;
129
130 for (head = sysctl_head_next(NULL); head;
131 head = sysctl_head_next(head)) {
132 table = proc_sys_lookup_entry(parent, name, head->ctl_table);
133 if (table)
134 break;
135 }
136 *ptr = head;
137 return table;
138}
139
140static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
141 struct nameidata *nd)
142{
143 struct ctl_table_header *head;
144 struct inode *inode;
145 struct dentry *err;
146 struct ctl_table *table;
147
148 err = ERR_PTR(-ENOENT);
149 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
150 if (!table)
151 goto out;
152
153 err = ERR_PTR(-ENOMEM);
154 inode = proc_sys_make_inode(dir, table);
155 if (!inode)
156 goto out;
157
158 err = NULL;
159 dentry->d_op = &proc_sys_dentry_operations;
160 d_add(dentry, inode);
161
162out:
163 sysctl_head_finish(head);
164 return err;
165}
166
167static ssize_t proc_sys_read(struct file *filp, char __user *buf,
168 size_t count, loff_t *ppos)
169{
170 struct dentry *dentry = filp->f_dentry;
171 struct ctl_table_header *head;
172 struct ctl_table *table;
173 ssize_t error, res;
174
175 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
176 /* Has the sysctl entry disappeared on us? */
177 error = -ENOENT;
178 if (!table)
179 goto out;
180
181 /* Has the sysctl entry been replaced by a directory? */
182 error = -EISDIR;
183 if (!table->proc_handler)
184 goto out;
185
186 /*
187 * At this point we know that the sysctl was not unregistered
188 * and won't be until we finish.
189 */
190 error = -EPERM;
191 if (sysctl_perm(table, MAY_READ))
192 goto out;
193
194 /* careful: calling conventions are nasty here */
195 res = count;
196 error = table->proc_handler(table, 0, filp, buf, &res, ppos);
197 if (!error)
198 error = res;
199out:
200 sysctl_head_finish(head);
201
202 return error;
203}
204
205static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
206 size_t count, loff_t *ppos)
207{
208 struct dentry *dentry = filp->f_dentry;
209 struct ctl_table_header *head;
210 struct ctl_table *table;
211 ssize_t error, res;
212
213 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
214 /* Has the sysctl entry disappeared on us? */
215 error = -ENOENT;
216 if (!table)
217 goto out;
218
219 /* Has the sysctl entry been replaced by a directory? */
220 error = -EISDIR;
221 if (!table->proc_handler)
222 goto out;
223
224 /*
225 * At this point we know that the sysctl was not unregistered
226 * and won't be until we finish.
227 */
228 error = -EPERM;
229 if (sysctl_perm(table, MAY_WRITE))
230 goto out;
231
232 /* careful: calling conventions are nasty here */
233 res = count;
234 error = table->proc_handler(table, 1, filp, (char __user *)buf,
235 &res, ppos);
236 if (!error)
237 error = res;
238out:
239 sysctl_head_finish(head);
240
241 return error;
242}
243
244
245static int proc_sys_fill_cache(struct file *filp, void *dirent,
246 filldir_t filldir, struct ctl_table *table)
247{
248 struct ctl_table_header *head;
249 struct ctl_table *child_table = NULL;
250 struct dentry *child, *dir = filp->f_path.dentry;
251 struct inode *inode;
252 struct qstr qname;
253 ino_t ino = 0;
254 unsigned type = DT_UNKNOWN;
255 int ret;
256
257 qname.name = table->procname;
258 qname.len = strlen(table->procname);
259 qname.hash = full_name_hash(qname.name, qname.len);
260
261 /* Suppress duplicates.
262 * Only fill a directory entry if it is the value that
263 * an ordinary lookup of that name returns. Hide all
264 * others.
265 *
266 * If we ever cache this translation in the dcache
267 * I should do a dcache lookup first. But for now
268 * it is just simpler not to.
269 */
270 ret = 0;
271 child_table = do_proc_sys_lookup(dir, &qname, &head);
272 sysctl_head_finish(head);
273 if (child_table != table)
274 return 0;
275
276 child = d_lookup(dir, &qname);
277 if (!child) {
278 struct dentry *new;
279 new = d_alloc(dir, &qname);
280 if (new) {
281 inode = proc_sys_make_inode(dir->d_inode, table);
282 if (!inode)
283 child = ERR_PTR(-ENOMEM);
284 else {
285 new->d_op = &proc_sys_dentry_operations;
286 d_add(new, inode);
287 }
288 if (child)
289 dput(new);
290 else
291 child = new;
292 }
293 }
294 if (!child || IS_ERR(child) || !child->d_inode)
295 goto end_instantiate;
296 inode = child->d_inode;
297 if (inode) {
298 ino = inode->i_ino;
299 type = inode->i_mode >> 12;
300 }
301 dput(child);
302end_instantiate:
303 if (!ino)
304 ino= find_inode_number(dir, &qname);
305 if (!ino)
306 ino = 1;
307 return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
308}
309
310static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
311{
312 struct dentry *dentry = filp->f_dentry;
313 struct inode *inode = dentry->d_inode;
314 struct ctl_table_header *head = NULL;
315 struct ctl_table *table;
316 unsigned long pos;
317 int ret;
318
319 ret = -ENOTDIR;
320 if (!S_ISDIR(inode->i_mode))
321 goto out;
322
323 ret = 0;
324 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
325 if (filp->f_pos == 0) {
326 if (filldir(dirent, ".", 1, filp->f_pos,
327 inode->i_ino, DT_DIR) < 0)
328 goto out;
329 filp->f_pos++;
330 }
331 if (filp->f_pos == 1) {
332 if (filldir(dirent, "..", 2, filp->f_pos,
333 parent_ino(dentry), DT_DIR) < 0)
334 goto out;
335 filp->f_pos++;
336 }
337 pos = 2;
338
339 /* - Find each instance of the directory
340 * - Read all entries in each instance
341 * - Before returning an entry to user space lookup the entry
342 * by name and if I find a different entry don't return
343 * this one because it means it is a buried dup.
344 * For sysctl this should only happen for directory entries.
345 */
346 for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
347 table = proc_sys_lookup_table(dentry, head->ctl_table);
348
349 if (!table)
350 continue;
351
352 for (; table->ctl_name || table->procname; table++, pos++) {
353 /* Can't do anything without a proc name */
354 if (!table->procname)
355 continue;
356
357 if (pos < filp->f_pos)
358 continue;
359
360 if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
361 goto out;
362 filp->f_pos = pos + 1;
363 }
364 }
365 ret = 1;
366out:
367 sysctl_head_finish(head);
368 return ret;
369}
370
371static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd)
372{
373 /*
374 * sysctl entries that are not writeable,
375 * are _NOT_ writeable, capabilities or not.
376 */
377 struct ctl_table_header *head;
378 struct ctl_table *table;
379 struct dentry *dentry;
380 int mode;
381 int depth;
382 int error;
383
384 head = NULL;
385 depth = PROC_I(inode)->fd;
386
387 /* First check the cached permissions, in case we don't have
388 * enough information to lookup the sysctl table entry.
389 */
390 error = -EACCES;
391 mode = inode->i_mode;
392
393 if (current->euid == 0)
394 mode >>= 6;
395 else if (in_group_p(0))
396 mode >>= 3;
397
398 if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
399 error = 0;
400
401 /* If we can't get a sysctl table entry the permission
402 * checks on the cached mode will have to be enough.
403 */
404 if (!nd || !depth)
405 goto out;
406
407 dentry = nd->dentry;
408 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
409
410 /* If the entry does not exist deny permission */
411 error = -EACCES;
412 if (!table)
413 goto out;
414
415 /* Use the permissions on the sysctl table entry */
416 error = sysctl_perm(table, mask);
417out:
418 sysctl_head_finish(head);
419 return error;
420}
421
422static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
423{
424 struct inode *inode = dentry->d_inode;
425 int error;
426
427 if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
428 return -EPERM;
429
430 error = inode_change_ok(inode, attr);
431 if (!error) {
432 error = security_inode_setattr(dentry, attr);
433 if (!error)
434 error = inode_setattr(inode, attr);
435 }
436
437 return error;
438}
439
440/* I'm lazy and don't distinguish between files and directories,
441 * until access time.
442 */
443static const struct file_operations proc_sys_file_operations = {
444 .read = proc_sys_read,
445 .write = proc_sys_write,
446 .readdir = proc_sys_readdir,
447};
448
449static struct inode_operations proc_sys_inode_operations = {
450 .lookup = proc_sys_lookup,
451 .permission = proc_sys_permission,
452 .setattr = proc_sys_setattr,
453};
454
455static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
456{
457 struct ctl_table_header *head;
458 struct ctl_table *table;
459 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
460 proc_sys_refresh_inode(dentry->d_inode, table);
461 sysctl_head_finish(head);
462 return !!table;
463}
464
465static struct dentry_operations proc_sys_dentry_operations = {
466 .d_revalidate = proc_sys_revalidate,
467};
468
469static struct proc_dir_entry *proc_sys_root;
470
471int proc_sys_init(void)
472{
473 proc_sys_root = proc_mkdir("sys", NULL);
474 proc_sys_root->proc_iops = &proc_sys_inode_operations;
475 proc_sys_root->proc_fops = &proc_sys_file_operations;
476 proc_sys_root->nlink = 0;
477 return 0;
478}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 6ae222b509ce..5834a744c2a9 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -23,10 +23,6 @@
23 23
24struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; 24struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
25 25
26#ifdef CONFIG_SYSCTL
27struct proc_dir_entry *proc_sys_root;
28#endif
29
30static int proc_get_sb(struct file_system_type *fs_type, 26static int proc_get_sb(struct file_system_type *fs_type,
31 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 27 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
32{ 28{
@@ -71,9 +67,6 @@ void __init proc_root_init(void)
71#ifdef CONFIG_SYSVIPC 67#ifdef CONFIG_SYSVIPC
72 proc_mkdir("sysvipc", NULL); 68 proc_mkdir("sysvipc", NULL);
73#endif 69#endif
74#ifdef CONFIG_SYSCTL
75 proc_sys_root = proc_mkdir("sys", NULL);
76#endif
77 proc_root_fs = proc_mkdir("fs", NULL); 70 proc_root_fs = proc_mkdir("fs", NULL);
78 proc_root_driver = proc_mkdir("driver", NULL); 71 proc_root_driver = proc_mkdir("driver", NULL);
79 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ 72 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
@@ -86,6 +79,9 @@ void __init proc_root_init(void)
86 proc_device_tree_init(); 79 proc_device_tree_init();
87#endif 80#endif
88 proc_bus = proc_mkdir("bus", NULL); 81 proc_bus = proc_mkdir("bus", NULL);
82#ifdef CONFIG_SYSCTL
83 proc_sys_init();
84#endif
89} 85}
90 86
91static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat 87static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat