diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2007-02-14 03:34:12 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-02-14 11:10:00 -0500 |
commit | 77b14db502cb85a031fe8fde6c85d52f3e0acb63 (patch) | |
tree | 4201f6a4dfe1062d1dc00659c403d630401b87cc /fs/proc/proc_sysctl.c | |
parent | 1ff007eb8e8c7c44e9a384a67d0fdd0fd06ba811 (diff) |
[PATCH] sysctl: reimplement the sysctl proc support
With this change the sysctl inodes can be cached and nothing needs to be done
when removing a sysctl table.
For a cost of 2K code we will save about 4K of static tables (when we remove
de from ctl_table) and 70K in proc_dir_entries that we will not allocate, or
about half that on a 32bit arch.
The speed feels about the same, even though we can now cache the sysctl
dentries :(
We get the core advantage that we don't need a 1-to-1 mapping between
ctl table entries and proc files, which makes it possible for /proc/sys to vary
depending on the namespace you are in. The currently merged namespaces don't
have an issue here, but the network namespace under /proc/sys/net needs to have
different directories depending on which network adapters are visible. Because
the proc side is now simply a cache, making different directories visible
depending on who you are is trivial to implement.
[akpm@osdl.org: fix uninitialised var]
[akpm@osdl.org: fix ARM build]
[bunk@stusta.de: make things static]
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/proc/proc_sysctl.c')
-rw-r--r-- | fs/proc/proc_sysctl.c | 478 |
1 files changed, 478 insertions, 0 deletions
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c new file mode 100644 index 000000000000..bb16a1e78826 --- /dev/null +++ b/fs/proc/proc_sysctl.c | |||
@@ -0,0 +1,478 @@ | |||
1 | /* | ||
2 | * /proc/sys support | ||
3 | */ | ||
4 | |||
5 | #include <linux/sysctl.h> | ||
6 | #include <linux/proc_fs.h> | ||
7 | #include <linux/security.h> | ||
8 | #include "internal.h" | ||
9 | |||
10 | static struct dentry_operations proc_sys_dentry_operations; | ||
11 | static const struct file_operations proc_sys_file_operations; | ||
12 | static struct inode_operations proc_sys_inode_operations; | ||
13 | |||
14 | static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) | ||
15 | { | ||
16 | /* Refresh the cached information bits in the inode */ | ||
17 | if (table) { | ||
18 | inode->i_uid = 0; | ||
19 | inode->i_gid = 0; | ||
20 | inode->i_mode = table->mode; | ||
21 | if (table->proc_handler) { | ||
22 | inode->i_mode |= S_IFREG; | ||
23 | inode->i_nlink = 1; | ||
24 | } else { | ||
25 | inode->i_mode |= S_IFDIR; | ||
26 | inode->i_nlink = 0; /* It is too hard to figure out */ | ||
27 | } | ||
28 | } | ||
29 | } | ||
30 | |||
31 | static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table) | ||
32 | { | ||
33 | struct inode *inode; | ||
34 | struct proc_inode *dir_ei, *ei; | ||
35 | int depth; | ||
36 | |||
37 | inode = new_inode(dir->i_sb); | ||
38 | if (!inode) | ||
39 | goto out; | ||
40 | |||
41 | /* A directory is always one deeper than it's parent */ | ||
42 | dir_ei = PROC_I(dir); | ||
43 | depth = dir_ei->fd + 1; | ||
44 | |||
45 | ei = PROC_I(inode); | ||
46 | ei->fd = depth; | ||
47 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
48 | inode->i_op = &proc_sys_inode_operations; | ||
49 | inode->i_fop = &proc_sys_file_operations; | ||
50 | proc_sys_refresh_inode(inode, table); | ||
51 | out: | ||
52 | return inode; | ||
53 | } | ||
54 | |||
55 | static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth) | ||
56 | { | ||
57 | for (;;) { | ||
58 | struct proc_inode *ei; | ||
59 | |||
60 | ei = PROC_I(dentry->d_inode); | ||
61 | if (ei->fd == depth) | ||
62 | break; /* found */ | ||
63 | |||
64 | dentry = dentry->d_parent; | ||
65 | } | ||
66 | return dentry; | ||
67 | } | ||
68 | |||
69 | static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table, | ||
70 | struct qstr *name) | ||
71 | { | ||
72 | int len; | ||
73 | for ( ; table->ctl_name || table->procname; table++) { | ||
74 | |||
75 | if (!table->procname) | ||
76 | continue; | ||
77 | |||
78 | len = strlen(table->procname); | ||
79 | if (len != name->len) | ||
80 | continue; | ||
81 | |||
82 | if (memcmp(table->procname, name->name, len) != 0) | ||
83 | continue; | ||
84 | |||
85 | /* I have a match */ | ||
86 | return table; | ||
87 | } | ||
88 | return NULL; | ||
89 | } | ||
90 | |||
91 | static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry, | ||
92 | struct ctl_table *table) | ||
93 | { | ||
94 | struct dentry *ancestor; | ||
95 | struct proc_inode *ei; | ||
96 | int depth, i; | ||
97 | |||
98 | ei = PROC_I(dentry->d_inode); | ||
99 | depth = ei->fd; | ||
100 | |||
101 | if (depth == 0) | ||
102 | return table; | ||
103 | |||
104 | for (i = 1; table && (i <= depth); i++) { | ||
105 | ancestor = proc_sys_ancestor(dentry, i); | ||
106 | table = proc_sys_lookup_table_one(table, &ancestor->d_name); | ||
107 | if (table) | ||
108 | table = table->child; | ||
109 | } | ||
110 | return table; | ||
111 | |||
112 | } | ||
113 | static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent, | ||
114 | struct qstr *name, | ||
115 | struct ctl_table *table) | ||
116 | { | ||
117 | table = proc_sys_lookup_table(dparent, table); | ||
118 | if (table) | ||
119 | table = proc_sys_lookup_table_one(table, name); | ||
120 | return table; | ||
121 | } | ||
122 | |||
123 | static struct ctl_table *do_proc_sys_lookup(struct dentry *parent, | ||
124 | struct qstr *name, | ||
125 | struct ctl_table_header **ptr) | ||
126 | { | ||
127 | struct ctl_table_header *head; | ||
128 | struct ctl_table *table = NULL; | ||
129 | |||
130 | for (head = sysctl_head_next(NULL); head; | ||
131 | head = sysctl_head_next(head)) { | ||
132 | table = proc_sys_lookup_entry(parent, name, head->ctl_table); | ||
133 | if (table) | ||
134 | break; | ||
135 | } | ||
136 | *ptr = head; | ||
137 | return table; | ||
138 | } | ||
139 | |||
140 | static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, | ||
141 | struct nameidata *nd) | ||
142 | { | ||
143 | struct ctl_table_header *head; | ||
144 | struct inode *inode; | ||
145 | struct dentry *err; | ||
146 | struct ctl_table *table; | ||
147 | |||
148 | err = ERR_PTR(-ENOENT); | ||
149 | table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); | ||
150 | if (!table) | ||
151 | goto out; | ||
152 | |||
153 | err = ERR_PTR(-ENOMEM); | ||
154 | inode = proc_sys_make_inode(dir, table); | ||
155 | if (!inode) | ||
156 | goto out; | ||
157 | |||
158 | err = NULL; | ||
159 | dentry->d_op = &proc_sys_dentry_operations; | ||
160 | d_add(dentry, inode); | ||
161 | |||
162 | out: | ||
163 | sysctl_head_finish(head); | ||
164 | return err; | ||
165 | } | ||
166 | |||
167 | static ssize_t proc_sys_read(struct file *filp, char __user *buf, | ||
168 | size_t count, loff_t *ppos) | ||
169 | { | ||
170 | struct dentry *dentry = filp->f_dentry; | ||
171 | struct ctl_table_header *head; | ||
172 | struct ctl_table *table; | ||
173 | ssize_t error, res; | ||
174 | |||
175 | table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); | ||
176 | /* Has the sysctl entry disappeared on us? */ | ||
177 | error = -ENOENT; | ||
178 | if (!table) | ||
179 | goto out; | ||
180 | |||
181 | /* Has the sysctl entry been replaced by a directory? */ | ||
182 | error = -EISDIR; | ||
183 | if (!table->proc_handler) | ||
184 | goto out; | ||
185 | |||
186 | /* | ||
187 | * At this point we know that the sysctl was not unregistered | ||
188 | * and won't be until we finish. | ||
189 | */ | ||
190 | error = -EPERM; | ||
191 | if (sysctl_perm(table, MAY_READ)) | ||
192 | goto out; | ||
193 | |||
194 | /* careful: calling conventions are nasty here */ | ||
195 | res = count; | ||
196 | error = table->proc_handler(table, 0, filp, buf, &res, ppos); | ||
197 | if (!error) | ||
198 | error = res; | ||
199 | out: | ||
200 | sysctl_head_finish(head); | ||
201 | |||
202 | return error; | ||
203 | } | ||
204 | |||
205 | static ssize_t proc_sys_write(struct file *filp, const char __user *buf, | ||
206 | size_t count, loff_t *ppos) | ||
207 | { | ||
208 | struct dentry *dentry = filp->f_dentry; | ||
209 | struct ctl_table_header *head; | ||
210 | struct ctl_table *table; | ||
211 | ssize_t error, res; | ||
212 | |||
213 | table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); | ||
214 | /* Has the sysctl entry disappeared on us? */ | ||
215 | error = -ENOENT; | ||
216 | if (!table) | ||
217 | goto out; | ||
218 | |||
219 | /* Has the sysctl entry been replaced by a directory? */ | ||
220 | error = -EISDIR; | ||
221 | if (!table->proc_handler) | ||
222 | goto out; | ||
223 | |||
224 | /* | ||
225 | * At this point we know that the sysctl was not unregistered | ||
226 | * and won't be until we finish. | ||
227 | */ | ||
228 | error = -EPERM; | ||
229 | if (sysctl_perm(table, MAY_WRITE)) | ||
230 | goto out; | ||
231 | |||
232 | /* careful: calling conventions are nasty here */ | ||
233 | res = count; | ||
234 | error = table->proc_handler(table, 1, filp, (char __user *)buf, | ||
235 | &res, ppos); | ||
236 | if (!error) | ||
237 | error = res; | ||
238 | out: | ||
239 | sysctl_head_finish(head); | ||
240 | |||
241 | return error; | ||
242 | } | ||
243 | |||
244 | |||
245 | static int proc_sys_fill_cache(struct file *filp, void *dirent, | ||
246 | filldir_t filldir, struct ctl_table *table) | ||
247 | { | ||
248 | struct ctl_table_header *head; | ||
249 | struct ctl_table *child_table = NULL; | ||
250 | struct dentry *child, *dir = filp->f_path.dentry; | ||
251 | struct inode *inode; | ||
252 | struct qstr qname; | ||
253 | ino_t ino = 0; | ||
254 | unsigned type = DT_UNKNOWN; | ||
255 | int ret; | ||
256 | |||
257 | qname.name = table->procname; | ||
258 | qname.len = strlen(table->procname); | ||
259 | qname.hash = full_name_hash(qname.name, qname.len); | ||
260 | |||
261 | /* Suppress duplicates. | ||
262 | * Only fill a directory entry if it is the value that | ||
263 | * an ordinary lookup of that name returns. Hide all | ||
264 | * others. | ||
265 | * | ||
266 | * If we ever cache this translation in the dcache | ||
267 | * I should do a dcache lookup first. But for now | ||
268 | * it is just simpler not to. | ||
269 | */ | ||
270 | ret = 0; | ||
271 | child_table = do_proc_sys_lookup(dir, &qname, &head); | ||
272 | sysctl_head_finish(head); | ||
273 | if (child_table != table) | ||
274 | return 0; | ||
275 | |||
276 | child = d_lookup(dir, &qname); | ||
277 | if (!child) { | ||
278 | struct dentry *new; | ||
279 | new = d_alloc(dir, &qname); | ||
280 | if (new) { | ||
281 | inode = proc_sys_make_inode(dir->d_inode, table); | ||
282 | if (!inode) | ||
283 | child = ERR_PTR(-ENOMEM); | ||
284 | else { | ||
285 | new->d_op = &proc_sys_dentry_operations; | ||
286 | d_add(new, inode); | ||
287 | } | ||
288 | if (child) | ||
289 | dput(new); | ||
290 | else | ||
291 | child = new; | ||
292 | } | ||
293 | } | ||
294 | if (!child || IS_ERR(child) || !child->d_inode) | ||
295 | goto end_instantiate; | ||
296 | inode = child->d_inode; | ||
297 | if (inode) { | ||
298 | ino = inode->i_ino; | ||
299 | type = inode->i_mode >> 12; | ||
300 | } | ||
301 | dput(child); | ||
302 | end_instantiate: | ||
303 | if (!ino) | ||
304 | ino= find_inode_number(dir, &qname); | ||
305 | if (!ino) | ||
306 | ino = 1; | ||
307 | return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); | ||
308 | } | ||
309 | |||
310 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
311 | { | ||
312 | struct dentry *dentry = filp->f_dentry; | ||
313 | struct inode *inode = dentry->d_inode; | ||
314 | struct ctl_table_header *head = NULL; | ||
315 | struct ctl_table *table; | ||
316 | unsigned long pos; | ||
317 | int ret; | ||
318 | |||
319 | ret = -ENOTDIR; | ||
320 | if (!S_ISDIR(inode->i_mode)) | ||
321 | goto out; | ||
322 | |||
323 | ret = 0; | ||
324 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ | ||
325 | if (filp->f_pos == 0) { | ||
326 | if (filldir(dirent, ".", 1, filp->f_pos, | ||
327 | inode->i_ino, DT_DIR) < 0) | ||
328 | goto out; | ||
329 | filp->f_pos++; | ||
330 | } | ||
331 | if (filp->f_pos == 1) { | ||
332 | if (filldir(dirent, "..", 2, filp->f_pos, | ||
333 | parent_ino(dentry), DT_DIR) < 0) | ||
334 | goto out; | ||
335 | filp->f_pos++; | ||
336 | } | ||
337 | pos = 2; | ||
338 | |||
339 | /* - Find each instance of the directory | ||
340 | * - Read all entries in each instance | ||
341 | * - Before returning an entry to user space lookup the entry | ||
342 | * by name and if I find a different entry don't return | ||
343 | * this one because it means it is a buried dup. | ||
344 | * For sysctl this should only happen for directory entries. | ||
345 | */ | ||
346 | for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) { | ||
347 | table = proc_sys_lookup_table(dentry, head->ctl_table); | ||
348 | |||
349 | if (!table) | ||
350 | continue; | ||
351 | |||
352 | for (; table->ctl_name || table->procname; table++, pos++) { | ||
353 | /* Can't do anything without a proc name */ | ||
354 | if (!table->procname) | ||
355 | continue; | ||
356 | |||
357 | if (pos < filp->f_pos) | ||
358 | continue; | ||
359 | |||
360 | if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0) | ||
361 | goto out; | ||
362 | filp->f_pos = pos + 1; | ||
363 | } | ||
364 | } | ||
365 | ret = 1; | ||
366 | out: | ||
367 | sysctl_head_finish(head); | ||
368 | return ret; | ||
369 | } | ||
370 | |||
371 | static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd) | ||
372 | { | ||
373 | /* | ||
374 | * sysctl entries that are not writeable, | ||
375 | * are _NOT_ writeable, capabilities or not. | ||
376 | */ | ||
377 | struct ctl_table_header *head; | ||
378 | struct ctl_table *table; | ||
379 | struct dentry *dentry; | ||
380 | int mode; | ||
381 | int depth; | ||
382 | int error; | ||
383 | |||
384 | head = NULL; | ||
385 | depth = PROC_I(inode)->fd; | ||
386 | |||
387 | /* First check the cached permissions, in case we don't have | ||
388 | * enough information to lookup the sysctl table entry. | ||
389 | */ | ||
390 | error = -EACCES; | ||
391 | mode = inode->i_mode; | ||
392 | |||
393 | if (current->euid == 0) | ||
394 | mode >>= 6; | ||
395 | else if (in_group_p(0)) | ||
396 | mode >>= 3; | ||
397 | |||
398 | if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask) | ||
399 | error = 0; | ||
400 | |||
401 | /* If we can't get a sysctl table entry the permission | ||
402 | * checks on the cached mode will have to be enough. | ||
403 | */ | ||
404 | if (!nd || !depth) | ||
405 | goto out; | ||
406 | |||
407 | dentry = nd->dentry; | ||
408 | table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); | ||
409 | |||
410 | /* If the entry does not exist deny permission */ | ||
411 | error = -EACCES; | ||
412 | if (!table) | ||
413 | goto out; | ||
414 | |||
415 | /* Use the permissions on the sysctl table entry */ | ||
416 | error = sysctl_perm(table, mask); | ||
417 | out: | ||
418 | sysctl_head_finish(head); | ||
419 | return error; | ||
420 | } | ||
421 | |||
422 | static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) | ||
423 | { | ||
424 | struct inode *inode = dentry->d_inode; | ||
425 | int error; | ||
426 | |||
427 | if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) | ||
428 | return -EPERM; | ||
429 | |||
430 | error = inode_change_ok(inode, attr); | ||
431 | if (!error) { | ||
432 | error = security_inode_setattr(dentry, attr); | ||
433 | if (!error) | ||
434 | error = inode_setattr(inode, attr); | ||
435 | } | ||
436 | |||
437 | return error; | ||
438 | } | ||
439 | |||
440 | /* I'm lazy and don't distinguish between files and directories, | ||
441 | * until access time. | ||
442 | */ | ||
443 | static const struct file_operations proc_sys_file_operations = { | ||
444 | .read = proc_sys_read, | ||
445 | .write = proc_sys_write, | ||
446 | .readdir = proc_sys_readdir, | ||
447 | }; | ||
448 | |||
449 | static struct inode_operations proc_sys_inode_operations = { | ||
450 | .lookup = proc_sys_lookup, | ||
451 | .permission = proc_sys_permission, | ||
452 | .setattr = proc_sys_setattr, | ||
453 | }; | ||
454 | |||
455 | static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) | ||
456 | { | ||
457 | struct ctl_table_header *head; | ||
458 | struct ctl_table *table; | ||
459 | table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); | ||
460 | proc_sys_refresh_inode(dentry->d_inode, table); | ||
461 | sysctl_head_finish(head); | ||
462 | return !!table; | ||
463 | } | ||
464 | |||
/*
 * Dentry operations installed on every dentry this file instantiates;
 * the only hook is d_revalidate.
 */
static struct dentry_operations proc_sys_dentry_operations = {
	.d_revalidate	= proc_sys_revalidate,
};
468 | |||
469 | static struct proc_dir_entry *proc_sys_root; | ||
470 | |||
471 | int proc_sys_init(void) | ||
472 | { | ||
473 | proc_sys_root = proc_mkdir("sys", NULL); | ||
474 | proc_sys_root->proc_iops = &proc_sys_inode_operations; | ||
475 | proc_sys_root->proc_fops = &proc_sys_file_operations; | ||
476 | proc_sys_root->nlink = 0; | ||
477 | return 0; | ||
478 | } | ||