diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2007-02-14 03:34:12 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-02-14 11:10:00 -0500 |
commit | 77b14db502cb85a031fe8fde6c85d52f3e0acb63 (patch) | |
tree | 4201f6a4dfe1062d1dc00659c403d630401b87cc | |
parent | 1ff007eb8e8c7c44e9a384a67d0fdd0fd06ba811 (diff) |
[PATCH] sysctl: reimplement the sysctl proc support
With this change the sysctl inodes can be cached and nothing needs to be done
when removing a sysctl table.
For a cost of 2K code we will save about 4K of static tables (when we remove
de from ctl_table) and 70K in proc_dir_entries that we will not allocate, or
about half that on a 32bit arch.
The speed feels about the same, even though we can now cache the sysctl
dentries :(
We get the core advantage that we don't need to have a 1 to 1 mapping between
ctl table entries and proc files, which makes it possible to have /proc/sys vary
depending on the namespace you are in. The currently merged namespaces don't
have an issue here, but the network namespace under /proc/sys/net needs to have
different directories depending on which network adapters are visible. Because
/proc/sys is now simply a cache, making different directories visible depending
on who you are is trivial to implement.
[akpm@osdl.org: fix uninitialised var]
[akpm@osdl.org: fix ARM build]
[bunk@stusta.de: make things static]
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/proc/Makefile | 2 | ||||
-rw-r--r-- | fs/proc/generic.c | 2 | ||||
-rw-r--r-- | fs/proc/inode.c | 1 | ||||
-rw-r--r-- | fs/proc/internal.h | 2 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 478 | ||||
-rw-r--r-- | fs/proc/root.c | 10 | ||||
-rw-r--r-- | include/linux/proc_fs.h | 2 | ||||
-rw-r--r-- | include/linux/sysctl.h | 2 | ||||
-rw-r--r-- | init/main.c | 4 | ||||
-rw-r--r-- | kernel/sysctl.c | 182 |
10 files changed, 486 insertions, 199 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index f6c776272572..a6b3a8f878f0 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
@@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o | |||
8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o | 8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o |
9 | 9 | ||
10 | proc-y += inode.o root.o base.o generic.o array.o \ | 10 | proc-y += inode.o root.o base.o generic.o array.o \ |
11 | proc_tty.o proc_misc.o | 11 | proc_tty.o proc_misc.o proc_sysctl.o |
12 | 12 | ||
13 | proc-$(CONFIG_PROC_KCORE) += kcore.o | 13 | proc-$(CONFIG_PROC_KCORE) += kcore.o |
14 | proc-$(CONFIG_PROC_VMCORE) += vmcore.o | 14 | proc-$(CONFIG_PROC_VMCORE) += vmcore.o |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 0cdc00d9d97e..775fb21294d8 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -32,7 +32,7 @@ static loff_t proc_file_lseek(struct file *, loff_t, int); | |||
32 | 32 | ||
33 | DEFINE_SPINLOCK(proc_subdir_lock); | 33 | DEFINE_SPINLOCK(proc_subdir_lock); |
34 | 34 | ||
35 | int proc_match(int len, const char *name, struct proc_dir_entry *de) | 35 | static int proc_match(int len, const char *name, struct proc_dir_entry *de) |
36 | { | 36 | { |
37 | if (de->namelen != len) | 37 | if (de->namelen != len) |
38 | return 0; | 38 | return 0; |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index f6722be37dde..c372eb151a3a 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -161,6 +161,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, | |||
161 | if (!inode) | 161 | if (!inode) |
162 | goto out_ino; | 162 | goto out_ino; |
163 | 163 | ||
164 | PROC_I(inode)->fd = 0; | ||
164 | PROC_I(inode)->pde = de; | 165 | PROC_I(inode)->pde = de; |
165 | if (de) { | 166 | if (de) { |
166 | if (de->mode) { | 167 | if (de->mode) { |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 277dcd66ebe2..c932aa65e198 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -11,6 +11,8 @@ | |||
11 | 11 | ||
12 | #include <linux/proc_fs.h> | 12 | #include <linux/proc_fs.h> |
13 | 13 | ||
14 | extern int proc_sys_init(void); | ||
15 | |||
14 | struct vmalloc_info { | 16 | struct vmalloc_info { |
15 | unsigned long used; | 17 | unsigned long used; |
16 | unsigned long largest_chunk; | 18 | unsigned long largest_chunk; |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c new file mode 100644 index 000000000000..bb16a1e78826 --- /dev/null +++ b/fs/proc/proc_sysctl.c | |||
@@ -0,0 +1,478 @@ | |||
1 | /* | ||
2 | * /proc/sys support | ||
3 | */ | ||
4 | |||
5 | #include <linux/sysctl.h> | ||
6 | #include <linux/proc_fs.h> | ||
7 | #include <linux/security.h> | ||
8 | #include "internal.h" | ||
9 | |||
10 | static struct dentry_operations proc_sys_dentry_operations; | ||
11 | static const struct file_operations proc_sys_file_operations; | ||
12 | static struct inode_operations proc_sys_inode_operations; | ||
13 | |||
14 | static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) | ||
15 | { | ||
16 | /* Refresh the cached information bits in the inode */ | ||
17 | if (table) { | ||
18 | inode->i_uid = 0; | ||
19 | inode->i_gid = 0; | ||
20 | inode->i_mode = table->mode; | ||
21 | if (table->proc_handler) { | ||
22 | inode->i_mode |= S_IFREG; | ||
23 | inode->i_nlink = 1; | ||
24 | } else { | ||
25 | inode->i_mode |= S_IFDIR; | ||
26 | inode->i_nlink = 0; /* It is too hard to figure out */ | ||
27 | } | ||
28 | } | ||
29 | } | ||
30 | |||
31 | static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table) | ||
32 | { | ||
33 | struct inode *inode; | ||
34 | struct proc_inode *dir_ei, *ei; | ||
35 | int depth; | ||
36 | |||
37 | inode = new_inode(dir->i_sb); | ||
38 | if (!inode) | ||
39 | goto out; | ||
40 | |||
41 | /* A directory is always one deeper than its parent */ | |
42 | dir_ei = PROC_I(dir); | ||
43 | depth = dir_ei->fd + 1; | ||
44 | |||
45 | ei = PROC_I(inode); | ||
46 | ei->fd = depth; | ||
47 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
48 | inode->i_op = &proc_sys_inode_operations; | ||
49 | inode->i_fop = &proc_sys_file_operations; | ||
50 | proc_sys_refresh_inode(inode, table); | ||
51 | out: | ||
52 | return inode; | ||
53 | } | ||
54 | |||
55 | static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth) | ||
56 | { | ||
57 | for (;;) { | ||
58 | struct proc_inode *ei; | ||
59 | |||
60 | ei = PROC_I(dentry->d_inode); | ||
61 | if (ei->fd == depth) | ||
62 | break; /* found */ | ||
63 | |||
64 | dentry = dentry->d_parent; | ||
65 | } | ||
66 | return dentry; | ||
67 | } | ||
68 | |||
69 | static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table, | ||
70 | struct qstr *name) | ||
71 | { | ||
72 | int len; | ||
73 | for ( ; table->ctl_name || table->procname; table++) { | ||
74 | |||
75 | if (!table->procname) | ||
76 | continue; | ||
77 | |||
78 | len = strlen(table->procname); | ||
79 | if (len != name->len) | ||
80 | continue; | ||
81 | |||
82 | if (memcmp(table->procname, name->name, len) != 0) | ||
83 | continue; | ||
84 | |||
85 | /* I have a match */ | ||
86 | return table; | ||
87 | } | ||
88 | return NULL; | ||
89 | } | ||
90 | |||
91 | static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry, | ||
92 | struct ctl_table *table) | ||
93 | { | ||
94 | struct dentry *ancestor; | ||
95 | struct proc_inode *ei; | ||
96 | int depth, i; | ||
97 | |||
98 | ei = PROC_I(dentry->d_inode); | ||
99 | depth = ei->fd; | ||
100 | |||
101 | if (depth == 0) | ||
102 | return table; | ||
103 | |||
104 | for (i = 1; table && (i <= depth); i++) { | ||
105 | ancestor = proc_sys_ancestor(dentry, i); | ||
106 | table = proc_sys_lookup_table_one(table, &ancestor->d_name); | ||
107 | if (table) | ||
108 | table = table->child; | ||
109 | } | ||
110 | return table; | ||
111 | |||
112 | } | ||
113 | static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent, | ||
114 | struct qstr *name, | ||
115 | struct ctl_table *table) | ||
116 | { | ||
117 | table = proc_sys_lookup_table(dparent, table); | ||
118 | if (table) | ||
119 | table = proc_sys_lookup_table_one(table, name); | ||
120 | return table; | ||
121 | } | ||
122 | |||
123 | static struct ctl_table *do_proc_sys_lookup(struct dentry *parent, | ||
124 | struct qstr *name, | ||
125 | struct ctl_table_header **ptr) | ||
126 | { | ||
127 | struct ctl_table_header *head; | ||
128 | struct ctl_table *table = NULL; | ||
129 | |||
130 | for (head = sysctl_head_next(NULL); head; | ||
131 | head = sysctl_head_next(head)) { | ||
132 | table = proc_sys_lookup_entry(parent, name, head->ctl_table); | ||
133 | if (table) | ||
134 | break; | ||
135 | } | ||
136 | *ptr = head; | ||
137 | return table; | ||
138 | } | ||
139 | |||
140 | static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, | ||
141 | struct nameidata *nd) | ||
142 | { | ||
143 | struct ctl_table_header *head; | ||
144 | struct inode *inode; | ||
145 | struct dentry *err; | ||
146 | struct ctl_table *table; | ||
147 | |||
148 | err = ERR_PTR(-ENOENT); | ||
149 | table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); | ||
150 | if (!table) | ||
151 | goto out; | ||
152 | |||
153 | err = ERR_PTR(-ENOMEM); | ||
154 | inode = proc_sys_make_inode(dir, table); | ||
155 | if (!inode) | ||
156 | goto out; | ||
157 | |||
158 | err = NULL; | ||
159 | dentry->d_op = &proc_sys_dentry_operations; | ||
160 | d_add(dentry, inode); | ||
161 | |||
162 | out: | ||
163 | sysctl_head_finish(head); | ||
164 | return err; | ||
165 | } | ||
166 | |||
167 | static ssize_t proc_sys_read(struct file *filp, char __user *buf, | ||
168 | size_t count, loff_t *ppos) | ||
169 | { | ||
170 | struct dentry *dentry = filp->f_dentry; | ||
171 | struct ctl_table_header *head; | ||
172 | struct ctl_table *table; | ||
173 | ssize_t error, res; | ||
174 | |||
175 | table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); | ||
176 | /* Has the sysctl entry disappeared on us? */ | ||
177 | error = -ENOENT; | ||
178 | if (!table) | ||
179 | goto out; | ||
180 | |||
181 | /* Has the sysctl entry been replaced by a directory? */ | ||
182 | error = -EISDIR; | ||
183 | if (!table->proc_handler) | ||
184 | goto out; | ||
185 | |||
186 | /* | ||
187 | * At this point we know that the sysctl was not unregistered | ||
188 | * and won't be until we finish. | ||
189 | */ | ||
190 | error = -EPERM; | ||
191 | if (sysctl_perm(table, MAY_READ)) | ||
192 | goto out; | ||
193 | |||
194 | /* careful: calling conventions are nasty here */ | ||
195 | res = count; | ||
196 | error = table->proc_handler(table, 0, filp, buf, &res, ppos); | ||
197 | if (!error) | ||
198 | error = res; | ||
199 | out: | ||
200 | sysctl_head_finish(head); | ||
201 | |||
202 | return error; | ||
203 | } | ||
204 | |||
205 | static ssize_t proc_sys_write(struct file *filp, const char __user *buf, | ||
206 | size_t count, loff_t *ppos) | ||
207 | { | ||
208 | struct dentry *dentry = filp->f_dentry; | ||
209 | struct ctl_table_header *head; | ||
210 | struct ctl_table *table; | ||
211 | ssize_t error, res; | ||
212 | |||
213 | table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); | ||
214 | /* Has the sysctl entry disappeared on us? */ | ||
215 | error = -ENOENT; | ||
216 | if (!table) | ||
217 | goto out; | ||
218 | |||
219 | /* Has the sysctl entry been replaced by a directory? */ | ||
220 | error = -EISDIR; | ||
221 | if (!table->proc_handler) | ||
222 | goto out; | ||
223 | |||
224 | /* | ||
225 | * At this point we know that the sysctl was not unregistered | ||
226 | * and won't be until we finish. | ||
227 | */ | ||
228 | error = -EPERM; | ||
229 | if (sysctl_perm(table, MAY_WRITE)) | ||
230 | goto out; | ||
231 | |||
232 | /* careful: calling conventions are nasty here */ | ||
233 | res = count; | ||
234 | error = table->proc_handler(table, 1, filp, (char __user *)buf, | ||
235 | &res, ppos); | ||
236 | if (!error) | ||
237 | error = res; | ||
238 | out: | ||
239 | sysctl_head_finish(head); | ||
240 | |||
241 | return error; | ||
242 | } | ||
243 | |||
244 | |||
245 | static int proc_sys_fill_cache(struct file *filp, void *dirent, | ||
246 | filldir_t filldir, struct ctl_table *table) | ||
247 | { | ||
248 | struct ctl_table_header *head; | ||
249 | struct ctl_table *child_table = NULL; | ||
250 | struct dentry *child, *dir = filp->f_path.dentry; | ||
251 | struct inode *inode; | ||
252 | struct qstr qname; | ||
253 | ino_t ino = 0; | ||
254 | unsigned type = DT_UNKNOWN; | ||
255 | int ret; | ||
256 | |||
257 | qname.name = table->procname; | ||
258 | qname.len = strlen(table->procname); | ||
259 | qname.hash = full_name_hash(qname.name, qname.len); | ||
260 | |||
261 | /* Suppress duplicates. | ||
262 | * Only fill a directory entry if it is the value that | ||
263 | * an ordinary lookup of that name returns. Hide all | ||
264 | * others. | ||
265 | * | ||
266 | * If we ever cache this translation in the dcache | ||
267 | * I should do a dcache lookup first. But for now | ||
268 | * it is just simpler not to. | ||
269 | */ | ||
270 | ret = 0; | ||
271 | child_table = do_proc_sys_lookup(dir, &qname, &head); | ||
272 | sysctl_head_finish(head); | ||
273 | if (child_table != table) | ||
274 | return 0; | ||
275 | |||
276 | child = d_lookup(dir, &qname); | ||
277 | if (!child) { | ||
278 | struct dentry *new; | ||
279 | new = d_alloc(dir, &qname); | ||
280 | if (new) { | ||
281 | inode = proc_sys_make_inode(dir->d_inode, table); | ||
282 | if (!inode) | ||
283 | child = ERR_PTR(-ENOMEM); | ||
284 | else { | ||
285 | new->d_op = &proc_sys_dentry_operations; | ||
286 | d_add(new, inode); | ||
287 | } | ||
288 | if (child) | ||
289 | dput(new); | ||
290 | else | ||
291 | child = new; | ||
292 | } | ||
293 | } | ||
294 | if (!child || IS_ERR(child) || !child->d_inode) | ||
295 | goto end_instantiate; | ||
296 | inode = child->d_inode; | ||
297 | if (inode) { | ||
298 | ino = inode->i_ino; | ||
299 | type = inode->i_mode >> 12; | ||
300 | } | ||
301 | dput(child); | ||
302 | end_instantiate: | ||
303 | if (!ino) | ||
304 | ino= find_inode_number(dir, &qname); | ||
305 | if (!ino) | ||
306 | ino = 1; | ||
307 | return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); | ||
308 | } | ||
309 | |||
310 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
311 | { | ||
312 | struct dentry *dentry = filp->f_dentry; | ||
313 | struct inode *inode = dentry->d_inode; | ||
314 | struct ctl_table_header *head = NULL; | ||
315 | struct ctl_table *table; | ||
316 | unsigned long pos; | ||
317 | int ret; | ||
318 | |||
319 | ret = -ENOTDIR; | ||
320 | if (!S_ISDIR(inode->i_mode)) | ||
321 | goto out; | ||
322 | |||
323 | ret = 0; | ||
324 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ | ||
325 | if (filp->f_pos == 0) { | ||
326 | if (filldir(dirent, ".", 1, filp->f_pos, | ||
327 | inode->i_ino, DT_DIR) < 0) | ||
328 | goto out; | ||
329 | filp->f_pos++; | ||
330 | } | ||
331 | if (filp->f_pos == 1) { | ||
332 | if (filldir(dirent, "..", 2, filp->f_pos, | ||
333 | parent_ino(dentry), DT_DIR) < 0) | ||
334 | goto out; | ||
335 | filp->f_pos++; | ||
336 | } | ||
337 | pos = 2; | ||
338 | |||
339 | /* - Find each instance of the directory | ||
340 | * - Read all entries in each instance | ||
341 | * - Before returning an entry to user space lookup the entry | ||
342 | * by name and if I find a different entry don't return | ||
343 | * this one because it means it is a buried dup. | ||
344 | * For sysctl this should only happen for directory entries. | ||
345 | */ | ||
346 | for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) { | ||
347 | table = proc_sys_lookup_table(dentry, head->ctl_table); | ||
348 | |||
349 | if (!table) | ||
350 | continue; | ||
351 | |||
352 | for (; table->ctl_name || table->procname; table++, pos++) { | ||
353 | /* Can't do anything without a proc name */ | ||
354 | if (!table->procname) | ||
355 | continue; | ||
356 | |||
357 | if (pos < filp->f_pos) | ||
358 | continue; | ||
359 | |||
360 | if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0) | ||
361 | goto out; | ||
362 | filp->f_pos = pos + 1; | ||
363 | } | ||
364 | } | ||
365 | ret = 1; | ||
366 | out: | ||
367 | sysctl_head_finish(head); | ||
368 | return ret; | ||
369 | } | ||
370 | |||
371 | static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd) | ||
372 | { | ||
373 | /* | ||
374 | * sysctl entries that are not writeable, | ||
375 | * are _NOT_ writeable, capabilities or not. | ||
376 | */ | ||
377 | struct ctl_table_header *head; | ||
378 | struct ctl_table *table; | ||
379 | struct dentry *dentry; | ||
380 | int mode; | ||
381 | int depth; | ||
382 | int error; | ||
383 | |||
384 | head = NULL; | ||
385 | depth = PROC_I(inode)->fd; | ||
386 | |||
387 | /* First check the cached permissions, in case we don't have | ||
388 | * enough information to lookup the sysctl table entry. | ||
389 | */ | ||
390 | error = -EACCES; | ||
391 | mode = inode->i_mode; | ||
392 | |||
393 | if (current->euid == 0) | ||
394 | mode >>= 6; | ||
395 | else if (in_group_p(0)) | ||
396 | mode >>= 3; | ||
397 | |||
398 | if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask) | ||
399 | error = 0; | ||
400 | |||
401 | /* If we can't get a sysctl table entry the permission | ||
402 | * checks on the cached mode will have to be enough. | ||
403 | */ | ||
404 | if (!nd || !depth) | ||
405 | goto out; | ||
406 | |||
407 | dentry = nd->dentry; | ||
408 | table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); | ||
409 | |||
410 | /* If the entry does not exist deny permission */ | ||
411 | error = -EACCES; | ||
412 | if (!table) | ||
413 | goto out; | ||
414 | |||
415 | /* Use the permissions on the sysctl table entry */ | ||
416 | error = sysctl_perm(table, mask); | ||
417 | out: | ||
418 | sysctl_head_finish(head); | ||
419 | return error; | ||
420 | } | ||
421 | |||
422 | static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) | ||
423 | { | ||
424 | struct inode *inode = dentry->d_inode; | ||
425 | int error; | ||
426 | |||
427 | if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) | ||
428 | return -EPERM; | ||
429 | |||
430 | error = inode_change_ok(inode, attr); | ||
431 | if (!error) { | ||
432 | error = security_inode_setattr(dentry, attr); | ||
433 | if (!error) | ||
434 | error = inode_setattr(inode, attr); | ||
435 | } | ||
436 | |||
437 | return error; | ||
438 | } | ||
439 | |||
440 | /* I'm lazy and don't distinguish between files and directories, | ||
441 | * until access time. | ||
442 | */ | ||
443 | static const struct file_operations proc_sys_file_operations = { | ||
444 | .read = proc_sys_read, | ||
445 | .write = proc_sys_write, | ||
446 | .readdir = proc_sys_readdir, | ||
447 | }; | ||
448 | |||
449 | static struct inode_operations proc_sys_inode_operations = { | ||
450 | .lookup = proc_sys_lookup, | ||
451 | .permission = proc_sys_permission, | ||
452 | .setattr = proc_sys_setattr, | ||
453 | }; | ||
454 | |||
455 | static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) | ||
456 | { | ||
457 | struct ctl_table_header *head; | ||
458 | struct ctl_table *table; | ||
459 | table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); | ||
460 | proc_sys_refresh_inode(dentry->d_inode, table); | ||
461 | sysctl_head_finish(head); | ||
462 | return !!table; | ||
463 | } | ||
464 | |||
465 | static struct dentry_operations proc_sys_dentry_operations = { | ||
466 | .d_revalidate = proc_sys_revalidate, | ||
467 | }; | ||
468 | |||
469 | static struct proc_dir_entry *proc_sys_root; | ||
470 | |||
471 | int proc_sys_init(void) | ||
472 | { | ||
473 | proc_sys_root = proc_mkdir("sys", NULL); | ||
474 | proc_sys_root->proc_iops = &proc_sys_inode_operations; | ||
475 | proc_sys_root->proc_fops = &proc_sys_file_operations; | ||
476 | proc_sys_root->nlink = 0; | ||
477 | return 0; | ||
478 | } | ||
diff --git a/fs/proc/root.c b/fs/proc/root.c index 6ae222b509ce..5834a744c2a9 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -23,10 +23,6 @@ | |||
23 | 23 | ||
24 | struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; | 24 | struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; |
25 | 25 | ||
26 | #ifdef CONFIG_SYSCTL | ||
27 | struct proc_dir_entry *proc_sys_root; | ||
28 | #endif | ||
29 | |||
30 | static int proc_get_sb(struct file_system_type *fs_type, | 26 | static int proc_get_sb(struct file_system_type *fs_type, |
31 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 27 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) |
32 | { | 28 | { |
@@ -71,9 +67,6 @@ void __init proc_root_init(void) | |||
71 | #ifdef CONFIG_SYSVIPC | 67 | #ifdef CONFIG_SYSVIPC |
72 | proc_mkdir("sysvipc", NULL); | 68 | proc_mkdir("sysvipc", NULL); |
73 | #endif | 69 | #endif |
74 | #ifdef CONFIG_SYSCTL | ||
75 | proc_sys_root = proc_mkdir("sys", NULL); | ||
76 | #endif | ||
77 | proc_root_fs = proc_mkdir("fs", NULL); | 70 | proc_root_fs = proc_mkdir("fs", NULL); |
78 | proc_root_driver = proc_mkdir("driver", NULL); | 71 | proc_root_driver = proc_mkdir("driver", NULL); |
79 | proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ | 72 | proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ |
@@ -86,6 +79,9 @@ void __init proc_root_init(void) | |||
86 | proc_device_tree_init(); | 79 | proc_device_tree_init(); |
87 | #endif | 80 | #endif |
88 | proc_bus = proc_mkdir("bus", NULL); | 81 | proc_bus = proc_mkdir("bus", NULL); |
82 | #ifdef CONFIG_SYSCTL | ||
83 | proc_sys_init(); | ||
84 | #endif | ||
89 | } | 85 | } |
90 | 86 | ||
91 | static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat | 87 | static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat |
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 2e132473cbe5..be4652a0545a 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h | |||
@@ -113,8 +113,6 @@ extern struct vfsmount *proc_mnt; | |||
113 | extern int proc_fill_super(struct super_block *,void *,int); | 113 | extern int proc_fill_super(struct super_block *,void *,int); |
114 | extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); | 114 | extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); |
115 | 115 | ||
116 | extern int proc_match(int, const char *,struct proc_dir_entry *); | ||
117 | |||
118 | /* | 116 | /* |
119 | * These are generic /proc routines that use the internal | 117 | * These are generic /proc routines that use the internal |
120 | * "struct proc_dir_entry" tree to traverse the filesystem. | 118 | * "struct proc_dir_entry" tree to traverse the filesystem. |
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 9698ac30f553..89150494bd10 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
@@ -930,8 +930,6 @@ extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); | |||
930 | extern void sysctl_head_finish(struct ctl_table_header *prev); | 930 | extern void sysctl_head_finish(struct ctl_table_header *prev); |
931 | extern int sysctl_perm(struct ctl_table *table, int op); | 931 | extern int sysctl_perm(struct ctl_table *table, int op); |
932 | 932 | ||
933 | extern void sysctl_init(void); | ||
934 | |||
935 | typedef struct ctl_table ctl_table; | 933 | typedef struct ctl_table ctl_table; |
936 | 934 | ||
937 | typedef int ctl_handler (ctl_table *table, int __user *name, int nlen, | 935 | typedef int ctl_handler (ctl_table *table, int __user *name, int nlen, |
diff --git a/init/main.c b/init/main.c index a20a5138211f..649ab5443d43 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -86,7 +86,6 @@ extern void init_IRQ(void); | |||
86 | extern void fork_init(unsigned long); | 86 | extern void fork_init(unsigned long); |
87 | extern void mca_init(void); | 87 | extern void mca_init(void); |
88 | extern void sbus_init(void); | 88 | extern void sbus_init(void); |
89 | extern void sysctl_init(void); | ||
90 | extern void signals_init(void); | 89 | extern void signals_init(void); |
91 | extern void pidhash_init(void); | 90 | extern void pidhash_init(void); |
92 | extern void pidmap_init(void); | 91 | extern void pidmap_init(void); |
@@ -702,9 +701,6 @@ static void __init do_basic_setup(void) | |||
702 | usermodehelper_init(); | 701 | usermodehelper_init(); |
703 | driver_init(); | 702 | driver_init(); |
704 | 703 | ||
705 | #ifdef CONFIG_SYSCTL | ||
706 | sysctl_init(); | ||
707 | #endif | ||
708 | #ifdef CONFIG_PROC_FS | 704 | #ifdef CONFIG_PROC_FS |
709 | init_irq_proc(); | 705 | init_irq_proc(); |
710 | #endif | 706 | #endif |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6bbac5ce75ed..b3ee791ad663 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -159,26 +159,6 @@ int sysctl_legacy_va_layout; | |||
159 | #endif | 159 | #endif |
160 | 160 | ||
161 | 161 | ||
162 | /* /proc declarations: */ | ||
163 | |||
164 | #ifdef CONFIG_PROC_SYSCTL | ||
165 | |||
166 | static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *); | ||
167 | static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); | ||
168 | static int proc_opensys(struct inode *, struct file *); | ||
169 | |||
170 | const struct file_operations proc_sys_file_operations = { | ||
171 | .open = proc_opensys, | ||
172 | .read = proc_readsys, | ||
173 | .write = proc_writesys, | ||
174 | }; | ||
175 | |||
176 | extern struct proc_dir_entry *proc_sys_root; | ||
177 | |||
178 | static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *); | ||
179 | static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); | ||
180 | #endif | ||
181 | |||
182 | /* The default sysctl tables: */ | 162 | /* The default sysctl tables: */ |
183 | 163 | ||
184 | static ctl_table root_table[] = { | 164 | static ctl_table root_table[] = { |
@@ -1106,13 +1086,6 @@ struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) | |||
1106 | return NULL; | 1086 | return NULL; |
1107 | } | 1087 | } |
1108 | 1088 | ||
1109 | void __init sysctl_init(void) | ||
1110 | { | ||
1111 | #ifdef CONFIG_PROC_SYSCTL | ||
1112 | register_proc_table(root_table, proc_sys_root, &root_table_header); | ||
1113 | #endif | ||
1114 | } | ||
1115 | |||
1116 | #ifdef CONFIG_SYSCTL_SYSCALL | 1089 | #ifdef CONFIG_SYSCTL_SYSCALL |
1117 | int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, | 1090 | int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, |
1118 | void __user *newval, size_t newlen) | 1091 | void __user *newval, size_t newlen) |
@@ -1348,9 +1321,6 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table) | |||
1348 | spin_lock(&sysctl_lock); | 1321 | spin_lock(&sysctl_lock); |
1349 | list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); | 1322 | list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); |
1350 | spin_unlock(&sysctl_lock); | 1323 | spin_unlock(&sysctl_lock); |
1351 | #ifdef CONFIG_PROC_SYSCTL | ||
1352 | register_proc_table(table, proc_sys_root, tmp); | ||
1353 | #endif | ||
1354 | return tmp; | 1324 | return tmp; |
1355 | } | 1325 | } |
1356 | 1326 | ||
@@ -1366,9 +1336,6 @@ void unregister_sysctl_table(struct ctl_table_header * header) | |||
1366 | might_sleep(); | 1336 | might_sleep(); |
1367 | spin_lock(&sysctl_lock); | 1337 | spin_lock(&sysctl_lock); |
1368 | start_unregistering(header); | 1338 | start_unregistering(header); |
1369 | #ifdef CONFIG_PROC_SYSCTL | ||
1370 | unregister_proc_table(header->ctl_table, proc_sys_root); | ||
1371 | #endif | ||
1372 | spin_unlock(&sysctl_lock); | 1339 | spin_unlock(&sysctl_lock); |
1373 | kfree(header); | 1340 | kfree(header); |
1374 | } | 1341 | } |
@@ -1392,155 +1359,6 @@ void unregister_sysctl_table(struct ctl_table_header * table) | |||
1392 | 1359 | ||
1393 | #ifdef CONFIG_PROC_SYSCTL | 1360 | #ifdef CONFIG_PROC_SYSCTL |
1394 | 1361 | ||
1395 | /* Scan the sysctl entries in table and add them all into /proc */ | ||
1396 | static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) | ||
1397 | { | ||
1398 | struct proc_dir_entry *de; | ||
1399 | int len; | ||
1400 | mode_t mode; | ||
1401 | |||
1402 | for (; table->ctl_name || table->procname; table++) { | ||
1403 | /* Can't do anything without a proc name. */ | ||
1404 | if (!table->procname) | ||
1405 | continue; | ||
1406 | /* Maybe we can't do anything with it... */ | ||
1407 | if (!table->proc_handler && !table->child) { | ||
1408 | printk(KERN_WARNING "SYSCTL: Can't register %s\n", | ||
1409 | table->procname); | ||
1410 | continue; | ||
1411 | } | ||
1412 | |||
1413 | len = strlen(table->procname); | ||
1414 | mode = table->mode; | ||
1415 | |||
1416 | de = NULL; | ||
1417 | if (table->proc_handler) | ||
1418 | mode |= S_IFREG; | ||
1419 | else { | ||
1420 | mode |= S_IFDIR; | ||
1421 | for (de = root->subdir; de; de = de->next) { | ||
1422 | if (proc_match(len, table->procname, de)) | ||
1423 | break; | ||
1424 | } | ||
1425 | /* If the subdir exists already, de is non-NULL */ | ||
1426 | } | ||
1427 | |||
1428 | if (!de) { | ||
1429 | de = create_proc_entry(table->procname, mode, root); | ||
1430 | if (!de) | ||
1431 | continue; | ||
1432 | de->set = set; | ||
1433 | de->data = (void *) table; | ||
1434 | if (table->proc_handler) | ||
1435 | de->proc_fops = &proc_sys_file_operations; | ||
1436 | } | ||
1437 | table->de = de; | ||
1438 | if (de->mode & S_IFDIR) | ||
1439 | register_proc_table(table->child, de, set); | ||
1440 | } | ||
1441 | } | ||
1442 | |||
1443 | /* | ||
1444 | * Unregister a /proc sysctl table and any subdirectories. | ||
1445 | */ | ||
1446 | static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root) | ||
1447 | { | ||
1448 | struct proc_dir_entry *de; | ||
1449 | for (; table->ctl_name || table->procname; table++) { | ||
1450 | if (!(de = table->de)) | ||
1451 | continue; | ||
1452 | if (de->mode & S_IFDIR) { | ||
1453 | if (!table->child) { | ||
1454 | printk (KERN_ALERT "Help - malformed sysctl tree on free\n"); | ||
1455 | continue; | ||
1456 | } | ||
1457 | unregister_proc_table(table->child, de); | ||
1458 | |||
1459 | /* Don't unregister directories which still have entries.. */ | ||
1460 | if (de->subdir) | ||
1461 | continue; | ||
1462 | } | ||
1463 | |||
1464 | /* | ||
1465 | * In any case, mark the entry as goner; we'll keep it | ||
1466 | * around if it's busy, but we'll know to do nothing with | ||
1467 | * its fields. We are under sysctl_lock here. | ||
1468 | */ | ||
1469 | de->data = NULL; | ||
1470 | |||
1471 | /* Don't unregister proc entries that are still being used.. */ | ||
1472 | if (atomic_read(&de->count)) | ||
1473 | continue; | ||
1474 | |||
1475 | table->de = NULL; | ||
1476 | remove_proc_entry(table->procname, root); | ||
1477 | } | ||
1478 | } | ||
1479 | |||
1480 | static ssize_t do_rw_proc(int write, struct file * file, char __user * buf, | ||
1481 | size_t count, loff_t *ppos) | ||
1482 | { | ||
1483 | int op; | ||
1484 | struct proc_dir_entry *de = PDE(file->f_path.dentry->d_inode); | ||
1485 | struct ctl_table *table; | ||
1486 | size_t res; | ||
1487 | ssize_t error = -ENOTDIR; | ||
1488 | |||
1489 | spin_lock(&sysctl_lock); | ||
1490 | if (de && de->data && use_table(de->set)) { | ||
1491 | /* | ||
1492 | * at that point we know that sysctl was not unregistered | ||
1493 | * and won't be until we finish | ||
1494 | */ | ||
1495 | spin_unlock(&sysctl_lock); | ||
1496 | table = (struct ctl_table *) de->data; | ||
1497 | if (!table || !table->proc_handler) | ||
1498 | goto out; | ||
1499 | error = -EPERM; | ||
1500 | op = (write ? 002 : 004); | ||
1501 | if (sysctl_perm(table, op)) | ||
1502 | goto out; | ||
1503 | |||
1504 | /* careful: calling conventions are nasty here */ | ||
1505 | res = count; | ||
1506 | error = (*table->proc_handler)(table, write, file, | ||
1507 | buf, &res, ppos); | ||
1508 | if (!error) | ||
1509 | error = res; | ||
1510 | out: | ||
1511 | spin_lock(&sysctl_lock); | ||
1512 | unuse_table(de->set); | ||
1513 | } | ||
1514 | spin_unlock(&sysctl_lock); | ||
1515 | return error; | ||
1516 | } | ||
1517 | |||
1518 | static int proc_opensys(struct inode *inode, struct file *file) | ||
1519 | { | ||
1520 | if (file->f_mode & FMODE_WRITE) { | ||
1521 | /* | ||
1522 | * sysctl entries that are not writable, | ||
1523 | * are _NOT_ writable, capabilities or not. | ||
1524 | */ | ||
1525 | if (!(inode->i_mode & S_IWUSR)) | ||
1526 | return -EPERM; | ||
1527 | } | ||
1528 | |||
1529 | return 0; | ||
1530 | } | ||
1531 | |||
1532 | static ssize_t proc_readsys(struct file * file, char __user * buf, | ||
1533 | size_t count, loff_t *ppos) | ||
1534 | { | ||
1535 | return do_rw_proc(0, file, buf, count, ppos); | ||
1536 | } | ||
1537 | |||
1538 | static ssize_t proc_writesys(struct file * file, const char __user * buf, | ||
1539 | size_t count, loff_t *ppos) | ||
1540 | { | ||
1541 | return do_rw_proc(1, file, (char __user *) buf, count, ppos); | ||
1542 | } | ||
1543 | |||
1544 | static int _proc_do_string(void* data, int maxlen, int write, | 1362 | static int _proc_do_string(void* data, int maxlen, int write, |
1545 | struct file *filp, void __user *buffer, | 1363 | struct file *filp, void __user *buffer, |
1546 | size_t *lenp, loff_t *ppos) | 1364 | size_t *lenp, loff_t *ppos) |