diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-23 21:08:58 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-23 21:08:58 -0400 |
commit | f1d38e423a697b7aa06e12d3ca4753bcc1aa3531 (patch) | |
tree | 1cbfd86070f724d5ffe53146d4c67edf14cccf98 /fs | |
parent | dae430c6f6e5d0b98c238c340a41a39e221e8940 (diff) | |
parent | 4e474a00d7ff746ed177ddae14fa8b2d4bad7a00 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/sysctl
Pull sysctl updates from Eric Biederman:
- Rewrite of sysctl for speed and clarity.
Insert/remove/lookup in sysctl are all now O(log N) operations, and
are no longer bottlenecks in the process of adding and removing
network devices.
sysctl is now focused on being a filesystem instead of a system call
and the code can all be found in fs/proc/proc_sysctl.c. Hopefully
this means the code is now approachable.
Many thanks are owed to Lucian Grijincu for keeping at this until
something was found that was usable.
- The recent proc_sys_poll oops found by the fuzzer during hibernation
is fixed.
* git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/sysctl: (36 commits)
sysctl: protect poll() in entries that may go away
sysctl: Don't call sysctl_follow_link unless we are a link.
sysctl: Comments to make the code clearer.
sysctl: Correct error return from get_subdir
sysctl: An easier to read version of find_subdir
sysctl: fix memset parameters in setup_sysctl_set()
sysctl: remove an unused variable
sysctl: Add register_sysctl for normal sysctl users
sysctl: Index sysctl directories with rbtrees.
sysctl: Make the header lists per directory.
sysctl: Move sysctl_check_dups into insert_header
sysctl: Modify __register_sysctl_paths to take a set instead of a root and an nsproxy
sysctl: Replace root_list with links between sysctl_table_sets.
sysctl: Add sysctl_print_dir and use it in get_subdir
sysctl: Stop requiring explicit management of sysctl directories
sysctl: Add a root pointer to ctl_table_set
sysctl: Rewrite proc_sys_readdir in terms of first_entry and next_entry
sysctl: Rewrite proc_sys_lookup introducing find_entry and lookup_entry.
sysctl: Normalize the root_table data structure.
sysctl: Factor out insert_header and erase_header
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/proc/internal.h | 3 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 1274 |
2 files changed, 1201 insertions, 76 deletions
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c44efe19798f..5f79bb8b4c60 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -10,12 +10,15 @@ | |||
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/proc_fs.h> | 12 | #include <linux/proc_fs.h> |
13 | struct ctl_table_header; | ||
13 | 14 | ||
14 | extern struct proc_dir_entry proc_root; | 15 | extern struct proc_dir_entry proc_root; |
15 | #ifdef CONFIG_PROC_SYSCTL | 16 | #ifdef CONFIG_PROC_SYSCTL |
16 | extern int proc_sys_init(void); | 17 | extern int proc_sys_init(void); |
18 | extern void sysctl_head_put(struct ctl_table_header *head); | ||
17 | #else | 19 | #else |
18 | static inline void proc_sys_init(void) { } | 20 | static inline void proc_sys_init(void) { } |
21 | static inline void sysctl_head_put(struct ctl_table_header *head) { } | ||
19 | #endif | 22 | #endif |
20 | #ifdef CONFIG_NET | 23 | #ifdef CONFIG_NET |
21 | extern int proc_net_init(void); | 24 | extern int proc_net_init(void); |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 67bbf6e4e197..21d836f40292 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/namei.h> | 10 | #include <linux/namei.h> |
11 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
12 | #include <linux/module.h> | ||
12 | #include "internal.h" | 13 | #include "internal.h" |
13 | 14 | ||
14 | static const struct dentry_operations proc_sys_dentry_operations; | 15 | static const struct dentry_operations proc_sys_dentry_operations; |
@@ -26,6 +27,371 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll) | |||
26 | wake_up_interruptible(&poll->wait); | 27 | wake_up_interruptible(&poll->wait); |
27 | } | 28 | } |
28 | 29 | ||
30 | static struct ctl_table root_table[] = { | ||
31 | { | ||
32 | .procname = "", | ||
33 | .mode = S_IFDIR|S_IRUGO|S_IXUGO, | ||
34 | }, | ||
35 | { } | ||
36 | }; | ||
37 | static struct ctl_table_root sysctl_table_root = { | ||
38 | .default_set.dir.header = { | ||
39 | {{.count = 1, | ||
40 | .nreg = 1, | ||
41 | .ctl_table = root_table }}, | ||
42 | .ctl_table_arg = root_table, | ||
43 | .root = &sysctl_table_root, | ||
44 | .set = &sysctl_table_root.default_set, | ||
45 | }, | ||
46 | }; | ||
47 | |||
48 | static DEFINE_SPINLOCK(sysctl_lock); | ||
49 | |||
50 | static void drop_sysctl_table(struct ctl_table_header *header); | ||
51 | static int sysctl_follow_link(struct ctl_table_header **phead, | ||
52 | struct ctl_table **pentry, struct nsproxy *namespaces); | ||
53 | static int insert_links(struct ctl_table_header *head); | ||
54 | static void put_links(struct ctl_table_header *header); | ||
55 | |||
56 | static void sysctl_print_dir(struct ctl_dir *dir) | ||
57 | { | ||
58 | if (dir->header.parent) | ||
59 | sysctl_print_dir(dir->header.parent); | ||
60 | printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname); | ||
61 | } | ||
62 | |||
63 | static int namecmp(const char *name1, int len1, const char *name2, int len2) | ||
64 | { | ||
65 | int minlen; | ||
66 | int cmp; | ||
67 | |||
68 | minlen = len1; | ||
69 | if (minlen > len2) | ||
70 | minlen = len2; | ||
71 | |||
72 | cmp = memcmp(name1, name2, minlen); | ||
73 | if (cmp == 0) | ||
74 | cmp = len1 - len2; | ||
75 | return cmp; | ||
76 | } | ||
77 | |||
78 | /* Called under sysctl_lock */ | ||
79 | static struct ctl_table *find_entry(struct ctl_table_header **phead, | ||
80 | struct ctl_dir *dir, const char *name, int namelen) | ||
81 | { | ||
82 | struct ctl_table_header *head; | ||
83 | struct ctl_table *entry; | ||
84 | struct rb_node *node = dir->root.rb_node; | ||
85 | |||
86 | while (node) | ||
87 | { | ||
88 | struct ctl_node *ctl_node; | ||
89 | const char *procname; | ||
90 | int cmp; | ||
91 | |||
92 | ctl_node = rb_entry(node, struct ctl_node, node); | ||
93 | head = ctl_node->header; | ||
94 | entry = &head->ctl_table[ctl_node - head->node]; | ||
95 | procname = entry->procname; | ||
96 | |||
97 | cmp = namecmp(name, namelen, procname, strlen(procname)); | ||
98 | if (cmp < 0) | ||
99 | node = node->rb_left; | ||
100 | else if (cmp > 0) | ||
101 | node = node->rb_right; | ||
102 | else { | ||
103 | *phead = head; | ||
104 | return entry; | ||
105 | } | ||
106 | } | ||
107 | return NULL; | ||
108 | } | ||
109 | |||
110 | static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) | ||
111 | { | ||
112 | struct rb_node *node = &head->node[entry - head->ctl_table].node; | ||
113 | struct rb_node **p = &head->parent->root.rb_node; | ||
114 | struct rb_node *parent = NULL; | ||
115 | const char *name = entry->procname; | ||
116 | int namelen = strlen(name); | ||
117 | |||
118 | while (*p) { | ||
119 | struct ctl_table_header *parent_head; | ||
120 | struct ctl_table *parent_entry; | ||
121 | struct ctl_node *parent_node; | ||
122 | const char *parent_name; | ||
123 | int cmp; | ||
124 | |||
125 | parent = *p; | ||
126 | parent_node = rb_entry(parent, struct ctl_node, node); | ||
127 | parent_head = parent_node->header; | ||
128 | parent_entry = &parent_head->ctl_table[parent_node - parent_head->node]; | ||
129 | parent_name = parent_entry->procname; | ||
130 | |||
131 | cmp = namecmp(name, namelen, parent_name, strlen(parent_name)); | ||
132 | if (cmp < 0) | ||
133 | p = &(*p)->rb_left; | ||
134 | else if (cmp > 0) | ||
135 | p = &(*p)->rb_right; | ||
136 | else { | ||
137 | printk(KERN_ERR "sysctl duplicate entry: "); | ||
138 | sysctl_print_dir(head->parent); | ||
139 | printk(KERN_CONT "/%s\n", entry->procname); | ||
140 | return -EEXIST; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | rb_link_node(node, parent, p); | ||
145 | return 0; | ||
146 | } | ||
147 | |||
148 | static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) | ||
149 | { | ||
150 | struct rb_node *node = &head->node[entry - head->ctl_table].node; | ||
151 | |||
152 | rb_erase(node, &head->parent->root); | ||
153 | } | ||
154 | |||
155 | static void init_header(struct ctl_table_header *head, | ||
156 | struct ctl_table_root *root, struct ctl_table_set *set, | ||
157 | struct ctl_node *node, struct ctl_table *table) | ||
158 | { | ||
159 | head->ctl_table = table; | ||
160 | head->ctl_table_arg = table; | ||
161 | head->used = 0; | ||
162 | head->count = 1; | ||
163 | head->nreg = 1; | ||
164 | head->unregistering = NULL; | ||
165 | head->root = root; | ||
166 | head->set = set; | ||
167 | head->parent = NULL; | ||
168 | head->node = node; | ||
169 | if (node) { | ||
170 | struct ctl_table *entry; | ||
171 | for (entry = table; entry->procname; entry++, node++) { | ||
172 | rb_init_node(&node->node); | ||
173 | node->header = head; | ||
174 | } | ||
175 | } | ||
176 | } | ||
177 | |||
178 | static void erase_header(struct ctl_table_header *head) | ||
179 | { | ||
180 | struct ctl_table *entry; | ||
181 | for (entry = head->ctl_table; entry->procname; entry++) | ||
182 | erase_entry(head, entry); | ||
183 | } | ||
184 | |||
185 | static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) | ||
186 | { | ||
187 | struct ctl_table *entry; | ||
188 | int err; | ||
189 | |||
190 | dir->header.nreg++; | ||
191 | header->parent = dir; | ||
192 | err = insert_links(header); | ||
193 | if (err) | ||
194 | goto fail_links; | ||
195 | for (entry = header->ctl_table; entry->procname; entry++) { | ||
196 | err = insert_entry(header, entry); | ||
197 | if (err) | ||
198 | goto fail; | ||
199 | } | ||
200 | return 0; | ||
201 | fail: | ||
202 | erase_header(header); | ||
203 | put_links(header); | ||
204 | fail_links: | ||
205 | header->parent = NULL; | ||
206 | drop_sysctl_table(&dir->header); | ||
207 | return err; | ||
208 | } | ||
209 | |||
210 | /* called under sysctl_lock */ | ||
211 | static int use_table(struct ctl_table_header *p) | ||
212 | { | ||
213 | if (unlikely(p->unregistering)) | ||
214 | return 0; | ||
215 | p->used++; | ||
216 | return 1; | ||
217 | } | ||
218 | |||
219 | /* called under sysctl_lock */ | ||
220 | static void unuse_table(struct ctl_table_header *p) | ||
221 | { | ||
222 | if (!--p->used) | ||
223 | if (unlikely(p->unregistering)) | ||
224 | complete(p->unregistering); | ||
225 | } | ||
226 | |||
227 | /* called under sysctl_lock, will reacquire if has to wait */ | ||
228 | static void start_unregistering(struct ctl_table_header *p) | ||
229 | { | ||
230 | /* | ||
231 | * if p->used is 0, nobody will ever touch that entry again; | ||
232 | * we'll eliminate all paths to it before dropping sysctl_lock | ||
233 | */ | ||
234 | if (unlikely(p->used)) { | ||
235 | struct completion wait; | ||
236 | init_completion(&wait); | ||
237 | p->unregistering = &wait; | ||
238 | spin_unlock(&sysctl_lock); | ||
239 | wait_for_completion(&wait); | ||
240 | spin_lock(&sysctl_lock); | ||
241 | } else { | ||
242 | /* anything non-NULL; we'll never dereference it */ | ||
243 | p->unregistering = ERR_PTR(-EINVAL); | ||
244 | } | ||
245 | /* | ||
246 | * do not remove from the list until nobody holds it; walking the | ||
247 | * list in do_sysctl() relies on that. | ||
248 | */ | ||
249 | erase_header(p); | ||
250 | } | ||
251 | |||
252 | static void sysctl_head_get(struct ctl_table_header *head) | ||
253 | { | ||
254 | spin_lock(&sysctl_lock); | ||
255 | head->count++; | ||
256 | spin_unlock(&sysctl_lock); | ||
257 | } | ||
258 | |||
259 | void sysctl_head_put(struct ctl_table_header *head) | ||
260 | { | ||
261 | spin_lock(&sysctl_lock); | ||
262 | if (!--head->count) | ||
263 | kfree_rcu(head, rcu); | ||
264 | spin_unlock(&sysctl_lock); | ||
265 | } | ||
266 | |||
267 | static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) | ||
268 | { | ||
269 | if (!head) | ||
270 | BUG(); | ||
271 | spin_lock(&sysctl_lock); | ||
272 | if (!use_table(head)) | ||
273 | head = ERR_PTR(-ENOENT); | ||
274 | spin_unlock(&sysctl_lock); | ||
275 | return head; | ||
276 | } | ||
277 | |||
278 | static void sysctl_head_finish(struct ctl_table_header *head) | ||
279 | { | ||
280 | if (!head) | ||
281 | return; | ||
282 | spin_lock(&sysctl_lock); | ||
283 | unuse_table(head); | ||
284 | spin_unlock(&sysctl_lock); | ||
285 | } | ||
286 | |||
287 | static struct ctl_table_set * | ||
288 | lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) | ||
289 | { | ||
290 | struct ctl_table_set *set = &root->default_set; | ||
291 | if (root->lookup) | ||
292 | set = root->lookup(root, namespaces); | ||
293 | return set; | ||
294 | } | ||
295 | |||
296 | static struct ctl_table *lookup_entry(struct ctl_table_header **phead, | ||
297 | struct ctl_dir *dir, | ||
298 | const char *name, int namelen) | ||
299 | { | ||
300 | struct ctl_table_header *head; | ||
301 | struct ctl_table *entry; | ||
302 | |||
303 | spin_lock(&sysctl_lock); | ||
304 | entry = find_entry(&head, dir, name, namelen); | ||
305 | if (entry && use_table(head)) | ||
306 | *phead = head; | ||
307 | else | ||
308 | entry = NULL; | ||
309 | spin_unlock(&sysctl_lock); | ||
310 | return entry; | ||
311 | } | ||
312 | |||
313 | static struct ctl_node *first_usable_entry(struct rb_node *node) | ||
314 | { | ||
315 | struct ctl_node *ctl_node; | ||
316 | |||
317 | for (;node; node = rb_next(node)) { | ||
318 | ctl_node = rb_entry(node, struct ctl_node, node); | ||
319 | if (use_table(ctl_node->header)) | ||
320 | return ctl_node; | ||
321 | } | ||
322 | return NULL; | ||
323 | } | ||
324 | |||
325 | static void first_entry(struct ctl_dir *dir, | ||
326 | struct ctl_table_header **phead, struct ctl_table **pentry) | ||
327 | { | ||
328 | struct ctl_table_header *head = NULL; | ||
329 | struct ctl_table *entry = NULL; | ||
330 | struct ctl_node *ctl_node; | ||
331 | |||
332 | spin_lock(&sysctl_lock); | ||
333 | ctl_node = first_usable_entry(rb_first(&dir->root)); | ||
334 | spin_unlock(&sysctl_lock); | ||
335 | if (ctl_node) { | ||
336 | head = ctl_node->header; | ||
337 | entry = &head->ctl_table[ctl_node - head->node]; | ||
338 | } | ||
339 | *phead = head; | ||
340 | *pentry = entry; | ||
341 | } | ||
342 | |||
343 | static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry) | ||
344 | { | ||
345 | struct ctl_table_header *head = *phead; | ||
346 | struct ctl_table *entry = *pentry; | ||
347 | struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; | ||
348 | |||
349 | spin_lock(&sysctl_lock); | ||
350 | unuse_table(head); | ||
351 | |||
352 | ctl_node = first_usable_entry(rb_next(&ctl_node->node)); | ||
353 | spin_unlock(&sysctl_lock); | ||
354 | head = NULL; | ||
355 | if (ctl_node) { | ||
356 | head = ctl_node->header; | ||
357 | entry = &head->ctl_table[ctl_node - head->node]; | ||
358 | } | ||
359 | *phead = head; | ||
360 | *pentry = entry; | ||
361 | } | ||
362 | |||
363 | void register_sysctl_root(struct ctl_table_root *root) | ||
364 | { | ||
365 | } | ||
366 | |||
367 | /* | ||
368 | * sysctl_perm does NOT grant the superuser all rights automatically, because | ||
369 | * some sysctl variables are readonly even to root. | ||
370 | */ | ||
371 | |||
372 | static int test_perm(int mode, int op) | ||
373 | { | ||
374 | if (!current_euid()) | ||
375 | mode >>= 6; | ||
376 | else if (in_egroup_p(0)) | ||
377 | mode >>= 3; | ||
378 | if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) | ||
379 | return 0; | ||
380 | return -EACCES; | ||
381 | } | ||
382 | |||
383 | static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) | ||
384 | { | ||
385 | int mode; | ||
386 | |||
387 | if (root->permissions) | ||
388 | mode = root->permissions(root, current->nsproxy, table); | ||
389 | else | ||
390 | mode = table->mode; | ||
391 | |||
392 | return test_perm(mode, op); | ||
393 | } | ||
394 | |||
29 | static struct inode *proc_sys_make_inode(struct super_block *sb, | 395 | static struct inode *proc_sys_make_inode(struct super_block *sb, |
30 | struct ctl_table_header *head, struct ctl_table *table) | 396 | struct ctl_table_header *head, struct ctl_table *table) |
31 | { | 397 | { |
@@ -45,13 +411,12 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, | |||
45 | 411 | ||
46 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 412 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
47 | inode->i_mode = table->mode; | 413 | inode->i_mode = table->mode; |
48 | if (!table->child) { | 414 | if (!S_ISDIR(table->mode)) { |
49 | inode->i_mode |= S_IFREG; | 415 | inode->i_mode |= S_IFREG; |
50 | inode->i_op = &proc_sys_inode_operations; | 416 | inode->i_op = &proc_sys_inode_operations; |
51 | inode->i_fop = &proc_sys_file_operations; | 417 | inode->i_fop = &proc_sys_file_operations; |
52 | } else { | 418 | } else { |
53 | inode->i_mode |= S_IFDIR; | 419 | inode->i_mode |= S_IFDIR; |
54 | clear_nlink(inode); | ||
55 | inode->i_op = &proc_sys_dir_operations; | 420 | inode->i_op = &proc_sys_dir_operations; |
56 | inode->i_fop = &proc_sys_dir_file_operations; | 421 | inode->i_fop = &proc_sys_dir_file_operations; |
57 | } | 422 | } |
@@ -59,70 +424,42 @@ out: | |||
59 | return inode; | 424 | return inode; |
60 | } | 425 | } |
61 | 426 | ||
62 | static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) | ||
63 | { | ||
64 | int len; | ||
65 | for ( ; p->procname; p++) { | ||
66 | |||
67 | if (!p->procname) | ||
68 | continue; | ||
69 | |||
70 | len = strlen(p->procname); | ||
71 | if (len != name->len) | ||
72 | continue; | ||
73 | |||
74 | if (memcmp(p->procname, name->name, len) != 0) | ||
75 | continue; | ||
76 | |||
77 | /* I have a match */ | ||
78 | return p; | ||
79 | } | ||
80 | return NULL; | ||
81 | } | ||
82 | |||
83 | static struct ctl_table_header *grab_header(struct inode *inode) | 427 | static struct ctl_table_header *grab_header(struct inode *inode) |
84 | { | 428 | { |
85 | if (PROC_I(inode)->sysctl) | 429 | struct ctl_table_header *head = PROC_I(inode)->sysctl; |
86 | return sysctl_head_grab(PROC_I(inode)->sysctl); | 430 | if (!head) |
87 | else | 431 | head = &sysctl_table_root.default_set.dir.header; |
88 | return sysctl_head_next(NULL); | 432 | return sysctl_head_grab(head); |
89 | } | 433 | } |
90 | 434 | ||
91 | static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, | 435 | static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, |
92 | struct nameidata *nd) | 436 | struct nameidata *nd) |
93 | { | 437 | { |
94 | struct ctl_table_header *head = grab_header(dir); | 438 | struct ctl_table_header *head = grab_header(dir); |
95 | struct ctl_table *table = PROC_I(dir)->sysctl_entry; | ||
96 | struct ctl_table_header *h = NULL; | 439 | struct ctl_table_header *h = NULL; |
97 | struct qstr *name = &dentry->d_name; | 440 | struct qstr *name = &dentry->d_name; |
98 | struct ctl_table *p; | 441 | struct ctl_table *p; |
99 | struct inode *inode; | 442 | struct inode *inode; |
100 | struct dentry *err = ERR_PTR(-ENOENT); | 443 | struct dentry *err = ERR_PTR(-ENOENT); |
444 | struct ctl_dir *ctl_dir; | ||
445 | int ret; | ||
101 | 446 | ||
102 | if (IS_ERR(head)) | 447 | if (IS_ERR(head)) |
103 | return ERR_CAST(head); | 448 | return ERR_CAST(head); |
104 | 449 | ||
105 | if (table && !table->child) { | 450 | ctl_dir = container_of(head, struct ctl_dir, header); |
106 | WARN_ON(1); | ||
107 | goto out; | ||
108 | } | ||
109 | |||
110 | table = table ? table->child : head->ctl_table; | ||
111 | |||
112 | p = find_in_table(table, name); | ||
113 | if (!p) { | ||
114 | for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { | ||
115 | if (h->attached_to != table) | ||
116 | continue; | ||
117 | p = find_in_table(h->attached_by, name); | ||
118 | if (p) | ||
119 | break; | ||
120 | } | ||
121 | } | ||
122 | 451 | ||
452 | p = lookup_entry(&h, ctl_dir, name->name, name->len); | ||
123 | if (!p) | 453 | if (!p) |
124 | goto out; | 454 | goto out; |
125 | 455 | ||
456 | if (S_ISLNK(p->mode)) { | ||
457 | ret = sysctl_follow_link(&h, &p, current->nsproxy); | ||
458 | err = ERR_PTR(ret); | ||
459 | if (ret) | ||
460 | goto out; | ||
461 | } | ||
462 | |||
126 | err = ERR_PTR(-ENOMEM); | 463 | err = ERR_PTR(-ENOMEM); |
127 | inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); | 464 | inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); |
128 | if (h) | 465 | if (h) |
@@ -190,20 +527,32 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, | |||
190 | 527 | ||
191 | static int proc_sys_open(struct inode *inode, struct file *filp) | 528 | static int proc_sys_open(struct inode *inode, struct file *filp) |
192 | { | 529 | { |
530 | struct ctl_table_header *head = grab_header(inode); | ||
193 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | 531 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; |
194 | 532 | ||
533 | /* sysctl was unregistered */ | ||
534 | if (IS_ERR(head)) | ||
535 | return PTR_ERR(head); | ||
536 | |||
195 | if (table->poll) | 537 | if (table->poll) |
196 | filp->private_data = proc_sys_poll_event(table->poll); | 538 | filp->private_data = proc_sys_poll_event(table->poll); |
197 | 539 | ||
540 | sysctl_head_finish(head); | ||
541 | |||
198 | return 0; | 542 | return 0; |
199 | } | 543 | } |
200 | 544 | ||
201 | static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) | 545 | static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) |
202 | { | 546 | { |
203 | struct inode *inode = filp->f_path.dentry->d_inode; | 547 | struct inode *inode = filp->f_path.dentry->d_inode; |
548 | struct ctl_table_header *head = grab_header(inode); | ||
204 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | 549 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; |
205 | unsigned long event = (unsigned long)filp->private_data; | ||
206 | unsigned int ret = DEFAULT_POLLMASK; | 550 | unsigned int ret = DEFAULT_POLLMASK; |
551 | unsigned long event; | ||
552 | |||
553 | /* sysctl was unregistered */ | ||
554 | if (IS_ERR(head)) | ||
555 | return POLLERR | POLLHUP; | ||
207 | 556 | ||
208 | if (!table->proc_handler) | 557 | if (!table->proc_handler) |
209 | goto out; | 558 | goto out; |
@@ -211,6 +560,7 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) | |||
211 | if (!table->poll) | 560 | if (!table->poll) |
212 | goto out; | 561 | goto out; |
213 | 562 | ||
563 | event = (unsigned long)filp->private_data; | ||
214 | poll_wait(filp, &table->poll->wait, wait); | 564 | poll_wait(filp, &table->poll->wait, wait); |
215 | 565 | ||
216 | if (event != atomic_read(&table->poll->event)) { | 566 | if (event != atomic_read(&table->poll->event)) { |
@@ -219,6 +569,8 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) | |||
219 | } | 569 | } |
220 | 570 | ||
221 | out: | 571 | out: |
572 | sysctl_head_finish(head); | ||
573 | |||
222 | return ret; | 574 | return ret; |
223 | } | 575 | } |
224 | 576 | ||
@@ -260,28 +612,45 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, | |||
260 | return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); | 612 | return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); |
261 | } | 613 | } |
262 | 614 | ||
615 | static int proc_sys_link_fill_cache(struct file *filp, void *dirent, | ||
616 | filldir_t filldir, | ||
617 | struct ctl_table_header *head, | ||
618 | struct ctl_table *table) | ||
619 | { | ||
620 | int err, ret = 0; | ||
621 | head = sysctl_head_grab(head); | ||
622 | |||
623 | if (S_ISLNK(table->mode)) { | ||
624 | /* It is not an error if we can not follow the link ignore it */ | ||
625 | err = sysctl_follow_link(&head, &table, current->nsproxy); | ||
626 | if (err) | ||
627 | goto out; | ||
628 | } | ||
629 | |||
630 | ret = proc_sys_fill_cache(filp, dirent, filldir, head, table); | ||
631 | out: | ||
632 | sysctl_head_finish(head); | ||
633 | return ret; | ||
634 | } | ||
635 | |||
263 | static int scan(struct ctl_table_header *head, ctl_table *table, | 636 | static int scan(struct ctl_table_header *head, ctl_table *table, |
264 | unsigned long *pos, struct file *file, | 637 | unsigned long *pos, struct file *file, |
265 | void *dirent, filldir_t filldir) | 638 | void *dirent, filldir_t filldir) |
266 | { | 639 | { |
640 | int res; | ||
267 | 641 | ||
268 | for (; table->procname; table++, (*pos)++) { | 642 | if ((*pos)++ < file->f_pos) |
269 | int res; | 643 | return 0; |
270 | |||
271 | /* Can't do anything without a proc name */ | ||
272 | if (!table->procname) | ||
273 | continue; | ||
274 | |||
275 | if (*pos < file->f_pos) | ||
276 | continue; | ||
277 | 644 | ||
645 | if (unlikely(S_ISLNK(table->mode))) | ||
646 | res = proc_sys_link_fill_cache(file, dirent, filldir, head, table); | ||
647 | else | ||
278 | res = proc_sys_fill_cache(file, dirent, filldir, head, table); | 648 | res = proc_sys_fill_cache(file, dirent, filldir, head, table); |
279 | if (res) | ||
280 | return res; | ||
281 | 649 | ||
282 | file->f_pos = *pos + 1; | 650 | if (res == 0) |
283 | } | 651 | file->f_pos = *pos; |
284 | return 0; | 652 | |
653 | return res; | ||
285 | } | 654 | } |
286 | 655 | ||
287 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | 656 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) |
@@ -289,20 +658,16 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
289 | struct dentry *dentry = filp->f_path.dentry; | 658 | struct dentry *dentry = filp->f_path.dentry; |
290 | struct inode *inode = dentry->d_inode; | 659 | struct inode *inode = dentry->d_inode; |
291 | struct ctl_table_header *head = grab_header(inode); | 660 | struct ctl_table_header *head = grab_header(inode); |
292 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | ||
293 | struct ctl_table_header *h = NULL; | 661 | struct ctl_table_header *h = NULL; |
662 | struct ctl_table *entry; | ||
663 | struct ctl_dir *ctl_dir; | ||
294 | unsigned long pos; | 664 | unsigned long pos; |
295 | int ret = -EINVAL; | 665 | int ret = -EINVAL; |
296 | 666 | ||
297 | if (IS_ERR(head)) | 667 | if (IS_ERR(head)) |
298 | return PTR_ERR(head); | 668 | return PTR_ERR(head); |
299 | 669 | ||
300 | if (table && !table->child) { | 670 | ctl_dir = container_of(head, struct ctl_dir, header); |
301 | WARN_ON(1); | ||
302 | goto out; | ||
303 | } | ||
304 | |||
305 | table = table ? table->child : head->ctl_table; | ||
306 | 671 | ||
307 | ret = 0; | 672 | ret = 0; |
308 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ | 673 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ |
@@ -320,14 +685,8 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
320 | } | 685 | } |
321 | pos = 2; | 686 | pos = 2; |
322 | 687 | ||
323 | ret = scan(head, table, &pos, filp, dirent, filldir); | 688 | for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { |
324 | if (ret) | 689 | ret = scan(h, entry, &pos, filp, dirent, filldir); |
325 | goto out; | ||
326 | |||
327 | for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { | ||
328 | if (h->attached_to != table) | ||
329 | continue; | ||
330 | ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); | ||
331 | if (ret) { | 690 | if (ret) { |
332 | sysctl_head_finish(h); | 691 | sysctl_head_finish(h); |
333 | break; | 692 | break; |
@@ -447,6 +806,21 @@ static int proc_sys_delete(const struct dentry *dentry) | |||
447 | return !!PROC_I(dentry->d_inode)->sysctl->unregistering; | 806 | return !!PROC_I(dentry->d_inode)->sysctl->unregistering; |
448 | } | 807 | } |
449 | 808 | ||
809 | static int sysctl_is_seen(struct ctl_table_header *p) | ||
810 | { | ||
811 | struct ctl_table_set *set = p->set; | ||
812 | int res; | ||
813 | spin_lock(&sysctl_lock); | ||
814 | if (p->unregistering) | ||
815 | res = 0; | ||
816 | else if (!set->is_seen) | ||
817 | res = 1; | ||
818 | else | ||
819 | res = set->is_seen(set); | ||
820 | spin_unlock(&sysctl_lock); | ||
821 | return res; | ||
822 | } | ||
823 | |||
450 | static int proc_sys_compare(const struct dentry *parent, | 824 | static int proc_sys_compare(const struct dentry *parent, |
451 | const struct inode *pinode, | 825 | const struct inode *pinode, |
452 | const struct dentry *dentry, const struct inode *inode, | 826 | const struct dentry *dentry, const struct inode *inode, |
@@ -472,6 +846,753 @@ static const struct dentry_operations proc_sys_dentry_operations = { | |||
472 | .d_compare = proc_sys_compare, | 846 | .d_compare = proc_sys_compare, |
473 | }; | 847 | }; |
474 | 848 | ||
849 | static struct ctl_dir *find_subdir(struct ctl_dir *dir, | ||
850 | const char *name, int namelen) | ||
851 | { | ||
852 | struct ctl_table_header *head; | ||
853 | struct ctl_table *entry; | ||
854 | |||
855 | entry = find_entry(&head, dir, name, namelen); | ||
856 | if (!entry) | ||
857 | return ERR_PTR(-ENOENT); | ||
858 | if (!S_ISDIR(entry->mode)) | ||
859 | return ERR_PTR(-ENOTDIR); | ||
860 | return container_of(head, struct ctl_dir, header); | ||
861 | } | ||
862 | |||
863 | static struct ctl_dir *new_dir(struct ctl_table_set *set, | ||
864 | const char *name, int namelen) | ||
865 | { | ||
866 | struct ctl_table *table; | ||
867 | struct ctl_dir *new; | ||
868 | struct ctl_node *node; | ||
869 | char *new_name; | ||
870 | |||
871 | new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) + | ||
872 | sizeof(struct ctl_table)*2 + namelen + 1, | ||
873 | GFP_KERNEL); | ||
874 | if (!new) | ||
875 | return NULL; | ||
876 | |||
877 | node = (struct ctl_node *)(new + 1); | ||
878 | table = (struct ctl_table *)(node + 1); | ||
879 | new_name = (char *)(table + 2); | ||
880 | memcpy(new_name, name, namelen); | ||
881 | new_name[namelen] = '\0'; | ||
882 | table[0].procname = new_name; | ||
883 | table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; | ||
884 | init_header(&new->header, set->dir.header.root, set, node, table); | ||
885 | |||
886 | return new; | ||
887 | } | ||
888 | |||
889 | /** | ||
890 | * get_subdir - find or create a subdir with the specified name. | ||
891 | * @dir: Directory to create the subdirectory in | ||
892 | * @name: The name of the subdirectory to find or create | ||
893 | * @namelen: The length of name | ||
894 | * | ||
895 | * Takes a directory with an elevated reference count so we know that | ||
896 | * if we drop the lock the directory will not go away. Upon success | ||
897 | * the reference is moved from @dir to the returned subdirectory. | ||
898 | * Upon error an error code is returned and the reference on @dir is | ||
899 | * simply dropped. | ||
900 | */ | ||
901 | static struct ctl_dir *get_subdir(struct ctl_dir *dir, | ||
902 | const char *name, int namelen) | ||
903 | { | ||
904 | struct ctl_table_set *set = dir->header.set; | ||
905 | struct ctl_dir *subdir, *new = NULL; | ||
906 | int err; | ||
907 | |||
908 | spin_lock(&sysctl_lock); | ||
909 | subdir = find_subdir(dir, name, namelen); | ||
910 | if (!IS_ERR(subdir)) | ||
911 | goto found; | ||
912 | if (PTR_ERR(subdir) != -ENOENT) | ||
913 | goto failed; | ||
914 | |||
915 | spin_unlock(&sysctl_lock); | ||
916 | new = new_dir(set, name, namelen); | ||
917 | spin_lock(&sysctl_lock); | ||
918 | subdir = ERR_PTR(-ENOMEM); | ||
919 | if (!new) | ||
920 | goto failed; | ||
921 | |||
922 | /* Was the subdir added while we dropped the lock? */ | ||
923 | subdir = find_subdir(dir, name, namelen); | ||
924 | if (!IS_ERR(subdir)) | ||
925 | goto found; | ||
926 | if (PTR_ERR(subdir) != -ENOENT) | ||
927 | goto failed; | ||
928 | |||
929 | /* Nope. Use our freshly made directory entry. */ | ||
930 | err = insert_header(dir, &new->header); | ||
931 | subdir = ERR_PTR(err); | ||
932 | if (err) | ||
933 | goto failed; | ||
934 | subdir = new; | ||
935 | found: | ||
936 | subdir->header.nreg++; | ||
937 | failed: | ||
938 | if (unlikely(IS_ERR(subdir))) { | ||
939 | printk(KERN_ERR "sysctl could not get directory: "); | ||
940 | sysctl_print_dir(dir); | ||
941 | printk(KERN_CONT "/%*.*s %ld\n", | ||
942 | namelen, namelen, name, PTR_ERR(subdir)); | ||
943 | } | ||
944 | drop_sysctl_table(&dir->header); | ||
945 | if (new) | ||
946 | drop_sysctl_table(&new->header); | ||
947 | spin_unlock(&sysctl_lock); | ||
948 | return subdir; | ||
949 | } | ||
950 | |||
951 | static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) | ||
952 | { | ||
953 | struct ctl_dir *parent; | ||
954 | const char *procname; | ||
955 | if (!dir->header.parent) | ||
956 | return &set->dir; | ||
957 | parent = xlate_dir(set, dir->header.parent); | ||
958 | if (IS_ERR(parent)) | ||
959 | return parent; | ||
960 | procname = dir->header.ctl_table[0].procname; | ||
961 | return find_subdir(parent, procname, strlen(procname)); | ||
962 | } | ||
963 | |||
964 | static int sysctl_follow_link(struct ctl_table_header **phead, | ||
965 | struct ctl_table **pentry, struct nsproxy *namespaces) | ||
966 | { | ||
967 | struct ctl_table_header *head; | ||
968 | struct ctl_table_root *root; | ||
969 | struct ctl_table_set *set; | ||
970 | struct ctl_table *entry; | ||
971 | struct ctl_dir *dir; | ||
972 | int ret; | ||
973 | |||
974 | ret = 0; | ||
975 | spin_lock(&sysctl_lock); | ||
976 | root = (*pentry)->data; | ||
977 | set = lookup_header_set(root, namespaces); | ||
978 | dir = xlate_dir(set, (*phead)->parent); | ||
979 | if (IS_ERR(dir)) | ||
980 | ret = PTR_ERR(dir); | ||
981 | else { | ||
982 | const char *procname = (*pentry)->procname; | ||
983 | head = NULL; | ||
984 | entry = find_entry(&head, dir, procname, strlen(procname)); | ||
985 | ret = -ENOENT; | ||
986 | if (entry && use_table(head)) { | ||
987 | unuse_table(*phead); | ||
988 | *phead = head; | ||
989 | *pentry = entry; | ||
990 | ret = 0; | ||
991 | } | ||
992 | } | ||
993 | |||
994 | spin_unlock(&sysctl_lock); | ||
995 | return ret; | ||
996 | } | ||
997 | |||
998 | static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) | ||
999 | { | ||
1000 | struct va_format vaf; | ||
1001 | va_list args; | ||
1002 | |||
1003 | va_start(args, fmt); | ||
1004 | vaf.fmt = fmt; | ||
1005 | vaf.va = &args; | ||
1006 | |||
1007 | printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", | ||
1008 | path, table->procname, &vaf); | ||
1009 | |||
1010 | va_end(args); | ||
1011 | return -EINVAL; | ||
1012 | } | ||
1013 | |||
1014 | static int sysctl_check_table(const char *path, struct ctl_table *table) | ||
1015 | { | ||
1016 | int err = 0; | ||
1017 | for (; table->procname; table++) { | ||
1018 | if (table->child) | ||
1019 | err = sysctl_err(path, table, "Not a file"); | ||
1020 | |||
1021 | if ((table->proc_handler == proc_dostring) || | ||
1022 | (table->proc_handler == proc_dointvec) || | ||
1023 | (table->proc_handler == proc_dointvec_minmax) || | ||
1024 | (table->proc_handler == proc_dointvec_jiffies) || | ||
1025 | (table->proc_handler == proc_dointvec_userhz_jiffies) || | ||
1026 | (table->proc_handler == proc_dointvec_ms_jiffies) || | ||
1027 | (table->proc_handler == proc_doulongvec_minmax) || | ||
1028 | (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { | ||
1029 | if (!table->data) | ||
1030 | err = sysctl_err(path, table, "No data"); | ||
1031 | if (!table->maxlen) | ||
1032 | err = sysctl_err(path, table, "No maxlen"); | ||
1033 | } | ||
1034 | if (!table->proc_handler) | ||
1035 | err = sysctl_err(path, table, "No proc_handler"); | ||
1036 | |||
1037 | if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) | ||
1038 | err = sysctl_err(path, table, "bogus .mode 0%o", | ||
1039 | table->mode); | ||
1040 | } | ||
1041 | return err; | ||
1042 | } | ||
1043 | |||
1044 | static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table, | ||
1045 | struct ctl_table_root *link_root) | ||
1046 | { | ||
1047 | struct ctl_table *link_table, *entry, *link; | ||
1048 | struct ctl_table_header *links; | ||
1049 | struct ctl_node *node; | ||
1050 | char *link_name; | ||
1051 | int nr_entries, name_bytes; | ||
1052 | |||
1053 | name_bytes = 0; | ||
1054 | nr_entries = 0; | ||
1055 | for (entry = table; entry->procname; entry++) { | ||
1056 | nr_entries++; | ||
1057 | name_bytes += strlen(entry->procname) + 1; | ||
1058 | } | ||
1059 | |||
1060 | links = kzalloc(sizeof(struct ctl_table_header) + | ||
1061 | sizeof(struct ctl_node)*nr_entries + | ||
1062 | sizeof(struct ctl_table)*(nr_entries + 1) + | ||
1063 | name_bytes, | ||
1064 | GFP_KERNEL); | ||
1065 | |||
1066 | if (!links) | ||
1067 | return NULL; | ||
1068 | |||
1069 | node = (struct ctl_node *)(links + 1); | ||
1070 | link_table = (struct ctl_table *)(node + nr_entries); | ||
1071 | link_name = (char *)&link_table[nr_entries + 1]; | ||
1072 | |||
1073 | for (link = link_table, entry = table; entry->procname; link++, entry++) { | ||
1074 | int len = strlen(entry->procname) + 1; | ||
1075 | memcpy(link_name, entry->procname, len); | ||
1076 | link->procname = link_name; | ||
1077 | link->mode = S_IFLNK|S_IRWXUGO; | ||
1078 | link->data = link_root; | ||
1079 | link_name += len; | ||
1080 | } | ||
1081 | init_header(links, dir->header.root, dir->header.set, node, link_table); | ||
1082 | links->nreg = nr_entries; | ||
1083 | |||
1084 | return links; | ||
1085 | } | ||
1086 | |||
1087 | static bool get_links(struct ctl_dir *dir, | ||
1088 | struct ctl_table *table, struct ctl_table_root *link_root) | ||
1089 | { | ||
1090 | struct ctl_table_header *head; | ||
1091 | struct ctl_table *entry, *link; | ||
1092 | |||
1093 | /* Are there links available for every entry in table? */ | ||
1094 | for (entry = table; entry->procname; entry++) { | ||
1095 | const char *procname = entry->procname; | ||
1096 | link = find_entry(&head, dir, procname, strlen(procname)); | ||
1097 | if (!link) | ||
1098 | return false; | ||
1099 | if (S_ISDIR(link->mode) && S_ISDIR(entry->mode)) | ||
1100 | continue; | ||
1101 | if (S_ISLNK(link->mode) && (link->data == link_root)) | ||
1102 | continue; | ||
1103 | return false; | ||
1104 | } | ||
1105 | |||
1106 | /* The checks passed. Increase the registration count on the links */ | ||
1107 | for (entry = table; entry->procname; entry++) { | ||
1108 | const char *procname = entry->procname; | ||
1109 | link = find_entry(&head, dir, procname, strlen(procname)); | ||
1110 | head->nreg++; | ||
1111 | } | ||
1112 | return true; | ||
1113 | } | ||
1114 | |||
1115 | static int insert_links(struct ctl_table_header *head) | ||
1116 | { | ||
1117 | struct ctl_table_set *root_set = &sysctl_table_root.default_set; | ||
1118 | struct ctl_dir *core_parent = NULL; | ||
1119 | struct ctl_table_header *links; | ||
1120 | int err; | ||
1121 | |||
1122 | if (head->set == root_set) | ||
1123 | return 0; | ||
1124 | |||
1125 | core_parent = xlate_dir(root_set, head->parent); | ||
1126 | if (IS_ERR(core_parent)) | ||
1127 | return 0; | ||
1128 | |||
1129 | if (get_links(core_parent, head->ctl_table, head->root)) | ||
1130 | return 0; | ||
1131 | |||
1132 | core_parent->header.nreg++; | ||
1133 | spin_unlock(&sysctl_lock); | ||
1134 | |||
1135 | links = new_links(core_parent, head->ctl_table, head->root); | ||
1136 | |||
1137 | spin_lock(&sysctl_lock); | ||
1138 | err = -ENOMEM; | ||
1139 | if (!links) | ||
1140 | goto out; | ||
1141 | |||
1142 | err = 0; | ||
1143 | if (get_links(core_parent, head->ctl_table, head->root)) { | ||
1144 | kfree(links); | ||
1145 | goto out; | ||
1146 | } | ||
1147 | |||
1148 | err = insert_header(core_parent, links); | ||
1149 | if (err) | ||
1150 | kfree(links); | ||
1151 | out: | ||
1152 | drop_sysctl_table(&core_parent->header); | ||
1153 | return err; | ||
1154 | } | ||
1155 | |||
1156 | /** | ||
1157 | * __register_sysctl_table - register a leaf sysctl table | ||
1158 | * @set: Sysctl tree to register on | ||
1159 | * @path: The path to the directory the sysctl table is in. | ||
1160 | * @table: the top-level table structure | ||
1161 | * | ||
1162 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1163 | * array. A completely 0 filled entry terminates the table. | ||
1164 | * | ||
1165 | * The members of the &struct ctl_table structure are used as follows: | ||
1166 | * | ||
1167 | * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not | ||
1168 | * enter a sysctl file | ||
1169 | * | ||
1170 | * data - a pointer to data for use by proc_handler | ||
1171 | * | ||
1172 | * maxlen - the maximum size in bytes of the data | ||
1173 | * | ||
1174 | * mode - the file permissions for the /proc/sys file | ||
1175 | * | ||
1176 | * child - must be %NULL. | ||
1177 | * | ||
1178 | * proc_handler - the text handler routine (described below) | ||
1179 | * | ||
1180 | * extra1, extra2 - extra pointers usable by the proc handler routines | ||
1181 | * | ||
1182 | * Leaf nodes in the sysctl tree will be represented by a single file | ||
1183 | * under /proc; non-leaf nodes will be represented by directories. | ||
1184 | * | ||
1185 | * There must be a proc_handler routine for any terminal nodes. | ||
1186 | * Several default handlers are available to cover common cases - | ||
1187 | * | ||
1188 | * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), | ||
1189 | * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), | ||
1190 | * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() | ||
1191 | * | ||
1192 | * It is the handler's job to read the input buffer from user memory | ||
1193 | * and process it. The handler should return 0 on success. | ||
1194 | * | ||
1195 | * This routine returns %NULL on a failure to register, and a pointer | ||
1196 | * to the table header on success. | ||
1197 | */ | ||
1198 | struct ctl_table_header *__register_sysctl_table( | ||
1199 | struct ctl_table_set *set, | ||
1200 | const char *path, struct ctl_table *table) | ||
1201 | { | ||
1202 | struct ctl_table_root *root = set->dir.header.root; | ||
1203 | struct ctl_table_header *header; | ||
1204 | const char *name, *nextname; | ||
1205 | struct ctl_dir *dir; | ||
1206 | struct ctl_table *entry; | ||
1207 | struct ctl_node *node; | ||
1208 | int nr_entries = 0; | ||
1209 | |||
1210 | for (entry = table; entry->procname; entry++) | ||
1211 | nr_entries++; | ||
1212 | |||
1213 | header = kzalloc(sizeof(struct ctl_table_header) + | ||
1214 | sizeof(struct ctl_node)*nr_entries, GFP_KERNEL); | ||
1215 | if (!header) | ||
1216 | return NULL; | ||
1217 | |||
1218 | node = (struct ctl_node *)(header + 1); | ||
1219 | init_header(header, root, set, node, table); | ||
1220 | if (sysctl_check_table(path, table)) | ||
1221 | goto fail; | ||
1222 | |||
1223 | spin_lock(&sysctl_lock); | ||
1224 | dir = &set->dir; | ||
1225 | /* Reference moved down the directory tree by get_subdir */ | ||
1226 | dir->header.nreg++; | ||
1227 | spin_unlock(&sysctl_lock); | ||
1228 | |||
1229 | /* Find the directory for the ctl_table */ | ||
1230 | for (name = path; name; name = nextname) { | ||
1231 | int namelen; | ||
1232 | nextname = strchr(name, '/'); | ||
1233 | if (nextname) { | ||
1234 | namelen = nextname - name; | ||
1235 | nextname++; | ||
1236 | } else { | ||
1237 | namelen = strlen(name); | ||
1238 | } | ||
1239 | if (namelen == 0) | ||
1240 | continue; | ||
1241 | |||
1242 | dir = get_subdir(dir, name, namelen); | ||
1243 | if (IS_ERR(dir)) | ||
1244 | goto fail; | ||
1245 | } | ||
1246 | |||
1247 | spin_lock(&sysctl_lock); | ||
1248 | if (insert_header(dir, header)) | ||
1249 | goto fail_put_dir_locked; | ||
1250 | |||
1251 | drop_sysctl_table(&dir->header); | ||
1252 | spin_unlock(&sysctl_lock); | ||
1253 | |||
1254 | return header; | ||
1255 | |||
1256 | fail_put_dir_locked: | ||
1257 | drop_sysctl_table(&dir->header); | ||
1258 | spin_unlock(&sysctl_lock); | ||
1259 | fail: | ||
1260 | kfree(header); | ||
1261 | dump_stack(); | ||
1262 | return NULL; | ||
1263 | } | ||
1264 | |||
1265 | /** | ||
1266 | * register_sysctl - register a sysctl table | ||
1267 | * @path: The path to the directory the sysctl table is in. | ||
1268 | * @table: the table structure | ||
1269 | * | ||
1270 | * Register a sysctl table. @table should be a filled in ctl_table | ||
1271 | * array. A completely 0 filled entry terminates the table. | ||
1272 | * | ||
1273 | * See __register_sysctl_table for more details. | ||
1274 | */ | ||
1275 | struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) | ||
1276 | { | ||
1277 | return __register_sysctl_table(&sysctl_table_root.default_set, | ||
1278 | path, table); | ||
1279 | } | ||
1280 | EXPORT_SYMBOL(register_sysctl); | ||
1281 | |||
1282 | static char *append_path(const char *path, char *pos, const char *name) | ||
1283 | { | ||
1284 | int namelen; | ||
1285 | namelen = strlen(name); | ||
1286 | if (((pos - path) + namelen + 2) >= PATH_MAX) | ||
1287 | return NULL; | ||
1288 | memcpy(pos, name, namelen); | ||
1289 | pos[namelen] = '/'; | ||
1290 | pos[namelen + 1] = '\0'; | ||
1291 | pos += namelen + 1; | ||
1292 | return pos; | ||
1293 | } | ||
1294 | |||
1295 | static int count_subheaders(struct ctl_table *table) | ||
1296 | { | ||
1297 | int has_files = 0; | ||
1298 | int nr_subheaders = 0; | ||
1299 | struct ctl_table *entry; | ||
1300 | |||
1301 | /* special case: no directory and empty directory */ | ||
1302 | if (!table || !table->procname) | ||
1303 | return 1; | ||
1304 | |||
1305 | for (entry = table; entry->procname; entry++) { | ||
1306 | if (entry->child) | ||
1307 | nr_subheaders += count_subheaders(entry->child); | ||
1308 | else | ||
1309 | has_files = 1; | ||
1310 | } | ||
1311 | return nr_subheaders + has_files; | ||
1312 | } | ||
1313 | |||
1314 | static int register_leaf_sysctl_tables(const char *path, char *pos, | ||
1315 | struct ctl_table_header ***subheader, struct ctl_table_set *set, | ||
1316 | struct ctl_table *table) | ||
1317 | { | ||
1318 | struct ctl_table *ctl_table_arg = NULL; | ||
1319 | struct ctl_table *entry, *files; | ||
1320 | int nr_files = 0; | ||
1321 | int nr_dirs = 0; | ||
1322 | int err = -ENOMEM; | ||
1323 | |||
1324 | for (entry = table; entry->procname; entry++) { | ||
1325 | if (entry->child) | ||
1326 | nr_dirs++; | ||
1327 | else | ||
1328 | nr_files++; | ||
1329 | } | ||
1330 | |||
1331 | files = table; | ||
1332 | /* If there are mixed files and directories we need a new table */ | ||
1333 | if (nr_dirs && nr_files) { | ||
1334 | struct ctl_table *new; | ||
1335 | files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1), | ||
1336 | GFP_KERNEL); | ||
1337 | if (!files) | ||
1338 | goto out; | ||
1339 | |||
1340 | ctl_table_arg = files; | ||
1341 | for (new = files, entry = table; entry->procname; entry++) { | ||
1342 | if (entry->child) | ||
1343 | continue; | ||
1344 | *new = *entry; | ||
1345 | new++; | ||
1346 | } | ||
1347 | } | ||
1348 | |||
1349 | /* Register everything except a directory full of subdirectories */ | ||
1350 | if (nr_files || !nr_dirs) { | ||
1351 | struct ctl_table_header *header; | ||
1352 | header = __register_sysctl_table(set, path, files); | ||
1353 | if (!header) { | ||
1354 | kfree(ctl_table_arg); | ||
1355 | goto out; | ||
1356 | } | ||
1357 | |||
1358 | /* Remember if we need to free the file table */ | ||
1359 | header->ctl_table_arg = ctl_table_arg; | ||
1360 | **subheader = header; | ||
1361 | (*subheader)++; | ||
1362 | } | ||
1363 | |||
1364 | /* Recurse into the subdirectories. */ | ||
1365 | for (entry = table; entry->procname; entry++) { | ||
1366 | char *child_pos; | ||
1367 | |||
1368 | if (!entry->child) | ||
1369 | continue; | ||
1370 | |||
1371 | err = -ENAMETOOLONG; | ||
1372 | child_pos = append_path(path, pos, entry->procname); | ||
1373 | if (!child_pos) | ||
1374 | goto out; | ||
1375 | |||
1376 | err = register_leaf_sysctl_tables(path, child_pos, subheader, | ||
1377 | set, entry->child); | ||
1378 | pos[0] = '\0'; | ||
1379 | if (err) | ||
1380 | goto out; | ||
1381 | } | ||
1382 | err = 0; | ||
1383 | out: | ||
1384 | /* On failure our caller will unregister all registered subheaders */ | ||
1385 | return err; | ||
1386 | } | ||
1387 | |||
1388 | /** | ||
1389 | * __register_sysctl_paths - register a sysctl table hierarchy | ||
1390 | * @set: Sysctl tree to register on | ||
1391 | * @path: The path to the directory the sysctl table is in. | ||
1392 | * @table: the top-level table structure | ||
1393 | * | ||
1394 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1395 | * array. A completely 0 filled entry terminates the table. | ||
1396 | * | ||
1397 | * See __register_sysctl_table for more details. | ||
1398 | */ | ||
1399 | struct ctl_table_header *__register_sysctl_paths( | ||
1400 | struct ctl_table_set *set, | ||
1401 | const struct ctl_path *path, struct ctl_table *table) | ||
1402 | { | ||
1403 | struct ctl_table *ctl_table_arg = table; | ||
1404 | int nr_subheaders = count_subheaders(table); | ||
1405 | struct ctl_table_header *header = NULL, **subheaders, **subheader; | ||
1406 | const struct ctl_path *component; | ||
1407 | char *new_path, *pos; | ||
1408 | |||
1409 | pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL); | ||
1410 | if (!new_path) | ||
1411 | return NULL; | ||
1412 | |||
1413 | pos[0] = '\0'; | ||
1414 | for (component = path; component->procname; component++) { | ||
1415 | pos = append_path(new_path, pos, component->procname); | ||
1416 | if (!pos) | ||
1417 | goto out; | ||
1418 | } | ||
1419 | while (table->procname && table->child && !table[1].procname) { | ||
1420 | pos = append_path(new_path, pos, table->procname); | ||
1421 | if (!pos) | ||
1422 | goto out; | ||
1423 | table = table->child; | ||
1424 | } | ||
1425 | if (nr_subheaders == 1) { | ||
1426 | header = __register_sysctl_table(set, new_path, table); | ||
1427 | if (header) | ||
1428 | header->ctl_table_arg = ctl_table_arg; | ||
1429 | } else { | ||
1430 | header = kzalloc(sizeof(*header) + | ||
1431 | sizeof(*subheaders)*nr_subheaders, GFP_KERNEL); | ||
1432 | if (!header) | ||
1433 | goto out; | ||
1434 | |||
1435 | subheaders = (struct ctl_table_header **) (header + 1); | ||
1436 | subheader = subheaders; | ||
1437 | header->ctl_table_arg = ctl_table_arg; | ||
1438 | |||
1439 | if (register_leaf_sysctl_tables(new_path, pos, &subheader, | ||
1440 | set, table)) | ||
1441 | goto err_register_leaves; | ||
1442 | } | ||
1443 | |||
1444 | out: | ||
1445 | kfree(new_path); | ||
1446 | return header; | ||
1447 | |||
1448 | err_register_leaves: | ||
1449 | while (subheader > subheaders) { | ||
1450 | struct ctl_table_header *subh = *(--subheader); | ||
1451 | struct ctl_table *table = subh->ctl_table_arg; | ||
1452 | unregister_sysctl_table(subh); | ||
1453 | kfree(table); | ||
1454 | } | ||
1455 | kfree(header); | ||
1456 | header = NULL; | ||
1457 | goto out; | ||
1458 | } | ||
1459 | |||
1460 | /** | ||
1461 | * register_sysctl_paths - register a sysctl table hierarchy | ||
1462 | * @path: The path to the directory the sysctl table is in. | ||
1463 | * @table: the top-level table structure | ||
1464 | * | ||
1465 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1466 | * array. A completely 0 filled entry terminates the table. | ||
1467 | * | ||
1468 | * See __register_sysctl_paths for more details. | ||
1469 | */ | ||
1470 | struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, | ||
1471 | struct ctl_table *table) | ||
1472 | { | ||
1473 | return __register_sysctl_paths(&sysctl_table_root.default_set, | ||
1474 | path, table); | ||
1475 | } | ||
1476 | EXPORT_SYMBOL(register_sysctl_paths); | ||
1477 | |||
1478 | /** | ||
1479 | * register_sysctl_table - register a sysctl table hierarchy | ||
1480 | * @table: the top-level table structure | ||
1481 | * | ||
1482 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1483 | * array. A completely 0 filled entry terminates the table. | ||
1484 | * | ||
1485 | * See register_sysctl_paths for more details. | ||
1486 | */ | ||
1487 | struct ctl_table_header *register_sysctl_table(struct ctl_table *table) | ||
1488 | { | ||
1489 | static const struct ctl_path null_path[] = { {} }; | ||
1490 | |||
1491 | return register_sysctl_paths(null_path, table); | ||
1492 | } | ||
1493 | EXPORT_SYMBOL(register_sysctl_table); | ||
1494 | |||
1495 | static void put_links(struct ctl_table_header *header) | ||
1496 | { | ||
1497 | struct ctl_table_set *root_set = &sysctl_table_root.default_set; | ||
1498 | struct ctl_table_root *root = header->root; | ||
1499 | struct ctl_dir *parent = header->parent; | ||
1500 | struct ctl_dir *core_parent; | ||
1501 | struct ctl_table *entry; | ||
1502 | |||
1503 | if (header->set == root_set) | ||
1504 | return; | ||
1505 | |||
1506 | core_parent = xlate_dir(root_set, parent); | ||
1507 | if (IS_ERR(core_parent)) | ||
1508 | return; | ||
1509 | |||
1510 | for (entry = header->ctl_table; entry->procname; entry++) { | ||
1511 | struct ctl_table_header *link_head; | ||
1512 | struct ctl_table *link; | ||
1513 | const char *name = entry->procname; | ||
1514 | |||
1515 | link = find_entry(&link_head, core_parent, name, strlen(name)); | ||
1516 | if (link && | ||
1517 | ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) || | ||
1518 | (S_ISLNK(link->mode) && (link->data == root)))) { | ||
1519 | drop_sysctl_table(link_head); | ||
1520 | } | ||
1521 | else { | ||
1522 | printk(KERN_ERR "sysctl link missing during unregister: "); | ||
1523 | sysctl_print_dir(parent); | ||
1524 | printk(KERN_CONT "/%s\n", name); | ||
1525 | } | ||
1526 | } | ||
1527 | } | ||
1528 | |||
1529 | static void drop_sysctl_table(struct ctl_table_header *header) | ||
1530 | { | ||
1531 | struct ctl_dir *parent = header->parent; | ||
1532 | |||
1533 | if (--header->nreg) | ||
1534 | return; | ||
1535 | |||
1536 | put_links(header); | ||
1537 | start_unregistering(header); | ||
1538 | if (!--header->count) | ||
1539 | kfree_rcu(header, rcu); | ||
1540 | |||
1541 | if (parent) | ||
1542 | drop_sysctl_table(&parent->header); | ||
1543 | } | ||
1544 | |||
1545 | /** | ||
1546 | * unregister_sysctl_table - unregister a sysctl table hierarchy | ||
1547 | * @header: the header returned from register_sysctl_table | ||
1548 | * | ||
1549 | * Unregisters the sysctl table and all children. proc entries may not | ||
1550 | * actually be removed until they are no longer used by anyone. | ||
1551 | */ | ||
1552 | void unregister_sysctl_table(struct ctl_table_header * header) | ||
1553 | { | ||
1554 | int nr_subheaders; | ||
1555 | might_sleep(); | ||
1556 | |||
1557 | if (header == NULL) | ||
1558 | return; | ||
1559 | |||
1560 | nr_subheaders = count_subheaders(header->ctl_table_arg); | ||
1561 | if (unlikely(nr_subheaders > 1)) { | ||
1562 | struct ctl_table_header **subheaders; | ||
1563 | int i; | ||
1564 | |||
1565 | subheaders = (struct ctl_table_header **)(header + 1); | ||
1566 | for (i = nr_subheaders -1; i >= 0; i--) { | ||
1567 | struct ctl_table_header *subh = subheaders[i]; | ||
1568 | struct ctl_table *table = subh->ctl_table_arg; | ||
1569 | unregister_sysctl_table(subh); | ||
1570 | kfree(table); | ||
1571 | } | ||
1572 | kfree(header); | ||
1573 | return; | ||
1574 | } | ||
1575 | |||
1576 | spin_lock(&sysctl_lock); | ||
1577 | drop_sysctl_table(header); | ||
1578 | spin_unlock(&sysctl_lock); | ||
1579 | } | ||
1580 | EXPORT_SYMBOL(unregister_sysctl_table); | ||
1581 | |||
1582 | void setup_sysctl_set(struct ctl_table_set *set, | ||
1583 | struct ctl_table_root *root, | ||
1584 | int (*is_seen)(struct ctl_table_set *)) | ||
1585 | { | ||
1586 | memset(set, 0, sizeof(*set)); | ||
1587 | set->is_seen = is_seen; | ||
1588 | init_header(&set->dir.header, root, set, NULL, root_table); | ||
1589 | } | ||
1590 | |||
1591 | void retire_sysctl_set(struct ctl_table_set *set) | ||
1592 | { | ||
1593 | WARN_ON(!RB_EMPTY_ROOT(&set->dir.root)); | ||
1594 | } | ||
1595 | |||
475 | int __init proc_sys_init(void) | 1596 | int __init proc_sys_init(void) |
476 | { | 1597 | { |
477 | struct proc_dir_entry *proc_sys_root; | 1598 | struct proc_dir_entry *proc_sys_root; |
@@ -480,5 +1601,6 @@ int __init proc_sys_init(void) | |||
480 | proc_sys_root->proc_iops = &proc_sys_dir_operations; | 1601 | proc_sys_root->proc_iops = &proc_sys_dir_operations; |
481 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; | 1602 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; |
482 | proc_sys_root->nlink = 0; | 1603 | proc_sys_root->nlink = 0; |
483 | return 0; | 1604 | |
1605 | return sysctl_init(); | ||
484 | } | 1606 | } |