aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-07-28 18:07:55 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-28 18:07:55 -0400
commitcb28a1bbdb4790378e7366d6c9ee1d2340b84f92 (patch)
tree316436f77dac75335fd2c3ef5f109e71606c50d3 /fs/proc
parentb6d4f7e3ef25beb8c658c97867d98883e69dc544 (diff)
parentf934fb19ef34730263e6afc01e8ec27a8a71470f (diff)
Merge branch 'linus' into core/generic-dma-coherent
Conflicts: arch/x86/Kconfig Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/Kconfig59
-rw-r--r--fs/proc/array.c9
-rw-r--r--fs/proc/base.c89
-rw-r--r--fs/proc/generic.c19
-rw-r--r--fs/proc/inode.c88
-rw-r--r--fs/proc/internal.h8
-rw-r--r--fs/proc/kcore.c10
-rw-r--r--fs/proc/kmsg.c2
-rw-r--r--fs/proc/proc_misc.c19
-rw-r--r--fs/proc/proc_net.c43
-rw-r--r--fs/proc/proc_sysctl.c429
-rw-r--r--fs/proc/proc_tty.c48
-rw-r--r--fs/proc/task_mmu.c2
13 files changed, 485 insertions, 340 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
new file mode 100644
index 000000000000..73cd7a418f06
--- /dev/null
+++ b/fs/proc/Kconfig
@@ -0,0 +1,59 @@
1config PROC_FS
2 bool "/proc file system support" if EMBEDDED
3 default y
4 help
5 This is a virtual file system providing information about the status
6 of the system. "Virtual" means that it doesn't take up any space on
7 your hard disk: the files are created on the fly by the kernel when
8 you try to access them. Also, you cannot read the files with older
9 version of the program less: you need to use more or cat.
10
11 It's totally cool; for example, "cat /proc/interrupts" gives
12 information about what the different IRQs are used for at the moment
13 (there is a small number of Interrupt ReQuest lines in your computer
14 that are used by the attached devices to gain the CPU's attention --
15 often a source of trouble if two devices are mistakenly configured
16 to use the same IRQ). The program procinfo to display some
17 information about your system gathered from the /proc file system.
18
19 Before you can use the /proc file system, it has to be mounted,
20 meaning it has to be given a location in the directory hierarchy.
21 That location should be /proc. A command such as "mount -t proc proc
22 /proc" or the equivalent line in /etc/fstab does the job.
23
24 The /proc file system is explained in the file
25 <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
26 ("man 5 proc").
27
28 This option will enlarge your kernel by about 67 KB. Several
29 programs depend on this, so everyone should say Y here.
30
31config PROC_KCORE
32 bool "/proc/kcore support" if !ARM
33 depends on PROC_FS && MMU
34
35config PROC_VMCORE
36 bool "/proc/vmcore support (EXPERIMENTAL)"
37 depends on PROC_FS && CRASH_DUMP
38 default y
39 help
40 Exports the dump image of crashed kernel in ELF format.
41
42config PROC_SYSCTL
43 bool "Sysctl support (/proc/sys)" if EMBEDDED
44 depends on PROC_FS
45 select SYSCTL
46 default y
47 ---help---
48 The sysctl interface provides a means of dynamically changing
49 certain kernel parameters and variables on the fly without requiring
50 a recompile of the kernel or reboot of the system. The primary
51 interface is through /proc/sys. If you say Y here a tree of
52 modifiable sysctl entries will be generated beneath the
53 /proc/sys directory. They are explained in the files
54 in <file:Documentation/sysctl/>. Note that enabling this
55 option will enlarge the kernel by at least 8 KB.
56
57 As it is generally a good thing, you should say Y here unless
58 building a kernel for install/rescue disks or your system is very
59 limited in memory.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 797d775e0354..0d6eb33597c6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -80,6 +80,7 @@
80#include <linux/delayacct.h> 80#include <linux/delayacct.h>
81#include <linux/seq_file.h> 81#include <linux/seq_file.h>
82#include <linux/pid_namespace.h> 82#include <linux/pid_namespace.h>
83#include <linux/tracehook.h>
83 84
84#include <asm/pgtable.h> 85#include <asm/pgtable.h>
85#include <asm/processor.h> 86#include <asm/processor.h>
@@ -168,8 +169,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
168 rcu_read_lock(); 169 rcu_read_lock();
169 ppid = pid_alive(p) ? 170 ppid = pid_alive(p) ?
170 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; 171 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
171 tpid = pid_alive(p) && p->ptrace ? 172 tpid = 0;
172 task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0; 173 if (pid_alive(p)) {
174 struct task_struct *tracer = tracehook_tracer_task(p);
175 if (tracer)
176 tpid = task_pid_nr_ns(tracer, ns);
177 }
173 seq_printf(m, 178 seq_printf(m,
174 "State:\t%s\n" 179 "State:\t%s\n"
175 "Tgid:\t%d\n" 180 "Tgid:\t%d\n"
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58c3e6a8e15e..01ed610f9b87 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -53,6 +53,7 @@
53#include <linux/time.h> 53#include <linux/time.h>
54#include <linux/proc_fs.h> 54#include <linux/proc_fs.h>
55#include <linux/stat.h> 55#include <linux/stat.h>
56#include <linux/task_io_accounting_ops.h>
56#include <linux/init.h> 57#include <linux/init.h>
57#include <linux/capability.h> 58#include <linux/capability.h>
58#include <linux/file.h> 59#include <linux/file.h>
@@ -69,6 +70,7 @@
69#include <linux/mount.h> 70#include <linux/mount.h>
70#include <linux/security.h> 71#include <linux/security.h>
71#include <linux/ptrace.h> 72#include <linux/ptrace.h>
73#include <linux/tracehook.h>
72#include <linux/cgroup.h> 74#include <linux/cgroup.h>
73#include <linux/cpuset.h> 75#include <linux/cpuset.h>
74#include <linux/audit.h> 76#include <linux/audit.h>
@@ -231,10 +233,14 @@ static int check_mem_permission(struct task_struct *task)
231 * If current is actively ptrace'ing, and would also be 233 * If current is actively ptrace'ing, and would also be
232 * permitted to freshly attach with ptrace now, permit it. 234 * permitted to freshly attach with ptrace now, permit it.
233 */ 235 */
234 if (task->parent == current && (task->ptrace & PT_PTRACED) && 236 if (task_is_stopped_or_traced(task)) {
235 task_is_stopped_or_traced(task) && 237 int match;
236 ptrace_may_access(task, PTRACE_MODE_ATTACH)) 238 rcu_read_lock();
237 return 0; 239 match = (tracehook_tracer_task(task) == current);
240 rcu_read_unlock();
241 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
242 return 0;
243 }
238 244
239 /* 245 /*
240 * Noone else is allowed. 246 * Noone else is allowed.
@@ -504,6 +510,26 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
504 return count; 510 return count;
505} 511}
506 512
513#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
514static int proc_pid_syscall(struct task_struct *task, char *buffer)
515{
516 long nr;
517 unsigned long args[6], sp, pc;
518
519 if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
520 return sprintf(buffer, "running\n");
521
522 if (nr < 0)
523 return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
524
525 return sprintf(buffer,
526 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
527 nr,
528 args[0], args[1], args[2], args[3], args[4], args[5],
529 sp, pc);
530}
531#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
532
507/************************************************************************/ 533/************************************************************************/
508/* Here the fs part begins */ 534/* Here the fs part begins */
509/************************************************************************/ 535/************************************************************************/
@@ -1834,8 +1860,7 @@ static const struct file_operations proc_fd_operations = {
1834 * /proc/pid/fd needs a special permission handler so that a process can still 1860 * /proc/pid/fd needs a special permission handler so that a process can still
1835 * access /proc/self/fd after it has executed a setuid(). 1861 * access /proc/self/fd after it has executed a setuid().
1836 */ 1862 */
1837static int proc_fd_permission(struct inode *inode, int mask, 1863static int proc_fd_permission(struct inode *inode, int mask)
1838 struct nameidata *nd)
1839{ 1864{
1840 int rv; 1865 int rv;
1841 1866
@@ -2376,29 +2401,44 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
2376} 2401}
2377 2402
2378#ifdef CONFIG_TASK_IO_ACCOUNTING 2403#ifdef CONFIG_TASK_IO_ACCOUNTING
2379static int proc_pid_io_accounting(struct task_struct *task, char *buffer) 2404static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2380{ 2405{
2406 struct task_io_accounting acct = task->ioac;
2407 unsigned long flags;
2408
2409 if (whole && lock_task_sighand(task, &flags)) {
2410 struct task_struct *t = task;
2411
2412 task_io_accounting_add(&acct, &task->signal->ioac);
2413 while_each_thread(task, t)
2414 task_io_accounting_add(&acct, &t->ioac);
2415
2416 unlock_task_sighand(task, &flags);
2417 }
2381 return sprintf(buffer, 2418 return sprintf(buffer,
2382#ifdef CONFIG_TASK_XACCT
2383 "rchar: %llu\n" 2419 "rchar: %llu\n"
2384 "wchar: %llu\n" 2420 "wchar: %llu\n"
2385 "syscr: %llu\n" 2421 "syscr: %llu\n"
2386 "syscw: %llu\n" 2422 "syscw: %llu\n"
2387#endif
2388 "read_bytes: %llu\n" 2423 "read_bytes: %llu\n"
2389 "write_bytes: %llu\n" 2424 "write_bytes: %llu\n"
2390 "cancelled_write_bytes: %llu\n", 2425 "cancelled_write_bytes: %llu\n",
2391#ifdef CONFIG_TASK_XACCT 2426 acct.rchar, acct.wchar,
2392 (unsigned long long)task->rchar, 2427 acct.syscr, acct.syscw,
2393 (unsigned long long)task->wchar, 2428 acct.read_bytes, acct.write_bytes,
2394 (unsigned long long)task->syscr, 2429 acct.cancelled_write_bytes);
2395 (unsigned long long)task->syscw, 2430}
2396#endif 2431
2397 (unsigned long long)task->ioac.read_bytes, 2432static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
2398 (unsigned long long)task->ioac.write_bytes, 2433{
2399 (unsigned long long)task->ioac.cancelled_write_bytes); 2434 return do_io_accounting(task, buffer, 0);
2400} 2435}
2401#endif 2436
2437static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2438{
2439 return do_io_accounting(task, buffer, 1);
2440}
2441#endif /* CONFIG_TASK_IO_ACCOUNTING */
2402 2442
2403/* 2443/*
2404 * Thread groups 2444 * Thread groups
@@ -2420,6 +2460,9 @@ static const struct pid_entry tgid_base_stuff[] = {
2420#ifdef CONFIG_SCHED_DEBUG 2460#ifdef CONFIG_SCHED_DEBUG
2421 REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2461 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2422#endif 2462#endif
2463#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2464 INF("syscall", S_IRUSR, pid_syscall),
2465#endif
2423 INF("cmdline", S_IRUGO, pid_cmdline), 2466 INF("cmdline", S_IRUGO, pid_cmdline),
2424 ONE("stat", S_IRUGO, tgid_stat), 2467 ONE("stat", S_IRUGO, tgid_stat),
2425 ONE("statm", S_IRUGO, pid_statm), 2468 ONE("statm", S_IRUGO, pid_statm),
@@ -2470,7 +2513,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2470 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), 2513 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
2471#endif 2514#endif
2472#ifdef CONFIG_TASK_IO_ACCOUNTING 2515#ifdef CONFIG_TASK_IO_ACCOUNTING
2473 INF("io", S_IRUGO, pid_io_accounting), 2516 INF("io", S_IRUGO, tgid_io_accounting),
2474#endif 2517#endif
2475}; 2518};
2476 2519
@@ -2752,6 +2795,9 @@ static const struct pid_entry tid_base_stuff[] = {
2752#ifdef CONFIG_SCHED_DEBUG 2795#ifdef CONFIG_SCHED_DEBUG
2753 REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2796 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2754#endif 2797#endif
2798#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2799 INF("syscall", S_IRUSR, pid_syscall),
2800#endif
2755 INF("cmdline", S_IRUGO, pid_cmdline), 2801 INF("cmdline", S_IRUGO, pid_cmdline),
2756 ONE("stat", S_IRUGO, tid_stat), 2802 ONE("stat", S_IRUGO, tid_stat),
2757 ONE("statm", S_IRUGO, pid_statm), 2803 ONE("statm", S_IRUGO, pid_statm),
@@ -2797,6 +2843,9 @@ static const struct pid_entry tid_base_stuff[] = {
2797#ifdef CONFIG_FAULT_INJECTION 2843#ifdef CONFIG_FAULT_INJECTION
2798 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), 2844 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
2799#endif 2845#endif
2846#ifdef CONFIG_TASK_IO_ACCOUNTING
2847 INF("io", S_IRUGO, tid_io_accounting),
2848#endif
2800}; 2849};
2801 2850
2802static int proc_tid_base_readdir(struct file * filp, 2851static int proc_tid_base_readdir(struct file * filp,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 43e54e86cefd..cb4096cc3fb7 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,6 +597,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
597 ent->pde_users = 0; 597 ent->pde_users = 0;
598 spin_lock_init(&ent->pde_unload_lock); 598 spin_lock_init(&ent->pde_unload_lock);
599 ent->pde_unload_completion = NULL; 599 ent->pde_unload_completion = NULL;
600 INIT_LIST_HEAD(&ent->pde_openers);
600 out: 601 out:
601 return ent; 602 return ent;
602} 603}
@@ -789,15 +790,25 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
789 spin_unlock(&de->pde_unload_lock); 790 spin_unlock(&de->pde_unload_lock);
790 791
791continue_removing: 792continue_removing:
793 spin_lock(&de->pde_unload_lock);
794 while (!list_empty(&de->pde_openers)) {
795 struct pde_opener *pdeo;
796
797 pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
798 list_del(&pdeo->lh);
799 spin_unlock(&de->pde_unload_lock);
800 pdeo->release(pdeo->inode, pdeo->file);
801 kfree(pdeo);
802 spin_lock(&de->pde_unload_lock);
803 }
804 spin_unlock(&de->pde_unload_lock);
805
792 if (S_ISDIR(de->mode)) 806 if (S_ISDIR(de->mode))
793 parent->nlink--; 807 parent->nlink--;
794 de->nlink = 0; 808 de->nlink = 0;
795 if (de->subdir) { 809 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory "
796 printk(KERN_WARNING "%s: removing non-empty directory "
797 "'%s/%s', leaking at least '%s'\n", __func__, 810 "'%s/%s', leaking at least '%s'\n", __func__,
798 de->parent->name, de->name, de->subdir->name); 811 de->parent->name, de->name, de->subdir->name);
799 WARN_ON(1);
800 }
801 if (atomic_dec_and_test(&de->count)) 812 if (atomic_dec_and_test(&de->count))
802 free_proc_entry(de); 813 free_proc_entry(de);
803} 814}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b08d10017911..8bb03f056c28 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -17,6 +17,7 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
20#include <linux/sysctl.h>
20 21
21#include <asm/system.h> 22#include <asm/system.h>
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
@@ -65,6 +66,8 @@ static void proc_delete_inode(struct inode *inode)
65 module_put(de->owner); 66 module_put(de->owner);
66 de_put(de); 67 de_put(de);
67 } 68 }
69 if (PROC_I(inode)->sysctl)
70 sysctl_head_put(PROC_I(inode)->sysctl);
68 clear_inode(inode); 71 clear_inode(inode);
69} 72}
70 73
@@ -84,6 +87,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
84 ei->fd = 0; 87 ei->fd = 0;
85 ei->op.proc_get_link = NULL; 88 ei->op.proc_get_link = NULL;
86 ei->pde = NULL; 89 ei->pde = NULL;
90 ei->sysctl = NULL;
91 ei->sysctl_entry = NULL;
87 inode = &ei->vfs_inode; 92 inode = &ei->vfs_inode;
88 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 93 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
89 return inode; 94 return inode;
@@ -94,7 +99,7 @@ static void proc_destroy_inode(struct inode *inode)
94 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 99 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
95} 100}
96 101
97static void init_once(struct kmem_cache * cachep, void *foo) 102static void init_once(void *foo)
98{ 103{
99 struct proc_inode *ei = (struct proc_inode *) foo; 104 struct proc_inode *ei = (struct proc_inode *) foo;
100 105
@@ -111,27 +116,25 @@ int __init proc_init_inodecache(void)
111 return 0; 116 return 0;
112} 117}
113 118
114static int proc_remount(struct super_block *sb, int *flags, char *data)
115{
116 *flags |= MS_NODIRATIME;
117 return 0;
118}
119
120static const struct super_operations proc_sops = { 119static const struct super_operations proc_sops = {
121 .alloc_inode = proc_alloc_inode, 120 .alloc_inode = proc_alloc_inode,
122 .destroy_inode = proc_destroy_inode, 121 .destroy_inode = proc_destroy_inode,
123 .drop_inode = generic_delete_inode, 122 .drop_inode = generic_delete_inode,
124 .delete_inode = proc_delete_inode, 123 .delete_inode = proc_delete_inode,
125 .statfs = simple_statfs, 124 .statfs = simple_statfs,
126 .remount_fs = proc_remount,
127}; 125};
128 126
129static void pde_users_dec(struct proc_dir_entry *pde) 127static void __pde_users_dec(struct proc_dir_entry *pde)
130{ 128{
131 spin_lock(&pde->pde_unload_lock);
132 pde->pde_users--; 129 pde->pde_users--;
133 if (pde->pde_unload_completion && pde->pde_users == 0) 130 if (pde->pde_unload_completion && pde->pde_users == 0)
134 complete(pde->pde_unload_completion); 131 complete(pde->pde_unload_completion);
132}
133
134static void pde_users_dec(struct proc_dir_entry *pde)
135{
136 spin_lock(&pde->pde_unload_lock);
137 __pde_users_dec(pde);
135 spin_unlock(&pde->pde_unload_lock); 138 spin_unlock(&pde->pde_unload_lock);
136} 139}
137 140
@@ -318,36 +321,97 @@ static int proc_reg_open(struct inode *inode, struct file *file)
318 struct proc_dir_entry *pde = PDE(inode); 321 struct proc_dir_entry *pde = PDE(inode);
319 int rv = 0; 322 int rv = 0;
320 int (*open)(struct inode *, struct file *); 323 int (*open)(struct inode *, struct file *);
324 int (*release)(struct inode *, struct file *);
325 struct pde_opener *pdeo;
326
327 /*
328 * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
329 * sequence. ->release won't be called because ->proc_fops will be
330 * cleared. Depending on complexity of ->release, consequences vary.
331 *
332 * We can't wait for mercy when close will be done for real, it's
333 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
334 * by hand in remove_proc_entry(). For this, save opener's credentials
335 * for later.
336 */
337 pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
338 if (!pdeo)
339 return -ENOMEM;
321 340
322 spin_lock(&pde->pde_unload_lock); 341 spin_lock(&pde->pde_unload_lock);
323 if (!pde->proc_fops) { 342 if (!pde->proc_fops) {
324 spin_unlock(&pde->pde_unload_lock); 343 spin_unlock(&pde->pde_unload_lock);
344 kfree(pdeo);
325 return rv; 345 return rv;
326 } 346 }
327 pde->pde_users++; 347 pde->pde_users++;
328 open = pde->proc_fops->open; 348 open = pde->proc_fops->open;
349 release = pde->proc_fops->release;
329 spin_unlock(&pde->pde_unload_lock); 350 spin_unlock(&pde->pde_unload_lock);
330 351
331 if (open) 352 if (open)
332 rv = open(inode, file); 353 rv = open(inode, file);
333 354
334 pde_users_dec(pde); 355 spin_lock(&pde->pde_unload_lock);
356 if (rv == 0 && release) {
357 /* To know what to release. */
358 pdeo->inode = inode;
359 pdeo->file = file;
360 /* Strictly for "too late" ->release in proc_reg_release(). */
361 pdeo->release = release;
362 list_add(&pdeo->lh, &pde->pde_openers);
363 } else
364 kfree(pdeo);
365 __pde_users_dec(pde);
366 spin_unlock(&pde->pde_unload_lock);
335 return rv; 367 return rv;
336} 368}
337 369
370static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
371 struct inode *inode, struct file *file)
372{
373 struct pde_opener *pdeo;
374
375 list_for_each_entry(pdeo, &pde->pde_openers, lh) {
376 if (pdeo->inode == inode && pdeo->file == file)
377 return pdeo;
378 }
379 return NULL;
380}
381
338static int proc_reg_release(struct inode *inode, struct file *file) 382static int proc_reg_release(struct inode *inode, struct file *file)
339{ 383{
340 struct proc_dir_entry *pde = PDE(inode); 384 struct proc_dir_entry *pde = PDE(inode);
341 int rv = 0; 385 int rv = 0;
342 int (*release)(struct inode *, struct file *); 386 int (*release)(struct inode *, struct file *);
387 struct pde_opener *pdeo;
343 388
344 spin_lock(&pde->pde_unload_lock); 389 spin_lock(&pde->pde_unload_lock);
390 pdeo = find_pde_opener(pde, inode, file);
345 if (!pde->proc_fops) { 391 if (!pde->proc_fops) {
346 spin_unlock(&pde->pde_unload_lock); 392 /*
393 * Can't simply exit, __fput() will think that everything is OK,
394 * and move on to freeing struct file. remove_proc_entry() will
395 * find slacker in opener's list and will try to do non-trivial
396 * things with struct file. Therefore, remove opener from list.
397 *
398 * But if opener is removed from list, who will ->release it?
399 */
400 if (pdeo) {
401 list_del(&pdeo->lh);
402 spin_unlock(&pde->pde_unload_lock);
403 rv = pdeo->release(inode, file);
404 kfree(pdeo);
405 } else
406 spin_unlock(&pde->pde_unload_lock);
347 return rv; 407 return rv;
348 } 408 }
349 pde->pde_users++; 409 pde->pde_users++;
350 release = pde->proc_fops->release; 410 release = pde->proc_fops->release;
411 if (pdeo) {
412 list_del(&pdeo->lh);
413 kfree(pdeo);
414 }
351 spin_unlock(&pde->pde_unload_lock); 415 spin_unlock(&pde->pde_unload_lock);
352 416
353 if (release) 417 if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 28cbca805905..442202314d53 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations;
63extern const struct file_operations proc_clear_refs_operations; 63extern const struct file_operations proc_clear_refs_operations;
64extern const struct file_operations proc_pagemap_operations; 64extern const struct file_operations proc_pagemap_operations;
65extern const struct file_operations proc_net_operations; 65extern const struct file_operations proc_net_operations;
66extern const struct file_operations proc_kmsg_operations;
66extern const struct inode_operations proc_net_inode_operations; 67extern const struct inode_operations proc_net_inode_operations;
67 68
68void free_proc_entry(struct proc_dir_entry *de); 69void free_proc_entry(struct proc_dir_entry *de);
@@ -88,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
88 struct dentry *dentry); 89 struct dentry *dentry);
89int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 90int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
90 filldir_t filldir); 91 filldir_t filldir);
92
93struct pde_opener {
94 struct inode *inode;
95 struct file *file;
96 int (*release)(struct inode *, struct file *);
97 struct list_head lh;
98};
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e78c81fcf547..c2370c76fb71 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,6 +23,10 @@
23 23
24#define CORE_STR "CORE" 24#define CORE_STR "CORE"
25 25
26#ifndef ELF_CORE_EFLAGS
27#define ELF_CORE_EFLAGS 0
28#endif
29
26static int open_kcore(struct inode * inode, struct file * filp) 30static int open_kcore(struct inode * inode, struct file * filp)
27{ 31{
28 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 32 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
164 elf->e_entry = 0; 168 elf->e_entry = 0;
165 elf->e_phoff = sizeof(struct elfhdr); 169 elf->e_phoff = sizeof(struct elfhdr);
166 elf->e_shoff = 0; 170 elf->e_shoff = 0;
167#if defined(CONFIG_H8300) 171 elf->e_flags = ELF_CORE_EFLAGS;
168 elf->e_flags = ELF_FLAGS;
169#else
170 elf->e_flags = 0;
171#endif
172 elf->e_ehsize = sizeof(struct elfhdr); 172 elf->e_ehsize = sizeof(struct elfhdr);
173 elf->e_phentsize= sizeof(struct elf_phdr); 173 elf->e_phentsize= sizeof(struct elf_phdr);
174 elf->e_phnum = nphdr; 174 elf->e_phnum = nphdr;
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ff3b90b56e9d..9fd5df3f40ce 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
15#include <asm/uaccess.h> 15#include <asm/uaccess.h>
16#include <asm/io.h> 16#include <asm/io.h>
17 17
18#include "internal.h"
19
18extern wait_queue_head_t log_wait; 20extern wait_queue_head_t log_wait;
19 21
20extern int do_syslog(int type, char __user *bug, int count); 22extern int do_syslog(int type, char __user *bug, int count);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c652d469dc08..ded969862960 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -232,7 +232,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
232#undef K 232#undef K
233} 233}
234 234
235extern const struct seq_operations fragmentation_op;
236static int fragmentation_open(struct inode *inode, struct file *file) 235static int fragmentation_open(struct inode *inode, struct file *file)
237{ 236{
238 (void)inode; 237 (void)inode;
@@ -246,7 +245,6 @@ static const struct file_operations fragmentation_file_operations = {
246 .release = seq_release, 245 .release = seq_release,
247}; 246};
248 247
249extern const struct seq_operations pagetypeinfo_op;
250static int pagetypeinfo_open(struct inode *inode, struct file *file) 248static int pagetypeinfo_open(struct inode *inode, struct file *file)
251{ 249{
252 return seq_open(file, &pagetypeinfo_op); 250 return seq_open(file, &pagetypeinfo_op);
@@ -259,7 +257,6 @@ static const struct file_operations pagetypeinfo_file_ops = {
259 .release = seq_release, 257 .release = seq_release,
260}; 258};
261 259
262extern const struct seq_operations zoneinfo_op;
263static int zoneinfo_open(struct inode *inode, struct file *file) 260static int zoneinfo_open(struct inode *inode, struct file *file)
264{ 261{
265 return seq_open(file, &zoneinfo_op); 262 return seq_open(file, &zoneinfo_op);
@@ -356,7 +353,6 @@ static const struct file_operations proc_devinfo_operations = {
356 .release = seq_release, 353 .release = seq_release,
357}; 354};
358 355
359extern const struct seq_operations vmstat_op;
360static int vmstat_open(struct inode *inode, struct file *file) 356static int vmstat_open(struct inode *inode, struct file *file)
361{ 357{
362 return seq_open(file, &vmstat_op); 358 return seq_open(file, &vmstat_op);
@@ -468,14 +464,25 @@ static const struct file_operations proc_slabstats_operations = {
468#ifdef CONFIG_MMU 464#ifdef CONFIG_MMU
469static int vmalloc_open(struct inode *inode, struct file *file) 465static int vmalloc_open(struct inode *inode, struct file *file)
470{ 466{
471 return seq_open(file, &vmalloc_op); 467 unsigned int *ptr = NULL;
468 int ret;
469
470 if (NUMA_BUILD)
471 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
472 ret = seq_open(file, &vmalloc_op);
473 if (!ret) {
474 struct seq_file *m = file->private_data;
475 m->private = ptr;
476 } else
477 kfree(ptr);
478 return ret;
472} 479}
473 480
474static const struct file_operations proc_vmalloc_operations = { 481static const struct file_operations proc_vmalloc_operations = {
475 .open = vmalloc_open, 482 .open = vmalloc_open,
476 .read = seq_read, 483 .read = seq_read,
477 .llseek = seq_lseek, 484 .llseek = seq_lseek,
478 .release = seq_release, 485 .release = seq_release_private,
479}; 486};
480#endif 487#endif
481 488
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 83f357b30d71..7bc296f424ae 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -27,6 +27,11 @@
27#include "internal.h" 27#include "internal.h"
28 28
29 29
30static struct net *get_proc_net(const struct inode *inode)
31{
32 return maybe_get_net(PDE_NET(PDE(inode)));
33}
34
30int seq_open_net(struct inode *ino, struct file *f, 35int seq_open_net(struct inode *ino, struct file *f,
31 const struct seq_operations *ops, int size) 36 const struct seq_operations *ops, int size)
32{ 37{
@@ -51,6 +56,30 @@ int seq_open_net(struct inode *ino, struct file *f,
51} 56}
52EXPORT_SYMBOL_GPL(seq_open_net); 57EXPORT_SYMBOL_GPL(seq_open_net);
53 58
59int single_open_net(struct inode *inode, struct file *file,
60 int (*show)(struct seq_file *, void *))
61{
62 int err;
63 struct net *net;
64
65 err = -ENXIO;
66 net = get_proc_net(inode);
67 if (net == NULL)
68 goto err_net;
69
70 err = single_open(file, show, net);
71 if (err < 0)
72 goto err_open;
73
74 return 0;
75
76err_open:
77 put_net(net);
78err_net:
79 return err;
80}
81EXPORT_SYMBOL_GPL(single_open_net);
82
54int seq_release_net(struct inode *ino, struct file *f) 83int seq_release_net(struct inode *ino, struct file *f)
55{ 84{
56 struct seq_file *seq; 85 struct seq_file *seq;
@@ -63,6 +92,14 @@ int seq_release_net(struct inode *ino, struct file *f)
63} 92}
64EXPORT_SYMBOL_GPL(seq_release_net); 93EXPORT_SYMBOL_GPL(seq_release_net);
65 94
95int single_release_net(struct inode *ino, struct file *f)
96{
97 struct seq_file *seq = f->private_data;
98 put_net(seq->private);
99 return single_release(ino, f);
100}
101EXPORT_SYMBOL_GPL(single_release_net);
102
66static struct net *get_proc_task_net(struct inode *dir) 103static struct net *get_proc_task_net(struct inode *dir)
67{ 104{
68 struct task_struct *task; 105 struct task_struct *task;
@@ -153,12 +190,6 @@ void proc_net_remove(struct net *net, const char *name)
153} 190}
154EXPORT_SYMBOL_GPL(proc_net_remove); 191EXPORT_SYMBOL_GPL(proc_net_remove);
155 192
156struct net *get_proc_net(const struct inode *inode)
157{
158 return maybe_get_net(PDE_NET(PDE(inode)));
159}
160EXPORT_SYMBOL_GPL(get_proc_net);
161
162static __net_init int proc_net_ns_init(struct net *net) 193static __net_init int proc_net_ns_init(struct net *net)
163{ 194{
164 struct proc_dir_entry *netd, *net_statd; 195 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5acc001d49f6..f9a8b892718f 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -10,149 +10,110 @@
10static struct dentry_operations proc_sys_dentry_operations; 10static struct dentry_operations proc_sys_dentry_operations;
11static const struct file_operations proc_sys_file_operations; 11static const struct file_operations proc_sys_file_operations;
12static const struct inode_operations proc_sys_inode_operations; 12static const struct inode_operations proc_sys_inode_operations;
13static const struct file_operations proc_sys_dir_file_operations;
14static const struct inode_operations proc_sys_dir_operations;
13 15
14static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) 16static struct inode *proc_sys_make_inode(struct super_block *sb,
15{ 17 struct ctl_table_header *head, struct ctl_table *table)
16 /* Refresh the cached information bits in the inode */
17 if (table) {
18 inode->i_uid = 0;
19 inode->i_gid = 0;
20 inode->i_mode = table->mode;
21 if (table->proc_handler) {
22 inode->i_mode |= S_IFREG;
23 inode->i_nlink = 1;
24 } else {
25 inode->i_mode |= S_IFDIR;
26 inode->i_nlink = 0; /* It is too hard to figure out */
27 }
28 }
29}
30
31static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
32{ 18{
33 struct inode *inode; 19 struct inode *inode;
34 struct proc_inode *dir_ei, *ei; 20 struct proc_inode *ei;
35 int depth;
36 21
37 inode = new_inode(dir->i_sb); 22 inode = new_inode(sb);
38 if (!inode) 23 if (!inode)
39 goto out; 24 goto out;
40 25
41 /* A directory is always one deeper than it's parent */ 26 sysctl_head_get(head);
42 dir_ei = PROC_I(dir);
43 depth = dir_ei->fd + 1;
44
45 ei = PROC_I(inode); 27 ei = PROC_I(inode);
46 ei->fd = depth; 28 ei->sysctl = head;
29 ei->sysctl_entry = table;
30
47 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 31 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
48 inode->i_op = &proc_sys_inode_operations;
49 inode->i_fop = &proc_sys_file_operations;
50 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ 32 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
51 proc_sys_refresh_inode(inode, table); 33 inode->i_mode = table->mode;
34 if (!table->child) {
35 inode->i_mode |= S_IFREG;
36 inode->i_op = &proc_sys_inode_operations;
37 inode->i_fop = &proc_sys_file_operations;
38 } else {
39 inode->i_mode |= S_IFDIR;
40 inode->i_nlink = 0;
41 inode->i_op = &proc_sys_dir_operations;
42 inode->i_fop = &proc_sys_dir_file_operations;
43 }
52out: 44out:
53 return inode; 45 return inode;
54} 46}
55 47
56static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth) 48static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
57{
58 for (;;) {
59 struct proc_inode *ei;
60
61 ei = PROC_I(dentry->d_inode);
62 if (ei->fd == depth)
63 break; /* found */
64
65 dentry = dentry->d_parent;
66 }
67 return dentry;
68}
69
70static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
71 struct qstr *name)
72{ 49{
73 int len; 50 int len;
74 for ( ; table->ctl_name || table->procname; table++) { 51 for ( ; p->ctl_name || p->procname; p++) {
75 52
76 if (!table->procname) 53 if (!p->procname)
77 continue; 54 continue;
78 55
79 len = strlen(table->procname); 56 len = strlen(p->procname);
80 if (len != name->len) 57 if (len != name->len)
81 continue; 58 continue;
82 59
83 if (memcmp(table->procname, name->name, len) != 0) 60 if (memcmp(p->procname, name->name, len) != 0)
84 continue; 61 continue;
85 62
86 /* I have a match */ 63 /* I have a match */
87 return table; 64 return p;
88 } 65 }
89 return NULL; 66 return NULL;
90} 67}
91 68
92static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry, 69struct ctl_table_header *grab_header(struct inode *inode)
93 struct ctl_table *table)
94{ 70{
95 struct dentry *ancestor; 71 if (PROC_I(inode)->sysctl)
96 struct proc_inode *ei; 72 return sysctl_head_grab(PROC_I(inode)->sysctl);
97 int depth, i; 73 else
74 return sysctl_head_next(NULL);
75}
98 76
99 ei = PROC_I(dentry->d_inode); 77static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
100 depth = ei->fd; 78 struct nameidata *nd)
79{
80 struct ctl_table_header *head = grab_header(dir);
81 struct ctl_table *table = PROC_I(dir)->sysctl_entry;
82 struct ctl_table_header *h = NULL;
83 struct qstr *name = &dentry->d_name;
84 struct ctl_table *p;
85 struct inode *inode;
86 struct dentry *err = ERR_PTR(-ENOENT);
101 87
102 if (depth == 0) 88 if (IS_ERR(head))
103 return table; 89 return ERR_CAST(head);
104 90
105 for (i = 1; table && (i <= depth); i++) { 91 if (table && !table->child) {
106 ancestor = proc_sys_ancestor(dentry, i); 92 WARN_ON(1);
107 table = proc_sys_lookup_table_one(table, &ancestor->d_name); 93 goto out;
108 if (table)
109 table = table->child;
110 } 94 }
111 return table;
112
113}
114static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
115 struct qstr *name,
116 struct ctl_table *table)
117{
118 table = proc_sys_lookup_table(dparent, table);
119 if (table)
120 table = proc_sys_lookup_table_one(table, name);
121 return table;
122}
123 95
124static struct ctl_table *do_proc_sys_lookup(struct dentry *parent, 96 table = table ? table->child : head->ctl_table;
125 struct qstr *name,
126 struct ctl_table_header **ptr)
127{
128 struct ctl_table_header *head;
129 struct ctl_table *table = NULL;
130 97
131 for (head = sysctl_head_next(NULL); head; 98 p = find_in_table(table, name);
132 head = sysctl_head_next(head)) { 99 if (!p) {
133 table = proc_sys_lookup_entry(parent, name, head->ctl_table); 100 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
134 if (table) 101 if (h->attached_to != table)
135 break; 102 continue;
103 p = find_in_table(h->attached_by, name);
104 if (p)
105 break;
106 }
136 } 107 }
137 *ptr = head;
138 return table;
139}
140
141static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
142 struct nameidata *nd)
143{
144 struct ctl_table_header *head;
145 struct inode *inode;
146 struct dentry *err;
147 struct ctl_table *table;
148 108
149 err = ERR_PTR(-ENOENT); 109 if (!p)
150 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
151 if (!table)
152 goto out; 110 goto out;
153 111
154 err = ERR_PTR(-ENOMEM); 112 err = ERR_PTR(-ENOMEM);
155 inode = proc_sys_make_inode(dir, table); 113 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
114 if (h)
115 sysctl_head_finish(h);
116
156 if (!inode) 117 if (!inode)
157 goto out; 118 goto out;
158 119
@@ -168,22 +129,14 @@ out:
168static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, 129static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
169 size_t count, loff_t *ppos, int write) 130 size_t count, loff_t *ppos, int write)
170{ 131{
171 struct dentry *dentry = filp->f_dentry; 132 struct inode *inode = filp->f_path.dentry->d_inode;
172 struct ctl_table_header *head; 133 struct ctl_table_header *head = grab_header(inode);
173 struct ctl_table *table; 134 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
174 ssize_t error; 135 ssize_t error;
175 size_t res; 136 size_t res;
176 137
177 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 138 if (IS_ERR(head))
178 /* Has the sysctl entry disappeared on us? */ 139 return PTR_ERR(head);
179 error = -ENOENT;
180 if (!table)
181 goto out;
182
183 /* Has the sysctl entry been replaced by a directory? */
184 error = -EISDIR;
185 if (!table->proc_handler)
186 goto out;
187 140
188 /* 141 /*
189 * At this point we know that the sysctl was not unregistered 142 * At this point we know that the sysctl was not unregistered
@@ -193,6 +146,11 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
193 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) 146 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
194 goto out; 147 goto out;
195 148
149 /* if that can happen at all, it should be -EINVAL, not -EISDIR */
150 error = -EINVAL;
151 if (!table->proc_handler)
152 goto out;
153
196 /* careful: calling conventions are nasty here */ 154 /* careful: calling conventions are nasty here */
197 res = count; 155 res = count;
198 error = table->proc_handler(table, write, filp, buf, &res, ppos); 156 error = table->proc_handler(table, write, filp, buf, &res, ppos);
@@ -218,82 +176,86 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
218 176
219 177
220static int proc_sys_fill_cache(struct file *filp, void *dirent, 178static int proc_sys_fill_cache(struct file *filp, void *dirent,
221 filldir_t filldir, struct ctl_table *table) 179 filldir_t filldir,
180 struct ctl_table_header *head,
181 struct ctl_table *table)
222{ 182{
223 struct ctl_table_header *head;
224 struct ctl_table *child_table = NULL;
225 struct dentry *child, *dir = filp->f_path.dentry; 183 struct dentry *child, *dir = filp->f_path.dentry;
226 struct inode *inode; 184 struct inode *inode;
227 struct qstr qname; 185 struct qstr qname;
228 ino_t ino = 0; 186 ino_t ino = 0;
229 unsigned type = DT_UNKNOWN; 187 unsigned type = DT_UNKNOWN;
230 int ret;
231 188
232 qname.name = table->procname; 189 qname.name = table->procname;
233 qname.len = strlen(table->procname); 190 qname.len = strlen(table->procname);
234 qname.hash = full_name_hash(qname.name, qname.len); 191 qname.hash = full_name_hash(qname.name, qname.len);
235 192
236 /* Suppress duplicates.
237 * Only fill a directory entry if it is the value that
238 * an ordinary lookup of that name returns. Hide all
239 * others.
240 *
241 * If we ever cache this translation in the dcache
242 * I should do a dcache lookup first. But for now
243 * it is just simpler not to.
244 */
245 ret = 0;
246 child_table = do_proc_sys_lookup(dir, &qname, &head);
247 sysctl_head_finish(head);
248 if (child_table != table)
249 return 0;
250
251 child = d_lookup(dir, &qname); 193 child = d_lookup(dir, &qname);
252 if (!child) { 194 if (!child) {
253 struct dentry *new; 195 child = d_alloc(dir, &qname);
254 new = d_alloc(dir, &qname); 196 if (child) {
255 if (new) { 197 inode = proc_sys_make_inode(dir->d_sb, head, table);
256 inode = proc_sys_make_inode(dir->d_inode, table); 198 if (!inode) {
257 if (!inode) 199 dput(child);
258 child = ERR_PTR(-ENOMEM); 200 return -ENOMEM;
259 else { 201 } else {
260 new->d_op = &proc_sys_dentry_operations; 202 child->d_op = &proc_sys_dentry_operations;
261 d_add(new, inode); 203 d_add(child, inode);
262 } 204 }
263 if (child) 205 } else {
264 dput(new); 206 return -ENOMEM;
265 else
266 child = new;
267 } 207 }
268 } 208 }
269 if (!child || IS_ERR(child) || !child->d_inode)
270 goto end_instantiate;
271 inode = child->d_inode; 209 inode = child->d_inode;
272 if (inode) { 210 ino = inode->i_ino;
273 ino = inode->i_ino; 211 type = inode->i_mode >> 12;
274 type = inode->i_mode >> 12;
275 }
276 dput(child); 212 dput(child);
277end_instantiate: 213 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
278 if (!ino) 214}
279 ino= find_inode_number(dir, &qname); 215
280 if (!ino) 216static int scan(struct ctl_table_header *head, ctl_table *table,
281 ino = 1; 217 unsigned long *pos, struct file *file,
282 return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); 218 void *dirent, filldir_t filldir)
219{
220
221 for (; table->ctl_name || table->procname; table++, (*pos)++) {
222 int res;
223
224 /* Can't do anything without a proc name */
225 if (!table->procname)
226 continue;
227
228 if (*pos < file->f_pos)
229 continue;
230
231 res = proc_sys_fill_cache(file, dirent, filldir, head, table);
232 if (res)
233 return res;
234
235 file->f_pos = *pos + 1;
236 }
237 return 0;
283} 238}
284 239
285static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) 240static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
286{ 241{
287 struct dentry *dentry = filp->f_dentry; 242 struct dentry *dentry = filp->f_path.dentry;
288 struct inode *inode = dentry->d_inode; 243 struct inode *inode = dentry->d_inode;
289 struct ctl_table_header *head = NULL; 244 struct ctl_table_header *head = grab_header(inode);
290 struct ctl_table *table; 245 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
246 struct ctl_table_header *h = NULL;
291 unsigned long pos; 247 unsigned long pos;
292 int ret; 248 int ret = -EINVAL;
249
250 if (IS_ERR(head))
251 return PTR_ERR(head);
293 252
294 ret = -ENOTDIR; 253 if (table && !table->child) {
295 if (!S_ISDIR(inode->i_mode)) 254 WARN_ON(1);
296 goto out; 255 goto out;
256 }
257
258 table = table ? table->child : head->ctl_table;
297 259
298 ret = 0; 260 ret = 0;
299 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ 261 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
@@ -311,30 +273,17 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
311 } 273 }
312 pos = 2; 274 pos = 2;
313 275
314 /* - Find each instance of the directory 276 ret = scan(head, table, &pos, filp, dirent, filldir);
315 * - Read all entries in each instance 277 if (ret)
316 * - Before returning an entry to user space lookup the entry 278 goto out;
317 * by name and if I find a different entry don't return
318 * this one because it means it is a buried dup.
319 * For sysctl this should only happen for directory entries.
320 */
321 for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
322 table = proc_sys_lookup_table(dentry, head->ctl_table);
323 279
324 if (!table) 280 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
281 if (h->attached_to != table)
325 continue; 282 continue;
326 283 ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
327 for (; table->ctl_name || table->procname; table++, pos++) { 284 if (ret) {
328 /* Can't do anything without a proc name */ 285 sysctl_head_finish(h);
329 if (!table->procname) 286 break;
330 continue;
331
332 if (pos < filp->f_pos)
333 continue;
334
335 if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
336 goto out;
337 filp->f_pos = pos + 1;
338 } 287 }
339 } 288 }
340 ret = 1; 289 ret = 1;
@@ -343,53 +292,24 @@ out:
343 return ret; 292 return ret;
344} 293}
345 294
346static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd) 295static int proc_sys_permission(struct inode *inode, int mask)
347{ 296{
348 /* 297 /*
349 * sysctl entries that are not writeable, 298 * sysctl entries that are not writeable,
350 * are _NOT_ writeable, capabilities or not. 299 * are _NOT_ writeable, capabilities or not.
351 */ 300 */
352 struct ctl_table_header *head; 301 struct ctl_table_header *head = grab_header(inode);
353 struct ctl_table *table; 302 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
354 struct dentry *dentry;
355 int mode;
356 int depth;
357 int error; 303 int error;
358 304
359 head = NULL; 305 if (IS_ERR(head))
360 depth = PROC_I(inode)->fd; 306 return PTR_ERR(head);
361
362 /* First check the cached permissions, in case we don't have
363 * enough information to lookup the sysctl table entry.
364 */
365 error = -EACCES;
366 mode = inode->i_mode;
367
368 if (current->euid == 0)
369 mode >>= 6;
370 else if (in_group_p(0))
371 mode >>= 3;
372
373 if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
374 error = 0;
375
376 /* If we can't get a sysctl table entry the permission
377 * checks on the cached mode will have to be enough.
378 */
379 if (!nd || !depth)
380 goto out;
381 307
382 dentry = nd->path.dentry; 308 if (!table) /* global root - r-xr-xr-x */
383 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 309 error = mask & MAY_WRITE ? -EACCES : 0;
310 else /* Use the permissions on the sysctl table entry */
311 error = sysctl_perm(head->root, table, mask);
384 312
385 /* If the entry does not exist deny permission */
386 error = -EACCES;
387 if (!table)
388 goto out;
389
390 /* Use the permissions on the sysctl table entry */
391 error = sysctl_perm(head->root, table, mask);
392out:
393 sysctl_head_finish(head); 313 sysctl_head_finish(head);
394 return error; 314 return error;
395} 315}
@@ -409,33 +329,70 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
409 return error; 329 return error;
410} 330}
411 331
412/* I'm lazy and don't distinguish between files and directories, 332static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
413 * until access time. 333{
414 */ 334 struct inode *inode = dentry->d_inode;
335 struct ctl_table_header *head = grab_header(inode);
336 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
337
338 if (IS_ERR(head))
339 return PTR_ERR(head);
340
341 generic_fillattr(inode, stat);
342 if (table)
343 stat->mode = (stat->mode & S_IFMT) | table->mode;
344
345 sysctl_head_finish(head);
346 return 0;
347}
348
415static const struct file_operations proc_sys_file_operations = { 349static const struct file_operations proc_sys_file_operations = {
416 .read = proc_sys_read, 350 .read = proc_sys_read,
417 .write = proc_sys_write, 351 .write = proc_sys_write,
352};
353
354static const struct file_operations proc_sys_dir_file_operations = {
418 .readdir = proc_sys_readdir, 355 .readdir = proc_sys_readdir,
419}; 356};
420 357
421static const struct inode_operations proc_sys_inode_operations = { 358static const struct inode_operations proc_sys_inode_operations = {
359 .permission = proc_sys_permission,
360 .setattr = proc_sys_setattr,
361 .getattr = proc_sys_getattr,
362};
363
364static const struct inode_operations proc_sys_dir_operations = {
422 .lookup = proc_sys_lookup, 365 .lookup = proc_sys_lookup,
423 .permission = proc_sys_permission, 366 .permission = proc_sys_permission,
424 .setattr = proc_sys_setattr, 367 .setattr = proc_sys_setattr,
368 .getattr = proc_sys_getattr,
425}; 369};
426 370
427static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 371static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
428{ 372{
429 struct ctl_table_header *head; 373 return !PROC_I(dentry->d_inode)->sysctl->unregistering;
430 struct ctl_table *table; 374}
431 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 375
432 proc_sys_refresh_inode(dentry->d_inode, table); 376static int proc_sys_delete(struct dentry *dentry)
433 sysctl_head_finish(head); 377{
434 return !!table; 378 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
379}
380
381static int proc_sys_compare(struct dentry *dir, struct qstr *qstr,
382 struct qstr *name)
383{
384 struct dentry *dentry = container_of(qstr, struct dentry, d_name);
385 if (qstr->len != name->len)
386 return 1;
387 if (memcmp(qstr->name, name->name, name->len))
388 return 1;
389 return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl);
435} 390}
436 391
437static struct dentry_operations proc_sys_dentry_operations = { 392static struct dentry_operations proc_sys_dentry_operations = {
438 .d_revalidate = proc_sys_revalidate, 393 .d_revalidate = proc_sys_revalidate,
394 .d_delete = proc_sys_delete,
395 .d_compare = proc_sys_compare,
439}; 396};
440 397
441static struct proc_dir_entry *proc_sys_root; 398static struct proc_dir_entry *proc_sys_root;
@@ -443,8 +400,8 @@ static struct proc_dir_entry *proc_sys_root;
443int proc_sys_init(void) 400int proc_sys_init(void)
444{ 401{
445 proc_sys_root = proc_mkdir("sys", NULL); 402 proc_sys_root = proc_mkdir("sys", NULL);
446 proc_sys_root->proc_iops = &proc_sys_inode_operations; 403 proc_sys_root->proc_iops = &proc_sys_dir_operations;
447 proc_sys_root->proc_fops = &proc_sys_file_operations; 404 proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
448 proc_sys_root->nlink = 0; 405 proc_sys_root->nlink = 0;
449 return 0; 406 return 0;
450} 407}
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 21f490f5d65c..d153946d6d15 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -136,54 +136,6 @@ static const struct file_operations proc_tty_drivers_operations = {
136 .release = seq_release, 136 .release = seq_release,
137}; 137};
138 138
139static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
140{
141 return (*pos < NR_LDISCS) ? pos : NULL;
142}
143
144static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
145{
146 (*pos)++;
147 return (*pos < NR_LDISCS) ? pos : NULL;
148}
149
150static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
151{
152}
153
154static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
155{
156 int i = *(loff_t *)v;
157 struct tty_ldisc *ld;
158
159 ld = tty_ldisc_get(i);
160 if (ld == NULL)
161 return 0;
162 seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i);
163 tty_ldisc_put(i);
164 return 0;
165}
166
167static const struct seq_operations tty_ldiscs_seq_ops = {
168 .start = tty_ldiscs_seq_start,
169 .next = tty_ldiscs_seq_next,
170 .stop = tty_ldiscs_seq_stop,
171 .show = tty_ldiscs_seq_show,
172};
173
174static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
175{
176 return seq_open(file, &tty_ldiscs_seq_ops);
177}
178
179static const struct file_operations tty_ldiscs_proc_fops = {
180 .owner = THIS_MODULE,
181 .open = proc_tty_ldiscs_open,
182 .read = seq_read,
183 .llseek = seq_lseek,
184 .release = seq_release,
185};
186
187/* 139/*
188 * This function is called by tty_register_driver() to handle 140 * This function is called by tty_register_driver() to handle
189 * registering the driver's /proc handler into /proc/tty/driver/<foo> 141 * registering the driver's /proc handler into /proc/tty/driver/<foo>
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 164bd9f9ede3..7546a918f790 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -636,7 +636,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
636 struct pagemapread pm; 636 struct pagemapread pm;
637 int pagecount; 637 int pagecount;
638 int ret = -ESRCH; 638 int ret = -ESRCH;
639 struct mm_walk pagemap_walk; 639 struct mm_walk pagemap_walk = {};
640 unsigned long src; 640 unsigned long src;
641 unsigned long svpfn; 641 unsigned long svpfn;
642 unsigned long start_vaddr; 642 unsigned long start_vaddr;