about summary refs log tree commit diff stats
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r-- fs/proc/Kconfig 69
-rw-r--r-- fs/proc/array.c 75
-rw-r--r-- fs/proc/base.c 120
-rw-r--r-- fs/proc/generic.c 51
-rw-r--r-- fs/proc/inode.c 90
-rw-r--r-- fs/proc/internal.h 10
-rw-r--r-- fs/proc/kcore.c 10
-rw-r--r-- fs/proc/kmsg.c 2
-rw-r--r-- fs/proc/nommu.c 4
-rw-r--r-- fs/proc/proc_misc.c 47
-rw-r--r-- fs/proc/proc_net.c 43
-rw-r--r-- fs/proc/proc_sysctl.c 433
-rw-r--r-- fs/proc/proc_tty.c 48
-rw-r--r-- fs/proc/task_mmu.c 102
-rw-r--r-- fs/proc/task_nommu.c 5
-rw-r--r-- fs/proc/vmcore.c 6
16 files changed, 604 insertions, 511 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
new file mode 100644
index 000000000000..50f8f0600f06
--- /dev/null
+++ b/fs/proc/Kconfig
@@ -0,0 +1,69 @@
1config PROC_FS
2 bool "/proc file system support" if EMBEDDED
3 default y
4 help
5 This is a virtual file system providing information about the status
6 of the system. "Virtual" means that it doesn't take up any space on
7 your hard disk: the files are created on the fly by the kernel when
8 you try to access them. Also, you cannot read the files with older
9 versions of the program less: you need to use more or cat.
10
11 It's totally cool; for example, "cat /proc/interrupts" gives
12 information about what the different IRQs are used for at the moment
13 (there is a small number of Interrupt ReQuest lines in your computer
14 that are used by the attached devices to gain the CPU's attention --
15 often a source of trouble if two devices are mistakenly configured
16 to use the same IRQ). The program procinfo can display some
17 information about your system gathered from the /proc file system.
18
19 Before you can use the /proc file system, it has to be mounted,
20 meaning it has to be given a location in the directory hierarchy.
21 That location should be /proc. A command such as "mount -t proc proc
22 /proc" or the equivalent line in /etc/fstab does the job.
23
24 The /proc file system is explained in the file
25 <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
26 ("man 5 proc").
27
28 This option will enlarge your kernel by about 67 KB. Several
29 programs depend on this, so everyone should say Y here.
30
31config PROC_KCORE
32 bool "/proc/kcore support" if !ARM
33 depends on PROC_FS && MMU
34
35config PROC_VMCORE
36 bool "/proc/vmcore support (EXPERIMENTAL)"
37 depends on PROC_FS && CRASH_DUMP
38 default y
39 help
40 Exports the dump image of crashed kernel in ELF format.
41
42config PROC_SYSCTL
43 bool "Sysctl support (/proc/sys)" if EMBEDDED
44 depends on PROC_FS
45 select SYSCTL
46 default y
47 ---help---
48 The sysctl interface provides a means of dynamically changing
49 certain kernel parameters and variables on the fly without requiring
50 a recompile of the kernel or reboot of the system. The primary
51 interface is through /proc/sys. If you say Y here a tree of
52 modifiable sysctl entries will be generated beneath the
53 /proc/sys directory. They are explained in the files
54 in <file:Documentation/sysctl/>. Note that enabling this
55 option will enlarge the kernel by at least 8 KB.
56
57 As it is generally a good thing, you should say Y here unless
58 building a kernel for install/rescue disks or your system is very
59 limited in memory.
60
61config PROC_PAGE_MONITOR
62 default y
63 depends on PROC_FS && MMU
64 bool "Enable /proc page monitoring" if EMBEDDED
65 help
66 Various /proc files exist to monitor process memory utilization:
67 /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap,
68 /proc/kpagecount, and /proc/kpageflags. Disabling these
69 interfaces will reduce the size of the kernel by approximately 4 KB.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 797d775e0354..f4bc0e789539 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -80,16 +80,12 @@
80#include <linux/delayacct.h> 80#include <linux/delayacct.h>
81#include <linux/seq_file.h> 81#include <linux/seq_file.h>
82#include <linux/pid_namespace.h> 82#include <linux/pid_namespace.h>
83#include <linux/tracehook.h>
83 84
84#include <asm/pgtable.h> 85#include <asm/pgtable.h>
85#include <asm/processor.h> 86#include <asm/processor.h>
86#include "internal.h" 87#include "internal.h"
87 88
88/* Gcc optimizes away "strlen(x)" for constant x */
89#define ADDBUF(buffer, string) \
90do { memcpy(buffer, string, strlen(string)); \
91 buffer += strlen(string); } while (0)
92
93static inline void task_name(struct seq_file *m, struct task_struct *p) 89static inline void task_name(struct seq_file *m, struct task_struct *p)
94{ 90{
95 int i; 91 int i;
@@ -168,8 +164,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
168 rcu_read_lock(); 164 rcu_read_lock();
169 ppid = pid_alive(p) ? 165 ppid = pid_alive(p) ?
170 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; 166 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
171 tpid = pid_alive(p) && p->ptrace ? 167 tpid = 0;
172 task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0; 168 if (pid_alive(p)) {
169 struct task_struct *tracer = tracehook_tracer_task(p);
170 if (tracer)
171 tpid = task_pid_nr_ns(tracer, ns);
172 }
173 seq_printf(m, 173 seq_printf(m,
174 "State:\t%s\n" 174 "State:\t%s\n"
175 "Tgid:\t%d\n" 175 "Tgid:\t%d\n"
@@ -256,7 +256,6 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
256 sigemptyset(&ignored); 256 sigemptyset(&ignored);
257 sigemptyset(&caught); 257 sigemptyset(&caught);
258 258
259 rcu_read_lock();
260 if (lock_task_sighand(p, &flags)) { 259 if (lock_task_sighand(p, &flags)) {
261 pending = p->pending.signal; 260 pending = p->pending.signal;
262 shpending = p->signal->shared_pending.signal; 261 shpending = p->signal->shared_pending.signal;
@@ -267,7 +266,6 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
267 qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; 266 qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
268 unlock_task_sighand(p, &flags); 267 unlock_task_sighand(p, &flags);
269 } 268 }
270 rcu_read_unlock();
271 269
272 seq_printf(m, "Threads:\t%d\n", num_threads); 270 seq_printf(m, "Threads:\t%d\n", num_threads);
273 seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); 271 seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim);
@@ -332,65 +330,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
332 return 0; 330 return 0;
333} 331}
334 332
335/*
336 * Use precise platform statistics if available:
337 */
338#ifdef CONFIG_VIRT_CPU_ACCOUNTING
339static cputime_t task_utime(struct task_struct *p)
340{
341 return p->utime;
342}
343
344static cputime_t task_stime(struct task_struct *p)
345{
346 return p->stime;
347}
348#else
349static cputime_t task_utime(struct task_struct *p)
350{
351 clock_t utime = cputime_to_clock_t(p->utime),
352 total = utime + cputime_to_clock_t(p->stime);
353 u64 temp;
354
355 /*
356 * Use CFS's precise accounting:
357 */
358 temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
359
360 if (total) {
361 temp *= utime;
362 do_div(temp, total);
363 }
364 utime = (clock_t)temp;
365
366 p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
367 return p->prev_utime;
368}
369
370static cputime_t task_stime(struct task_struct *p)
371{
372 clock_t stime;
373
374 /*
375 * Use CFS's precise accounting. (we subtract utime from
376 * the total, to make sure the total observed by userspace
377 * grows monotonically - apps rely on that):
378 */
379 stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
380 cputime_to_clock_t(task_utime(p));
381
382 if (stime >= 0)
383 p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
384
385 return p->prev_stime;
386}
387#endif
388
389static cputime_t task_gtime(struct task_struct *p)
390{
391 return p->gtime;
392}
393
394static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, 333static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
395 struct pid *pid, struct task_struct *task, int whole) 334 struct pid *pid, struct task_struct *task, int whole)
396{ 335{
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3b455371e7ff..b5918ae8ca79 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -53,6 +53,7 @@
53#include <linux/time.h> 53#include <linux/time.h>
54#include <linux/proc_fs.h> 54#include <linux/proc_fs.h>
55#include <linux/stat.h> 55#include <linux/stat.h>
56#include <linux/task_io_accounting_ops.h>
56#include <linux/init.h> 57#include <linux/init.h>
57#include <linux/capability.h> 58#include <linux/capability.h>
58#include <linux/file.h> 59#include <linux/file.h>
@@ -69,6 +70,7 @@
69#include <linux/mount.h> 70#include <linux/mount.h>
70#include <linux/security.h> 71#include <linux/security.h>
71#include <linux/ptrace.h> 72#include <linux/ptrace.h>
73#include <linux/tracehook.h>
72#include <linux/cgroup.h> 74#include <linux/cgroup.h>
73#include <linux/cpuset.h> 75#include <linux/cpuset.h>
74#include <linux/audit.h> 76#include <linux/audit.h>
@@ -146,9 +148,6 @@ static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
146 return count; 148 return count;
147} 149}
148 150
149int maps_protect;
150EXPORT_SYMBOL(maps_protect);
151
152static struct fs_struct *get_fs_struct(struct task_struct *task) 151static struct fs_struct *get_fs_struct(struct task_struct *task)
153{ 152{
154 struct fs_struct *fs; 153 struct fs_struct *fs;
@@ -162,7 +161,6 @@ static struct fs_struct *get_fs_struct(struct task_struct *task)
162 161
163static int get_nr_threads(struct task_struct *tsk) 162static int get_nr_threads(struct task_struct *tsk)
164{ 163{
165 /* Must be called with the rcu_read_lock held */
166 unsigned long flags; 164 unsigned long flags;
167 int count = 0; 165 int count = 0;
168 166
@@ -231,10 +229,14 @@ static int check_mem_permission(struct task_struct *task)
231 * If current is actively ptrace'ing, and would also be 229 * If current is actively ptrace'ing, and would also be
232 * permitted to freshly attach with ptrace now, permit it. 230 * permitted to freshly attach with ptrace now, permit it.
233 */ 231 */
234 if (task->parent == current && (task->ptrace & PT_PTRACED) && 232 if (task_is_stopped_or_traced(task)) {
235 task_is_stopped_or_traced(task) && 233 int match;
236 ptrace_may_attach(task)) 234 rcu_read_lock();
237 return 0; 235 match = (tracehook_tracer_task(task) == current);
236 rcu_read_unlock();
237 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
238 return 0;
239 }
238 240
239 /* 241 /*
240 * Noone else is allowed. 242 * Noone else is allowed.
@@ -251,7 +253,8 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
251 task_lock(task); 253 task_lock(task);
252 if (task->mm != mm) 254 if (task->mm != mm)
253 goto out; 255 goto out;
254 if (task->mm != current->mm && __ptrace_may_attach(task) < 0) 256 if (task->mm != current->mm &&
257 __ptrace_may_access(task, PTRACE_MODE_READ) < 0)
255 goto out; 258 goto out;
256 task_unlock(task); 259 task_unlock(task);
257 return mm; 260 return mm;
@@ -464,14 +467,10 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
464 467
465 struct rlimit rlim[RLIM_NLIMITS]; 468 struct rlimit rlim[RLIM_NLIMITS];
466 469
467 rcu_read_lock(); 470 if (!lock_task_sighand(task, &flags))
468 if (!lock_task_sighand(task,&flags)) {
469 rcu_read_unlock();
470 return 0; 471 return 0;
471 }
472 memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); 472 memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
473 unlock_task_sighand(task, &flags); 473 unlock_task_sighand(task, &flags);
474 rcu_read_unlock();
475 474
476 /* 475 /*
477 * print the file header 476 * print the file header
@@ -503,6 +502,26 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
503 return count; 502 return count;
504} 503}
505 504
505#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
506static int proc_pid_syscall(struct task_struct *task, char *buffer)
507{
508 long nr;
509 unsigned long args[6], sp, pc;
510
511 if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
512 return sprintf(buffer, "running\n");
513
514 if (nr < 0)
515 return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
516
517 return sprintf(buffer,
518 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
519 nr,
520 args[0], args[1], args[2], args[3], args[4], args[5],
521 sp, pc);
522}
523#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
524
506/************************************************************************/ 525/************************************************************************/
507/* Here the fs part begins */ 526/* Here the fs part begins */
508/************************************************************************/ 527/************************************************************************/
@@ -518,7 +537,7 @@ static int proc_fd_access_allowed(struct inode *inode)
518 */ 537 */
519 task = get_proc_task(inode); 538 task = get_proc_task(inode);
520 if (task) { 539 if (task) {
521 allowed = ptrace_may_attach(task); 540 allowed = ptrace_may_access(task, PTRACE_MODE_READ);
522 put_task_struct(task); 541 put_task_struct(task);
523 } 542 }
524 return allowed; 543 return allowed;
@@ -904,7 +923,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
904 if (!task) 923 if (!task)
905 goto out_no_task; 924 goto out_no_task;
906 925
907 if (!ptrace_may_attach(task)) 926 if (!ptrace_may_access(task, PTRACE_MODE_READ))
908 goto out; 927 goto out;
909 928
910 ret = -ENOMEM; 929 ret = -ENOMEM;
@@ -1833,8 +1852,7 @@ static const struct file_operations proc_fd_operations = {
1833 * /proc/pid/fd needs a special permission handler so that a process can still 1852 * /proc/pid/fd needs a special permission handler so that a process can still
1834 * access /proc/self/fd after it has executed a setuid(). 1853 * access /proc/self/fd after it has executed a setuid().
1835 */ 1854 */
1836static int proc_fd_permission(struct inode *inode, int mask, 1855static int proc_fd_permission(struct inode *inode, int mask)
1837 struct nameidata *nd)
1838{ 1856{
1839 int rv; 1857 int rv;
1840 1858
@@ -2375,29 +2393,54 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
2375} 2393}
2376 2394
2377#ifdef CONFIG_TASK_IO_ACCOUNTING 2395#ifdef CONFIG_TASK_IO_ACCOUNTING
2378static int proc_pid_io_accounting(struct task_struct *task, char *buffer) 2396static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2379{ 2397{
2398 struct task_io_accounting acct = task->ioac;
2399 unsigned long flags;
2400
2401 if (whole && lock_task_sighand(task, &flags)) {
2402 struct task_struct *t = task;
2403
2404 task_io_accounting_add(&acct, &task->signal->ioac);
2405 while_each_thread(task, t)
2406 task_io_accounting_add(&acct, &t->ioac);
2407
2408 unlock_task_sighand(task, &flags);
2409 }
2380 return sprintf(buffer, 2410 return sprintf(buffer,
2381#ifdef CONFIG_TASK_XACCT
2382 "rchar: %llu\n" 2411 "rchar: %llu\n"
2383 "wchar: %llu\n" 2412 "wchar: %llu\n"
2384 "syscr: %llu\n" 2413 "syscr: %llu\n"
2385 "syscw: %llu\n" 2414 "syscw: %llu\n"
2386#endif
2387 "read_bytes: %llu\n" 2415 "read_bytes: %llu\n"
2388 "write_bytes: %llu\n" 2416 "write_bytes: %llu\n"
2389 "cancelled_write_bytes: %llu\n", 2417 "cancelled_write_bytes: %llu\n",
2390#ifdef CONFIG_TASK_XACCT 2418 (unsigned long long)acct.rchar,
2391 (unsigned long long)task->rchar, 2419 (unsigned long long)acct.wchar,
2392 (unsigned long long)task->wchar, 2420 (unsigned long long)acct.syscr,
2393 (unsigned long long)task->syscr, 2421 (unsigned long long)acct.syscw,
2394 (unsigned long long)task->syscw, 2422 (unsigned long long)acct.read_bytes,
2395#endif 2423 (unsigned long long)acct.write_bytes,
2396 (unsigned long long)task->ioac.read_bytes, 2424 (unsigned long long)acct.cancelled_write_bytes);
2397 (unsigned long long)task->ioac.write_bytes, 2425}
2398 (unsigned long long)task->ioac.cancelled_write_bytes); 2426
2427static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
2428{
2429 return do_io_accounting(task, buffer, 0);
2430}
2431
2432static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2433{
2434 return do_io_accounting(task, buffer, 1);
2435}
2436#endif /* CONFIG_TASK_IO_ACCOUNTING */
2437
2438static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2439 struct pid *pid, struct task_struct *task)
2440{
2441 seq_printf(m, "%08x\n", task->personality);
2442 return 0;
2399} 2443}
2400#endif
2401 2444
2402/* 2445/*
2403 * Thread groups 2446 * Thread groups
@@ -2415,10 +2458,14 @@ static const struct pid_entry tgid_base_stuff[] = {
2415 REG("environ", S_IRUSR, environ), 2458 REG("environ", S_IRUSR, environ),
2416 INF("auxv", S_IRUSR, pid_auxv), 2459 INF("auxv", S_IRUSR, pid_auxv),
2417 ONE("status", S_IRUGO, pid_status), 2460 ONE("status", S_IRUGO, pid_status),
2461 ONE("personality", S_IRUSR, pid_personality),
2418 INF("limits", S_IRUSR, pid_limits), 2462 INF("limits", S_IRUSR, pid_limits),
2419#ifdef CONFIG_SCHED_DEBUG 2463#ifdef CONFIG_SCHED_DEBUG
2420 REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2464 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2421#endif 2465#endif
2466#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2467 INF("syscall", S_IRUSR, pid_syscall),
2468#endif
2422 INF("cmdline", S_IRUGO, pid_cmdline), 2469 INF("cmdline", S_IRUGO, pid_cmdline),
2423 ONE("stat", S_IRUGO, tgid_stat), 2470 ONE("stat", S_IRUGO, tgid_stat),
2424 ONE("statm", S_IRUGO, pid_statm), 2471 ONE("statm", S_IRUGO, pid_statm),
@@ -2469,7 +2516,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2469 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), 2516 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
2470#endif 2517#endif
2471#ifdef CONFIG_TASK_IO_ACCOUNTING 2518#ifdef CONFIG_TASK_IO_ACCOUNTING
2472 INF("io", S_IRUGO, pid_io_accounting), 2519 INF("io", S_IRUGO, tgid_io_accounting),
2473#endif 2520#endif
2474}; 2521};
2475 2522
@@ -2747,10 +2794,14 @@ static const struct pid_entry tid_base_stuff[] = {
2747 REG("environ", S_IRUSR, environ), 2794 REG("environ", S_IRUSR, environ),
2748 INF("auxv", S_IRUSR, pid_auxv), 2795 INF("auxv", S_IRUSR, pid_auxv),
2749 ONE("status", S_IRUGO, pid_status), 2796 ONE("status", S_IRUGO, pid_status),
2797 ONE("personality", S_IRUSR, pid_personality),
2750 INF("limits", S_IRUSR, pid_limits), 2798 INF("limits", S_IRUSR, pid_limits),
2751#ifdef CONFIG_SCHED_DEBUG 2799#ifdef CONFIG_SCHED_DEBUG
2752 REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2800 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2753#endif 2801#endif
2802#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2803 INF("syscall", S_IRUSR, pid_syscall),
2804#endif
2754 INF("cmdline", S_IRUGO, pid_cmdline), 2805 INF("cmdline", S_IRUGO, pid_cmdline),
2755 ONE("stat", S_IRUGO, tid_stat), 2806 ONE("stat", S_IRUGO, tid_stat),
2756 ONE("statm", S_IRUGO, pid_statm), 2807 ONE("statm", S_IRUGO, pid_statm),
@@ -2796,6 +2847,9 @@ static const struct pid_entry tid_base_stuff[] = {
2796#ifdef CONFIG_FAULT_INJECTION 2847#ifdef CONFIG_FAULT_INJECTION
2797 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), 2848 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
2798#endif 2849#endif
2850#ifdef CONFIG_TASK_IO_ACCOUNTING
2851 INF("io", S_IRUGO, tid_io_accounting),
2852#endif
2799}; 2853};
2800 2854
2801static int proc_tid_base_readdir(struct file * filp, 2855static int proc_tid_base_readdir(struct file * filp,
@@ -3035,9 +3089,7 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
3035 generic_fillattr(inode, stat); 3089 generic_fillattr(inode, stat);
3036 3090
3037 if (p) { 3091 if (p) {
3038 rcu_read_lock();
3039 stat->nlink += get_nr_threads(p); 3092 stat->nlink += get_nr_threads(p);
3040 rcu_read_unlock();
3041 put_task_struct(p); 3093 put_task_struct(p);
3042 } 3094 }
3043 3095
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 43e54e86cefd..7821589a17d5 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -300,10 +300,10 @@ out:
300 return rtn; 300 return rtn;
301} 301}
302 302
303static DEFINE_IDR(proc_inum_idr); 303static DEFINE_IDA(proc_inum_ida);
304static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 304static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
305 305
306#define PROC_DYNAMIC_FIRST 0xF0000000UL 306#define PROC_DYNAMIC_FIRST 0xF0000000U
307 307
308/* 308/*
309 * Return an inode number between PROC_DYNAMIC_FIRST and 309 * Return an inode number between PROC_DYNAMIC_FIRST and
@@ -311,36 +311,34 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
311 */ 311 */
312static unsigned int get_inode_number(void) 312static unsigned int get_inode_number(void)
313{ 313{
314 int i, inum = 0; 314 unsigned int i;
315 int error; 315 int error;
316 316
317retry: 317retry:
318 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 318 if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
319 return 0; 319 return 0;
320 320
321 spin_lock(&proc_inum_lock); 321 spin_lock(&proc_inum_lock);
322 error = idr_get_new(&proc_inum_idr, NULL, &i); 322 error = ida_get_new(&proc_inum_ida, &i);
323 spin_unlock(&proc_inum_lock); 323 spin_unlock(&proc_inum_lock);
324 if (error == -EAGAIN) 324 if (error == -EAGAIN)
325 goto retry; 325 goto retry;
326 else if (error) 326 else if (error)
327 return 0; 327 return 0;
328 328
329 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 329 if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
330 330 spin_lock(&proc_inum_lock);
331 /* inum will never be more than 0xf0ffffff, so no check 331 ida_remove(&proc_inum_ida, i);
332 * for overflow. 332 spin_unlock(&proc_inum_lock);
333 */ 333 return 0;
334 334 }
335 return inum; 335 return PROC_DYNAMIC_FIRST + i;
336} 336}
337 337
338static void release_inode_number(unsigned int inum) 338static void release_inode_number(unsigned int inum)
339{ 339{
340 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;
341
342 spin_lock(&proc_inum_lock); 340 spin_lock(&proc_inum_lock);
343 idr_remove(&proc_inum_idr, id); 341 ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
344 spin_unlock(&proc_inum_lock); 342 spin_unlock(&proc_inum_lock);
345} 343}
346 344
@@ -549,8 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
549 547
550 for (tmp = dir->subdir; tmp; tmp = tmp->next) 548 for (tmp = dir->subdir; tmp; tmp = tmp->next)
551 if (strcmp(tmp->name, dp->name) == 0) { 549 if (strcmp(tmp->name, dp->name) == 0) {
552 printk(KERN_WARNING "proc_dir_entry '%s' already " 550 printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n",
553 "registered\n", dp->name); 551 dir->name, dp->name);
554 dump_stack(); 552 dump_stack();
555 break; 553 break;
556 } 554 }
@@ -597,6 +595,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
597 ent->pde_users = 0; 595 ent->pde_users = 0;
598 spin_lock_init(&ent->pde_unload_lock); 596 spin_lock_init(&ent->pde_unload_lock);
599 ent->pde_unload_completion = NULL; 597 ent->pde_unload_completion = NULL;
598 INIT_LIST_HEAD(&ent->pde_openers);
600 out: 599 out:
601 return ent; 600 return ent;
602} 601}
@@ -789,15 +788,25 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
789 spin_unlock(&de->pde_unload_lock); 788 spin_unlock(&de->pde_unload_lock);
790 789
791continue_removing: 790continue_removing:
791 spin_lock(&de->pde_unload_lock);
792 while (!list_empty(&de->pde_openers)) {
793 struct pde_opener *pdeo;
794
795 pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
796 list_del(&pdeo->lh);
797 spin_unlock(&de->pde_unload_lock);
798 pdeo->release(pdeo->inode, pdeo->file);
799 kfree(pdeo);
800 spin_lock(&de->pde_unload_lock);
801 }
802 spin_unlock(&de->pde_unload_lock);
803
792 if (S_ISDIR(de->mode)) 804 if (S_ISDIR(de->mode))
793 parent->nlink--; 805 parent->nlink--;
794 de->nlink = 0; 806 de->nlink = 0;
795 if (de->subdir) { 807 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory "
796 printk(KERN_WARNING "%s: removing non-empty directory "
797 "'%s/%s', leaking at least '%s'\n", __func__, 808 "'%s/%s', leaking at least '%s'\n", __func__,
798 de->parent->name, de->name, de->subdir->name); 809 de->parent->name, de->name, de->subdir->name);
799 WARN_ON(1);
800 }
801 if (atomic_dec_and_test(&de->count)) 810 if (atomic_dec_and_test(&de->count))
802 free_proc_entry(de); 811 free_proc_entry(de);
803} 812}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b08d10017911..c6b4fa7e3b49 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -17,6 +17,7 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
20#include <linux/sysctl.h>
20 21
21#include <asm/system.h> 22#include <asm/system.h>
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
@@ -65,6 +66,8 @@ static void proc_delete_inode(struct inode *inode)
65 module_put(de->owner); 66 module_put(de->owner);
66 de_put(de); 67 de_put(de);
67 } 68 }
69 if (PROC_I(inode)->sysctl)
70 sysctl_head_put(PROC_I(inode)->sysctl);
68 clear_inode(inode); 71 clear_inode(inode);
69} 72}
70 73
@@ -84,6 +87,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
84 ei->fd = 0; 87 ei->fd = 0;
85 ei->op.proc_get_link = NULL; 88 ei->op.proc_get_link = NULL;
86 ei->pde = NULL; 89 ei->pde = NULL;
90 ei->sysctl = NULL;
91 ei->sysctl_entry = NULL;
87 inode = &ei->vfs_inode; 92 inode = &ei->vfs_inode;
88 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 93 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
89 return inode; 94 return inode;
@@ -94,7 +99,7 @@ static void proc_destroy_inode(struct inode *inode)
94 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 99 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
95} 100}
96 101
97static void init_once(struct kmem_cache * cachep, void *foo) 102static void init_once(void *foo)
98{ 103{
99 struct proc_inode *ei = (struct proc_inode *) foo; 104 struct proc_inode *ei = (struct proc_inode *) foo;
100 105
@@ -111,27 +116,25 @@ int __init proc_init_inodecache(void)
111 return 0; 116 return 0;
112} 117}
113 118
114static int proc_remount(struct super_block *sb, int *flags, char *data)
115{
116 *flags |= MS_NODIRATIME;
117 return 0;
118}
119
120static const struct super_operations proc_sops = { 119static const struct super_operations proc_sops = {
121 .alloc_inode = proc_alloc_inode, 120 .alloc_inode = proc_alloc_inode,
122 .destroy_inode = proc_destroy_inode, 121 .destroy_inode = proc_destroy_inode,
123 .drop_inode = generic_delete_inode, 122 .drop_inode = generic_delete_inode,
124 .delete_inode = proc_delete_inode, 123 .delete_inode = proc_delete_inode,
125 .statfs = simple_statfs, 124 .statfs = simple_statfs,
126 .remount_fs = proc_remount,
127}; 125};
128 126
129static void pde_users_dec(struct proc_dir_entry *pde) 127static void __pde_users_dec(struct proc_dir_entry *pde)
130{ 128{
131 spin_lock(&pde->pde_unload_lock);
132 pde->pde_users--; 129 pde->pde_users--;
133 if (pde->pde_unload_completion && pde->pde_users == 0) 130 if (pde->pde_unload_completion && pde->pde_users == 0)
134 complete(pde->pde_unload_completion); 131 complete(pde->pde_unload_completion);
132}
133
134static void pde_users_dec(struct proc_dir_entry *pde)
135{
136 spin_lock(&pde->pde_unload_lock);
137 __pde_users_dec(pde);
135 spin_unlock(&pde->pde_unload_lock); 138 spin_unlock(&pde->pde_unload_lock);
136} 139}
137 140
@@ -318,36 +321,97 @@ static int proc_reg_open(struct inode *inode, struct file *file)
318 struct proc_dir_entry *pde = PDE(inode); 321 struct proc_dir_entry *pde = PDE(inode);
319 int rv = 0; 322 int rv = 0;
320 int (*open)(struct inode *, struct file *); 323 int (*open)(struct inode *, struct file *);
324 int (*release)(struct inode *, struct file *);
325 struct pde_opener *pdeo;
326
327 /*
328 * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
329 * sequence. ->release won't be called because ->proc_fops will be
330 * cleared. Depending on complexity of ->release, consequences vary.
331 *
332 * We can't wait for mercy when close will be done for real, it's
333 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
334 * by hand in remove_proc_entry(). For this, save opener's credentials
335 * for later.
336 */
337 pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
338 if (!pdeo)
339 return -ENOMEM;
321 340
322 spin_lock(&pde->pde_unload_lock); 341 spin_lock(&pde->pde_unload_lock);
323 if (!pde->proc_fops) { 342 if (!pde->proc_fops) {
324 spin_unlock(&pde->pde_unload_lock); 343 spin_unlock(&pde->pde_unload_lock);
325 return rv; 344 kfree(pdeo);
345 return -EINVAL;
326 } 346 }
327 pde->pde_users++; 347 pde->pde_users++;
328 open = pde->proc_fops->open; 348 open = pde->proc_fops->open;
349 release = pde->proc_fops->release;
329 spin_unlock(&pde->pde_unload_lock); 350 spin_unlock(&pde->pde_unload_lock);
330 351
331 if (open) 352 if (open)
332 rv = open(inode, file); 353 rv = open(inode, file);
333 354
334 pde_users_dec(pde); 355 spin_lock(&pde->pde_unload_lock);
356 if (rv == 0 && release) {
357 /* To know what to release. */
358 pdeo->inode = inode;
359 pdeo->file = file;
360 /* Strictly for "too late" ->release in proc_reg_release(). */
361 pdeo->release = release;
362 list_add(&pdeo->lh, &pde->pde_openers);
363 } else
364 kfree(pdeo);
365 __pde_users_dec(pde);
366 spin_unlock(&pde->pde_unload_lock);
335 return rv; 367 return rv;
336} 368}
337 369
370static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
371 struct inode *inode, struct file *file)
372{
373 struct pde_opener *pdeo;
374
375 list_for_each_entry(pdeo, &pde->pde_openers, lh) {
376 if (pdeo->inode == inode && pdeo->file == file)
377 return pdeo;
378 }
379 return NULL;
380}
381
338static int proc_reg_release(struct inode *inode, struct file *file) 382static int proc_reg_release(struct inode *inode, struct file *file)
339{ 383{
340 struct proc_dir_entry *pde = PDE(inode); 384 struct proc_dir_entry *pde = PDE(inode);
341 int rv = 0; 385 int rv = 0;
342 int (*release)(struct inode *, struct file *); 386 int (*release)(struct inode *, struct file *);
387 struct pde_opener *pdeo;
343 388
344 spin_lock(&pde->pde_unload_lock); 389 spin_lock(&pde->pde_unload_lock);
390 pdeo = find_pde_opener(pde, inode, file);
345 if (!pde->proc_fops) { 391 if (!pde->proc_fops) {
346 spin_unlock(&pde->pde_unload_lock); 392 /*
393 * Can't simply exit, __fput() will think that everything is OK,
394 * and move on to freeing struct file. remove_proc_entry() will
395 * find slacker in opener's list and will try to do non-trivial
396 * things with struct file. Therefore, remove opener from list.
397 *
398 * But if opener is removed from list, who will ->release it?
399 */
400 if (pdeo) {
401 list_del(&pdeo->lh);
402 spin_unlock(&pde->pde_unload_lock);
403 rv = pdeo->release(inode, file);
404 kfree(pdeo);
405 } else
406 spin_unlock(&pde->pde_unload_lock);
347 return rv; 407 return rv;
348 } 408 }
349 pde->pde_users++; 409 pde->pde_users++;
350 release = pde->proc_fops->release; 410 release = pde->proc_fops->release;
411 if (pdeo) {
412 list_del(&pdeo->lh);
413 kfree(pdeo);
414 }
351 spin_unlock(&pde->pde_unload_lock); 415 spin_unlock(&pde->pde_unload_lock);
352 416
353 if (release) 417 if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 28cbca805905..3bfb7b8747b3 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -45,8 +45,6 @@ do { \
45extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); 45extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
46#endif 46#endif
47 47
48extern int maps_protect;
49
50extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, 48extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
51 struct pid *pid, struct task_struct *task); 49 struct pid *pid, struct task_struct *task);
52extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, 50extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
@@ -63,6 +61,7 @@ extern const struct file_operations proc_smaps_operations;
63extern const struct file_operations proc_clear_refs_operations; 61extern const struct file_operations proc_clear_refs_operations;
64extern const struct file_operations proc_pagemap_operations; 62extern const struct file_operations proc_pagemap_operations;
65extern const struct file_operations proc_net_operations; 63extern const struct file_operations proc_net_operations;
64extern const struct file_operations proc_kmsg_operations;
66extern const struct inode_operations proc_net_inode_operations; 65extern const struct inode_operations proc_net_inode_operations;
67 66
68void free_proc_entry(struct proc_dir_entry *de); 67void free_proc_entry(struct proc_dir_entry *de);
@@ -88,3 +87,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
88 struct dentry *dentry); 87 struct dentry *dentry);
89int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 88int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
90 filldir_t filldir); 89 filldir_t filldir);
90
91struct pde_opener {
92 struct inode *inode;
93 struct file *file;
94 int (*release)(struct inode *, struct file *);
95 struct list_head lh;
96};
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e78c81fcf547..c2370c76fb71 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,6 +23,10 @@
23 23
24#define CORE_STR "CORE" 24#define CORE_STR "CORE"
25 25
26#ifndef ELF_CORE_EFLAGS
27#define ELF_CORE_EFLAGS 0
28#endif
29
26static int open_kcore(struct inode * inode, struct file * filp) 30static int open_kcore(struct inode * inode, struct file * filp)
27{ 31{
28 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 32 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
164 elf->e_entry = 0; 168 elf->e_entry = 0;
165 elf->e_phoff = sizeof(struct elfhdr); 169 elf->e_phoff = sizeof(struct elfhdr);
166 elf->e_shoff = 0; 170 elf->e_shoff = 0;
167#if defined(CONFIG_H8300) 171 elf->e_flags = ELF_CORE_EFLAGS;
168 elf->e_flags = ELF_FLAGS;
169#else
170 elf->e_flags = 0;
171#endif
172 elf->e_ehsize = sizeof(struct elfhdr); 172 elf->e_ehsize = sizeof(struct elfhdr);
173 elf->e_phentsize= sizeof(struct elf_phdr); 173 elf->e_phentsize= sizeof(struct elf_phdr);
174 elf->e_phnum = nphdr; 174 elf->e_phnum = nphdr;
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ff3b90b56e9d..9fd5df3f40ce 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
15#include <asm/uaccess.h> 15#include <asm/uaccess.h>
16#include <asm/io.h> 16#include <asm/io.h>
17 17
18#include "internal.h"
19
18extern wait_queue_head_t log_wait; 20extern wait_queue_head_t log_wait;
19 21
20extern int do_syslog(int type, char __user *bug, int count); 22extern int do_syslog(int type, char __user *bug, int count);
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 79ecd281d2cb..3f87d2632947 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -52,14 +52,14 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
52 } 52 }
53 53
54 seq_printf(m, 54 seq_printf(m,
55 "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", 55 "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
56 vma->vm_start, 56 vma->vm_start,
57 vma->vm_end, 57 vma->vm_end,
58 flags & VM_READ ? 'r' : '-', 58 flags & VM_READ ? 'r' : '-',
59 flags & VM_WRITE ? 'w' : '-', 59 flags & VM_WRITE ? 'w' : '-',
60 flags & VM_EXEC ? 'x' : '-', 60 flags & VM_EXEC ? 'x' : '-',
61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', 61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
62 vma->vm_pgoff << PAGE_SHIFT, 62 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
63 MAJOR(dev), MINOR(dev), ino, &len); 63 MAJOR(dev), MINOR(dev), ino, &len);
64 64
65 if (file) { 65 if (file) {
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 7e277f2ad466..b675a49c1823 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -24,6 +24,7 @@
24#include <linux/tty.h> 24#include <linux/tty.h>
25#include <linux/string.h> 25#include <linux/string.h>
26#include <linux/mman.h> 26#include <linux/mman.h>
27#include <linux/quicklist.h>
27#include <linux/proc_fs.h> 28#include <linux/proc_fs.h>
28#include <linux/ioport.h> 29#include <linux/ioport.h>
29#include <linux/mm.h> 30#include <linux/mm.h>
@@ -67,7 +68,6 @@
67extern int get_hardware_list(char *); 68extern int get_hardware_list(char *);
68extern int get_stram_list(char *); 69extern int get_stram_list(char *);
69extern int get_exec_domain_list(char *); 70extern int get_exec_domain_list(char *);
70extern int get_dma_list(char *);
71 71
72static int proc_calc_metrics(char *page, char **start, off_t off, 72static int proc_calc_metrics(char *page, char **start, off_t off,
73 int count, int *eof, int len) 73 int count, int *eof, int len)
@@ -123,6 +123,11 @@ static int uptime_read_proc(char *page, char **start, off_t off,
123 return proc_calc_metrics(page, start, off, count, eof, len); 123 return proc_calc_metrics(page, start, off, count, eof, len);
124} 124}
125 125
126int __attribute__((weak)) arch_report_meminfo(char *page)
127{
128 return 0;
129}
130
126static int meminfo_read_proc(char *page, char **start, off_t off, 131static int meminfo_read_proc(char *page, char **start, off_t off,
127 int count, int *eof, void *data) 132 int count, int *eof, void *data)
128{ 133{
@@ -177,6 +182,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
177 "SReclaimable: %8lu kB\n" 182 "SReclaimable: %8lu kB\n"
178 "SUnreclaim: %8lu kB\n" 183 "SUnreclaim: %8lu kB\n"
179 "PageTables: %8lu kB\n" 184 "PageTables: %8lu kB\n"
185#ifdef CONFIG_QUICKLIST
186 "Quicklists: %8lu kB\n"
187#endif
180 "NFS_Unstable: %8lu kB\n" 188 "NFS_Unstable: %8lu kB\n"
181 "Bounce: %8lu kB\n" 189 "Bounce: %8lu kB\n"
182 "WritebackTmp: %8lu kB\n" 190 "WritebackTmp: %8lu kB\n"
@@ -209,6 +217,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
209 K(global_page_state(NR_SLAB_RECLAIMABLE)), 217 K(global_page_state(NR_SLAB_RECLAIMABLE)),
210 K(global_page_state(NR_SLAB_UNRECLAIMABLE)), 218 K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
211 K(global_page_state(NR_PAGETABLE)), 219 K(global_page_state(NR_PAGETABLE)),
220#ifdef CONFIG_QUICKLIST
221 K(quicklist_total_size()),
222#endif
212 K(global_page_state(NR_UNSTABLE_NFS)), 223 K(global_page_state(NR_UNSTABLE_NFS)),
213 K(global_page_state(NR_BOUNCE)), 224 K(global_page_state(NR_BOUNCE)),
214 K(global_page_state(NR_WRITEBACK_TEMP)), 225 K(global_page_state(NR_WRITEBACK_TEMP)),
@@ -221,11 +232,12 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
221 232
222 len += hugetlb_report_meminfo(page + len); 233 len += hugetlb_report_meminfo(page + len);
223 234
235 len += arch_report_meminfo(page + len);
236
224 return proc_calc_metrics(page, start, off, count, eof, len); 237 return proc_calc_metrics(page, start, off, count, eof, len);
225#undef K 238#undef K
226} 239}
227 240
228extern const struct seq_operations fragmentation_op;
229static int fragmentation_open(struct inode *inode, struct file *file) 241static int fragmentation_open(struct inode *inode, struct file *file)
230{ 242{
231 (void)inode; 243 (void)inode;
@@ -239,7 +251,6 @@ static const struct file_operations fragmentation_file_operations = {
239 .release = seq_release, 251 .release = seq_release,
240}; 252};
241 253
242extern const struct seq_operations pagetypeinfo_op;
243static int pagetypeinfo_open(struct inode *inode, struct file *file) 254static int pagetypeinfo_open(struct inode *inode, struct file *file)
244{ 255{
245 return seq_open(file, &pagetypeinfo_op); 256 return seq_open(file, &pagetypeinfo_op);
@@ -252,7 +263,6 @@ static const struct file_operations pagetypeinfo_file_ops = {
252 .release = seq_release, 263 .release = seq_release,
253}; 264};
254 265
255extern const struct seq_operations zoneinfo_op;
256static int zoneinfo_open(struct inode *inode, struct file *file) 266static int zoneinfo_open(struct inode *inode, struct file *file)
257{ 267{
258 return seq_open(file, &zoneinfo_op); 268 return seq_open(file, &zoneinfo_op);
@@ -349,7 +359,6 @@ static const struct file_operations proc_devinfo_operations = {
349 .release = seq_release, 359 .release = seq_release,
350}; 360};
351 361
352extern const struct seq_operations vmstat_op;
353static int vmstat_open(struct inode *inode, struct file *file) 362static int vmstat_open(struct inode *inode, struct file *file)
354{ 363{
355 return seq_open(file, &vmstat_op); 364 return seq_open(file, &vmstat_op);
@@ -461,17 +470,35 @@ static const struct file_operations proc_slabstats_operations = {
461#ifdef CONFIG_MMU 470#ifdef CONFIG_MMU
462static int vmalloc_open(struct inode *inode, struct file *file) 471static int vmalloc_open(struct inode *inode, struct file *file)
463{ 472{
464 return seq_open(file, &vmalloc_op); 473 unsigned int *ptr = NULL;
474 int ret;
475
476 if (NUMA_BUILD)
477 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
478 ret = seq_open(file, &vmalloc_op);
479 if (!ret) {
480 struct seq_file *m = file->private_data;
481 m->private = ptr;
482 } else
483 kfree(ptr);
484 return ret;
465} 485}
466 486
467static const struct file_operations proc_vmalloc_operations = { 487static const struct file_operations proc_vmalloc_operations = {
468 .open = vmalloc_open, 488 .open = vmalloc_open,
469 .read = seq_read, 489 .read = seq_read,
470 .llseek = seq_lseek, 490 .llseek = seq_lseek,
471 .release = seq_release, 491 .release = seq_release_private,
472}; 492};
473#endif 493#endif
474 494
495#ifndef arch_irq_stat_cpu
496#define arch_irq_stat_cpu(cpu) 0
497#endif
498#ifndef arch_irq_stat
499#define arch_irq_stat() 0
500#endif
501
475static int show_stat(struct seq_file *p, void *v) 502static int show_stat(struct seq_file *p, void *v)
476{ 503{
477 int i; 504 int i;
@@ -509,7 +536,9 @@ static int show_stat(struct seq_file *p, void *v)
509 sum += temp; 536 sum += temp;
510 per_irq_sum[j] += temp; 537 per_irq_sum[j] += temp;
511 } 538 }
539 sum += arch_irq_stat_cpu(i);
512 } 540 }
541 sum += arch_irq_stat();
513 542
514 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", 543 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
515 (unsigned long long)cputime64_to_clock_t(user), 544 (unsigned long long)cputime64_to_clock_t(user),
@@ -654,6 +683,7 @@ static int cmdline_read_proc(char *page, char **start, off_t off,
654 return proc_calc_metrics(page, start, off, count, eof, len); 683 return proc_calc_metrics(page, start, off, count, eof, len);
655} 684}
656 685
686#ifdef CONFIG_FILE_LOCKING
657static int locks_open(struct inode *inode, struct file *filp) 687static int locks_open(struct inode *inode, struct file *filp)
658{ 688{
659 return seq_open(filp, &locks_seq_operations); 689 return seq_open(filp, &locks_seq_operations);
@@ -665,6 +695,7 @@ static const struct file_operations proc_locks_operations = {
665 .llseek = seq_lseek, 695 .llseek = seq_lseek,
666 .release = seq_release, 696 .release = seq_release,
667}; 697};
698#endif /* CONFIG_FILE_LOCKING */
668 699
669static int execdomains_read_proc(char *page, char **start, off_t off, 700static int execdomains_read_proc(char *page, char **start, off_t off,
670 int count, int *eof, void *data) 701 int count, int *eof, void *data)
@@ -858,7 +889,9 @@ void __init proc_misc_init(void)
858#ifdef CONFIG_PRINTK 889#ifdef CONFIG_PRINTK
859 proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); 890 proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations);
860#endif 891#endif
892#ifdef CONFIG_FILE_LOCKING
861 proc_create("locks", 0, NULL, &proc_locks_operations); 893 proc_create("locks", 0, NULL, &proc_locks_operations);
894#endif
862 proc_create("devices", 0, NULL, &proc_devinfo_operations); 895 proc_create("devices", 0, NULL, &proc_devinfo_operations);
863 proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); 896 proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);
864#ifdef CONFIG_BLOCK 897#ifdef CONFIG_BLOCK
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 83f357b30d71..7bc296f424ae 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -27,6 +27,11 @@
27#include "internal.h" 27#include "internal.h"
28 28
29 29
30static struct net *get_proc_net(const struct inode *inode)
31{
32 return maybe_get_net(PDE_NET(PDE(inode)));
33}
34
30int seq_open_net(struct inode *ino, struct file *f, 35int seq_open_net(struct inode *ino, struct file *f,
31 const struct seq_operations *ops, int size) 36 const struct seq_operations *ops, int size)
32{ 37{
@@ -51,6 +56,30 @@ int seq_open_net(struct inode *ino, struct file *f,
51} 56}
52EXPORT_SYMBOL_GPL(seq_open_net); 57EXPORT_SYMBOL_GPL(seq_open_net);
53 58
59int single_open_net(struct inode *inode, struct file *file,
60 int (*show)(struct seq_file *, void *))
61{
62 int err;
63 struct net *net;
64
65 err = -ENXIO;
66 net = get_proc_net(inode);
67 if (net == NULL)
68 goto err_net;
69
70 err = single_open(file, show, net);
71 if (err < 0)
72 goto err_open;
73
74 return 0;
75
76err_open:
77 put_net(net);
78err_net:
79 return err;
80}
81EXPORT_SYMBOL_GPL(single_open_net);
82
54int seq_release_net(struct inode *ino, struct file *f) 83int seq_release_net(struct inode *ino, struct file *f)
55{ 84{
56 struct seq_file *seq; 85 struct seq_file *seq;
@@ -63,6 +92,14 @@ int seq_release_net(struct inode *ino, struct file *f)
63} 92}
64EXPORT_SYMBOL_GPL(seq_release_net); 93EXPORT_SYMBOL_GPL(seq_release_net);
65 94
95int single_release_net(struct inode *ino, struct file *f)
96{
97 struct seq_file *seq = f->private_data;
98 put_net(seq->private);
99 return single_release(ino, f);
100}
101EXPORT_SYMBOL_GPL(single_release_net);
102
66static struct net *get_proc_task_net(struct inode *dir) 103static struct net *get_proc_task_net(struct inode *dir)
67{ 104{
68 struct task_struct *task; 105 struct task_struct *task;
@@ -153,12 +190,6 @@ void proc_net_remove(struct net *net, const char *name)
153} 190}
154EXPORT_SYMBOL_GPL(proc_net_remove); 191EXPORT_SYMBOL_GPL(proc_net_remove);
155 192
156struct net *get_proc_net(const struct inode *inode)
157{
158 return maybe_get_net(PDE_NET(PDE(inode)));
159}
160EXPORT_SYMBOL_GPL(get_proc_net);
161
162static __net_init int proc_net_ns_init(struct net *net) 193static __net_init int proc_net_ns_init(struct net *net)
163{ 194{
164 struct proc_dir_entry *netd, *net_statd; 195 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5acc001d49f6..945a81043ba2 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -10,149 +10,110 @@
10static struct dentry_operations proc_sys_dentry_operations; 10static struct dentry_operations proc_sys_dentry_operations;
11static const struct file_operations proc_sys_file_operations; 11static const struct file_operations proc_sys_file_operations;
12static const struct inode_operations proc_sys_inode_operations; 12static const struct inode_operations proc_sys_inode_operations;
13static const struct file_operations proc_sys_dir_file_operations;
14static const struct inode_operations proc_sys_dir_operations;
13 15
14static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) 16static struct inode *proc_sys_make_inode(struct super_block *sb,
15{ 17 struct ctl_table_header *head, struct ctl_table *table)
16 /* Refresh the cached information bits in the inode */
17 if (table) {
18 inode->i_uid = 0;
19 inode->i_gid = 0;
20 inode->i_mode = table->mode;
21 if (table->proc_handler) {
22 inode->i_mode |= S_IFREG;
23 inode->i_nlink = 1;
24 } else {
25 inode->i_mode |= S_IFDIR;
26 inode->i_nlink = 0; /* It is too hard to figure out */
27 }
28 }
29}
30
31static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
32{ 18{
33 struct inode *inode; 19 struct inode *inode;
34 struct proc_inode *dir_ei, *ei; 20 struct proc_inode *ei;
35 int depth;
36 21
37 inode = new_inode(dir->i_sb); 22 inode = new_inode(sb);
38 if (!inode) 23 if (!inode)
39 goto out; 24 goto out;
40 25
41 /* A directory is always one deeper than it's parent */ 26 sysctl_head_get(head);
42 dir_ei = PROC_I(dir);
43 depth = dir_ei->fd + 1;
44
45 ei = PROC_I(inode); 27 ei = PROC_I(inode);
46 ei->fd = depth; 28 ei->sysctl = head;
29 ei->sysctl_entry = table;
30
47 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 31 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
48 inode->i_op = &proc_sys_inode_operations;
49 inode->i_fop = &proc_sys_file_operations;
50 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ 32 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
51 proc_sys_refresh_inode(inode, table); 33 inode->i_mode = table->mode;
34 if (!table->child) {
35 inode->i_mode |= S_IFREG;
36 inode->i_op = &proc_sys_inode_operations;
37 inode->i_fop = &proc_sys_file_operations;
38 } else {
39 inode->i_mode |= S_IFDIR;
40 inode->i_nlink = 0;
41 inode->i_op = &proc_sys_dir_operations;
42 inode->i_fop = &proc_sys_dir_file_operations;
43 }
52out: 44out:
53 return inode; 45 return inode;
54} 46}
55 47
56static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth) 48static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
57{
58 for (;;) {
59 struct proc_inode *ei;
60
61 ei = PROC_I(dentry->d_inode);
62 if (ei->fd == depth)
63 break; /* found */
64
65 dentry = dentry->d_parent;
66 }
67 return dentry;
68}
69
70static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
71 struct qstr *name)
72{ 49{
73 int len; 50 int len;
74 for ( ; table->ctl_name || table->procname; table++) { 51 for ( ; p->ctl_name || p->procname; p++) {
75 52
76 if (!table->procname) 53 if (!p->procname)
77 continue; 54 continue;
78 55
79 len = strlen(table->procname); 56 len = strlen(p->procname);
80 if (len != name->len) 57 if (len != name->len)
81 continue; 58 continue;
82 59
83 if (memcmp(table->procname, name->name, len) != 0) 60 if (memcmp(p->procname, name->name, len) != 0)
84 continue; 61 continue;
85 62
86 /* I have a match */ 63 /* I have a match */
87 return table; 64 return p;
88 } 65 }
89 return NULL; 66 return NULL;
90} 67}
91 68
92static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry, 69static struct ctl_table_header *grab_header(struct inode *inode)
93 struct ctl_table *table)
94{ 70{
95 struct dentry *ancestor; 71 if (PROC_I(inode)->sysctl)
96 struct proc_inode *ei; 72 return sysctl_head_grab(PROC_I(inode)->sysctl);
97 int depth, i; 73 else
74 return sysctl_head_next(NULL);
75}
98 76
99 ei = PROC_I(dentry->d_inode); 77static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
100 depth = ei->fd; 78 struct nameidata *nd)
79{
80 struct ctl_table_header *head = grab_header(dir);
81 struct ctl_table *table = PROC_I(dir)->sysctl_entry;
82 struct ctl_table_header *h = NULL;
83 struct qstr *name = &dentry->d_name;
84 struct ctl_table *p;
85 struct inode *inode;
86 struct dentry *err = ERR_PTR(-ENOENT);
101 87
102 if (depth == 0) 88 if (IS_ERR(head))
103 return table; 89 return ERR_CAST(head);
104 90
105 for (i = 1; table && (i <= depth); i++) { 91 if (table && !table->child) {
106 ancestor = proc_sys_ancestor(dentry, i); 92 WARN_ON(1);
107 table = proc_sys_lookup_table_one(table, &ancestor->d_name); 93 goto out;
108 if (table)
109 table = table->child;
110 } 94 }
111 return table;
112 95
113} 96 table = table ? table->child : head->ctl_table;
114static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
115 struct qstr *name,
116 struct ctl_table *table)
117{
118 table = proc_sys_lookup_table(dparent, table);
119 if (table)
120 table = proc_sys_lookup_table_one(table, name);
121 return table;
122}
123 97
124static struct ctl_table *do_proc_sys_lookup(struct dentry *parent, 98 p = find_in_table(table, name);
125 struct qstr *name, 99 if (!p) {
126 struct ctl_table_header **ptr) 100 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
127{ 101 if (h->attached_to != table)
128 struct ctl_table_header *head; 102 continue;
129 struct ctl_table *table = NULL; 103 p = find_in_table(h->attached_by, name);
130 104 if (p)
131 for (head = sysctl_head_next(NULL); head; 105 break;
132 head = sysctl_head_next(head)) { 106 }
133 table = proc_sys_lookup_entry(parent, name, head->ctl_table);
134 if (table)
135 break;
136 } 107 }
137 *ptr = head;
138 return table;
139}
140
141static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
142 struct nameidata *nd)
143{
144 struct ctl_table_header *head;
145 struct inode *inode;
146 struct dentry *err;
147 struct ctl_table *table;
148 108
149 err = ERR_PTR(-ENOENT); 109 if (!p)
150 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
151 if (!table)
152 goto out; 110 goto out;
153 111
154 err = ERR_PTR(-ENOMEM); 112 err = ERR_PTR(-ENOMEM);
155 inode = proc_sys_make_inode(dir, table); 113 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
114 if (h)
115 sysctl_head_finish(h);
116
156 if (!inode) 117 if (!inode)
157 goto out; 118 goto out;
158 119
@@ -168,22 +129,14 @@ out:
168static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, 129static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
169 size_t count, loff_t *ppos, int write) 130 size_t count, loff_t *ppos, int write)
170{ 131{
171 struct dentry *dentry = filp->f_dentry; 132 struct inode *inode = filp->f_path.dentry->d_inode;
172 struct ctl_table_header *head; 133 struct ctl_table_header *head = grab_header(inode);
173 struct ctl_table *table; 134 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
174 ssize_t error; 135 ssize_t error;
175 size_t res; 136 size_t res;
176 137
177 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 138 if (IS_ERR(head))
178 /* Has the sysctl entry disappeared on us? */ 139 return PTR_ERR(head);
179 error = -ENOENT;
180 if (!table)
181 goto out;
182
183 /* Has the sysctl entry been replaced by a directory? */
184 error = -EISDIR;
185 if (!table->proc_handler)
186 goto out;
187 140
188 /* 141 /*
189 * At this point we know that the sysctl was not unregistered 142 * At this point we know that the sysctl was not unregistered
@@ -193,6 +146,11 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
193 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) 146 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
194 goto out; 147 goto out;
195 148
149 /* if that can happen at all, it should be -EINVAL, not -EISDIR */
150 error = -EINVAL;
151 if (!table->proc_handler)
152 goto out;
153
196 /* careful: calling conventions are nasty here */ 154 /* careful: calling conventions are nasty here */
197 res = count; 155 res = count;
198 error = table->proc_handler(table, write, filp, buf, &res, ppos); 156 error = table->proc_handler(table, write, filp, buf, &res, ppos);
@@ -218,82 +176,86 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
218 176
219 177
220static int proc_sys_fill_cache(struct file *filp, void *dirent, 178static int proc_sys_fill_cache(struct file *filp, void *dirent,
221 filldir_t filldir, struct ctl_table *table) 179 filldir_t filldir,
180 struct ctl_table_header *head,
181 struct ctl_table *table)
222{ 182{
223 struct ctl_table_header *head;
224 struct ctl_table *child_table = NULL;
225 struct dentry *child, *dir = filp->f_path.dentry; 183 struct dentry *child, *dir = filp->f_path.dentry;
226 struct inode *inode; 184 struct inode *inode;
227 struct qstr qname; 185 struct qstr qname;
228 ino_t ino = 0; 186 ino_t ino = 0;
229 unsigned type = DT_UNKNOWN; 187 unsigned type = DT_UNKNOWN;
230 int ret;
231 188
232 qname.name = table->procname; 189 qname.name = table->procname;
233 qname.len = strlen(table->procname); 190 qname.len = strlen(table->procname);
234 qname.hash = full_name_hash(qname.name, qname.len); 191 qname.hash = full_name_hash(qname.name, qname.len);
235 192
236 /* Suppress duplicates.
237 * Only fill a directory entry if it is the value that
238 * an ordinary lookup of that name returns. Hide all
239 * others.
240 *
241 * If we ever cache this translation in the dcache
242 * I should do a dcache lookup first. But for now
243 * it is just simpler not to.
244 */
245 ret = 0;
246 child_table = do_proc_sys_lookup(dir, &qname, &head);
247 sysctl_head_finish(head);
248 if (child_table != table)
249 return 0;
250
251 child = d_lookup(dir, &qname); 193 child = d_lookup(dir, &qname);
252 if (!child) { 194 if (!child) {
253 struct dentry *new; 195 child = d_alloc(dir, &qname);
254 new = d_alloc(dir, &qname); 196 if (child) {
255 if (new) { 197 inode = proc_sys_make_inode(dir->d_sb, head, table);
256 inode = proc_sys_make_inode(dir->d_inode, table); 198 if (!inode) {
257 if (!inode) 199 dput(child);
258 child = ERR_PTR(-ENOMEM); 200 return -ENOMEM;
259 else { 201 } else {
260 new->d_op = &proc_sys_dentry_operations; 202 child->d_op = &proc_sys_dentry_operations;
261 d_add(new, inode); 203 d_add(child, inode);
262 } 204 }
263 if (child) 205 } else {
264 dput(new); 206 return -ENOMEM;
265 else
266 child = new;
267 } 207 }
268 } 208 }
269 if (!child || IS_ERR(child) || !child->d_inode)
270 goto end_instantiate;
271 inode = child->d_inode; 209 inode = child->d_inode;
272 if (inode) { 210 ino = inode->i_ino;
273 ino = inode->i_ino; 211 type = inode->i_mode >> 12;
274 type = inode->i_mode >> 12;
275 }
276 dput(child); 212 dput(child);
277end_instantiate: 213 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
278 if (!ino) 214}
279 ino= find_inode_number(dir, &qname); 215
280 if (!ino) 216static int scan(struct ctl_table_header *head, ctl_table *table,
281 ino = 1; 217 unsigned long *pos, struct file *file,
282 return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); 218 void *dirent, filldir_t filldir)
219{
220
221 for (; table->ctl_name || table->procname; table++, (*pos)++) {
222 int res;
223
224 /* Can't do anything without a proc name */
225 if (!table->procname)
226 continue;
227
228 if (*pos < file->f_pos)
229 continue;
230
231 res = proc_sys_fill_cache(file, dirent, filldir, head, table);
232 if (res)
233 return res;
234
235 file->f_pos = *pos + 1;
236 }
237 return 0;
283} 238}
284 239
285static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) 240static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
286{ 241{
287 struct dentry *dentry = filp->f_dentry; 242 struct dentry *dentry = filp->f_path.dentry;
288 struct inode *inode = dentry->d_inode; 243 struct inode *inode = dentry->d_inode;
289 struct ctl_table_header *head = NULL; 244 struct ctl_table_header *head = grab_header(inode);
290 struct ctl_table *table; 245 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
246 struct ctl_table_header *h = NULL;
291 unsigned long pos; 247 unsigned long pos;
292 int ret; 248 int ret = -EINVAL;
293 249
294 ret = -ENOTDIR; 250 if (IS_ERR(head))
295 if (!S_ISDIR(inode->i_mode)) 251 return PTR_ERR(head);
252
253 if (table && !table->child) {
254 WARN_ON(1);
296 goto out; 255 goto out;
256 }
257
258 table = table ? table->child : head->ctl_table;
297 259
298 ret = 0; 260 ret = 0;
299 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ 261 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
@@ -311,30 +273,17 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
311 } 273 }
312 pos = 2; 274 pos = 2;
313 275
314 /* - Find each instance of the directory 276 ret = scan(head, table, &pos, filp, dirent, filldir);
315 * - Read all entries in each instance 277 if (ret)
316 * - Before returning an entry to user space lookup the entry 278 goto out;
317 * by name and if I find a different entry don't return
318 * this one because it means it is a buried dup.
319 * For sysctl this should only happen for directory entries.
320 */
321 for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
322 table = proc_sys_lookup_table(dentry, head->ctl_table);
323 279
324 if (!table) 280 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
281 if (h->attached_to != table)
325 continue; 282 continue;
326 283 ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
327 for (; table->ctl_name || table->procname; table++, pos++) { 284 if (ret) {
328 /* Can't do anything without a proc name */ 285 sysctl_head_finish(h);
329 if (!table->procname) 286 break;
330 continue;
331
332 if (pos < filp->f_pos)
333 continue;
334
335 if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
336 goto out;
337 filp->f_pos = pos + 1;
338 } 287 }
339 } 288 }
340 ret = 1; 289 ret = 1;
@@ -343,53 +292,24 @@ out:
343 return ret; 292 return ret;
344} 293}
345 294
346static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd) 295static int proc_sys_permission(struct inode *inode, int mask)
347{ 296{
348 /* 297 /*
349 * sysctl entries that are not writeable, 298 * sysctl entries that are not writeable,
350 * are _NOT_ writeable, capabilities or not. 299 * are _NOT_ writeable, capabilities or not.
351 */ 300 */
352 struct ctl_table_header *head; 301 struct ctl_table_header *head = grab_header(inode);
353 struct ctl_table *table; 302 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
354 struct dentry *dentry;
355 int mode;
356 int depth;
357 int error; 303 int error;
358 304
359 head = NULL; 305 if (IS_ERR(head))
360 depth = PROC_I(inode)->fd; 306 return PTR_ERR(head);
361
362 /* First check the cached permissions, in case we don't have
363 * enough information to lookup the sysctl table entry.
364 */
365 error = -EACCES;
366 mode = inode->i_mode;
367
368 if (current->euid == 0)
369 mode >>= 6;
370 else if (in_group_p(0))
371 mode >>= 3;
372
373 if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
374 error = 0;
375
376 /* If we can't get a sysctl table entry the permission
377 * checks on the cached mode will have to be enough.
378 */
379 if (!nd || !depth)
380 goto out;
381
382 dentry = nd->path.dentry;
383 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
384 307
385 /* If the entry does not exist deny permission */ 308 if (!table) /* global root - r-xr-xr-x */
386 error = -EACCES; 309 error = mask & MAY_WRITE ? -EACCES : 0;
387 if (!table) 310 else /* Use the permissions on the sysctl table entry */
388 goto out; 311 error = sysctl_perm(head->root, table, mask);
389 312
390 /* Use the permissions on the sysctl table entry */
391 error = sysctl_perm(head->root, table, mask);
392out:
393 sysctl_head_finish(head); 313 sysctl_head_finish(head);
394 return error; 314 return error;
395} 315}
@@ -409,42 +329,79 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
409 return error; 329 return error;
410} 330}
411 331
412/* I'm lazy and don't distinguish between files and directories, 332static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
413 * until access time. 333{
414 */ 334 struct inode *inode = dentry->d_inode;
335 struct ctl_table_header *head = grab_header(inode);
336 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
337
338 if (IS_ERR(head))
339 return PTR_ERR(head);
340
341 generic_fillattr(inode, stat);
342 if (table)
343 stat->mode = (stat->mode & S_IFMT) | table->mode;
344
345 sysctl_head_finish(head);
346 return 0;
347}
348
415static const struct file_operations proc_sys_file_operations = { 349static const struct file_operations proc_sys_file_operations = {
416 .read = proc_sys_read, 350 .read = proc_sys_read,
417 .write = proc_sys_write, 351 .write = proc_sys_write,
352};
353
354static const struct file_operations proc_sys_dir_file_operations = {
418 .readdir = proc_sys_readdir, 355 .readdir = proc_sys_readdir,
419}; 356};
420 357
421static const struct inode_operations proc_sys_inode_operations = { 358static const struct inode_operations proc_sys_inode_operations = {
359 .permission = proc_sys_permission,
360 .setattr = proc_sys_setattr,
361 .getattr = proc_sys_getattr,
362};
363
364static const struct inode_operations proc_sys_dir_operations = {
422 .lookup = proc_sys_lookup, 365 .lookup = proc_sys_lookup,
423 .permission = proc_sys_permission, 366 .permission = proc_sys_permission,
424 .setattr = proc_sys_setattr, 367 .setattr = proc_sys_setattr,
368 .getattr = proc_sys_getattr,
425}; 369};
426 370
427static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 371static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
428{ 372{
429 struct ctl_table_header *head; 373 return !PROC_I(dentry->d_inode)->sysctl->unregistering;
430 struct ctl_table *table; 374}
431 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 375
432 proc_sys_refresh_inode(dentry->d_inode, table); 376static int proc_sys_delete(struct dentry *dentry)
433 sysctl_head_finish(head); 377{
434 return !!table; 378 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
379}
380
381static int proc_sys_compare(struct dentry *dir, struct qstr *qstr,
382 struct qstr *name)
383{
384 struct dentry *dentry = container_of(qstr, struct dentry, d_name);
385 if (qstr->len != name->len)
386 return 1;
387 if (memcmp(qstr->name, name->name, name->len))
388 return 1;
389 return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl);
435} 390}
436 391
437static struct dentry_operations proc_sys_dentry_operations = { 392static struct dentry_operations proc_sys_dentry_operations = {
438 .d_revalidate = proc_sys_revalidate, 393 .d_revalidate = proc_sys_revalidate,
394 .d_delete = proc_sys_delete,
395 .d_compare = proc_sys_compare,
439}; 396};
440 397
441static struct proc_dir_entry *proc_sys_root;
442
443int proc_sys_init(void) 398int proc_sys_init(void)
444{ 399{
400 struct proc_dir_entry *proc_sys_root;
401
445 proc_sys_root = proc_mkdir("sys", NULL); 402 proc_sys_root = proc_mkdir("sys", NULL);
446 proc_sys_root->proc_iops = &proc_sys_inode_operations; 403 proc_sys_root->proc_iops = &proc_sys_dir_operations;
447 proc_sys_root->proc_fops = &proc_sys_file_operations; 404 proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
448 proc_sys_root->nlink = 0; 405 proc_sys_root->nlink = 0;
449 return 0; 406 return 0;
450} 407}
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 21f490f5d65c..d153946d6d15 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -136,54 +136,6 @@ static const struct file_operations proc_tty_drivers_operations = {
136 .release = seq_release, 136 .release = seq_release,
137}; 137};
138 138
139static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
140{
141 return (*pos < NR_LDISCS) ? pos : NULL;
142}
143
144static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
145{
146 (*pos)++;
147 return (*pos < NR_LDISCS) ? pos : NULL;
148}
149
150static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
151{
152}
153
154static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
155{
156 int i = *(loff_t *)v;
157 struct tty_ldisc *ld;
158
159 ld = tty_ldisc_get(i);
160 if (ld == NULL)
161 return 0;
162 seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i);
163 tty_ldisc_put(i);
164 return 0;
165}
166
167static const struct seq_operations tty_ldiscs_seq_ops = {
168 .start = tty_ldiscs_seq_start,
169 .next = tty_ldiscs_seq_next,
170 .stop = tty_ldiscs_seq_stop,
171 .show = tty_ldiscs_seq_show,
172};
173
174static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
175{
176 return seq_open(file, &tty_ldiscs_seq_ops);
177}
178
179static const struct file_operations tty_ldiscs_proc_fops = {
180 .owner = THIS_MODULE,
181 .open = proc_tty_ldiscs_open,
182 .read = seq_read,
183 .llseek = seq_lseek,
184 .release = seq_release,
185};
186
187/* 139/*
188 * This function is called by tty_register_driver() to handle 140 * This function is called by tty_register_driver() to handle
189 * registering the driver's /proc handler into /proc/tty/driver/<foo> 141 * registering the driver's /proc handler into /proc/tty/driver/<foo>
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ab8ccc9d14ff..4806830ea2a1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -210,23 +210,20 @@ static int show_map(struct seq_file *m, void *v)
210 dev_t dev = 0; 210 dev_t dev = 0;
211 int len; 211 int len;
212 212
213 if (maps_protect && !ptrace_may_attach(task))
214 return -EACCES;
215
216 if (file) { 213 if (file) {
217 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 214 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
218 dev = inode->i_sb->s_dev; 215 dev = inode->i_sb->s_dev;
219 ino = inode->i_ino; 216 ino = inode->i_ino;
220 } 217 }
221 218
222 seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", 219 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
223 vma->vm_start, 220 vma->vm_start,
224 vma->vm_end, 221 vma->vm_end,
225 flags & VM_READ ? 'r' : '-', 222 flags & VM_READ ? 'r' : '-',
226 flags & VM_WRITE ? 'w' : '-', 223 flags & VM_WRITE ? 'w' : '-',
227 flags & VM_EXEC ? 'x' : '-', 224 flags & VM_EXEC ? 'x' : '-',
228 flags & VM_MAYSHARE ? 's' : 'p', 225 flags & VM_MAYSHARE ? 's' : 'p',
229 vma->vm_pgoff << PAGE_SHIFT, 226 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
230 MAJOR(dev), MINOR(dev), ino, &len); 227 MAJOR(dev), MINOR(dev), ino, &len);
231 228
232 /* 229 /*
@@ -476,10 +473,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
476 return -ESRCH; 473 return -ESRCH;
477 mm = get_task_mm(task); 474 mm = get_task_mm(task);
478 if (mm) { 475 if (mm) {
479 static struct mm_walk clear_refs_walk; 476 struct mm_walk clear_refs_walk = {
480 memset(&clear_refs_walk, 0, sizeof(clear_refs_walk)); 477 .pmd_entry = clear_refs_pte_range,
481 clear_refs_walk.pmd_entry = clear_refs_pte_range; 478 .mm = mm,
482 clear_refs_walk.mm = mm; 479 };
483 down_read(&mm->mmap_sem); 480 down_read(&mm->mmap_sem);
484 for (vma = mm->mmap; vma; vma = vma->vm_next) { 481 for (vma = mm->mmap; vma; vma = vma->vm_next) {
485 clear_refs_walk.private = vma; 482 clear_refs_walk.private = vma;
@@ -602,11 +599,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
602 return err; 599 return err;
603} 600}
604 601
605static struct mm_walk pagemap_walk = {
606 .pmd_entry = pagemap_pte_range,
607 .pte_hole = pagemap_pte_hole
608};
609
610/* 602/*
611 * /proc/pid/pagemap - an array mapping virtual pages to pfns 603 * /proc/pid/pagemap - an array mapping virtual pages to pfns
612 * 604 *
@@ -641,12 +633,17 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
641 struct pagemapread pm; 633 struct pagemapread pm;
642 int pagecount; 634 int pagecount;
643 int ret = -ESRCH; 635 int ret = -ESRCH;
636 struct mm_walk pagemap_walk = {};
637 unsigned long src;
638 unsigned long svpfn;
639 unsigned long start_vaddr;
640 unsigned long end_vaddr;
644 641
645 if (!task) 642 if (!task)
646 goto out; 643 goto out;
647 644
648 ret = -EACCES; 645 ret = -EACCES;
649 if (!ptrace_may_attach(task)) 646 if (!ptrace_may_access(task, PTRACE_MODE_READ))
650 goto out_task; 647 goto out_task;
651 648
652 ret = -EINVAL; 649 ret = -EINVAL;
@@ -659,11 +656,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
659 if (!mm) 656 if (!mm)
660 goto out_task; 657 goto out_task;
661 658
662 ret = -ENOMEM; 659
663 uaddr = (unsigned long)buf & PAGE_MASK; 660 uaddr = (unsigned long)buf & PAGE_MASK;
664 uend = (unsigned long)(buf + count); 661 uend = (unsigned long)(buf + count);
665 pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; 662 pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
666 pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); 663 ret = 0;
664 if (pagecount == 0)
665 goto out_mm;
666 pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
667 ret = -ENOMEM;
667 if (!pages) 668 if (!pages)
668 goto out_mm; 669 goto out_mm;
669 670
@@ -684,33 +685,33 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
684 pm.out = (u64 *)buf; 685 pm.out = (u64 *)buf;
685 pm.end = (u64 *)(buf + count); 686 pm.end = (u64 *)(buf + count);
686 687
687 if (!ptrace_may_attach(task)) { 688 pagemap_walk.pmd_entry = pagemap_pte_range;
688 ret = -EIO; 689 pagemap_walk.pte_hole = pagemap_pte_hole;
689 } else { 690 pagemap_walk.mm = mm;
690 unsigned long src = *ppos; 691 pagemap_walk.private = &pm;
691 unsigned long svpfn = src / PM_ENTRY_BYTES; 692
692 unsigned long start_vaddr = svpfn << PAGE_SHIFT; 693 src = *ppos;
693 unsigned long end_vaddr = TASK_SIZE_OF(task); 694 svpfn = src / PM_ENTRY_BYTES;
694 695 start_vaddr = svpfn << PAGE_SHIFT;
695 /* watch out for wraparound */ 696 end_vaddr = TASK_SIZE_OF(task);
696 if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) 697
697 start_vaddr = end_vaddr; 698 /* watch out for wraparound */
698 699 if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
699 /* 700 start_vaddr = end_vaddr;
700 * The odds are that this will stop walking way 701
701 * before end_vaddr, because the length of the 702 /*
702 * user buffer is tracked in "pm", and the walk 703 * The odds are that this will stop walking way
703 * will stop when we hit the end of the buffer. 704 * before end_vaddr, because the length of the
704 */ 705 * user buffer is tracked in "pm", and the walk
705 ret = walk_page_range(start_vaddr, end_vaddr, 706 * will stop when we hit the end of the buffer.
706 &pagemap_walk); 707 */
707 if (ret == PM_END_OF_BUFFER) 708 ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk);
708 ret = 0; 709 if (ret == PM_END_OF_BUFFER)
709 /* don't need mmap_sem for these, but this looks cleaner */ 710 ret = 0;
710 *ppos += (char *)pm.out - buf; 711 /* don't need mmap_sem for these, but this looks cleaner */
711 if (!ret) 712 *ppos += (char *)pm.out - buf;
712 ret = (char *)pm.out - buf; 713 if (!ret)
713 } 714 ret = (char *)pm.out - buf;
714 715
715out_pages: 716out_pages:
716 for (; pagecount; pagecount--) { 717 for (; pagecount; pagecount--) {
@@ -738,22 +739,11 @@ const struct file_operations proc_pagemap_operations = {
738#ifdef CONFIG_NUMA 739#ifdef CONFIG_NUMA
739extern int show_numa_map(struct seq_file *m, void *v); 740extern int show_numa_map(struct seq_file *m, void *v);
740 741
741static int show_numa_map_checked(struct seq_file *m, void *v)
742{
743 struct proc_maps_private *priv = m->private;
744 struct task_struct *task = priv->task;
745
746 if (maps_protect && !ptrace_may_attach(task))
747 return -EACCES;
748
749 return show_numa_map(m, v);
750}
751
752static const struct seq_operations proc_pid_numa_maps_op = { 742static const struct seq_operations proc_pid_numa_maps_op = {
753 .start = m_start, 743 .start = m_start,
754 .next = m_next, 744 .next = m_next,
755 .stop = m_stop, 745 .stop = m_stop,
756 .show = show_numa_map_checked 746 .show = show_numa_map,
757}; 747};
758 748
759static int numa_maps_open(struct inode *inode, struct file *file) 749static int numa_maps_open(struct inode *inode, struct file *file)
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 4b4f9cc2f186..219bd79ea894 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -110,11 +110,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
110static int show_map(struct seq_file *m, void *_vml) 110static int show_map(struct seq_file *m, void *_vml)
111{ 111{
112 struct vm_list_struct *vml = _vml; 112 struct vm_list_struct *vml = _vml;
113 struct proc_maps_private *priv = m->private;
114 struct task_struct *task = priv->task;
115
116 if (maps_protect && !ptrace_may_attach(task))
117 return -EACCES;
118 113
119 return nommu_vma_show(m, vml->vma); 114 return nommu_vma_show(m, vml->vma);
120} 115}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 9ac0f5e064e0..841368b87a29 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -165,14 +165,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
165 return acc; 165 return acc;
166} 166}
167 167
168static int open_vmcore(struct inode *inode, struct file *filp)
169{
170 return 0;
171}
172
173const struct file_operations proc_vmcore_operations = { 168const struct file_operations proc_vmcore_operations = {
174 .read = read_vmcore, 169 .read = read_vmcore,
175 .open = open_vmcore,
176}; 170};
177 171
178static struct vmcore* __init get_new_element(void) 172static struct vmcore* __init get_new_element(void)