path: root/fs/proc
Diffstat (limited to 'fs/proc')
-rw-r--r--  fs/proc/Makefile           14
-rw-r--r--  fs/proc/array.c           484
-rw-r--r--  fs/proc/base.c           2056
-rw-r--r--  fs/proc/generic.c         705
-rw-r--r--  fs/proc/inode-alloc.txt    14
-rw-r--r--  fs/proc/inode.c           218
-rw-r--r--  fs/proc/internal.h         48
-rw-r--r--  fs/proc/kcore.c           404
-rw-r--r--  fs/proc/kmsg.c             55
-rw-r--r--  fs/proc/mmu.c              67
-rw-r--r--  fs/proc/nommu.c           135
-rw-r--r--  fs/proc/proc_devtree.c    165
-rw-r--r--  fs/proc/proc_misc.c       615
-rw-r--r--  fs/proc/proc_tty.c        242
-rw-r--r--  fs/proc/root.c            161
-rw-r--r--  fs/proc/task_mmu.c        235
-rw-r--r--  fs/proc/task_nommu.c      164
17 files changed, 5782 insertions, 0 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
new file mode 100644
index 000000000000..738b9b602932
--- /dev/null
+++ b/fs/proc/Makefile
@@ -0,0 +1,14 @@
1#
2# Makefile for the Linux proc filesystem routines.
3#
4
5obj-$(CONFIG_PROC_FS) += proc.o
6
7proc-y := nommu.o task_nommu.o
8proc-$(CONFIG_MMU) := mmu.o task_mmu.o
9
10proc-y += inode.o root.o base.o generic.o array.o \
11 kmsg.o proc_tty.o proc_misc.o
12
13proc-$(CONFIG_PROC_KCORE) += kcore.o
14proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
new file mode 100644
index 000000000000..37668fe998ad
--- /dev/null
+++ b/fs/proc/array.c
@@ -0,0 +1,484 @@
1/*
2 * linux/fs/proc/array.c
3 *
4 * Copyright (C) 1992 by Linus Torvalds
5 * based on ideas by Darren Senn
6 *
7 * Fixes:
8 *  Michael K. Johnson:	stat, statm extensions.
9 * <johnsonm@stolaf.edu>
10 *
11 * Pauline Middelink : Made cmdline,envline only break at '\0's, to
12 * make sure SET_PROCTITLE works. Also removed
13 * bad '!' which forced address recalculation for
14 * EVERY character on the current page.
15 * <middelin@polyware.iaf.nl>
16 *
17 * Danny ter Haar : added cpuinfo
18 * <dth@cistron.nl>
19 *
20 * Alessandro Rubini : profile extension.
21 * <rubini@ipvvis.unipv.it>
22 *
23 * Jeff Tranter : added BogoMips field to cpuinfo
24 * <Jeff_Tranter@Mitel.COM>
25 *
26 * Bruno Haible : remove 4K limit for the maps file
27 * <haible@ma2s2.mathematik.uni-karlsruhe.de>
28 *
29 * Yves Arrouye : remove removal of trailing spaces in get_array.
30 * <Yves.Arrouye@marin.fdn.fr>
31 *
32 * Jerome Forissier : added per-CPU time information to /proc/stat
33 * and /proc/<pid>/cpu extension
34 * <forissier@isia.cma.fr>
35 * - Incorporation and non-SMP safe operation
36 * of forissier patch in 2.1.78 by
37 * Hans Marcus <crowbar@concepts.nl>
38 *
39 * aeb@cwi.nl : /proc/partitions
40 *
41 *
42 * Alan Cox : security fixes.
43 * <Alan.Cox@linux.org>
44 *
45 * Al Viro : safe handling of mm_struct
46 *
47 * Gerhard Wichert : added BIGMEM support
48 * Siemens AG <Gerhard.Wichert@pdb.siemens.de>
49 *
50 * Al Viro & Jeff Garzik : moved most of the thing into base.c and
51 * : proc_misc.c. The rest may eventually go into
52 * : base.c too.
53 */
54
55#include <linux/config.h>
56#include <linux/types.h>
57#include <linux/errno.h>
58#include <linux/time.h>
59#include <linux/kernel.h>
60#include <linux/kernel_stat.h>
61#include <linux/tty.h>
62#include <linux/string.h>
63#include <linux/mman.h>
64#include <linux/proc_fs.h>
65#include <linux/ioport.h>
66#include <linux/mm.h>
67#include <linux/hugetlb.h>
68#include <linux/pagemap.h>
69#include <linux/swap.h>
70#include <linux/slab.h>
71#include <linux/smp.h>
72#include <linux/signal.h>
73#include <linux/highmem.h>
74#include <linux/file.h>
75#include <linux/times.h>
76#include <linux/cpuset.h>
77
78#include <asm/uaccess.h>
79#include <asm/pgtable.h>
80#include <asm/io.h>
81#include <asm/processor.h>
82#include "internal.h"
83
84/* Gcc optimizes away "strlen(x)" for constant x */
85#define ADDBUF(buffer, string) \
86do { memcpy(buffer, string, strlen(string)); \
87 buffer += strlen(string); } while (0)
88
89static inline char * task_name(struct task_struct *p, char * buf)
90{
91 int i;
92 char * name;
93 char tcomm[sizeof(p->comm)];
94
95 get_task_comm(tcomm, p);
96
97 ADDBUF(buf, "Name:\t");
98 name = tcomm;
99 i = sizeof(tcomm);
100 do {
101 unsigned char c = *name;
102 name++;
103 i--;
104 *buf = c;
105 if (!c)
106 break;
107 if (c == '\\') {
108 buf[1] = c;
109 buf += 2;
110 continue;
111 }
112 if (c == '\n') {
113 buf[0] = '\\';
114 buf[1] = 'n';
115 buf += 2;
116 continue;
117 }
118 buf++;
119 } while (i);
120 *buf = '\n';
121 return buf+1;
122}
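/*
 * Illustrative sketch (not part of this patch): task_name() escapes a
 * newline in ->comm as the two characters '\' 'n' and doubles literal
 * backslashes, so the "Name:" field always stays on one line.  A
 * hypothetical userspace parser of /proc/<pid>/status could undo that:
 */
static void unescape_comm(char *s)
{
	char *d = s;

	while (*s) {
		if (s[0] == '\\' && s[1] == 'n') {
			*d++ = '\n';		/* "\n" back to a real newline */
			s += 2;
		} else if (s[0] == '\\' && s[1] == '\\') {
			*d++ = '\\';		/* doubled backslash back to one */
			s += 2;
		} else {
			*d++ = *s++;
		}
	}
	*d = '\0';
}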
123
124/*
125 * The task state array is a strange "bitmap" of
126 * reasons to sleep. Thus "running" is zero, and
127 * you can test for combinations of others with
128 * simple bit tests.
129 */
130static const char *task_state_array[] = {
131 "R (running)", /* 0 */
132 "S (sleeping)", /* 1 */
133 "D (disk sleep)", /* 2 */
134 "T (stopped)", /* 4 */
135 "T (tracing stop)", /* 8 */
136 "Z (zombie)", /* 16 */
137 "X (dead)" /* 32 */
138};
139
140static inline const char * get_task_state(struct task_struct *tsk)
141{
142 unsigned int state = (tsk->state & (TASK_RUNNING |
143 TASK_INTERRUPTIBLE |
144 TASK_UNINTERRUPTIBLE |
145 TASK_STOPPED |
146 TASK_TRACED)) |
147 (tsk->exit_state & (EXIT_ZOMBIE |
148 EXIT_DEAD));
149 const char **p = &task_state_array[0];
150
151 while (state) {
152 p++;
153 state >>= 1;
154 }
155 return *p;
156}
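/*
 * Worked example (illustrative, not part of this patch): the loop is an
 * fls()-style scan, so the array index is the position of the most
 * significant set bit, or 0 when no bit is set -- in effect
 * get_task_state(tsk) == task_state_array[fls(state)].  With the values
 * noted above (TASK_UNINTERRUPTIBLE == 2, EXIT_ZOMBIE == 16, ...) an
 * uninterruptible sleeper has state == 2, shifts twice and lands on
 * index 2, "D (disk sleep)"; a zombie (16) shifts five times and lands
 * on index 5, "Z (zombie)".
 */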
157
158static inline char * task_state(struct task_struct *p, char *buffer)
159{
160 struct group_info *group_info;
161 int g;
162
163 read_lock(&tasklist_lock);
164 buffer += sprintf(buffer,
165 "State:\t%s\n"
166 "SleepAVG:\t%lu%%\n"
167 "Tgid:\t%d\n"
168 "Pid:\t%d\n"
169 "PPid:\t%d\n"
170 "TracerPid:\t%d\n"
171 "Uid:\t%d\t%d\t%d\t%d\n"
172 "Gid:\t%d\t%d\t%d\t%d\n",
173 get_task_state(p),
174 (p->sleep_avg/1024)*100/(1020000000/1024),
175 p->tgid,
176 p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
177 pid_alive(p) && p->ptrace ? p->parent->pid : 0,
178 p->uid, p->euid, p->suid, p->fsuid,
179 p->gid, p->egid, p->sgid, p->fsgid);
180 read_unlock(&tasklist_lock);
181 task_lock(p);
182 buffer += sprintf(buffer,
183 "FDSize:\t%d\n"
184 "Groups:\t",
185 p->files ? p->files->max_fds : 0);
186
187 group_info = p->group_info;
188 get_group_info(group_info);
189 task_unlock(p);
190
191 for (g = 0; g < min(group_info->ngroups,NGROUPS_SMALL); g++)
192 buffer += sprintf(buffer, "%d ", GROUP_AT(group_info,g));
193 put_group_info(group_info);
194
195 buffer += sprintf(buffer, "\n");
196 return buffer;
197}
198
199static char * render_sigset_t(const char *header, sigset_t *set, char *buffer)
200{
201 int i, len;
202
203 len = strlen(header);
204 memcpy(buffer, header, len);
205 buffer += len;
206
207 i = _NSIG;
208 do {
209 int x = 0;
210
211 i -= 4;
212 if (sigismember(set, i+1)) x |= 1;
213 if (sigismember(set, i+2)) x |= 2;
214 if (sigismember(set, i+3)) x |= 4;
215 if (sigismember(set, i+4)) x |= 8;
216 *buffer++ = (x < 10 ? '0' : 'a' - 10) + x;
217 } while (i >= 4);
218
219 *buffer++ = '\n';
220 *buffer = 0;
221 return buffer;
222}
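/*
 * Illustrative example (not part of this patch): with _NSIG == 64 (as on
 * most architectures) this emits 16 hex digits, highest-numbered signals
 * first, four signals per digit, i.e. the whole line is the 64-bit mask
 * with signal n at bit n-1.  A set holding only SIGINT (2) and SIGTERM
 * (15) renders as "0000000000004002".  A hypothetical userspace check
 * against such a mask:
 */
static int sig_in_mask(unsigned long long mask, int sig)
{
	return (mask >> (sig - 1)) & 1;		/* 0x4002: SIGINT and SIGTERM */
}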
223
224static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
225 sigset_t *catch)
226{
227 struct k_sigaction *k;
228 int i;
229
230 k = p->sighand->action;
231 for (i = 1; i <= _NSIG; ++i, ++k) {
232 if (k->sa.sa_handler == SIG_IGN)
233 sigaddset(ign, i);
234 else if (k->sa.sa_handler != SIG_DFL)
235 sigaddset(catch, i);
236 }
237}
238
239static inline char * task_sig(struct task_struct *p, char *buffer)
240{
241 sigset_t pending, shpending, blocked, ignored, caught;
242 int num_threads = 0;
243 unsigned long qsize = 0;
244 unsigned long qlim = 0;
245
246 sigemptyset(&pending);
247 sigemptyset(&shpending);
248 sigemptyset(&blocked);
249 sigemptyset(&ignored);
250 sigemptyset(&caught);
251
252 /* Gather all the data with the appropriate locks held */
253 read_lock(&tasklist_lock);
254 if (p->sighand) {
255 spin_lock_irq(&p->sighand->siglock);
256 pending = p->pending.signal;
257 shpending = p->signal->shared_pending.signal;
258 blocked = p->blocked;
259 collect_sigign_sigcatch(p, &ignored, &caught);
260 num_threads = atomic_read(&p->signal->count);
261 qsize = atomic_read(&p->user->sigpending);
262 qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
263 spin_unlock_irq(&p->sighand->siglock);
264 }
265 read_unlock(&tasklist_lock);
266
267 buffer += sprintf(buffer, "Threads:\t%d\n", num_threads);
268 buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim);
269
270 /* render them all */
271 buffer = render_sigset_t("SigPnd:\t", &pending, buffer);
272 buffer = render_sigset_t("ShdPnd:\t", &shpending, buffer);
273 buffer = render_sigset_t("SigBlk:\t", &blocked, buffer);
274 buffer = render_sigset_t("SigIgn:\t", &ignored, buffer);
275 buffer = render_sigset_t("SigCgt:\t", &caught, buffer);
276
277 return buffer;
278}
279
280static inline char *task_cap(struct task_struct *p, char *buffer)
281{
282 return buffer + sprintf(buffer, "CapInh:\t%016x\n"
283 "CapPrm:\t%016x\n"
284 "CapEff:\t%016x\n",
285 cap_t(p->cap_inheritable),
286 cap_t(p->cap_permitted),
287 cap_t(p->cap_effective));
288}
289
290int proc_pid_status(struct task_struct *task, char * buffer)
291{
292 char * orig = buffer;
293 struct mm_struct *mm = get_task_mm(task);
294
295 buffer = task_name(task, buffer);
296 buffer = task_state(task, buffer);
297
298 if (mm) {
299 buffer = task_mem(mm, buffer);
300 mmput(mm);
301 }
302 buffer = task_sig(task, buffer);
303 buffer = task_cap(task, buffer);
304 buffer = cpuset_task_status_allowed(task, buffer);
305#if defined(CONFIG_ARCH_S390)
306 buffer = task_show_regs(task, buffer);
307#endif
308 return buffer - orig;
309}
310
311static int do_task_stat(struct task_struct *task, char * buffer, int whole)
312{
313 unsigned long vsize, eip, esp, wchan = ~0UL;
314 long priority, nice;
315 int tty_pgrp = -1, tty_nr = 0;
316 sigset_t sigign, sigcatch;
317 char state;
318 int res;
319 pid_t ppid, pgid = -1, sid = -1;
320 int num_threads = 0;
321 struct mm_struct *mm;
322 unsigned long long start_time;
323 unsigned long cmin_flt = 0, cmaj_flt = 0;
324 unsigned long min_flt = 0, maj_flt = 0;
325 cputime_t cutime, cstime, utime, stime;
326 unsigned long rsslim = 0;
327 unsigned long it_real_value = 0;
328 struct task_struct *t;
329 char tcomm[sizeof(task->comm)];
330
331 state = *get_task_state(task);
332 vsize = eip = esp = 0;
333 mm = get_task_mm(task);
334 if (mm) {
335 vsize = task_vsize(mm);
336 eip = KSTK_EIP(task);
337 esp = KSTK_ESP(task);
338 }
339
340 get_task_comm(tcomm, task);
341
342 sigemptyset(&sigign);
343 sigemptyset(&sigcatch);
344 cutime = cstime = utime = stime = cputime_zero;
345 read_lock(&tasklist_lock);
346 if (task->sighand) {
347 spin_lock_irq(&task->sighand->siglock);
348 num_threads = atomic_read(&task->signal->count);
349 collect_sigign_sigcatch(task, &sigign, &sigcatch);
350
351 /* add up live thread stats at the group level */
352 if (whole) {
353 t = task;
354 do {
355 min_flt += t->min_flt;
356 maj_flt += t->maj_flt;
357 utime = cputime_add(utime, t->utime);
358 stime = cputime_add(stime, t->stime);
359 t = next_thread(t);
360 } while (t != task);
361 }
362
363 spin_unlock_irq(&task->sighand->siglock);
364 }
365 if (task->signal) {
366 if (task->signal->tty) {
367 tty_pgrp = task->signal->tty->pgrp;
368 tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
369 }
370 pgid = process_group(task);
371 sid = task->signal->session;
372 cmin_flt = task->signal->cmin_flt;
373 cmaj_flt = task->signal->cmaj_flt;
374 cutime = task->signal->cutime;
375 cstime = task->signal->cstime;
376 rsslim = task->signal->rlim[RLIMIT_RSS].rlim_cur;
377 if (whole) {
378 min_flt += task->signal->min_flt;
379 maj_flt += task->signal->maj_flt;
380 utime = cputime_add(utime, task->signal->utime);
381 stime = cputime_add(stime, task->signal->stime);
382 }
383 it_real_value = task->signal->it_real_value;
384 }
385 ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
386 read_unlock(&tasklist_lock);
387
388 if (!whole || num_threads<2)
389 wchan = get_wchan(task);
390 if (!whole) {
391 min_flt = task->min_flt;
392 maj_flt = task->maj_flt;
393 utime = task->utime;
394 stime = task->stime;
395 }
396
397 /* scale priority and nice values from timeslices to -20..20 */
398 /* to make it look like a "normal" Unix priority/nice value */
399 priority = task_prio(task);
400 nice = task_nice(task);
401
402 /* Temporary variable needed for gcc-2.96 */
403 /* convert timespec -> nsec*/
404 start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
405 + task->start_time.tv_nsec;
406 /* convert nsec -> ticks */
407 start_time = nsec_to_clock_t(start_time);
408
409 res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
410%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
411%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
412 task->pid,
413 tcomm,
414 state,
415 ppid,
416 pgid,
417 sid,
418 tty_nr,
419 tty_pgrp,
420 task->flags,
421 min_flt,
422 cmin_flt,
423 maj_flt,
424 cmaj_flt,
425 cputime_to_clock_t(utime),
426 cputime_to_clock_t(stime),
427 cputime_to_clock_t(cutime),
428 cputime_to_clock_t(cstime),
429 priority,
430 nice,
431 num_threads,
432 jiffies_to_clock_t(it_real_value),
433 start_time,
434 vsize,
435 mm ? get_mm_counter(mm, rss) : 0, /* you might want to shift this left 3 */
436 rsslim,
437 mm ? mm->start_code : 0,
438 mm ? mm->end_code : 0,
439 mm ? mm->start_stack : 0,
440 esp,
441 eip,
442 /* The signal information here is obsolete.
443 * It must be decimal for Linux 2.0 compatibility.
444 * Use /proc/#/status for real-time signals.
445 */
446 task->pending.signal.sig[0] & 0x7fffffffUL,
447 task->blocked.sig[0] & 0x7fffffffUL,
448 sigign .sig[0] & 0x7fffffffUL,
449 sigcatch .sig[0] & 0x7fffffffUL,
450 wchan,
451 0UL,
452 0UL,
453 task->exit_signal,
454 task_cpu(task),
455 task->rt_priority,
456 task->policy);
457 if(mm)
458 mmput(mm);
459 return res;
460}
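/*
 * Illustrative sketch (not part of this patch): the comm field above is
 * printed as "(%s)" and may itself contain spaces or ')', so userspace
 * parsers of /proc/<pid>/stat should split the fixed fields only after
 * the *last* ')'.  A hypothetical reader of the state character:
 */
#include <stdio.h>
#include <string.h>

static int read_stat_state(const char *path, char *state)
{
	char line[512], *p;
	FILE *f = fopen(path, "r");

	if (!f)
		return -1;
	if (!fgets(line, sizeof(line), f)) {
		fclose(f);
		return -1;
	}
	fclose(f);
	p = strrchr(line, ')');			/* end of the comm field */
	if (!p || sscanf(p + 1, " %c", state) != 1)
		return -1;
	return 0;
}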
461
462int proc_tid_stat(struct task_struct *task, char * buffer)
463{
464 return do_task_stat(task, buffer, 0);
465}
466
467int proc_tgid_stat(struct task_struct *task, char * buffer)
468{
469 return do_task_stat(task, buffer, 1);
470}
471
472int proc_pid_statm(struct task_struct *task, char *buffer)
473{
474 int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0;
475 struct mm_struct *mm = get_task_mm(task);
476
477 if (mm) {
478 size = task_statm(mm, &shared, &text, &data, &resident);
479 mmput(mm);
480 }
481
482 return sprintf(buffer,"%d %d %d %d %d %d %d\n",
483 size, resident, shared, text, lib, data, 0);
484}
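/*
 * Illustrative sketch (not part of this patch): the seven statm fields
 * (size, resident, shared, text, lib, data and a trailing value) are
 * reported in pages -- lib and the last field are hard-wired to 0 here --
 * so a reader should scale by the runtime page size:
 */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned long size, resident;
	long page = sysconf(_SC_PAGESIZE);
	FILE *f = fopen("/proc/self/statm", "r");

	if (f && fscanf(f, "%lu %lu", &size, &resident) == 2)
		printf("vsize=%lu kB rss=%lu kB\n",
		       size * page / 1024, resident * page / 1024);
	if (f)
		fclose(f);
	return 0;
}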
diff --git a/fs/proc/base.c b/fs/proc/base.c
new file mode 100644
index 000000000000..dad8ea4e00a0
--- /dev/null
+++ b/fs/proc/base.c
@@ -0,0 +1,2056 @@
1/*
2 * linux/fs/proc/base.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 *
6 * proc base directory handling functions
7 *
8 * 1999, Al Viro. Rewritten. Now it covers the whole per-process part.
9 * Instead of using magical inumbers to determine the kind of object
10 * we allocate and fill in-core inodes upon lookup. They don't even
11 * go into icache. We cache the reference to task_struct upon lookup too.
12 * Eventually it should become a filesystem in its own. We don't use the
13 * rest of procfs anymore.
14 */
15
16#include <asm/uaccess.h>
17
18#include <linux/config.h>
19#include <linux/errno.h>
20#include <linux/time.h>
21#include <linux/proc_fs.h>
22#include <linux/stat.h>
23#include <linux/init.h>
24#include <linux/file.h>
25#include <linux/string.h>
26#include <linux/seq_file.h>
27#include <linux/namei.h>
28#include <linux/namespace.h>
29#include <linux/mm.h>
30#include <linux/smp_lock.h>
31#include <linux/kallsyms.h>
32#include <linux/mount.h>
33#include <linux/security.h>
34#include <linux/ptrace.h>
35#include <linux/seccomp.h>
36#include <linux/cpuset.h>
37#include <linux/audit.h>
38#include "internal.h"
39
40/*
41 * For hysterical raisins we keep the same inumbers as in the old procfs.
42 * Feel free to change the macro below - just keep the range distinct from
43 * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
44 * As soon as we'll get a separate superblock we will be able to forget
45 * about magical ranges too.
46 */
47
48#define fake_ino(pid,ino) (((pid)<<16)|(ino))
49
50enum pid_directory_inos {
51 PROC_TGID_INO = 2,
52 PROC_TGID_TASK,
53 PROC_TGID_STATUS,
54 PROC_TGID_MEM,
55#ifdef CONFIG_SECCOMP
56 PROC_TGID_SECCOMP,
57#endif
58 PROC_TGID_CWD,
59 PROC_TGID_ROOT,
60 PROC_TGID_EXE,
61 PROC_TGID_FD,
62 PROC_TGID_ENVIRON,
63 PROC_TGID_AUXV,
64 PROC_TGID_CMDLINE,
65 PROC_TGID_STAT,
66 PROC_TGID_STATM,
67 PROC_TGID_MAPS,
68 PROC_TGID_MOUNTS,
69 PROC_TGID_WCHAN,
70#ifdef CONFIG_SCHEDSTATS
71 PROC_TGID_SCHEDSTAT,
72#endif
73#ifdef CONFIG_CPUSETS
74 PROC_TGID_CPUSET,
75#endif
76#ifdef CONFIG_SECURITY
77 PROC_TGID_ATTR,
78 PROC_TGID_ATTR_CURRENT,
79 PROC_TGID_ATTR_PREV,
80 PROC_TGID_ATTR_EXEC,
81 PROC_TGID_ATTR_FSCREATE,
82#endif
83#ifdef CONFIG_AUDITSYSCALL
84 PROC_TGID_LOGINUID,
85#endif
86 PROC_TGID_FD_DIR,
87 PROC_TGID_OOM_SCORE,
88 PROC_TGID_OOM_ADJUST,
89 PROC_TID_INO,
90 PROC_TID_STATUS,
91 PROC_TID_MEM,
92#ifdef CONFIG_SECCOMP
93 PROC_TID_SECCOMP,
94#endif
95 PROC_TID_CWD,
96 PROC_TID_ROOT,
97 PROC_TID_EXE,
98 PROC_TID_FD,
99 PROC_TID_ENVIRON,
100 PROC_TID_AUXV,
101 PROC_TID_CMDLINE,
102 PROC_TID_STAT,
103 PROC_TID_STATM,
104 PROC_TID_MAPS,
105 PROC_TID_MOUNTS,
106 PROC_TID_WCHAN,
107#ifdef CONFIG_SCHEDSTATS
108 PROC_TID_SCHEDSTAT,
109#endif
110#ifdef CONFIG_CPUSETS
111 PROC_TID_CPUSET,
112#endif
113#ifdef CONFIG_SECURITY
114 PROC_TID_ATTR,
115 PROC_TID_ATTR_CURRENT,
116 PROC_TID_ATTR_PREV,
117 PROC_TID_ATTR_EXEC,
118 PROC_TID_ATTR_FSCREATE,
119#endif
120#ifdef CONFIG_AUDITSYSCALL
121 PROC_TID_LOGINUID,
122#endif
123 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
124 PROC_TID_OOM_SCORE,
125 PROC_TID_OOM_ADJUST,
126};
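/*
 * Illustrative note (not part of this patch): fake_ino() packs the pid
 * into the high 16 bits and one of the enum values above into the low 16
 * bits, so /proc/1234/status gets inode (1234 << 16) | PROC_TGID_STATUS.
 * Hypothetical decode helpers showing the layout (the real code goes
 * through proc_task()/proc_type() instead):
 */
#define fake_ino_to_pid(ino)	((ino) >> 16)
#define fake_ino_to_type(ino)	((ino) & 0xffff)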
127
128struct pid_entry {
129 int type;
130 int len;
131 char *name;
132 mode_t mode;
133};
134
135#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
136
137static struct pid_entry tgid_base_stuff[] = {
138 E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO),
139 E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR),
140 E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR),
141 E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR),
142 E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO),
143 E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO),
144 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO),
145 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO),
146 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO),
147 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
148#ifdef CONFIG_SECCOMP
149 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
150#endif
151 E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
152 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
153 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
154 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
155#ifdef CONFIG_SECURITY
156 E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
157#endif
158#ifdef CONFIG_KALLSYMS
159 E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO),
160#endif
161#ifdef CONFIG_SCHEDSTATS
162 E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO),
163#endif
164#ifdef CONFIG_CPUSETS
165 E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO),
166#endif
167 E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
168 E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
169#ifdef CONFIG_AUDITSYSCALL
170 E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
171#endif
172 {0,0,NULL,0}
173};
174static struct pid_entry tid_base_stuff[] = {
175 E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR),
176 E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR),
177 E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR),
178 E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO),
179 E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO),
180 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO),
181 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO),
182 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO),
183 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
184#ifdef CONFIG_SECCOMP
185 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
186#endif
187 E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
188 E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO),
189 E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO),
190 E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
191#ifdef CONFIG_SECURITY
192 E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
193#endif
194#ifdef CONFIG_KALLSYMS
195 E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO),
196#endif
197#ifdef CONFIG_SCHEDSTATS
198 E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO),
199#endif
200#ifdef CONFIG_CPUSETS
201 E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO),
202#endif
203 E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
204 E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
205#ifdef CONFIG_AUDITSYSCALL
206 E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
207#endif
208 {0,0,NULL,0}
209};
210
211#ifdef CONFIG_SECURITY
212static struct pid_entry tgid_attr_stuff[] = {
213 E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO),
214 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
215 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
216 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
217 {0,0,NULL,0}
218};
219static struct pid_entry tid_attr_stuff[] = {
220 E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO),
221 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
222 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
223 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
224 {0,0,NULL,0}
225};
226#endif
227
228#undef E
229
230static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
231{
232 struct task_struct *task = proc_task(inode);
233 struct files_struct *files;
234 struct file *file;
235 int fd = proc_type(inode) - PROC_TID_FD_DIR;
236
237 files = get_files_struct(task);
238 if (files) {
239 spin_lock(&files->file_lock);
240 file = fcheck_files(files, fd);
241 if (file) {
242 *mnt = mntget(file->f_vfsmnt);
243 *dentry = dget(file->f_dentry);
244 spin_unlock(&files->file_lock);
245 put_files_struct(files);
246 return 0;
247 }
248 spin_unlock(&files->file_lock);
249 put_files_struct(files);
250 }
251 return -ENOENT;
252}
253
254static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
255{
256 struct fs_struct *fs;
257 int result = -ENOENT;
258 task_lock(proc_task(inode));
259 fs = proc_task(inode)->fs;
260 if(fs)
261 atomic_inc(&fs->count);
262 task_unlock(proc_task(inode));
263 if (fs) {
264 read_lock(&fs->lock);
265 *mnt = mntget(fs->pwdmnt);
266 *dentry = dget(fs->pwd);
267 read_unlock(&fs->lock);
268 result = 0;
269 put_fs_struct(fs);
270 }
271 return result;
272}
273
274static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
275{
276 struct fs_struct *fs;
277 int result = -ENOENT;
278 task_lock(proc_task(inode));
279 fs = proc_task(inode)->fs;
280 if(fs)
281 atomic_inc(&fs->count);
282 task_unlock(proc_task(inode));
283 if (fs) {
284 read_lock(&fs->lock);
285 *mnt = mntget(fs->rootmnt);
286 *dentry = dget(fs->root);
287 read_unlock(&fs->lock);
288 result = 0;
289 put_fs_struct(fs);
290 }
291 return result;
292}
293
294#define MAY_PTRACE(task) \
295 (task == current || \
296 (task->parent == current && \
297 (task->ptrace & PT_PTRACED) && \
298 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \
299 security_ptrace(current,task) == 0))
300
301static int may_ptrace_attach(struct task_struct *task)
302{
303 int retval = 0;
304
305 task_lock(task);
306
307 if (!task->mm)
308 goto out;
309 if (((current->uid != task->euid) ||
310 (current->uid != task->suid) ||
311 (current->uid != task->uid) ||
312 (current->gid != task->egid) ||
313 (current->gid != task->sgid) ||
314 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
315 goto out;
316 rmb();
317 if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
318 goto out;
319 if (security_ptrace(current, task))
320 goto out;
321
322 retval = 1;
323out:
324 task_unlock(task);
325 return retval;
326}
327
328static int proc_pid_environ(struct task_struct *task, char * buffer)
329{
330 int res = 0;
331 struct mm_struct *mm = get_task_mm(task);
332 if (mm) {
333 unsigned int len = mm->env_end - mm->env_start;
334 if (len > PAGE_SIZE)
335 len = PAGE_SIZE;
336 res = access_process_vm(task, mm->env_start, buffer, len, 0);
337 if (!may_ptrace_attach(task))
338 res = -ESRCH;
339 mmput(mm);
340 }
341 return res;
342}
343
344static int proc_pid_cmdline(struct task_struct *task, char * buffer)
345{
346 int res = 0;
347 unsigned int len;
348 struct mm_struct *mm = get_task_mm(task);
349 if (!mm)
350 goto out;
351 if (!mm->arg_end)
352 goto out_mm; /* Shh! No looking before we're done */
353
354 len = mm->arg_end - mm->arg_start;
355
356 if (len > PAGE_SIZE)
357 len = PAGE_SIZE;
358
359 res = access_process_vm(task, mm->arg_start, buffer, len, 0);
360
361 // If the nul at the end of args has been overwritten, then
362 // assume application is using setproctitle(3).
363 if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
364 len = strnlen(buffer, res);
365 if (len < res) {
366 res = len;
367 } else {
368 len = mm->env_end - mm->env_start;
369 if (len > PAGE_SIZE - res)
370 len = PAGE_SIZE - res;
371 res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
372 res = strnlen(buffer, res);
373 }
374 }
375out_mm:
376 mmput(mm);
377out:
378 return res;
379}
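/*
 * Illustrative note (not part of this patch): for an ordinary process the
 * argument area still ends in '\0' (e.g. "cat\0/etc/motd\0") and the
 * first read is returned as-is; if the process rewrote its argv a la
 * setproctitle(3), that trailing NUL is gone, so the code falls back to
 * strnlen() and, when the string runs right up to arg_end, keeps reading
 * into the environment area where the rewritten title usually continues.
 */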
380
381static int proc_pid_auxv(struct task_struct *task, char *buffer)
382{
383 int res = 0;
384 struct mm_struct *mm = get_task_mm(task);
385 if (mm) {
386 unsigned int nwords = 0;
387 do
388 nwords += 2;
389 while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
390 res = nwords * sizeof(mm->saved_auxv[0]);
391 if (res > PAGE_SIZE)
392 res = PAGE_SIZE;
393 memcpy(buffer, mm->saved_auxv, res);
394 mmput(mm);
395 }
396 return res;
397}
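/*
 * Illustrative sketch (not part of this patch): userspace sees this file
 * as (key, value) pairs of unsigned long terminated by an AT_NULL key:
 */
#include <stdio.h>
#include <elf.h>			/* AT_NULL, AT_PAGESZ, ... */

int main(void)
{
	unsigned long kv[2];
	FILE *f = fopen("/proc/self/auxv", "r");

	while (f && fread(kv, sizeof(kv[0]), 2, f) == 2 && kv[0] != AT_NULL)
		printf("type %lu value %#lx\n", kv[0], kv[1]);
	if (f)
		fclose(f);
	return 0;
}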
398
399
400#ifdef CONFIG_KALLSYMS
401/*
402 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
403 * Returns the resolved symbol. If that fails, simply return the address.
404 */
405static int proc_pid_wchan(struct task_struct *task, char *buffer)
406{
407 char *modname;
408 const char *sym_name;
409 unsigned long wchan, size, offset;
410 char namebuf[KSYM_NAME_LEN+1];
411
412 wchan = get_wchan(task);
413
414 sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf);
415 if (sym_name)
416 return sprintf(buffer, "%s", sym_name);
417 return sprintf(buffer, "%lu", wchan);
418}
419#endif /* CONFIG_KALLSYMS */
420
421#ifdef CONFIG_SCHEDSTATS
422/*
423 * Provides /proc/PID/schedstat
424 */
425static int proc_pid_schedstat(struct task_struct *task, char *buffer)
426{
427 return sprintf(buffer, "%lu %lu %lu\n",
428 task->sched_info.cpu_time,
429 task->sched_info.run_delay,
430 task->sched_info.pcnt);
431}
432#endif
433
434/* The badness from the OOM killer */
435unsigned long badness(struct task_struct *p, unsigned long uptime);
436static int proc_oom_score(struct task_struct *task, char *buffer)
437{
438 unsigned long points;
439 struct timespec uptime;
440
441 do_posix_clock_monotonic_gettime(&uptime);
442 points = badness(task, uptime.tv_sec);
443 return sprintf(buffer, "%lu\n", points);
444}
445
446/************************************************************************/
447/* Here the fs part begins */
448/************************************************************************/
449
450/* permission checks */
451
452static int proc_check_root(struct inode *inode)
453{
454 struct dentry *de, *base, *root;
455 struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
456 int res = 0;
457
458 if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
459 return -ENOENT;
460 read_lock(&current->fs->lock);
461 our_vfsmnt = mntget(current->fs->rootmnt);
462 base = dget(current->fs->root);
463 read_unlock(&current->fs->lock);
464
465 spin_lock(&vfsmount_lock);
466 de = root;
467 mnt = vfsmnt;
468
469 while (vfsmnt != our_vfsmnt) {
470 if (vfsmnt == vfsmnt->mnt_parent)
471 goto out;
472 de = vfsmnt->mnt_mountpoint;
473 vfsmnt = vfsmnt->mnt_parent;
474 }
475
476 if (!is_subdir(de, base))
477 goto out;
478 spin_unlock(&vfsmount_lock);
479
480exit:
481 dput(base);
482 mntput(our_vfsmnt);
483 dput(root);
484 mntput(mnt);
485 return res;
486out:
487 spin_unlock(&vfsmount_lock);
488 res = -EACCES;
489 goto exit;
490}
491
492static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
493{
494 if (generic_permission(inode, mask, NULL) != 0)
495 return -EACCES;
496 return proc_check_root(inode);
497}
498
499extern struct seq_operations proc_pid_maps_op;
500static int maps_open(struct inode *inode, struct file *file)
501{
502 struct task_struct *task = proc_task(inode);
503 int ret = seq_open(file, &proc_pid_maps_op);
504 if (!ret) {
505 struct seq_file *m = file->private_data;
506 m->private = task;
507 }
508 return ret;
509}
510
511static struct file_operations proc_maps_operations = {
512 .open = maps_open,
513 .read = seq_read,
514 .llseek = seq_lseek,
515 .release = seq_release,
516};
517
518extern struct seq_operations mounts_op;
519static int mounts_open(struct inode *inode, struct file *file)
520{
521 struct task_struct *task = proc_task(inode);
522 int ret = seq_open(file, &mounts_op);
523
524 if (!ret) {
525 struct seq_file *m = file->private_data;
526 struct namespace *namespace;
527 task_lock(task);
528 namespace = task->namespace;
529 if (namespace)
530 get_namespace(namespace);
531 task_unlock(task);
532
533 if (namespace)
534 m->private = namespace;
535 else {
536 seq_release(inode, file);
537 ret = -EINVAL;
538 }
539 }
540 return ret;
541}
542
543static int mounts_release(struct inode *inode, struct file *file)
544{
545 struct seq_file *m = file->private_data;
546 struct namespace *namespace = m->private;
547 put_namespace(namespace);
548 return seq_release(inode, file);
549}
550
551static struct file_operations proc_mounts_operations = {
552 .open = mounts_open,
553 .read = seq_read,
554 .llseek = seq_lseek,
555 .release = mounts_release,
556};
557
558#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
559
560static ssize_t proc_info_read(struct file * file, char __user * buf,
561 size_t count, loff_t *ppos)
562{
563 struct inode * inode = file->f_dentry->d_inode;
564 unsigned long page;
565 ssize_t length;
566 struct task_struct *task = proc_task(inode);
567
568 if (count > PROC_BLOCK_SIZE)
569 count = PROC_BLOCK_SIZE;
570 if (!(page = __get_free_page(GFP_KERNEL)))
571 return -ENOMEM;
572
573 length = PROC_I(inode)->op.proc_read(task, (char*)page);
574
575 if (length >= 0)
576 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
577 free_page(page);
578 return length;
579}
580
581static struct file_operations proc_info_file_operations = {
582 .read = proc_info_read,
583};
584
585static int mem_open(struct inode* inode, struct file* file)
586{
587 file->private_data = (void*)((long)current->self_exec_id);
588 return 0;
589}
590
591static ssize_t mem_read(struct file * file, char __user * buf,
592 size_t count, loff_t *ppos)
593{
594 struct task_struct *task = proc_task(file->f_dentry->d_inode);
595 char *page;
596 unsigned long src = *ppos;
597 int ret = -ESRCH;
598 struct mm_struct *mm;
599
600 if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
601 goto out;
602
603 ret = -ENOMEM;
604 page = (char *)__get_free_page(GFP_USER);
605 if (!page)
606 goto out;
607
608 ret = 0;
609
610 mm = get_task_mm(task);
611 if (!mm)
612 goto out_free;
613
614 ret = -EIO;
615
616 if (file->private_data != (void*)((long)current->self_exec_id))
617 goto out_put;
618
619 ret = 0;
620
621 while (count > 0) {
622 int this_len, retval;
623
624 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
625 retval = access_process_vm(task, src, page, this_len, 0);
626 if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) {
627 if (!ret)
628 ret = -EIO;
629 break;
630 }
631
632 if (copy_to_user(buf, page, retval)) {
633 ret = -EFAULT;
634 break;
635 }
636
637 ret += retval;
638 src += retval;
639 buf += retval;
640 count -= retval;
641 }
642 *ppos = src;
643
644out_put:
645 mmput(mm);
646out_free:
647 free_page((unsigned long) page);
648out:
649 return ret;
650}
651
652#define mem_write NULL
653
654#ifndef mem_write
655/* This is a security hazard */
656static ssize_t mem_write(struct file * file, const char * buf,
657 size_t count, loff_t *ppos)
658{
659 int copied = 0;
660 char *page;
661 struct task_struct *task = proc_task(file->f_dentry->d_inode);
662 unsigned long dst = *ppos;
663
664 if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
665 return -ESRCH;
666
667 page = (char *)__get_free_page(GFP_USER);
668 if (!page)
669 return -ENOMEM;
670
671 while (count > 0) {
672 int this_len, retval;
673
674 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
675 if (copy_from_user(page, buf, this_len)) {
676 copied = -EFAULT;
677 break;
678 }
679 retval = access_process_vm(task, dst, page, this_len, 1);
680 if (!retval) {
681 if (!copied)
682 copied = -EIO;
683 break;
684 }
685 copied += retval;
686 buf += retval;
687 dst += retval;
688 count -= retval;
689 }
690 *ppos = dst;
691 free_page((unsigned long) page);
692 return copied;
693}
694#endif
695
696static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
697{
698 switch (orig) {
699 case 0:
700 file->f_pos = offset;
701 break;
702 case 1:
703 file->f_pos += offset;
704 break;
705 default:
706 return -EINVAL;
707 }
708 force_successful_syscall_return();
709 return file->f_pos;
710}
711
712static struct file_operations proc_mem_operations = {
713 .llseek = mem_lseek,
714 .read = mem_read,
715 .write = mem_write,
716 .open = mem_open,
717};
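/*
 * Illustrative sketch (not part of this patch): with the MAY_PTRACE()
 * check above, /proc/<pid>/mem is only readable by a tracer of a stopped
 * task, so a hypothetical userspace reader attaches first:
 */
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static int peek_word(pid_t pid, unsigned long addr, unsigned long *word)
{
	char path[64];
	FILE *f;
	int ok = 0;

	if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1)
		return -1;
	waitpid(pid, NULL, 0);		/* target is now stopped and traced by us */

	snprintf(path, sizeof(path), "/proc/%d/mem", (int)pid);
	f = fopen(path, "r");
	if (f) {
		ok = fseek(f, (long)addr, SEEK_SET) == 0 &&
		     fread(word, sizeof(*word), 1, f) == 1;
		fclose(f);
	}
	ptrace(PTRACE_DETACH, pid, NULL, NULL);
	return ok ? 0 : -1;
}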
718
719static ssize_t oom_adjust_read(struct file *file, char __user *buf,
720 size_t count, loff_t *ppos)
721{
722 struct task_struct *task = proc_task(file->f_dentry->d_inode);
723 char buffer[8];
724 size_t len;
725 int oom_adjust = task->oomkilladj;
726 loff_t __ppos = *ppos;
727
728 len = sprintf(buffer, "%i\n", oom_adjust);
729 if (__ppos >= len)
730 return 0;
731 if (count > len-__ppos)
732 count = len-__ppos;
733 if (copy_to_user(buf, buffer + __ppos, count))
734 return -EFAULT;
735 *ppos = __ppos + count;
736 return count;
737}
738
739static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
740 size_t count, loff_t *ppos)
741{
742 struct task_struct *task = proc_task(file->f_dentry->d_inode);
743 char buffer[8], *end;
744 int oom_adjust;
745
746 if (!capable(CAP_SYS_RESOURCE))
747 return -EPERM;
748 memset(buffer, 0, 8);
749 if (count > 6)
750 count = 6;
751 if (copy_from_user(buffer, buf, count))
752 return -EFAULT;
753 oom_adjust = simple_strtol(buffer, &end, 0);
754 if (oom_adjust < -16 || oom_adjust > 15)
755 return -EINVAL;
756 if (*end == '\n')
757 end++;
758 task->oomkilladj = oom_adjust;
759 if (end - buffer == 0)
760 return -EIO;
761 return end - buffer;
762}
763
764static struct file_operations proc_oom_adjust_operations = {
765 .read = oom_adjust_read,
766 .write = oom_adjust_write,
767};
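/*
 * Illustrative sketch (not part of this patch): the write handler above
 * requires CAP_SYS_RESOURCE and accepts a decimal value in [-16, 15], so
 * a privileged helper can lower a task's OOM-killer priority like this:
 */
#include <stdio.h>

static int set_oom_adj(int pid, int adj)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/oom_adj", pid);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", adj);	/* e.g. -16 to strongly deprioritize */
	return fclose(f) ? -1 : 0;
}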
768
769static struct inode_operations proc_mem_inode_operations = {
770 .permission = proc_permission,
771};
772
773#ifdef CONFIG_AUDITSYSCALL
774#define TMPBUFLEN 21
775static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
776 size_t count, loff_t *ppos)
777{
778 struct inode * inode = file->f_dentry->d_inode;
779 struct task_struct *task = proc_task(inode);
780 ssize_t length;
781 char tmpbuf[TMPBUFLEN];
782
783 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
784 audit_get_loginuid(task->audit_context));
785 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
786}
787
788static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
789 size_t count, loff_t *ppos)
790{
791 struct inode * inode = file->f_dentry->d_inode;
792 char *page, *tmp;
793 ssize_t length;
794 struct task_struct *task = proc_task(inode);
795 uid_t loginuid;
796
797 if (!capable(CAP_AUDIT_CONTROL))
798 return -EPERM;
799
800 if (current != task)
801 return -EPERM;
802
803 if (count > PAGE_SIZE)
804 count = PAGE_SIZE;
805
806 if (*ppos != 0) {
807 /* No partial writes. */
808 return -EINVAL;
809 }
810 page = (char*)__get_free_page(GFP_USER);
811 if (!page)
812 return -ENOMEM;
813 length = -EFAULT;
814 if (copy_from_user(page, buf, count))
815 goto out_free_page;
816
817 loginuid = simple_strtoul(page, &tmp, 10);
818 if (tmp == page) {
819 length = -EINVAL;
820 goto out_free_page;
821
822 }
823 length = audit_set_loginuid(task->audit_context, loginuid);
824 if (likely(length == 0))
825 length = count;
826
827out_free_page:
828 free_page((unsigned long) page);
829 return length;
830}
831
832static struct file_operations proc_loginuid_operations = {
833 .read = proc_loginuid_read,
834 .write = proc_loginuid_write,
835};
836#endif
837
838#ifdef CONFIG_SECCOMP
839static ssize_t seccomp_read(struct file *file, char __user *buf,
840 size_t count, loff_t *ppos)
841{
842 struct task_struct *tsk = proc_task(file->f_dentry->d_inode);
843 char __buf[20];
844 loff_t __ppos = *ppos;
845 size_t len;
846
847 /* no need to print the trailing zero, so use only len */
848 len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
849 if (__ppos >= len)
850 return 0;
851 if (count > len - __ppos)
852 count = len - __ppos;
853 if (copy_to_user(buf, __buf + __ppos, count))
854 return -EFAULT;
855 *ppos = __ppos + count;
856 return count;
857}
858
859static ssize_t seccomp_write(struct file *file, const char __user *buf,
860 size_t count, loff_t *ppos)
861{
862 struct task_struct *tsk = proc_task(file->f_dentry->d_inode);
863 char __buf[20], *end;
864 unsigned int seccomp_mode;
865
866 /* can set it only once to be even more secure */
867 if (unlikely(tsk->seccomp.mode))
868 return -EPERM;
869
870 memset(__buf, 0, sizeof(__buf));
871 count = min(count, sizeof(__buf) - 1);
872 if (copy_from_user(__buf, buf, count))
873 return -EFAULT;
874 seccomp_mode = simple_strtoul(__buf, &end, 0);
875 if (*end == '\n')
876 end++;
877 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
878 tsk->seccomp.mode = seccomp_mode;
879 set_tsk_thread_flag(tsk, TIF_SECCOMP);
880 } else
881 return -EINVAL;
882 if (unlikely(!(end - __buf)))
883 return -EIO;
884 return end - __buf;
885}
886
887static struct file_operations proc_seccomp_operations = {
888 .read = seccomp_read,
889 .write = seccomp_write,
890};
891#endif /* CONFIG_SECCOMP */
892
893static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
894{
895 struct inode *inode = dentry->d_inode;
896 int error = -EACCES;
897
898 /* We don't need a base pointer in the /proc filesystem */
899 path_release(nd);
900
901 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
902 goto out;
903 error = proc_check_root(inode);
904 if (error)
905 goto out;
906
907 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
908 nd->last_type = LAST_BIND;
909out:
910 return error;
911}
912
913static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
914 char __user *buffer, int buflen)
915{
916 struct inode * inode;
917 char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
918 int len;
919
920 if (!tmp)
921 return -ENOMEM;
922
923 inode = dentry->d_inode;
924 path = d_path(dentry, mnt, tmp, PAGE_SIZE);
925 len = PTR_ERR(path);
926 if (IS_ERR(path))
927 goto out;
928 len = tmp + PAGE_SIZE - 1 - path;
929
930 if (len > buflen)
931 len = buflen;
932 if (copy_to_user(buffer, path, len))
933 len = -EFAULT;
934 out:
935 free_page((unsigned long)tmp);
936 return len;
937}
938
939static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
940{
941 int error = -EACCES;
942 struct inode *inode = dentry->d_inode;
943 struct dentry *de;
944 struct vfsmount *mnt = NULL;
945
946 lock_kernel();
947
948 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
949 goto out;
950 error = proc_check_root(inode);
951 if (error)
952 goto out;
953
954 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
955 if (error)
956 goto out;
957
958 error = do_proc_readlink(de, mnt, buffer, buflen);
959 dput(de);
960 mntput(mnt);
961out:
962 unlock_kernel();
963 return error;
964}
965
966static struct inode_operations proc_pid_link_inode_operations = {
967 .readlink = proc_pid_readlink,
968 .follow_link = proc_pid_follow_link
969};
970
971#define NUMBUF 10
972
973static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
974{
975 struct inode *inode = filp->f_dentry->d_inode;
976 struct task_struct *p = proc_task(inode);
977 unsigned int fd, tid, ino;
978 int retval;
979 char buf[NUMBUF];
980 struct files_struct * files;
981
982 retval = -ENOENT;
983 if (!pid_alive(p))
984 goto out;
985 retval = 0;
986 tid = p->pid;
987
988 fd = filp->f_pos;
989 switch (fd) {
990 case 0:
991 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
992 goto out;
993 filp->f_pos++;
994 case 1:
995 ino = fake_ino(tid, PROC_TID_INO);
996 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
997 goto out;
998 filp->f_pos++;
999 default:
1000 files = get_files_struct(p);
1001 if (!files)
1002 goto out;
1003 spin_lock(&files->file_lock);
1004 for (fd = filp->f_pos-2;
1005 fd < files->max_fds;
1006 fd++, filp->f_pos++) {
1007 unsigned int i,j;
1008
1009 if (!fcheck_files(files, fd))
1010 continue;
1011 spin_unlock(&files->file_lock);
1012
1013 j = NUMBUF;
1014 i = fd;
1015 do {
1016 j--;
1017 buf[j] = '0' + (i % 10);
1018 i /= 10;
1019 } while (i);
1020
1021 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
1022 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
1023 spin_lock(&files->file_lock);
1024 break;
1025 }
1026 spin_lock(&files->file_lock);
1027 }
1028 spin_unlock(&files->file_lock);
1029 put_files_struct(files);
1030 }
1031out:
1032 return retval;
1033}
1034
1035static int proc_pident_readdir(struct file *filp,
1036 void *dirent, filldir_t filldir,
1037 struct pid_entry *ents, unsigned int nents)
1038{
1039 int i;
1040 int pid;
1041 struct dentry *dentry = filp->f_dentry;
1042 struct inode *inode = dentry->d_inode;
1043 struct pid_entry *p;
1044 ino_t ino;
1045 int ret;
1046
1047 ret = -ENOENT;
1048 if (!pid_alive(proc_task(inode)))
1049 goto out;
1050
1051 ret = 0;
1052 pid = proc_task(inode)->pid;
1053 i = filp->f_pos;
1054 switch (i) {
1055 case 0:
1056 ino = inode->i_ino;
1057 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
1058 goto out;
1059 i++;
1060 filp->f_pos++;
1061 /* fall through */
1062 case 1:
1063 ino = parent_ino(dentry);
1064 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
1065 goto out;
1066 i++;
1067 filp->f_pos++;
1068 /* fall through */
1069 default:
1070 i -= 2;
1071 if (i >= nents) {
1072 ret = 1;
1073 goto out;
1074 }
1075 p = ents + i;
1076 while (p->name) {
1077 if (filldir(dirent, p->name, p->len, filp->f_pos,
1078 fake_ino(pid, p->type), p->mode >> 12) < 0)
1079 goto out;
1080 filp->f_pos++;
1081 p++;
1082 }
1083 }
1084
1085 ret = 1;
1086out:
1087 return ret;
1088}
1089
1090static int proc_tgid_base_readdir(struct file * filp,
1091 void * dirent, filldir_t filldir)
1092{
1093 return proc_pident_readdir(filp,dirent,filldir,
1094 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
1095}
1096
1097static int proc_tid_base_readdir(struct file * filp,
1098 void * dirent, filldir_t filldir)
1099{
1100 return proc_pident_readdir(filp,dirent,filldir,
1101 tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
1102}
1103
1104/* building an inode */
1105
1106static int task_dumpable(struct task_struct *task)
1107{
1108 int dumpable = 0;
1109 struct mm_struct *mm;
1110
1111 task_lock(task);
1112 mm = task->mm;
1113 if (mm)
1114 dumpable = mm->dumpable;
1115 task_unlock(task);
1116 return dumpable;
1117}
1118
1119
1120static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
1121{
1122 struct inode * inode;
1123 struct proc_inode *ei;
1124
1125 /* We need a new inode */
1126
1127 inode = new_inode(sb);
1128 if (!inode)
1129 goto out;
1130
1131 /* Common stuff */
1132 ei = PROC_I(inode);
1133 ei->task = NULL;
1134 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1135 inode->i_ino = fake_ino(task->pid, ino);
1136
1137 if (!pid_alive(task))
1138 goto out_unlock;
1139
1140 /*
1141 * grab the reference to task.
1142 */
1143 get_task_struct(task);
1144 ei->task = task;
1145 ei->type = ino;
1146 inode->i_uid = 0;
1147 inode->i_gid = 0;
1148 if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) {
1149 inode->i_uid = task->euid;
1150 inode->i_gid = task->egid;
1151 }
1152 security_task_to_inode(task, inode);
1153
1154out:
1155 return inode;
1156
1157out_unlock:
1158 ei->pde = NULL;
1159 iput(inode);
1160 return NULL;
1161}
1162
1163/* dentry stuff */
1164
1165/*
1166 * Exceptional case: normally we are not allowed to unhash a busy
1167 * directory. In this case, however, we can do it - no aliasing problems
1168 * due to the way we treat inodes.
1169 *
1170 * Rewrite the inode's ownerships here because the owning task may have
1171 * performed a setuid(), etc.
1172 */
1173static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1174{
1175 struct inode *inode = dentry->d_inode;
1176 struct task_struct *task = proc_task(inode);
1177 if (pid_alive(task)) {
1178 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) {
1179 inode->i_uid = task->euid;
1180 inode->i_gid = task->egid;
1181 } else {
1182 inode->i_uid = 0;
1183 inode->i_gid = 0;
1184 }
1185 security_task_to_inode(task, inode);
1186 return 1;
1187 }
1188 d_drop(dentry);
1189 return 0;
1190}
1191
1192static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1193{
1194 struct inode *inode = dentry->d_inode;
1195 struct task_struct *task = proc_task(inode);
1196 int fd = proc_type(inode) - PROC_TID_FD_DIR;
1197 struct files_struct *files;
1198
1199 files = get_files_struct(task);
1200 if (files) {
1201 spin_lock(&files->file_lock);
1202 if (fcheck_files(files, fd)) {
1203 spin_unlock(&files->file_lock);
1204 put_files_struct(files);
1205 if (task_dumpable(task)) {
1206 inode->i_uid = task->euid;
1207 inode->i_gid = task->egid;
1208 } else {
1209 inode->i_uid = 0;
1210 inode->i_gid = 0;
1211 }
1212 security_task_to_inode(task, inode);
1213 return 1;
1214 }
1215 spin_unlock(&files->file_lock);
1216 put_files_struct(files);
1217 }
1218 d_drop(dentry);
1219 return 0;
1220}
1221
1222static void pid_base_iput(struct dentry *dentry, struct inode *inode)
1223{
1224 struct task_struct *task = proc_task(inode);
1225 spin_lock(&task->proc_lock);
1226 if (task->proc_dentry == dentry)
1227 task->proc_dentry = NULL;
1228 spin_unlock(&task->proc_lock);
1229 iput(inode);
1230}
1231
1232static int pid_delete_dentry(struct dentry * dentry)
1233{
1234 /* Is the task we represent dead?
1235 * If so, then don't put the dentry on the lru list,
1236 * kill it immediately.
1237 */
1238 return !pid_alive(proc_task(dentry->d_inode));
1239}
1240
1241static struct dentry_operations tid_fd_dentry_operations =
1242{
1243 .d_revalidate = tid_fd_revalidate,
1244 .d_delete = pid_delete_dentry,
1245};
1246
1247static struct dentry_operations pid_dentry_operations =
1248{
1249 .d_revalidate = pid_revalidate,
1250 .d_delete = pid_delete_dentry,
1251};
1252
1253static struct dentry_operations pid_base_dentry_operations =
1254{
1255 .d_revalidate = pid_revalidate,
1256 .d_iput = pid_base_iput,
1257 .d_delete = pid_delete_dentry,
1258};
1259
1260/* Lookups */
1261
1262static unsigned name_to_int(struct dentry *dentry)
1263{
1264 const char *name = dentry->d_name.name;
1265 int len = dentry->d_name.len;
1266 unsigned n = 0;
1267
1268 if (len > 1 && *name == '0')
1269 goto out;
1270 while (len-- > 0) {
1271 unsigned c = *name++ - '0';
1272 if (c > 9)
1273 goto out;
1274 if (n >= (~0U-9)/10)
1275 goto out;
1276 n *= 10;
1277 n += c;
1278 }
1279 return n;
1280out:
1281 return ~0U;
1282}
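/*
 * Illustrative examples (not part of this patch), written as if the
 * helper took the name string directly: only canonical decimal names
 * resolve, so
 *
 *	"42"         -> 42
 *	"0"          -> 0
 *	"007"        -> ~0U   (leading zero rejected)
 *	"4x"         -> ~0U   (non-digit rejected)
 *	"4294967296" -> ~0U   (overflow guard)
 *
 * and a ~0U result makes the fd/pid lookups below fail with -ENOENT.
 */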
1283
1284/* SMP-safe */
1285static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1286{
1287 struct task_struct *task = proc_task(dir);
1288 unsigned fd = name_to_int(dentry);
1289 struct file * file;
1290 struct files_struct * files;
1291 struct inode *inode;
1292 struct proc_inode *ei;
1293
1294 if (fd == ~0U)
1295 goto out;
1296 if (!pid_alive(task))
1297 goto out;
1298
1299 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
1300 if (!inode)
1301 goto out;
1302 ei = PROC_I(inode);
1303 files = get_files_struct(task);
1304 if (!files)
1305 goto out_unlock;
1306 inode->i_mode = S_IFLNK;
1307 spin_lock(&files->file_lock);
1308 file = fcheck_files(files, fd);
1309 if (!file)
1310 goto out_unlock2;
1311 if (file->f_mode & 1)
1312 inode->i_mode |= S_IRUSR | S_IXUSR;
1313 if (file->f_mode & 2)
1314 inode->i_mode |= S_IWUSR | S_IXUSR;
1315 spin_unlock(&files->file_lock);
1316 put_files_struct(files);
1317 inode->i_op = &proc_pid_link_inode_operations;
1318 inode->i_size = 64;
1319 ei->op.proc_get_link = proc_fd_link;
1320 dentry->d_op = &tid_fd_dentry_operations;
1321 d_add(dentry, inode);
1322 return NULL;
1323
1324out_unlock2:
1325 spin_unlock(&files->file_lock);
1326 put_files_struct(files);
1327out_unlock:
1328 iput(inode);
1329out:
1330 return ERR_PTR(-ENOENT);
1331}
1332
1333static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir);
1334static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd);
1335
1336static struct file_operations proc_fd_operations = {
1337 .read = generic_read_dir,
1338 .readdir = proc_readfd,
1339};
1340
1341static struct file_operations proc_task_operations = {
1342 .read = generic_read_dir,
1343 .readdir = proc_task_readdir,
1344};
1345
1346/*
1347 * proc directories can do almost nothing..
1348 */
1349static struct inode_operations proc_fd_inode_operations = {
1350 .lookup = proc_lookupfd,
1351 .permission = proc_permission,
1352};
1353
1354static struct inode_operations proc_task_inode_operations = {
1355 .lookup = proc_task_lookup,
1356 .permission = proc_permission,
1357};
1358
1359#ifdef CONFIG_SECURITY
1360static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1361 size_t count, loff_t *ppos)
1362{
1363 struct inode * inode = file->f_dentry->d_inode;
1364 unsigned long page;
1365 ssize_t length;
1366 struct task_struct *task = proc_task(inode);
1367
1368 if (count > PAGE_SIZE)
1369 count = PAGE_SIZE;
1370 if (!(page = __get_free_page(GFP_KERNEL)))
1371 return -ENOMEM;
1372
1373 length = security_getprocattr(task,
1374 (char*)file->f_dentry->d_name.name,
1375 (void*)page, count);
1376 if (length >= 0)
1377 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
1378 free_page(page);
1379 return length;
1380}
1381
1382static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1383 size_t count, loff_t *ppos)
1384{
1385 struct inode * inode = file->f_dentry->d_inode;
1386 char *page;
1387 ssize_t length;
1388 struct task_struct *task = proc_task(inode);
1389
1390 if (count > PAGE_SIZE)
1391 count = PAGE_SIZE;
1392 if (*ppos != 0) {
1393 /* No partial writes. */
1394 return -EINVAL;
1395 }
1396 page = (char*)__get_free_page(GFP_USER);
1397 if (!page)
1398 return -ENOMEM;
1399 length = -EFAULT;
1400 if (copy_from_user(page, buf, count))
1401 goto out;
1402
1403 length = security_setprocattr(task,
1404 (char*)file->f_dentry->d_name.name,
1405 (void*)page, count);
1406out:
1407 free_page((unsigned long) page);
1408 return length;
1409}
1410
1411static struct file_operations proc_pid_attr_operations = {
1412 .read = proc_pid_attr_read,
1413 .write = proc_pid_attr_write,
1414};
1415
1416static struct file_operations proc_tid_attr_operations;
1417static struct inode_operations proc_tid_attr_inode_operations;
1418static struct file_operations proc_tgid_attr_operations;
1419static struct inode_operations proc_tgid_attr_inode_operations;
1420#endif
1421
1422/* SMP-safe */
1423static struct dentry *proc_pident_lookup(struct inode *dir,
1424 struct dentry *dentry,
1425 struct pid_entry *ents)
1426{
1427 struct inode *inode;
1428 int error;
1429 struct task_struct *task = proc_task(dir);
1430 struct pid_entry *p;
1431 struct proc_inode *ei;
1432
1433 error = -ENOENT;
1434 inode = NULL;
1435
1436 if (!pid_alive(task))
1437 goto out;
1438
1439 for (p = ents; p->name; p++) {
1440 if (p->len != dentry->d_name.len)
1441 continue;
1442 if (!memcmp(dentry->d_name.name, p->name, p->len))
1443 break;
1444 }
1445 if (!p->name)
1446 goto out;
1447
1448 error = -EINVAL;
1449 inode = proc_pid_make_inode(dir->i_sb, task, p->type);
1450 if (!inode)
1451 goto out;
1452
1453 ei = PROC_I(inode);
1454 inode->i_mode = p->mode;
1455 /*
1456 * Yes, it does not scale. And it should not. Don't add
1457 * new entries into /proc/<tgid>/ without very good reasons.
1458 */
1459 switch(p->type) {
1460 case PROC_TGID_TASK:
1461 inode->i_nlink = 3;
1462 inode->i_op = &proc_task_inode_operations;
1463 inode->i_fop = &proc_task_operations;
1464 break;
1465 case PROC_TID_FD:
1466 case PROC_TGID_FD:
1467 inode->i_nlink = 2;
1468 inode->i_op = &proc_fd_inode_operations;
1469 inode->i_fop = &proc_fd_operations;
1470 break;
1471 case PROC_TID_EXE:
1472 case PROC_TGID_EXE:
1473 inode->i_op = &proc_pid_link_inode_operations;
1474 ei->op.proc_get_link = proc_exe_link;
1475 break;
1476 case PROC_TID_CWD:
1477 case PROC_TGID_CWD:
1478 inode->i_op = &proc_pid_link_inode_operations;
1479 ei->op.proc_get_link = proc_cwd_link;
1480 break;
1481 case PROC_TID_ROOT:
1482 case PROC_TGID_ROOT:
1483 inode->i_op = &proc_pid_link_inode_operations;
1484 ei->op.proc_get_link = proc_root_link;
1485 break;
1486 case PROC_TID_ENVIRON:
1487 case PROC_TGID_ENVIRON:
1488 inode->i_fop = &proc_info_file_operations;
1489 ei->op.proc_read = proc_pid_environ;
1490 break;
1491 case PROC_TID_AUXV:
1492 case PROC_TGID_AUXV:
1493 inode->i_fop = &proc_info_file_operations;
1494 ei->op.proc_read = proc_pid_auxv;
1495 break;
1496 case PROC_TID_STATUS:
1497 case PROC_TGID_STATUS:
1498 inode->i_fop = &proc_info_file_operations;
1499 ei->op.proc_read = proc_pid_status;
1500 break;
1501 case PROC_TID_STAT:
1502 inode->i_fop = &proc_info_file_operations;
1503 ei->op.proc_read = proc_tid_stat;
1504 break;
1505 case PROC_TGID_STAT:
1506 inode->i_fop = &proc_info_file_operations;
1507 ei->op.proc_read = proc_tgid_stat;
1508 break;
1509 case PROC_TID_CMDLINE:
1510 case PROC_TGID_CMDLINE:
1511 inode->i_fop = &proc_info_file_operations;
1512 ei->op.proc_read = proc_pid_cmdline;
1513 break;
1514 case PROC_TID_STATM:
1515 case PROC_TGID_STATM:
1516 inode->i_fop = &proc_info_file_operations;
1517 ei->op.proc_read = proc_pid_statm;
1518 break;
1519 case PROC_TID_MAPS:
1520 case PROC_TGID_MAPS:
1521 inode->i_fop = &proc_maps_operations;
1522 break;
1523 case PROC_TID_MEM:
1524 case PROC_TGID_MEM:
1525 inode->i_op = &proc_mem_inode_operations;
1526 inode->i_fop = &proc_mem_operations;
1527 break;
1528#ifdef CONFIG_SECCOMP
1529 case PROC_TID_SECCOMP:
1530 case PROC_TGID_SECCOMP:
1531 inode->i_fop = &proc_seccomp_operations;
1532 break;
1533#endif /* CONFIG_SECCOMP */
1534 case PROC_TID_MOUNTS:
1535 case PROC_TGID_MOUNTS:
1536 inode->i_fop = &proc_mounts_operations;
1537 break;
1538#ifdef CONFIG_SECURITY
1539 case PROC_TID_ATTR:
1540 inode->i_nlink = 2;
1541 inode->i_op = &proc_tid_attr_inode_operations;
1542 inode->i_fop = &proc_tid_attr_operations;
1543 break;
1544 case PROC_TGID_ATTR:
1545 inode->i_nlink = 2;
1546 inode->i_op = &proc_tgid_attr_inode_operations;
1547 inode->i_fop = &proc_tgid_attr_operations;
1548 break;
1549 case PROC_TID_ATTR_CURRENT:
1550 case PROC_TGID_ATTR_CURRENT:
1551 case PROC_TID_ATTR_PREV:
1552 case PROC_TGID_ATTR_PREV:
1553 case PROC_TID_ATTR_EXEC:
1554 case PROC_TGID_ATTR_EXEC:
1555 case PROC_TID_ATTR_FSCREATE:
1556 case PROC_TGID_ATTR_FSCREATE:
1557 inode->i_fop = &proc_pid_attr_operations;
1558 break;
1559#endif
1560#ifdef CONFIG_KALLSYMS
1561 case PROC_TID_WCHAN:
1562 case PROC_TGID_WCHAN:
1563 inode->i_fop = &proc_info_file_operations;
1564 ei->op.proc_read = proc_pid_wchan;
1565 break;
1566#endif
1567#ifdef CONFIG_SCHEDSTATS
1568 case PROC_TID_SCHEDSTAT:
1569 case PROC_TGID_SCHEDSTAT:
1570 inode->i_fop = &proc_info_file_operations;
1571 ei->op.proc_read = proc_pid_schedstat;
1572 break;
1573#endif
1574#ifdef CONFIG_CPUSETS
1575 case PROC_TID_CPUSET:
1576 case PROC_TGID_CPUSET:
1577 inode->i_fop = &proc_cpuset_operations;
1578 break;
1579#endif
1580 case PROC_TID_OOM_SCORE:
1581 case PROC_TGID_OOM_SCORE:
1582 inode->i_fop = &proc_info_file_operations;
1583 ei->op.proc_read = proc_oom_score;
1584 break;
1585 case PROC_TID_OOM_ADJUST:
1586 case PROC_TGID_OOM_ADJUST:
1587 inode->i_fop = &proc_oom_adjust_operations;
1588 break;
1589#ifdef CONFIG_AUDITSYSCALL
1590 case PROC_TID_LOGINUID:
1591 case PROC_TGID_LOGINUID:
1592 inode->i_fop = &proc_loginuid_operations;
1593 break;
1594#endif
1595 default:
1596 printk("procfs: impossible type (%d)",p->type);
1597 iput(inode);
1598 return ERR_PTR(-EINVAL);
1599 }
1600 dentry->d_op = &pid_dentry_operations;
1601 d_add(dentry, inode);
1602 return NULL;
1603
1604out:
1605 return ERR_PTR(error);
1606}
1607
1608static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1609 return proc_pident_lookup(dir, dentry, tgid_base_stuff);
1610}
1611
1612static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1613 return proc_pident_lookup(dir, dentry, tid_base_stuff);
1614}
1615
1616static struct file_operations proc_tgid_base_operations = {
1617 .read = generic_read_dir,
1618 .readdir = proc_tgid_base_readdir,
1619};
1620
1621static struct file_operations proc_tid_base_operations = {
1622 .read = generic_read_dir,
1623 .readdir = proc_tid_base_readdir,
1624};
1625
1626static struct inode_operations proc_tgid_base_inode_operations = {
1627 .lookup = proc_tgid_base_lookup,
1628};
1629
1630static struct inode_operations proc_tid_base_inode_operations = {
1631 .lookup = proc_tid_base_lookup,
1632};
1633
1634#ifdef CONFIG_SECURITY
1635static int proc_tgid_attr_readdir(struct file * filp,
1636 void * dirent, filldir_t filldir)
1637{
1638 return proc_pident_readdir(filp,dirent,filldir,
1639 tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff));
1640}
1641
1642static int proc_tid_attr_readdir(struct file * filp,
1643 void * dirent, filldir_t filldir)
1644{
1645 return proc_pident_readdir(filp,dirent,filldir,
1646 tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff));
1647}
1648
1649static struct file_operations proc_tgid_attr_operations = {
1650 .read = generic_read_dir,
1651 .readdir = proc_tgid_attr_readdir,
1652};
1653
1654static struct file_operations proc_tid_attr_operations = {
1655 .read = generic_read_dir,
1656 .readdir = proc_tid_attr_readdir,
1657};
1658
1659static struct dentry *proc_tgid_attr_lookup(struct inode *dir,
1660 struct dentry *dentry, struct nameidata *nd)
1661{
1662 return proc_pident_lookup(dir, dentry, tgid_attr_stuff);
1663}
1664
1665static struct dentry *proc_tid_attr_lookup(struct inode *dir,
1666 struct dentry *dentry, struct nameidata *nd)
1667{
1668 return proc_pident_lookup(dir, dentry, tid_attr_stuff);
1669}
1670
1671static struct inode_operations proc_tgid_attr_inode_operations = {
1672 .lookup = proc_tgid_attr_lookup,
1673};
1674
1675static struct inode_operations proc_tid_attr_inode_operations = {
1676 .lookup = proc_tid_attr_lookup,
1677};
1678#endif
1679
1680/*
1681 * /proc/self:
1682 */
1683static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1684 int buflen)
1685{
1686 char tmp[30];
1687 sprintf(tmp, "%d", current->tgid);
1688 return vfs_readlink(dentry,buffer,buflen,tmp);
1689}
1690
1691static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1692{
1693 char tmp[30];
1694 sprintf(tmp, "%d", current->tgid);
1695 return vfs_follow_link(nd,tmp);
1696}
1697
1698static struct inode_operations proc_self_inode_operations = {
1699 .readlink = proc_self_readlink,
1700 .follow_link = proc_self_follow_link,
1701};
1702
1703/**
1704 * proc_pid_unhash - Unhash /proc/<pid> entry from the dcache.
1705 * @p: task that should be flushed.
1706 *
1707 * Drops the /proc/<pid> dcache entry from the hash chains.
1708 *
1709 * Dropping /proc/<pid> entries and detach_pid must be synchronous,
1710 * otherwise e.g. /proc/<pid>/exe might point to the wrong executable,
1711 * if the pid value is immediately reused. This is enforced by
1712 * - caller must acquire spin_lock(p->proc_lock)
1713 * - must be called before detach_pid()
1714 * - proc_pid_lookup acquires proc_lock, and checks that
1715 * the target is not dead by looking at the attach count
1716 * of PIDTYPE_PID.
1717 */
1718
1719struct dentry *proc_pid_unhash(struct task_struct *p)
1720{
1721 struct dentry *proc_dentry;
1722
1723 proc_dentry = p->proc_dentry;
1724 if (proc_dentry != NULL) {
1725
1726 spin_lock(&dcache_lock);
1727 spin_lock(&proc_dentry->d_lock);
1728 if (!d_unhashed(proc_dentry)) {
1729 dget_locked(proc_dentry);
1730 __d_drop(proc_dentry);
1731 spin_unlock(&proc_dentry->d_lock);
1732 } else {
1733 spin_unlock(&proc_dentry->d_lock);
1734 proc_dentry = NULL;
1735 }
1736 spin_unlock(&dcache_lock);
1737 }
1738 return proc_dentry;
1739}
1740
1741/**
1742 * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries
1743 * @proc_dentry: dentry of the /proc/<pid> directory to prune.
1744 *
1745 * Shrink the /proc directory that was used by the just-killed thread.
1746 */
1747
1748void proc_pid_flush(struct dentry *proc_dentry)
1749{
1750 might_sleep();
1751 if(proc_dentry != NULL) {
1752 shrink_dcache_parent(proc_dentry);
1753 dput(proc_dentry);
1754 }
1755}
1756
1757/* SMP-safe */
1758struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
1759{
1760 struct task_struct *task;
1761 struct inode *inode;
1762 struct proc_inode *ei;
1763 unsigned tgid;
1764 int died;
1765
1766 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
1767 inode = new_inode(dir->i_sb);
1768 if (!inode)
1769 return ERR_PTR(-ENOMEM);
1770 ei = PROC_I(inode);
1771 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1772 inode->i_ino = fake_ino(0, PROC_TGID_INO);
1773 ei->pde = NULL;
1774 inode->i_mode = S_IFLNK|S_IRWXUGO;
1775 inode->i_uid = inode->i_gid = 0;
1776 inode->i_size = 64;
1777 inode->i_op = &proc_self_inode_operations;
1778 d_add(dentry, inode);
1779 return NULL;
1780 }
1781 tgid = name_to_int(dentry);
1782 if (tgid == ~0U)
1783 goto out;
1784
1785 read_lock(&tasklist_lock);
1786 task = find_task_by_pid(tgid);
1787 if (task)
1788 get_task_struct(task);
1789 read_unlock(&tasklist_lock);
1790 if (!task)
1791 goto out;
1792
1793 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
1794
1795
1796 if (!inode) {
1797 put_task_struct(task);
1798 goto out;
1799 }
1800 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1801 inode->i_op = &proc_tgid_base_inode_operations;
1802 inode->i_fop = &proc_tgid_base_operations;
1803 inode->i_nlink = 3;
1804 inode->i_flags|=S_IMMUTABLE;
1805
1806 dentry->d_op = &pid_base_dentry_operations;
1807
1808 died = 0;
1809 d_add(dentry, inode);
1810 spin_lock(&task->proc_lock);
1811 task->proc_dentry = dentry;
1812 if (!pid_alive(task)) {
1813 dentry = proc_pid_unhash(task);
1814 died = 1;
1815 }
1816 spin_unlock(&task->proc_lock);
1817
1818 put_task_struct(task);
1819 if (died) {
1820 proc_pid_flush(dentry);
1821 goto out;
1822 }
1823 return NULL;
1824out:
1825 return ERR_PTR(-ENOENT);
1826}
1827
1828/* SMP-safe */
1829static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
1830{
1831 struct task_struct *task;
1832 struct task_struct *leader = proc_task(dir);
1833 struct inode *inode;
1834 unsigned tid;
1835
1836 tid = name_to_int(dentry);
1837 if (tid == ~0U)
1838 goto out;
1839
1840 read_lock(&tasklist_lock);
1841 task = find_task_by_pid(tid);
1842 if (task)
1843 get_task_struct(task);
1844 read_unlock(&tasklist_lock);
1845 if (!task)
1846 goto out;
1847 if (leader->tgid != task->tgid)
1848 goto out_drop_task;
1849
1850 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
1851
1852
1853 if (!inode)
1854 goto out_drop_task;
1855 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1856 inode->i_op = &proc_tid_base_inode_operations;
1857 inode->i_fop = &proc_tid_base_operations;
1858 inode->i_nlink = 3;
1859 inode->i_flags|=S_IMMUTABLE;
1860
1861 dentry->d_op = &pid_base_dentry_operations;
1862
1863 d_add(dentry, inode);
1864
1865 put_task_struct(task);
1866 return NULL;
1867out_drop_task:
1868 put_task_struct(task);
1869out:
1870 return ERR_PTR(-ENOENT);
1871}
1872
1873#define PROC_NUMBUF 10
1874#define PROC_MAXPIDS 20
1875
1876/*
1877 * Get a few tgids to return for filldir - we need to hold the
1878 * tasklist lock while doing this, and we must release it before
1879 * we actually do the filldir itself, so we use a temp buffer..
1880 */
1881static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
1882{
1883 struct task_struct *p;
1884 int nr_tgids = 0;
1885
1886 index--;
1887 read_lock(&tasklist_lock);
1888 p = NULL;
1889 if (version) {
1890 p = find_task_by_pid(version);
1891 if (p && !thread_group_leader(p))
1892 p = NULL;
1893 }
1894
1895 if (p)
1896 index = 0;
1897 else
1898 p = next_task(&init_task);
1899
1900 for ( ; p != &init_task; p = next_task(p)) {
1901 int tgid = p->pid;
1902 if (!pid_alive(p))
1903 continue;
1904 if (--index >= 0)
1905 continue;
1906 tgids[nr_tgids] = tgid;
1907 nr_tgids++;
1908 if (nr_tgids >= PROC_MAXPIDS)
1909 break;
1910 }
1911 read_unlock(&tasklist_lock);
1912 return nr_tgids;
1913}
1914
1915/*
1916 * Get a few tids to return for filldir - we need to hold the
1917 * tasklist lock while doing this, and we must release it before
1918 * we actually do the filldir itself, so we use a temp buffer..
1919 */
1920static int get_tid_list(int index, unsigned int *tids, struct inode *dir)
1921{
1922 struct task_struct *leader_task = proc_task(dir);
1923 struct task_struct *task = leader_task;
1924 int nr_tids = 0;
1925
1926 index -= 2;
1927 read_lock(&tasklist_lock);
1928 /*
1929 * The starting point task (leader_task) might be an already
1930 * unlinked task, which cannot be used to access the task-list
1931 * via next_thread().
1932 */
1933 if (pid_alive(task)) do {
1934 int tid = task->pid;
1935
1936 if (--index >= 0)
1937 continue;
1938 tids[nr_tids] = tid;
1939 nr_tids++;
1940 if (nr_tids >= PROC_MAXPIDS)
1941 break;
1942 } while ((task = next_thread(task)) != leader_task);
1943 read_unlock(&tasklist_lock);
1944 return nr_tids;
1945}
1946
1947/* for the /proc/ directory itself, after non-process stuff has been done */
1948int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
1949{
1950 unsigned int tgid_array[PROC_MAXPIDS];
1951 char buf[PROC_NUMBUF];
1952 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
1953 unsigned int nr_tgids, i;
1954 int next_tgid;
1955
1956 if (!nr) {
1957 ino_t ino = fake_ino(0,PROC_TGID_INO);
1958 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
1959 return 0;
1960 filp->f_pos++;
1961 nr++;
1962 }
1963
1964 /* f_version caches the tgid value that the last readdir call couldn't
1965 * return. lseek aka telldir automagically resets f_version to 0.
1966 */
1967 next_tgid = filp->f_version;
1968 filp->f_version = 0;
1969 for (;;) {
1970 nr_tgids = get_tgid_list(nr, next_tgid, tgid_array);
1971 if (!nr_tgids) {
1972 /* no more entries ! */
1973 break;
1974 }
1975 next_tgid = 0;
1976
1977 /* do not use the last found pid, reserve it for next_tgid */
1978 if (nr_tgids == PROC_MAXPIDS) {
1979 nr_tgids--;
1980 next_tgid = tgid_array[nr_tgids];
1981 }
1982
1983 for (i=0;i<nr_tgids;i++) {
1984 int tgid = tgid_array[i];
1985 ino_t ino = fake_ino(tgid,PROC_TGID_INO);
1986 unsigned long j = PROC_NUMBUF;
1987
1988 do
1989 buf[--j] = '0' + (tgid % 10);
1990 while ((tgid /= 10) != 0);
1991
1992 if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) {
1993 /* returning this tgid failed, save it as the first
1994 * pid for the next readdir call */
1995 filp->f_version = tgid_array[i];
1996 goto out;
1997 }
1998 filp->f_pos++;
1999 nr++;
2000 }
2001 }
2002out:
2003 return 0;
2004}
2005
2006/* for the /proc/TGID/task/ directories */
2007static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
2008{
2009 unsigned int tid_array[PROC_MAXPIDS];
2010 char buf[PROC_NUMBUF];
2011 unsigned int nr_tids, i;
2012 struct dentry *dentry = filp->f_dentry;
2013 struct inode *inode = dentry->d_inode;
2014 int retval = -ENOENT;
2015 ino_t ino;
2016 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
2017
2018 if (!pid_alive(proc_task(inode)))
2019 goto out;
2020 retval = 0;
2021
2022 switch (pos) {
2023 case 0:
2024 ino = inode->i_ino;
2025 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
2026 goto out;
2027 pos++;
2028 /* fall through */
2029 case 1:
2030 ino = parent_ino(dentry);
2031 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
2032 goto out;
2033 pos++;
2034 /* fall through */
2035 }
2036
2037 nr_tids = get_tid_list(pos, tid_array, inode);
2038
2039 for (i = 0; i < nr_tids; i++) {
2040 unsigned long j = PROC_NUMBUF;
2041 int tid = tid_array[i];
2042
2043 ino = fake_ino(tid,PROC_TID_INO);
2044
2045 do
2046 buf[--j] = '0' + (tid % 10);
2047 while ((tid /= 10) != 0);
2048
2049 if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0)
2050 break;
2051 pos++;
2052 }
2053out:
2054 filp->f_pos = pos;
2055 return retval;
2056}
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
new file mode 100644
index 000000000000..6c6315d04028
--- /dev/null
+++ b/fs/proc/generic.c
@@ -0,0 +1,705 @@
1/*
2 * fs/proc/generic.c --- generic routines for the proc-fs
3 *
4 * This file contains generic proc-fs routines for handling
5 * directories and files.
6 *
7 * Copyright (C) 1991, 1992 Linus Torvalds.
8 * Copyright (C) 1997 Theodore Ts'o
9 */
10
11#include <linux/errno.h>
12#include <linux/time.h>
13#include <linux/proc_fs.h>
14#include <linux/stat.h>
15#include <linux/module.h>
16#include <linux/mount.h>
17#include <linux/smp_lock.h>
18#include <linux/init.h>
19#include <linux/idr.h>
20#include <linux/namei.h>
21#include <linux/bitops.h>
22#include <asm/uaccess.h>
23
24static ssize_t proc_file_read(struct file *file, char __user *buf,
25 size_t nbytes, loff_t *ppos);
26static ssize_t proc_file_write(struct file *file, const char __user *buffer,
27 size_t count, loff_t *ppos);
28static loff_t proc_file_lseek(struct file *, loff_t, int);
29
30int proc_match(int len, const char *name, struct proc_dir_entry *de)
31{
32 if (de->namelen != len)
33 return 0;
34 return !memcmp(name, de->name, len);
35}
36
37static struct file_operations proc_file_operations = {
38 .llseek = proc_file_lseek,
39 .read = proc_file_read,
40 .write = proc_file_write,
41};
42
43/* buffer size is one page but our output routines use some slack for overruns */
44#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
45
46static ssize_t
47proc_file_read(struct file *file, char __user *buf, size_t nbytes,
48 loff_t *ppos)
49{
50 struct inode * inode = file->f_dentry->d_inode;
51 char *page;
52 ssize_t retval=0;
53 int eof=0;
54 ssize_t n, count;
55 char *start;
56 struct proc_dir_entry * dp;
57
58 dp = PDE(inode);
59 if (!(page = (char*) __get_free_page(GFP_KERNEL)))
60 return -ENOMEM;
61
62 while ((nbytes > 0) && !eof) {
63 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
64
65 start = NULL;
66 if (dp->get_info) {
67 /* Handle old net routines */
68 n = dp->get_info(page, &start, *ppos, count);
69 if (n < count)
70 eof = 1;
71 } else if (dp->read_proc) {
72 /*
73 * How to be a proc read function
74 * ------------------------------
75 * Prototype:
76 * int f(char *buffer, char **start, off_t offset,
77 * int count, int *peof, void *dat)
78 *
79 * Assume that the buffer is "count" bytes in size.
80 *
81 * If you know you have supplied all the data you
82 * have, set *peof.
83 *
84 * You have three ways to return data:
85 * 0) Leave *start = NULL. (This is the default.)
86 * Put the data of the requested offset at that
87 * offset within the buffer. Return the number (n)
88 * of bytes there are from the beginning of the
89 * buffer up to the last byte of data. If the
90 * number of supplied bytes (= n - offset) is
91 * greater than zero and you didn't signal eof
92 * and the reader is prepared to take more data
93 * you will be called again with the requested
94 * offset advanced by the number of bytes
95 * absorbed. This interface is useful for files
96 * no larger than the buffer.
97 * 1) Set *start = an unsigned long value less than
98 * the buffer address but greater than zero.
99 * Put the data of the requested offset at the
100 * beginning of the buffer. Return the number of
101 * bytes of data placed there. If this number is
102 * greater than zero and you didn't signal eof
103 * and the reader is prepared to take more data
104 * you will be called again with the requested
105 * offset advanced by *start. This interface is
106 * useful when you have a large file consisting
107 * of a series of blocks which you want to count
108 * and return as wholes.
109 * (Hack by Paul.Russell@rustcorp.com.au)
110 * 2) Set *start = an address within the buffer.
111 * Put the data of the requested offset at *start.
112 * Return the number of bytes of data placed there.
113 * If this number is greater than zero and you
114 * didn't signal eof and the reader is prepared to
115 * take more data you will be called again with the
116 * requested offset advanced by the number of bytes
117 * absorbed.
118 */
119 n = dp->read_proc(page, &start, *ppos,
120 count, &eof, dp->data);
121 } else
122 break;
123
124 if (n == 0) /* end of file */
125 break;
126 if (n < 0) { /* error */
127 if (retval == 0)
128 retval = n;
129 break;
130 }
131
132 if (start == NULL) {
133 if (n > PAGE_SIZE) {
134 printk(KERN_ERR
135 "proc_file_read: Apparent buffer overflow!\n");
136 n = PAGE_SIZE;
137 }
138 n -= *ppos;
139 if (n <= 0)
140 break;
141 if (n > count)
142 n = count;
143 start = page + *ppos;
144 } else if (start < page) {
145 if (n > PAGE_SIZE) {
146 printk(KERN_ERR
147 "proc_file_read: Apparent buffer overflow!\n");
148 n = PAGE_SIZE;
149 }
150 if (n > count) {
151 /*
152 * Don't reduce n because doing so might
153 * cut off part of a data block.
154 */
155 printk(KERN_WARNING
156 "proc_file_read: Read count exceeded\n");
157 }
158 } else /* start >= page */ {
159 unsigned long startoff = (unsigned long)(start - page);
160 if (n > (PAGE_SIZE - startoff)) {
161 printk(KERN_ERR
162 "proc_file_read: Apparent buffer overflow!\n");
163 n = PAGE_SIZE - startoff;
164 }
165 if (n > count)
166 n = count;
167 }
168
169 n -= copy_to_user(buf, start < page ? page : start, n);
170 if (n == 0) {
171 if (retval == 0)
172 retval = -EFAULT;
173 break;
174 }
175
176 *ppos += start < page ? (unsigned long)start : n;
177 nbytes -= n;
178 buf += n;
179 retval += n;
180 }
181 free_page((unsigned long) page);
182 return retval;
183}
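/*
 * Illustrative sketch (not part of this file): a minimal module-side
 * read_proc handler using "method 0" from the comment above. The entry
 * name "example", the message text and the module wrapper are invented
 * only for illustration.
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/proc_fs.h>

static int example_read_proc(char *page, char **start, off_t off,
			     int count, int *eof, void *data)
{
	/* method 0: leave *start NULL, format from the top of the page and
	 * return the total length; proc_file_read() applies the offset */
	int len = sprintf(page, "hello from procfs\n");
	*eof = 1;
	return len;
}

static int __init example_init(void)
{
	if (!create_proc_read_entry("example", 0444, NULL,
				    example_read_proc, NULL))
		return -ENOMEM;
	return 0;
}

static void __exit example_exit(void)
{
	remove_proc_entry("example", NULL);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");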
184
185static ssize_t
186proc_file_write(struct file *file, const char __user *buffer,
187 size_t count, loff_t *ppos)
188{
189 struct inode *inode = file->f_dentry->d_inode;
190 struct proc_dir_entry * dp;
191
192 dp = PDE(inode);
193
194 if (!dp->write_proc)
195 return -EIO;
196
197 /* FIXME: does this routine need ppos? probably... */
198 return dp->write_proc(file, buffer, count, dp->data);
199}
200
201
202static loff_t
203proc_file_lseek(struct file *file, loff_t offset, int orig)
204{
205 lock_kernel();
206
207 switch (orig) {
208 case 0:
209 if (offset < 0)
210 goto out;
211 file->f_pos = offset;
212 unlock_kernel();
213 return(file->f_pos);
214 case 1:
215 if (offset + file->f_pos < 0)
216 goto out;
217 file->f_pos += offset;
218 unlock_kernel();
219 return(file->f_pos);
220 case 2:
221 goto out;
222 default:
223 goto out;
224 }
225
226out:
227 unlock_kernel();
228 return -EINVAL;
229}
230
231static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
232{
233 struct inode *inode = dentry->d_inode;
234 struct proc_dir_entry *de = PDE(inode);
235 int error;
236
237 error = inode_change_ok(inode, iattr);
238 if (error)
239 goto out;
240
241 error = inode_setattr(inode, iattr);
242 if (error)
243 goto out;
244
245 de->uid = inode->i_uid;
246 de->gid = inode->i_gid;
247 de->mode = inode->i_mode;
248out:
249 return error;
250}
251
252static struct inode_operations proc_file_inode_operations = {
253 .setattr = proc_notify_change,
254};
255
256/*
257 * This function parses a name such as "tty/driver/serial", and
258 * returns the struct proc_dir_entry for "/proc/tty/driver", and
259 * returns "serial" in residual.
260 */
261static int xlate_proc_name(const char *name,
262 struct proc_dir_entry **ret, const char **residual)
263{
264 const char *cp = name, *next;
265 struct proc_dir_entry *de;
266 int len;
267
268 de = &proc_root;
269 while (1) {
270 next = strchr(cp, '/');
271 if (!next)
272 break;
273
274 len = next - cp;
275 for (de = de->subdir; de ; de = de->next) {
276 if (proc_match(len, cp, de))
277 break;
278 }
279 if (!de)
280 return -ENOENT;
281 cp += len + 1;
282 }
283 *residual = cp;
284 *ret = de;
285 return 0;
286}
287
288static DEFINE_IDR(proc_inum_idr);
289static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
290
291#define PROC_DYNAMIC_FIRST 0xF0000000UL
292
293/*
294 * Return an inode number between PROC_DYNAMIC_FIRST and
295 * 0xffffffff, or zero on failure.
296 */
297static unsigned int get_inode_number(void)
298{
299 int i, inum = 0;
300 int error;
301
302retry:
303 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0)
304 return 0;
305
306 spin_lock(&proc_inum_lock);
307 error = idr_get_new(&proc_inum_idr, NULL, &i);
308 spin_unlock(&proc_inum_lock);
309 if (error == -EAGAIN)
310 goto retry;
311 else if (error)
312 return 0;
313
314 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST;
315
316 /* inum will never be more than 0xf0ffffff, so no check
317 * for overflow.
318 */
319
320 return inum;
321}
322
323static void release_inode_number(unsigned int inum)
324{
325 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;
326
327 spin_lock(&proc_inum_lock);
328 idr_remove(&proc_inum_idr, id);
329 spin_unlock(&proc_inum_lock);
330}
331
332static int proc_follow_link(struct dentry *dentry, struct nameidata *nd)
333{
334 nd_set_link(nd, PDE(dentry->d_inode)->data);
335 return 0;
336}
337
338static struct inode_operations proc_link_inode_operations = {
339 .readlink = generic_readlink,
340 .follow_link = proc_follow_link,
341};
342
343/*
344 * As some entries in /proc are volatile, we want to
345 * get rid of unused dentries. This could be made
346 * smarter: we could keep a "volatile" flag in the
347 * inode to indicate which ones to keep.
348 */
349static int proc_delete_dentry(struct dentry * dentry)
350{
351 return 1;
352}
353
354static struct dentry_operations proc_dentry_operations =
355{
356 .d_delete = proc_delete_dentry,
357};
358
359/*
360 * Don't create negative dentries here, return -ENOENT by hand
361 * instead.
362 */
363struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
364{
365 struct inode *inode = NULL;
366 struct proc_dir_entry * de;
367 int error = -ENOENT;
368
369 lock_kernel();
370 de = PDE(dir);
371 if (de) {
372 for (de = de->subdir; de ; de = de->next) {
373 if (de->namelen != dentry->d_name.len)
374 continue;
375 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
376 unsigned int ino = de->low_ino;
377
378 error = -EINVAL;
379 inode = proc_get_inode(dir->i_sb, ino, de);
380 break;
381 }
382 }
383 }
384 unlock_kernel();
385
386 if (inode) {
387 dentry->d_op = &proc_dentry_operations;
388 d_add(dentry, inode);
389 return NULL;
390 }
391 return ERR_PTR(error);
392}
393
394/*
395 * This returns non-zero if at EOF, so that the /proc
396 * root directory can use this and check if it should
397 * continue with the <pid> entries..
398 *
399 * Note that the VFS-layer doesn't care about the return
400 * value of the readdir() call, as long as it's non-negative
401 * for success..
402 */
403int proc_readdir(struct file * filp,
404 void * dirent, filldir_t filldir)
405{
406 struct proc_dir_entry * de;
407 unsigned int ino;
408 int i;
409 struct inode *inode = filp->f_dentry->d_inode;
410 int ret = 0;
411
412 lock_kernel();
413
414 ino = inode->i_ino;
415 de = PDE(inode);
416 if (!de) {
417 ret = -EINVAL;
418 goto out;
419 }
420 i = filp->f_pos;
421 switch (i) {
422 case 0:
423 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
424 goto out;
425 i++;
426 filp->f_pos++;
427 /* fall through */
428 case 1:
429 if (filldir(dirent, "..", 2, i,
430 parent_ino(filp->f_dentry),
431 DT_DIR) < 0)
432 goto out;
433 i++;
434 filp->f_pos++;
435 /* fall through */
436 default:
437 de = de->subdir;
438 i -= 2;
439 for (;;) {
440 if (!de) {
441 ret = 1;
442 goto out;
443 }
444 if (!i)
445 break;
446 de = de->next;
447 i--;
448 }
449
450 do {
451 if (filldir(dirent, de->name, de->namelen, filp->f_pos,
452 de->low_ino, de->mode >> 12) < 0)
453 goto out;
454 filp->f_pos++;
455 de = de->next;
456 } while (de);
457 }
458 ret = 1;
459out: unlock_kernel();
460 return ret;
461}
462
463/*
464 * These are the generic /proc directory operations. They
465 * use the in-memory "struct proc_dir_entry" tree to parse
466 * the /proc directory.
467 */
468static struct file_operations proc_dir_operations = {
469 .read = generic_read_dir,
470 .readdir = proc_readdir,
471};
472
473/*
474 * proc directories can do almost nothing..
475 */
476static struct inode_operations proc_dir_inode_operations = {
477 .lookup = proc_lookup,
478 .setattr = proc_notify_change,
479};
480
481static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
482{
483 unsigned int i;
484
485 i = get_inode_number();
486 if (i == 0)
487 return -EAGAIN;
488 dp->low_ino = i;
489 dp->next = dir->subdir;
490 dp->parent = dir;
491 dir->subdir = dp;
492 if (S_ISDIR(dp->mode)) {
493 if (dp->proc_iops == NULL) {
494 dp->proc_fops = &proc_dir_operations;
495 dp->proc_iops = &proc_dir_inode_operations;
496 }
497 dir->nlink++;
498 } else if (S_ISLNK(dp->mode)) {
499 if (dp->proc_iops == NULL)
500 dp->proc_iops = &proc_link_inode_operations;
501 } else if (S_ISREG(dp->mode)) {
502 if (dp->proc_fops == NULL)
503 dp->proc_fops = &proc_file_operations;
504 if (dp->proc_iops == NULL)
505 dp->proc_iops = &proc_file_inode_operations;
506 }
507 return 0;
508}
509
510/*
511 * Kill an inode that got unregistered..
512 */
513static void proc_kill_inodes(struct proc_dir_entry *de)
514{
515 struct list_head *p;
516 struct super_block *sb = proc_mnt->mnt_sb;
517
518 /*
519 * Actually it's a partial revoke().
520 */
521 file_list_lock();
522 list_for_each(p, &sb->s_files) {
523 struct file * filp = list_entry(p, struct file, f_list);
524 struct dentry * dentry = filp->f_dentry;
525 struct inode * inode;
526 struct file_operations *fops;
527
528 if (dentry->d_op != &proc_dentry_operations)
529 continue;
530 inode = dentry->d_inode;
531 if (PDE(inode) != de)
532 continue;
533 fops = filp->f_op;
534 filp->f_op = NULL;
535 fops_put(fops);
536 }
537 file_list_unlock();
538}
539
540static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent,
541 const char *name,
542 mode_t mode,
543 nlink_t nlink)
544{
545 struct proc_dir_entry *ent = NULL;
546 const char *fn = name;
547 int len;
548
549 /* make sure name is valid */
550 if (!name || !strlen(name)) goto out;
551
552 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0)
553 goto out;
554
555 /* At this point there must not be any '/' characters beyond *fn */
556 if (strchr(fn, '/'))
557 goto out;
558
559 len = strlen(fn);
560
561 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
562 if (!ent) goto out;
563
564 memset(ent, 0, sizeof(struct proc_dir_entry));
565 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
566 ent->name = ((char *) ent) + sizeof(*ent);
567 ent->namelen = len;
568 ent->mode = mode;
569 ent->nlink = nlink;
570 out:
571 return ent;
572}
573
574struct proc_dir_entry *proc_symlink(const char *name,
575 struct proc_dir_entry *parent, const char *dest)
576{
577 struct proc_dir_entry *ent;
578
579 ent = proc_create(&parent,name,
580 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1);
581
582 if (ent) {
583 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL);
584 if (ent->data) {
585 strcpy((char*)ent->data,dest);
586 if (proc_register(parent, ent) < 0) {
587 kfree(ent->data);
588 kfree(ent);
589 ent = NULL;
590 }
591 } else {
592 kfree(ent);
593 ent = NULL;
594 }
595 }
596 return ent;
597}
598
599struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
600 struct proc_dir_entry *parent)
601{
602 struct proc_dir_entry *ent;
603
604 ent = proc_create(&parent, name, S_IFDIR | mode, 2);
605 if (ent) {
606 ent->proc_fops = &proc_dir_operations;
607 ent->proc_iops = &proc_dir_inode_operations;
608
609 if (proc_register(parent, ent) < 0) {
610 kfree(ent);
611 ent = NULL;
612 }
613 }
614 return ent;
615}
616
617struct proc_dir_entry *proc_mkdir(const char *name,
618 struct proc_dir_entry *parent)
619{
620 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
621}
622
623struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
624 struct proc_dir_entry *parent)
625{
626 struct proc_dir_entry *ent;
627 nlink_t nlink;
628
629 if (S_ISDIR(mode)) {
630 if ((mode & S_IALLUGO) == 0)
631 mode |= S_IRUGO | S_IXUGO;
632 nlink = 2;
633 } else {
634 if ((mode & S_IFMT) == 0)
635 mode |= S_IFREG;
636 if ((mode & S_IALLUGO) == 0)
637 mode |= S_IRUGO;
638 nlink = 1;
639 }
640
641 ent = proc_create(&parent,name,mode,nlink);
642 if (ent) {
643 if (S_ISDIR(mode)) {
644 ent->proc_fops = &proc_dir_operations;
645 ent->proc_iops = &proc_dir_inode_operations;
646 }
647 if (proc_register(parent, ent) < 0) {
648 kfree(ent);
649 ent = NULL;
650 }
651 }
652 return ent;
653}
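/*
 * Illustrative sketch (not part of this file): typical module usage of the
 * creation helpers above, paired with remove_proc_entry() on unload. The
 * names "example_dir", "example_link" and the link target are assumptions
 * made only for this example.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/proc_fs.h>

static struct proc_dir_entry *example_dir;

static int __init example_tree_init(void)
{
	example_dir = proc_mkdir("example_dir", NULL);
	if (!example_dir)
		return -ENOMEM;
	if (!proc_symlink("example_link", example_dir, "../meminfo")) {
		remove_proc_entry("example_dir", NULL);
		return -ENOMEM;
	}
	return 0;
}

static void __exit example_tree_exit(void)
{
	remove_proc_entry("example_link", example_dir);
	remove_proc_entry("example_dir", NULL);
}

module_init(example_tree_init);
module_exit(example_tree_exit);
MODULE_LICENSE("GPL");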
654
655void free_proc_entry(struct proc_dir_entry *de)
656{
657 unsigned int ino = de->low_ino;
658
659 if (ino < PROC_DYNAMIC_FIRST)
660 return;
661
662 release_inode_number(ino);
663
664 if (S_ISLNK(de->mode) && de->data)
665 kfree(de->data);
666 kfree(de);
667}
668
669/*
670 * Remove a /proc entry and free it if it's not currently in use.
671 * If it is in use, we set the 'deleted' flag.
672 */
673void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
674{
675 struct proc_dir_entry **p;
676 struct proc_dir_entry *de;
677 const char *fn = name;
678 int len;
679
680 if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
681 goto out;
682 len = strlen(fn);
683 for (p = &parent->subdir; *p; p=&(*p)->next ) {
684 if (!proc_match(len, fn, *p))
685 continue;
686 de = *p;
687 *p = de->next;
688 de->next = NULL;
689 if (S_ISDIR(de->mode))
690 parent->nlink--;
691 proc_kill_inodes(de);
692 de->nlink = 0;
693 WARN_ON(de->subdir);
694 if (!atomic_read(&de->count))
695 free_proc_entry(de);
696 else {
697 de->deleted = 1;
698 printk("remove_proc_entry: %s/%s busy, count=%d\n",
699 parent->name, de->name, atomic_read(&de->count));
700 }
701 break;
702 }
703out:
704 return;
705}
diff --git a/fs/proc/inode-alloc.txt b/fs/proc/inode-alloc.txt
new file mode 100644
index 000000000000..77212f938c2c
--- /dev/null
+++ b/fs/proc/inode-alloc.txt
@@ -0,0 +1,14 @@
1Current inode allocations in the proc-fs (hex-numbers):
2
3 00000000 reserved
4 00000001-00000fff static entries (goners)
5 001 root-ino
6
7 00001000-00001fff unused
8 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff
9 80000000-efffffff unused
10 f0000000-ffffffff dynamic entries
11
12Goal:
13 a) once we split the thing into several virtual filesystems we
14 will get rid of magical ranges (and this file, BTW).
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
new file mode 100644
index 000000000000..133c28685105
--- /dev/null
+++ b/fs/proc/inode.c
@@ -0,0 +1,218 @@
1/*
2 * linux/fs/proc/inode.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7#include <linux/time.h>
8#include <linux/proc_fs.h>
9#include <linux/kernel.h>
10#include <linux/mm.h>
11#include <linux/string.h>
12#include <linux/stat.h>
13#include <linux/file.h>
14#include <linux/limits.h>
15#include <linux/init.h>
16#include <linux/module.h>
17#include <linux/smp_lock.h>
18
19#include <asm/system.h>
20#include <asm/uaccess.h>
21
22extern void free_proc_entry(struct proc_dir_entry *);
23
24static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
25{
26 if (de)
27 atomic_inc(&de->count);
28 return de;
29}
30
31/*
32 * Decrements the use count and checks for deferred deletion.
33 */
34static void de_put(struct proc_dir_entry *de)
35{
36 if (de) {
37 lock_kernel();
38 if (!atomic_read(&de->count)) {
39 printk("de_put: entry %s already free!\n", de->name);
40 unlock_kernel();
41 return;
42 }
43
44 if (atomic_dec_and_test(&de->count)) {
45 if (de->deleted) {
46 printk("de_put: deferred delete of %s\n",
47 de->name);
48 free_proc_entry(de);
49 }
50 }
51 unlock_kernel();
52 }
53}
54
55/*
56 * Decrement the use count of the proc_dir_entry.
57 */
58static void proc_delete_inode(struct inode *inode)
59{
60 struct proc_dir_entry *de;
61 struct task_struct *tsk;
62
63 /* Let go of any associated process */
64 tsk = PROC_I(inode)->task;
65 if (tsk)
66 put_task_struct(tsk);
67
68 /* Let go of any associated proc directory entry */
69 de = PROC_I(inode)->pde;
70 if (de) {
71 if (de->owner)
72 module_put(de->owner);
73 de_put(de);
74 }
75 clear_inode(inode);
76}
77
78struct vfsmount *proc_mnt;
79
80static void proc_read_inode(struct inode * inode)
81{
82 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
83}
84
85static kmem_cache_t * proc_inode_cachep;
86
87static struct inode *proc_alloc_inode(struct super_block *sb)
88{
89 struct proc_inode *ei;
90 struct inode *inode;
91
92 ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL);
93 if (!ei)
94 return NULL;
95 ei->task = NULL;
96 ei->type = 0;
97 ei->op.proc_get_link = NULL;
98 ei->pde = NULL;
99 inode = &ei->vfs_inode;
100 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
101 return inode;
102}
103
104static void proc_destroy_inode(struct inode *inode)
105{
106 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
107}
108
109static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
110{
111 struct proc_inode *ei = (struct proc_inode *) foo;
112
113 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
114 SLAB_CTOR_CONSTRUCTOR)
115 inode_init_once(&ei->vfs_inode);
116}
117
118int __init proc_init_inodecache(void)
119{
120 proc_inode_cachep = kmem_cache_create("proc_inode_cache",
121 sizeof(struct proc_inode),
122 0, SLAB_RECLAIM_ACCOUNT,
123 init_once, NULL);
124 if (proc_inode_cachep == NULL)
125 return -ENOMEM;
126 return 0;
127}
128
129static int proc_remount(struct super_block *sb, int *flags, char *data)
130{
131 *flags |= MS_NODIRATIME;
132 return 0;
133}
134
135static struct super_operations proc_sops = {
136 .alloc_inode = proc_alloc_inode,
137 .destroy_inode = proc_destroy_inode,
138 .read_inode = proc_read_inode,
139 .drop_inode = generic_delete_inode,
140 .delete_inode = proc_delete_inode,
141 .statfs = simple_statfs,
142 .remount_fs = proc_remount,
143};
144
145struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
146 struct proc_dir_entry *de)
147{
148 struct inode * inode;
149
150 /*
151 * Increment the use count so the dir entry can't disappear.
152 */
153 de_get(de);
154
155 WARN_ON(de && de->deleted);
156
157 inode = iget(sb, ino);
158 if (!inode)
159 goto out_fail;
160
161 PROC_I(inode)->pde = de;
162 if (de) {
163 if (de->mode) {
164 inode->i_mode = de->mode;
165 inode->i_uid = de->uid;
166 inode->i_gid = de->gid;
167 }
168 if (de->size)
169 inode->i_size = de->size;
170 if (de->nlink)
171 inode->i_nlink = de->nlink;
172 if (!try_module_get(de->owner))
173 goto out_fail;
174 if (de->proc_iops)
175 inode->i_op = de->proc_iops;
176 if (de->proc_fops)
177 inode->i_fop = de->proc_fops;
178 }
179
180out:
181 return inode;
182
183out_fail:
184 de_put(de);
185 goto out;
186}
187
188int proc_fill_super(struct super_block *s, void *data, int silent)
189{
190 struct inode * root_inode;
191
192 s->s_flags |= MS_NODIRATIME;
193 s->s_blocksize = 1024;
194 s->s_blocksize_bits = 10;
195 s->s_magic = PROC_SUPER_MAGIC;
196 s->s_op = &proc_sops;
197 s->s_time_gran = 1;
198
199 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
200 if (!root_inode)
201 goto out_no_root;
202 /*
203 * Fixup the root inode's nlink value
204 */
205 root_inode->i_nlink += nr_processes();
206 root_inode->i_uid = 0;
207 root_inode->i_gid = 0;
208 s->s_root = d_alloc_root(root_inode);
209 if (!s->s_root)
210 goto out_no_root;
211 return 0;
212
213out_no_root:
214 printk("proc_fill_super: get root inode failed\n");
215 iput(root_inode);
216 return -ENOMEM;
217}
218MODULE_LICENSE("GPL");
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
new file mode 100644
index 000000000000..3e55198f9806
--- /dev/null
+++ b/fs/proc/internal.h
@@ -0,0 +1,48 @@
1/* internal.h: internal procfs definitions
2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/proc_fs.h>
13
14struct vmalloc_info {
15 unsigned long used;
16 unsigned long largest_chunk;
17};
18
19#ifdef CONFIG_MMU
20#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
21extern void get_vmalloc_info(struct vmalloc_info *vmi);
22#else
23
24#define VMALLOC_TOTAL 0UL
25#define get_vmalloc_info(vmi) \
26do { \
27 (vmi)->used = 0; \
28 (vmi)->largest_chunk = 0; \
29} while(0)
30
31#endif
32
33extern void create_seq_entry(char *name, mode_t mode, struct file_operations *f);
34extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **);
35extern int proc_tid_stat(struct task_struct *, char *);
36extern int proc_tgid_stat(struct task_struct *, char *);
37extern int proc_pid_status(struct task_struct *, char *);
38extern int proc_pid_statm(struct task_struct *, char *);
39
40static inline struct task_struct *proc_task(struct inode *inode)
41{
42 return PROC_I(inode)->task;
43}
44
45static inline int proc_type(struct inode *inode)
46{
47 return PROC_I(inode)->type;
48}
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
new file mode 100644
index 000000000000..1c7da988fcc3
--- /dev/null
+++ b/fs/proc/kcore.c
@@ -0,0 +1,404 @@
1/*
2 * fs/proc/kcore.c kernel ELF core dumper
3 *
4 * Modelled on fs/exec.c:aout_core_dump()
5 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
6 * ELF version written by David Howells <David.Howells@nexor.co.uk>
7 * Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
8 * Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
9 * Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
10 */
11
12#include <linux/config.h>
13#include <linux/mm.h>
14#include <linux/proc_fs.h>
15#include <linux/user.h>
16#include <linux/a.out.h>
17#include <linux/elf.h>
18#include <linux/elfcore.h>
19#include <linux/vmalloc.h>
20#include <linux/highmem.h>
21#include <linux/init.h>
22#include <asm/uaccess.h>
23#include <asm/io.h>
24
25
26static int open_kcore(struct inode * inode, struct file * filp)
27{
28 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
29}
30
31static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *);
32
33struct file_operations proc_kcore_operations = {
34 .read = read_kcore,
35 .open = open_kcore,
36};
37
38#ifndef kc_vaddr_to_offset
39#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
40#endif
41#ifndef kc_offset_to_vaddr
42#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
43#endif
44
45#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
46
47/* An ELF note in memory */
48struct memelfnote
49{
50 const char *name;
51 int type;
52 unsigned int datasz;
53 void *data;
54};
55
56static struct kcore_list *kclist;
57static DEFINE_RWLOCK(kclist_lock);
58
59void
60kclist_add(struct kcore_list *new, void *addr, size_t size)
61{
62 new->addr = (unsigned long)addr;
63 new->size = size;
64
65 write_lock(&kclist_lock);
66 new->next = kclist;
67 kclist = new;
68 write_unlock(&kclist_lock);
69}
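/*
 * Illustrative sketch (not part of this file): architecture setup code
 * registers the regions it wants visible in /proc/kcore roughly like this,
 * in the spirit of a typical mem_init(). The descriptor and function names
 * here are assumptions made only for this example.
 */
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/bootmem.h>
#include <asm/page.h>

static struct kcore_list kcore_lowmem;

static void __init register_kcore_regions(void)
{
	/* expose the direct-mapped low memory as one PT_LOAD segment */
	kclist_add(&kcore_lowmem, __va(0), max_low_pfn << PAGE_SHIFT);
}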
70
71static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
72{
73 size_t try, size;
74 struct kcore_list *m;
75
76 *nphdr = 1; /* PT_NOTE */
77 size = 0;
78
79 for (m=kclist; m; m=m->next) {
80 try = kc_vaddr_to_offset((size_t)m->addr + m->size);
81 if (try > size)
82 size = try;
83 *nphdr = *nphdr + 1;
84 }
85 *elf_buflen = sizeof(struct elfhdr) +
86 (*nphdr + 2)*sizeof(struct elf_phdr) +
87 3 * (sizeof(struct elf_note) + 4) +
88 sizeof(struct elf_prstatus) +
89 sizeof(struct elf_prpsinfo) +
90 sizeof(struct task_struct);
91 *elf_buflen = PAGE_ALIGN(*elf_buflen);
92 return size + *elf_buflen;
93}
94
95
96/*****************************************************************************/
97/*
98 * determine size of ELF note
99 */
100static int notesize(struct memelfnote *en)
101{
102 int sz;
103
104 sz = sizeof(struct elf_note);
105 sz += roundup(strlen(en->name), 4);
106 sz += roundup(en->datasz, 4);
107
108 return sz;
109} /* end notesize() */
110
111/*****************************************************************************/
112/*
113 * store a note in the header buffer
114 */
115static char *storenote(struct memelfnote *men, char *bufp)
116{
117 struct elf_note en;
118
119#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)
120
121 en.n_namesz = strlen(men->name);
122 en.n_descsz = men->datasz;
123 en.n_type = men->type;
124
125 DUMP_WRITE(&en, sizeof(en));
126 DUMP_WRITE(men->name, en.n_namesz);
127
128 /* XXX - cast from long long to long to avoid need for libgcc.a */
129 bufp = (char*) roundup((unsigned long)bufp,4);
130 DUMP_WRITE(men->data, men->datasz);
131 bufp = (char*) roundup((unsigned long)bufp,4);
132
133#undef DUMP_WRITE
134
135 return bufp;
136} /* end storenote() */
137
138/*
139 * store an ELF coredump header in the supplied buffer
140 * nphdr is the number of elf_phdr to insert
141 */
142static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
143{
144 struct elf_prstatus prstatus; /* NT_PRSTATUS */
145 struct elf_prpsinfo prpsinfo; /* NT_PRPSINFO */
146 struct elf_phdr *nhdr, *phdr;
147 struct elfhdr *elf;
148 struct memelfnote notes[3];
149 off_t offset = 0;
150 struct kcore_list *m;
151
152 /* setup ELF header */
153 elf = (struct elfhdr *) bufp;
154 bufp += sizeof(struct elfhdr);
155 offset += sizeof(struct elfhdr);
156 memcpy(elf->e_ident, ELFMAG, SELFMAG);
157 elf->e_ident[EI_CLASS] = ELF_CLASS;
158 elf->e_ident[EI_DATA] = ELF_DATA;
159 elf->e_ident[EI_VERSION]= EV_CURRENT;
160 elf->e_ident[EI_OSABI] = ELF_OSABI;
161 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
162 elf->e_type = ET_CORE;
163 elf->e_machine = ELF_ARCH;
164 elf->e_version = EV_CURRENT;
165 elf->e_entry = 0;
166 elf->e_phoff = sizeof(struct elfhdr);
167 elf->e_shoff = 0;
168#if defined(CONFIG_H8300)
169 elf->e_flags = ELF_FLAGS;
170#else
171 elf->e_flags = 0;
172#endif
173 elf->e_ehsize = sizeof(struct elfhdr);
174 elf->e_phentsize= sizeof(struct elf_phdr);
175 elf->e_phnum = nphdr;
176 elf->e_shentsize= 0;
177 elf->e_shnum = 0;
178 elf->e_shstrndx = 0;
179
180 /* setup ELF PT_NOTE program header */
181 nhdr = (struct elf_phdr *) bufp;
182 bufp += sizeof(struct elf_phdr);
183 offset += sizeof(struct elf_phdr);
184 nhdr->p_type = PT_NOTE;
185 nhdr->p_offset = 0;
186 nhdr->p_vaddr = 0;
187 nhdr->p_paddr = 0;
188 nhdr->p_filesz = 0;
189 nhdr->p_memsz = 0;
190 nhdr->p_flags = 0;
191 nhdr->p_align = 0;
192
193 /* setup ELF PT_LOAD program header for every area */
194 for (m=kclist; m; m=m->next) {
195 phdr = (struct elf_phdr *) bufp;
196 bufp += sizeof(struct elf_phdr);
197 offset += sizeof(struct elf_phdr);
198
199 phdr->p_type = PT_LOAD;
200 phdr->p_flags = PF_R|PF_W|PF_X;
201 phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
202 phdr->p_vaddr = (size_t)m->addr;
203 phdr->p_paddr = 0;
204 phdr->p_filesz = phdr->p_memsz = m->size;
205 phdr->p_align = PAGE_SIZE;
206 }
207
208 /*
209 * Set up the notes in similar form to SVR4 core dumps made
210 * with info from their /proc.
211 */
212 nhdr->p_offset = offset;
213
214 /* set up the process status */
215 notes[0].name = "CORE";
216 notes[0].type = NT_PRSTATUS;
217 notes[0].datasz = sizeof(struct elf_prstatus);
218 notes[0].data = &prstatus;
219
220 memset(&prstatus, 0, sizeof(struct elf_prstatus));
221
222 nhdr->p_filesz = notesize(&notes[0]);
223 bufp = storenote(&notes[0], bufp);
224
225 /* set up the process info */
226 notes[1].name = "CORE";
227 notes[1].type = NT_PRPSINFO;
228 notes[1].datasz = sizeof(struct elf_prpsinfo);
229 notes[1].data = &prpsinfo;
230
231 memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
232 prpsinfo.pr_state = 0;
233 prpsinfo.pr_sname = 'R';
234 prpsinfo.pr_zomb = 0;
235
236 strcpy(prpsinfo.pr_fname, "vmlinux");
237 strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ);
238
239 nhdr->p_filesz += notesize(&notes[1]);
240 bufp = storenote(&notes[1], bufp);
241
242 /* set up the task structure */
243 notes[2].name = "CORE";
244 notes[2].type = NT_TASKSTRUCT;
245 notes[2].datasz = sizeof(struct task_struct);
246 notes[2].data = current;
247
248 nhdr->p_filesz += notesize(&notes[2]);
249 bufp = storenote(&notes[2], bufp);
250
251} /* end elf_kcore_store_hdr() */
252
253/*****************************************************************************/
254/*
255 * read from the ELF header and then kernel memory
256 */
257static ssize_t
258read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
259{
260 ssize_t acc = 0;
261 size_t size, tsz;
262 size_t elf_buflen;
263 int nphdr;
264 unsigned long start;
265
266 read_lock(&kclist_lock);
267 proc_root_kcore->size = size = get_kcore_size(&nphdr, &elf_buflen);
268 if (buflen == 0 || *fpos >= size) {
269 read_unlock(&kclist_lock);
270 return 0;
271 }
272
273 /* trim buflen to not go beyond EOF */
274 if (buflen > size - *fpos)
275 buflen = size - *fpos;
276
277 /* construct an ELF core header if we'll need some of it */
278 if (*fpos < elf_buflen) {
279 char * elf_buf;
280
281 tsz = elf_buflen - *fpos;
282 if (buflen < tsz)
283 tsz = buflen;
284 elf_buf = kmalloc(elf_buflen, GFP_ATOMIC);
285 if (!elf_buf) {
286 read_unlock(&kclist_lock);
287 return -ENOMEM;
288 }
289 memset(elf_buf, 0, elf_buflen);
290 elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
291 read_unlock(&kclist_lock);
292 if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
293 kfree(elf_buf);
294 return -EFAULT;
295 }
296 kfree(elf_buf);
297 buflen -= tsz;
298 *fpos += tsz;
299 buffer += tsz;
300 acc += tsz;
301
302 /* leave now if we have already filled the buffer */
303 if (buflen == 0)
304 return acc;
305 } else
306 read_unlock(&kclist_lock);
307
308 /*
309 * Check to see if our file offset matches with any of
310 * the addresses in the elf_phdr on our list.
311 */
312 start = kc_offset_to_vaddr(*fpos - elf_buflen);
313 if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
314 tsz = buflen;
315
316 while (buflen) {
317 struct kcore_list *m;
318
319 read_lock(&kclist_lock);
320 for (m=kclist; m; m=m->next) {
321 if (start >= m->addr && start < (m->addr+m->size))
322 break;
323 }
324 read_unlock(&kclist_lock);
325
326 if (m == NULL) {
327 if (clear_user(buffer, tsz))
328 return -EFAULT;
329 } else if ((start >= VMALLOC_START) && (start < VMALLOC_END)) {
330 char * elf_buf;
331 struct vm_struct *m;
332 unsigned long curstart = start;
333 unsigned long cursize = tsz;
334
335 elf_buf = kmalloc(tsz, GFP_KERNEL);
336 if (!elf_buf)
337 return -ENOMEM;
338 memset(elf_buf, 0, tsz);
339
340 read_lock(&vmlist_lock);
341 for (m=vmlist; m && cursize; m=m->next) {
342 unsigned long vmstart;
343 unsigned long vmsize;
344 unsigned long msize = m->size - PAGE_SIZE;
345
346 if (((unsigned long)m->addr + msize) <
347 curstart)
348 continue;
349 if ((unsigned long)m->addr > (curstart +
350 cursize))
351 break;
352 vmstart = (curstart < (unsigned long)m->addr ?
353 (unsigned long)m->addr : curstart);
354 if (((unsigned long)m->addr + msize) >
355 (curstart + cursize))
356 vmsize = curstart + cursize - vmstart;
357 else
358 vmsize = (unsigned long)m->addr +
359 msize - vmstart;
360 curstart = vmstart + vmsize;
361 cursize -= vmsize;
362 /* don't dump ioremap'd stuff! (TA) */
363 if (m->flags & VM_IOREMAP)
364 continue;
365 memcpy(elf_buf + (vmstart - start),
366 (char *)vmstart, vmsize);
367 }
368 read_unlock(&vmlist_lock);
369 if (copy_to_user(buffer, elf_buf, tsz)) {
370 kfree(elf_buf);
371 return -EFAULT;
372 }
373 kfree(elf_buf);
374 } else {
375 if (kern_addr_valid(start)) {
376 unsigned long n;
377
378 n = copy_to_user(buffer, (char *)start, tsz);
379 /*
380 * We cannot distinguish between a fault on the source
381 * and a fault on the destination. When this happens
382 * we clear the destination as well and hope the clear
383 * will trigger the EFAULT again.
384 */
385 if (n) {
386 if (clear_user(buffer + tsz - n,
387 tsz - n))
388 return -EFAULT;
389 }
390 } else {
391 if (clear_user(buffer, tsz))
392 return -EFAULT;
393 }
394 }
395 buflen -= tsz;
396 *fpos += tsz;
397 buffer += tsz;
398 acc += tsz;
399 start += tsz;
400 tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
401 }
402
403 return acc;
404}
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
new file mode 100644
index 000000000000..10d37bf25206
--- /dev/null
+++ b/fs/proc/kmsg.c
@@ -0,0 +1,55 @@
1/*
2 * linux/fs/proc/kmsg.c
3 *
4 * Copyright (C) 1992 by Linus Torvalds
5 *
6 */
7
8#include <linux/types.h>
9#include <linux/errno.h>
10#include <linux/time.h>
11#include <linux/kernel.h>
12#include <linux/poll.h>
13#include <linux/fs.h>
14
15#include <asm/uaccess.h>
16#include <asm/io.h>
17
18extern wait_queue_head_t log_wait;
19
20 extern int do_syslog(int type, char __user *buf, int count);
21
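/*
 * Command numbers passed to do_syslog() below (see kernel/printk.c):
 * 0 closes the log, 1 opens it, 2 reads from the log (blocking until
 * data is available), and 9 returns the number of unread characters
 * in the log buffer.
 */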
22static int kmsg_open(struct inode * inode, struct file * file)
23{
24 return do_syslog(1,NULL,0);
25}
26
27static int kmsg_release(struct inode * inode, struct file * file)
28{
29 (void) do_syslog(0,NULL,0);
30 return 0;
31}
32
33static ssize_t kmsg_read(struct file *file, char __user *buf,
34 size_t count, loff_t *ppos)
35{
36 if ((file->f_flags & O_NONBLOCK) && !do_syslog(9, NULL, 0))
37 return -EAGAIN;
38 return do_syslog(2, buf, count);
39}
40
41static unsigned int kmsg_poll(struct file *file, poll_table *wait)
42{
43 poll_wait(file, &log_wait, wait);
44 if (do_syslog(9, NULL, 0))
45 return POLLIN | POLLRDNORM;
46 return 0;
47}
48
49
50struct file_operations proc_kmsg_operations = {
51 .read = kmsg_read,
52 .poll = kmsg_poll,
53 .open = kmsg_open,
54 .release = kmsg_release,
55};
diff --git a/fs/proc/mmu.c b/fs/proc/mmu.c
new file mode 100644
index 000000000000..a7041038ad56
--- /dev/null
+++ b/fs/proc/mmu.c
@@ -0,0 +1,67 @@
1/* mmu.c: mmu memory info files
2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/types.h>
13#include <linux/errno.h>
14#include <linux/time.h>
15#include <linux/kernel.h>
16#include <linux/string.h>
17#include <linux/mman.h>
18#include <linux/proc_fs.h>
19#include <linux/mm.h>
20#include <linux/mmzone.h>
21#include <linux/pagemap.h>
22#include <linux/swap.h>
23#include <linux/slab.h>
24#include <linux/smp.h>
25#include <linux/seq_file.h>
26#include <linux/hugetlb.h>
27#include <linux/vmalloc.h>
28#include <asm/uaccess.h>
29#include <asm/pgtable.h>
30#include <asm/tlb.h>
31#include <asm/div64.h>
32#include "internal.h"
33
34void get_vmalloc_info(struct vmalloc_info *vmi)
35{
36 struct vm_struct *vma;
37 unsigned long free_area_size;
38 unsigned long prev_end;
39
40 vmi->used = 0;
41
42 if (!vmlist) {
43 vmi->largest_chunk = VMALLOC_TOTAL;
44 }
45 else {
46 vmi->largest_chunk = 0;
47
48 prev_end = VMALLOC_START;
49
50 read_lock(&vmlist_lock);
51
52 for (vma = vmlist; vma; vma = vma->next) {
53 vmi->used += vma->size;
54
55 free_area_size = (unsigned long) vma->addr - prev_end;
56 if (vmi->largest_chunk < free_area_size)
57 vmi->largest_chunk = free_area_size;
58
59 prev_end = vma->size + (unsigned long) vma->addr;
60 }
61
62 if (VMALLOC_END - prev_end > vmi->largest_chunk)
63 vmi->largest_chunk = VMALLOC_END - prev_end;
64
65 read_unlock(&vmlist_lock);
66 }
67}
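/*
 * Illustrative sketch (not part of this file): a caller in the style of the
 * /proc/meminfo handler, showing how the three vmalloc figures are derived
 * from the structure filled in above. The function name and the printk are
 * assumptions made only for this example.
 */
#include <linux/kernel.h>
#include <asm/pgtable.h>
#include "internal.h"

static void report_vmalloc_usage(void)
{
	struct vmalloc_info vmi;

	get_vmalloc_info(&vmi);
	printk(KERN_INFO "VmallocTotal: %lu kB VmallocUsed: %lu kB "
	       "VmallocChunk: %lu kB\n",
	       VMALLOC_TOTAL >> 10, vmi.used >> 10, vmi.largest_chunk >> 10);
}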
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
new file mode 100644
index 000000000000..f3bf016d5ee3
--- /dev/null
+++ b/fs/proc/nommu.c
@@ -0,0 +1,135 @@
1/* nommu.c: mmu-less memory info files
2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/errno.h>
15#include <linux/time.h>
16#include <linux/kernel.h>
17#include <linux/string.h>
18#include <linux/mman.h>
19#include <linux/proc_fs.h>
20#include <linux/mm.h>
21#include <linux/mmzone.h>
22#include <linux/pagemap.h>
23#include <linux/swap.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/seq_file.h>
27#include <linux/hugetlb.h>
28#include <linux/vmalloc.h>
29#include <asm/uaccess.h>
30#include <asm/pgtable.h>
31#include <asm/tlb.h>
32#include <asm/div64.h>
33#include "internal.h"
34
35/*
36 * display a list of all the VMAs the kernel knows about
37 * - nommu kernels have a single flat list
38 */
39static int nommu_vma_list_show(struct seq_file *m, void *v)
40{
41 struct vm_area_struct *vma;
42 unsigned long ino = 0;
43 struct file *file;
44 dev_t dev = 0;
45 int flags, len;
46
47 vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb);
48
49 flags = vma->vm_flags;
50 file = vma->vm_file;
51
52 if (file) {
53 struct inode *inode = vma->vm_file->f_dentry->d_inode;
54 dev = inode->i_sb->s_dev;
55 ino = inode->i_ino;
56 }
57
58 seq_printf(m,
59 "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
60 vma->vm_start,
61 vma->vm_end,
62 flags & VM_READ ? 'r' : '-',
63 flags & VM_WRITE ? 'w' : '-',
64 flags & VM_EXEC ? 'x' : '-',
65 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
66 vma->vm_pgoff << PAGE_SHIFT,
67 MAJOR(dev), MINOR(dev), ino, &len);
68
69 if (file) {
70 len = 25 + sizeof(void *) * 6 - len;
71 if (len < 1)
72 len = 1;
73 seq_printf(m, "%*c", len, ' ');
74 seq_path(m, file->f_vfsmnt, file->f_dentry, "");
75 }
76
77 seq_putc(m, '\n');
78 return 0;
79}
80
81static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos)
82{
83 struct rb_node *_rb;
84 loff_t pos = *_pos;
85 void *next = NULL;
86
87 down_read(&nommu_vma_sem);
88
89 for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) {
90 if (pos == 0) {
91 next = _rb;
92 break;
93 }
94 }
95
96 return next;
97}
98
99static void nommu_vma_list_stop(struct seq_file *m, void *v)
100{
101 up_read(&nommu_vma_sem);
102}
103
104static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos)
105{
106 (*pos)++;
107 return rb_next((struct rb_node *) v);
108}
109
110static struct seq_operations proc_nommu_vma_list_seqop = {
111 .start = nommu_vma_list_start,
112 .next = nommu_vma_list_next,
113 .stop = nommu_vma_list_stop,
114 .show = nommu_vma_list_show
115};
116
117static int proc_nommu_vma_list_open(struct inode *inode, struct file *file)
118{
119 return seq_open(file, &proc_nommu_vma_list_seqop);
120}
121
122static struct file_operations proc_nommu_vma_list_operations = {
123 .open = proc_nommu_vma_list_open,
124 .read = seq_read,
125 .llseek = seq_lseek,
126 .release = seq_release,
127};
128
129static int __init proc_nommu_init(void)
130{
131 create_seq_entry("maps", S_IRUGO, &proc_nommu_vma_list_operations);
132 return 0;
133}
134
135module_init(proc_nommu_init);
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
new file mode 100644
index 000000000000..67423c696c0a
--- /dev/null
+++ b/fs/proc/proc_devtree.c
@@ -0,0 +1,165 @@
1/*
2 * proc_devtree.c - handles /proc/device-tree
3 *
4 * Copyright 1997 Paul Mackerras
5 */
6#include <linux/errno.h>
7#include <linux/time.h>
8#include <linux/proc_fs.h>
9#include <linux/stat.h>
10#include <linux/string.h>
11#include <asm/prom.h>
12#include <asm/uaccess.h>
13
14#ifndef HAVE_ARCH_DEVTREE_FIXUPS
15static inline void set_node_proc_entry(struct device_node *np, struct proc_dir_entry *de)
16{
17}
18
19 static inline void set_node_name_link(struct device_node *np, struct proc_dir_entry *de)
20{
21}
22
23 static inline void set_node_addr_link(struct device_node *np, struct proc_dir_entry *de)
24{
25}
26#endif
27
28static struct proc_dir_entry *proc_device_tree;
29
30/*
31 * Supply data on a read from /proc/device-tree/node/property.
32 */
33static int property_read_proc(char *page, char **start, off_t off,
34 int count, int *eof, void *data)
35{
36 struct property *pp = data;
37 int n;
38
39 if (off >= pp->length) {
40 *eof = 1;
41 return 0;
42 }
43 n = pp->length - off;
44 if (n > count)
45 n = count;
46 else
47 *eof = 1;
48 memcpy(page, pp->value + off, n);
49 *start = page;
50 return n;
51}
52
53/*
54 * For a node with a name like "gc@10", we make symlinks called "gc"
55 * and "@10" to it.
56 */
57
58/*
59 * Process a node, adding entries for its children and its properties.
60 */
61void proc_device_tree_add_node(struct device_node *np, struct proc_dir_entry *de)
62{
63 struct property *pp;
64 struct proc_dir_entry *ent;
65 struct device_node *child, *sib;
66 const char *p, *at;
67 int l;
68 struct proc_dir_entry *list, **lastp, *al;
69
70 set_node_proc_entry(np, de);
71 lastp = &list;
72 for (pp = np->properties; pp != 0; pp = pp->next) {
73 /*
74 * Unfortunately proc_register puts each new entry
75 * at the beginning of the list. So we rearrange them.
76 */
77 ent = create_proc_read_entry(pp->name, strncmp(pp->name, "security-", 9) ?
78 S_IRUGO : S_IRUSR, de, property_read_proc, pp);
79 if (ent == 0)
80 break;
81 if (!strncmp(pp->name, "security-", 9))
82 ent->size = 0; /* don't leak number of password chars */
83 else
84 ent->size = pp->length;
85 *lastp = ent;
86 lastp = &ent->next;
87 }
88 child = NULL;
89 while ((child = of_get_next_child(np, child))) {
90 p = strrchr(child->full_name, '/');
91 if (!p)
92 p = child->full_name;
93 else
94 ++p;
95 /* chop off '@0' if the name ends with that */
96 l = strlen(p);
97 if (l > 2 && p[l-2] == '@' && p[l-1] == '0')
98 l -= 2;
99 ent = proc_mkdir(p, de);
100 if (ent == 0)
101 break;
102 *lastp = ent;
103 lastp = &ent->next;
104 proc_device_tree_add_node(child, ent);
105
106 /*
107 * If we left the address part on the name, consider
108 * adding symlinks from the name and address parts.
109 */
110 if (p[l] != 0 || (at = strchr(p, '@')) == 0)
111 continue;
112
113 /*
114 * If this is the first node with a given name property,
115 * add a symlink with the name property as its name.
116 */
117 sib = NULL;
118 while ((sib = of_get_next_child(np, sib)) && sib != child)
119 if (sib->name && strcmp(sib->name, child->name) == 0)
120 break;
121 if (sib == child && strncmp(p, child->name, l) != 0) {
122 al = proc_symlink(child->name, de, ent->name);
123 if (al == 0) {
124 of_node_put(sib);
125 break;
126 }
127 set_node_name_link(child, al);
128 *lastp = al;
129 lastp = &al->next;
130 }
131 of_node_put(sib);
132 /*
133 * Add another directory with the @address part as its name.
134 */
135 al = proc_symlink(at, de, ent->name);
136 if (al == 0)
137 break;
138 set_node_addr_link(child, al);
139 *lastp = al;
140 lastp = &al->next;
141 }
142 of_node_put(child);
143 *lastp = NULL;
144 de->subdir = list;
145}
146
147/*
148 * Called on initialization to set up the /proc/device-tree subtree
149 */
150void proc_device_tree_init(void)
151{
152 struct device_node *root;
153 if ( !have_of )
154 return;
155 proc_device_tree = proc_mkdir("device-tree", NULL);
156 if (proc_device_tree == 0)
157 return;
158 root = of_find_node_by_path("/");
159 if (root == 0) {
160 printk(KERN_ERR "/proc/device-tree: can't find root\n");
161 return;
162 }
163 proc_device_tree_add_node(root, proc_device_tree);
164 of_node_put(root);
165}
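Since property_read_proc() above hands back the raw property bytes, a userspace reader only has to open the file and read it. A small sketch, assuming the machine exposes the usual "model" string property of the root node (any other node/property path under /proc/device-tree works the same way):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	FILE *f = fopen("/proc/device-tree/model", "r");
	char buf[256];
	size_t n;

	if (!f) {
		perror("/proc/device-tree/model");
		return EXIT_FAILURE;
	}
	n = fread(buf, 1, sizeof(buf) - 1, f);
	/* string properties are usually NUL-terminated already;
	 * terminate defensively for printing */
	buf[n] = '\0';
	printf("model = %s\n", buf);
	fclose(f);
	return 0;
}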
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
new file mode 100644
index 000000000000..1d75d6ab6897
--- /dev/null
+++ b/fs/proc/proc_misc.c
@@ -0,0 +1,615 @@
1/*
2 * linux/fs/proc/proc_misc.c
3 *
4 * linux/fs/proc/array.c
5 * Copyright (C) 1992 by Linus Torvalds
6 * based on ideas by Darren Senn
7 *
8 * This used to be part of array.c. See the rest of history and credits
9 * there. I took this into a separate file and switched the thing to generic
10 * proc_file_inode_operations, leaving in array.c only per-process stuff.
11 * Inumbers allocation made dynamic (via create_proc_entry()). AV, May 1999.
12 *
13 * Changes:
14 * Fulton Green : Encapsulated position metric calculations.
15 * <kernel@FultonGreen.com>
16 */
17
18#include <linux/types.h>
19#include <linux/errno.h>
20#include <linux/time.h>
21#include <linux/kernel.h>
22#include <linux/kernel_stat.h>
23#include <linux/tty.h>
24#include <linux/string.h>
25#include <linux/mman.h>
26#include <linux/proc_fs.h>
27#include <linux/ioport.h>
28#include <linux/config.h>
29#include <linux/mm.h>
30#include <linux/mmzone.h>
31#include <linux/pagemap.h>
32#include <linux/swap.h>
33#include <linux/slab.h>
34#include <linux/smp.h>
35#include <linux/signal.h>
36#include <linux/module.h>
37#include <linux/init.h>
38#include <linux/smp_lock.h>
39#include <linux/seq_file.h>
40#include <linux/times.h>
41#include <linux/profile.h>
42#include <linux/blkdev.h>
43#include <linux/hugetlb.h>
44#include <linux/jiffies.h>
45#include <linux/sysrq.h>
46#include <linux/vmalloc.h>
47#include <asm/uaccess.h>
48#include <asm/pgtable.h>
49#include <asm/io.h>
50#include <asm/tlb.h>
51#include <asm/div64.h>
52#include "internal.h"
53
54#define LOAD_INT(x) ((x) >> FSHIFT)
55#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
56/*
57 * Warning: stuff below (imported functions) assumes that its output will fit
58 * into one page. For some of those functions it may be wrong. Moreover, we
59 * have a way to deal with that gracefully. Right now I used straightforward
60 * wrappers, but this needs further analysis wrt potential overflows.
61 */
62extern int get_hardware_list(char *);
63extern int get_stram_list(char *);
64extern int get_chrdev_list(char *);
65extern int get_filesystem_list(char *);
66extern int get_exec_domain_list(char *);
67extern int get_dma_list(char *);
68extern int get_locks_status (char *, char **, off_t, int);
69
70static int proc_calc_metrics(char *page, char **start, off_t off,
71 int count, int *eof, int len)
72{
73 if (len <= off+count) *eof = 1;
74 *start = page + off;
75 len -= off;
76 if (len>count) len = count;
77 if (len<0) len = 0;
78 return len;
79}
80
81static int loadavg_read_proc(char *page, char **start, off_t off,
82 int count, int *eof, void *data)
83{
84 int a, b, c;
85 int len;
86
87 a = avenrun[0] + (FIXED_1/200);
88 b = avenrun[1] + (FIXED_1/200);
89 c = avenrun[2] + (FIXED_1/200);
90 len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
91 LOAD_INT(a), LOAD_FRAC(a),
92 LOAD_INT(b), LOAD_FRAC(b),
93 LOAD_INT(c), LOAD_FRAC(c),
94 nr_running(), nr_threads, last_pid);
95 return proc_calc_metrics(page, start, off, count, eof, len);
96}
97
98static int uptime_read_proc(char *page, char **start, off_t off,
99 int count, int *eof, void *data)
100{
101 struct timespec uptime;
102 struct timespec idle;
103 int len;
104 cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
105
106 do_posix_clock_monotonic_gettime(&uptime);
107 cputime_to_timespec(idletime, &idle);
108 len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
109 (unsigned long) uptime.tv_sec,
110 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
111 (unsigned long) idle.tv_sec,
112 (idle.tv_nsec / (NSEC_PER_SEC / 100)));
113
114 return proc_calc_metrics(page, start, off, count, eof, len);
115}
116
117static int meminfo_read_proc(char *page, char **start, off_t off,
118 int count, int *eof, void *data)
119{
120 struct sysinfo i;
121 int len;
122 struct page_state ps;
123 unsigned long inactive;
124 unsigned long active;
125 unsigned long free;
126 unsigned long committed;
127 unsigned long allowed;
128 struct vmalloc_info vmi;
129
130 get_page_state(&ps);
131 get_zone_counts(&active, &inactive, &free);
132
133/*
134 * display in kilobytes.
135 */
136#define K(x) ((x) << (PAGE_SHIFT - 10))
137 si_meminfo(&i);
138 si_swapinfo(&i);
139 committed = atomic_read(&vm_committed_space);
140 allowed = ((totalram_pages - hugetlb_total_pages())
141 * sysctl_overcommit_ratio / 100) + total_swap_pages;
142
143 get_vmalloc_info(&vmi);
144
145 /*
146 * Tagged format, for easy grepping and expansion.
147 */
148 len = sprintf(page,
149 "MemTotal: %8lu kB\n"
150 "MemFree: %8lu kB\n"
151 "Buffers: %8lu kB\n"
152 "Cached: %8lu kB\n"
153 "SwapCached: %8lu kB\n"
154 "Active: %8lu kB\n"
155 "Inactive: %8lu kB\n"
156 "HighTotal: %8lu kB\n"
157 "HighFree: %8lu kB\n"
158 "LowTotal: %8lu kB\n"
159 "LowFree: %8lu kB\n"
160 "SwapTotal: %8lu kB\n"
161 "SwapFree: %8lu kB\n"
162 "Dirty: %8lu kB\n"
163 "Writeback: %8lu kB\n"
164 "Mapped: %8lu kB\n"
165 "Slab: %8lu kB\n"
166 "CommitLimit: %8lu kB\n"
167 "Committed_AS: %8lu kB\n"
168 "PageTables: %8lu kB\n"
169 "VmallocTotal: %8lu kB\n"
170 "VmallocUsed: %8lu kB\n"
171 "VmallocChunk: %8lu kB\n",
172 K(i.totalram),
173 K(i.freeram),
174 K(i.bufferram),
175 K(get_page_cache_size()-total_swapcache_pages-i.bufferram),
176 K(total_swapcache_pages),
177 K(active),
178 K(inactive),
179 K(i.totalhigh),
180 K(i.freehigh),
181 K(i.totalram-i.totalhigh),
182 K(i.freeram-i.freehigh),
183 K(i.totalswap),
184 K(i.freeswap),
185 K(ps.nr_dirty),
186 K(ps.nr_writeback),
187 K(ps.nr_mapped),
188 K(ps.nr_slab),
189 K(allowed),
190 K(committed),
191 K(ps.nr_page_table_pages),
192 (unsigned long)VMALLOC_TOTAL >> 10,
193 vmi.used >> 10,
194 vmi.largest_chunk >> 10
195 );
196
197 len += hugetlb_report_meminfo(page + len);
198
199 return proc_calc_metrics(page, start, off, count, eof, len);
200#undef K
201}
202
203extern struct seq_operations fragmentation_op;
204static int fragmentation_open(struct inode *inode, struct file *file)
205{
206 (void)inode;
207 return seq_open(file, &fragmentation_op);
208}
209
210static struct file_operations fragmentation_file_operations = {
211 .open = fragmentation_open,
212 .read = seq_read,
213 .llseek = seq_lseek,
214 .release = seq_release,
215};
216
217static int version_read_proc(char *page, char **start, off_t off,
218 int count, int *eof, void *data)
219{
220 int len;
221
222 strcpy(page, linux_banner);
223 len = strlen(page);
224 return proc_calc_metrics(page, start, off, count, eof, len);
225}
226
227extern struct seq_operations cpuinfo_op;
228static int cpuinfo_open(struct inode *inode, struct file *file)
229{
230 return seq_open(file, &cpuinfo_op);
231}
232static struct file_operations proc_cpuinfo_operations = {
233 .open = cpuinfo_open,
234 .read = seq_read,
235 .llseek = seq_lseek,
236 .release = seq_release,
237};
238
239extern struct seq_operations vmstat_op;
240static int vmstat_open(struct inode *inode, struct file *file)
241{
242 return seq_open(file, &vmstat_op);
243}
244static struct file_operations proc_vmstat_file_operations = {
245 .open = vmstat_open,
246 .read = seq_read,
247 .llseek = seq_lseek,
248 .release = seq_release,
249};
250
251#ifdef CONFIG_PROC_HARDWARE
252static int hardware_read_proc(char *page, char **start, off_t off,
253 int count, int *eof, void *data)
254{
255 int len = get_hardware_list(page);
256 return proc_calc_metrics(page, start, off, count, eof, len);
257}
258#endif
259
260#ifdef CONFIG_STRAM_PROC
261static int stram_read_proc(char *page, char **start, off_t off,
262 int count, int *eof, void *data)
263{
264 int len = get_stram_list(page);
265 return proc_calc_metrics(page, start, off, count, eof, len);
266}
267#endif
268
269extern struct seq_operations partitions_op;
270static int partitions_open(struct inode *inode, struct file *file)
271{
272 return seq_open(file, &partitions_op);
273}
274static struct file_operations proc_partitions_operations = {
275 .open = partitions_open,
276 .read = seq_read,
277 .llseek = seq_lseek,
278 .release = seq_release,
279};
280
281extern struct seq_operations diskstats_op;
282static int diskstats_open(struct inode *inode, struct file *file)
283{
284 return seq_open(file, &diskstats_op);
285}
286static struct file_operations proc_diskstats_operations = {
287 .open = diskstats_open,
288 .read = seq_read,
289 .llseek = seq_lseek,
290 .release = seq_release,
291};
292
293#ifdef CONFIG_MODULES
294extern struct seq_operations modules_op;
295static int modules_open(struct inode *inode, struct file *file)
296{
297 return seq_open(file, &modules_op);
298}
299static struct file_operations proc_modules_operations = {
300 .open = modules_open,
301 .read = seq_read,
302 .llseek = seq_lseek,
303 .release = seq_release,
304};
305#endif
306
307extern struct seq_operations slabinfo_op;
308extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *);
309static int slabinfo_open(struct inode *inode, struct file *file)
310{
311 return seq_open(file, &slabinfo_op);
312}
313static struct file_operations proc_slabinfo_operations = {
314 .open = slabinfo_open,
315 .read = seq_read,
316 .write = slabinfo_write,
317 .llseek = seq_lseek,
318 .release = seq_release,
319};
320
321static int show_stat(struct seq_file *p, void *v)
322{
323 int i;
324 unsigned long jif;
325 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
326 u64 sum = 0;
327
328 user = nice = system = idle = iowait =
329 irq = softirq = steal = cputime64_zero;
330 jif = - wall_to_monotonic.tv_sec;
331 if (wall_to_monotonic.tv_nsec)
332 --jif;
333
334 for_each_cpu(i) {
335 int j;
336
337 user = cputime64_add(user, kstat_cpu(i).cpustat.user);
338 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
339 system = cputime64_add(system, kstat_cpu(i).cpustat.system);
340 idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle);
341 iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait);
342 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
343 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
344 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
345 for (j = 0 ; j < NR_IRQS ; j++)
346 sum += kstat_cpu(i).irqs[j];
347 }
348
349 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu\n",
350 (unsigned long long)cputime64_to_clock_t(user),
351 (unsigned long long)cputime64_to_clock_t(nice),
352 (unsigned long long)cputime64_to_clock_t(system),
353 (unsigned long long)cputime64_to_clock_t(idle),
354 (unsigned long long)cputime64_to_clock_t(iowait),
355 (unsigned long long)cputime64_to_clock_t(irq),
356 (unsigned long long)cputime64_to_clock_t(softirq),
357 (unsigned long long)cputime64_to_clock_t(steal));
358 for_each_online_cpu(i) {
359
360 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
361 user = kstat_cpu(i).cpustat.user;
362 nice = kstat_cpu(i).cpustat.nice;
363 system = kstat_cpu(i).cpustat.system;
364 idle = kstat_cpu(i).cpustat.idle;
365 iowait = kstat_cpu(i).cpustat.iowait;
366 irq = kstat_cpu(i).cpustat.irq;
367 softirq = kstat_cpu(i).cpustat.softirq;
368 steal = kstat_cpu(i).cpustat.steal;
369 seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu\n",
370 i,
371 (unsigned long long)cputime64_to_clock_t(user),
372 (unsigned long long)cputime64_to_clock_t(nice),
373 (unsigned long long)cputime64_to_clock_t(system),
374 (unsigned long long)cputime64_to_clock_t(idle),
375 (unsigned long long)cputime64_to_clock_t(iowait),
376 (unsigned long long)cputime64_to_clock_t(irq),
377 (unsigned long long)cputime64_to_clock_t(softirq),
378 (unsigned long long)cputime64_to_clock_t(steal));
379 }
380 seq_printf(p, "intr %llu", (unsigned long long)sum);
381
382#if !defined(CONFIG_PPC64) && !defined(CONFIG_ALPHA)
383 for (i = 0; i < NR_IRQS; i++)
384 seq_printf(p, " %u", kstat_irqs(i));
385#endif
386
387 seq_printf(p,
388 "\nctxt %llu\n"
389 "btime %lu\n"
390 "processes %lu\n"
391 "procs_running %lu\n"
392 "procs_blocked %lu\n",
393 nr_context_switches(),
394 (unsigned long)jif,
395 total_forks,
396 nr_running(),
397 nr_iowait());
398
399 return 0;
400}
401
402static int stat_open(struct inode *inode, struct file *file)
403{
404 unsigned size = 4096 * (1 + num_possible_cpus() / 32);
405 char *buf;
406 struct seq_file *m;
407 int res;
408
409 /* don't ask for more than the kmalloc() max size, currently 128 KB */
410 if (size > 128 * 1024)
411 size = 128 * 1024;
412 buf = kmalloc(size, GFP_KERNEL);
413 if (!buf)
414 return -ENOMEM;
415
416 res = single_open(file, show_stat, NULL);
417 if (!res) {
418 m = file->private_data;
419 m->buf = buf;
420 m->size = size;
421 } else
422 kfree(buf);
423 return res;
424}
425static struct file_operations proc_stat_operations = {
426 .open = stat_open,
427 .read = seq_read,
428 .llseek = seq_lseek,
429 .release = single_release,
430};
431
432static int devices_read_proc(char *page, char **start, off_t off,
433 int count, int *eof, void *data)
434{
435 int len = get_chrdev_list(page);
436 len += get_blkdev_list(page+len);
437 return proc_calc_metrics(page, start, off, count, eof, len);
438}
439
440/*
441 * /proc/interrupts
442 */
443static void *int_seq_start(struct seq_file *f, loff_t *pos)
444{
445 return (*pos <= NR_IRQS) ? pos : NULL;
446}
447
448static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
449{
450 (*pos)++;
451 if (*pos > NR_IRQS)
452 return NULL;
453 return pos;
454}
455
456static void int_seq_stop(struct seq_file *f, void *v)
457{
458 /* Nothing to do */
459}
460
461
462extern int show_interrupts(struct seq_file *f, void *v); /* In arch code */
463static struct seq_operations int_seq_ops = {
464 .start = int_seq_start,
465 .next = int_seq_next,
466 .stop = int_seq_stop,
467 .show = show_interrupts
468};
469
470static int interrupts_open(struct inode *inode, struct file *filp)
471{
472 return seq_open(filp, &int_seq_ops);
473}
474
475static struct file_operations proc_interrupts_operations = {
476 .open = interrupts_open,
477 .read = seq_read,
478 .llseek = seq_lseek,
479 .release = seq_release,
480};
481
482static int filesystems_read_proc(char *page, char **start, off_t off,
483 int count, int *eof, void *data)
484{
485 int len = get_filesystem_list(page);
486 return proc_calc_metrics(page, start, off, count, eof, len);
487}
488
489static int cmdline_read_proc(char *page, char **start, off_t off,
490 int count, int *eof, void *data)
491{
492 int len;
493
494 len = sprintf(page, "%s\n", saved_command_line);
495 return proc_calc_metrics(page, start, off, count, eof, len);
496}
497
498static int locks_read_proc(char *page, char **start, off_t off,
499 int count, int *eof, void *data)
500{
501 int len = get_locks_status(page, start, off, count);
502
503 if (len < count)
504 *eof = 1;
505 return len;
506}
507
508static int execdomains_read_proc(char *page, char **start, off_t off,
509 int count, int *eof, void *data)
510{
511 int len = get_exec_domain_list(page);
512 return proc_calc_metrics(page, start, off, count, eof, len);
513}
514
515#ifdef CONFIG_MAGIC_SYSRQ
516/*
517 * writing 'C' to /proc/sysrq-trigger is like sysrq-C
518 */
519static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
520 size_t count, loff_t *ppos)
521{
522 if (count) {
523 char c;
524
525 if (get_user(c, buf))
526 return -EFAULT;
527 __handle_sysrq(c, NULL, NULL, 0);
528 }
529 return count;
530}
531
532static struct file_operations proc_sysrq_trigger_operations = {
533 .write = write_sysrq_trigger,
534};
535#endif
536
537struct proc_dir_entry *proc_root_kcore;
538
539void create_seq_entry(char *name, mode_t mode, struct file_operations *f)
540{
541 struct proc_dir_entry *entry;
542 entry = create_proc_entry(name, mode, NULL);
543 if (entry)
544 entry->proc_fops = f;
545}
546
547void __init proc_misc_init(void)
548{
549 struct proc_dir_entry *entry;
550 static struct {
551 char *name;
552 int (*read_proc)(char*,char**,off_t,int,int*,void*);
553 } *p, simple_ones[] = {
554 {"loadavg", loadavg_read_proc},
555 {"uptime", uptime_read_proc},
556 {"meminfo", meminfo_read_proc},
557 {"version", version_read_proc},
558#ifdef CONFIG_PROC_HARDWARE
559 {"hardware", hardware_read_proc},
560#endif
561#ifdef CONFIG_STRAM_PROC
562 {"stram", stram_read_proc},
563#endif
564 {"devices", devices_read_proc},
565 {"filesystems", filesystems_read_proc},
566 {"cmdline", cmdline_read_proc},
567 {"locks", locks_read_proc},
568 {"execdomains", execdomains_read_proc},
569 {NULL,}
570 };
571 for (p = simple_ones; p->name; p++)
572 create_proc_read_entry(p->name, 0, NULL, p->read_proc, NULL);
573
574 proc_symlink("mounts", NULL, "self/mounts");
575
576 /* And now for trickier ones */
577 entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
578 if (entry)
579 entry->proc_fops = &proc_kmsg_operations;
580 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
581 create_seq_entry("partitions", 0, &proc_partitions_operations);
582 create_seq_entry("stat", 0, &proc_stat_operations);
583 create_seq_entry("interrupts", 0, &proc_interrupts_operations);
584 create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
585 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
586 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
587 create_seq_entry("diskstats", 0, &proc_diskstats_operations);
588#ifdef CONFIG_MODULES
589 create_seq_entry("modules", 0, &proc_modules_operations);
590#endif
591#ifdef CONFIG_SCHEDSTATS
592 create_seq_entry("schedstat", 0, &proc_schedstat_operations);
593#endif
594#ifdef CONFIG_PROC_KCORE
595 proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL);
596 if (proc_root_kcore) {
597 proc_root_kcore->proc_fops = &proc_kcore_operations;
598 proc_root_kcore->size =
599 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
600 }
601#endif
602#ifdef CONFIG_MAGIC_SYSRQ
603 entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
604 if (entry)
605 entry->proc_fops = &proc_sysrq_trigger_operations;
606#endif
607#ifdef CONFIG_PPC32
608 {
609 extern struct file_operations ppc_htab_operations;
610 entry = create_proc_entry("ppc_htab", S_IRUGO|S_IWUSR, NULL);
611 if (entry)
612 entry->proc_fops = &ppc_htab_operations;
613 }
614#endif
615}
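Outside fs/proc, the same read_proc convention used by the simple_ones[] table above is how drivers of this era exposed small one-page files. A sketch of such a caller, with a made-up entry name ("example") and the same position bookkeeping that proc_calc_metrics() wraps; this is an illustration, not code from the tree:

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/proc_fs.h>

static int example_read_proc(char *page, char **start, off_t off,
			     int count, int *eof, void *data)
{
	int len = sprintf(page, "hello from /proc/example\n");

	/* same bookkeeping that proc_calc_metrics() encapsulates above */
	if (len <= off + count)
		*eof = 1;
	*start = page + off;
	len -= off;
	if (len > count)
		len = count;
	if (len < 0)
		len = 0;
	return len;
}

static int __init example_proc_init(void)
{
	return create_proc_read_entry("example", 0, NULL,
				      example_read_proc, NULL) ? 0 : -ENOMEM;
}

static void __exit example_proc_exit(void)
{
	remove_proc_entry("example", NULL);
}

module_init(example_proc_init);
module_exit(example_proc_exit);
MODULE_LICENSE("GPL");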
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
new file mode 100644
index 000000000000..15c4455b09eb
--- /dev/null
+++ b/fs/proc/proc_tty.c
@@ -0,0 +1,242 @@
1/*
2 * proc_tty.c -- handles /proc/tty
3 *
4 * Copyright 1997, Theodore Ts'o
5 */
6
7#include <asm/uaccess.h>
8
9#include <linux/init.h>
10#include <linux/errno.h>
11#include <linux/time.h>
12#include <linux/proc_fs.h>
13#include <linux/stat.h>
14#include <linux/tty.h>
15#include <linux/seq_file.h>
16#include <linux/bitops.h>
17
18static int tty_ldiscs_read_proc(char *page, char **start, off_t off,
19 int count, int *eof, void *data);
20
21/*
22 * The /proc/tty directory inodes...
23 */
24static struct proc_dir_entry *proc_tty_ldisc, *proc_tty_driver;
25
26/*
27 * This is the handler for /proc/tty/drivers
28 */
29static void show_tty_range(struct seq_file *m, struct tty_driver *p,
30 dev_t from, int num)
31{
32 seq_printf(m, "%-20s ", p->driver_name ? p->driver_name : "unknown");
33 seq_printf(m, "/dev/%-8s ", p->name);
34 if (p->num > 1) {
35 seq_printf(m, "%3d %d-%d ", MAJOR(from), MINOR(from),
36 MINOR(from) + num - 1);
37 } else {
38 seq_printf(m, "%3d %7d ", MAJOR(from), MINOR(from));
39 }
40 switch (p->type) {
41 case TTY_DRIVER_TYPE_SYSTEM:
42 seq_printf(m, "system");
43 if (p->subtype == SYSTEM_TYPE_TTY)
44 seq_printf(m, ":/dev/tty");
45 else if (p->subtype == SYSTEM_TYPE_SYSCONS)
46 seq_printf(m, ":console");
47 else if (p->subtype == SYSTEM_TYPE_CONSOLE)
48 seq_printf(m, ":vtmaster");
49 break;
50 case TTY_DRIVER_TYPE_CONSOLE:
51 seq_printf(m, "console");
52 break;
53 case TTY_DRIVER_TYPE_SERIAL:
54 seq_printf(m, "serial");
55 break;
56 case TTY_DRIVER_TYPE_PTY:
57 if (p->subtype == PTY_TYPE_MASTER)
58 seq_printf(m, "pty:master");
59 else if (p->subtype == PTY_TYPE_SLAVE)
60 seq_printf(m, "pty:slave");
61 else
62 seq_printf(m, "pty");
63 break;
64 default:
65 seq_printf(m, "type:%d.%d", p->type, p->subtype);
66 }
67 seq_putc(m, '\n');
68}
69
70static int show_tty_driver(struct seq_file *m, void *v)
71{
72 struct tty_driver *p = v;
73 dev_t from = MKDEV(p->major, p->minor_start);
74 dev_t to = from + p->num;
75
76 if (&p->tty_drivers == tty_drivers.next) {
77 /* pseudo-drivers first */
78 seq_printf(m, "%-20s /dev/%-8s ", "/dev/tty", "tty");
79 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 0);
80 seq_printf(m, "system:/dev/tty\n");
81 seq_printf(m, "%-20s /dev/%-8s ", "/dev/console", "console");
82 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 1);
83 seq_printf(m, "system:console\n");
84#ifdef CONFIG_UNIX98_PTYS
85 seq_printf(m, "%-20s /dev/%-8s ", "/dev/ptmx", "ptmx");
86 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 2);
87 seq_printf(m, "system\n");
88#endif
89#ifdef CONFIG_VT
90 seq_printf(m, "%-20s /dev/%-8s ", "/dev/vc/0", "vc/0");
91 seq_printf(m, "%3d %7d ", TTY_MAJOR, 0);
92 seq_printf(m, "system:vtmaster\n");
93#endif
94 }
95
96 while (MAJOR(from) < MAJOR(to)) {
97 dev_t next = MKDEV(MAJOR(from)+1, 0);
98 show_tty_range(m, p, from, next - from);
99 from = next;
100 }
101 if (from != to)
102 show_tty_range(m, p, from, to - from);
103 return 0;
104}
105
106/* iterator */
107static void *t_start(struct seq_file *m, loff_t *pos)
108{
109 struct list_head *p;
110 loff_t l = *pos;
111 list_for_each(p, &tty_drivers)
112 if (!l--)
113 return list_entry(p, struct tty_driver, tty_drivers);
114 return NULL;
115}
116
117static void *t_next(struct seq_file *m, void *v, loff_t *pos)
118{
119 struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next;
120 (*pos)++;
121 return p==&tty_drivers ? NULL :
122 list_entry(p, struct tty_driver, tty_drivers);
123}
124
125static void t_stop(struct seq_file *m, void *v)
126{
127}
128
129static struct seq_operations tty_drivers_op = {
130 .start = t_start,
131 .next = t_next,
132 .stop = t_stop,
133 .show = show_tty_driver
134};
135
136static int tty_drivers_open(struct inode *inode, struct file *file)
137{
138 return seq_open(file, &tty_drivers_op);
139}
140
141static struct file_operations proc_tty_drivers_operations = {
142 .open = tty_drivers_open,
143 .read = seq_read,
144 .llseek = seq_lseek,
145 .release = seq_release,
146};
147
148/*
149 * This is the handler for /proc/tty/ldiscs
150 */
151static int tty_ldiscs_read_proc(char *page, char **start, off_t off,
152 int count, int *eof, void *data)
153{
154 int i;
155 int len = 0;
156 off_t begin = 0;
157 struct tty_ldisc *ld;
158
159 for (i=0; i < NR_LDISCS; i++) {
160 ld = tty_ldisc_get(i);
161 if (ld == NULL)
162 continue;
163 len += sprintf(page+len, "%-10s %2d\n",
164 ld->name ? ld->name : "???", i);
165 tty_ldisc_put(i);
166 if (len+begin > off+count)
167 break;
168 if (len+begin < off) {
169 begin += len;
170 len = 0;
171 }
172 }
173 if (i >= NR_LDISCS)
174 *eof = 1;
175 if (off >= len+begin)
176 return 0;
177 *start = page + (off-begin);
178 return ((count < begin+len-off) ? count : begin+len-off);
179}
180
181/*
182 * This function is called by tty_register_driver() to handle
183 * registering the driver's /proc handler into /proc/tty/driver/<foo>
184 */
185void proc_tty_register_driver(struct tty_driver *driver)
186{
187 struct proc_dir_entry *ent;
188
189 if ((!driver->read_proc && !driver->write_proc) ||
190 !driver->driver_name ||
191 driver->proc_entry)
192 return;
193
194 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver);
195 if (!ent)
196 return;
197 ent->read_proc = driver->read_proc;
198 ent->write_proc = driver->write_proc;
199 ent->owner = driver->owner;
200 ent->data = driver;
201
202 driver->proc_entry = ent;
203}
204
205/*
206 * This function is called by tty_unregister_driver()
207 */
208void proc_tty_unregister_driver(struct tty_driver *driver)
209{
210 struct proc_dir_entry *ent;
211
212 ent = driver->proc_entry;
213 if (!ent)
214 return;
215
216 remove_proc_entry(driver->driver_name, proc_tty_driver);
217
218 driver->proc_entry = NULL;
219}
220
221/*
222 * Called by proc_root_init() to initialize the /proc/tty subtree
223 */
224void __init proc_tty_init(void)
225{
226 struct proc_dir_entry *entry;
227 if (!proc_mkdir("tty", NULL))
228 return;
229 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL);
230 /*
231 * /proc/tty/driver/serial reveals the exact character counts for
232 * serial links which is just too easy to abuse for inferring
233 * password lengths and inter-keystroke timings during password
234 * entry.
235 */
236 proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR | S_IXUSR, NULL);
237
238 create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL);
239 entry = create_proc_entry("tty/drivers", 0, NULL);
240 if (entry)
241 entry->proc_fops = &proc_tty_drivers_operations;
242}
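show_tty_driver() above emits one fixed-width row per major range: driver name, default device node, major number, minor range, and type. A quick userspace sketch that pulls the name and major back out of /proc/tty/drivers; the parsing is approximate and only meant to illustrate the row layout:

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/tty/drivers", "r");

	if (!f) {
		perror("/proc/tty/drivers");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		char name[32], node[32];
		int major;

		/* row format: "<name> /dev/<node> <major> <minors> <type>" */
		if (sscanf(line, "%31s /dev/%31s %d", name, node, &major) == 3)
			printf("%s -> major %d (/dev/%s)\n", name, major, node);
	}
	fclose(f);
	return 0;
}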
diff --git a/fs/proc/root.c b/fs/proc/root.c
new file mode 100644
index 000000000000..aef148f099a2
--- /dev/null
+++ b/fs/proc/root.c
@@ -0,0 +1,161 @@
1/*
2 * linux/fs/proc/root.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 *
6 * proc root directory handling functions
7 */
8
9#include <asm/uaccess.h>
10
11#include <linux/errno.h>
12#include <linux/time.h>
13#include <linux/proc_fs.h>
14#include <linux/stat.h>
15#include <linux/config.h>
16#include <linux/init.h>
17#include <linux/module.h>
18#include <linux/bitops.h>
19#include <linux/smp_lock.h>
20
21struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
22
23#ifdef CONFIG_SYSCTL
24struct proc_dir_entry *proc_sys_root;
25#endif
26
27static struct super_block *proc_get_sb(struct file_system_type *fs_type,
28 int flags, const char *dev_name, void *data)
29{
30 return get_sb_single(fs_type, flags, data, proc_fill_super);
31}
32
33static struct file_system_type proc_fs_type = {
34 .name = "proc",
35 .get_sb = proc_get_sb,
36 .kill_sb = kill_anon_super,
37};
38
39extern int __init proc_init_inodecache(void);
40void __init proc_root_init(void)
41{
42 int err = proc_init_inodecache();
43 if (err)
44 return;
45 err = register_filesystem(&proc_fs_type);
46 if (err)
47 return;
48 proc_mnt = kern_mount(&proc_fs_type);
49 err = PTR_ERR(proc_mnt);
50 if (IS_ERR(proc_mnt)) {
51 unregister_filesystem(&proc_fs_type);
52 return;
53 }
54 proc_misc_init();
55 proc_net = proc_mkdir("net", NULL);
56 proc_net_stat = proc_mkdir("net/stat", NULL);
57
58#ifdef CONFIG_SYSVIPC
59 proc_mkdir("sysvipc", NULL);
60#endif
61#ifdef CONFIG_SYSCTL
62 proc_sys_root = proc_mkdir("sys", NULL);
63#endif
64#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
65 proc_mkdir("sys/fs", NULL);
66 proc_mkdir("sys/fs/binfmt_misc", NULL);
67#endif
68 proc_root_fs = proc_mkdir("fs", NULL);
69 proc_root_driver = proc_mkdir("driver", NULL);
70 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
71#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
72 /* just give it a mountpoint */
73 proc_mkdir("openprom", NULL);
74#endif
75 proc_tty_init();
76#ifdef CONFIG_PROC_DEVICETREE
77 proc_device_tree_init();
78#endif
79 proc_bus = proc_mkdir("bus", NULL);
80}
81
82static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
83{
84 /*
85 * nr_threads is actually protected by the tasklist_lock;
86 * however, it's conventional to do reads, especially for
87 * reporting, without any locking whatsoever.
88 */
89 if (dir->i_ino == PROC_ROOT_INO) /* check for safety... */
90 dir->i_nlink = proc_root.nlink + nr_threads;
91
92 if (!proc_lookup(dir, dentry, nd)) {
93 return NULL;
94 }
95
96 return proc_pid_lookup(dir, dentry, nd);
97}
98
99static int proc_root_readdir(struct file * filp,
100 void * dirent, filldir_t filldir)
101{
102 unsigned int nr = filp->f_pos;
103 int ret;
104
105 lock_kernel();
106
107 if (nr < FIRST_PROCESS_ENTRY) {
108 int error = proc_readdir(filp, dirent, filldir);
109 if (error <= 0) {
110 unlock_kernel();
111 return error;
112 }
113 filp->f_pos = FIRST_PROCESS_ENTRY;
114 }
115 unlock_kernel();
116
117 ret = proc_pid_readdir(filp, dirent, filldir);
118 return ret;
119}
120
121/*
122 * The root /proc directory is special, as it has the
123 * <pid> directories. Thus we don't use the generic
124 * directory handling functions for that..
125 */
126static struct file_operations proc_root_operations = {
127 .read = generic_read_dir,
128 .readdir = proc_root_readdir,
129};
130
131/*
132 * proc root can do almost nothing..
133 */
134static struct inode_operations proc_root_inode_operations = {
135 .lookup = proc_root_lookup,
136};
137
138/*
139 * This is the root "inode" in the /proc tree..
140 */
141struct proc_dir_entry proc_root = {
142 .low_ino = PROC_ROOT_INO,
143 .namelen = 5,
144 .name = "/proc",
145 .mode = S_IFDIR | S_IRUGO | S_IXUGO,
146 .nlink = 2,
147 .proc_iops = &proc_root_inode_operations,
148 .proc_fops = &proc_root_operations,
149 .parent = &proc_root,
150};
151
152EXPORT_SYMBOL(proc_symlink);
153EXPORT_SYMBOL(proc_mkdir);
154EXPORT_SYMBOL(create_proc_entry);
155EXPORT_SYMBOL(remove_proc_entry);
156EXPORT_SYMBOL(proc_root);
157EXPORT_SYMBOL(proc_root_fs);
158EXPORT_SYMBOL(proc_net);
159EXPORT_SYMBOL(proc_net_stat);
160EXPORT_SYMBOL(proc_bus);
161EXPORT_SYMBOL(proc_root_driver);
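The EXPORT_SYMBOL() list above is what lets other subsystems hang their own entries off these roots. A sketch of a hypothetical module doing so under proc_bus ("mybus" is an invented name, not an existing directory):

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/proc_fs.h>

static struct proc_dir_entry *mybus_dir;

static int __init mybus_proc_init(void)
{
	/* create /proc/bus/mybus using the exported proc_bus root */
	mybus_dir = proc_mkdir("mybus", proc_bus);
	if (!mybus_dir)
		return -ENOMEM;
	return 0;
}

static void __exit mybus_proc_exit(void)
{
	remove_proc_entry("mybus", proc_bus);
}

module_init(mybus_proc_init);
module_exit(mybus_proc_exit);
MODULE_LICENSE("GPL");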
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
new file mode 100644
index 000000000000..28b4a0253a92
--- /dev/null
+++ b/fs/proc/task_mmu.c
@@ -0,0 +1,235 @@
1#include <linux/mm.h>
2#include <linux/hugetlb.h>
3#include <linux/mount.h>
4#include <linux/seq_file.h>
5#include <asm/elf.h>
6#include <asm/uaccess.h>
7#include "internal.h"
8
9char *task_mem(struct mm_struct *mm, char *buffer)
10{
11 unsigned long data, text, lib;
12
13 data = mm->total_vm - mm->shared_vm - mm->stack_vm;
14 text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
15 lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
16 buffer += sprintf(buffer,
17 "VmSize:\t%8lu kB\n"
18 "VmLck:\t%8lu kB\n"
19 "VmRSS:\t%8lu kB\n"
20 "VmData:\t%8lu kB\n"
21 "VmStk:\t%8lu kB\n"
22 "VmExe:\t%8lu kB\n"
23 "VmLib:\t%8lu kB\n"
24 "VmPTE:\t%8lu kB\n",
25 (mm->total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
26 mm->locked_vm << (PAGE_SHIFT-10),
27 get_mm_counter(mm, rss) << (PAGE_SHIFT-10),
28 data << (PAGE_SHIFT-10),
29 mm->stack_vm << (PAGE_SHIFT-10), text, lib,
30 (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
31 return buffer;
32}
33
34unsigned long task_vsize(struct mm_struct *mm)
35{
36 return PAGE_SIZE * mm->total_vm;
37}
38
39int task_statm(struct mm_struct *mm, int *shared, int *text,
40 int *data, int *resident)
41{
42 int rss = get_mm_counter(mm, rss);
43
44 *shared = rss - get_mm_counter(mm, anon_rss);
45 *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
46 >> PAGE_SHIFT;
47 *data = mm->total_vm - mm->shared_vm;
48 *resident = rss;
49 return mm->total_vm;
50}
51
52int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
53{
54 struct vm_area_struct * vma;
55 int result = -ENOENT;
56 struct task_struct *task = proc_task(inode);
57 struct mm_struct * mm = get_task_mm(task);
58
59 if (!mm)
60 goto out;
61 down_read(&mm->mmap_sem);
62
63 vma = mm->mmap;
64 while (vma) {
65 if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
66 break;
67 vma = vma->vm_next;
68 }
69
70 if (vma) {
71 *mnt = mntget(vma->vm_file->f_vfsmnt);
72 *dentry = dget(vma->vm_file->f_dentry);
73 result = 0;
74 }
75
76 up_read(&mm->mmap_sem);
77 mmput(mm);
78out:
79 return result;
80}
81
82static void pad_len_spaces(struct seq_file *m, int len)
83{
84 len = 25 + sizeof(void*) * 6 - len;
85 if (len < 1)
86 len = 1;
87 seq_printf(m, "%*c", len, ' ');
88}
89
90static int show_map(struct seq_file *m, void *v)
91{
92 struct task_struct *task = m->private;
93 struct vm_area_struct *map = v;
94 struct mm_struct *mm = map->vm_mm;
95 struct file *file = map->vm_file;
96 int flags = map->vm_flags;
97 unsigned long ino = 0;
98 dev_t dev = 0;
99 int len;
100
101 if (file) {
102 struct inode *inode = map->vm_file->f_dentry->d_inode;
103 dev = inode->i_sb->s_dev;
104 ino = inode->i_ino;
105 }
106
107 seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
108 map->vm_start,
109 map->vm_end,
110 flags & VM_READ ? 'r' : '-',
111 flags & VM_WRITE ? 'w' : '-',
112 flags & VM_EXEC ? 'x' : '-',
113 flags & VM_MAYSHARE ? 's' : 'p',
114 map->vm_pgoff << PAGE_SHIFT,
115 MAJOR(dev), MINOR(dev), ino, &len);
116
117 /*
118 * Print the dentry name for named mappings, and a
119 * special [heap] marker for the heap:
120 */
121 if (map->vm_file) {
122 pad_len_spaces(m, len);
123 seq_path(m, file->f_vfsmnt, file->f_dentry, "");
124 } else {
125 if (mm) {
126 if (map->vm_start <= mm->start_brk &&
127 map->vm_end >= mm->brk) {
128 pad_len_spaces(m, len);
129 seq_puts(m, "[heap]");
130 } else {
131 if (map->vm_start <= mm->start_stack &&
132 map->vm_end >= mm->start_stack) {
133
134 pad_len_spaces(m, len);
135 seq_puts(m, "[stack]");
136 }
137 }
138 } else {
139 pad_len_spaces(m, len);
140 seq_puts(m, "[vdso]");
141 }
142 }
143 seq_putc(m, '\n');
144 if (m->count < m->size) /* map is copied successfully */
145 m->version = (map != get_gate_vma(task))? map->vm_start: 0;
146 return 0;
147}
148
149static void *m_start(struct seq_file *m, loff_t *pos)
150{
151 struct task_struct *task = m->private;
152 unsigned long last_addr = m->version;
153 struct mm_struct *mm;
154 struct vm_area_struct *map, *tail_map;
155 loff_t l = *pos;
156
157 /*
158 * We remember last_addr rather than next_addr to hit with
159 * mmap_cache most of the time. We have zero last_addr at
160 * the beginning and also after lseek. We will have -1 last_addr
161 * after the end of the maps.
162 */
163
164 if (last_addr == -1UL)
165 return NULL;
166
167 mm = get_task_mm(task);
168 if (!mm)
169 return NULL;
170
171 tail_map = get_gate_vma(task);
172 down_read(&mm->mmap_sem);
173
174 /* Start with last addr hint */
175 if (last_addr && (map = find_vma(mm, last_addr))) {
176 map = map->vm_next;
177 goto out;
178 }
179
180 /*
181 * Check the map index is within the range and do
182 * sequential scan until m_index.
183 */
184 map = NULL;
185 if ((unsigned long)l < mm->map_count) {
186 map = mm->mmap;
187 while (l-- && map)
188 map = map->vm_next;
189 goto out;
190 }
191
192 if (l != mm->map_count)
193 tail_map = NULL; /* After gate map */
194
195out:
196 if (map)
197 return map;
198
199 /* End of maps has been reached */
200 m->version = (tail_map != NULL)? 0: -1UL;
201 up_read(&mm->mmap_sem);
202 mmput(mm);
203 return tail_map;
204}
205
206static void m_stop(struct seq_file *m, void *v)
207{
208 struct task_struct *task = m->private;
209 struct vm_area_struct *map = v;
210 if (map && map != get_gate_vma(task)) {
211 struct mm_struct *mm = map->vm_mm;
212 up_read(&mm->mmap_sem);
213 mmput(mm);
214 }
215}
216
217static void *m_next(struct seq_file *m, void *v, loff_t *pos)
218{
219 struct task_struct *task = m->private;
220 struct vm_area_struct *map = v;
221 struct vm_area_struct *tail_map = get_gate_vma(task);
222
223 (*pos)++;
224 if (map && (map != tail_map) && map->vm_next)
225 return map->vm_next;
226 m_stop(m, v);
227 return (map != tail_map)? tail_map: NULL;
228}
229
230struct seq_operations proc_pid_maps_op = {
231 .start = m_start,
232 .next = m_next,
233 .stop = m_stop,
234 .show = show_map
235};
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
new file mode 100644
index 000000000000..8f68827ed10e
--- /dev/null
+++ b/fs/proc/task_nommu.c
@@ -0,0 +1,164 @@
1
2#include <linux/mm.h>
3#include <linux/file.h>
4#include <linux/mount.h>
5#include <linux/seq_file.h>
6#include "internal.h"
7
8/*
9 * Logic: we've got two memory sums for each process, "shared", and
10 * "non-shared". Shared memory may get counted more than once, for
11 * each process that owns it. Non-shared memory is counted
12 * accurately.
13 */
14char *task_mem(struct mm_struct *mm, char *buffer)
15{
16 struct vm_list_struct *vml;
17 unsigned long bytes = 0, sbytes = 0, slack = 0;
18
19 down_read(&mm->mmap_sem);
20 for (vml = mm->context.vmlist; vml; vml = vml->next) {
21 if (!vml->vma)
22 continue;
23
24 bytes += kobjsize(vml);
25 if (atomic_read(&mm->mm_count) > 1 ||
26 atomic_read(&vml->vma->vm_usage) > 1
27 ) {
28 sbytes += kobjsize((void *) vml->vma->vm_start);
29 sbytes += kobjsize(vml->vma);
30 } else {
31 bytes += kobjsize((void *) vml->vma->vm_start);
32 bytes += kobjsize(vml->vma);
33 slack += kobjsize((void *) vml->vma->vm_start) -
34 (vml->vma->vm_end - vml->vma->vm_start);
35 }
36 }
37
38 if (atomic_read(&mm->mm_count) > 1)
39 sbytes += kobjsize(mm);
40 else
41 bytes += kobjsize(mm);
42
43 if (current->fs && atomic_read(&current->fs->count) > 1)
44 sbytes += kobjsize(current->fs);
45 else
46 bytes += kobjsize(current->fs);
47
48 if (current->files && atomic_read(&current->files->count) > 1)
49 sbytes += kobjsize(current->files);
50 else
51 bytes += kobjsize(current->files);
52
53 if (current->sighand && atomic_read(&current->sighand->count) > 1)
54 sbytes += kobjsize(current->sighand);
55 else
56 bytes += kobjsize(current->sighand);
57
58 bytes += kobjsize(current); /* includes kernel stack */
59
60 buffer += sprintf(buffer,
61 "Mem:\t%8lu bytes\n"
62 "Slack:\t%8lu bytes\n"
63 "Shared:\t%8lu bytes\n",
64 bytes, slack, sbytes);
65
66 up_read(&mm->mmap_sem);
67 return buffer;
68}
69
70unsigned long task_vsize(struct mm_struct *mm)
71{
72 struct vm_list_struct *tbp;
73 unsigned long vsize = 0;
74
75 down_read(&mm->mmap_sem);
76 for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) {
77 if (tbp->vma)
78 vsize += kobjsize((void *) tbp->vma->vm_start);
79 }
80 up_read(&mm->mmap_sem);
81 return vsize;
82}
83
84int task_statm(struct mm_struct *mm, int *shared, int *text,
85 int *data, int *resident)
86{
87 struct vm_list_struct *tbp;
88 int size = kobjsize(mm);
89
90 down_read(&mm->mmap_sem);
91 for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) {
92 size += kobjsize(tbp);
93 if (tbp->vma) {
94 size += kobjsize(tbp->vma);
95 size += kobjsize((void *) tbp->vma->vm_start);
96 }
97 }
98
99 size += (*text = mm->end_code - mm->start_code);
100 size += (*data = mm->start_stack - mm->start_data);
101 up_read(&mm->mmap_sem);
102 *resident = size;
103 return size;
104}
105
106int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
107{
108 struct vm_list_struct *vml;
109 struct vm_area_struct *vma;
110 struct task_struct *task = proc_task(inode);
111 struct mm_struct *mm = get_task_mm(task);
112 int result = -ENOENT;
113
114 if (!mm)
115 goto out;
116 down_read(&mm->mmap_sem);
117
118 vml = mm->context.vmlist;
119 vma = NULL;
120 while (vml) {
121 if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) {
122 vma = vml->vma;
123 break;
124 }
125 vml = vml->next;
126 }
127
128 if (vma) {
129 *mnt = mntget(vma->vm_file->f_vfsmnt);
130 *dentry = dget(vma->vm_file->f_dentry);
131 result = 0;
132 }
133
134 up_read(&mm->mmap_sem);
135 mmput(mm);
136out:
137 return result;
138}
139
140/*
141 * Albert D. Cahalan suggested to fake entries for the traditional
142 * sections here. This might be worth investigating.
143 */
144static int show_map(struct seq_file *m, void *v)
145{
146 return 0;
147}
148static void *m_start(struct seq_file *m, loff_t *pos)
149{
150 return NULL;
151}
152static void m_stop(struct seq_file *m, void *v)
153{
154}
155static void *m_next(struct seq_file *m, void *v, loff_t *pos)
156{
157 return NULL;
158}
159struct seq_operations proc_pid_maps_op = {
160 .start = m_start,
161 .next = m_next,
162 .stop = m_stop,
163 .show = show_map
164};
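The comment above task_mem() describes the attribution rule: anything referenced by more than one owner is charged to "Shared", everything else to "Mem", with allocator rounding reported as "Slack". A toy standalone illustration of that rule (made-up numbers, not kernel code):

#include <stdio.h>

struct obj {
	unsigned long alloc_size;	/* what the allocator really handed out */
	unsigned long used_size;	/* what the mapping actually needs      */
	int users;			/* how many processes reference it      */
};

int main(void)
{
	struct obj objs[] = {
		{ 8192, 6000, 1 },	/* private, 2192 bytes of slack */
		{ 4096, 4096, 3 },	/* shared by three processes    */
	};
	unsigned long bytes = 0, sbytes = 0, slack = 0;
	unsigned i;

	for (i = 0; i < sizeof(objs) / sizeof(objs[0]); i++) {
		if (objs[i].users > 1) {
			sbytes += objs[i].alloc_size;
		} else {
			bytes += objs[i].alloc_size;
			slack += objs[i].alloc_size - objs[i].used_size;
		}
	}
	printf("Mem:\t%8lu bytes\nSlack:\t%8lu bytes\nShared:\t%8lu bytes\n",
	       bytes, slack, sbytes);
	return 0;
}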