aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/Kconfig10
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/array.c39
-rw-r--r--fs/proc/base.c600
-rw-r--r--fs/proc/consoles.c114
-rw-r--r--fs/proc/devices.c4
-rw-r--r--fs/proc/generic.c30
-rw-r--r--fs/proc/inode.c34
-rw-r--r--fs/proc/internal.h32
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/meminfo.c14
-rw-r--r--fs/proc/namespaces.c201
-rw-r--r--fs/proc/page.c16
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_sysctl.c35
-rw-r--r--fs/proc/proc_tty.c26
-rw-r--r--fs/proc/root.c58
-rw-r--r--fs/proc/softirqs.c8
-rw-r--r--fs/proc/stat.c22
-rw-r--r--fs/proc/task_mmu.c393
-rw-r--r--fs/proc/task_nommu.c13
-rw-r--r--fs/proc/vmcore.c54
22 files changed, 1240 insertions, 469 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 50f8f0600f06..15af6222f8a4 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -1,5 +1,5 @@
1config PROC_FS 1config PROC_FS
2 bool "/proc file system support" if EMBEDDED 2 bool "/proc file system support" if EXPERT
3 default y 3 default y
4 help 4 help
5 This is a virtual file system providing information about the status 5 This is a virtual file system providing information about the status
@@ -33,14 +33,14 @@ config PROC_KCORE
33 depends on PROC_FS && MMU 33 depends on PROC_FS && MMU
34 34
35config PROC_VMCORE 35config PROC_VMCORE
36 bool "/proc/vmcore support (EXPERIMENTAL)" 36 bool "/proc/vmcore support"
37 depends on PROC_FS && CRASH_DUMP 37 depends on PROC_FS && CRASH_DUMP
38 default y 38 default y
39 help 39 help
40 Exports the dump image of crashed kernel in ELF format. 40 Exports the dump image of crashed kernel in ELF format.
41 41
42config PROC_SYSCTL 42config PROC_SYSCTL
43 bool "Sysctl support (/proc/sys)" if EMBEDDED 43 bool "Sysctl support (/proc/sys)" if EXPERT
44 depends on PROC_FS 44 depends on PROC_FS
45 select SYSCTL 45 select SYSCTL
46 default y 46 default y
@@ -61,7 +61,7 @@ config PROC_SYSCTL
61config PROC_PAGE_MONITOR 61config PROC_PAGE_MONITOR
62 default y 62 default y
63 depends on PROC_FS && MMU 63 depends on PROC_FS && MMU
64 bool "Enable /proc page monitoring" if EMBEDDED 64 bool "Enable /proc page monitoring" if EXPERT
65 help 65 help
66 Various /proc files exist to monitor process memory utilization: 66 Various /proc files exist to monitor process memory utilization:
67 /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, 67 /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap,
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 2758e2afc518..c1c729335924 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -10,6 +10,7 @@ proc-$(CONFIG_MMU) := mmu.o task_mmu.o
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o 11 proc_tty.o
12proc-y += cmdline.o 12proc-y += cmdline.o
13proc-y += consoles.o
13proc-y += cpuinfo.o 14proc-y += cpuinfo.o
14proc-y += devices.o 15proc-y += devices.o
15proc-y += interrupts.o 16proc-y += interrupts.o
@@ -19,6 +20,7 @@ proc-y += stat.o
19proc-y += uptime.o 20proc-y += uptime.o
20proc-y += version.o 21proc-y += version.o
21proc-y += softirqs.o 22proc-y += softirqs.o
23proc-y += namespaces.o
22proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 24proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
23proc-$(CONFIG_NET) += proc_net.o 25proc-$(CONFIG_NET) += proc_net.o
24proc-$(CONFIG_PROC_KCORE) += kcore.o 26proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index fff6572676ae..9b45ee84fbcc 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -95,7 +95,7 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
95 95
96 get_task_comm(tcomm, p); 96 get_task_comm(tcomm, p);
97 97
98 seq_printf(m, "Name:\t"); 98 seq_puts(m, "Name:\t");
99 end = m->buf + m->size; 99 end = m->buf + m->size;
100 buf = m->buf + m->count; 100 buf = m->buf + m->count;
101 name = tcomm; 101 name = tcomm;
@@ -122,7 +122,7 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
122 buf++; 122 buf++;
123 } 123 }
124 m->count = buf - m->buf; 124 m->count = buf - m->buf;
125 seq_printf(m, "\n"); 125 seq_putc(m, '\n');
126} 126}
127 127
128/* 128/*
@@ -131,7 +131,7 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
131 * you can test for combinations of others with 131 * you can test for combinations of others with
132 * simple bit tests. 132 * simple bit tests.
133 */ 133 */
134static const char *task_state_array[] = { 134static const char * const task_state_array[] = {
135 "R (running)", /* 0 */ 135 "R (running)", /* 0 */
136 "S (sleeping)", /* 1 */ 136 "S (sleeping)", /* 1 */
137 "D (disk sleep)", /* 2 */ 137 "D (disk sleep)", /* 2 */
@@ -147,7 +147,7 @@ static const char *task_state_array[] = {
147static inline const char *get_task_state(struct task_struct *tsk) 147static inline const char *get_task_state(struct task_struct *tsk)
148{ 148{
149 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; 149 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
150 const char **p = &task_state_array[0]; 150 const char * const *p = &task_state_array[0];
151 151
152 BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array)); 152 BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
153 153
@@ -208,7 +208,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
208 seq_printf(m, "%d ", GROUP_AT(group_info, g)); 208 seq_printf(m, "%d ", GROUP_AT(group_info, g));
209 put_cred(cred); 209 put_cred(cred);
210 210
211 seq_printf(m, "\n"); 211 seq_putc(m, '\n');
212} 212}
213 213
214static void render_sigset_t(struct seq_file *m, const char *header, 214static void render_sigset_t(struct seq_file *m, const char *header,
@@ -216,7 +216,7 @@ static void render_sigset_t(struct seq_file *m, const char *header,
216{ 216{
217 int i; 217 int i;
218 218
219 seq_printf(m, "%s", header); 219 seq_puts(m, header);
220 220
221 i = _NSIG; 221 i = _NSIG;
222 do { 222 do {
@@ -230,7 +230,7 @@ static void render_sigset_t(struct seq_file *m, const char *header,
230 seq_printf(m, "%x", x); 230 seq_printf(m, "%x", x);
231 } while (i >= 4); 231 } while (i >= 4);
232 232
233 seq_printf(m, "\n"); 233 seq_putc(m, '\n');
234} 234}
235 235
236static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, 236static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
@@ -291,12 +291,12 @@ static void render_cap_t(struct seq_file *m, const char *header,
291{ 291{
292 unsigned __capi; 292 unsigned __capi;
293 293
294 seq_printf(m, "%s", header); 294 seq_puts(m, header);
295 CAP_FOR_EACH_U32(__capi) { 295 CAP_FOR_EACH_U32(__capi) {
296 seq_printf(m, "%08x", 296 seq_printf(m, "%08x",
297 a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); 297 a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
298 } 298 }
299 seq_printf(m, "\n"); 299 seq_putc(m, '\n');
300} 300}
301 301
302static inline void task_cap(struct seq_file *m, struct task_struct *p) 302static inline void task_cap(struct seq_file *m, struct task_struct *p)
@@ -329,12 +329,12 @@ static inline void task_context_switch_counts(struct seq_file *m,
329 329
330static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) 330static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
331{ 331{
332 seq_printf(m, "Cpus_allowed:\t"); 332 seq_puts(m, "Cpus_allowed:\t");
333 seq_cpumask(m, &task->cpus_allowed); 333 seq_cpumask(m, &task->cpus_allowed);
334 seq_printf(m, "\n"); 334 seq_putc(m, '\n');
335 seq_printf(m, "Cpus_allowed_list:\t"); 335 seq_puts(m, "Cpus_allowed_list:\t");
336 seq_cpumask_list(m, &task->cpus_allowed); 336 seq_cpumask_list(m, &task->cpus_allowed);
337 seq_printf(m, "\n"); 337 seq_putc(m, '\n');
338} 338}
339 339
340int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, 340int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
@@ -353,9 +353,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
353 task_cap(m, task); 353 task_cap(m, task);
354 task_cpus_allowed(m, task); 354 task_cpus_allowed(m, task);
355 cpuset_task_status_allowed(m, task); 355 cpuset_task_status_allowed(m, task);
356#if defined(CONFIG_S390)
357 task_show_regs(m, task);
358#endif
359 task_context_switch_counts(m, task); 356 task_context_switch_counts(m, task);
360 return 0; 357 return 0;
361} 358}
@@ -492,8 +489,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
492 vsize, 489 vsize,
493 mm ? get_mm_rss(mm) : 0, 490 mm ? get_mm_rss(mm) : 0,
494 rsslim, 491 rsslim,
495 mm ? mm->start_code : 0, 492 mm ? (permitted ? mm->start_code : 1) : 0,
496 mm ? mm->end_code : 0, 493 mm ? (permitted ? mm->end_code : 1) : 0,
497 (permitted && mm) ? mm->start_stack : 0, 494 (permitted && mm) ? mm->start_stack : 0,
498 esp, 495 esp,
499 eip, 496 eip,
@@ -535,15 +532,15 @@ int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
535int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, 532int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
536 struct pid *pid, struct task_struct *task) 533 struct pid *pid, struct task_struct *task)
537{ 534{
538 int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0; 535 unsigned long size = 0, resident = 0, shared = 0, text = 0, data = 0;
539 struct mm_struct *mm = get_task_mm(task); 536 struct mm_struct *mm = get_task_mm(task);
540 537
541 if (mm) { 538 if (mm) {
542 size = task_statm(mm, &shared, &text, &data, &resident); 539 size = task_statm(mm, &shared, &text, &data, &resident);
543 mmput(mm); 540 mmput(mm);
544 } 541 }
545 seq_printf(m, "%d %d %d %d %d %d %d\n", 542 seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
546 size, resident, shared, text, lib, data, 0); 543 size, resident, shared, text, data);
547 544
548 return 0; 545 return 0;
549} 546}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8e4addaa5424..fc5bc2767692 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -83,6 +83,9 @@
83#include <linux/pid_namespace.h> 83#include <linux/pid_namespace.h>
84#include <linux/fs_struct.h> 84#include <linux/fs_struct.h>
85#include <linux/slab.h> 85#include <linux/slab.h>
86#ifdef CONFIG_HARDWALL
87#include <asm/hardwall.h>
88#endif
86#include "internal.h" 89#include "internal.h"
87 90
88/* NOTE: 91/* NOTE:
@@ -191,17 +194,20 @@ static int proc_root_link(struct inode *inode, struct path *path)
191 return result; 194 return result;
192} 195}
193 196
194/* 197static struct mm_struct *__check_mem_permission(struct task_struct *task)
195 * Return zero if current may access user memory in @task, -error if not.
196 */
197static int check_mem_permission(struct task_struct *task)
198{ 198{
199 struct mm_struct *mm;
200
201 mm = get_task_mm(task);
202 if (!mm)
203 return ERR_PTR(-EINVAL);
204
199 /* 205 /*
200 * A task can always look at itself, in case it chooses 206 * A task can always look at itself, in case it chooses
201 * to use system calls instead of load instructions. 207 * to use system calls instead of load instructions.
202 */ 208 */
203 if (task == current) 209 if (task == current)
204 return 0; 210 return mm;
205 211
206 /* 212 /*
207 * If current is actively ptrace'ing, and would also be 213 * If current is actively ptrace'ing, and would also be
@@ -213,29 +219,55 @@ static int check_mem_permission(struct task_struct *task)
213 match = (tracehook_tracer_task(task) == current); 219 match = (tracehook_tracer_task(task) == current);
214 rcu_read_unlock(); 220 rcu_read_unlock();
215 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) 221 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
216 return 0; 222 return mm;
217 } 223 }
218 224
219 /* 225 /*
220 * Noone else is allowed. 226 * No one else is allowed.
221 */ 227 */
222 return -EPERM; 228 mmput(mm);
229 return ERR_PTR(-EPERM);
230}
231
232/*
233 * If current may access user memory in @task return a reference to the
234 * corresponding mm, otherwise ERR_PTR.
235 */
236static struct mm_struct *check_mem_permission(struct task_struct *task)
237{
238 struct mm_struct *mm;
239 int err;
240
241 /*
242 * Avoid racing if task exec's as we might get a new mm but validate
243 * against old credentials.
244 */
245 err = mutex_lock_killable(&task->signal->cred_guard_mutex);
246 if (err)
247 return ERR_PTR(err);
248
249 mm = __check_mem_permission(task);
250 mutex_unlock(&task->signal->cred_guard_mutex);
251
252 return mm;
223} 253}
224 254
225struct mm_struct *mm_for_maps(struct task_struct *task) 255struct mm_struct *mm_for_maps(struct task_struct *task)
226{ 256{
227 struct mm_struct *mm; 257 struct mm_struct *mm;
258 int err;
228 259
229 if (mutex_lock_killable(&task->cred_guard_mutex)) 260 err = mutex_lock_killable(&task->signal->cred_guard_mutex);
230 return NULL; 261 if (err)
262 return ERR_PTR(err);
231 263
232 mm = get_task_mm(task); 264 mm = get_task_mm(task);
233 if (mm && mm != current->mm && 265 if (mm && mm != current->mm &&
234 !ptrace_may_access(task, PTRACE_MODE_READ)) { 266 !ptrace_may_access(task, PTRACE_MODE_READ)) {
235 mmput(mm); 267 mmput(mm);
236 mm = NULL; 268 mm = ERR_PTR(-EACCES);
237 } 269 }
238 mutex_unlock(&task->cred_guard_mutex); 270 mutex_unlock(&task->signal->cred_guard_mutex);
239 271
240 return mm; 272 return mm;
241} 273}
@@ -279,9 +311,9 @@ out:
279 311
280static int proc_pid_auxv(struct task_struct *task, char *buffer) 312static int proc_pid_auxv(struct task_struct *task, char *buffer)
281{ 313{
282 int res = 0; 314 struct mm_struct *mm = mm_for_maps(task);
283 struct mm_struct *mm = get_task_mm(task); 315 int res = PTR_ERR(mm);
284 if (mm) { 316 if (mm && !IS_ERR(mm)) {
285 unsigned int nwords = 0; 317 unsigned int nwords = 0;
286 do { 318 do {
287 nwords += 2; 319 nwords += 2;
@@ -318,6 +350,23 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
318} 350}
319#endif /* CONFIG_KALLSYMS */ 351#endif /* CONFIG_KALLSYMS */
320 352
353static int lock_trace(struct task_struct *task)
354{
355 int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
356 if (err)
357 return err;
358 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
359 mutex_unlock(&task->signal->cred_guard_mutex);
360 return -EPERM;
361 }
362 return 0;
363}
364
365static void unlock_trace(struct task_struct *task)
366{
367 mutex_unlock(&task->signal->cred_guard_mutex);
368}
369
321#ifdef CONFIG_STACKTRACE 370#ifdef CONFIG_STACKTRACE
322 371
323#define MAX_STACK_TRACE_DEPTH 64 372#define MAX_STACK_TRACE_DEPTH 64
@@ -327,6 +376,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
327{ 376{
328 struct stack_trace trace; 377 struct stack_trace trace;
329 unsigned long *entries; 378 unsigned long *entries;
379 int err;
330 int i; 380 int i;
331 381
332 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); 382 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
@@ -337,15 +387,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
337 trace.max_entries = MAX_STACK_TRACE_DEPTH; 387 trace.max_entries = MAX_STACK_TRACE_DEPTH;
338 trace.entries = entries; 388 trace.entries = entries;
339 trace.skip = 0; 389 trace.skip = 0;
340 save_stack_trace_tsk(task, &trace);
341 390
342 for (i = 0; i < trace.nr_entries; i++) { 391 err = lock_trace(task);
343 seq_printf(m, "[<%p>] %pS\n", 392 if (!err) {
344 (void *)entries[i], (void *)entries[i]); 393 save_stack_trace_tsk(task, &trace);
394
395 for (i = 0; i < trace.nr_entries; i++) {
396 seq_printf(m, "[<%pK>] %pS\n",
397 (void *)entries[i], (void *)entries[i]);
398 }
399 unlock_trace(task);
345 } 400 }
346 kfree(entries); 401 kfree(entries);
347 402
348 return 0; 403 return err;
349} 404}
350#endif 405#endif
351 406
@@ -373,26 +428,20 @@ static int lstats_show_proc(struct seq_file *m, void *v)
373 return -ESRCH; 428 return -ESRCH;
374 seq_puts(m, "Latency Top version : v0.1\n"); 429 seq_puts(m, "Latency Top version : v0.1\n");
375 for (i = 0; i < 32; i++) { 430 for (i = 0; i < 32; i++) {
376 if (task->latency_record[i].backtrace[0]) { 431 struct latency_record *lr = &task->latency_record[i];
432 if (lr->backtrace[0]) {
377 int q; 433 int q;
378 seq_printf(m, "%i %li %li ", 434 seq_printf(m, "%i %li %li",
379 task->latency_record[i].count, 435 lr->count, lr->time, lr->max);
380 task->latency_record[i].time,
381 task->latency_record[i].max);
382 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 436 for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
383 char sym[KSYM_SYMBOL_LEN]; 437 unsigned long bt = lr->backtrace[q];
384 char *c; 438 if (!bt)
385 if (!task->latency_record[i].backtrace[q])
386 break; 439 break;
387 if (task->latency_record[i].backtrace[q] == ULONG_MAX) 440 if (bt == ULONG_MAX)
388 break; 441 break;
389 sprint_symbol(sym, task->latency_record[i].backtrace[q]); 442 seq_printf(m, " %ps", (void *)bt);
390 c = strchr(sym, '+');
391 if (c)
392 *c = 0;
393 seq_printf(m, "%s ", sym);
394 } 443 }
395 seq_printf(m, "\n"); 444 seq_putc(m, '\n');
396 } 445 }
397 446
398 } 447 }
@@ -514,18 +563,22 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
514{ 563{
515 long nr; 564 long nr;
516 unsigned long args[6], sp, pc; 565 unsigned long args[6], sp, pc;
566 int res = lock_trace(task);
567 if (res)
568 return res;
517 569
518 if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) 570 if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
519 return sprintf(buffer, "running\n"); 571 res = sprintf(buffer, "running\n");
520 572 else if (nr < 0)
521 if (nr < 0) 573 res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
522 return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); 574 else
523 575 res = sprintf(buffer,
524 return sprintf(buffer,
525 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", 576 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
526 nr, 577 nr,
527 args[0], args[1], args[2], args[3], args[4], args[5], 578 args[0], args[1], args[2], args[3], args[4], args[5],
528 sp, pc); 579 sp, pc);
580 unlock_trace(task);
581 return res;
529} 582}
530#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ 583#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
531 584
@@ -550,7 +603,7 @@ static int proc_fd_access_allowed(struct inode *inode)
550 return allowed; 603 return allowed;
551} 604}
552 605
553static int proc_setattr(struct dentry *dentry, struct iattr *attr) 606int proc_setattr(struct dentry *dentry, struct iattr *attr)
554{ 607{
555 int error; 608 int error;
556 struct inode *inode = dentry->d_inode; 609 struct inode *inode = dentry->d_inode;
@@ -751,14 +804,7 @@ static int proc_single_show(struct seq_file *m, void *v)
751 804
752static int proc_single_open(struct inode *inode, struct file *filp) 805static int proc_single_open(struct inode *inode, struct file *filp)
753{ 806{
754 int ret; 807 return single_open(filp, proc_single_show, inode);
755 ret = single_open(filp, proc_single_show, NULL);
756 if (!ret) {
757 struct seq_file *m = filp->private_data;
758
759 m->private = inode;
760 }
761 return ret;
762} 808}
763 809
764static const struct file_operations proc_single_file_operations = { 810static const struct file_operations proc_single_file_operations = {
@@ -771,6 +817,8 @@ static const struct file_operations proc_single_file_operations = {
771static int mem_open(struct inode* inode, struct file* file) 817static int mem_open(struct inode* inode, struct file* file)
772{ 818{
773 file->private_data = (void*)((long)current->self_exec_id); 819 file->private_data = (void*)((long)current->self_exec_id);
820 /* OK to pass negative loff_t, we can catch out-of-range */
821 file->f_mode |= FMODE_UNSIGNED_OFFSET;
774 return 0; 822 return 0;
775} 823}
776 824
@@ -786,18 +834,14 @@ static ssize_t mem_read(struct file * file, char __user * buf,
786 if (!task) 834 if (!task)
787 goto out_no_task; 835 goto out_no_task;
788 836
789 if (check_mem_permission(task))
790 goto out;
791
792 ret = -ENOMEM; 837 ret = -ENOMEM;
793 page = (char *)__get_free_page(GFP_TEMPORARY); 838 page = (char *)__get_free_page(GFP_TEMPORARY);
794 if (!page) 839 if (!page)
795 goto out; 840 goto out;
796 841
797 ret = 0; 842 mm = check_mem_permission(task);
798 843 ret = PTR_ERR(mm);
799 mm = get_task_mm(task); 844 if (IS_ERR(mm))
800 if (!mm)
801 goto out_free; 845 goto out_free;
802 846
803 ret = -EIO; 847 ret = -EIO;
@@ -811,8 +855,8 @@ static ssize_t mem_read(struct file * file, char __user * buf,
811 int this_len, retval; 855 int this_len, retval;
812 856
813 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 857 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
814 retval = access_process_vm(task, src, page, this_len, 0); 858 retval = access_remote_vm(mm, src, page, this_len, 0);
815 if (!retval || check_mem_permission(task)) { 859 if (!retval) {
816 if (!ret) 860 if (!ret)
817 ret = -EIO; 861 ret = -EIO;
818 break; 862 break;
@@ -840,10 +884,6 @@ out_no_task:
840 return ret; 884 return ret;
841} 885}
842 886
843#define mem_write NULL
844
845#ifndef mem_write
846/* This is a security hazard */
847static ssize_t mem_write(struct file * file, const char __user *buf, 887static ssize_t mem_write(struct file * file, const char __user *buf,
848 size_t count, loff_t *ppos) 888 size_t count, loff_t *ppos)
849{ 889{
@@ -851,18 +891,25 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
851 char *page; 891 char *page;
852 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 892 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
853 unsigned long dst = *ppos; 893 unsigned long dst = *ppos;
894 struct mm_struct *mm;
854 895
855 copied = -ESRCH; 896 copied = -ESRCH;
856 if (!task) 897 if (!task)
857 goto out_no_task; 898 goto out_no_task;
858 899
859 if (check_mem_permission(task))
860 goto out;
861
862 copied = -ENOMEM; 900 copied = -ENOMEM;
863 page = (char *)__get_free_page(GFP_TEMPORARY); 901 page = (char *)__get_free_page(GFP_TEMPORARY);
864 if (!page) 902 if (!page)
865 goto out; 903 goto out_task;
904
905 mm = check_mem_permission(task);
906 copied = PTR_ERR(mm);
907 if (IS_ERR(mm))
908 goto out_free;
909
910 copied = -EIO;
911 if (file->private_data != (void *)((long)current->self_exec_id))
912 goto out_mm;
866 913
867 copied = 0; 914 copied = 0;
868 while (count > 0) { 915 while (count > 0) {
@@ -873,7 +920,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
873 copied = -EFAULT; 920 copied = -EFAULT;
874 break; 921 break;
875 } 922 }
876 retval = access_process_vm(task, dst, page, this_len, 1); 923 retval = access_remote_vm(mm, dst, page, this_len, 1);
877 if (!retval) { 924 if (!retval) {
878 if (!copied) 925 if (!copied)
879 copied = -EIO; 926 copied = -EIO;
@@ -885,13 +932,16 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
885 count -= retval; 932 count -= retval;
886 } 933 }
887 *ppos = dst; 934 *ppos = dst;
935
936out_mm:
937 mmput(mm);
938out_free:
888 free_page((unsigned long) page); 939 free_page((unsigned long) page);
889out: 940out_task:
890 put_task_struct(task); 941 put_task_struct(task);
891out_no_task: 942out_no_task:
892 return copied; 943 return copied;
893} 944}
894#endif
895 945
896loff_t mem_lseek(struct file *file, loff_t offset, int orig) 946loff_t mem_lseek(struct file *file, loff_t offset, int orig)
897{ 947{
@@ -928,20 +978,18 @@ static ssize_t environ_read(struct file *file, char __user *buf,
928 if (!task) 978 if (!task)
929 goto out_no_task; 979 goto out_no_task;
930 980
931 if (!ptrace_may_access(task, PTRACE_MODE_READ))
932 goto out;
933
934 ret = -ENOMEM; 981 ret = -ENOMEM;
935 page = (char *)__get_free_page(GFP_TEMPORARY); 982 page = (char *)__get_free_page(GFP_TEMPORARY);
936 if (!page) 983 if (!page)
937 goto out; 984 goto out;
938 985
939 ret = 0;
940 986
941 mm = get_task_mm(task); 987 mm = mm_for_maps(task);
942 if (!mm) 988 ret = PTR_ERR(mm);
989 if (!mm || IS_ERR(mm))
943 goto out_free; 990 goto out_free;
944 991
992 ret = 0;
945 while (count > 0) { 993 while (count > 0) {
946 int this_len, retval, max_len; 994 int this_len, retval, max_len;
947 995
@@ -1016,35 +1064,54 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1016{ 1064{
1017 struct task_struct *task; 1065 struct task_struct *task;
1018 char buffer[PROC_NUMBUF]; 1066 char buffer[PROC_NUMBUF];
1019 long oom_adjust; 1067 int oom_adjust;
1020 unsigned long flags; 1068 unsigned long flags;
1021 int err; 1069 int err;
1022 1070
1023 memset(buffer, 0, sizeof(buffer)); 1071 memset(buffer, 0, sizeof(buffer));
1024 if (count > sizeof(buffer) - 1) 1072 if (count > sizeof(buffer) - 1)
1025 count = sizeof(buffer) - 1; 1073 count = sizeof(buffer) - 1;
1026 if (copy_from_user(buffer, buf, count)) 1074 if (copy_from_user(buffer, buf, count)) {
1027 return -EFAULT; 1075 err = -EFAULT;
1076 goto out;
1077 }
1028 1078
1029 err = strict_strtol(strstrip(buffer), 0, &oom_adjust); 1079 err = kstrtoint(strstrip(buffer), 0, &oom_adjust);
1030 if (err) 1080 if (err)
1031 return -EINVAL; 1081 goto out;
1032 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && 1082 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
1033 oom_adjust != OOM_DISABLE) 1083 oom_adjust != OOM_DISABLE) {
1034 return -EINVAL; 1084 err = -EINVAL;
1085 goto out;
1086 }
1035 1087
1036 task = get_proc_task(file->f_path.dentry->d_inode); 1088 task = get_proc_task(file->f_path.dentry->d_inode);
1037 if (!task) 1089 if (!task) {
1038 return -ESRCH; 1090 err = -ESRCH;
1091 goto out;
1092 }
1093
1094 task_lock(task);
1095 if (!task->mm) {
1096 err = -EINVAL;
1097 goto err_task_lock;
1098 }
1099
1039 if (!lock_task_sighand(task, &flags)) { 1100 if (!lock_task_sighand(task, &flags)) {
1040 put_task_struct(task); 1101 err = -ESRCH;
1041 return -ESRCH; 1102 goto err_task_lock;
1042 } 1103 }
1043 1104
1044 if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { 1105 if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
1045 unlock_task_sighand(task, &flags); 1106 err = -EACCES;
1046 put_task_struct(task); 1107 goto err_sighand;
1047 return -EACCES; 1108 }
1109
1110 if (oom_adjust != task->signal->oom_adj) {
1111 if (oom_adjust == OOM_DISABLE)
1112 atomic_inc(&task->mm->oom_disable_count);
1113 if (task->signal->oom_adj == OOM_DISABLE)
1114 atomic_dec(&task->mm->oom_disable_count);
1048 } 1115 }
1049 1116
1050 /* 1117 /*
@@ -1065,10 +1132,13 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1065 else 1132 else
1066 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / 1133 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
1067 -OOM_DISABLE; 1134 -OOM_DISABLE;
1135err_sighand:
1068 unlock_task_sighand(task, &flags); 1136 unlock_task_sighand(task, &flags);
1137err_task_lock:
1138 task_unlock(task);
1069 put_task_struct(task); 1139 put_task_struct(task);
1070 1140out:
1071 return count; 1141 return err < 0 ? err : count;
1072} 1142}
1073 1143
1074static const struct file_operations proc_oom_adjust_operations = { 1144static const struct file_operations proc_oom_adjust_operations = {
@@ -1103,37 +1173,58 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1103 struct task_struct *task; 1173 struct task_struct *task;
1104 char buffer[PROC_NUMBUF]; 1174 char buffer[PROC_NUMBUF];
1105 unsigned long flags; 1175 unsigned long flags;
1106 long oom_score_adj; 1176 int oom_score_adj;
1107 int err; 1177 int err;
1108 1178
1109 memset(buffer, 0, sizeof(buffer)); 1179 memset(buffer, 0, sizeof(buffer));
1110 if (count > sizeof(buffer) - 1) 1180 if (count > sizeof(buffer) - 1)
1111 count = sizeof(buffer) - 1; 1181 count = sizeof(buffer) - 1;
1112 if (copy_from_user(buffer, buf, count)) 1182 if (copy_from_user(buffer, buf, count)) {
1113 return -EFAULT; 1183 err = -EFAULT;
1184 goto out;
1185 }
1114 1186
1115 err = strict_strtol(strstrip(buffer), 0, &oom_score_adj); 1187 err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
1116 if (err) 1188 if (err)
1117 return -EINVAL; 1189 goto out;
1118 if (oom_score_adj < OOM_SCORE_ADJ_MIN || 1190 if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
1119 oom_score_adj > OOM_SCORE_ADJ_MAX) 1191 oom_score_adj > OOM_SCORE_ADJ_MAX) {
1120 return -EINVAL; 1192 err = -EINVAL;
1193 goto out;
1194 }
1121 1195
1122 task = get_proc_task(file->f_path.dentry->d_inode); 1196 task = get_proc_task(file->f_path.dentry->d_inode);
1123 if (!task) 1197 if (!task) {
1124 return -ESRCH; 1198 err = -ESRCH;
1199 goto out;
1200 }
1201
1202 task_lock(task);
1203 if (!task->mm) {
1204 err = -EINVAL;
1205 goto err_task_lock;
1206 }
1207
1125 if (!lock_task_sighand(task, &flags)) { 1208 if (!lock_task_sighand(task, &flags)) {
1126 put_task_struct(task); 1209 err = -ESRCH;
1127 return -ESRCH; 1210 goto err_task_lock;
1128 } 1211 }
1129 if (oom_score_adj < task->signal->oom_score_adj && 1212
1213 if (oom_score_adj < task->signal->oom_score_adj_min &&
1130 !capable(CAP_SYS_RESOURCE)) { 1214 !capable(CAP_SYS_RESOURCE)) {
1131 unlock_task_sighand(task, &flags); 1215 err = -EACCES;
1132 put_task_struct(task); 1216 goto err_sighand;
1133 return -EACCES;
1134 } 1217 }
1135 1218
1219 if (oom_score_adj != task->signal->oom_score_adj) {
1220 if (oom_score_adj == OOM_SCORE_ADJ_MIN)
1221 atomic_inc(&task->mm->oom_disable_count);
1222 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1223 atomic_dec(&task->mm->oom_disable_count);
1224 }
1136 task->signal->oom_score_adj = oom_score_adj; 1225 task->signal->oom_score_adj = oom_score_adj;
1226 if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
1227 task->signal->oom_score_adj_min = oom_score_adj;
1137 /* 1228 /*
1138 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is 1229 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
1139 * always attainable. 1230 * always attainable.
@@ -1143,14 +1234,19 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1143 else 1234 else
1144 task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / 1235 task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
1145 OOM_SCORE_ADJ_MAX; 1236 OOM_SCORE_ADJ_MAX;
1237err_sighand:
1146 unlock_task_sighand(task, &flags); 1238 unlock_task_sighand(task, &flags);
1239err_task_lock:
1240 task_unlock(task);
1147 put_task_struct(task); 1241 put_task_struct(task);
1148 return count; 1242out:
1243 return err < 0 ? err : count;
1149} 1244}
1150 1245
1151static const struct file_operations proc_oom_score_adj_operations = { 1246static const struct file_operations proc_oom_score_adj_operations = {
1152 .read = oom_score_adj_read, 1247 .read = oom_score_adj_read,
1153 .write = oom_score_adj_write, 1248 .write = oom_score_adj_write,
1249 .llseek = default_llseek,
1154}; 1250};
1155 1251
1156#ifdef CONFIG_AUDITSYSCALL 1252#ifdef CONFIG_AUDITSYSCALL
@@ -1338,9 +1434,77 @@ sched_write(struct file *file, const char __user *buf,
1338 1434
1339static int sched_open(struct inode *inode, struct file *filp) 1435static int sched_open(struct inode *inode, struct file *filp)
1340{ 1436{
1437 return single_open(filp, sched_show, inode);
1438}
1439
1440static const struct file_operations proc_pid_sched_operations = {
1441 .open = sched_open,
1442 .read = seq_read,
1443 .write = sched_write,
1444 .llseek = seq_lseek,
1445 .release = single_release,
1446};
1447
1448#endif
1449
1450#ifdef CONFIG_SCHED_AUTOGROUP
1451/*
1452 * Print out autogroup related information:
1453 */
1454static int sched_autogroup_show(struct seq_file *m, void *v)
1455{
1456 struct inode *inode = m->private;
1457 struct task_struct *p;
1458
1459 p = get_proc_task(inode);
1460 if (!p)
1461 return -ESRCH;
1462 proc_sched_autogroup_show_task(p, m);
1463
1464 put_task_struct(p);
1465
1466 return 0;
1467}
1468
1469static ssize_t
1470sched_autogroup_write(struct file *file, const char __user *buf,
1471 size_t count, loff_t *offset)
1472{
1473 struct inode *inode = file->f_path.dentry->d_inode;
1474 struct task_struct *p;
1475 char buffer[PROC_NUMBUF];
1476 int nice;
1477 int err;
1478
1479 memset(buffer, 0, sizeof(buffer));
1480 if (count > sizeof(buffer) - 1)
1481 count = sizeof(buffer) - 1;
1482 if (copy_from_user(buffer, buf, count))
1483 return -EFAULT;
1484
1485 err = kstrtoint(strstrip(buffer), 0, &nice);
1486 if (err < 0)
1487 return err;
1488
1489 p = get_proc_task(inode);
1490 if (!p)
1491 return -ESRCH;
1492
1493 err = nice;
1494 err = proc_sched_autogroup_set_nice(p, &err);
1495 if (err)
1496 count = err;
1497
1498 put_task_struct(p);
1499
1500 return count;
1501}
1502
1503static int sched_autogroup_open(struct inode *inode, struct file *filp)
1504{
1341 int ret; 1505 int ret;
1342 1506
1343 ret = single_open(filp, sched_show, NULL); 1507 ret = single_open(filp, sched_autogroup_show, NULL);
1344 if (!ret) { 1508 if (!ret) {
1345 struct seq_file *m = filp->private_data; 1509 struct seq_file *m = filp->private_data;
1346 1510
@@ -1349,15 +1513,15 @@ static int sched_open(struct inode *inode, struct file *filp)
1349 return ret; 1513 return ret;
1350} 1514}
1351 1515
1352static const struct file_operations proc_pid_sched_operations = { 1516static const struct file_operations proc_pid_sched_autogroup_operations = {
1353 .open = sched_open, 1517 .open = sched_autogroup_open,
1354 .read = seq_read, 1518 .read = seq_read,
1355 .write = sched_write, 1519 .write = sched_autogroup_write,
1356 .llseek = seq_lseek, 1520 .llseek = seq_lseek,
1357 .release = single_release, 1521 .release = single_release,
1358}; 1522};
1359 1523
1360#endif 1524#endif /* CONFIG_SCHED_AUTOGROUP */
1361 1525
1362static ssize_t comm_write(struct file *file, const char __user *buf, 1526static ssize_t comm_write(struct file *file, const char __user *buf,
1363 size_t count, loff_t *offset) 1527 size_t count, loff_t *offset)
@@ -1406,15 +1570,7 @@ static int comm_show(struct seq_file *m, void *v)
1406 1570
1407static int comm_open(struct inode *inode, struct file *filp) 1571static int comm_open(struct inode *inode, struct file *filp)
1408{ 1572{
1409 int ret; 1573 return single_open(filp, comm_show, inode);
1410
1411 ret = single_open(filp, comm_show, NULL);
1412 if (!ret) {
1413 struct seq_file *m = filp->private_data;
1414
1415 m->private = inode;
1416 }
1417 return ret;
1418} 1574}
1419 1575
1420static const struct file_operations proc_pid_set_comm_operations = { 1576static const struct file_operations proc_pid_set_comm_operations = {
@@ -1425,57 +1581,6 @@ static const struct file_operations proc_pid_set_comm_operations = {
1425 .release = single_release, 1581 .release = single_release,
1426}; 1582};
1427 1583
1428/*
1429 * We added or removed a vma mapping the executable. The vmas are only mapped
1430 * during exec and are not mapped with the mmap system call.
1431 * Callers must hold down_write() on the mm's mmap_sem for these
1432 */
1433void added_exe_file_vma(struct mm_struct *mm)
1434{
1435 mm->num_exe_file_vmas++;
1436}
1437
1438void removed_exe_file_vma(struct mm_struct *mm)
1439{
1440 mm->num_exe_file_vmas--;
1441 if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
1442 fput(mm->exe_file);
1443 mm->exe_file = NULL;
1444 }
1445
1446}
1447
1448void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
1449{
1450 if (new_exe_file)
1451 get_file(new_exe_file);
1452 if (mm->exe_file)
1453 fput(mm->exe_file);
1454 mm->exe_file = new_exe_file;
1455 mm->num_exe_file_vmas = 0;
1456}
1457
1458struct file *get_mm_exe_file(struct mm_struct *mm)
1459{
1460 struct file *exe_file;
1461
1462 /* We need mmap_sem to protect against races with removal of
1463 * VM_EXECUTABLE vmas */
1464 down_read(&mm->mmap_sem);
1465 exe_file = mm->exe_file;
1466 if (exe_file)
1467 get_file(exe_file);
1468 up_read(&mm->mmap_sem);
1469 return exe_file;
1470}
1471
1472void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
1473{
1474 /* It's safe to write the exe_file pointer without exe_file_lock because
1475 * this is called during fork when the task is not yet in /proc */
1476 newmm->exe_file = get_mm_exe_file(oldmm);
1477}
1478
1479static int proc_exe_link(struct inode *inode, struct path *exe_path) 1584static int proc_exe_link(struct inode *inode, struct path *exe_path)
1480{ 1585{
1481 struct task_struct *task; 1586 struct task_struct *task;
@@ -1526,7 +1631,7 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
1526 if (!tmp) 1631 if (!tmp)
1527 return -ENOMEM; 1632 return -ENOMEM;
1528 1633
1529 pathname = d_path_with_unreachable(path, tmp, PAGE_SIZE); 1634 pathname = d_path(path, tmp, PAGE_SIZE);
1530 len = PTR_ERR(pathname); 1635 len = PTR_ERR(pathname);
1531 if (IS_ERR(pathname)) 1636 if (IS_ERR(pathname))
1532 goto out; 1637 goto out;
@@ -1585,8 +1690,7 @@ static int task_dumpable(struct task_struct *task)
1585 return 0; 1690 return 0;
1586} 1691}
1587 1692
1588 1693struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1589static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1590{ 1694{
1591 struct inode * inode; 1695 struct inode * inode;
1592 struct proc_inode *ei; 1696 struct proc_inode *ei;
@@ -1600,6 +1704,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
1600 1704
1601 /* Common stuff */ 1705 /* Common stuff */
1602 ei = PROC_I(inode); 1706 ei = PROC_I(inode);
1707 inode->i_ino = get_next_ino();
1603 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1708 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1604 inode->i_op = &proc_def_inode_operations; 1709 inode->i_op = &proc_def_inode_operations;
1605 1710
@@ -1627,7 +1732,7 @@ out_unlock:
1627 return NULL; 1732 return NULL;
1628} 1733}
1629 1734
1630static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1735int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1631{ 1736{
1632 struct inode *inode = dentry->d_inode; 1737 struct inode *inode = dentry->d_inode;
1633 struct task_struct *task; 1738 struct task_struct *task;
@@ -1668,12 +1773,18 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
1668 * made this apply to all per process world readable and executable 1773 * made this apply to all per process world readable and executable
1669 * directories. 1774 * directories.
1670 */ 1775 */
1671static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1776int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1672{ 1777{
1673 struct inode *inode = dentry->d_inode; 1778 struct inode *inode;
1674 struct task_struct *task = get_proc_task(inode); 1779 struct task_struct *task;
1675 const struct cred *cred; 1780 const struct cred *cred;
1676 1781
1782 if (nd && nd->flags & LOOKUP_RCU)
1783 return -ECHILD;
1784
1785 inode = dentry->d_inode;
1786 task = get_proc_task(inode);
1787
1677 if (task) { 1788 if (task) {
1678 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1789 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1679 task_dumpable(task)) { 1790 task_dumpable(task)) {
@@ -1695,7 +1806,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1695 return 0; 1806 return 0;
1696} 1807}
1697 1808
1698static int pid_delete_dentry(struct dentry * dentry) 1809static int pid_delete_dentry(const struct dentry * dentry)
1699{ 1810{
1700 /* Is the task we represent dead? 1811 /* Is the task we represent dead?
1701 * If so, then don't put the dentry on the lru list, 1812 * If so, then don't put the dentry on the lru list,
@@ -1704,7 +1815,7 @@ static int pid_delete_dentry(struct dentry * dentry)
1704 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 1815 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1705} 1816}
1706 1817
1707static const struct dentry_operations pid_dentry_operations = 1818const struct dentry_operations pid_dentry_operations =
1708{ 1819{
1709 .d_revalidate = pid_revalidate, 1820 .d_revalidate = pid_revalidate,
1710 .d_delete = pid_delete_dentry, 1821 .d_delete = pid_delete_dentry,
@@ -1712,9 +1823,6 @@ static const struct dentry_operations pid_dentry_operations =
1712 1823
1713/* Lookups */ 1824/* Lookups */
1714 1825
1715typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
1716 struct task_struct *, const void *);
1717
1718/* 1826/*
1719 * Fill a directory entry. 1827 * Fill a directory entry.
1720 * 1828 *
@@ -1727,8 +1835,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
1727 * reported by readdir in sync with the inode numbers reported 1835 * reported by readdir in sync with the inode numbers reported
1728 * by stat. 1836 * by stat.
1729 */ 1837 */
1730static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 1838int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1731 char *name, int len, 1839 const char *name, int len,
1732 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1840 instantiate_t instantiate, struct task_struct *task, const void *ptr)
1733{ 1841{
1734 struct dentry *child, *dir = filp->f_path.dentry; 1842 struct dentry *child, *dir = filp->f_path.dentry;
@@ -1839,12 +1947,19 @@ static int proc_fd_link(struct inode *inode, struct path *path)
1839 1947
1840static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1948static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1841{ 1949{
1842 struct inode *inode = dentry->d_inode; 1950 struct inode *inode;
1843 struct task_struct *task = get_proc_task(inode); 1951 struct task_struct *task;
1844 int fd = proc_fd(inode); 1952 int fd;
1845 struct files_struct *files; 1953 struct files_struct *files;
1846 const struct cred *cred; 1954 const struct cred *cred;
1847 1955
1956 if (nd && nd->flags & LOOKUP_RCU)
1957 return -ECHILD;
1958
1959 inode = dentry->d_inode;
1960 task = get_proc_task(inode);
1961 fd = proc_fd(inode);
1962
1848 if (task) { 1963 if (task) {
1849 files = get_files_struct(task); 1964 files = get_files_struct(task);
1850 if (files) { 1965 if (files) {
@@ -1920,7 +2035,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
1920 inode->i_op = &proc_pid_link_inode_operations; 2035 inode->i_op = &proc_pid_link_inode_operations;
1921 inode->i_size = 64; 2036 inode->i_size = 64;
1922 ei->op.proc_get_link = proc_fd_link; 2037 ei->op.proc_get_link = proc_fd_link;
1923 dentry->d_op = &tid_fd_dentry_operations; 2038 d_set_d_op(dentry, &tid_fd_dentry_operations);
1924 d_add(dentry, inode); 2039 d_add(dentry, inode);
1925 /* Close the race of the process dying before we return the dentry */ 2040 /* Close the race of the process dying before we return the dentry */
1926 if (tid_fd_revalidate(dentry, NULL)) 2041 if (tid_fd_revalidate(dentry, NULL))
@@ -2039,22 +2154,22 @@ static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
2039static const struct file_operations proc_fdinfo_file_operations = { 2154static const struct file_operations proc_fdinfo_file_operations = {
2040 .open = nonseekable_open, 2155 .open = nonseekable_open,
2041 .read = proc_fdinfo_read, 2156 .read = proc_fdinfo_read,
2157 .llseek = no_llseek,
2042}; 2158};
2043 2159
2044static const struct file_operations proc_fd_operations = { 2160static const struct file_operations proc_fd_operations = {
2045 .read = generic_read_dir, 2161 .read = generic_read_dir,
2046 .readdir = proc_readfd, 2162 .readdir = proc_readfd,
2163 .llseek = default_llseek,
2047}; 2164};
2048 2165
2049/* 2166/*
2050 * /proc/pid/fd needs a special permission handler so that a process can still 2167 * /proc/pid/fd needs a special permission handler so that a process can still
2051 * access /proc/self/fd after it has executed a setuid(). 2168 * access /proc/self/fd after it has executed a setuid().
2052 */ 2169 */
2053static int proc_fd_permission(struct inode *inode, int mask) 2170static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
2054{ 2171{
2055 int rv; 2172 int rv = generic_permission(inode, mask, flags, NULL);
2056
2057 rv = generic_permission(inode, mask, NULL);
2058 if (rv == 0) 2173 if (rv == 0)
2059 return 0; 2174 return 0;
2060 if (task_pid(current) == proc_pid(inode)) 2175 if (task_pid(current) == proc_pid(inode))
@@ -2086,7 +2201,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2086 ei->fd = fd; 2201 ei->fd = fd;
2087 inode->i_mode = S_IFREG | S_IRUSR; 2202 inode->i_mode = S_IFREG | S_IRUSR;
2088 inode->i_fop = &proc_fdinfo_file_operations; 2203 inode->i_fop = &proc_fdinfo_file_operations;
2089 dentry->d_op = &tid_fd_dentry_operations; 2204 d_set_d_op(dentry, &tid_fd_dentry_operations);
2090 d_add(dentry, inode); 2205 d_add(dentry, inode);
2091 /* Close the race of the process dying before we return the dentry */ 2206 /* Close the race of the process dying before we return the dentry */
2092 if (tid_fd_revalidate(dentry, NULL)) 2207 if (tid_fd_revalidate(dentry, NULL))
@@ -2112,6 +2227,7 @@ static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
2112static const struct file_operations proc_fdinfo_operations = { 2227static const struct file_operations proc_fdinfo_operations = {
2113 .read = generic_read_dir, 2228 .read = generic_read_dir,
2114 .readdir = proc_readfdinfo, 2229 .readdir = proc_readfdinfo,
2230 .llseek = default_llseek,
2115}; 2231};
2116 2232
2117/* 2233/*
@@ -2144,7 +2260,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
2144 if (p->fop) 2260 if (p->fop)
2145 inode->i_fop = p->fop; 2261 inode->i_fop = p->fop;
2146 ei->op = p->op; 2262 ei->op = p->op;
2147 dentry->d_op = &pid_dentry_operations; 2263 d_set_d_op(dentry, &pid_dentry_operations);
2148 d_add(dentry, inode); 2264 d_add(dentry, inode);
2149 /* Close the race of the process dying before we return the dentry */ 2265 /* Close the race of the process dying before we return the dentry */
2150 if (pid_revalidate(dentry, NULL)) 2266 if (pid_revalidate(dentry, NULL))
@@ -2302,14 +2418,14 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
2302 goto out_free; 2418 goto out_free;
2303 2419
2304 /* Guard against adverse ptrace interaction */ 2420 /* Guard against adverse ptrace interaction */
2305 length = mutex_lock_interruptible(&task->cred_guard_mutex); 2421 length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
2306 if (length < 0) 2422 if (length < 0)
2307 goto out_free; 2423 goto out_free;
2308 2424
2309 length = security_setprocattr(task, 2425 length = security_setprocattr(task,
2310 (char*)file->f_path.dentry->d_name.name, 2426 (char*)file->f_path.dentry->d_name.name,
2311 (void*)page, count); 2427 (void*)page, count);
2312 mutex_unlock(&task->cred_guard_mutex); 2428 mutex_unlock(&task->signal->cred_guard_mutex);
2313out_free: 2429out_free:
2314 free_page((unsigned long) page); 2430 free_page((unsigned long) page);
2315out: 2431out:
@@ -2343,6 +2459,7 @@ static int proc_attr_dir_readdir(struct file * filp,
2343static const struct file_operations proc_attr_dir_operations = { 2459static const struct file_operations proc_attr_dir_operations = {
2344 .read = generic_read_dir, 2460 .read = generic_read_dir,
2345 .readdir = proc_attr_dir_readdir, 2461 .readdir = proc_attr_dir_readdir,
2462 .llseek = default_llseek,
2346}; 2463};
2347 2464
2348static struct dentry *proc_attr_dir_lookup(struct inode *dir, 2465static struct dentry *proc_attr_dir_lookup(struct inode *dir,
@@ -2503,29 +2620,6 @@ static const struct pid_entry proc_base_stuff[] = {
2503 &proc_self_inode_operations, NULL, {}), 2620 &proc_self_inode_operations, NULL, {}),
2504}; 2621};
2505 2622
2506/*
2507 * Exceptional case: normally we are not allowed to unhash a busy
2508 * directory. In this case, however, we can do it - no aliasing problems
2509 * due to the way we treat inodes.
2510 */
2511static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
2512{
2513 struct inode *inode = dentry->d_inode;
2514 struct task_struct *task = get_proc_task(inode);
2515 if (task) {
2516 put_task_struct(task);
2517 return 1;
2518 }
2519 d_drop(dentry);
2520 return 0;
2521}
2522
2523static const struct dentry_operations proc_base_dentry_operations =
2524{
2525 .d_revalidate = proc_base_revalidate,
2526 .d_delete = pid_delete_dentry,
2527};
2528
2529static struct dentry *proc_base_instantiate(struct inode *dir, 2623static struct dentry *proc_base_instantiate(struct inode *dir,
2530 struct dentry *dentry, struct task_struct *task, const void *ptr) 2624 struct dentry *dentry, struct task_struct *task, const void *ptr)
2531{ 2625{
@@ -2542,6 +2636,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2542 2636
2543 /* Initialize the inode */ 2637 /* Initialize the inode */
2544 ei = PROC_I(inode); 2638 ei = PROC_I(inode);
2639 inode->i_ino = get_next_ino();
2545 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 2640 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2546 2641
2547 /* 2642 /*
@@ -2561,7 +2656,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2561 if (p->fop) 2656 if (p->fop)
2562 inode->i_fop = p->fop; 2657 inode->i_fop = p->fop;
2563 ei->op = p->op; 2658 ei->op = p->op;
2564 dentry->d_op = &proc_base_dentry_operations;
2565 d_add(dentry, inode); 2659 d_add(dentry, inode);
2566 error = NULL; 2660 error = NULL;
2567out: 2661out:
@@ -2614,6 +2708,9 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2614 struct task_io_accounting acct = task->ioac; 2708 struct task_io_accounting acct = task->ioac;
2615 unsigned long flags; 2709 unsigned long flags;
2616 2710
2711 if (!ptrace_may_access(task, PTRACE_MODE_READ))
2712 return -EACCES;
2713
2617 if (whole && lock_task_sighand(task, &flags)) { 2714 if (whole && lock_task_sighand(task, &flags)) {
2618 struct task_struct *t = task; 2715 struct task_struct *t = task;
2619 2716
@@ -2654,8 +2751,12 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2654static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2751static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2655 struct pid *pid, struct task_struct *task) 2752 struct pid *pid, struct task_struct *task)
2656{ 2753{
2657 seq_printf(m, "%08x\n", task->personality); 2754 int err = lock_trace(task);
2658 return 0; 2755 if (!err) {
2756 seq_printf(m, "%08x\n", task->personality);
2757 unlock_trace(task);
2758 }
2759 return err;
2659} 2760}
2660 2761
2661/* 2762/*
@@ -2668,20 +2769,24 @@ static const struct pid_entry tgid_base_stuff[] = {
2668 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 2769 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
2669 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2770 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
2670 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2771 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2772 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
2671#ifdef CONFIG_NET 2773#ifdef CONFIG_NET
2672 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 2774 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
2673#endif 2775#endif
2674 REG("environ", S_IRUSR, proc_environ_operations), 2776 REG("environ", S_IRUSR, proc_environ_operations),
2675 INF("auxv", S_IRUSR, proc_pid_auxv), 2777 INF("auxv", S_IRUSR, proc_pid_auxv),
2676 ONE("status", S_IRUGO, proc_pid_status), 2778 ONE("status", S_IRUGO, proc_pid_status),
2677 ONE("personality", S_IRUSR, proc_pid_personality), 2779 ONE("personality", S_IRUGO, proc_pid_personality),
2678 INF("limits", S_IRUGO, proc_pid_limits), 2780 INF("limits", S_IRUGO, proc_pid_limits),
2679#ifdef CONFIG_SCHED_DEBUG 2781#ifdef CONFIG_SCHED_DEBUG
2680 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2782 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2681#endif 2783#endif
2784#ifdef CONFIG_SCHED_AUTOGROUP
2785 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
2786#endif
2682 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2787 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2683#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2788#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2684 INF("syscall", S_IRUSR, proc_pid_syscall), 2789 INF("syscall", S_IRUGO, proc_pid_syscall),
2685#endif 2790#endif
2686 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2791 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2687 ONE("stat", S_IRUGO, proc_tgid_stat), 2792 ONE("stat", S_IRUGO, proc_tgid_stat),
@@ -2700,7 +2805,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2700#ifdef CONFIG_PROC_PAGE_MONITOR 2805#ifdef CONFIG_PROC_PAGE_MONITOR
2701 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2806 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2702 REG("smaps", S_IRUGO, proc_smaps_operations), 2807 REG("smaps", S_IRUGO, proc_smaps_operations),
2703 REG("pagemap", S_IRUSR, proc_pagemap_operations), 2808 REG("pagemap", S_IRUGO, proc_pagemap_operations),
2704#endif 2809#endif
2705#ifdef CONFIG_SECURITY 2810#ifdef CONFIG_SECURITY
2706 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2811 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2709,7 +2814,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2709 INF("wchan", S_IRUGO, proc_pid_wchan), 2814 INF("wchan", S_IRUGO, proc_pid_wchan),
2710#endif 2815#endif
2711#ifdef CONFIG_STACKTRACE 2816#ifdef CONFIG_STACKTRACE
2712 ONE("stack", S_IRUSR, proc_pid_stack), 2817 ONE("stack", S_IRUGO, proc_pid_stack),
2713#endif 2818#endif
2714#ifdef CONFIG_SCHEDSTATS 2819#ifdef CONFIG_SCHEDSTATS
2715 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2820 INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -2737,7 +2842,10 @@ static const struct pid_entry tgid_base_stuff[] = {
2737 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 2842 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
2738#endif 2843#endif
2739#ifdef CONFIG_TASK_IO_ACCOUNTING 2844#ifdef CONFIG_TASK_IO_ACCOUNTING
2740 INF("io", S_IRUGO, proc_tgid_io_accounting), 2845 INF("io", S_IRUSR, proc_tgid_io_accounting),
2846#endif
2847#ifdef CONFIG_HARDWALL
2848 INF("hardwall", S_IRUGO, proc_pid_hardwall),
2741#endif 2849#endif
2742}; 2850};
2743 2851
@@ -2751,6 +2859,7 @@ static int proc_tgid_base_readdir(struct file * filp,
2751static const struct file_operations proc_tgid_base_operations = { 2859static const struct file_operations proc_tgid_base_operations = {
2752 .read = generic_read_dir, 2860 .read = generic_read_dir,
2753 .readdir = proc_tgid_base_readdir, 2861 .readdir = proc_tgid_base_readdir,
2862 .llseek = default_llseek,
2754}; 2863};
2755 2864
2756static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 2865static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
@@ -2871,7 +2980,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
2871 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, 2980 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
2872 ARRAY_SIZE(tgid_base_stuff)); 2981 ARRAY_SIZE(tgid_base_stuff));
2873 2982
2874 dentry->d_op = &pid_dentry_operations; 2983 d_set_d_op(dentry, &pid_dentry_operations);
2875 2984
2876 d_add(dentry, inode); 2985 d_add(dentry, inode);
2877 /* Close the race of the process dying before we return the dentry */ 2986 /* Close the race of the process dying before we return the dentry */
@@ -2968,11 +3077,16 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
2968/* for the /proc/ directory itself, after non-process stuff has been done */ 3077/* for the /proc/ directory itself, after non-process stuff has been done */
2969int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 3078int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2970{ 3079{
2971 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 3080 unsigned int nr;
2972 struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); 3081 struct task_struct *reaper;
2973 struct tgid_iter iter; 3082 struct tgid_iter iter;
2974 struct pid_namespace *ns; 3083 struct pid_namespace *ns;
2975 3084
3085 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
3086 goto out_no_task;
3087 nr = filp->f_pos - FIRST_PROCESS_ENTRY;
3088
3089 reaper = get_proc_task(filp->f_path.dentry->d_inode);
2976 if (!reaper) 3090 if (!reaper)
2977 goto out_no_task; 3091 goto out_no_task;
2978 3092
@@ -3007,17 +3121,18 @@ out_no_task:
3007static const struct pid_entry tid_base_stuff[] = { 3121static const struct pid_entry tid_base_stuff[] = {
3008 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3122 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3009 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3123 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3124 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
3010 REG("environ", S_IRUSR, proc_environ_operations), 3125 REG("environ", S_IRUSR, proc_environ_operations),
3011 INF("auxv", S_IRUSR, proc_pid_auxv), 3126 INF("auxv", S_IRUSR, proc_pid_auxv),
3012 ONE("status", S_IRUGO, proc_pid_status), 3127 ONE("status", S_IRUGO, proc_pid_status),
3013 ONE("personality", S_IRUSR, proc_pid_personality), 3128 ONE("personality", S_IRUGO, proc_pid_personality),
3014 INF("limits", S_IRUGO, proc_pid_limits), 3129 INF("limits", S_IRUGO, proc_pid_limits),
3015#ifdef CONFIG_SCHED_DEBUG 3130#ifdef CONFIG_SCHED_DEBUG
3016 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3131 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
3017#endif 3132#endif
3018 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 3133 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
3019#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3134#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
3020 INF("syscall", S_IRUSR, proc_pid_syscall), 3135 INF("syscall", S_IRUGO, proc_pid_syscall),
3021#endif 3136#endif
3022 INF("cmdline", S_IRUGO, proc_pid_cmdline), 3137 INF("cmdline", S_IRUGO, proc_pid_cmdline),
3023 ONE("stat", S_IRUGO, proc_tid_stat), 3138 ONE("stat", S_IRUGO, proc_tid_stat),
@@ -3035,7 +3150,7 @@ static const struct pid_entry tid_base_stuff[] = {
3035#ifdef CONFIG_PROC_PAGE_MONITOR 3150#ifdef CONFIG_PROC_PAGE_MONITOR
3036 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3151 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3037 REG("smaps", S_IRUGO, proc_smaps_operations), 3152 REG("smaps", S_IRUGO, proc_smaps_operations),
3038 REG("pagemap", S_IRUSR, proc_pagemap_operations), 3153 REG("pagemap", S_IRUGO, proc_pagemap_operations),
3039#endif 3154#endif
3040#ifdef CONFIG_SECURITY 3155#ifdef CONFIG_SECURITY
3041 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3156 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -3044,7 +3159,7 @@ static const struct pid_entry tid_base_stuff[] = {
3044 INF("wchan", S_IRUGO, proc_pid_wchan), 3159 INF("wchan", S_IRUGO, proc_pid_wchan),
3045#endif 3160#endif
3046#ifdef CONFIG_STACKTRACE 3161#ifdef CONFIG_STACKTRACE
3047 ONE("stack", S_IRUSR, proc_pid_stack), 3162 ONE("stack", S_IRUGO, proc_pid_stack),
3048#endif 3163#endif
3049#ifdef CONFIG_SCHEDSTATS 3164#ifdef CONFIG_SCHEDSTATS
3050 INF("schedstat", S_IRUGO, proc_pid_schedstat), 3165 INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -3063,13 +3178,16 @@ static const struct pid_entry tid_base_stuff[] = {
3063 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3178 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3064#ifdef CONFIG_AUDITSYSCALL 3179#ifdef CONFIG_AUDITSYSCALL
3065 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3180 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
3066 REG("sessionid", S_IRUSR, proc_sessionid_operations), 3181 REG("sessionid", S_IRUGO, proc_sessionid_operations),
3067#endif 3182#endif
3068#ifdef CONFIG_FAULT_INJECTION 3183#ifdef CONFIG_FAULT_INJECTION
3069 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3184 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
3070#endif 3185#endif
3071#ifdef CONFIG_TASK_IO_ACCOUNTING 3186#ifdef CONFIG_TASK_IO_ACCOUNTING
3072 INF("io", S_IRUGO, proc_tid_io_accounting), 3187 INF("io", S_IRUSR, proc_tid_io_accounting),
3188#endif
3189#ifdef CONFIG_HARDWALL
3190 INF("hardwall", S_IRUGO, proc_pid_hardwall),
3073#endif 3191#endif
3074}; 3192};
3075 3193
@@ -3088,6 +3206,7 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den
3088static const struct file_operations proc_tid_base_operations = { 3206static const struct file_operations proc_tid_base_operations = {
3089 .read = generic_read_dir, 3207 .read = generic_read_dir,
3090 .readdir = proc_tid_base_readdir, 3208 .readdir = proc_tid_base_readdir,
3209 .llseek = default_llseek,
3091}; 3210};
3092 3211
3093static const struct inode_operations proc_tid_base_inode_operations = { 3212static const struct inode_operations proc_tid_base_inode_operations = {
@@ -3113,7 +3232,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
3113 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, 3232 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
3114 ARRAY_SIZE(tid_base_stuff)); 3233 ARRAY_SIZE(tid_base_stuff));
3115 3234
3116 dentry->d_op = &pid_dentry_operations; 3235 d_set_d_op(dentry, &pid_dentry_operations);
3117 3236
3118 d_add(dentry, inode); 3237 d_add(dentry, inode);
3119 /* Close the race of the process dying before we return the dentry */ 3238 /* Close the race of the process dying before we return the dentry */
@@ -3324,4 +3443,5 @@ static const struct inode_operations proc_task_inode_operations = {
3324static const struct file_operations proc_task_operations = { 3443static const struct file_operations proc_task_operations = {
3325 .read = generic_read_dir, 3444 .read = generic_read_dir,
3326 .readdir = proc_task_readdir, 3445 .readdir = proc_task_readdir,
3446 .llseek = default_llseek,
3327}; 3447};
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
new file mode 100644
index 000000000000..b701eaa482bf
--- /dev/null
+++ b/fs/proc/consoles.c
@@ -0,0 +1,114 @@
1/*
2 * Copyright (c) 2010 Werner Fink, Jiri Slaby
3 *
4 * Licensed under GPLv2
5 */
6
7#include <linux/console.h>
8#include <linux/kernel.h>
9#include <linux/proc_fs.h>
10#include <linux/seq_file.h>
11#include <linux/tty_driver.h>
12
13/*
14 * This is handler for /proc/consoles
15 */
16static int show_console_dev(struct seq_file *m, void *v)
17{
18 static const struct {
19 short flag;
20 char name;
21 } con_flags[] = {
22 { CON_ENABLED, 'E' },
23 { CON_CONSDEV, 'C' },
24 { CON_BOOT, 'B' },
25 { CON_PRINTBUFFER, 'p' },
26 { CON_BRL, 'b' },
27 { CON_ANYTIME, 'a' },
28 };
29 char flags[ARRAY_SIZE(con_flags) + 1];
30 struct console *con = v;
31 unsigned int a;
32 int len;
33 dev_t dev = 0;
34
35 if (con->device) {
36 const struct tty_driver *driver;
37 int index;
38 driver = con->device(con, &index);
39 if (driver) {
40 dev = MKDEV(driver->major, driver->minor_start);
41 dev += index;
42 }
43 }
44
45 for (a = 0; a < ARRAY_SIZE(con_flags); a++)
46 flags[a] = (con->flags & con_flags[a].flag) ?
47 con_flags[a].name : ' ';
48 flags[a] = 0;
49
50 seq_printf(m, "%s%d%n", con->name, con->index, &len);
51 len = 21 - len;
52 if (len < 1)
53 len = 1;
54 seq_printf(m, "%*c%c%c%c (%s)", len, ' ', con->read ? 'R' : '-',
55 con->write ? 'W' : '-', con->unblank ? 'U' : '-',
56 flags);
57 if (dev)
58 seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev));
59
60 seq_printf(m, "\n");
61
62 return 0;
63}
64
65static void *c_start(struct seq_file *m, loff_t *pos)
66{
67 struct console *con;
68 loff_t off = 0;
69
70 console_lock();
71 for_each_console(con)
72 if (off++ == *pos)
73 break;
74
75 return con;
76}
77
78static void *c_next(struct seq_file *m, void *v, loff_t *pos)
79{
80 struct console *con = v;
81 ++*pos;
82 return con->next;
83}
84
85static void c_stop(struct seq_file *m, void *v)
86{
87 console_unlock();
88}
89
90static const struct seq_operations consoles_op = {
91 .start = c_start,
92 .next = c_next,
93 .stop = c_stop,
94 .show = show_console_dev
95};
96
97static int consoles_open(struct inode *inode, struct file *file)
98{
99 return seq_open(file, &consoles_op);
100}
101
102static const struct file_operations proc_consoles_operations = {
103 .open = consoles_open,
104 .read = seq_read,
105 .llseek = seq_lseek,
106 .release = seq_release,
107};
108
109static int __init proc_consoles_init(void)
110{
111 proc_create("consoles", 0, NULL, &proc_consoles_operations);
112 return 0;
113}
114module_init(proc_consoles_init);
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index 59ee7da959c9..b14347167c35 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -9,14 +9,14 @@ static int devinfo_show(struct seq_file *f, void *v)
9 9
10 if (i < CHRDEV_MAJOR_HASH_SIZE) { 10 if (i < CHRDEV_MAJOR_HASH_SIZE) {
11 if (i == 0) 11 if (i == 0)
12 seq_printf(f, "Character devices:\n"); 12 seq_puts(f, "Character devices:\n");
13 chrdev_show(f, i); 13 chrdev_show(f, i);
14 } 14 }
15#ifdef CONFIG_BLOCK 15#ifdef CONFIG_BLOCK
16 else { 16 else {
17 i -= CHRDEV_MAJOR_HASH_SIZE; 17 i -= CHRDEV_MAJOR_HASH_SIZE;
18 if (i == 0) 18 if (i == 0)
19 seq_printf(f, "\nBlock devices:\n"); 19 seq_puts(f, "\nBlock devices:\n");
20 blkdev_show(f, i); 20 blkdev_show(f, i);
21 } 21 }
22#endif 22#endif
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index dd29f0337661..f1637f17c37c 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -28,7 +28,7 @@
28 28
29DEFINE_SPINLOCK(proc_subdir_lock); 29DEFINE_SPINLOCK(proc_subdir_lock);
30 30
31static int proc_match(int len, const char *name, struct proc_dir_entry *de) 31static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
32{ 32{
33 if (de->namelen != len) 33 if (de->namelen != len)
34 return 0; 34 return 0;
@@ -303,7 +303,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret,
303{ 303{
304 const char *cp = name, *next; 304 const char *cp = name, *next;
305 struct proc_dir_entry *de; 305 struct proc_dir_entry *de;
306 int len; 306 unsigned int len;
307 307
308 de = *ret; 308 de = *ret;
309 if (!de) 309 if (!de)
@@ -400,7 +400,7 @@ static const struct inode_operations proc_link_inode_operations = {
400 * smarter: we could keep a "volatile" flag in the 400 * smarter: we could keep a "volatile" flag in the
401 * inode to indicate which ones to keep. 401 * inode to indicate which ones to keep.
402 */ 402 */
403static int proc_delete_dentry(struct dentry * dentry) 403static int proc_delete_dentry(const struct dentry * dentry)
404{ 404{
405 return 1; 405 return 1;
406} 406}
@@ -425,13 +425,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
425 if (de->namelen != dentry->d_name.len) 425 if (de->namelen != dentry->d_name.len)
426 continue; 426 continue;
427 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 427 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
428 unsigned int ino;
429
430 ino = de->low_ino;
431 pde_get(de); 428 pde_get(de);
432 spin_unlock(&proc_subdir_lock); 429 spin_unlock(&proc_subdir_lock);
433 error = -EINVAL; 430 error = -EINVAL;
434 inode = proc_get_inode(dir->i_sb, ino, de); 431 inode = proc_get_inode(dir->i_sb, de);
435 goto out_unlock; 432 goto out_unlock;
436 } 433 }
437 } 434 }
@@ -439,7 +436,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
439out_unlock: 436out_unlock:
440 437
441 if (inode) { 438 if (inode) {
442 dentry->d_op = &proc_dentry_operations; 439 d_set_d_op(dentry, &proc_dentry_operations);
443 d_add(dentry, inode); 440 d_add(dentry, inode);
444 return NULL; 441 return NULL;
445 } 442 }
@@ -605,7 +602,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
605{ 602{
606 struct proc_dir_entry *ent = NULL; 603 struct proc_dir_entry *ent = NULL;
607 const char *fn = name; 604 const char *fn = name;
608 int len; 605 unsigned int len;
609 606
610 /* make sure name is valid */ 607 /* make sure name is valid */
611 if (!name || !strlen(name)) goto out; 608 if (!name || !strlen(name)) goto out;
@@ -677,6 +674,7 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
677 } 674 }
678 return ent; 675 return ent;
679} 676}
677EXPORT_SYMBOL(proc_mkdir_mode);
680 678
681struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, 679struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
682 struct proc_dir_entry *parent) 680 struct proc_dir_entry *parent)
@@ -768,12 +766,7 @@ EXPORT_SYMBOL(proc_create_data);
768 766
769static void free_proc_entry(struct proc_dir_entry *de) 767static void free_proc_entry(struct proc_dir_entry *de)
770{ 768{
771 unsigned int ino = de->low_ino; 769 release_inode_number(de->low_ino);
772
773 if (ino < PROC_DYNAMIC_FIRST)
774 return;
775
776 release_inode_number(ino);
777 770
778 if (S_ISLNK(de->mode)) 771 if (S_ISLNK(de->mode))
779 kfree(de->data); 772 kfree(de->data);
@@ -794,7 +787,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
794 struct proc_dir_entry **p; 787 struct proc_dir_entry **p;
795 struct proc_dir_entry *de = NULL; 788 struct proc_dir_entry *de = NULL;
796 const char *fn = name; 789 const char *fn = name;
797 int len; 790 unsigned int len;
798 791
799 spin_lock(&proc_subdir_lock); 792 spin_lock(&proc_subdir_lock);
800 if (__xlate_proc_name(name, &parent, &fn) != 0) { 793 if (__xlate_proc_name(name, &parent, &fn) != 0) {
@@ -834,12 +827,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
834 827
835 wait_for_completion(de->pde_unload_completion); 828 wait_for_completion(de->pde_unload_completion);
836 829
837 goto continue_removing; 830 spin_lock(&de->pde_unload_lock);
838 } 831 }
839 spin_unlock(&de->pde_unload_lock);
840 832
841continue_removing:
842 spin_lock(&de->pde_unload_lock);
843 while (!list_empty(&de->pde_openers)) { 833 while (!list_empty(&de->pde_openers)) {
844 struct pde_opener *pdeo; 834 struct pde_opener *pdeo;
845 835
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 9c2b5f484879..74b48cfa1bb2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -16,7 +16,6 @@
16#include <linux/limits.h> 16#include <linux/limits.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/smp_lock.h>
20#include <linux/sysctl.h> 19#include <linux/sysctl.h>
21#include <linux/slab.h> 20#include <linux/slab.h>
22 21
@@ -28,6 +27,8 @@
28static void proc_evict_inode(struct inode *inode) 27static void proc_evict_inode(struct inode *inode)
29{ 28{
30 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
30 struct ctl_table_header *head;
31 const struct proc_ns_operations *ns_ops;
31 32
32 truncate_inode_pages(&inode->i_data, 0); 33 truncate_inode_pages(&inode->i_data, 0);
33 end_writeback(inode); 34 end_writeback(inode);
@@ -39,12 +40,17 @@ static void proc_evict_inode(struct inode *inode)
39 de = PROC_I(inode)->pde; 40 de = PROC_I(inode)->pde;
40 if (de) 41 if (de)
41 pde_put(de); 42 pde_put(de);
42 if (PROC_I(inode)->sysctl) 43 head = PROC_I(inode)->sysctl;
43 sysctl_head_put(PROC_I(inode)->sysctl); 44 if (head) {
45 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
46 sysctl_head_put(head);
47 }
48 /* Release any associated namespace */
49 ns_ops = PROC_I(inode)->ns_ops;
50 if (ns_ops && ns_ops->put)
51 ns_ops->put(PROC_I(inode)->ns);
44} 52}
45 53
46struct vfsmount *proc_mnt;
47
48static struct kmem_cache * proc_inode_cachep; 54static struct kmem_cache * proc_inode_cachep;
49 55
50static struct inode *proc_alloc_inode(struct super_block *sb) 56static struct inode *proc_alloc_inode(struct super_block *sb)
@@ -61,16 +67,25 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
61 ei->pde = NULL; 67 ei->pde = NULL;
62 ei->sysctl = NULL; 68 ei->sysctl = NULL;
63 ei->sysctl_entry = NULL; 69 ei->sysctl_entry = NULL;
70 ei->ns = NULL;
71 ei->ns_ops = NULL;
64 inode = &ei->vfs_inode; 72 inode = &ei->vfs_inode;
65 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 73 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
66 return inode; 74 return inode;
67} 75}
68 76
69static void proc_destroy_inode(struct inode *inode) 77static void proc_i_callback(struct rcu_head *head)
70{ 78{
79 struct inode *inode = container_of(head, struct inode, i_rcu);
80 INIT_LIST_HEAD(&inode->i_dentry);
71 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 81 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
72} 82}
73 83
84static void proc_destroy_inode(struct inode *inode)
85{
86 call_rcu(&inode->i_rcu, proc_i_callback);
87}
88
74static void init_once(void *foo) 89static void init_once(void *foo)
75{ 90{
76 struct proc_inode *ei = (struct proc_inode *) foo; 91 struct proc_inode *ei = (struct proc_inode *) foo;
@@ -410,12 +425,11 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
410}; 425};
411#endif 426#endif
412 427
413struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, 428struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
414 struct proc_dir_entry *de)
415{ 429{
416 struct inode * inode; 430 struct inode * inode;
417 431
418 inode = iget_locked(sb, ino); 432 inode = iget_locked(sb, de->low_ino);
419 if (!inode) 433 if (!inode)
420 return NULL; 434 return NULL;
421 if (inode->i_state & I_NEW) { 435 if (inode->i_state & I_NEW) {
@@ -465,7 +479,7 @@ int proc_fill_super(struct super_block *s)
465 s->s_time_gran = 1; 479 s->s_time_gran = 1;
466 480
467 pde_get(&proc_root); 481 pde_get(&proc_root);
468 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); 482 root_inode = proc_get_inode(s, &proc_root);
469 if (!root_inode) 483 if (!root_inode)
470 goto out_no_root; 484 goto out_no_root;
471 root_inode->i_uid = 0; 485 root_inode->i_uid = 0;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 1f24a3eddd12..7838e5cfec14 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,6 +61,14 @@ extern const struct file_operations proc_pagemap_operations;
61extern const struct file_operations proc_net_operations; 61extern const struct file_operations proc_net_operations;
62extern const struct inode_operations proc_net_inode_operations; 62extern const struct inode_operations proc_net_inode_operations;
63 63
64struct proc_maps_private {
65 struct pid *pid;
66 struct task_struct *task;
67#ifdef CONFIG_MMU
68 struct vm_area_struct *tail_vma;
69#endif
70};
71
64void proc_init_inodecache(void); 72void proc_init_inodecache(void);
65 73
66static inline struct pid *proc_pid(struct inode *inode) 74static inline struct pid *proc_pid(struct inode *inode)
@@ -96,7 +104,8 @@ extern spinlock_t proc_subdir_lock;
96struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); 104struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
97int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); 105int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
98unsigned long task_vsize(struct mm_struct *); 106unsigned long task_vsize(struct mm_struct *);
99int task_statm(struct mm_struct *, int *, int *, int *, int *); 107unsigned long task_statm(struct mm_struct *,
108 unsigned long *, unsigned long *, unsigned long *, unsigned long *);
100void task_mem(struct seq_file *, struct mm_struct *); 109void task_mem(struct seq_file *, struct mm_struct *);
101 110
102static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) 111static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
@@ -106,9 +115,8 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
106} 115}
107void pde_put(struct proc_dir_entry *pde); 116void pde_put(struct proc_dir_entry *pde);
108 117
109extern struct vfsmount *proc_mnt;
110int proc_fill_super(struct super_block *); 118int proc_fill_super(struct super_block *);
111struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); 119struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
112 120
113/* 121/*
114 * These are generic /proc routines that use the internal 122 * These are generic /proc routines that use the internal
@@ -119,3 +127,21 @@ struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir
119 */ 127 */
120int proc_readdir(struct file *, void *, filldir_t); 128int proc_readdir(struct file *, void *, filldir_t);
121struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); 129struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
130
131
132
133/* Lookups */
134typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
135 struct task_struct *, const void *);
136int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
137 const char *name, int len,
138 instantiate_t instantiate, struct task_struct *task, const void *ptr);
139int pid_revalidate(struct dentry *dentry, struct nameidata *nd);
140struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
141extern const struct dentry_operations pid_dentry_operations;
142int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
143int proc_setattr(struct dentry *dentry, struct iattr *attr);
144
145extern const struct inode_operations proc_ns_dir_inode_operations;
146extern const struct file_operations proc_ns_dir_operations;
147
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 6f37c391468d..d245cb23dd72 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -558,7 +558,7 @@ static int open_kcore(struct inode *inode, struct file *filp)
558static const struct file_operations proc_kcore_operations = { 558static const struct file_operations proc_kcore_operations = {
559 .read = read_kcore, 559 .read = read_kcore,
560 .open = open_kcore, 560 .open = open_kcore,
561 .llseek = generic_file_llseek, 561 .llseek = default_llseek,
562}; 562};
563 563
564#ifdef CONFIG_MEMORY_HOTPLUG 564#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index a65239cfd97e..ed257d141568 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -101,6 +101,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
101#ifdef CONFIG_MEMORY_FAILURE 101#ifdef CONFIG_MEMORY_FAILURE
102 "HardwareCorrupted: %5lu kB\n" 102 "HardwareCorrupted: %5lu kB\n"
103#endif 103#endif
104#ifdef CONFIG_TRANSPARENT_HUGEPAGE
105 "AnonHugePages: %8lu kB\n"
106#endif
104 , 107 ,
105 K(i.totalram), 108 K(i.totalram),
106 K(i.freeram), 109 K(i.freeram),
@@ -128,7 +131,12 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
128 K(i.freeswap), 131 K(i.freeswap),
129 K(global_page_state(NR_FILE_DIRTY)), 132 K(global_page_state(NR_FILE_DIRTY)),
130 K(global_page_state(NR_WRITEBACK)), 133 K(global_page_state(NR_WRITEBACK)),
131 K(global_page_state(NR_ANON_PAGES)), 134 K(global_page_state(NR_ANON_PAGES)
135#ifdef CONFIG_TRANSPARENT_HUGEPAGE
136 + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
137 HPAGE_PMD_NR
138#endif
139 ),
132 K(global_page_state(NR_FILE_MAPPED)), 140 K(global_page_state(NR_FILE_MAPPED)),
133 K(global_page_state(NR_SHMEM)), 141 K(global_page_state(NR_SHMEM)),
134 K(global_page_state(NR_SLAB_RECLAIMABLE) + 142 K(global_page_state(NR_SLAB_RECLAIMABLE) +
@@ -151,6 +159,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
151#ifdef CONFIG_MEMORY_FAILURE 159#ifdef CONFIG_MEMORY_FAILURE
152 ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) 160 ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10)
153#endif 161#endif
162#ifdef CONFIG_TRANSPARENT_HUGEPAGE
163 ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
164 HPAGE_PMD_NR)
165#endif
154 ); 166 );
155 167
156 hugetlb_report_meminfo(m); 168 hugetlb_report_meminfo(m);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
new file mode 100644
index 000000000000..be177f702acb
--- /dev/null
+++ b/fs/proc/namespaces.c
@@ -0,0 +1,201 @@
1#include <linux/proc_fs.h>
2#include <linux/nsproxy.h>
3#include <linux/sched.h>
4#include <linux/ptrace.h>
5#include <linux/fs_struct.h>
6#include <linux/mount.h>
7#include <linux/path.h>
8#include <linux/namei.h>
9#include <linux/file.h>
10#include <linux/utsname.h>
11#include <net/net_namespace.h>
12#include <linux/mnt_namespace.h>
13#include <linux/ipc_namespace.h>
14#include <linux/pid_namespace.h>
15#include "internal.h"
16
17
/*
 * Namespace types exposed by the ns directory, in directory order.
 * Every entry is conditional on its namespace config option, so the
 * table can be empty when none of them is enabled.
 */
static const struct proc_ns_operations *ns_entries[] = {
#ifdef CONFIG_NET_NS
	&netns_operations,
#endif
#ifdef CONFIG_UTS_NS
	&utsns_operations,
#endif
#ifdef CONFIG_IPC_NS
	&ipcns_operations,
#endif
};
29
30static const struct file_operations ns_file_operations = {
31 .llseek = no_llseek,
32};
33
34static struct dentry *proc_ns_instantiate(struct inode *dir,
35 struct dentry *dentry, struct task_struct *task, const void *ptr)
36{
37 const struct proc_ns_operations *ns_ops = ptr;
38 struct inode *inode;
39 struct proc_inode *ei;
40 struct dentry *error = ERR_PTR(-ENOENT);
41 void *ns;
42
43 inode = proc_pid_make_inode(dir->i_sb, task);
44 if (!inode)
45 goto out;
46
47 ns = ns_ops->get(task);
48 if (!ns)
49 goto out_iput;
50
51 ei = PROC_I(inode);
52 inode->i_mode = S_IFREG|S_IRUSR;
53 inode->i_fop = &ns_file_operations;
54 ei->ns_ops = ns_ops;
55 ei->ns = ns;
56
57 dentry->d_op = &pid_dentry_operations;
58 d_add(dentry, inode);
59 /* Close the race of the process dying before we return the dentry */
60 if (pid_revalidate(dentry, NULL))
61 error = NULL;
62out:
63 return error;
64out_iput:
65 iput(inode);
66 goto out;
67}
68
69static int proc_ns_fill_cache(struct file *filp, void *dirent,
70 filldir_t filldir, struct task_struct *task,
71 const struct proc_ns_operations *ops)
72{
73 return proc_fill_cache(filp, dirent, filldir,
74 ops->name, strlen(ops->name),
75 proc_ns_instantiate, task, ops);
76}
77
78static int proc_ns_dir_readdir(struct file *filp, void *dirent,
79 filldir_t filldir)
80{
81 int i;
82 struct dentry *dentry = filp->f_path.dentry;
83 struct inode *inode = dentry->d_inode;
84 struct task_struct *task = get_proc_task(inode);
85 const struct proc_ns_operations **entry, **last;
86 ino_t ino;
87 int ret;
88
89 ret = -ENOENT;
90 if (!task)
91 goto out_no_task;
92
93 ret = -EPERM;
94 if (!ptrace_may_access(task, PTRACE_MODE_READ))
95 goto out;
96
97 ret = 0;
98 i = filp->f_pos;
99 switch (i) {
100 case 0:
101 ino = inode->i_ino;
102 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
103 goto out;
104 i++;
105 filp->f_pos++;
106 /* fall through */
107 case 1:
108 ino = parent_ino(dentry);
109 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
110 goto out;
111 i++;
112 filp->f_pos++;
113 /* fall through */
114 default:
115 i -= 2;
116 if (i >= ARRAY_SIZE(ns_entries)) {
117 ret = 1;
118 goto out;
119 }
120 entry = ns_entries + i;
121 last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
122 while (entry <= last) {
123 if (proc_ns_fill_cache(filp, dirent, filldir,
124 task, *entry) < 0)
125 goto out;
126 filp->f_pos++;
127 entry++;
128 }
129 }
130
131 ret = 1;
132out:
133 put_task_struct(task);
134out_no_task:
135 return ret;
136}
137
138const struct file_operations proc_ns_dir_operations = {
139 .read = generic_read_dir,
140 .readdir = proc_ns_dir_readdir,
141};
142
143static struct dentry *proc_ns_dir_lookup(struct inode *dir,
144 struct dentry *dentry, struct nameidata *nd)
145{
146 struct dentry *error;
147 struct task_struct *task = get_proc_task(dir);
148 const struct proc_ns_operations **entry, **last;
149 unsigned int len = dentry->d_name.len;
150
151 error = ERR_PTR(-ENOENT);
152
153 if (!task)
154 goto out_no_task;
155
156 error = ERR_PTR(-EPERM);
157 if (!ptrace_may_access(task, PTRACE_MODE_READ))
158 goto out;
159
160 last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
161 for (entry = ns_entries; entry <= last; entry++) {
162 if (strlen((*entry)->name) != len)
163 continue;
164 if (!memcmp(dentry->d_name.name, (*entry)->name, len))
165 break;
166 }
167 error = ERR_PTR(-ENOENT);
168 if (entry > last)
169 goto out;
170
171 error = proc_ns_instantiate(dir, dentry, task, *entry);
172out:
173 put_task_struct(task);
174out_no_task:
175 return error;
176}
177
178const struct inode_operations proc_ns_dir_inode_operations = {
179 .lookup = proc_ns_dir_lookup,
180 .getattr = pid_getattr,
181 .setattr = proc_setattr,
182};
183
184struct file *proc_ns_fget(int fd)
185{
186 struct file *file;
187
188 file = fget(fd);
189 if (!file)
190 return ERR_PTR(-EBADF);
191
192 if (file->f_op != &ns_file_operations)
193 goto out_invalid;
194
195 return file;
196
197out_invalid:
198 fput(file);
199 return ERR_PTR(-EINVAL);
200}
201
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 3b8b45660331..6d8e6a9e93ab 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -40,7 +40,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
40 ppage = pfn_to_page(pfn); 40 ppage = pfn_to_page(pfn);
41 else 41 else
42 ppage = NULL; 42 ppage = NULL;
43 if (!ppage) 43 if (!ppage || PageSlab(ppage))
44 pcount = 0; 44 pcount = 0;
45 else 45 else
46 pcount = page_mapcount(ppage); 46 pcount = page_mapcount(ppage);
@@ -116,15 +116,17 @@ u64 stable_page_flags(struct page *page)
116 if (PageHuge(page)) 116 if (PageHuge(page))
117 u |= 1 << KPF_HUGE; 117 u |= 1 << KPF_HUGE;
118 118
119 u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
120
121 /* 119 /*
122 * Caveats on high order pages: 120 * Caveats on high order pages: page->_count will only be set
123 * PG_buddy will only be set on the head page; SLUB/SLQB do the same 121 * -1 on the head page; SLUB/SLQB do the same for PG_slab;
124 * for PG_slab; SLOB won't set PG_slab at all on compound pages. 122 * SLOB won't set PG_slab at all on compound pages.
125 */ 123 */
124 if (PageBuddy(page))
125 u |= 1 << KPF_BUDDY;
126
127 u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
128
126 u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); 129 u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
127 u |= kpf_copy_bit(k, KPF_BUDDY, PG_buddy);
128 130
129 u |= kpf_copy_bit(k, KPF_ERROR, PG_error); 131 u |= kpf_copy_bit(k, KPF_ERROR, PG_error);
130 u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); 132 u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4fc7ff..927cbd115e53 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
233 return; 233 return;
234 root = of_find_node_by_path("/"); 234 root = of_find_node_by_path("/");
235 if (root == NULL) { 235 if (root == NULL) {
236 printk(KERN_ERR "/proc/device-tree: can't find root\n"); 236 pr_debug("/proc/device-tree: can't find root\n");
237 return; 237 return;
238 } 238 }
239 proc_device_tree_add_node(root, proc_device_tree); 239 proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5be436ea088e..d167de365a8d 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -5,6 +5,7 @@
5#include <linux/sysctl.h> 5#include <linux/sysctl.h>
6#include <linux/proc_fs.h> 6#include <linux/proc_fs.h>
7#include <linux/security.h> 7#include <linux/security.h>
8#include <linux/namei.h>
8#include "internal.h" 9#include "internal.h"
9 10
10static const struct dentry_operations proc_sys_dentry_operations; 11static const struct dentry_operations proc_sys_dentry_operations;
@@ -23,13 +24,14 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
23 if (!inode) 24 if (!inode)
24 goto out; 25 goto out;
25 26
27 inode->i_ino = get_next_ino();
28
26 sysctl_head_get(head); 29 sysctl_head_get(head);
27 ei = PROC_I(inode); 30 ei = PROC_I(inode);
28 ei->sysctl = head; 31 ei->sysctl = head;
29 ei->sysctl_entry = table; 32 ei->sysctl_entry = table;
30 33
31 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 34 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
32 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
33 inode->i_mode = table->mode; 35 inode->i_mode = table->mode;
34 if (!table->child) { 36 if (!table->child) {
35 inode->i_mode |= S_IFREG; 37 inode->i_mode |= S_IFREG;
@@ -118,7 +120,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
118 goto out; 120 goto out;
119 121
120 err = NULL; 122 err = NULL;
121 dentry->d_op = &proc_sys_dentry_operations; 123 d_set_d_op(dentry, &proc_sys_dentry_operations);
122 d_add(dentry, inode); 124 d_add(dentry, inode);
123 125
124out: 126out:
@@ -199,7 +201,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent,
199 dput(child); 201 dput(child);
200 return -ENOMEM; 202 return -ENOMEM;
201 } else { 203 } else {
202 child->d_op = &proc_sys_dentry_operations; 204 d_set_d_op(child, &proc_sys_dentry_operations);
203 d_add(child, inode); 205 d_add(child, inode);
204 } 206 }
205 } else { 207 } else {
@@ -292,7 +294,7 @@ out:
292 return ret; 294 return ret;
293} 295}
294 296
295static int proc_sys_permission(struct inode *inode, int mask) 297static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
296{ 298{
297 /* 299 /*
298 * sysctl entries that are not writeable, 300 * sysctl entries that are not writeable,
@@ -364,6 +366,7 @@ static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
364static const struct file_operations proc_sys_file_operations = { 366static const struct file_operations proc_sys_file_operations = {
365 .read = proc_sys_read, 367 .read = proc_sys_read,
366 .write = proc_sys_write, 368 .write = proc_sys_write,
369 .llseek = default_llseek,
367}; 370};
368 371
369static const struct file_operations proc_sys_dir_file_operations = { 372static const struct file_operations proc_sys_dir_file_operations = {
@@ -386,23 +389,33 @@ static const struct inode_operations proc_sys_dir_operations = {
386 389
387static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 390static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
388{ 391{
392 if (nd->flags & LOOKUP_RCU)
393 return -ECHILD;
389 return !PROC_I(dentry->d_inode)->sysctl->unregistering; 394 return !PROC_I(dentry->d_inode)->sysctl->unregistering;
390} 395}
391 396
392static int proc_sys_delete(struct dentry *dentry) 397static int proc_sys_delete(const struct dentry *dentry)
393{ 398{
394 return !!PROC_I(dentry->d_inode)->sysctl->unregistering; 399 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
395} 400}
396 401
397static int proc_sys_compare(struct dentry *dir, struct qstr *qstr, 402static int proc_sys_compare(const struct dentry *parent,
398 struct qstr *name) 403 const struct inode *pinode,
404 const struct dentry *dentry, const struct inode *inode,
405 unsigned int len, const char *str, const struct qstr *name)
399{ 406{
400 struct dentry *dentry = container_of(qstr, struct dentry, d_name); 407 struct ctl_table_header *head;
401 if (qstr->len != name->len) 408 /* Although proc doesn't have negative dentries, rcu-walk means
409 * that inode here can be NULL */
410 /* AV: can it, indeed? */
411 if (!inode)
412 return 1;
413 if (name->len != len)
402 return 1; 414 return 1;
403 if (memcmp(qstr->name, name->name, name->len)) 415 if (memcmp(name->name, str, len))
404 return 1; 416 return 1;
405 return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl); 417 head = rcu_dereference(PROC_I(inode)->sysctl);
418 return !head || !sysctl_is_seen(head);
406} 419}
407 420
408static const struct dentry_operations proc_sys_dentry_operations = { 421static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 83adcc869437..cb761f010300 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -36,27 +36,27 @@ static void show_tty_range(struct seq_file *m, struct tty_driver *p,
36 } 36 }
37 switch (p->type) { 37 switch (p->type) {
38 case TTY_DRIVER_TYPE_SYSTEM: 38 case TTY_DRIVER_TYPE_SYSTEM:
39 seq_printf(m, "system"); 39 seq_puts(m, "system");
40 if (p->subtype == SYSTEM_TYPE_TTY) 40 if (p->subtype == SYSTEM_TYPE_TTY)
41 seq_printf(m, ":/dev/tty"); 41 seq_puts(m, ":/dev/tty");
42 else if (p->subtype == SYSTEM_TYPE_SYSCONS) 42 else if (p->subtype == SYSTEM_TYPE_SYSCONS)
43 seq_printf(m, ":console"); 43 seq_puts(m, ":console");
44 else if (p->subtype == SYSTEM_TYPE_CONSOLE) 44 else if (p->subtype == SYSTEM_TYPE_CONSOLE)
45 seq_printf(m, ":vtmaster"); 45 seq_puts(m, ":vtmaster");
46 break; 46 break;
47 case TTY_DRIVER_TYPE_CONSOLE: 47 case TTY_DRIVER_TYPE_CONSOLE:
48 seq_printf(m, "console"); 48 seq_puts(m, "console");
49 break; 49 break;
50 case TTY_DRIVER_TYPE_SERIAL: 50 case TTY_DRIVER_TYPE_SERIAL:
51 seq_printf(m, "serial"); 51 seq_puts(m, "serial");
52 break; 52 break;
53 case TTY_DRIVER_TYPE_PTY: 53 case TTY_DRIVER_TYPE_PTY:
54 if (p->subtype == PTY_TYPE_MASTER) 54 if (p->subtype == PTY_TYPE_MASTER)
55 seq_printf(m, "pty:master"); 55 seq_puts(m, "pty:master");
56 else if (p->subtype == PTY_TYPE_SLAVE) 56 else if (p->subtype == PTY_TYPE_SLAVE)
57 seq_printf(m, "pty:slave"); 57 seq_puts(m, "pty:slave");
58 else 58 else
59 seq_printf(m, "pty"); 59 seq_puts(m, "pty");
60 break; 60 break;
61 default: 61 default:
62 seq_printf(m, "type:%d.%d", p->type, p->subtype); 62 seq_printf(m, "type:%d.%d", p->type, p->subtype);
@@ -74,19 +74,19 @@ static int show_tty_driver(struct seq_file *m, void *v)
74 /* pseudo-drivers first */ 74 /* pseudo-drivers first */
75 seq_printf(m, "%-20s /dev/%-8s ", "/dev/tty", "tty"); 75 seq_printf(m, "%-20s /dev/%-8s ", "/dev/tty", "tty");
76 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 0); 76 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 0);
77 seq_printf(m, "system:/dev/tty\n"); 77 seq_puts(m, "system:/dev/tty\n");
78 seq_printf(m, "%-20s /dev/%-8s ", "/dev/console", "console"); 78 seq_printf(m, "%-20s /dev/%-8s ", "/dev/console", "console");
79 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 1); 79 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 1);
80 seq_printf(m, "system:console\n"); 80 seq_puts(m, "system:console\n");
81#ifdef CONFIG_UNIX98_PTYS 81#ifdef CONFIG_UNIX98_PTYS
82 seq_printf(m, "%-20s /dev/%-8s ", "/dev/ptmx", "ptmx"); 82 seq_printf(m, "%-20s /dev/%-8s ", "/dev/ptmx", "ptmx");
83 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 2); 83 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 2);
84 seq_printf(m, "system\n"); 84 seq_puts(m, "system\n");
85#endif 85#endif
86#ifdef CONFIG_VT 86#ifdef CONFIG_VT
87 seq_printf(m, "%-20s /dev/%-8s ", "/dev/vc/0", "vc/0"); 87 seq_printf(m, "%-20s /dev/%-8s ", "/dev/vc/0", "vc/0");
88 seq_printf(m, "%3d %7d ", TTY_MAJOR, 0); 88 seq_printf(m, "%3d %7d ", TTY_MAJOR, 0);
89 seq_printf(m, "system:vtmaster\n"); 89 seq_puts(m, "system:vtmaster\n");
90#endif 90#endif
91 } 91 }
92 92
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 4258384ed22d..d6c3b416529b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -28,32 +28,22 @@ static int proc_test_super(struct super_block *sb, void *data)
28 28
29static int proc_set_super(struct super_block *sb, void *data) 29static int proc_set_super(struct super_block *sb, void *data)
30{ 30{
31 struct pid_namespace *ns; 31 int err = set_anon_super(sb, NULL);
32 32 if (!err) {
33 ns = (struct pid_namespace *)data; 33 struct pid_namespace *ns = (struct pid_namespace *)data;
34 sb->s_fs_info = get_pid_ns(ns); 34 sb->s_fs_info = get_pid_ns(ns);
35 return set_anon_super(sb, NULL); 35 }
36 return err;
36} 37}
37 38
38static int proc_get_sb(struct file_system_type *fs_type, 39static struct dentry *proc_mount(struct file_system_type *fs_type,
39 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 40 int flags, const char *dev_name, void *data)
40{ 41{
41 int err; 42 int err;
42 struct super_block *sb; 43 struct super_block *sb;
43 struct pid_namespace *ns; 44 struct pid_namespace *ns;
44 struct proc_inode *ei; 45 struct proc_inode *ei;
45 46
46 if (proc_mnt) {
47 /* Seed the root directory with a pid so it doesn't need
48 * to be special in base.c. I would do this earlier but
49 * the only task alive when /proc is mounted the first time
50 * is the init_task and it doesn't have any pids.
51 */
52 ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
53 if (!ei->pid)
54 ei->pid = find_get_pid(1);
55 }
56
57 if (flags & MS_KERNMOUNT) 47 if (flags & MS_KERNMOUNT)
58 ns = (struct pid_namespace *)data; 48 ns = (struct pid_namespace *)data;
59 else 49 else
@@ -61,29 +51,27 @@ static int proc_get_sb(struct file_system_type *fs_type,
61 51
62 sb = sget(fs_type, proc_test_super, proc_set_super, ns); 52 sb = sget(fs_type, proc_test_super, proc_set_super, ns);
63 if (IS_ERR(sb)) 53 if (IS_ERR(sb))
64 return PTR_ERR(sb); 54 return ERR_CAST(sb);
65 55
66 if (!sb->s_root) { 56 if (!sb->s_root) {
67 sb->s_flags = flags; 57 sb->s_flags = flags;
68 err = proc_fill_super(sb); 58 err = proc_fill_super(sb);
69 if (err) { 59 if (err) {
70 deactivate_locked_super(sb); 60 deactivate_locked_super(sb);
71 return err; 61 return ERR_PTR(err);
72 }
73
74 ei = PROC_I(sb->s_root->d_inode);
75 if (!ei->pid) {
76 rcu_read_lock();
77 ei->pid = get_pid(find_pid_ns(1, ns));
78 rcu_read_unlock();
79 } 62 }
80 63
81 sb->s_flags |= MS_ACTIVE; 64 sb->s_flags |= MS_ACTIVE;
82 ns->proc_mnt = mnt;
83 } 65 }
84 66
85 simple_set_mnt(mnt, sb); 67 ei = PROC_I(sb->s_root->d_inode);
86 return 0; 68 if (!ei->pid) {
69 rcu_read_lock();
70 ei->pid = get_pid(find_pid_ns(1, ns));
71 rcu_read_unlock();
72 }
73
74 return dget(sb->s_root);
87} 75}
88 76
89static void proc_kill_sb(struct super_block *sb) 77static void proc_kill_sb(struct super_block *sb)
@@ -97,24 +85,26 @@ static void proc_kill_sb(struct super_block *sb)
97 85
98static struct file_system_type proc_fs_type = { 86static struct file_system_type proc_fs_type = {
99 .name = "proc", 87 .name = "proc",
100 .get_sb = proc_get_sb, 88 .mount = proc_mount,
101 .kill_sb = proc_kill_sb, 89 .kill_sb = proc_kill_sb,
102}; 90};
103 91
104void __init proc_root_init(void) 92void __init proc_root_init(void)
105{ 93{
94 struct vfsmount *mnt;
106 int err; 95 int err;
107 96
108 proc_init_inodecache(); 97 proc_init_inodecache();
109 err = register_filesystem(&proc_fs_type); 98 err = register_filesystem(&proc_fs_type);
110 if (err) 99 if (err)
111 return; 100 return;
112 proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); 101 mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
113 if (IS_ERR(proc_mnt)) { 102 if (IS_ERR(mnt)) {
114 unregister_filesystem(&proc_fs_type); 103 unregister_filesystem(&proc_fs_type);
115 return; 104 return;
116 } 105 }
117 106
107 init_pid_ns.proc_mnt = mnt;
118 proc_symlink("mounts", NULL, "self/mounts"); 108 proc_symlink("mounts", NULL, "self/mounts");
119 109
120 proc_net_init(); 110 proc_net_init();
@@ -179,6 +169,7 @@ static int proc_root_readdir(struct file * filp,
179static const struct file_operations proc_root_operations = { 169static const struct file_operations proc_root_operations = {
180 .read = generic_read_dir, 170 .read = generic_read_dir,
181 .readdir = proc_root_readdir, 171 .readdir = proc_root_readdir,
172 .llseek = default_llseek,
182}; 173};
183 174
184/* 175/*
@@ -212,6 +203,7 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
212 if (IS_ERR(mnt)) 203 if (IS_ERR(mnt))
213 return PTR_ERR(mnt); 204 return PTR_ERR(mnt);
214 205
206 ns->proc_mnt = mnt;
215 return 0; 207 return 0;
216} 208}
217 209
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 1807c2419f17..62604be9f58d 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -10,16 +10,16 @@ static int show_softirqs(struct seq_file *p, void *v)
10{ 10{
11 int i, j; 11 int i, j;
12 12
13 seq_printf(p, " "); 13 seq_puts(p, " ");
14 for_each_possible_cpu(i) 14 for_each_possible_cpu(i)
15 seq_printf(p, "CPU%-8d", i); 15 seq_printf(p, "CPU%-8d", i);
16 seq_printf(p, "\n"); 16 seq_putc(p, '\n');
17 17
18 for (i = 0; i < NR_SOFTIRQS; i++) { 18 for (i = 0; i < NR_SOFTIRQS; i++) {
19 seq_printf(p, "%8s:", softirq_to_name[i]); 19 seq_printf(p, "%12s:", softirq_to_name[i]);
20 for_each_possible_cpu(j) 20 for_each_possible_cpu(j)
21 seq_printf(p, " %10u", kstat_softirqs_cpu(i, j)); 21 seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
22 seq_printf(p, "\n"); 22 seq_putc(p, '\n');
23 } 23 }
24 return 0; 24 return 0;
25} 25}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index bf31b03fc275..9758b654a1bc 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -31,7 +31,6 @@ static int show_stat(struct seq_file *p, void *v)
31 u64 sum_softirq = 0; 31 u64 sum_softirq = 0;
32 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; 32 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
33 struct timespec boottime; 33 struct timespec boottime;
34 unsigned int per_irq_sum;
35 34
36 user = nice = system = idle = iowait = 35 user = nice = system = idle = iowait =
37 irq = softirq = steal = cputime64_zero; 36 irq = softirq = steal = cputime64_zero;
@@ -52,9 +51,7 @@ static int show_stat(struct seq_file *p, void *v)
52 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); 51 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
53 guest_nice = cputime64_add(guest_nice, 52 guest_nice = cputime64_add(guest_nice,
54 kstat_cpu(i).cpustat.guest_nice); 53 kstat_cpu(i).cpustat.guest_nice);
55 for_each_irq_nr(j) { 54 sum += kstat_cpu_irqs_sum(i);
56 sum += kstat_irqs_cpu(j, i);
57 }
58 sum += arch_irq_stat_cpu(i); 55 sum += arch_irq_stat_cpu(i);
59 56
60 for (j = 0; j < NR_SOFTIRQS; j++) { 57 for (j = 0; j < NR_SOFTIRQS; j++) {
@@ -110,13 +107,8 @@ static int show_stat(struct seq_file *p, void *v)
110 seq_printf(p, "intr %llu", (unsigned long long)sum); 107 seq_printf(p, "intr %llu", (unsigned long long)sum);
111 108
112 /* sum again ? it could be updated? */ 109 /* sum again ? it could be updated? */
113 for_each_irq_nr(j) { 110 for_each_irq_nr(j)
114 per_irq_sum = 0; 111 seq_printf(p, " %u", kstat_irqs(j));
115 for_each_possible_cpu(i)
116 per_irq_sum += kstat_irqs_cpu(j, i);
117
118 seq_printf(p, " %u", per_irq_sum);
119 }
120 112
121 seq_printf(p, 113 seq_printf(p,
122 "\nctxt %llu\n" 114 "\nctxt %llu\n"
@@ -134,7 +126,7 @@ static int show_stat(struct seq_file *p, void *v)
134 126
135 for (i = 0; i < NR_SOFTIRQS; i++) 127 for (i = 0; i < NR_SOFTIRQS; i++)
136 seq_printf(p, " %u", per_softirq_sums[i]); 128 seq_printf(p, " %u", per_softirq_sums[i]);
137 seq_printf(p, "\n"); 129 seq_putc(p, '\n');
138 130
139 return 0; 131 return 0;
140} 132}
@@ -146,9 +138,9 @@ static int stat_open(struct inode *inode, struct file *file)
146 struct seq_file *m; 138 struct seq_file *m;
147 int res; 139 int res;
148 140
149 /* don't ask for more than the kmalloc() max size, currently 128 KB */ 141 /* don't ask for more than the kmalloc() max size */
150 if (size > 128 * 1024) 142 if (size > KMALLOC_MAX_SIZE)
151 size = 128 * 1024; 143 size = KMALLOC_MAX_SIZE;
152 buf = kmalloc(size, GFP_KERNEL); 144 buf = kmalloc(size, GFP_KERNEL);
153 if (!buf) 145 if (!buf)
154 return -ENOMEM; 146 return -ENOMEM;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 1dbca4e8cc16..25b6a887adb9 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,5 +1,6 @@
1#include <linux/mm.h> 1#include <linux/mm.h>
2#include <linux/hugetlb.h> 2#include <linux/hugetlb.h>
3#include <linux/huge_mm.h>
3#include <linux/mount.h> 4#include <linux/mount.h>
4#include <linux/seq_file.h> 5#include <linux/seq_file.h>
5#include <linux/highmem.h> 6#include <linux/highmem.h>
@@ -7,6 +8,7 @@
7#include <linux/slab.h> 8#include <linux/slab.h>
8#include <linux/pagemap.h> 9#include <linux/pagemap.h>
9#include <linux/mempolicy.h> 10#include <linux/mempolicy.h>
11#include <linux/rmap.h>
10#include <linux/swap.h> 12#include <linux/swap.h>
11#include <linux/swapops.h> 13#include <linux/swapops.h>
12 14
@@ -66,8 +68,9 @@ unsigned long task_vsize(struct mm_struct *mm)
66 return PAGE_SIZE * mm->total_vm; 68 return PAGE_SIZE * mm->total_vm;
67} 69}
68 70
69int task_statm(struct mm_struct *mm, int *shared, int *text, 71unsigned long task_statm(struct mm_struct *mm,
70 int *data, int *resident) 72 unsigned long *shared, unsigned long *text,
73 unsigned long *data, unsigned long *resident)
71{ 74{
72 *shared = get_mm_counter(mm, MM_FILEPAGES); 75 *shared = get_mm_counter(mm, MM_FILEPAGES);
73 *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) 76 *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
@@ -118,14 +121,14 @@ static void *m_start(struct seq_file *m, loff_t *pos)
118 121
119 priv->task = get_pid_task(priv->pid, PIDTYPE_PID); 122 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
120 if (!priv->task) 123 if (!priv->task)
121 return NULL; 124 return ERR_PTR(-ESRCH);
122 125
123 mm = mm_for_maps(priv->task); 126 mm = mm_for_maps(priv->task);
124 if (!mm) 127 if (!mm || IS_ERR(mm))
125 return NULL; 128 return mm;
126 down_read(&mm->mmap_sem); 129 down_read(&mm->mmap_sem);
127 130
128 tail_vma = get_gate_vma(priv->task); 131 tail_vma = get_gate_vma(priv->task->mm);
129 priv->tail_vma = tail_vma; 132 priv->tail_vma = tail_vma;
130 133
131 /* Start with last addr hint */ 134 /* Start with last addr hint */
@@ -179,7 +182,8 @@ static void m_stop(struct seq_file *m, void *v)
179 struct proc_maps_private *priv = m->private; 182 struct proc_maps_private *priv = m->private;
180 struct vm_area_struct *vma = v; 183 struct vm_area_struct *vma = v;
181 184
182 vma_stop(priv, vma); 185 if (!IS_ERR(vma))
186 vma_stop(priv, vma);
183 if (priv->task) 187 if (priv->task)
184 put_task_struct(priv->task); 188 put_task_struct(priv->task);
185} 189}
@@ -207,10 +211,10 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
207{ 211{
208 struct mm_struct *mm = vma->vm_mm; 212 struct mm_struct *mm = vma->vm_mm;
209 struct file *file = vma->vm_file; 213 struct file *file = vma->vm_file;
210 int flags = vma->vm_flags; 214 vm_flags_t flags = vma->vm_flags;
211 unsigned long ino = 0; 215 unsigned long ino = 0;
212 unsigned long long pgoff = 0; 216 unsigned long long pgoff = 0;
213 unsigned long start; 217 unsigned long start, end;
214 dev_t dev = 0; 218 dev_t dev = 0;
215 int len; 219 int len;
216 220
@@ -223,13 +227,15 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
223 227
224 /* We don't show the stack guard page in /proc/maps */ 228 /* We don't show the stack guard page in /proc/maps */
225 start = vma->vm_start; 229 start = vma->vm_start;
226 if (vma->vm_flags & VM_GROWSDOWN) 230 if (stack_guard_page_start(vma, start))
227 if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) 231 start += PAGE_SIZE;
228 start += PAGE_SIZE; 232 end = vma->vm_end;
233 if (stack_guard_page_end(vma, end))
234 end -= PAGE_SIZE;
229 235
230 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", 236 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
231 start, 237 start,
232 vma->vm_end, 238 end,
233 flags & VM_READ ? 'r' : '-', 239 flags & VM_READ ? 'r' : '-',
234 flags & VM_WRITE ? 'w' : '-', 240 flags & VM_WRITE ? 'w' : '-',
235 flags & VM_EXEC ? 'x' : '-', 241 flags & VM_EXEC ? 'x' : '-',
@@ -248,8 +254,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
248 const char *name = arch_vma_name(vma); 254 const char *name = arch_vma_name(vma);
249 if (!name) { 255 if (!name) {
250 if (mm) { 256 if (mm) {
251 if (vma->vm_start <= mm->start_brk && 257 if (vma->vm_start <= mm->brk &&
252 vma->vm_end >= mm->brk) { 258 vma->vm_end >= mm->start_brk) {
253 name = "[heap]"; 259 name = "[heap]";
254 } else if (vma->vm_start <= mm->start_stack && 260 } else if (vma->vm_start <= mm->start_stack &&
255 vma->vm_end >= mm->start_stack) { 261 vma->vm_end >= mm->start_stack) {
@@ -276,7 +282,8 @@ static int show_map(struct seq_file *m, void *v)
276 show_map_vma(m, vma); 282 show_map_vma(m, vma);
277 283
278 if (m->count < m->size) /* vma is copied successfully */ 284 if (m->count < m->size) /* vma is copied successfully */
279 m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; 285 m->version = (vma != get_gate_vma(task->mm))
286 ? vma->vm_start : 0;
280 return 0; 287 return 0;
281} 288}
282 289
@@ -327,55 +334,87 @@ struct mem_size_stats {
327 unsigned long private_clean; 334 unsigned long private_clean;
328 unsigned long private_dirty; 335 unsigned long private_dirty;
329 unsigned long referenced; 336 unsigned long referenced;
337 unsigned long anonymous;
338 unsigned long anonymous_thp;
330 unsigned long swap; 339 unsigned long swap;
331 u64 pss; 340 u64 pss;
332}; 341};
333 342
334static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 343
335 struct mm_walk *walk) 344static void smaps_pte_entry(pte_t ptent, unsigned long addr,
345 unsigned long ptent_size, struct mm_walk *walk)
336{ 346{
337 struct mem_size_stats *mss = walk->private; 347 struct mem_size_stats *mss = walk->private;
338 struct vm_area_struct *vma = mss->vma; 348 struct vm_area_struct *vma = mss->vma;
339 pte_t *pte, ptent;
340 spinlock_t *ptl;
341 struct page *page; 349 struct page *page;
342 int mapcount; 350 int mapcount;
343 351
344 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 352 if (is_swap_pte(ptent)) {
345 for (; addr != end; pte++, addr += PAGE_SIZE) { 353 mss->swap += ptent_size;
346 ptent = *pte; 354 return;
347 355 }
348 if (is_swap_pte(ptent)) {
349 mss->swap += PAGE_SIZE;
350 continue;
351 }
352 356
353 if (!pte_present(ptent)) 357 if (!pte_present(ptent))
354 continue; 358 return;
359
360 page = vm_normal_page(vma, addr, ptent);
361 if (!page)
362 return;
363
364 if (PageAnon(page))
365 mss->anonymous += ptent_size;
366
367 mss->resident += ptent_size;
368 /* Accumulate the size in pages that have been accessed. */
369 if (pte_young(ptent) || PageReferenced(page))
370 mss->referenced += ptent_size;
371 mapcount = page_mapcount(page);
372 if (mapcount >= 2) {
373 if (pte_dirty(ptent) || PageDirty(page))
374 mss->shared_dirty += ptent_size;
375 else
376 mss->shared_clean += ptent_size;
377 mss->pss += (ptent_size << PSS_SHIFT) / mapcount;
378 } else {
379 if (pte_dirty(ptent) || PageDirty(page))
380 mss->private_dirty += ptent_size;
381 else
382 mss->private_clean += ptent_size;
383 mss->pss += (ptent_size << PSS_SHIFT);
384 }
385}
355 386
356 page = vm_normal_page(vma, addr, ptent); 387static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
357 if (!page) 388 struct mm_walk *walk)
358 continue; 389{
390 struct mem_size_stats *mss = walk->private;
391 struct vm_area_struct *vma = mss->vma;
392 pte_t *pte;
393 spinlock_t *ptl;
359 394
360 mss->resident += PAGE_SIZE; 395 spin_lock(&walk->mm->page_table_lock);
361 /* Accumulate the size in pages that have been accessed. */ 396 if (pmd_trans_huge(*pmd)) {
362 if (pte_young(ptent) || PageReferenced(page)) 397 if (pmd_trans_splitting(*pmd)) {
363 mss->referenced += PAGE_SIZE; 398 spin_unlock(&walk->mm->page_table_lock);
364 mapcount = page_mapcount(page); 399 wait_split_huge_page(vma->anon_vma, pmd);
365 if (mapcount >= 2) {
366 if (pte_dirty(ptent) || PageDirty(page))
367 mss->shared_dirty += PAGE_SIZE;
368 else
369 mss->shared_clean += PAGE_SIZE;
370 mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
371 } else { 400 } else {
372 if (pte_dirty(ptent) || PageDirty(page)) 401 smaps_pte_entry(*(pte_t *)pmd, addr,
373 mss->private_dirty += PAGE_SIZE; 402 HPAGE_PMD_SIZE, walk);
374 else 403 spin_unlock(&walk->mm->page_table_lock);
375 mss->private_clean += PAGE_SIZE; 404 mss->anonymous_thp += HPAGE_PMD_SIZE;
376 mss->pss += (PAGE_SIZE << PSS_SHIFT); 405 return 0;
377 } 406 }
407 } else {
408 spin_unlock(&walk->mm->page_table_lock);
378 } 409 }
410 /*
411 * The mmap_sem held all the way back in m_start() is what
412 * keeps khugepaged out of here and from collapsing things
413 * in here.
414 */
415 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
416 for (; addr != end; pte++, addr += PAGE_SIZE)
417 smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
379 pte_unmap_unlock(pte - 1, ptl); 418 pte_unmap_unlock(pte - 1, ptl);
380 cond_resched(); 419 cond_resched();
381 return 0; 420 return 0;
@@ -410,9 +449,12 @@ static int show_smap(struct seq_file *m, void *v)
410 "Private_Clean: %8lu kB\n" 449 "Private_Clean: %8lu kB\n"
411 "Private_Dirty: %8lu kB\n" 450 "Private_Dirty: %8lu kB\n"
412 "Referenced: %8lu kB\n" 451 "Referenced: %8lu kB\n"
452 "Anonymous: %8lu kB\n"
453 "AnonHugePages: %8lu kB\n"
413 "Swap: %8lu kB\n" 454 "Swap: %8lu kB\n"
414 "KernelPageSize: %8lu kB\n" 455 "KernelPageSize: %8lu kB\n"
415 "MMUPageSize: %8lu kB\n", 456 "MMUPageSize: %8lu kB\n"
457 "Locked: %8lu kB\n",
416 (vma->vm_end - vma->vm_start) >> 10, 458 (vma->vm_end - vma->vm_start) >> 10,
417 mss.resident >> 10, 459 mss.resident >> 10,
418 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), 460 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
@@ -421,12 +463,17 @@ static int show_smap(struct seq_file *m, void *v)
421 mss.private_clean >> 10, 463 mss.private_clean >> 10,
422 mss.private_dirty >> 10, 464 mss.private_dirty >> 10,
423 mss.referenced >> 10, 465 mss.referenced >> 10,
466 mss.anonymous >> 10,
467 mss.anonymous_thp >> 10,
424 mss.swap >> 10, 468 mss.swap >> 10,
425 vma_kernel_pagesize(vma) >> 10, 469 vma_kernel_pagesize(vma) >> 10,
426 vma_mmu_pagesize(vma) >> 10); 470 vma_mmu_pagesize(vma) >> 10,
471 (vma->vm_flags & VM_LOCKED) ?
472 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
427 473
428 if (m->count < m->size) /* vma is copied successfully */ 474 if (m->count < m->size) /* vma is copied successfully */
429 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; 475 m->version = (vma != get_gate_vma(task->mm))
476 ? vma->vm_start : 0;
430 return 0; 477 return 0;
431} 478}
432 479
@@ -457,6 +504,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
457 spinlock_t *ptl; 504 spinlock_t *ptl;
458 struct page *page; 505 struct page *page;
459 506
507 split_huge_page_pmd(walk->mm, pmd);
508
460 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 509 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
461 for (; addr != end; pte++, addr += PAGE_SIZE) { 510 for (; addr != end; pte++, addr += PAGE_SIZE) {
462 ptent = *pte; 511 ptent = *pte;
@@ -487,15 +536,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
487 char buffer[PROC_NUMBUF]; 536 char buffer[PROC_NUMBUF];
488 struct mm_struct *mm; 537 struct mm_struct *mm;
489 struct vm_area_struct *vma; 538 struct vm_area_struct *vma;
490 long type; 539 int type;
540 int rv;
491 541
492 memset(buffer, 0, sizeof(buffer)); 542 memset(buffer, 0, sizeof(buffer));
493 if (count > sizeof(buffer) - 1) 543 if (count > sizeof(buffer) - 1)
494 count = sizeof(buffer) - 1; 544 count = sizeof(buffer) - 1;
495 if (copy_from_user(buffer, buf, count)) 545 if (copy_from_user(buffer, buf, count))
496 return -EFAULT; 546 return -EFAULT;
497 if (strict_strtol(strstrip(buffer), 10, &type)) 547 rv = kstrtoint(strstrip(buffer), 10, &type);
498 return -EINVAL; 548 if (rv < 0)
549 return rv;
499 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) 550 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
500 return -EINVAL; 551 return -EINVAL;
501 task = get_proc_task(file->f_path.dentry->d_inode); 552 task = get_proc_task(file->f_path.dentry->d_inode);
@@ -539,6 +590,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
539 590
540const struct file_operations proc_clear_refs_operations = { 591const struct file_operations proc_clear_refs_operations = {
541 .write = clear_refs_write, 592 .write = clear_refs_write,
593 .llseek = noop_llseek,
542}; 594};
543 595
544struct pagemapread { 596struct pagemapread {
@@ -612,6 +664,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
612 pte_t *pte; 664 pte_t *pte;
613 int err = 0; 665 int err = 0;
614 666
667 split_huge_page_pmd(walk->mm, pmd);
668
615 /* find the first VMA at or above 'addr' */ 669 /* find the first VMA at or above 'addr' */
616 vma = find_vma(walk->mm, addr); 670 vma = find_vma(walk->mm, addr);
617 for (; addr != end; addr += PAGE_SIZE) { 671 for (; addr != end; addr += PAGE_SIZE) {
@@ -699,6 +753,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
699 * skip over unmapped regions. 753 * skip over unmapped regions.
700 */ 754 */
701#define PAGEMAP_WALK_SIZE (PMD_SIZE) 755#define PAGEMAP_WALK_SIZE (PMD_SIZE)
756#define PAGEMAP_WALK_MASK (PMD_MASK)
702static ssize_t pagemap_read(struct file *file, char __user *buf, 757static ssize_t pagemap_read(struct file *file, char __user *buf,
703 size_t count, loff_t *ppos) 758 size_t count, loff_t *ppos)
704{ 759{
@@ -716,29 +771,25 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
716 if (!task) 771 if (!task)
717 goto out; 772 goto out;
718 773
719 ret = -EACCES;
720 if (!ptrace_may_access(task, PTRACE_MODE_READ))
721 goto out_task;
722
723 ret = -EINVAL; 774 ret = -EINVAL;
724 /* file position must be aligned */ 775 /* file position must be aligned */
725 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) 776 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
726 goto out_task; 777 goto out_task;
727 778
728 ret = 0; 779 ret = 0;
729
730 if (!count) 780 if (!count)
731 goto out_task; 781 goto out_task;
732 782
733 mm = get_task_mm(task);
734 if (!mm)
735 goto out_task;
736
737 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); 783 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
738 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); 784 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
739 ret = -ENOMEM; 785 ret = -ENOMEM;
740 if (!pm.buffer) 786 if (!pm.buffer)
741 goto out_mm; 787 goto out_task;
788
789 mm = mm_for_maps(task);
790 ret = PTR_ERR(mm);
791 if (!mm || IS_ERR(mm))
792 goto out_free;
742 793
743 pagemap_walk.pmd_entry = pagemap_pte_range; 794 pagemap_walk.pmd_entry = pagemap_pte_range;
744 pagemap_walk.pte_hole = pagemap_pte_hole; 795 pagemap_walk.pte_hole = pagemap_pte_hole;
@@ -769,7 +820,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
769 unsigned long end; 820 unsigned long end;
770 821
771 pm.pos = 0; 822 pm.pos = 0;
772 end = start_vaddr + PAGEMAP_WALK_SIZE; 823 end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
773 /* overflow ? */ 824 /* overflow ? */
774 if (end < start_vaddr || end > end_vaddr) 825 if (end < start_vaddr || end > end_vaddr)
775 end = end_vaddr; 826 end = end_vaddr;
@@ -781,7 +832,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
781 len = min(count, PM_ENTRY_BYTES * pm.pos); 832 len = min(count, PM_ENTRY_BYTES * pm.pos);
782 if (copy_to_user(buf, pm.buffer, len)) { 833 if (copy_to_user(buf, pm.buffer, len)) {
783 ret = -EFAULT; 834 ret = -EFAULT;
784 goto out_free; 835 goto out_mm;
785 } 836 }
786 copied += len; 837 copied += len;
787 buf += len; 838 buf += len;
@@ -791,10 +842,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
791 if (!ret || ret == PM_END_OF_BUFFER) 842 if (!ret || ret == PM_END_OF_BUFFER)
792 ret = copied; 843 ret = copied;
793 844
794out_free:
795 kfree(pm.buffer);
796out_mm: 845out_mm:
797 mmput(mm); 846 mmput(mm);
847out_free:
848 kfree(pm.buffer);
798out_task: 849out_task:
799 put_task_struct(task); 850 put_task_struct(task);
800out: 851out:
@@ -808,7 +859,192 @@ const struct file_operations proc_pagemap_operations = {
808#endif /* CONFIG_PROC_PAGE_MONITOR */ 859#endif /* CONFIG_PROC_PAGE_MONITOR */
809 860
810#ifdef CONFIG_NUMA 861#ifdef CONFIG_NUMA
811extern int show_numa_map(struct seq_file *m, void *v); 862
863struct numa_maps {
864 struct vm_area_struct *vma;
865 unsigned long pages;
866 unsigned long anon;
867 unsigned long active;
868 unsigned long writeback;
869 unsigned long mapcount_max;
870 unsigned long dirty;
871 unsigned long swapcache;
872 unsigned long node[MAX_NUMNODES];
873};
874
875struct numa_maps_private {
876 struct proc_maps_private proc_maps;
877 struct numa_maps md;
878};
879
880static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
881{
882 int count = page_mapcount(page);
883
884 md->pages++;
885 if (pte_dirty || PageDirty(page))
886 md->dirty++;
887
888 if (PageSwapCache(page))
889 md->swapcache++;
890
891 if (PageActive(page) || PageUnevictable(page))
892 md->active++;
893
894 if (PageWriteback(page))
895 md->writeback++;
896
897 if (PageAnon(page))
898 md->anon++;
899
900 if (count > md->mapcount_max)
901 md->mapcount_max = count;
902
903 md->node[page_to_nid(page)]++;
904}
905
906static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
907 unsigned long end, struct mm_walk *walk)
908{
909 struct numa_maps *md;
910 spinlock_t *ptl;
911 pte_t *orig_pte;
912 pte_t *pte;
913
914 md = walk->private;
915 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
916 do {
917 struct page *page;
918 int nid;
919
920 if (!pte_present(*pte))
921 continue;
922
923 page = vm_normal_page(md->vma, addr, *pte);
924 if (!page)
925 continue;
926
927 if (PageReserved(page))
928 continue;
929
930 nid = page_to_nid(page);
931 if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
932 continue;
933
934 gather_stats(page, md, pte_dirty(*pte));
935
936 } while (pte++, addr += PAGE_SIZE, addr != end);
937 pte_unmap_unlock(orig_pte, ptl);
938 return 0;
939}
940#ifdef CONFIG_HUGETLB_PAGE
941static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
942 unsigned long addr, unsigned long end, struct mm_walk *walk)
943{
944 struct numa_maps *md;
945 struct page *page;
946
947 if (pte_none(*pte))
948 return 0;
949
950 page = pte_page(*pte);
951 if (!page)
952 return 0;
953
954 md = walk->private;
955 gather_stats(page, md, pte_dirty(*pte));
956 return 0;
957}
958
959#else
960static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
961 unsigned long addr, unsigned long end, struct mm_walk *walk)
962{
963 return 0;
964}
965#endif
966
967/*
968 * Display pages allocated per node and memory policy via /proc.
969 */
970static int show_numa_map(struct seq_file *m, void *v)
971{
972 struct numa_maps_private *numa_priv = m->private;
973 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
974 struct vm_area_struct *vma = v;
975 struct numa_maps *md = &numa_priv->md;
976 struct file *file = vma->vm_file;
977 struct mm_struct *mm = vma->vm_mm;
978 struct mm_walk walk = {};
979 struct mempolicy *pol;
980 int n;
981 char buffer[50];
982
983 if (!mm)
984 return 0;
985
986 /* Ensure we start with an empty set of numa_maps statistics. */
987 memset(md, 0, sizeof(*md));
988
989 md->vma = vma;
990
991 walk.hugetlb_entry = gather_hugetbl_stats;
992 walk.pmd_entry = gather_pte_stats;
993 walk.private = md;
994 walk.mm = mm;
995
996 pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
997 mpol_to_str(buffer, sizeof(buffer), pol, 0);
998 mpol_cond_put(pol);
999
1000 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
1001
1002 if (file) {
1003 seq_printf(m, " file=");
1004 seq_path(m, &file->f_path, "\n\t= ");
1005 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
1006 seq_printf(m, " heap");
1007 } else if (vma->vm_start <= mm->start_stack &&
1008 vma->vm_end >= mm->start_stack) {
1009 seq_printf(m, " stack");
1010 }
1011
1012 walk_page_range(vma->vm_start, vma->vm_end, &walk);
1013
1014 if (!md->pages)
1015 goto out;
1016
1017 if (md->anon)
1018 seq_printf(m, " anon=%lu", md->anon);
1019
1020 if (md->dirty)
1021 seq_printf(m, " dirty=%lu", md->dirty);
1022
1023 if (md->pages != md->anon && md->pages != md->dirty)
1024 seq_printf(m, " mapped=%lu", md->pages);
1025
1026 if (md->mapcount_max > 1)
1027 seq_printf(m, " mapmax=%lu", md->mapcount_max);
1028
1029 if (md->swapcache)
1030 seq_printf(m, " swapcache=%lu", md->swapcache);
1031
1032 if (md->active < md->pages && !is_vm_hugetlb_page(vma))
1033 seq_printf(m, " active=%lu", md->active);
1034
1035 if (md->writeback)
1036 seq_printf(m, " writeback=%lu", md->writeback);
1037
1038 for_each_node_state(n, N_HIGH_MEMORY)
1039 if (md->node[n])
1040 seq_printf(m, " N%d=%lu", n, md->node[n]);
1041out:
1042 seq_putc(m, '\n');
1043
1044 if (m->count < m->size)
1045 m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0;
1046 return 0;
1047}
812 1048
813static const struct seq_operations proc_pid_numa_maps_op = { 1049static const struct seq_operations proc_pid_numa_maps_op = {
814 .start = m_start, 1050 .start = m_start,
@@ -819,7 +1055,20 @@ static const struct seq_operations proc_pid_numa_maps_op = {
819 1055
820static int numa_maps_open(struct inode *inode, struct file *file) 1056static int numa_maps_open(struct inode *inode, struct file *file)
821{ 1057{
822 return do_maps_open(inode, file, &proc_pid_numa_maps_op); 1058 struct numa_maps_private *priv;
1059 int ret = -ENOMEM;
1060 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1061 if (priv) {
1062 priv->proc_maps.pid = proc_pid(inode);
1063 ret = seq_open(file, &proc_pid_numa_maps_op);
1064 if (!ret) {
1065 struct seq_file *m = file->private_data;
1066 m->private = priv;
1067 } else {
1068 kfree(priv);
1069 }
1070 }
1071 return ret;
823} 1072}
824 1073
825const struct file_operations proc_numa_maps_operations = { 1074const struct file_operations proc_numa_maps_operations = {
@@ -828,4 +1077,4 @@ const struct file_operations proc_numa_maps_operations = {
828 .llseek = seq_lseek, 1077 .llseek = seq_lseek,
829 .release = seq_release_private, 1078 .release = seq_release_private,
830}; 1079};
831#endif 1080#endif /* CONFIG_NUMA */
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index cb6306e63843..980de547c070 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -92,13 +92,14 @@ unsigned long task_vsize(struct mm_struct *mm)
92 return vsize; 92 return vsize;
93} 93}
94 94
95int task_statm(struct mm_struct *mm, int *shared, int *text, 95unsigned long task_statm(struct mm_struct *mm,
96 int *data, int *resident) 96 unsigned long *shared, unsigned long *text,
97 unsigned long *data, unsigned long *resident)
97{ 98{
98 struct vm_area_struct *vma; 99 struct vm_area_struct *vma;
99 struct vm_region *region; 100 struct vm_region *region;
100 struct rb_node *p; 101 struct rb_node *p;
101 int size = kobjsize(mm); 102 unsigned long size = kobjsize(mm);
102 103
103 down_read(&mm->mmap_sem); 104 down_read(&mm->mmap_sem);
104 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { 105 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
@@ -198,13 +199,13 @@ static void *m_start(struct seq_file *m, loff_t *pos)
198 /* pin the task and mm whilst we play with them */ 199 /* pin the task and mm whilst we play with them */
199 priv->task = get_pid_task(priv->pid, PIDTYPE_PID); 200 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
200 if (!priv->task) 201 if (!priv->task)
201 return NULL; 202 return ERR_PTR(-ESRCH);
202 203
203 mm = mm_for_maps(priv->task); 204 mm = mm_for_maps(priv->task);
204 if (!mm) { 205 if (!mm || IS_ERR(mm)) {
205 put_task_struct(priv->task); 206 put_task_struct(priv->task);
206 priv->task = NULL; 207 priv->task = NULL;
207 return NULL; 208 return mm;
208 } 209 }
209 down_read(&mm->mmap_sem); 210 down_read(&mm->mmap_sem);
210 211
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 2367fb3f70bc..cd99bf557650 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -35,6 +35,46 @@ static u64 vmcore_size;
35 35
36static struct proc_dir_entry *proc_vmcore = NULL; 36static struct proc_dir_entry *proc_vmcore = NULL;
37 37
38/*
39 * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
40 * The called function has to take care of module refcounting.
41 */
42static int (*oldmem_pfn_is_ram)(unsigned long pfn);
43
44int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn))
45{
46 if (oldmem_pfn_is_ram)
47 return -EBUSY;
48 oldmem_pfn_is_ram = fn;
49 return 0;
50}
51EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram);
52
53void unregister_oldmem_pfn_is_ram(void)
54{
55 oldmem_pfn_is_ram = NULL;
56 wmb();
57}
58EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram);
59
60static int pfn_is_ram(unsigned long pfn)
61{
62 int (*fn)(unsigned long pfn);
63 /* pfn is ram unless fn() checks pagetype */
64 int ret = 1;
65
66 /*
67 * Ask hypervisor if the pfn is really ram.
68 * A ballooned page contains no data and reading from such a page
69 * will cause high load in the hypervisor.
70 */
71 fn = oldmem_pfn_is_ram;
72 if (fn)
73 ret = fn(pfn);
74
75 return ret;
76}
77
38/* Reads a page from the oldmem device from given offset. */ 78/* Reads a page from the oldmem device from given offset. */
39static ssize_t read_from_oldmem(char *buf, size_t count, 79static ssize_t read_from_oldmem(char *buf, size_t count,
40 u64 *ppos, int userbuf) 80 u64 *ppos, int userbuf)
@@ -55,9 +95,15 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
55 else 95 else
56 nr_bytes = count; 96 nr_bytes = count;
57 97
58 tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf); 98 /* If pfn is not ram, return zeros for sparse dump files */
59 if (tmp < 0) 99 if (pfn_is_ram(pfn) == 0)
60 return tmp; 100 memset(buf, 0, nr_bytes);
101 else {
102 tmp = copy_oldmem_page(pfn, buf, nr_bytes,
103 offset, userbuf);
104 if (tmp < 0)
105 return tmp;
106 }
61 *ppos += nr_bytes; 107 *ppos += nr_bytes;
62 count -= nr_bytes; 108 count -= nr_bytes;
63 buf += nr_bytes; 109 buf += nr_bytes;
@@ -499,7 +545,7 @@ static int __init parse_crash_elf64_headers(void)
499 /* Do some basic Verification. */ 545 /* Do some basic Verification. */
500 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || 546 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
501 (ehdr.e_type != ET_CORE) || 547 (ehdr.e_type != ET_CORE) ||
502 !vmcore_elf_check_arch(&ehdr) || 548 !vmcore_elf64_check_arch(&ehdr) ||
503 ehdr.e_ident[EI_CLASS] != ELFCLASS64 || 549 ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
504 ehdr.e_ident[EI_VERSION] != EV_CURRENT || 550 ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
505 ehdr.e_version != EV_CURRENT || 551 ehdr.e_version != EV_CURRENT ||