author		Ingo Molnar <mingo@elte.hu>	2009-10-25 12:30:53 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-10-25 12:30:53 -0400
commit		0b9e31e9264f1bad89856afb96da1688292f13b4 (patch)
tree		7a9e9b6456dce993efeed8734de0a15a1f16ae94 /fs/proc
parent		cf82ff7ea7695b0e82ba07bc5e9f1bd03a74e1aa (diff)
parent		964fe080d94db82a3268443e9b9ece4c60246414 (diff)
Merge branch 'linus' into sched/core
Conflicts:
	fs/proc/array.c

Merge reason: resolve conflict and queue up dependent patch.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs/proc')
-rw-r--r--	fs/proc/array.c		 92
-rw-r--r--	fs/proc/base.c		 67
-rw-r--r--	fs/proc/kcore.c		334
-rw-r--r--	fs/proc/meminfo.c	 13
-rw-r--r--	fs/proc/nommu.c		  2
-rw-r--r--	fs/proc/page.c		 10
-rw-r--r--	fs/proc/proc_sysctl.c	  2
-rw-r--r--	fs/proc/task_mmu.c	 57
-rw-r--r--	fs/proc/uptime.c	  7
9 files changed, 485 insertions, 99 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 762aea9c9c71..e209f64ab27b 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -82,6 +82,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
+#include <linux/swapops.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -321,6 +322,94 @@ static inline void task_context_switch_counts(struct seq_file *m,
 			p->nivcsw);
 }
 
+#ifdef CONFIG_MMU
+
+struct stack_stats {
+	struct vm_area_struct *vma;
+	unsigned long	startpage;
+	unsigned long	usage;
+};
+
+static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr,
+				unsigned long end, struct mm_walk *walk)
+{
+	struct stack_stats *ss = walk->private;
+	struct vm_area_struct *vma = ss->vma;
+	pte_t *pte, ptent;
+	spinlock_t *ptl;
+	int ret = 0;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE) {
+		ptent = *pte;
+
+#ifdef CONFIG_STACK_GROWSUP
+		if (pte_present(ptent) || is_swap_pte(ptent))
+			ss->usage = addr - ss->startpage + PAGE_SIZE;
+#else
+		if (pte_present(ptent) || is_swap_pte(ptent)) {
+			ss->usage = ss->startpage - addr + PAGE_SIZE;
+			pte++;
+			ret = 1;
+			break;
+		}
+#endif
+	}
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+	return ret;
+}
+
+static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma,
+				struct task_struct *task)
+{
+	struct stack_stats ss;
+	struct mm_walk stack_walk = {
+		.pmd_entry = stack_usage_pte_range,
+		.mm = vma->vm_mm,
+		.private = &ss,
+	};
+
+	if (!vma->vm_mm || is_vm_hugetlb_page(vma))
+		return 0;
+
+	ss.vma = vma;
+	ss.startpage = task->stack_start & PAGE_MASK;
+	ss.usage = 0;
+
+#ifdef CONFIG_STACK_GROWSUP
+	walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end,
+			&stack_walk);
+#else
+	walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE,
+			&stack_walk);
+#endif
+	return ss.usage;
+}
+
+static inline void task_show_stack_usage(struct seq_file *m,
+						struct task_struct *task)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = get_task_mm(task);
+
+	if (mm) {
+		down_read(&mm->mmap_sem);
+		vma = find_vma(mm, task->stack_start);
+		if (vma)
+			seq_printf(m, "Stack usage:\t%lu kB\n",
+				get_stack_usage_in_bytes(vma, task) >> 10);
+
+		up_read(&mm->mmap_sem);
+		mmput(mm);
+	}
+}
+#else
+static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
+{
+}
+#endif		/* CONFIG_MMU */
+
 static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 {
 	seq_printf(m, "Cpus_allowed:\t");
@@ -351,6 +440,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 	task_show_regs(m, task);
 #endif
 	task_context_switch_counts(m, task);
+	task_show_stack_usage(m, task);
 	return 0;
 }
 
@@ -492,7 +582,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		rsslim,
 		mm ? mm->start_code : 0,
 		mm ? mm->end_code : 0,
-		(permitted && mm) ? mm->start_stack : 0,
+		(permitted) ? task->stack_start : 0,
 		esp,
 		eip,
 		/* The signal information here is obsolete.
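The array.c hunks above add a "Stack usage:" line to /proc/<pid>/status, reporting how much of the task's stack VMA has actually been touched (present or swapped-out PTEs), rounded to pages. A minimal userspace sketch that reads the new field back; the field name comes from the seq_printf() in the hunk, everything else here is illustrative:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/status", "r");	/* any pid works */

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "Stack usage:", 12))
			fputs(line, stdout);	/* e.g. "Stack usage:	12 kB" */
	fclose(f);
	return 0;
}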
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6f742f6658a9..837469a96598 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -447,7 +447,7 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	read_lock(&tasklist_lock);
-	points = badness(task, uptime.tv_sec);
+	points = badness(task->group_leader, uptime.tv_sec);
 	read_unlock(&tasklist_lock);
 	return sprintf(buffer, "%lu\n", points);
 }
@@ -458,7 +458,7 @@ struct limit_names {
 };
 
 static const struct limit_names lnames[RLIM_NLIMITS] = {
-	[RLIMIT_CPU] = {"Max cpu time", "ms"},
+	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
 	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
 	[RLIMIT_DATA] = {"Max data size", "bytes"},
 	[RLIMIT_STACK] = {"Max stack size", "bytes"},
@@ -999,11 +999,17 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
 	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 	char buffer[PROC_NUMBUF];
 	size_t len;
-	int oom_adjust;
+	int oom_adjust = OOM_DISABLE;
+	unsigned long flags;
 
 	if (!task)
 		return -ESRCH;
-	oom_adjust = task->oomkilladj;
+
+	if (lock_task_sighand(task, &flags)) {
+		oom_adjust = task->signal->oom_adj;
+		unlock_task_sighand(task, &flags);
+	}
+
 	put_task_struct(task);
 
 	len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1015,32 +1021,44 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
 	struct task_struct *task;
-	char buffer[PROC_NUMBUF], *end;
-	int oom_adjust;
+	char buffer[PROC_NUMBUF];
+	long oom_adjust;
+	unsigned long flags;
+	int err;
 
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	oom_adjust = simple_strtol(buffer, &end, 0);
+
+	err = strict_strtol(strstrip(buffer), 0, &oom_adjust);
+	if (err)
+		return -EINVAL;
 	if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
 	     oom_adjust != OOM_DISABLE)
 		return -EINVAL;
-	if (*end == '\n')
-		end++;
+
 	task = get_proc_task(file->f_path.dentry->d_inode);
 	if (!task)
 		return -ESRCH;
-	if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
+	if (!lock_task_sighand(task, &flags)) {
+		put_task_struct(task);
+		return -ESRCH;
+	}
+
+	if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
+		unlock_task_sighand(task, &flags);
 		put_task_struct(task);
 		return -EACCES;
 	}
-	task->oomkilladj = oom_adjust;
+
+	task->signal->oom_adj = oom_adjust;
+
+	unlock_task_sighand(task, &flags);
 	put_task_struct(task);
-	if (end - buffer == 0)
-		return -EIO;
-	return end - buffer;
+
+	return count;
 }
 
 static const struct file_operations proc_oom_adjust_operations = {
@@ -1169,17 +1187,16 @@ static ssize_t proc_fault_inject_write(struct file * file,
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	make_it_fail = simple_strtol(buffer, &end, 0);
-	if (*end == '\n')
-		end++;
+	make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
+	if (*end)
+		return -EINVAL;
 	task = get_proc_task(file->f_dentry->d_inode);
 	if (!task)
 		return -ESRCH;
 	task->make_it_fail = make_it_fail;
 	put_task_struct(task);
-	if (end - buffer == 0)
-		return -EIO;
-	return end - buffer;
+
+	return count;
 }
 
 static const struct file_operations proc_fault_inject_operations = {
@@ -2586,9 +2603,6 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 		dput(dentry);
 	}
 
-	if (tgid == 0)
-		goto out;
-
 	name.name = buf;
 	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
 	leader = d_hash_and_lookup(mnt->mnt_root, &name);
@@ -2645,17 +2659,16 @@ out:
 void proc_flush_task(struct task_struct *task)
 {
 	int i;
-	struct pid *pid, *tgid = NULL;
+	struct pid *pid, *tgid;
 	struct upid *upid;
 
 	pid = task_pid(task);
-	if (thread_group_leader(task))
-		tgid = task_tgid(task);
+	tgid = task_tgid(task);
 
 	for (i = 0; i <= pid->level; i++) {
 		upid = &pid->numbers[i];
 		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
-					tgid ? tgid->numbers[i].nr : 0);
+					tgid->numbers[i].nr);
 	}
 
 	upid = &pid->numbers[pid->level];
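With oom_adj moved from task_struct into the shared signal_struct by the hunks above, the value is now per thread group rather than per thread, and a successful write returns the full byte count instead of however much simple_strtol() consumed. A hedged userspace sketch of driving the file; the helper name is hypothetical, the path and value semantics come from the diff:

#include <stdio.h>

/* hypothetical helper: write an oom_adj value for one pid */
static int set_oom_adj(int pid, int val)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/oom_adj", pid);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", val);	/* OOM_ADJUST_MIN..OOM_ADJUST_MAX or OOM_DISABLE */
	return fclose(f);
}

int main(void)
{
	/* illustrative; lowering the value below the current one needs CAP_SYS_RESOURCE */
	return set_oom_adj(1, 0) ? 1 : 0;
}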
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 59b43a068872..a44a7897fd4d 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -17,9 +17,14 @@
 #include <linux/elfcore.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
+#include <linux/bootmem.h>
 #include <linux/init.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
+#include <linux/list.h>
+#include <linux/ioport.h>
+#include <linux/memory.h>
+#include <asm/sections.h>
 
 #define CORE_STR "CORE"
 
@@ -29,17 +34,6 @@
 
 static struct proc_dir_entry *proc_root_kcore;
 
-static int open_kcore(struct inode * inode, struct file * filp)
-{
-	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
-}
-
-static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *);
-
-static const struct file_operations proc_kcore_operations = {
-	.read = read_kcore,
-	.open = open_kcore,
-};
 
 #ifndef kc_vaddr_to_offset
 #define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
@@ -57,18 +51,19 @@ struct memelfnote
 	void *data;
 };
 
-static struct kcore_list *kclist;
+static LIST_HEAD(kclist_head);
 static DEFINE_RWLOCK(kclist_lock);
+static int kcore_need_update = 1;
 
 void
-kclist_add(struct kcore_list *new, void *addr, size_t size)
+kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
 {
 	new->addr = (unsigned long)addr;
 	new->size = size;
+	new->type = type;
 
 	write_lock(&kclist_lock);
-	new->next = kclist;
-	kclist = new;
+	list_add_tail(&new->list, &kclist_head);
 	write_unlock(&kclist_lock);
 }
 
@@ -80,7 +75,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
 	*nphdr = 1; /* PT_NOTE */
 	size = 0;
 
-	for (m=kclist; m; m=m->next) {
+	list_for_each_entry(m, &kclist_head, list) {
 		try = kc_vaddr_to_offset((size_t)m->addr + m->size);
 		if (try > size)
 			size = try;
@@ -97,6 +92,177 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
 	return size + *elf_buflen;
 }
 
+static void free_kclist_ents(struct list_head *head)
+{
+	struct kcore_list *tmp, *pos;
+
+	list_for_each_entry_safe(pos, tmp, head, list) {
+		list_del(&pos->list);
+		kfree(pos);
+	}
+}
+/*
+ * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list.
+ */
+static void __kcore_update_ram(struct list_head *list)
+{
+	int nphdr;
+	size_t size;
+	struct kcore_list *tmp, *pos;
+	LIST_HEAD(garbage);
+
+	write_lock(&kclist_lock);
+	if (kcore_need_update) {
+		list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
+			if (pos->type == KCORE_RAM
+				|| pos->type == KCORE_VMEMMAP)
+				list_move(&pos->list, &garbage);
+		}
+		list_splice_tail(list, &kclist_head);
+	} else
+		list_splice(list, &garbage);
+	kcore_need_update = 0;
+	proc_root_kcore->size = get_kcore_size(&nphdr, &size);
+	write_unlock(&kclist_lock);
+
+	free_kclist_ents(&garbage);
+}
+
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * If no highmem, we can assume [0...max_low_pfn) continuous range of memory
+ * because memory hole is not as big as !HIGHMEM case.
+ * (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
+ */
+static int kcore_update_ram(void)
+{
+	LIST_HEAD(head);
+	struct kcore_list *ent;
+	int ret = 0;
+
+	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+	if (!ent)
+		return -ENOMEM;
+	ent->addr = (unsigned long)__va(0);
+	ent->size = max_low_pfn << PAGE_SHIFT;
+	ent->type = KCORE_RAM;
+	list_add(&ent->list, &head);
+	__kcore_update_ram(&head);
+	return ret;
+}
+
+#else /* !CONFIG_HIGHMEM */
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/* calculate vmemmap's address from given system ram pfn and register it */
+int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
+{
+	unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
+	unsigned long nr_pages = ent->size >> PAGE_SHIFT;
+	unsigned long start, end;
+	struct kcore_list *vmm, *tmp;
+
+
+	start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
+	end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
+	end = ALIGN(end, PAGE_SIZE);
+	/* overlap check (because we have to align page */
+	list_for_each_entry(tmp, head, list) {
+		if (tmp->type != KCORE_VMEMMAP)
+			continue;
+		if (start < tmp->addr + tmp->size)
+			if (end > tmp->addr)
+				end = tmp->addr;
+	}
+	if (start < end) {
+		vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
+		if (!vmm)
+			return 0;
+		vmm->addr = start;
+		vmm->size = end - start;
+		vmm->type = KCORE_VMEMMAP;
+		list_add_tail(&vmm->list, head);
+	}
+	return 1;
+
+}
+#else
+int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
+{
+	return 1;
+}
+
+#endif
+
+static int
+kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
+{
+	struct list_head *head = (struct list_head *)arg;
+	struct kcore_list *ent;
+
+	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+	if (!ent)
+		return -ENOMEM;
+	ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+	ent->size = nr_pages << PAGE_SHIFT;
+
+	/* Sanity check: Can happen in 32bit arch...maybe */
+	if (ent->addr < (unsigned long) __va(0))
+		goto free_out;
+
+	/* cut not-mapped area. ....from ppc-32 code. */
+	if (ULONG_MAX - ent->addr < ent->size)
+		ent->size = ULONG_MAX - ent->addr;
+
+	/* cut when vmalloc() area is higher than direct-map area */
+	if (VMALLOC_START > (unsigned long)__va(0)) {
+		if (ent->addr > VMALLOC_START)
+			goto free_out;
+		if (VMALLOC_START - ent->addr < ent->size)
+			ent->size = VMALLOC_START - ent->addr;
+	}
+
+	ent->type = KCORE_RAM;
+	list_add_tail(&ent->list, head);
+
+	if (!get_sparsemem_vmemmap_info(ent, head)) {
+		list_del(&ent->list);
+		goto free_out;
+	}
+
+	return 0;
+free_out:
+	kfree(ent);
+	return 1;
+}
+
+static int kcore_update_ram(void)
+{
+	int nid, ret;
+	unsigned long end_pfn;
+	LIST_HEAD(head);
+
+	/* Not inialized....update now */
+	/* find out "max pfn" */
+	end_pfn = 0;
+	for_each_node_state(nid, N_HIGH_MEMORY) {
+		unsigned long node_end;
+		node_end = NODE_DATA(nid)->node_start_pfn +
+			NODE_DATA(nid)->node_spanned_pages;
+		if (end_pfn < node_end)
+			end_pfn = node_end;
+	}
+	/* scan 0 to max_pfn */
+	ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
+	if (ret) {
+		free_kclist_ents(&head);
+		return -ENOMEM;
+	}
+	__kcore_update_ram(&head);
+	return ret;
+}
+#endif /* CONFIG_HIGHMEM */
 
 /*****************************************************************************/
 /*
@@ -192,7 +358,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 	nhdr->p_align	= 0;
 
 	/* setup ELF PT_LOAD program header for every area */
-	for (m=kclist; m; m=m->next) {
+	list_for_each_entry(m, &kclist_head, list) {
 		phdr = (struct elf_phdr *) bufp;
 		bufp += sizeof(struct elf_phdr);
 		offset += sizeof(struct elf_phdr);
@@ -265,7 +431,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 	unsigned long start;
 
 	read_lock(&kclist_lock);
-	proc_root_kcore->size = size = get_kcore_size(&nphdr, &elf_buflen);
+	size = get_kcore_size(&nphdr, &elf_buflen);
+
 	if (buflen == 0 || *fpos >= size) {
 		read_unlock(&kclist_lock);
 		return 0;
@@ -317,7 +484,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 		struct kcore_list *m;
 
 		read_lock(&kclist_lock);
-		for (m=kclist; m; m=m->next) {
+		list_for_each_entry(m, &kclist_head, list) {
 			if (start >= m->addr && start < (m->addr+m->size))
 				break;
 		}
@@ -326,45 +493,14 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 		if (m == NULL) {
 			if (clear_user(buffer, tsz))
 				return -EFAULT;
-		} else if (is_vmalloc_addr((void *)start)) {
+		} else if (is_vmalloc_or_module_addr((void *)start)) {
 			char * elf_buf;
-			struct vm_struct *m;
-			unsigned long curstart = start;
-			unsigned long cursize = tsz;
 
 			elf_buf = kzalloc(tsz, GFP_KERNEL);
 			if (!elf_buf)
 				return -ENOMEM;
-
-			read_lock(&vmlist_lock);
-			for (m=vmlist; m && cursize; m=m->next) {
-				unsigned long vmstart;
-				unsigned long vmsize;
-				unsigned long msize = m->size - PAGE_SIZE;
-
-				if (((unsigned long)m->addr + msize) <
-								curstart)
-					continue;
-				if ((unsigned long)m->addr > (curstart +
-								cursize))
-					break;
-				vmstart = (curstart < (unsigned long)m->addr ?
-					(unsigned long)m->addr : curstart);
-				if (((unsigned long)m->addr + msize) >
-						(curstart + cursize))
-					vmsize = curstart + cursize - vmstart;
-				else
-					vmsize = (unsigned long)m->addr +
-						msize - vmstart;
-				curstart = vmstart + vmsize;
-				cursize -= vmsize;
-				/* don't dump ioremap'd stuff! (TA) */
-				if (m->flags & VM_IOREMAP)
-					continue;
-				memcpy(elf_buf + (vmstart - start),
-						(char *)vmstart, vmsize);
-			}
-			read_unlock(&vmlist_lock);
+			vread(elf_buf, (char *)start, tsz);
+			/* we have to zero-fill user buffer even if no read */
 			if (copy_to_user(buffer, elf_buf, tsz)) {
 				kfree(elf_buf);
 				return -EFAULT;
@@ -402,12 +538,96 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 	return acc;
 }
 
+
+static int open_kcore(struct inode *inode, struct file *filp)
+{
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+	if (kcore_need_update)
+		kcore_update_ram();
+	if (i_size_read(inode) != proc_root_kcore->size) {
+		mutex_lock(&inode->i_mutex);
+		i_size_write(inode, proc_root_kcore->size);
+		mutex_unlock(&inode->i_mutex);
+	}
+	return 0;
+}
+
+
+static const struct file_operations proc_kcore_operations = {
+	.read = read_kcore,
+	.open = open_kcore,
+};
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/* just remember that we have to update kcore */
+static int __meminit kcore_callback(struct notifier_block *self,
+				    unsigned long action, void *arg)
+{
+	switch (action) {
+	case MEM_ONLINE:
+	case MEM_OFFLINE:
+		write_lock(&kclist_lock);
+		kcore_need_update = 1;
+		write_unlock(&kclist_lock);
+	}
+	return NOTIFY_OK;
+}
+#endif
+
+
+static struct kcore_list kcore_vmalloc;
+
+#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
+static struct kcore_list kcore_text;
+/*
+ * If defined, special segment is used for mapping kernel text instead of
+ * direct-map area. We need to create special TEXT section.
+ */
+static void __init proc_kcore_text_init(void)
+{
+	kclist_add(&kcore_text, _stext, _end - _stext, KCORE_TEXT);
+}
+#else
+static void __init proc_kcore_text_init(void)
+{
+}
+#endif
+
+#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
+/*
+ * MODULES_VADDR has no intersection with VMALLOC_ADDR.
+ */
+struct kcore_list kcore_modules;
+static void __init add_modules_range(void)
+{
+	kclist_add(&kcore_modules, (void *)MODULES_VADDR,
+			MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
+}
+#else
+static void __init add_modules_range(void)
+{
+}
+#endif
+
 static int __init proc_kcore_init(void)
 {
-	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
-	if (proc_root_kcore)
-		proc_root_kcore->size =
-			(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
+	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
+				      &proc_kcore_operations);
+	if (!proc_root_kcore) {
+		printk(KERN_ERR "couldn't create /proc/kcore\n");
+		return 0; /* Always returns 0. */
+	}
+	/* Store text area if it's special */
+	proc_kcore_text_init();
+	/* Store vmalloc area */
+	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
+		VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
+	add_modules_range();
+	/* Store direct-map area from physical memory map */
+	kcore_update_ram();
+	hotplug_memory_notifier(kcore_callback, 0);
+
 	return 0;
 }
 module_init(proc_kcore_init);
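Note that kclist_add() now takes a fourth argument, the memory type (KCORE_RAM, KCORE_TEXT, KCORE_VMALLOC, KCORE_VMEMMAP as used above), so any out-of-tree caller of the old three-argument form has to be converted. A hedged in-kernel sketch modeled on add_modules_range() above; the entry name and range are purely illustrative:

/* hypothetical caller converted to the new kclist_add() signature */
static struct kcore_list kcore_example;

static void __init example_register(void)
{
	kclist_add(&kcore_example, (void *)VMALLOC_START,
		   VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
}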
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index d5c410d47fae..c7bff4f603ff 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -81,9 +81,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
81 "Writeback: %8lu kB\n" 81 "Writeback: %8lu kB\n"
82 "AnonPages: %8lu kB\n" 82 "AnonPages: %8lu kB\n"
83 "Mapped: %8lu kB\n" 83 "Mapped: %8lu kB\n"
84 "Shmem: %8lu kB\n"
84 "Slab: %8lu kB\n" 85 "Slab: %8lu kB\n"
85 "SReclaimable: %8lu kB\n" 86 "SReclaimable: %8lu kB\n"
86 "SUnreclaim: %8lu kB\n" 87 "SUnreclaim: %8lu kB\n"
88 "KernelStack: %8lu kB\n"
87 "PageTables: %8lu kB\n" 89 "PageTables: %8lu kB\n"
88#ifdef CONFIG_QUICKLIST 90#ifdef CONFIG_QUICKLIST
89 "Quicklists: %8lu kB\n" 91 "Quicklists: %8lu kB\n"
@@ -95,7 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
95 "Committed_AS: %8lu kB\n" 97 "Committed_AS: %8lu kB\n"
96 "VmallocTotal: %8lu kB\n" 98 "VmallocTotal: %8lu kB\n"
97 "VmallocUsed: %8lu kB\n" 99 "VmallocUsed: %8lu kB\n"
98 "VmallocChunk: %8lu kB\n", 100 "VmallocChunk: %8lu kB\n"
101#ifdef CONFIG_MEMORY_FAILURE
102 "HardwareCorrupted: %8lu kB\n"
103#endif
104 ,
99 K(i.totalram), 105 K(i.totalram),
100 K(i.freeram), 106 K(i.freeram),
101 K(i.bufferram), 107 K(i.bufferram),
@@ -124,10 +130,12 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(global_page_state(NR_WRITEBACK)),
 		K(global_page_state(NR_ANON_PAGES)),
 		K(global_page_state(NR_FILE_MAPPED)),
+		K(global_page_state(NR_SHMEM)),
 		K(global_page_state(NR_SLAB_RECLAIMABLE) +
				global_page_state(NR_SLAB_UNRECLAIMABLE)),
 		K(global_page_state(NR_SLAB_RECLAIMABLE)),
 		K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
+		global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024,
 		K(global_page_state(NR_PAGETABLE)),
 #ifdef CONFIG_QUICKLIST
 		K(quicklist_total_size()),
@@ -140,6 +148,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		(unsigned long)VMALLOC_TOTAL >> 10,
 		vmi.used >> 10,
 		vmi.largest_chunk >> 10
+#ifdef CONFIG_MEMORY_FAILURE
+		,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10)
+#endif
 		);
 
 	hugetlb_report_meminfo(m);
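The meminfo hunks add Shmem and KernelStack counters (and, under CONFIG_MEMORY_FAILURE, HardwareCorrupted). A minimal userspace sketch, assuming only the field names added above, that filters the new lines out of /proc/meminfo:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "Shmem:", 6) ||
		    !strncmp(line, "KernelStack:", 12))
			fputs(line, stdout);
	fclose(f);
	return 0;
}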
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 7e14d1a04001..9fe7d7ebe115 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -109,7 +109,7 @@ static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos)
 	return rb_next((struct rb_node *) v);
 }
 
-static struct seq_operations proc_nommu_region_list_seqop = {
+static const struct seq_operations proc_nommu_region_list_seqop = {
 	.start	= nommu_region_list_start,
 	.next	= nommu_region_list_next,
 	.stop	= nommu_region_list_stop,
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 2707c6c7a20f..5033ce0d254b 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -2,6 +2,7 @@
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/init.h>
+#include <linux/ksm.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/proc_fs.h>
@@ -93,8 +94,11 @@ static const struct file_operations proc_kpagecount_operations = {
 #define KPF_COMPOUND_TAIL	16
 #define KPF_HUGE		17
 #define KPF_UNEVICTABLE		18
+#define KPF_HWPOISON		19
 #define KPF_NOPAGE		20
 
+#define KPF_KSM			21
+
 /* kernel hacking assistances
  * WARNING: subject to change, never rely on them!
  */
@@ -137,6 +141,8 @@ static u64 get_uflags(struct page *page)
 		u |= 1 << KPF_MMAP;
 	if (PageAnon(page))
 		u |= 1 << KPF_ANON;
+	if (PageKsm(page))
+		u |= 1 << KPF_KSM;
 
 	/*
 	 * compound pages: export both head/tail info
@@ -175,6 +181,10 @@ static u64 get_uflags(struct page *page)
 	u |= kpf_copy_bit(k, KPF_UNEVICTABLE,	PG_unevictable);
 	u |= kpf_copy_bit(k, KPF_MLOCKED,	PG_mlocked);
 
+#ifdef CONFIG_MEMORY_FAILURE
+	u |= kpf_copy_bit(k, KPF_HWPOISON,	PG_hwpoison);
+#endif
+
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 	u |= kpf_copy_bit(k, KPF_UNCACHED,	PG_uncached);
 #endif
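The page.c hunks export two new bits through /proc/kpageflags: bit 19 (KPF_HWPOISON, only populated under CONFIG_MEMORY_FAILURE) and bit 21 (KPF_KSM). A hedged userspace sketch that tests both bits for one page frame number; reading kpageflags normally requires root, and the pfn here is illustrative:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t flags;
	unsigned long pfn = 0;	/* illustrative pfn; one 64-bit word per pfn */
	FILE *f = fopen("/proc/kpageflags", "rb");

	if (!f)
		return 1;
	if (fseek(f, pfn * sizeof(flags), SEEK_SET) == 0 &&
	    fread(&flags, sizeof(flags), 1, f) == 1)
		printf("pfn %lu: ksm=%d hwpoison=%d\n", pfn,
		       (int)((flags >> 21) & 1), (int)((flags >> 19) & 1));
	fclose(f);
	return 0;
}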
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 9b1e4e9a16bf..f667e8aeabdf 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -153,7 +153,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 
 	/* careful: calling conventions are nasty here */
 	res = count;
-	error = table->proc_handler(table, write, filp, buf, &res, ppos);
+	error = table->proc_handler(table, write, buf, &res, ppos);
 	if (!error)
 		error = res;
 out:
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9bd8be1d235c..2a1bef9203c6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -243,6 +243,25 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 		} else if (vma->vm_start <= mm->start_stack &&
 			   vma->vm_end >= mm->start_stack) {
 			name = "[stack]";
+		} else {
+			unsigned long stack_start;
+			struct proc_maps_private *pmp;
+
+			pmp = m->private;
+			stack_start = pmp->task->stack_start;
+
+			if (vma->vm_start <= stack_start &&
+			    vma->vm_end >= stack_start) {
+				pad_len_spaces(m, len);
+				seq_printf(m,
+				 "[threadstack:%08lx]",
+#ifdef CONFIG_STACK_GROWSUP
+				 vma->vm_end - stack_start
+#else
+				 stack_start - vma->vm_start
+#endif
+				);
+			}
 		}
 	} else {
 		name = "[vdso]";
@@ -465,23 +484,28 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 
+#define CLEAR_REFS_ALL 1
+#define CLEAR_REFS_ANON 2
+#define CLEAR_REFS_MAPPED 3
+
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
 	struct task_struct *task;
-	char buffer[PROC_NUMBUF], *end;
+	char buffer[PROC_NUMBUF];
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
+	long type;
 
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	if (!simple_strtol(buffer, &end, 0))
+	if (strict_strtol(strstrip(buffer), 10, &type))
+		return -EINVAL;
+	if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
 		return -EINVAL;
-	if (*end == '\n')
-		end++;
 	task = get_proc_task(file->f_path.dentry->d_inode);
 	if (!task)
 		return -ESRCH;
@@ -494,18 +518,31 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		down_read(&mm->mmap_sem);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			clear_refs_walk.private = vma;
-			if (!is_vm_hugetlb_page(vma))
-				walk_page_range(vma->vm_start, vma->vm_end,
-						&clear_refs_walk);
+			if (is_vm_hugetlb_page(vma))
+				continue;
+			/*
+			 * Writing 1 to /proc/pid/clear_refs affects all pages.
+			 *
+			 * Writing 2 to /proc/pid/clear_refs only affects
+			 * Anonymous pages.
+			 *
+			 * Writing 3 to /proc/pid/clear_refs only affects file
+			 * mapped pages.
+			 */
+			if (type == CLEAR_REFS_ANON && vma->vm_file)
+				continue;
+			if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
+				continue;
+			walk_page_range(vma->vm_start, vma->vm_end,
+					&clear_refs_walk);
 		}
 		flush_tlb_mm(mm);
 		up_read(&mm->mmap_sem);
 		mmput(mm);
 	}
 	put_task_struct(task);
-	if (end - buffer == 0)
-		return -EIO;
-	return end - buffer;
+
+	return count;
 }
 
 const struct file_operations proc_clear_refs_operations = {
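After the task_mmu.c hunks, the value written to /proc/<pid>/clear_refs selects a mode: 1 clears the referenced bits on all mappings, 2 only on anonymous mappings, 3 only on file-backed mappings, matching the CLEAR_REFS_* constants above; anything else is rejected with -EINVAL. A hedged userspace sketch with a hypothetical helper:

#include <stdio.h>
#include <unistd.h>

static int clear_refs(pid_t pid, int type)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/clear_refs", (int)pid);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", type);	/* 1 = all, 2 = anon, 3 = file-mapped */
	return fclose(f);
}

int main(void)
{
	return clear_refs(getpid(), 1) ? 1 : 0;
}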
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 0c10a0b3f146..766b1d456050 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -4,13 +4,18 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/time.h>
+#include <linux/kernel_stat.h>
 #include <asm/cputime.h>
 
 static int uptime_proc_show(struct seq_file *m, void *v)
 {
 	struct timespec uptime;
 	struct timespec idle;
-	cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
+	int i;
+	cputime_t idletime = cputime_zero;
+
+	for_each_possible_cpu(i)
+		idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	monotonic_to_bootbased(&uptime);
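With the uptime.c change, the idle figure in /proc/uptime is the idle time summed over every possible CPU instead of init's own utime+stime, so on SMP machines it can legitimately exceed wall-clock uptime. A minimal sketch that reads both fields back, assuming only the existing two-field format of /proc/uptime:

#include <stdio.h>

int main(void)
{
	double up, idle;
	FILE *f = fopen("/proc/uptime", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%lf %lf", &up, &idle) != 2) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("uptime %.2fs, idle %.2fs (idle may exceed uptime on SMP)\n",
	       up, idle);
	return 0;
}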