path: root/fs/proc
Diffstat (limited to 'fs/proc')
-rw-r--r--  fs/proc/array.c        |  85
-rw-r--r--  fs/proc/base.c         |  67
-rw-r--r--  fs/proc/kcore.c        | 335
-rw-r--r--  fs/proc/meminfo.c      |   4
-rw-r--r--  fs/proc/nommu.c        |   2
-rw-r--r--  fs/proc/page.c         |   5
-rw-r--r--  fs/proc/proc_sysctl.c  |   2
-rw-r--r--  fs/proc/task_mmu.c     |  57
8 files changed, 460 insertions, 97 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 725a650bbbb8..0c6bc602e6c4 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -82,6 +82,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
+#include <linux/swapops.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -321,6 +322,87 @@ static inline void task_context_switch_counts(struct seq_file *m,
 			p->nivcsw);
 }
 
+struct stack_stats {
+	struct vm_area_struct *vma;
+	unsigned long startpage;
+	unsigned long usage;
+};
+
+static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr,
+				unsigned long end, struct mm_walk *walk)
+{
+	struct stack_stats *ss = walk->private;
+	struct vm_area_struct *vma = ss->vma;
+	pte_t *pte, ptent;
+	spinlock_t *ptl;
+	int ret = 0;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE) {
+		ptent = *pte;
+
+#ifdef CONFIG_STACK_GROWSUP
+		if (pte_present(ptent) || is_swap_pte(ptent))
+			ss->usage = addr - ss->startpage + PAGE_SIZE;
+#else
+		if (pte_present(ptent) || is_swap_pte(ptent)) {
+			ss->usage = ss->startpage - addr + PAGE_SIZE;
+			pte++;
+			ret = 1;
+			break;
+		}
+#endif
+	}
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+	return ret;
+}
+
+static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma,
+				struct task_struct *task)
+{
+	struct stack_stats ss;
+	struct mm_walk stack_walk = {
+		.pmd_entry = stack_usage_pte_range,
+		.mm = vma->vm_mm,
+		.private = &ss,
+	};
+
+	if (!vma->vm_mm || is_vm_hugetlb_page(vma))
+		return 0;
+
+	ss.vma = vma;
+	ss.startpage = task->stack_start & PAGE_MASK;
+	ss.usage = 0;
+
+#ifdef CONFIG_STACK_GROWSUP
+	walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end,
+		&stack_walk);
+#else
+	walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE,
+		&stack_walk);
+#endif
+	return ss.usage;
+}
+
+static inline void task_show_stack_usage(struct seq_file *m,
+						struct task_struct *task)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = get_task_mm(task);
+
+	if (mm) {
+		down_read(&mm->mmap_sem);
+		vma = find_vma(mm, task->stack_start);
+		if (vma)
+			seq_printf(m, "Stack usage:\t%lu kB\n",
+				get_stack_usage_in_bytes(vma, task) >> 10);
+
+		up_read(&mm->mmap_sem);
+		mmput(mm);
+	}
+}
+
 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 			struct pid *pid, struct task_struct *task)
 {
@@ -340,6 +422,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 	task_show_regs(m, task);
 #endif
 	task_context_switch_counts(m, task);
+	task_show_stack_usage(m, task);
 	return 0;
 }
 
@@ -481,7 +564,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		rsslim,
 		mm ? mm->start_code : 0,
 		mm ? mm->end_code : 0,
-		(permitted && mm) ? mm->start_stack : 0,
+		(permitted) ? task->stack_start : 0,
 		esp,
 		eip,
 		/* The signal information here is obsolete.
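
The hunk above exports a new "Stack usage:" field at the tail of /proc/<pid>/status. A minimal userspace sketch for reading it back, assuming a kernel with this patch applied (the helper program is illustrative, not part of the patch):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/self/status", "r");

            if (!f)
                    return 1;
            /* proc_pid_status() above emits the field last. */
            while (fgets(line, sizeof(line), f))
                    if (!strncmp(line, "Stack usage:", 12))
                            fputs(line, stdout); /* e.g. "Stack usage:  12 kB" */
            fclose(f);
            return 0;
    }
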
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6f742f6658a9..837469a96598 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -447,7 +447,7 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	read_lock(&tasklist_lock);
-	points = badness(task, uptime.tv_sec);
+	points = badness(task->group_leader, uptime.tv_sec);
 	read_unlock(&tasklist_lock);
 	return sprintf(buffer, "%lu\n", points);
 }
@@ -458,7 +458,7 @@ struct limit_names {
 };
 
 static const struct limit_names lnames[RLIM_NLIMITS] = {
-	[RLIMIT_CPU] = {"Max cpu time", "ms"},
+	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
 	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
 	[RLIMIT_DATA] = {"Max data size", "bytes"},
 	[RLIMIT_STACK] = {"Max stack size", "bytes"},
@@ -999,11 +999,17 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
 	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 	char buffer[PROC_NUMBUF];
 	size_t len;
-	int oom_adjust;
+	int oom_adjust = OOM_DISABLE;
+	unsigned long flags;
 
 	if (!task)
 		return -ESRCH;
-	oom_adjust = task->oomkilladj;
+
+	if (lock_task_sighand(task, &flags)) {
+		oom_adjust = task->signal->oom_adj;
+		unlock_task_sighand(task, &flags);
+	}
+
 	put_task_struct(task);
 
 	len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1015,32 +1021,44 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 			size_t count, loff_t *ppos)
 {
 	struct task_struct *task;
-	char buffer[PROC_NUMBUF], *end;
-	int oom_adjust;
+	char buffer[PROC_NUMBUF];
+	long oom_adjust;
+	unsigned long flags;
+	int err;
 
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	oom_adjust = simple_strtol(buffer, &end, 0);
+
+	err = strict_strtol(strstrip(buffer), 0, &oom_adjust);
+	if (err)
+		return -EINVAL;
 	if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
 	     oom_adjust != OOM_DISABLE)
 		return -EINVAL;
-	if (*end == '\n')
-		end++;
+
 	task = get_proc_task(file->f_path.dentry->d_inode);
 	if (!task)
 		return -ESRCH;
-	if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
+	if (!lock_task_sighand(task, &flags)) {
+		put_task_struct(task);
+		return -ESRCH;
+	}
+
+	if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
+		unlock_task_sighand(task, &flags);
 		put_task_struct(task);
 		return -EACCES;
 	}
-	task->oomkilladj = oom_adjust;
+
+	task->signal->oom_adj = oom_adjust;
+
+	unlock_task_sighand(task, &flags);
 	put_task_struct(task);
-	if (end - buffer == 0)
-		return -EIO;
-	return end - buffer;
+
+	return count;
 }
 
 static const struct file_operations proc_oom_adjust_operations = {
@@ -1169,17 +1187,16 @@ static ssize_t proc_fault_inject_write(struct file * file,
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	make_it_fail = simple_strtol(buffer, &end, 0);
-	if (*end == '\n')
-		end++;
+	make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
+	if (*end)
+		return -EINVAL;
 	task = get_proc_task(file->f_dentry->d_inode);
 	if (!task)
 		return -ESRCH;
 	task->make_it_fail = make_it_fail;
 	put_task_struct(task);
-	if (end - buffer == 0)
-		return -EIO;
-	return end - buffer;
+
+	return count;
 }
 
 static const struct file_operations proc_fault_inject_operations = {
@@ -2586,9 +2603,6 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 		dput(dentry);
 	}
 
-	if (tgid == 0)
-		goto out;
-
 	name.name = buf;
 	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
 	leader = d_hash_and_lookup(mnt->mnt_root, &name);
@@ -2645,17 +2659,16 @@ out:
 void proc_flush_task(struct task_struct *task)
 {
 	int i;
-	struct pid *pid, *tgid = NULL;
+	struct pid *pid, *tgid;
 	struct upid *upid;
 
 	pid = task_pid(task);
-	if (thread_group_leader(task))
-		tgid = task_tgid(task);
+	tgid = task_tgid(task);
 
 	for (i = 0; i <= pid->level; i++) {
 		upid = &pid->numbers[i];
 		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
-					tgid ? tgid->numbers[i].nr : 0);
+					tgid->numbers[i].nr);
 	}
 
 	upid = &pid->numbers[pid->level];
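
With strict_strtol() on a strstrip()ed buffer, a write to /proc/<pid>/oom_adj must now be a single well-formed integer: a trailing newline is stripped, trailing garbage such as "5 x" gets -EINVAL instead of silent truncation, and a successful write returns count. A sketch of the resulting userspace contract, assuming a kernel with this patch applied (the helper is illustrative):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Illustrative helper, not part of the patch. */
    static int set_oom_adj(pid_t pid, const char *val)
    {
            char path[64];
            int fd;
            ssize_t ret;

            snprintf(path, sizeof(path), "/proc/%d/oom_adj", (int)pid);
            fd = open(path, O_WRONLY);
            if (fd < 0)
                    return -1;
            ret = write(fd, val, strlen(val)); /* count on success */
            close(fd);
            return ret < 0 ? -1 : 0;
    }

    int main(void)
    {
            /* "-17\n" parses cleanly; "-17 x\n" would now fail with EINVAL. */
            return set_oom_adj(getpid(), "-17\n") ? 1 : 0;
    }
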
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 59b43a068872..56013371f9f3 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -17,9 +17,15 @@
 #include <linux/elfcore.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
+#include <linux/bootmem.h>
 #include <linux/init.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
+#include <linux/list.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/memory.h>
+#include <asm/sections.h>
 
 #define CORE_STR "CORE"
 
@@ -29,17 +35,6 @@
 
 static struct proc_dir_entry *proc_root_kcore;
 
-static int open_kcore(struct inode * inode, struct file * filp)
-{
-	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
-}
-
-static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *);
-
-static const struct file_operations proc_kcore_operations = {
-	.read = read_kcore,
-	.open = open_kcore,
-};
 
 #ifndef kc_vaddr_to_offset
 #define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
@@ -57,18 +52,19 @@ struct memelfnote
 	void *data;
 };
 
-static struct kcore_list *kclist;
+static LIST_HEAD(kclist_head);
 static DEFINE_RWLOCK(kclist_lock);
+static int kcore_need_update = 1;
 
 void
-kclist_add(struct kcore_list *new, void *addr, size_t size)
+kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
 {
 	new->addr = (unsigned long)addr;
 	new->size = size;
+	new->type = type;
 
 	write_lock(&kclist_lock);
-	new->next = kclist;
-	kclist = new;
+	list_add_tail(&new->list, &kclist_head);
 	write_unlock(&kclist_lock);
 }
 
@@ -80,7 +76,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
 	*nphdr = 1; /* PT_NOTE */
 	size = 0;
 
-	for (m=kclist; m; m=m->next) {
+	list_for_each_entry(m, &kclist_head, list) {
 		try = kc_vaddr_to_offset((size_t)m->addr + m->size);
 		if (try > size)
 			size = try;
@@ -97,6 +93,177 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
 	return size + *elf_buflen;
 }
 
+static void free_kclist_ents(struct list_head *head)
+{
+	struct kcore_list *tmp, *pos;
+
+	list_for_each_entry_safe(pos, tmp, head, list) {
+		list_del(&pos->list);
+		kfree(pos);
+	}
+}
+/*
+ * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list.
+ */
+static void __kcore_update_ram(struct list_head *list)
+{
+	int nphdr;
+	size_t size;
+	struct kcore_list *tmp, *pos;
+	LIST_HEAD(garbage);
+
+	write_lock(&kclist_lock);
+	if (kcore_need_update) {
+		list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
+			if (pos->type == KCORE_RAM
+				|| pos->type == KCORE_VMEMMAP)
+				list_move(&pos->list, &garbage);
+		}
+		list_splice_tail(list, &kclist_head);
+	} else
+		list_splice(list, &garbage);
+	kcore_need_update = 0;
+	proc_root_kcore->size = get_kcore_size(&nphdr, &size);
+	write_unlock(&kclist_lock);
+
+	free_kclist_ents(&garbage);
+}
+
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * If no highmem, we can assume [0...max_low_pfn) continuous range of memory
+ * because memory hole is not as big as !HIGHMEM case.
+ * (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
+ */
+static int kcore_update_ram(void)
+{
+	LIST_HEAD(head);
+	struct kcore_list *ent;
+	int ret = 0;
+
+	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+	if (!ent)
+		return -ENOMEM;
+	ent->addr = (unsigned long)__va(0);
+	ent->size = max_low_pfn << PAGE_SHIFT;
+	ent->type = KCORE_RAM;
+	list_add(&ent->list, &head);
+	__kcore_update_ram(&head);
+	return ret;
+}
+
+#else /* !CONFIG_HIGHMEM */
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/* calculate vmemmap's address from given system ram pfn and register it */
+int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
+{
+	unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
+	unsigned long nr_pages = ent->size >> PAGE_SHIFT;
+	unsigned long start, end;
+	struct kcore_list *vmm, *tmp;
+
+
+	start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
+	end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
+	end = ALIGN(end, PAGE_SIZE);
+	/* overlap check (because we have to align pages) */
+	list_for_each_entry(tmp, head, list) {
+		if (tmp->type != KCORE_VMEMMAP)
+			continue;
+		if (start < tmp->addr + tmp->size)
+			if (end > tmp->addr)
+				end = tmp->addr;
+	}
+	if (start < end) {
+		vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
+		if (!vmm)
+			return 0;
+		vmm->addr = start;
+		vmm->size = end - start;
+		vmm->type = KCORE_VMEMMAP;
+		list_add_tail(&vmm->list, head);
+	}
+	return 1;
+
+}
+#else
+int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
+{
+	return 1;
+}
+
+#endif
+
+static int
+kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
+{
+	struct list_head *head = (struct list_head *)arg;
+	struct kcore_list *ent;
+
+	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+	if (!ent)
+		return -ENOMEM;
+	ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+	ent->size = nr_pages << PAGE_SHIFT;
+
+	/* Sanity check: Can happen in 32bit arch...maybe */
+	if (ent->addr < (unsigned long) __va(0))
+		goto free_out;
+
+	/* cut not-mapped area. ....from ppc-32 code. */
+	if (ULONG_MAX - ent->addr < ent->size)
+		ent->size = ULONG_MAX - ent->addr;
+
+	/* cut when vmalloc() area is higher than direct-map area */
+	if (VMALLOC_START > (unsigned long)__va(0)) {
+		if (ent->addr > VMALLOC_START)
+			goto free_out;
+		if (VMALLOC_START - ent->addr < ent->size)
+			ent->size = VMALLOC_START - ent->addr;
+	}
+
+	ent->type = KCORE_RAM;
+	list_add_tail(&ent->list, head);
+
+	if (!get_sparsemem_vmemmap_info(ent, head)) {
+		list_del(&ent->list);
+		goto free_out;
+	}
+
+	return 0;
+free_out:
+	kfree(ent);
+	return 1;
+}
+
+static int kcore_update_ram(void)
+{
+	int nid, ret;
+	unsigned long end_pfn;
+	LIST_HEAD(head);
+
+	/* Not initialized....update now */
+	/* find out "max pfn" */
+	end_pfn = 0;
+	for_each_node_state(nid, N_HIGH_MEMORY) {
+		unsigned long node_end;
+		node_end = NODE_DATA(nid)->node_start_pfn +
+			NODE_DATA(nid)->node_spanned_pages;
+		if (end_pfn < node_end)
+			end_pfn = node_end;
+	}
+	/* scan 0 to max_pfn */
+	ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
+	if (ret) {
+		free_kclist_ents(&head);
+		return -ENOMEM;
+	}
+	__kcore_update_ram(&head);
+	return ret;
+}
+#endif /* CONFIG_HIGHMEM */
 
 /*****************************************************************************/
 /*
@@ -192,7 +359,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 	nhdr->p_align	= 0;
 
 	/* setup ELF PT_LOAD program header for every area */
-	for (m=kclist; m; m=m->next) {
+	list_for_each_entry(m, &kclist_head, list) {
 		phdr = (struct elf_phdr *) bufp;
 		bufp += sizeof(struct elf_phdr);
 		offset += sizeof(struct elf_phdr);
@@ -265,7 +432,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 	unsigned long start;
 
 	read_lock(&kclist_lock);
-	proc_root_kcore->size = size = get_kcore_size(&nphdr, &elf_buflen);
+	size = get_kcore_size(&nphdr, &elf_buflen);
+
 	if (buflen == 0 || *fpos >= size) {
 		read_unlock(&kclist_lock);
 		return 0;
@@ -317,7 +485,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 		struct kcore_list *m;
 
 		read_lock(&kclist_lock);
-		for (m=kclist; m; m=m->next) {
+		list_for_each_entry(m, &kclist_head, list) {
 			if (start >= m->addr && start < (m->addr+m->size))
 				break;
 		}
@@ -326,45 +494,14 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 		if (m == NULL) {
 			if (clear_user(buffer, tsz))
 				return -EFAULT;
-		} else if (is_vmalloc_addr((void *)start)) {
+		} else if (is_vmalloc_or_module_addr((void *)start)) {
 			char * elf_buf;
-			struct vm_struct *m;
-			unsigned long curstart = start;
-			unsigned long cursize = tsz;
 
 			elf_buf = kzalloc(tsz, GFP_KERNEL);
 			if (!elf_buf)
 				return -ENOMEM;
-
-			read_lock(&vmlist_lock);
-			for (m=vmlist; m && cursize; m=m->next) {
-				unsigned long vmstart;
-				unsigned long vmsize;
-				unsigned long msize = m->size - PAGE_SIZE;
-
-				if (((unsigned long)m->addr + msize) <
-								curstart)
-					continue;
-				if ((unsigned long)m->addr > (curstart +
-								cursize))
-					break;
-				vmstart = (curstart < (unsigned long)m->addr ?
-					(unsigned long)m->addr : curstart);
-				if (((unsigned long)m->addr + msize) >
-						(curstart + cursize))
-					vmsize = curstart + cursize - vmstart;
-				else
-					vmsize = (unsigned long)m->addr +
-						msize - vmstart;
-				curstart = vmstart + vmsize;
-				cursize -= vmsize;
-				/* don't dump ioremap'd stuff! (TA) */
-				if (m->flags & VM_IOREMAP)
-					continue;
-				memcpy(elf_buf + (vmstart - start),
-						(char *)vmstart, vmsize);
-			}
-			read_unlock(&vmlist_lock);
+			vread(elf_buf, (char *)start, tsz);
+			/* we have to zero-fill user buffer even if no read */
 			if (copy_to_user(buffer, elf_buf, tsz)) {
 				kfree(elf_buf);
 				return -EFAULT;
@@ -402,12 +539,96 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 	return acc;
 }
 
+
+static int open_kcore(struct inode *inode, struct file *filp)
+{
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+	if (kcore_need_update)
+		kcore_update_ram();
+	if (i_size_read(inode) != proc_root_kcore->size) {
+		mutex_lock(&inode->i_mutex);
+		i_size_write(inode, proc_root_kcore->size);
+		mutex_unlock(&inode->i_mutex);
+	}
+	return 0;
+}
+
+
+static const struct file_operations proc_kcore_operations = {
+	.read = read_kcore,
+	.open = open_kcore,
+};
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/* just remember that we have to update kcore */
+static int __meminit kcore_callback(struct notifier_block *self,
+				    unsigned long action, void *arg)
+{
+	switch (action) {
+	case MEM_ONLINE:
+	case MEM_OFFLINE:
+		write_lock(&kclist_lock);
+		kcore_need_update = 1;
+		write_unlock(&kclist_lock);
+	}
+	return NOTIFY_OK;
+}
+#endif
+
+
+static struct kcore_list kcore_vmalloc;
+
+#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
+static struct kcore_list kcore_text;
+/*
+ * If defined, special segment is used for mapping kernel text instead of
+ * direct-map area. We need to create special TEXT section.
+ */
+static void __init proc_kcore_text_init(void)
+{
+	kclist_add(&kcore_text, _stext, _end - _stext, KCORE_TEXT);
+}
+#else
+static void __init proc_kcore_text_init(void)
+{
+}
+#endif
+
+#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
+/*
+ * MODULES_VADDR has no intersection with VMALLOC_ADDR.
+ */
+struct kcore_list kcore_modules;
+static void __init add_modules_range(void)
+{
+	kclist_add(&kcore_modules, (void *)MODULES_VADDR,
+			MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
+}
+#else
+static void __init add_modules_range(void)
+{
+}
+#endif
+
 static int __init proc_kcore_init(void)
 {
-	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
-	if (proc_root_kcore)
-		proc_root_kcore->size =
-			(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
+	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
+				      &proc_kcore_operations);
+	if (!proc_root_kcore) {
+		printk(KERN_ERR "couldn't create /proc/kcore\n");
+		return 0; /* Always returns 0. */
+	}
+	/* Store text area if it's special */
+	proc_kcore_text_init();
+	/* Store vmalloc area */
+	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
+		VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
+	add_modules_range();
+	/* Store direct-map area from physical memory map */
+	kcore_update_ram();
+	hotplug_memory_notifier(kcore_callback, 0);
+
 	return 0;
 }
 module_init(proc_kcore_init);
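
Since kclist_add() grew a type argument, every caller (mostly arch setup code) now has to classify the region it registers. A sketch of an adapted arch-side caller; the region name and bounds are illustrative, only the kclist_add() signature and the KCORE_* classification come from this series (KCORE_OTHER is assumed to exist alongside the types used above):

    /* Illustrative arch-side registration after the API change. */
    static struct kcore_list kcore_arch_region;

    static void __init example_kcore_setup(void)
    {
            /* KCORE_RAM entries are rebuilt by kcore_update_ram(), so
             * explicit adds remain only for non-RAM regions. */
            kclist_add(&kcore_arch_region, (void *)EXAMPLE_REGION_START,
                       EXAMPLE_REGION_SIZE, KCORE_OTHER);
    }
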
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 78faedcb0a8d..c7bff4f603ff 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -81,9 +81,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
81 "Writeback: %8lu kB\n" 81 "Writeback: %8lu kB\n"
82 "AnonPages: %8lu kB\n" 82 "AnonPages: %8lu kB\n"
83 "Mapped: %8lu kB\n" 83 "Mapped: %8lu kB\n"
84 "Shmem: %8lu kB\n"
84 "Slab: %8lu kB\n" 85 "Slab: %8lu kB\n"
85 "SReclaimable: %8lu kB\n" 86 "SReclaimable: %8lu kB\n"
86 "SUnreclaim: %8lu kB\n" 87 "SUnreclaim: %8lu kB\n"
88 "KernelStack: %8lu kB\n"
87 "PageTables: %8lu kB\n" 89 "PageTables: %8lu kB\n"
88#ifdef CONFIG_QUICKLIST 90#ifdef CONFIG_QUICKLIST
89 "Quicklists: %8lu kB\n" 91 "Quicklists: %8lu kB\n"
@@ -128,10 +130,12 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(global_page_state(NR_WRITEBACK)),
 		K(global_page_state(NR_ANON_PAGES)),
 		K(global_page_state(NR_FILE_MAPPED)),
+		K(global_page_state(NR_SHMEM)),
 		K(global_page_state(NR_SLAB_RECLAIMABLE) +
 				global_page_state(NR_SLAB_UNRECLAIMABLE)),
 		K(global_page_state(NR_SLAB_RECLAIMABLE)),
 		K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
+		global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024,
 		K(global_page_state(NR_PAGETABLE)),
 #ifdef CONFIG_QUICKLIST
 		K(quicklist_total_size()),
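
For reference, the two new rows as they would render in /proc/meminfo (values illustrative):

    Shmem:              1236 kB
    KernelStack:        2952 kB

Note that KernelStack bypasses the page-based K() macro: NR_KERNEL_STACK counts one unit per task stack, so the printed value is scaled by THREAD_SIZE / 1024 instead.
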
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 7e14d1a04001..9fe7d7ebe115 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -109,7 +109,7 @@ static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos)
 	return rb_next((struct rb_node *) v);
 }
 
-static struct seq_operations proc_nommu_region_list_seqop = {
+static const struct seq_operations proc_nommu_region_list_seqop = {
 	.start = nommu_region_list_start,
 	.next  = nommu_region_list_next,
 	.stop  = nommu_region_list_stop,
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 2707c6c7a20f..2281c2cbfe2b 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -2,6 +2,7 @@
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/init.h>
+#include <linux/ksm.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/proc_fs.h>
@@ -95,6 +96,8 @@ static const struct file_operations proc_kpagecount_operations = {
 #define KPF_UNEVICTABLE		18
 #define KPF_NOPAGE		20
 
+#define KPF_KSM			21
+
 /* kernel hacking assistances
  * WARNING: subject to change, never rely on them!
  */
@@ -137,6 +140,8 @@ static u64 get_uflags(struct page *page)
 		u |= 1 << KPF_MMAP;
 	if (PageAnon(page))
 		u |= 1 << KPF_ANON;
+	if (PageKsm(page))
+		u |= 1 << KPF_KSM;
 
 	/*
 	 * compound pages: export both head/tail info
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 9b1e4e9a16bf..f667e8aeabdf 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -153,7 +153,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 
 	/* careful: calling conventions are nasty here */
 	res = count;
-	error = table->proc_handler(table, write, filp, buf, &res, ppos);
+	error = table->proc_handler(table, write, buf, &res, ppos);
 	if (!error)
 		error = res;
 out:
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9bd8be1d235c..2a1bef9203c6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -243,6 +243,25 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 		} else if (vma->vm_start <= mm->start_stack &&
 			   vma->vm_end >= mm->start_stack) {
 			name = "[stack]";
+		} else {
+			unsigned long stack_start;
+			struct proc_maps_private *pmp;
+
+			pmp = m->private;
+			stack_start = pmp->task->stack_start;
+
+			if (vma->vm_start <= stack_start &&
+			    vma->vm_end >= stack_start) {
+				pad_len_spaces(m, len);
+				seq_printf(m,
+					   "[threadstack:%08lx]",
+#ifdef CONFIG_STACK_GROWSUP
+					   vma->vm_end - stack_start
+#else
+					   stack_start - vma->vm_start
+#endif
+					  );
+			}
 		}
 	} else {
 		name = "[vdso]";
@@ -465,23 +484,28 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 
+#define CLEAR_REFS_ALL 1
+#define CLEAR_REFS_ANON 2
+#define CLEAR_REFS_MAPPED 3
+
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
 	struct task_struct *task;
-	char buffer[PROC_NUMBUF], *end;
+	char buffer[PROC_NUMBUF];
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
+	long type;
 
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	if (!simple_strtol(buffer, &end, 0))
+	if (strict_strtol(strstrip(buffer), 10, &type))
+		return -EINVAL;
+	if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
 		return -EINVAL;
-	if (*end == '\n')
-		end++;
 	task = get_proc_task(file->f_path.dentry->d_inode);
 	if (!task)
 		return -ESRCH;
@@ -494,18 +518,31 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		down_read(&mm->mmap_sem);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			clear_refs_walk.private = vma;
-			if (!is_vm_hugetlb_page(vma))
-				walk_page_range(vma->vm_start, vma->vm_end,
-						&clear_refs_walk);
+			if (is_vm_hugetlb_page(vma))
+				continue;
+			/*
+			 * Writing 1 to /proc/pid/clear_refs affects all pages.
+			 *
+			 * Writing 2 to /proc/pid/clear_refs only affects
+			 * anonymous pages.
+			 *
+			 * Writing 3 to /proc/pid/clear_refs only affects file
+			 * mapped pages.
+			 */
+			if (type == CLEAR_REFS_ANON && vma->vm_file)
+				continue;
+			if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
+				continue;
+			walk_page_range(vma->vm_start, vma->vm_end,
+					&clear_refs_walk);
 		}
 		flush_tlb_mm(mm);
 		up_read(&mm->mmap_sem);
 		mmput(mm);
 	}
 	put_task_struct(task);
-	if (end - buffer == 0)
-		return -EIO;
-	return end - buffer;
+
+	return count;
 }
 
 const struct file_operations proc_clear_refs_operations = {
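
The accepted values map onto the CLEAR_REFS_* constants introduced above; anything outside 1..3 is now rejected with -EINVAL. A small userspace sketch exercising the new anonymous-only mode, assuming a kernel with this patch applied:

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            /* 2 == CLEAR_REFS_ANON: clear referenced bits only for
             * anonymous VMAs of the target task (here: self). */
            int fd = open("/proc/self/clear_refs", O_WRONLY);

            if (fd < 0)
                    return 1;
            if (write(fd, "2\n", 2) != 2)
                    return 1;
            close(fd);
            return 0;
    }
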