author	Eric W. Biederman <ebiederm@xmission.com>	2006-06-26 03:25:55 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-06-26 12:58:25 -0400
commit	99f895518368252ba862cc15ce4eb98ebbe1bec6 (patch)
tree	a9dcc01963221d1fd6a7e357b95d361ebfe91c6d /fs/proc/task_mmu.c
parent	8578cea7509cbdec25b31d08b48a92fcc3b1a9e3 (diff)
[PATCH] proc: don't lock task_structs indefinitely
Every inode in /proc holds a reference to a struct task_struct. If a directory or file is opened and remains open after the task exits, this pinning continues. With 8K stacks on a 32-bit machine, the amount pinned per file descriptor is about 10K.

Normally I would figure a reasonable per-user process limit is about 100 processes. With 80 processes, each holding 1000 open file descriptors, I can trigger the OOM killer on a 32-bit kernel, because I have pinned about 800MB of useless data.

This patch replaces the struct task_struct pointer with a pointer to a struct task_ref which has a struct task_struct pointer, so the pinning of dead tasks does not happen. The code now has to contend with the fact that the task may exit at any time, which makes it a little, but not much, more complicated.

With this change it takes about 1000 processes, each opening 1000 file descriptors, before I can trigger the OOM killer. Much better.

[mlp@google.com: task_mmu small fixes]
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Paul Jackson <pj@sgi.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Albert Cahalan <acahalan@gmail.com>
Signed-off-by: Prasanna Meda <mlp@google.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
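To make the new lifetime rules concrete, here is a minimal sketch of the pattern the patch adopts, not code from the patch itself: the /proc inode keeps only a revocable reference, and each operation briefly upgrades it to a pinned task_struct, dropping the pin on every return path. get_proc_task() and put_task_struct() appear in the hunks below; the handler name and body are illustrative.

static int example_proc_op(struct inode *inode)
{
	struct task_struct *task = get_proc_task(inode);
	int ret = -ESRCH;

	/* NULL means the task already exited; nothing stays pinned */
	if (!task)
		return ret;

	/* ... inspect *task briefly here ... */
	ret = 0;

	put_task_struct(task);	/* drop the temporary pin */
	return ret;
}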
Diffstat (limited to 'fs/proc/task_mmu.c')
-rw-r--r--	fs/proc/task_mmu.c	72
1 file changed, 50 insertions(+), 22 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4187b4e9cdb3..abf3208c3f60 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -75,9 +75,13 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
 {
 	struct vm_area_struct * vma;
 	int result = -ENOENT;
-	struct task_struct *task = proc_task(inode);
-	struct mm_struct * mm = get_task_mm(task);
+	struct task_struct *task = get_proc_task(inode);
+	struct mm_struct * mm = NULL;
 
+	if (task) {
+		mm = get_task_mm(task);
+		put_task_struct(task);
+	}
 	if (!mm)
 		goto out;
 	down_read(&mm->mmap_sem);
@@ -120,7 +124,8 @@ struct mem_size_stats
 
 static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
 {
-	struct task_struct *task = m->private;
+	struct proc_maps_private *priv = m->private;
+	struct task_struct *task = priv->task;
 	struct vm_area_struct *vma = v;
 	struct mm_struct *mm = vma->vm_mm;
 	struct file *file = vma->vm_file;
@@ -295,12 +300,16 @@ static int show_smap(struct seq_file *m, void *v)
 
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
-	struct task_struct *task = m->private;
+	struct proc_maps_private *priv = m->private;
 	unsigned long last_addr = m->version;
 	struct mm_struct *mm;
-	struct vm_area_struct *vma, *tail_vma;
+	struct vm_area_struct *vma, *tail_vma = NULL;
 	loff_t l = *pos;
 
+	/* Clear the per syscall fields in priv */
+	priv->task = NULL;
+	priv->tail_vma = NULL;
+
 	/*
 	 * We remember last_addr rather than next_addr to hit with
 	 * mmap_cache most of the time. We have zero last_addr at
@@ -311,11 +320,15 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 	if (last_addr == -1UL)
 		return NULL;
 
-	mm = get_task_mm(task);
+	priv->task = get_tref_task(priv->tref);
+	if (!priv->task)
+		return NULL;
+
+	mm = get_task_mm(priv->task);
 	if (!mm)
 		return NULL;
 
-	tail_vma = get_gate_vma(task);
+	priv->tail_vma = tail_vma = get_gate_vma(priv->task);
 	down_read(&mm->mmap_sem);
 
 	/* Start with last addr hint */
@@ -350,11 +363,9 @@ out:
 	return tail_vma;
 }
 
-static void m_stop(struct seq_file *m, void *v)
+static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
 {
-	struct task_struct *task = m->private;
-	struct vm_area_struct *vma = v;
-	if (vma && vma != get_gate_vma(task)) {
+	if (vma && vma != priv->tail_vma) {
 		struct mm_struct *mm = vma->vm_mm;
 		up_read(&mm->mmap_sem);
 		mmput(mm);
@@ -363,17 +374,27 @@ static void m_stop(struct seq_file *m, void *v)
 
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct task_struct *task = m->private;
+	struct proc_maps_private *priv = m->private;
 	struct vm_area_struct *vma = v;
-	struct vm_area_struct *tail_vma = get_gate_vma(task);
+	struct vm_area_struct *tail_vma = priv->tail_vma;
 
 	(*pos)++;
 	if (vma && (vma != tail_vma) && vma->vm_next)
 		return vma->vm_next;
-	m_stop(m, v);
+	vma_stop(priv, vma);
 	return (vma != tail_vma)? tail_vma: NULL;
 }
 
+static void m_stop(struct seq_file *m, void *v)
+{
+	struct proc_maps_private *priv = m->private;
+	struct vm_area_struct *vma = v;
+
+	vma_stop(priv, vma);
+	if (priv->task)
+		put_task_struct(priv->task);
+}
+
 static struct seq_operations proc_pid_maps_op = {
 	.start = m_start,
 	.next = m_next,
@@ -391,11 +412,18 @@ static struct seq_operations proc_pid_smaps_op = {
 static int do_maps_open(struct inode *inode, struct file *file,
 			struct seq_operations *ops)
 {
-	struct task_struct *task = proc_task(inode);
-	int ret = seq_open(file, ops);
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-		m->private = task;
+	struct proc_maps_private *priv;
+	int ret = -ENOMEM;
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (priv) {
+		priv->tref = proc_tref(inode);
+		ret = seq_open(file, ops);
+		if (!ret) {
+			struct seq_file *m = file->private_data;
+			m->private = priv;
+		} else {
+			kfree(priv);
+		}
 	}
 	return ret;
 }
@@ -409,7 +437,7 @@ struct file_operations proc_maps_operations = {
 	.open = maps_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_private,
 };
 
 #ifdef CONFIG_NUMA
@@ -431,7 +459,7 @@ struct file_operations proc_numa_maps_operations = {
 	.open = numa_maps_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_private,
 };
 #endif
 
@@ -444,5 +472,5 @@ struct file_operations proc_smaps_operations = {
 	.open = smaps_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_private,
 };
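One detail worth spelling out: because do_maps_open() now hangs a kzalloc()ed proc_maps_private off the seq_file, plain seq_release() would leak it, which is why all three file_operations above switch to seq_release_private(). That helper, roughly as it exists in fs/seq_file.c (simplified here for orientation, not quoted from this patch), frees m->private before the normal seq_file teardown:

int seq_release_private(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;

	/* free the kzalloc()ed private data (here: the proc_maps_private) */
	kfree(seq->private);
	seq->private = NULL;
	return seq_release(inode, file);
}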