aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2006-06-26 03:25:48 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-26 12:58:24 -0400
commit48e6484d49020dba3578ad117b461e8a391e8f0f (patch)
tree7824ca84bfe71c3fe2c09a1fedc31106fec4f500
parent662795deb854b31501e0ffb42b7f0cce802c134a (diff)
[PATCH] proc: Rewrite the proc dentry flush on exit optimization
To keep the dcache from filling up with dead /proc entries we flush them on process exit. However over the years that code has gotten hairy with a dentry_pointer and a lock in task_struct and misdocumented as a correctness feature. I have rewritten this code to look and see if we have a corresponding entry in the dcache and if so flush it on process exit. This removes the extra fields in the task_struct and allows me to trivially handle the case of a /proc/<tgid>/task/<pid> entry as well as the current /proc/<pid> entries. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/exec.c10
-rw-r--r--fs/proc/base.c134
-rw-r--r--include/linux/init_task.h1
-rw-r--r--include/linux/proc_fs.h6
-rw-r--r--include/linux/sched.h3
-rw-r--r--kernel/exit.c7
-rw-r--r--kernel/fork.c3
7 files changed, 64 insertions, 100 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 0b88bf646143..8c5196087f31 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -666,8 +666,6 @@ static int de_thread(struct task_struct *tsk)
666 * and to assume its PID: 666 * and to assume its PID:
667 */ 667 */
668 if (!thread_group_leader(current)) { 668 if (!thread_group_leader(current)) {
669 struct dentry *proc_dentry1, *proc_dentry2;
670
671 /* 669 /*
672 * Wait for the thread group leader to be a zombie. 670 * Wait for the thread group leader to be a zombie.
673 * It should already be zombie at this point, most 671 * It should already be zombie at this point, most
@@ -689,10 +687,6 @@ static int de_thread(struct task_struct *tsk)
689 */ 687 */
690 current->start_time = leader->start_time; 688 current->start_time = leader->start_time;
691 689
692 spin_lock(&leader->proc_lock);
693 spin_lock(&current->proc_lock);
694 proc_dentry1 = proc_pid_unhash(current);
695 proc_dentry2 = proc_pid_unhash(leader);
696 write_lock_irq(&tasklist_lock); 690 write_lock_irq(&tasklist_lock);
697 691
698 BUG_ON(leader->tgid != current->tgid); 692 BUG_ON(leader->tgid != current->tgid);
@@ -729,10 +723,6 @@ static int de_thread(struct task_struct *tsk)
729 leader->exit_state = EXIT_DEAD; 723 leader->exit_state = EXIT_DEAD;
730 724
731 write_unlock_irq(&tasklist_lock); 725 write_unlock_irq(&tasklist_lock);
732 spin_unlock(&leader->proc_lock);
733 spin_unlock(&current->proc_lock);
734 proc_pid_flush(proc_dentry1);
735 proc_pid_flush(proc_dentry2);
736 } 726 }
737 727
738 /* 728 /*
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c8636841bbcf..f435932e6432 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1352,16 +1352,6 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1352 return 0; 1352 return 0;
1353} 1353}
1354 1354
1355static void pid_base_iput(struct dentry *dentry, struct inode *inode)
1356{
1357 struct task_struct *task = proc_task(inode);
1358 spin_lock(&task->proc_lock);
1359 if (task->proc_dentry == dentry)
1360 task->proc_dentry = NULL;
1361 spin_unlock(&task->proc_lock);
1362 iput(inode);
1363}
1364
1365static int pid_delete_dentry(struct dentry * dentry) 1355static int pid_delete_dentry(struct dentry * dentry)
1366{ 1356{
1367 /* Is the task we represent dead? 1357 /* Is the task we represent dead?
@@ -1383,13 +1373,6 @@ static struct dentry_operations pid_dentry_operations =
1383 .d_delete = pid_delete_dentry, 1373 .d_delete = pid_delete_dentry,
1384}; 1374};
1385 1375
1386static struct dentry_operations pid_base_dentry_operations =
1387{
1388 .d_revalidate = pid_revalidate,
1389 .d_iput = pid_base_iput,
1390 .d_delete = pid_delete_dentry,
1391};
1392
1393/* Lookups */ 1376/* Lookups */
1394 1377
1395static unsigned name_to_int(struct dentry *dentry) 1378static unsigned name_to_int(struct dentry *dentry)
@@ -1859,57 +1842,70 @@ static struct inode_operations proc_self_inode_operations = {
1859}; 1842};
1860 1843
1861/** 1844/**
1862 * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. 1845 * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
1863 * @p: task that should be flushed. 1846 *
1847 * @task: task that should be flushed.
1848 *
1849 * Looks in the dcache for
1850 * /proc/@pid
1851 * /proc/@tgid/task/@pid
1852 * if either directory is present flushes it and all of its children
1853 * from the dcache.
1864 * 1854 *
1865 * Drops the /proc/@pid dcache entry from the hash chains. 1855 * It is safe and reasonable to cache /proc entries for a task until
1856 * that task exits. After that they just clog up the dcache with
1857 * useless entries, possibly causing useful dcache entries to be
1858 * flushed instead. This routine is provided to flush those useless
1859 * dcache entries at process exit time.
1866 * 1860 *
1867 * Dropping /proc/@pid entries and detach_pid must be synchroneous, 1861 * NOTE: This routine is just an optimization so it does not guarantee
1868 * otherwise e.g. /proc/@pid/exe might point to the wrong executable, 1862 * that no dcache entries will exist at process exit time it
1869 * if the pid value is immediately reused. This is enforced by 1863 * just makes it very unlikely that any will persist.
1870 * - caller must acquire spin_lock(p->proc_lock)
1871 * - must be called before detach_pid()
1872 * - proc_pid_lookup acquires proc_lock, and checks that
1873 * the target is not dead by looking at the attach count
1874 * of PIDTYPE_PID.
1875 */ 1864 */
1876 1865void proc_flush_task(struct task_struct *task)
1877struct dentry *proc_pid_unhash(struct task_struct *p)
1878{ 1866{
1879 struct dentry *proc_dentry; 1867 struct dentry *dentry, *leader, *dir;
1868 char buf[30];
1869 struct qstr name;
1870
1871 name.name = buf;
1872 name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
1873 dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
1874 if (dentry) {
1875 shrink_dcache_parent(dentry);
1876 d_drop(dentry);
1877 dput(dentry);
1878 }
1880 1879
1881 proc_dentry = p->proc_dentry; 1880 if (thread_group_leader(task))
1882 if (proc_dentry != NULL) { 1881 goto out;
1883 1882
1884 spin_lock(&dcache_lock); 1883 name.name = buf;
1885 spin_lock(&proc_dentry->d_lock); 1884 name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
1886 if (!d_unhashed(proc_dentry)) { 1885 leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
1887 dget_locked(proc_dentry); 1886 if (!leader)
1888 __d_drop(proc_dentry); 1887 goto out;
1889 spin_unlock(&proc_dentry->d_lock);
1890 } else {
1891 spin_unlock(&proc_dentry->d_lock);
1892 proc_dentry = NULL;
1893 }
1894 spin_unlock(&dcache_lock);
1895 }
1896 return proc_dentry;
1897}
1898 1888
1899/** 1889 name.name = "task";
1900 * proc_pid_flush - recover memory used by stale /proc/@pid/x entries 1890 name.len = strlen(name.name);
1901 * @proc_dentry: directoy to prune. 1891 dir = d_hash_and_lookup(leader, &name);
1902 * 1892 if (!dir)
1903 * Shrink the /proc directory that was used by the just killed thread. 1893 goto out_put_leader;
1904 */ 1894
1905 1895 name.name = buf;
1906void proc_pid_flush(struct dentry *proc_dentry) 1896 name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
1907{ 1897 dentry = d_hash_and_lookup(dir, &name);
1908 might_sleep(); 1898 if (dentry) {
1909 if(proc_dentry != NULL) { 1899 shrink_dcache_parent(dentry);
1910 shrink_dcache_parent(proc_dentry); 1900 d_drop(dentry);
1911 dput(proc_dentry); 1901 dput(dentry);
1912 } 1902 }
1903
1904 dput(dir);
1905out_put_leader:
1906 dput(leader);
1907out:
1908 return;
1913} 1909}
1914 1910
1915/* SMP-safe */ 1911/* SMP-safe */
@@ -1919,7 +1915,6 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
1919 struct inode *inode; 1915 struct inode *inode;
1920 struct proc_inode *ei; 1916 struct proc_inode *ei;
1921 unsigned tgid; 1917 unsigned tgid;
1922 int died;
1923 1918
1924 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { 1919 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
1925 inode = new_inode(dir->i_sb); 1920 inode = new_inode(dir->i_sb);
@@ -1965,23 +1960,16 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
1965 inode->i_nlink = 4; 1960 inode->i_nlink = 4;
1966#endif 1961#endif
1967 1962
1968 dentry->d_op = &pid_base_dentry_operations; 1963 dentry->d_op = &pid_dentry_operations;
1969 1964
1970 died = 0;
1971 d_add(dentry, inode); 1965 d_add(dentry, inode);
1972 spin_lock(&task->proc_lock);
1973 task->proc_dentry = dentry;
1974 if (!pid_alive(task)) { 1966 if (!pid_alive(task)) {
1975 dentry = proc_pid_unhash(task); 1967 d_drop(dentry);
1976 died = 1; 1968 shrink_dcache_parent(dentry);
1969 goto out;
1977 } 1970 }
1978 spin_unlock(&task->proc_lock);
1979 1971
1980 put_task_struct(task); 1972 put_task_struct(task);
1981 if (died) {
1982 proc_pid_flush(dentry);
1983 goto out;
1984 }
1985 return NULL; 1973 return NULL;
1986out: 1974out:
1987 return ERR_PTR(-ENOENT); 1975 return ERR_PTR(-ENOENT);
@@ -2024,7 +2012,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
2024 inode->i_nlink = 3; 2012 inode->i_nlink = 3;
2025#endif 2013#endif
2026 2014
2027 dentry->d_op = &pid_base_dentry_operations; 2015 dentry->d_op = &pid_dentry_operations;
2028 2016
2029 d_add(dentry, inode); 2017 d_add(dentry, inode);
2030 2018
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 41ecbb847f32..e127ef7e8da8 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -119,7 +119,6 @@ extern struct group_info init_groups;
119 .signal = {{0}}}, \ 119 .signal = {{0}}}, \
120 .blocked = {{0}}, \ 120 .blocked = {{0}}, \
121 .alloc_lock = SPIN_LOCK_UNLOCKED, \ 121 .alloc_lock = SPIN_LOCK_UNLOCKED, \
122 .proc_lock = SPIN_LOCK_UNLOCKED, \
123 .journal_info = NULL, \ 122 .journal_info = NULL, \
124 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ 123 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
125 .fs_excl = ATOMIC_INIT(0), \ 124 .fs_excl = ATOMIC_INIT(0), \
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 9dd84884abb1..d4d2081dbaf7 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -99,9 +99,8 @@ extern void proc_misc_init(void);
99 99
100struct mm_struct; 100struct mm_struct;
101 101
102void proc_flush_task(struct task_struct *task);
102struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); 103struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
103struct dentry *proc_pid_unhash(struct task_struct *p);
104void proc_pid_flush(struct dentry *proc_dentry);
105int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); 104int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
106unsigned long task_vsize(struct mm_struct *); 105unsigned long task_vsize(struct mm_struct *);
107int task_statm(struct mm_struct *, int *, int *, int *, int *); 106int task_statm(struct mm_struct *, int *, int *, int *, int *);
@@ -211,8 +210,7 @@ static inline void proc_net_remove(const char *name)
211#define proc_net_create(name, mode, info) ({ (void)(mode), NULL; }) 210#define proc_net_create(name, mode, info) ({ (void)(mode), NULL; })
212static inline void proc_net_remove(const char *name) {} 211static inline void proc_net_remove(const char *name) {}
213 212
214static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; } 213static inline void proc_flush_task(struct task_struct *task) { }
215static inline void proc_pid_flush(struct dentry *proc_dentry) { }
216 214
217static inline struct proc_dir_entry *create_proc_entry(const char *name, 215static inline struct proc_dir_entry *create_proc_entry(const char *name,
218 mode_t mode, struct proc_dir_entry *parent) { return NULL; } 216 mode_t mode, struct proc_dir_entry *parent) { return NULL; }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8d11d9310db0..122a25c1b997 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -842,8 +842,6 @@ struct task_struct {
842 u32 self_exec_id; 842 u32 self_exec_id;
843/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */ 843/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
844 spinlock_t alloc_lock; 844 spinlock_t alloc_lock;
845/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
846 spinlock_t proc_lock;
847 845
848#ifdef CONFIG_DEBUG_MUTEXES 846#ifdef CONFIG_DEBUG_MUTEXES
849 /* mutex deadlock detection */ 847 /* mutex deadlock detection */
@@ -856,7 +854,6 @@ struct task_struct {
856/* VM state */ 854/* VM state */
857 struct reclaim_state *reclaim_state; 855 struct reclaim_state *reclaim_state;
858 856
859 struct dentry *proc_dentry;
860 struct backing_dev_info *backing_dev_info; 857 struct backing_dev_info *backing_dev_info;
861 858
862 struct io_context *io_context; 859 struct io_context *io_context;
diff --git a/kernel/exit.c b/kernel/exit.c
index e76bd02e930e..304ef637be6c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -137,12 +137,8 @@ void release_task(struct task_struct * p)
137{ 137{
138 int zap_leader; 138 int zap_leader;
139 task_t *leader; 139 task_t *leader;
140 struct dentry *proc_dentry;
141
142repeat: 140repeat:
143 atomic_dec(&p->user->processes); 141 atomic_dec(&p->user->processes);
144 spin_lock(&p->proc_lock);
145 proc_dentry = proc_pid_unhash(p);
146 write_lock_irq(&tasklist_lock); 142 write_lock_irq(&tasklist_lock);
147 ptrace_unlink(p); 143 ptrace_unlink(p);
148 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); 144 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
@@ -171,8 +167,7 @@ repeat:
171 167
172 sched_exit(p); 168 sched_exit(p);
173 write_unlock_irq(&tasklist_lock); 169 write_unlock_irq(&tasklist_lock);
174 spin_unlock(&p->proc_lock); 170 proc_flush_task(p);
175 proc_pid_flush(proc_dentry);
176 release_thread(p); 171 release_thread(p);
177 call_rcu(&p->rcu, delayed_put_task_struct); 172 call_rcu(&p->rcu, delayed_put_task_struct);
178 173
diff --git a/kernel/fork.c b/kernel/fork.c
index dfd10cb370c3..79e91046f36e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -993,13 +993,10 @@ static task_t *copy_process(unsigned long clone_flags,
993 if (put_user(p->pid, parent_tidptr)) 993 if (put_user(p->pid, parent_tidptr))
994 goto bad_fork_cleanup; 994 goto bad_fork_cleanup;
995 995
996 p->proc_dentry = NULL;
997
998 INIT_LIST_HEAD(&p->children); 996 INIT_LIST_HEAD(&p->children);
999 INIT_LIST_HEAD(&p->sibling); 997 INIT_LIST_HEAD(&p->sibling);
1000 p->vfork_done = NULL; 998 p->vfork_done = NULL;
1001 spin_lock_init(&p->alloc_lock); 999 spin_lock_init(&p->alloc_lock);
1002 spin_lock_init(&p->proc_lock);
1003 1000
1004 clear_tsk_thread_flag(p, TIF_SIGPENDING); 1001 clear_tsk_thread_flag(p, TIF_SIGPENDING);
1005 init_sigpending(&p->pending); 1002 init_sigpending(&p->pending);