diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2006-06-26 03:25:48 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-26 12:58:24 -0400 |
commit | 48e6484d49020dba3578ad117b461e8a391e8f0f (patch) | |
tree | 7824ca84bfe71c3fe2c09a1fedc31106fec4f500 | |
parent | 662795deb854b31501e0ffb42b7f0cce802c134a (diff) |
[PATCH] proc: Rewrite the proc dentry flush on exit optimization
To keep the dcache from filling up with dead /proc entries, we flush them on
process exit. However, over the years that code has gotten hairy, with a
dentry pointer and a lock in task_struct, and has been misdocumented as a
correctness feature.
I have rewritten this code to look and see if we have a corresponding entry in
the dcache and if so flush it on process exit. This removes the extra fields
in the task_struct and allows me to trivially handle the case of a
/proc/<tgid>/task/<pid> entry as well as the current /proc/<pid> entries.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/exec.c | 10 | ||||
-rw-r--r-- | fs/proc/base.c | 134 | ||||
-rw-r--r-- | include/linux/init_task.h | 1 | ||||
-rw-r--r-- | include/linux/proc_fs.h | 6 | ||||
-rw-r--r-- | include/linux/sched.h | 3 | ||||
-rw-r--r-- | kernel/exit.c | 7 | ||||
-rw-r--r-- | kernel/fork.c | 3 |
7 files changed, 64 insertions, 100 deletions
@@ -666,8 +666,6 @@ static int de_thread(struct task_struct *tsk) | |||
666 | * and to assume its PID: | 666 | * and to assume its PID: |
667 | */ | 667 | */ |
668 | if (!thread_group_leader(current)) { | 668 | if (!thread_group_leader(current)) { |
669 | struct dentry *proc_dentry1, *proc_dentry2; | ||
670 | |||
671 | /* | 669 | /* |
672 | * Wait for the thread group leader to be a zombie. | 670 | * Wait for the thread group leader to be a zombie. |
673 | * It should already be zombie at this point, most | 671 | * It should already be zombie at this point, most |
@@ -689,10 +687,6 @@ static int de_thread(struct task_struct *tsk) | |||
689 | */ | 687 | */ |
690 | current->start_time = leader->start_time; | 688 | current->start_time = leader->start_time; |
691 | 689 | ||
692 | spin_lock(&leader->proc_lock); | ||
692 | spin_lock(&current->proc_lock); | ||
694 | proc_dentry1 = proc_pid_unhash(current); | ||
695 | proc_dentry2 = proc_pid_unhash(leader); | ||
696 | write_lock_irq(&tasklist_lock); | 690 | write_lock_irq(&tasklist_lock); |
697 | 691 | ||
698 | BUG_ON(leader->tgid != current->tgid); | 692 | BUG_ON(leader->tgid != current->tgid); |
@@ -729,10 +723,6 @@ static int de_thread(struct task_struct *tsk) | |||
729 | leader->exit_state = EXIT_DEAD; | 723 | leader->exit_state = EXIT_DEAD; |
730 | 724 | ||
731 | write_unlock_irq(&tasklist_lock); | 725 | write_unlock_irq(&tasklist_lock); |
732 | spin_unlock(&leader->proc_lock); | ||
733 | spin_unlock(&current->proc_lock); | ||
734 | proc_pid_flush(proc_dentry1); | ||
735 | proc_pid_flush(proc_dentry2); | ||
736 | } | 726 | } |
737 | 727 | ||
738 | /* | 728 | /* |
diff --git a/fs/proc/base.c b/fs/proc/base.c index c8636841bbcf..f435932e6432 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1352,16 +1352,6 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1352 | return 0; | 1352 | return 0; |
1353 | } | 1353 | } |
1354 | 1354 | ||
1355 | static void pid_base_iput(struct dentry *dentry, struct inode *inode) | ||
1356 | { | ||
1357 | struct task_struct *task = proc_task(inode); | ||
1358 | spin_lock(&task->proc_lock); | ||
1359 | if (task->proc_dentry == dentry) | ||
1360 | task->proc_dentry = NULL; | ||
1361 | spin_unlock(&task->proc_lock); | ||
1362 | iput(inode); | ||
1363 | } | ||
1364 | |||
1365 | static int pid_delete_dentry(struct dentry * dentry) | 1355 | static int pid_delete_dentry(struct dentry * dentry) |
1366 | { | 1356 | { |
1367 | /* Is the task we represent dead? | 1357 | /* Is the task we represent dead? |
@@ -1383,13 +1373,6 @@ static struct dentry_operations pid_dentry_operations = | |||
1383 | .d_delete = pid_delete_dentry, | 1373 | .d_delete = pid_delete_dentry, |
1384 | }; | 1374 | }; |
1385 | 1375 | ||
1386 | static struct dentry_operations pid_base_dentry_operations = | ||
1387 | { | ||
1388 | .d_revalidate = pid_revalidate, | ||
1389 | .d_iput = pid_base_iput, | ||
1390 | .d_delete = pid_delete_dentry, | ||
1391 | }; | ||
1392 | |||
1393 | /* Lookups */ | 1376 | /* Lookups */ |
1394 | 1377 | ||
1395 | static unsigned name_to_int(struct dentry *dentry) | 1378 | static unsigned name_to_int(struct dentry *dentry) |
@@ -1859,57 +1842,70 @@ static struct inode_operations proc_self_inode_operations = { | |||
1859 | }; | 1842 | }; |
1860 | 1843 | ||
1861 | /** | 1844 | /** |
1862 | * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. | 1845 | * proc_flush_task - Remove dcache entries for @task from the /proc dcache. |
1863 | * @p: task that should be flushed. | 1846 | * |
1847 | * @task: task that should be flushed. | ||
1848 | * | ||
1849 | * Looks in the dcache for | ||
1850 | * /proc/@pid | ||
1851 | * /proc/@tgid/task/@pid | ||
1852 | * if either directory is present flushes it and all of its children | ||
1853 | * from the dcache. | ||
1864 | * | 1854 | * |
1865 | * Drops the /proc/@pid dcache entry from the hash chains. | 1855 | * It is safe and reasonable to cache /proc entries for a task until |
1856 | * that task exits. After that they just clog up the dcache with | ||
1857 | * useless entries, possibly causing useful dcache entries to be | ||
1858 | * flushed instead. This routine is provided to flush those useless | ||
1859 | * dcache entries at process exit time. | ||
1866 | * | 1860 | * |
1867 | * Dropping /proc/@pid entries and detach_pid must be synchroneous, | 1861 | * NOTE: This routine is just an optimization so it does not guarantee |
1868 | * otherwise e.g. /proc/@pid/exe might point to the wrong executable, | 1862 | * that no dcache entries will exist at process exit time it |
1869 | * if the pid value is immediately reused. This is enforced by | 1863 | * just makes it very unlikely that any will persist. |
1870 | * - caller must acquire spin_lock(p->proc_lock) | ||
1871 | * - must be called before detach_pid() | ||
1872 | * - proc_pid_lookup acquires proc_lock, and checks that | ||
1873 | * the target is not dead by looking at the attach count | ||
1874 | * of PIDTYPE_PID. | ||
1875 | */ | 1864 | */ |
1876 | 1865 | void proc_flush_task(struct task_struct *task) | |
1877 | struct dentry *proc_pid_unhash(struct task_struct *p) | ||
1878 | { | 1866 | { |
1879 | struct dentry *proc_dentry; | 1867 | struct dentry *dentry, *leader, *dir; |
1868 | char buf[30]; | ||
1869 | struct qstr name; | ||
1870 | |||
1871 | name.name = buf; | ||
1872 | name.len = snprintf(buf, sizeof(buf), "%d", task->pid); | ||
1873 | dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); | ||
1874 | if (dentry) { | ||
1875 | shrink_dcache_parent(dentry); | ||
1876 | d_drop(dentry); | ||
1877 | dput(dentry); | ||
1878 | } | ||
1880 | 1879 | ||
1881 | proc_dentry = p->proc_dentry; | 1880 | if (thread_group_leader(task)) |
1882 | if (proc_dentry != NULL) { | 1881 | goto out; |
1883 | 1882 | ||
1884 | spin_lock(&dcache_lock); | 1883 | name.name = buf; |
1885 | spin_lock(&proc_dentry->d_lock); | 1884 | name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); |
1886 | if (!d_unhashed(proc_dentry)) { | 1885 | leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); |
1887 | dget_locked(proc_dentry); | 1886 | if (!leader) |
1888 | __d_drop(proc_dentry); | 1887 | goto out; |
1889 | spin_unlock(&proc_dentry->d_lock); | ||
1890 | } else { | ||
1891 | spin_unlock(&proc_dentry->d_lock); | ||
1892 | proc_dentry = NULL; | ||
1893 | } | ||
1894 | spin_unlock(&dcache_lock); | ||
1895 | } | ||
1896 | return proc_dentry; | ||
1897 | } | ||
1898 | 1888 | ||
1899 | /** | 1889 | name.name = "task"; |
1900 | * proc_pid_flush - recover memory used by stale /proc/@pid/x entries | 1890 | name.len = strlen(name.name); |
1901 | * @proc_dentry: directoy to prune. | 1891 | dir = d_hash_and_lookup(leader, &name); |
1902 | * | 1892 | if (!dir) |
1903 | * Shrink the /proc directory that was used by the just killed thread. | 1893 | goto out_put_leader; |
1904 | */ | 1894 | |
1905 | 1895 | name.name = buf; | |
1906 | void proc_pid_flush(struct dentry *proc_dentry) | 1896 | name.len = snprintf(buf, sizeof(buf), "%d", task->pid); |
1907 | { | 1897 | dentry = d_hash_and_lookup(dir, &name); |
1908 | might_sleep(); | 1898 | if (dentry) { |
1909 | if(proc_dentry != NULL) { | 1899 | shrink_dcache_parent(dentry); |
1910 | shrink_dcache_parent(proc_dentry); | 1900 | d_drop(dentry); |
1911 | dput(proc_dentry); | 1901 | dput(dentry); |
1912 | } | 1902 | } |
1903 | |||
1904 | dput(dir); | ||
1905 | out_put_leader: | ||
1906 | dput(leader); | ||
1907 | out: | ||
1908 | return; | ||
1913 | } | 1909 | } |
1914 | 1910 | ||
1915 | /* SMP-safe */ | 1911 | /* SMP-safe */ |
@@ -1919,7 +1915,6 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct | |||
1919 | struct inode *inode; | 1915 | struct inode *inode; |
1920 | struct proc_inode *ei; | 1916 | struct proc_inode *ei; |
1921 | unsigned tgid; | 1917 | unsigned tgid; |
1922 | int died; | ||
1923 | 1918 | ||
1924 | if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { | 1919 | if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { |
1925 | inode = new_inode(dir->i_sb); | 1920 | inode = new_inode(dir->i_sb); |
@@ -1965,23 +1960,16 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct | |||
1965 | inode->i_nlink = 4; | 1960 | inode->i_nlink = 4; |
1966 | #endif | 1961 | #endif |
1967 | 1962 | ||
1968 | dentry->d_op = &pid_base_dentry_operations; | 1963 | dentry->d_op = &pid_dentry_operations; |
1969 | 1964 | ||
1970 | died = 0; | ||
1971 | d_add(dentry, inode); | 1965 | d_add(dentry, inode); |
1972 | spin_lock(&task->proc_lock); | ||
1973 | task->proc_dentry = dentry; | ||
1974 | if (!pid_alive(task)) { | 1966 | if (!pid_alive(task)) { |
1975 | dentry = proc_pid_unhash(task); | 1967 | d_drop(dentry); |
1976 | died = 1; | 1968 | shrink_dcache_parent(dentry); |
1969 | goto out; | ||
1977 | } | 1970 | } |
1978 | spin_unlock(&task->proc_lock); | ||
1979 | 1971 | ||
1980 | put_task_struct(task); | 1972 | put_task_struct(task); |
1981 | if (died) { | ||
1982 | proc_pid_flush(dentry); | ||
1983 | goto out; | ||
1984 | } | ||
1985 | return NULL; | 1973 | return NULL; |
1986 | out: | 1974 | out: |
1987 | return ERR_PTR(-ENOENT); | 1975 | return ERR_PTR(-ENOENT); |
@@ -2024,7 +2012,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry | |||
2024 | inode->i_nlink = 3; | 2012 | inode->i_nlink = 3; |
2025 | #endif | 2013 | #endif |
2026 | 2014 | ||
2027 | dentry->d_op = &pid_base_dentry_operations; | 2015 | dentry->d_op = &pid_dentry_operations; |
2028 | 2016 | ||
2029 | d_add(dentry, inode); | 2017 | d_add(dentry, inode); |
2030 | 2018 | ||
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 41ecbb847f32..e127ef7e8da8 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -119,7 +119,6 @@ extern struct group_info init_groups; | |||
119 | .signal = {{0}}}, \ | 119 | .signal = {{0}}}, \ |
120 | .blocked = {{0}}, \ | 120 | .blocked = {{0}}, \ |
121 | .alloc_lock = SPIN_LOCK_UNLOCKED, \ | 121 | .alloc_lock = SPIN_LOCK_UNLOCKED, \ |
122 | .proc_lock = SPIN_LOCK_UNLOCKED, \ | ||
123 | .journal_info = NULL, \ | 122 | .journal_info = NULL, \ |
124 | .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ | 123 | .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ |
125 | .fs_excl = ATOMIC_INIT(0), \ | 124 | .fs_excl = ATOMIC_INIT(0), \ |
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 9dd84884abb1..d4d2081dbaf7 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h | |||
@@ -99,9 +99,8 @@ extern void proc_misc_init(void); | |||
99 | 99 | ||
100 | struct mm_struct; | 100 | struct mm_struct; |
101 | 101 | ||
102 | void proc_flush_task(struct task_struct *task); | ||
102 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); | 103 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); |
103 | struct dentry *proc_pid_unhash(struct task_struct *p); | ||
104 | void proc_pid_flush(struct dentry *proc_dentry); | ||
105 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); | 104 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); |
106 | unsigned long task_vsize(struct mm_struct *); | 105 | unsigned long task_vsize(struct mm_struct *); |
107 | int task_statm(struct mm_struct *, int *, int *, int *, int *); | 106 | int task_statm(struct mm_struct *, int *, int *, int *, int *); |
@@ -211,8 +210,7 @@ static inline void proc_net_remove(const char *name) | |||
211 | #define proc_net_create(name, mode, info) ({ (void)(mode), NULL; }) | 210 | #define proc_net_create(name, mode, info) ({ (void)(mode), NULL; }) |
212 | static inline void proc_net_remove(const char *name) {} | 211 | static inline void proc_net_remove(const char *name) {} |
213 | 212 | ||
214 | static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; } | 213 | static inline void proc_flush_task(struct task_struct *task) { } |
215 | static inline void proc_pid_flush(struct dentry *proc_dentry) { } | ||
216 | 214 | ||
217 | static inline struct proc_dir_entry *create_proc_entry(const char *name, | 215 | static inline struct proc_dir_entry *create_proc_entry(const char *name, |
218 | mode_t mode, struct proc_dir_entry *parent) { return NULL; } | 216 | mode_t mode, struct proc_dir_entry *parent) { return NULL; } |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 8d11d9310db0..122a25c1b997 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -842,8 +842,6 @@ struct task_struct { | |||
842 | u32 self_exec_id; | 842 | u32 self_exec_id; |
843 | /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */ | 843 | /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */ |
844 | spinlock_t alloc_lock; | 844 | spinlock_t alloc_lock; |
845 | /* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */ | ||
846 | spinlock_t proc_lock; | ||
847 | 845 | ||
848 | #ifdef CONFIG_DEBUG_MUTEXES | 846 | #ifdef CONFIG_DEBUG_MUTEXES |
849 | /* mutex deadlock detection */ | 847 | /* mutex deadlock detection */ |
@@ -856,7 +854,6 @@ struct task_struct { | |||
856 | /* VM state */ | 854 | /* VM state */ |
857 | struct reclaim_state *reclaim_state; | 855 | struct reclaim_state *reclaim_state; |
858 | 856 | ||
859 | struct dentry *proc_dentry; | ||
860 | struct backing_dev_info *backing_dev_info; | 857 | struct backing_dev_info *backing_dev_info; |
861 | 858 | ||
862 | struct io_context *io_context; | 859 | struct io_context *io_context; |
diff --git a/kernel/exit.c b/kernel/exit.c index e76bd02e930e..304ef637be6c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -137,12 +137,8 @@ void release_task(struct task_struct * p) | |||
137 | { | 137 | { |
138 | int zap_leader; | 138 | int zap_leader; |
139 | task_t *leader; | 139 | task_t *leader; |
140 | struct dentry *proc_dentry; | ||
141 | |||
142 | repeat: | 140 | repeat: |
143 | atomic_dec(&p->user->processes); | 141 | atomic_dec(&p->user->processes); |
144 | spin_lock(&p->proc_lock); | ||
145 | proc_dentry = proc_pid_unhash(p); | ||
146 | write_lock_irq(&tasklist_lock); | 142 | write_lock_irq(&tasklist_lock); |
147 | ptrace_unlink(p); | 143 | ptrace_unlink(p); |
148 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); | 144 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); |
@@ -171,8 +167,7 @@ repeat: | |||
171 | 167 | ||
172 | sched_exit(p); | 168 | sched_exit(p); |
173 | write_unlock_irq(&tasklist_lock); | 169 | write_unlock_irq(&tasklist_lock); |
174 | spin_unlock(&p->proc_lock); | 170 | proc_flush_task(p); |
175 | proc_pid_flush(proc_dentry); | ||
176 | release_thread(p); | 171 | release_thread(p); |
177 | call_rcu(&p->rcu, delayed_put_task_struct); | 172 | call_rcu(&p->rcu, delayed_put_task_struct); |
178 | 173 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index dfd10cb370c3..79e91046f36e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -993,13 +993,10 @@ static task_t *copy_process(unsigned long clone_flags, | |||
993 | if (put_user(p->pid, parent_tidptr)) | 993 | if (put_user(p->pid, parent_tidptr)) |
994 | goto bad_fork_cleanup; | 994 | goto bad_fork_cleanup; |
995 | 995 | ||
996 | p->proc_dentry = NULL; | ||
997 | |||
998 | INIT_LIST_HEAD(&p->children); | 996 | INIT_LIST_HEAD(&p->children); |
999 | INIT_LIST_HEAD(&p->sibling); | 997 | INIT_LIST_HEAD(&p->sibling); |
1000 | p->vfork_done = NULL; | 998 | p->vfork_done = NULL; |
1001 | spin_lock_init(&p->alloc_lock); | 999 | spin_lock_init(&p->alloc_lock); |
1002 | spin_lock_init(&p->proc_lock); | ||
1003 | 1000 | ||
1004 | clear_tsk_thread_flag(p, TIF_SIGPENDING); | 1001 | clear_tsk_thread_flag(p, TIF_SIGPENDING); |
1005 | init_sigpending(&p->pending); | 1002 | init_sigpending(&p->pending); |