diff options
author | Matt Helsley <matthltc@us.ibm.com> | 2008-04-29 04:01:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-29 11:06:17 -0400 |
commit | 925d1c401fa6cfd0df5d2e37da8981494ccdec07 (patch) | |
tree | 4f3b7a09311cd99783b822350628125e44f9902d /fs | |
parent | e93b4ea20adb20f1f1f07f10ba5d7dd739d2843e (diff) |
procfs task exe symlink
The kernel implements readlink of /proc/pid/exe by getting the file from
the first executable VMA. Then the path to the file is reconstructed and
reported as the result.
Because of the VMA walk the code is slightly different on nommu systems.
This patch avoids separate /proc/pid/exe code on nommu systems. Instead of
walking the VMAs to find the first executable file-backed VMA we store a
reference to the exec'd file in the mm_struct.
That reference would prevent the filesystem holding the executable file
from being unmounted even after unmapping the VMAs. So we track the number
of VM_EXECUTABLE VMAs and drop the new reference when the last one is
unmapped. This avoids pinning the mounted filesystem.
[akpm@linux-foundation.org: improve comments]
[yamamoto@valinux.co.jp: fix dup_mmap]
Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: David Howells <dhowells@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/binfmt_flat.c | 3 | ||||
-rw-r--r-- | fs/exec.c | 2 | ||||
-rw-r--r-- | fs/proc/base.c | 75 | ||||
-rw-r--r-- | fs/proc/internal.h | 1 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 34 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 34 |
6 files changed, 79 insertions, 70 deletions
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index c12cc362fd3b..3b40d45a3a16 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -531,7 +531,8 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
531 | DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); | 531 | DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); |
532 | 532 | ||
533 | down_write(&current->mm->mmap_sem); | 533 | down_write(&current->mm->mmap_sem); |
534 | textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0); | 534 | textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, |
535 | MAP_PRIVATE|MAP_EXECUTABLE, 0); | ||
535 | up_write(&current->mm->mmap_sem); | 536 | up_write(&current->mm->mmap_sem); |
536 | if (!textpos || textpos >= (unsigned long) -4096) { | 537 | if (!textpos || textpos >= (unsigned long) -4096) { |
537 | if (!textpos) | 538 | if (!textpos) |
@@ -964,6 +964,8 @@ int flush_old_exec(struct linux_binprm * bprm) | |||
964 | if (retval) | 964 | if (retval) |
965 | goto out; | 965 | goto out; |
966 | 966 | ||
967 | set_mm_exe_file(bprm->mm, bprm->file); | ||
968 | |||
967 | /* | 969 | /* |
968 | * Release all of the old mmap stuff | 970 | * Release all of the old mmap stuff |
969 | */ | 971 | */ |
diff --git a/fs/proc/base.c b/fs/proc/base.c index c5e412a00b17..b48ddb119945 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1181,6 +1181,81 @@ static const struct file_operations proc_pid_sched_operations = { | |||
1181 | 1181 | ||
1182 | #endif | 1182 | #endif |
1183 | 1183 | ||
1184 | /* | ||
1185 | * We added or removed a vma mapping the executable. The vmas are only mapped | ||
1186 | * during exec and are not mapped with the mmap system call. | ||
1187 | * Callers must hold down_write() on the mm's mmap_sem for these | ||
1188 | */ | ||
1189 | void added_exe_file_vma(struct mm_struct *mm) | ||
1190 | { | ||
1191 | mm->num_exe_file_vmas++; | ||
1192 | } | ||
1193 | |||
1194 | void removed_exe_file_vma(struct mm_struct *mm) | ||
1195 | { | ||
1196 | mm->num_exe_file_vmas--; | ||
1197 | if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ | ||
1198 | fput(mm->exe_file); | ||
1199 | mm->exe_file = NULL; | ||
1200 | } | ||
1201 | |||
1202 | } | ||
1203 | |||
1204 | void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) | ||
1205 | { | ||
1206 | if (new_exe_file) | ||
1207 | get_file(new_exe_file); | ||
1208 | if (mm->exe_file) | ||
1209 | fput(mm->exe_file); | ||
1210 | mm->exe_file = new_exe_file; | ||
1211 | mm->num_exe_file_vmas = 0; | ||
1212 | } | ||
1213 | |||
1214 | struct file *get_mm_exe_file(struct mm_struct *mm) | ||
1215 | { | ||
1216 | struct file *exe_file; | ||
1217 | |||
1218 | /* We need mmap_sem to protect against races with removal of | ||
1219 | * VM_EXECUTABLE vmas */ | ||
1220 | down_read(&mm->mmap_sem); | ||
1221 | exe_file = mm->exe_file; | ||
1222 | if (exe_file) | ||
1223 | get_file(exe_file); | ||
1224 | up_read(&mm->mmap_sem); | ||
1225 | return exe_file; | ||
1226 | } | ||
1227 | |||
1228 | void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) | ||
1229 | { | ||
1230 | /* It's safe to write the exe_file pointer without exe_file_lock because | ||
1231 | * this is called during fork when the task is not yet in /proc */ | ||
1232 | newmm->exe_file = get_mm_exe_file(oldmm); | ||
1233 | } | ||
1234 | |||
1235 | static int proc_exe_link(struct inode *inode, struct path *exe_path) | ||
1236 | { | ||
1237 | struct task_struct *task; | ||
1238 | struct mm_struct *mm; | ||
1239 | struct file *exe_file; | ||
1240 | |||
1241 | task = get_proc_task(inode); | ||
1242 | if (!task) | ||
1243 | return -ENOENT; | ||
1244 | mm = get_task_mm(task); | ||
1245 | put_task_struct(task); | ||
1246 | if (!mm) | ||
1247 | return -ENOENT; | ||
1248 | exe_file = get_mm_exe_file(mm); | ||
1249 | mmput(mm); | ||
1250 | if (exe_file) { | ||
1251 | *exe_path = exe_file->f_path; | ||
1252 | path_get(&exe_file->f_path); | ||
1253 | fput(exe_file); | ||
1254 | return 0; | ||
1255 | } else | ||
1256 | return -ENOENT; | ||
1257 | } | ||
1258 | |||
1184 | static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) | 1259 | static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) |
1185 | { | 1260 | { |
1186 | struct inode *inode = dentry->d_inode; | 1261 | struct inode *inode = dentry->d_inode; |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index bc72f5c8c47d..45abb9803988 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -48,7 +48,6 @@ extern int maps_protect; | |||
48 | 48 | ||
49 | extern void create_seq_entry(char *name, mode_t mode, | 49 | extern void create_seq_entry(char *name, mode_t mode, |
50 | const struct file_operations *f); | 50 | const struct file_operations *f); |
51 | extern int proc_exe_link(struct inode *, struct path *); | ||
52 | extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, | 51 | extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, |
53 | struct pid *pid, struct task_struct *task); | 52 | struct pid *pid, struct task_struct *task); |
54 | extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, | 53 | extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7415eeb7cc3a..e2b8e769f510 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -75,40 +75,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, | |||
75 | return mm->total_vm; | 75 | return mm->total_vm; |
76 | } | 76 | } |
77 | 77 | ||
78 | int proc_exe_link(struct inode *inode, struct path *path) | ||
79 | { | ||
80 | struct vm_area_struct * vma; | ||
81 | int result = -ENOENT; | ||
82 | struct task_struct *task = get_proc_task(inode); | ||
83 | struct mm_struct * mm = NULL; | ||
84 | |||
85 | if (task) { | ||
86 | mm = get_task_mm(task); | ||
87 | put_task_struct(task); | ||
88 | } | ||
89 | if (!mm) | ||
90 | goto out; | ||
91 | down_read(&mm->mmap_sem); | ||
92 | |||
93 | vma = mm->mmap; | ||
94 | while (vma) { | ||
95 | if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) | ||
96 | break; | ||
97 | vma = vma->vm_next; | ||
98 | } | ||
99 | |||
100 | if (vma) { | ||
101 | *path = vma->vm_file->f_path; | ||
102 | path_get(&vma->vm_file->f_path); | ||
103 | result = 0; | ||
104 | } | ||
105 | |||
106 | up_read(&mm->mmap_sem); | ||
107 | mmput(mm); | ||
108 | out: | ||
109 | return result; | ||
110 | } | ||
111 | |||
112 | static void pad_len_spaces(struct seq_file *m, int len) | 78 | static void pad_len_spaces(struct seq_file *m, int len) |
113 | { | 79 | { |
114 | len = 25 + sizeof(void*) * 6 - len; | 80 | len = 25 + sizeof(void*) * 6 - len; |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 8011528518bd..4b733f108455 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -103,40 +103,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, | |||
103 | return size; | 103 | return size; |
104 | } | 104 | } |
105 | 105 | ||
106 | int proc_exe_link(struct inode *inode, struct path *path) | ||
107 | { | ||
108 | struct vm_list_struct *vml; | ||
109 | struct vm_area_struct *vma; | ||
110 | struct task_struct *task = get_proc_task(inode); | ||
111 | struct mm_struct *mm = get_task_mm(task); | ||
112 | int result = -ENOENT; | ||
113 | |||
114 | if (!mm) | ||
115 | goto out; | ||
116 | down_read(&mm->mmap_sem); | ||
117 | |||
118 | vml = mm->context.vmlist; | ||
119 | vma = NULL; | ||
120 | while (vml) { | ||
121 | if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) { | ||
122 | vma = vml->vma; | ||
123 | break; | ||
124 | } | ||
125 | vml = vml->next; | ||
126 | } | ||
127 | |||
128 | if (vma) { | ||
129 | *path = vma->vm_file->f_path; | ||
130 | path_get(&vma->vm_file->f_path); | ||
131 | result = 0; | ||
132 | } | ||
133 | |||
134 | up_read(&mm->mmap_sem); | ||
135 | mmput(mm); | ||
136 | out: | ||
137 | return result; | ||
138 | } | ||
139 | |||
140 | /* | 106 | /* |
141 | * display mapping lines for a particular process's /proc/pid/maps | 107 | * display mapping lines for a particular process's /proc/pid/maps |
142 | */ | 108 | */ |