diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/proc/base.c |
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/proc/base.c')
-rw-r--r-- | fs/proc/base.c | 2056 |
1 files changed, 2056 insertions, 0 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c new file mode 100644 index 000000000000..dad8ea4e00a0 --- /dev/null +++ b/fs/proc/base.c | |||
@@ -0,0 +1,2056 @@ | |||
1 | /* | ||
2 | * linux/fs/proc/base.c | ||
3 | * | ||
4 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
5 | * | ||
6 | * proc base directory handling functions | ||
7 | * | ||
8 | * 1999, Al Viro. Rewritten. Now it covers the whole per-process part. | ||
9 | * Instead of using magical inumbers to determine the kind of object | ||
10 | * we allocate and fill in-core inodes upon lookup. They don't even | ||
11 | * go into icache. We cache the reference to task_struct upon lookup too. | ||
12 | * Eventually it should become a filesystem in its own. We don't use the | ||
13 | * rest of procfs anymore. | ||
14 | */ | ||
15 | |||
16 | #include <asm/uaccess.h> | ||
17 | |||
18 | #include <linux/config.h> | ||
19 | #include <linux/errno.h> | ||
20 | #include <linux/time.h> | ||
21 | #include <linux/proc_fs.h> | ||
22 | #include <linux/stat.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/file.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/seq_file.h> | ||
27 | #include <linux/namei.h> | ||
28 | #include <linux/namespace.h> | ||
29 | #include <linux/mm.h> | ||
30 | #include <linux/smp_lock.h> | ||
31 | #include <linux/kallsyms.h> | ||
32 | #include <linux/mount.h> | ||
33 | #include <linux/security.h> | ||
34 | #include <linux/ptrace.h> | ||
35 | #include <linux/seccomp.h> | ||
36 | #include <linux/cpuset.h> | ||
37 | #include <linux/audit.h> | ||
38 | #include "internal.h" | ||
39 | |||
40 | /* | ||
41 | * For hysterical raisins we keep the same inumbers as in the old procfs. | ||
42 | * Feel free to change the macro below - just keep the range distinct from | ||
43 | * inumbers of the rest of procfs (currently those are in 0x0000--0xffff). | ||
44 | * As soon as we'll get a separate superblock we will be able to forget | ||
45 | * about magical ranges too. | ||
46 | */ | ||
47 | |||
/* Combine a pid (shifted left by 16) with a per-entry number into one inode number. */
#define fake_ino(pid, ino) ((ino) | ((pid) << 16))
49 | |||
/*
 * Entry types for the per-process directories.  PROC_TGID_* values describe
 * entries of the thread-group directory, PROC_TID_* the per-thread ones.
 *
 * PROC_TID_FD_DIR must stay the LAST enumerator: proc_fd_link() recovers a
 * file descriptor as (type - PROC_TID_FD_DIR), so every value from 0x8000
 * upwards stands for "fd N".  Named entries placed after it would share a
 * value with fd 1, fd 2, ... - add new entries before it.
 */
enum pid_directory_inos {
	PROC_TGID_INO = 2,
	PROC_TGID_TASK,
	PROC_TGID_STATUS,
	PROC_TGID_MEM,
#ifdef CONFIG_SECCOMP
	PROC_TGID_SECCOMP,
#endif
	PROC_TGID_CWD,
	PROC_TGID_ROOT,
	PROC_TGID_EXE,
	PROC_TGID_FD,
	PROC_TGID_ENVIRON,
	PROC_TGID_AUXV,
	PROC_TGID_CMDLINE,
	PROC_TGID_STAT,
	PROC_TGID_STATM,
	PROC_TGID_MAPS,
	PROC_TGID_MOUNTS,
	PROC_TGID_WCHAN,
#ifdef CONFIG_SCHEDSTATS
	PROC_TGID_SCHEDSTAT,
#endif
#ifdef CONFIG_CPUSETS
	PROC_TGID_CPUSET,
#endif
#ifdef CONFIG_SECURITY
	PROC_TGID_ATTR,
	PROC_TGID_ATTR_CURRENT,
	PROC_TGID_ATTR_PREV,
	PROC_TGID_ATTR_EXEC,
	PROC_TGID_ATTR_FSCREATE,
#endif
#ifdef CONFIG_AUDITSYSCALL
	PROC_TGID_LOGINUID,
#endif
	PROC_TGID_FD_DIR,
	PROC_TGID_OOM_SCORE,
	PROC_TGID_OOM_ADJUST,
	PROC_TID_INO,
	PROC_TID_STATUS,
	PROC_TID_MEM,
#ifdef CONFIG_SECCOMP
	PROC_TID_SECCOMP,
#endif
	PROC_TID_CWD,
	PROC_TID_ROOT,
	PROC_TID_EXE,
	PROC_TID_FD,
	PROC_TID_ENVIRON,
	PROC_TID_AUXV,
	PROC_TID_CMDLINE,
	PROC_TID_STAT,
	PROC_TID_STATM,
	PROC_TID_MAPS,
	PROC_TID_MOUNTS,
	PROC_TID_WCHAN,
#ifdef CONFIG_SCHEDSTATS
	PROC_TID_SCHEDSTAT,
#endif
#ifdef CONFIG_CPUSETS
	PROC_TID_CPUSET,
#endif
#ifdef CONFIG_SECURITY
	PROC_TID_ATTR,
	PROC_TID_ATTR_CURRENT,
	PROC_TID_ATTR_PREV,
	PROC_TID_ATTR_EXEC,
	PROC_TID_ATTR_FSCREATE,
#endif
#ifdef CONFIG_AUDITSYSCALL
	PROC_TID_LOGINUID,
#endif
	PROC_TID_OOM_SCORE,
	PROC_TID_OOM_ADJUST,

	/* Add new entries before this */
	PROC_TID_FD_DIR = 0x8000,	/* 0x8000-0xffff */
};
127 | |||
/*
 * One row of a per-process directory table: what kind of entry it is,
 * its name (with the length precomputed) and its file mode.
 */
struct pid_entry {
	int type;	/* entry kind; the tables in this file use pid_directory_inos values */
	int len;	/* length of name, not counting the terminating NUL */
	char *name;	/* entry name */
	mode_t mode;	/* file type and permission bits (S_IF* | S_I*) */
};
134 | |||
/*
 * Initializer for one struct pid_entry.  'name' must be a string literal:
 * its length is taken with sizeof(name)-1.
 */
#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
136 | |||
/*
 * Type/name/mode table for the PROC_TGID_* base entries.  Optional entries
 * are included only when the corresponding CONFIG_* option is set.  The
 * trailing all-zero row is presumably the end-of-table marker for the
 * lookup/readdir code (not visible here).
 */
static struct pid_entry tgid_base_stuff[] = {
	E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO),
	E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR),
	E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR),
	E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR),
	E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO),
	E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO),
	E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO),
	E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO),
	E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO),
	E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
#ifdef CONFIG_SECCOMP
	E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
#endif
	E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
	E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
	E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
	E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
#ifdef CONFIG_SECURITY
	E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
#endif
#ifdef CONFIG_KALLSYMS
	E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO),
#endif
#ifdef CONFIG_SCHEDSTATS
	E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO),
#endif
#ifdef CONFIG_CPUSETS
	E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO),
#endif
	E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
	E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
#ifdef CONFIG_AUDITSYSCALL
	E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
#endif
	{0,0,NULL,0}
};
/*
 * Type/name/mode table for the PROC_TID_* base entries.  It mirrors
 * tgid_base_stuff except that it has no "task" directory.  The trailing
 * all-zero row is presumably the end-of-table marker.
 */
static struct pid_entry tid_base_stuff[] = {
	E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR),
	E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR),
	E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR),
	E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO),
	E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO),
	E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO),
	E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO),
	E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO),
	E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
#ifdef CONFIG_SECCOMP
	E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
#endif
	E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
	E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO),
	E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO),
	E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
#ifdef CONFIG_SECURITY
	E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
#endif
#ifdef CONFIG_KALLSYMS
	E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO),
#endif
#ifdef CONFIG_SCHEDSTATS
	E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO),
#endif
#ifdef CONFIG_CPUSETS
	E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO),
#endif
	E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
	E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
#ifdef CONFIG_AUDITSYSCALL
	E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
#endif
	{0,0,NULL,0}
};
210 | |||
211 | #ifdef CONFIG_SECURITY | ||
/*
 * Type/name/mode table for the PROC_TGID_ATTR_* entries; only "prev" is
 * read-only.  Ends with an all-zero row.
 */
static struct pid_entry tgid_attr_stuff[] = {
	E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO),
	E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
	E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
	E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
	{0,0,NULL,0}
};
/*
 * Type/name/mode table for the PROC_TID_ATTR_* entries; only "prev" is
 * read-only.  Ends with an all-zero row.
 */
static struct pid_entry tid_attr_stuff[] = {
	E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO),
	E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
	E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
	E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
	{0,0,NULL,0}
};
226 | #endif | ||
227 | |||
228 | #undef E | ||
229 | |||
230 | static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | ||
231 | { | ||
232 | struct task_struct *task = proc_task(inode); | ||
233 | struct files_struct *files; | ||
234 | struct file *file; | ||
235 | int fd = proc_type(inode) - PROC_TID_FD_DIR; | ||
236 | |||
237 | files = get_files_struct(task); | ||
238 | if (files) { | ||
239 | spin_lock(&files->file_lock); | ||
240 | file = fcheck_files(files, fd); | ||
241 | if (file) { | ||
242 | *mnt = mntget(file->f_vfsmnt); | ||
243 | *dentry = dget(file->f_dentry); | ||
244 | spin_unlock(&files->file_lock); | ||
245 | put_files_struct(files); | ||
246 | return 0; | ||
247 | } | ||
248 | spin_unlock(&files->file_lock); | ||
249 | put_files_struct(files); | ||
250 | } | ||
251 | return -ENOENT; | ||
252 | } | ||
253 | |||
254 | static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | ||
255 | { | ||
256 | struct fs_struct *fs; | ||
257 | int result = -ENOENT; | ||
258 | task_lock(proc_task(inode)); | ||
259 | fs = proc_task(inode)->fs; | ||
260 | if(fs) | ||
261 | atomic_inc(&fs->count); | ||
262 | task_unlock(proc_task(inode)); | ||
263 | if (fs) { | ||
264 | read_lock(&fs->lock); | ||
265 | *mnt = mntget(fs->pwdmnt); | ||
266 | *dentry = dget(fs->pwd); | ||
267 | read_unlock(&fs->lock); | ||
268 | result = 0; | ||
269 | put_fs_struct(fs); | ||
270 | } | ||
271 | return result; | ||
272 | } | ||
273 | |||
274 | static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | ||
275 | { | ||
276 | struct fs_struct *fs; | ||
277 | int result = -ENOENT; | ||
278 | task_lock(proc_task(inode)); | ||
279 | fs = proc_task(inode)->fs; | ||
280 | if(fs) | ||
281 | atomic_inc(&fs->count); | ||
282 | task_unlock(proc_task(inode)); | ||
283 | if (fs) { | ||
284 | read_lock(&fs->lock); | ||
285 | *mnt = mntget(fs->rootmnt); | ||
286 | *dentry = dget(fs->root); | ||
287 | read_unlock(&fs->lock); | ||
288 | result = 0; | ||
289 | put_fs_struct(fs); | ||
290 | } | ||
291 | return result; | ||
292 | } | ||
293 | |||
/*
 * True if 'task' is current itself, or is a child of current that is being
 * ptraced, is in TASK_STOPPED or TASK_TRACED state, and security_ptrace()
 * returns 0 for it.  Note: 'task' is evaluated several times.
 */
#define MAY_PTRACE(task) \
	(task == current || \
	(task->parent == current && \
	(task->ptrace & PT_PTRACED) && \
	(task->state == TASK_STOPPED || task->state == TASK_TRACED) && \
	security_ptrace(current,task) == 0))
300 | |||
301 | static int may_ptrace_attach(struct task_struct *task) | ||
302 | { | ||
303 | int retval = 0; | ||
304 | |||
305 | task_lock(task); | ||
306 | |||
307 | if (!task->mm) | ||
308 | goto out; | ||
309 | if (((current->uid != task->euid) || | ||
310 | (current->uid != task->suid) || | ||
311 | (current->uid != task->uid) || | ||
312 | (current->gid != task->egid) || | ||
313 | (current->gid != task->sgid) || | ||
314 | (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) | ||
315 | goto out; | ||
316 | rmb(); | ||
317 | if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) | ||
318 | goto out; | ||
319 | if (security_ptrace(current, task)) | ||
320 | goto out; | ||
321 | |||
322 | retval = 1; | ||
323 | out: | ||
324 | task_unlock(task); | ||
325 | return retval; | ||
326 | } | ||
327 | |||
328 | static int proc_pid_environ(struct task_struct *task, char * buffer) | ||
329 | { | ||
330 | int res = 0; | ||
331 | struct mm_struct *mm = get_task_mm(task); | ||
332 | if (mm) { | ||
333 | unsigned int len = mm->env_end - mm->env_start; | ||
334 | if (len > PAGE_SIZE) | ||
335 | len = PAGE_SIZE; | ||
336 | res = access_process_vm(task, mm->env_start, buffer, len, 0); | ||
337 | if (!may_ptrace_attach(task)) | ||
338 | res = -ESRCH; | ||
339 | mmput(mm); | ||
340 | } | ||
341 | return res; | ||
342 | } | ||
343 | |||
344 | static int proc_pid_cmdline(struct task_struct *task, char * buffer) | ||
345 | { | ||
346 | int res = 0; | ||
347 | unsigned int len; | ||
348 | struct mm_struct *mm = get_task_mm(task); | ||
349 | if (!mm) | ||
350 | goto out; | ||
351 | if (!mm->arg_end) | ||
352 | goto out_mm; /* Shh! No looking before we're done */ | ||
353 | |||
354 | len = mm->arg_end - mm->arg_start; | ||
355 | |||
356 | if (len > PAGE_SIZE) | ||
357 | len = PAGE_SIZE; | ||
358 | |||
359 | res = access_process_vm(task, mm->arg_start, buffer, len, 0); | ||
360 | |||
361 | // If the nul at the end of args has been overwritten, then | ||
362 | // assume application is using setproctitle(3). | ||
363 | if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { | ||
364 | len = strnlen(buffer, res); | ||
365 | if (len < res) { | ||
366 | res = len; | ||
367 | } else { | ||
368 | len = mm->env_end - mm->env_start; | ||
369 | if (len > PAGE_SIZE - res) | ||
370 | len = PAGE_SIZE - res; | ||
371 | res += access_process_vm(task, mm->env_start, buffer+res, len, 0); | ||
372 | res = strnlen(buffer, res); | ||
373 | } | ||
374 | } | ||
375 | out_mm: | ||
376 | mmput(mm); | ||
377 | out: | ||
378 | return res; | ||
379 | } | ||
380 | |||
381 | static int proc_pid_auxv(struct task_struct *task, char *buffer) | ||
382 | { | ||
383 | int res = 0; | ||
384 | struct mm_struct *mm = get_task_mm(task); | ||
385 | if (mm) { | ||
386 | unsigned int nwords = 0; | ||
387 | do | ||
388 | nwords += 2; | ||
389 | while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ | ||
390 | res = nwords * sizeof(mm->saved_auxv[0]); | ||
391 | if (res > PAGE_SIZE) | ||
392 | res = PAGE_SIZE; | ||
393 | memcpy(buffer, mm->saved_auxv, res); | ||
394 | mmput(mm); | ||
395 | } | ||
396 | return res; | ||
397 | } | ||
398 | |||
399 | |||
400 | #ifdef CONFIG_KALLSYMS | ||
401 | /* | ||
402 | * Provides a wchan file via kallsyms in a proper one-value-per-file format. | ||
403 | * Returns the resolved symbol. If that fails, simply return the address. | ||
404 | */ | ||
405 | static int proc_pid_wchan(struct task_struct *task, char *buffer) | ||
406 | { | ||
407 | char *modname; | ||
408 | const char *sym_name; | ||
409 | unsigned long wchan, size, offset; | ||
410 | char namebuf[KSYM_NAME_LEN+1]; | ||
411 | |||
412 | wchan = get_wchan(task); | ||
413 | |||
414 | sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); | ||
415 | if (sym_name) | ||
416 | return sprintf(buffer, "%s", sym_name); | ||
417 | return sprintf(buffer, "%lu", wchan); | ||
418 | } | ||
419 | #endif /* CONFIG_KALLSYMS */ | ||
420 | |||
421 | #ifdef CONFIG_SCHEDSTATS | ||
422 | /* | ||
423 | * Provides /proc/PID/schedstat | ||
424 | */ | ||
425 | static int proc_pid_schedstat(struct task_struct *task, char *buffer) | ||
426 | { | ||
427 | return sprintf(buffer, "%lu %lu %lu\n", | ||
428 | task->sched_info.cpu_time, | ||
429 | task->sched_info.run_delay, | ||
430 | task->sched_info.pcnt); | ||
431 | } | ||
432 | #endif | ||
433 | |||
434 | /* The badness from the OOM killer */ | ||
435 | unsigned long badness(struct task_struct *p, unsigned long uptime); | ||
436 | static int proc_oom_score(struct task_struct *task, char *buffer) | ||
437 | { | ||
438 | unsigned long points; | ||
439 | struct timespec uptime; | ||
440 | |||
441 | do_posix_clock_monotonic_gettime(&uptime); | ||
442 | points = badness(task, uptime.tv_sec); | ||
443 | return sprintf(buffer, "%lu\n", points); | ||
444 | } | ||
445 | |||
446 | /************************************************************************/ | ||
447 | /* Here the fs part begins */ | ||
448 | /************************************************************************/ | ||
449 | |||
450 | /* permission checks */ | ||
451 | |||
452 | static int proc_check_root(struct inode *inode) | ||
453 | { | ||
454 | struct dentry *de, *base, *root; | ||
455 | struct vfsmount *our_vfsmnt, *vfsmnt, *mnt; | ||
456 | int res = 0; | ||
457 | |||
458 | if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ | ||
459 | return -ENOENT; | ||
460 | read_lock(¤t->fs->lock); | ||
461 | our_vfsmnt = mntget(current->fs->rootmnt); | ||
462 | base = dget(current->fs->root); | ||
463 | read_unlock(¤t->fs->lock); | ||
464 | |||
465 | spin_lock(&vfsmount_lock); | ||
466 | de = root; | ||
467 | mnt = vfsmnt; | ||
468 | |||
469 | while (vfsmnt != our_vfsmnt) { | ||
470 | if (vfsmnt == vfsmnt->mnt_parent) | ||
471 | goto out; | ||
472 | de = vfsmnt->mnt_mountpoint; | ||
473 | vfsmnt = vfsmnt->mnt_parent; | ||
474 | } | ||
475 | |||
476 | if (!is_subdir(de, base)) | ||
477 | goto out; | ||
478 | spin_unlock(&vfsmount_lock); | ||
479 | |||
480 | exit: | ||
481 | dput(base); | ||
482 | mntput(our_vfsmnt); | ||
483 | dput(root); | ||
484 | mntput(mnt); | ||
485 | return res; | ||
486 | out: | ||
487 | spin_unlock(&vfsmount_lock); | ||
488 | res = -EACCES; | ||
489 | goto exit; | ||
490 | } | ||
491 | |||
492 | static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) | ||
493 | { | ||
494 | if (generic_permission(inode, mask, NULL) != 0) | ||
495 | return -EACCES; | ||
496 | return proc_check_root(inode); | ||
497 | } | ||
498 | |||
499 | extern struct seq_operations proc_pid_maps_op; | ||
500 | static int maps_open(struct inode *inode, struct file *file) | ||
501 | { | ||
502 | struct task_struct *task = proc_task(inode); | ||
503 | int ret = seq_open(file, &proc_pid_maps_op); | ||
504 | if (!ret) { | ||
505 | struct seq_file *m = file->private_data; | ||
506 | m->private = task; | ||
507 | } | ||
508 | return ret; | ||
509 | } | ||
510 | |||
/* File operations for the maps file: opened by maps_open(), then plain seq_file I/O. */
static struct file_operations proc_maps_operations = {
	.open = maps_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
517 | |||
518 | extern struct seq_operations mounts_op; | ||
519 | static int mounts_open(struct inode *inode, struct file *file) | ||
520 | { | ||
521 | struct task_struct *task = proc_task(inode); | ||
522 | int ret = seq_open(file, &mounts_op); | ||
523 | |||
524 | if (!ret) { | ||
525 | struct seq_file *m = file->private_data; | ||
526 | struct namespace *namespace; | ||
527 | task_lock(task); | ||
528 | namespace = task->namespace; | ||
529 | if (namespace) | ||
530 | get_namespace(namespace); | ||
531 | task_unlock(task); | ||
532 | |||
533 | if (namespace) | ||
534 | m->private = namespace; | ||
535 | else { | ||
536 | seq_release(inode, file); | ||
537 | ret = -EINVAL; | ||
538 | } | ||
539 | } | ||
540 | return ret; | ||
541 | } | ||
542 | |||
543 | static int mounts_release(struct inode *inode, struct file *file) | ||
544 | { | ||
545 | struct seq_file *m = file->private_data; | ||
546 | struct namespace *namespace = m->private; | ||
547 | put_namespace(namespace); | ||
548 | return seq_release(inode, file); | ||
549 | } | ||
550 | |||
/* File operations for the mounts file: seq_file I/O with custom open/release. */
static struct file_operations proc_mounts_operations = {
	.open = mounts_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = mounts_release,
};
557 | |||
558 | #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ | ||
559 | |||
560 | static ssize_t proc_info_read(struct file * file, char __user * buf, | ||
561 | size_t count, loff_t *ppos) | ||
562 | { | ||
563 | struct inode * inode = file->f_dentry->d_inode; | ||
564 | unsigned long page; | ||
565 | ssize_t length; | ||
566 | struct task_struct *task = proc_task(inode); | ||
567 | |||
568 | if (count > PROC_BLOCK_SIZE) | ||
569 | count = PROC_BLOCK_SIZE; | ||
570 | if (!(page = __get_free_page(GFP_KERNEL))) | ||
571 | return -ENOMEM; | ||
572 | |||
573 | length = PROC_I(inode)->op.proc_read(task, (char*)page); | ||
574 | |||
575 | if (length >= 0) | ||
576 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); | ||
577 | free_page(page); | ||
578 | return length; | ||
579 | } | ||
580 | |||
/* Read-only file operations for entries served through proc_info_read(). */
static struct file_operations proc_info_file_operations = {
	.read = proc_info_read,
};
584 | |||
585 | static int mem_open(struct inode* inode, struct file* file) | ||
586 | { | ||
587 | file->private_data = (void*)((long)current->self_exec_id); | ||
588 | return 0; | ||
589 | } | ||
590 | |||
591 | static ssize_t mem_read(struct file * file, char __user * buf, | ||
592 | size_t count, loff_t *ppos) | ||
593 | { | ||
594 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | ||
595 | char *page; | ||
596 | unsigned long src = *ppos; | ||
597 | int ret = -ESRCH; | ||
598 | struct mm_struct *mm; | ||
599 | |||
600 | if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) | ||
601 | goto out; | ||
602 | |||
603 | ret = -ENOMEM; | ||
604 | page = (char *)__get_free_page(GFP_USER); | ||
605 | if (!page) | ||
606 | goto out; | ||
607 | |||
608 | ret = 0; | ||
609 | |||
610 | mm = get_task_mm(task); | ||
611 | if (!mm) | ||
612 | goto out_free; | ||
613 | |||
614 | ret = -EIO; | ||
615 | |||
616 | if (file->private_data != (void*)((long)current->self_exec_id)) | ||
617 | goto out_put; | ||
618 | |||
619 | ret = 0; | ||
620 | |||
621 | while (count > 0) { | ||
622 | int this_len, retval; | ||
623 | |||
624 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; | ||
625 | retval = access_process_vm(task, src, page, this_len, 0); | ||
626 | if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) { | ||
627 | if (!ret) | ||
628 | ret = -EIO; | ||
629 | break; | ||
630 | } | ||
631 | |||
632 | if (copy_to_user(buf, page, retval)) { | ||
633 | ret = -EFAULT; | ||
634 | break; | ||
635 | } | ||
636 | |||
637 | ret += retval; | ||
638 | src += retval; | ||
639 | buf += retval; | ||
640 | count -= retval; | ||
641 | } | ||
642 | *ppos = src; | ||
643 | |||
644 | out_put: | ||
645 | mmput(mm); | ||
646 | out_free: | ||
647 | free_page((unsigned long) page); | ||
648 | out: | ||
649 | return ret; | ||
650 | } | ||
651 | |||
/*
 * mem_write is defined to NULL, so the implementation inside the #ifndef
 * below is compiled out and proc_mem_operations ends up with .write = NULL.
 */
#define mem_write NULL

#ifndef mem_write
/* This is a security hazard */
/*
 * Write handler for the mem file (currently disabled): copy 'count' bytes
 * from user space into the task's address space at *ppos, one page at a
 * time.  Returns bytes written, -ESRCH if the caller fails the ptrace
 * checks, -ENOMEM, -EFAULT, or -EIO if nothing could be written.
 */
static ssize_t mem_write(struct file * file, const char * buf,
			 size_t count, loff_t *ppos)
{
	int copied = 0;
	char *page;
	struct task_struct *task = proc_task(file->f_dentry->d_inode);
	unsigned long dst = *ppos;

	if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
		return -ESRCH;

	page = (char *)__get_free_page(GFP_USER);
	if (!page)
		return -ENOMEM;

	while (count > 0) {
		int this_len, retval;

		this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
		if (copy_from_user(page, buf, this_len)) {
			copied = -EFAULT;
			break;
		}
		retval = access_process_vm(task, dst, page, this_len, 1);
		if (!retval) {
			/* report -EIO only if no earlier chunk was written */
			if (!copied)
				copied = -EIO;
			break;
		}
		copied += retval;
		buf += retval;
		dst += retval;
		count -= retval;
	}
	*ppos = dst;
	free_page((unsigned long) page);
	return copied;
}
#endif
695 | |||
696 | static loff_t mem_lseek(struct file * file, loff_t offset, int orig) | ||
697 | { | ||
698 | switch (orig) { | ||
699 | case 0: | ||
700 | file->f_pos = offset; | ||
701 | break; | ||
702 | case 1: | ||
703 | file->f_pos += offset; | ||
704 | break; | ||
705 | default: | ||
706 | return -EINVAL; | ||
707 | } | ||
708 | force_successful_syscall_return(); | ||
709 | return file->f_pos; | ||
710 | } | ||
711 | |||
/* File operations for the mem file; .write is NULL because mem_write is #defined to NULL. */
static struct file_operations proc_mem_operations = {
	.llseek = mem_lseek,
	.read = mem_read,
	.write = mem_write,
	.open = mem_open,
};
718 | |||
719 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, | ||
720 | size_t count, loff_t *ppos) | ||
721 | { | ||
722 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | ||
723 | char buffer[8]; | ||
724 | size_t len; | ||
725 | int oom_adjust = task->oomkilladj; | ||
726 | loff_t __ppos = *ppos; | ||
727 | |||
728 | len = sprintf(buffer, "%i\n", oom_adjust); | ||
729 | if (__ppos >= len) | ||
730 | return 0; | ||
731 | if (count > len-__ppos) | ||
732 | count = len-__ppos; | ||
733 | if (copy_to_user(buf, buffer + __ppos, count)) | ||
734 | return -EFAULT; | ||
735 | *ppos = __ppos + count; | ||
736 | return count; | ||
737 | } | ||
738 | |||
739 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | ||
740 | size_t count, loff_t *ppos) | ||
741 | { | ||
742 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | ||
743 | char buffer[8], *end; | ||
744 | int oom_adjust; | ||
745 | |||
746 | if (!capable(CAP_SYS_RESOURCE)) | ||
747 | return -EPERM; | ||
748 | memset(buffer, 0, 8); | ||
749 | if (count > 6) | ||
750 | count = 6; | ||
751 | if (copy_from_user(buffer, buf, count)) | ||
752 | return -EFAULT; | ||
753 | oom_adjust = simple_strtol(buffer, &end, 0); | ||
754 | if (oom_adjust < -16 || oom_adjust > 15) | ||
755 | return -EINVAL; | ||
756 | if (*end == '\n') | ||
757 | end++; | ||
758 | task->oomkilladj = oom_adjust; | ||
759 | if (end - buffer == 0) | ||
760 | return -EIO; | ||
761 | return end - buffer; | ||
762 | } | ||
763 | |||
/* File operations for the oom_adj file. */
static struct file_operations proc_oom_adjust_operations = {
	.read = oom_adjust_read,
	.write = oom_adjust_write,
};
768 | |||
/* Inode operations that only override ->permission, routing it through proc_permission(). */
static struct inode_operations proc_mem_inode_operations = {
	.permission = proc_permission,
};
772 | |||
773 | #ifdef CONFIG_AUDITSYSCALL | ||
774 | #define TMPBUFLEN 21 | ||
775 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, | ||
776 | size_t count, loff_t *ppos) | ||
777 | { | ||
778 | struct inode * inode = file->f_dentry->d_inode; | ||
779 | struct task_struct *task = proc_task(inode); | ||
780 | ssize_t length; | ||
781 | char tmpbuf[TMPBUFLEN]; | ||
782 | |||
783 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", | ||
784 | audit_get_loginuid(task->audit_context)); | ||
785 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); | ||
786 | } | ||
787 | |||
788 | static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | ||
789 | size_t count, loff_t *ppos) | ||
790 | { | ||
791 | struct inode * inode = file->f_dentry->d_inode; | ||
792 | char *page, *tmp; | ||
793 | ssize_t length; | ||
794 | struct task_struct *task = proc_task(inode); | ||
795 | uid_t loginuid; | ||
796 | |||
797 | if (!capable(CAP_AUDIT_CONTROL)) | ||
798 | return -EPERM; | ||
799 | |||
800 | if (current != task) | ||
801 | return -EPERM; | ||
802 | |||
803 | if (count > PAGE_SIZE) | ||
804 | count = PAGE_SIZE; | ||
805 | |||
806 | if (*ppos != 0) { | ||
807 | /* No partial writes. */ | ||
808 | return -EINVAL; | ||
809 | } | ||
810 | page = (char*)__get_free_page(GFP_USER); | ||
811 | if (!page) | ||
812 | return -ENOMEM; | ||
813 | length = -EFAULT; | ||
814 | if (copy_from_user(page, buf, count)) | ||
815 | goto out_free_page; | ||
816 | |||
817 | loginuid = simple_strtoul(page, &tmp, 10); | ||
818 | if (tmp == page) { | ||
819 | length = -EINVAL; | ||
820 | goto out_free_page; | ||
821 | |||
822 | } | ||
823 | length = audit_set_loginuid(task->audit_context, loginuid); | ||
824 | if (likely(length == 0)) | ||
825 | length = count; | ||
826 | |||
827 | out_free_page: | ||
828 | free_page((unsigned long) page); | ||
829 | return length; | ||
830 | } | ||
831 | |||
/* File operations for the loginuid file. */
static struct file_operations proc_loginuid_operations = {
	.read = proc_loginuid_read,
	.write = proc_loginuid_write,
};
836 | #endif | ||
837 | |||
838 | #ifdef CONFIG_SECCOMP | ||
839 | static ssize_t seccomp_read(struct file *file, char __user *buf, | ||
840 | size_t count, loff_t *ppos) | ||
841 | { | ||
842 | struct task_struct *tsk = proc_task(file->f_dentry->d_inode); | ||
843 | char __buf[20]; | ||
844 | loff_t __ppos = *ppos; | ||
845 | size_t len; | ||
846 | |||
847 | /* no need to print the trailing zero, so use only len */ | ||
848 | len = sprintf(__buf, "%u\n", tsk->seccomp.mode); | ||
849 | if (__ppos >= len) | ||
850 | return 0; | ||
851 | if (count > len - __ppos) | ||
852 | count = len - __ppos; | ||
853 | if (copy_to_user(buf, __buf + __ppos, count)) | ||
854 | return -EFAULT; | ||
855 | *ppos = __ppos + count; | ||
856 | return count; | ||
857 | } | ||
858 | |||
859 | static ssize_t seccomp_write(struct file *file, const char __user *buf, | ||
860 | size_t count, loff_t *ppos) | ||
861 | { | ||
862 | struct task_struct *tsk = proc_task(file->f_dentry->d_inode); | ||
863 | char __buf[20], *end; | ||
864 | unsigned int seccomp_mode; | ||
865 | |||
866 | /* can set it only once to be even more secure */ | ||
867 | if (unlikely(tsk->seccomp.mode)) | ||
868 | return -EPERM; | ||
869 | |||
870 | memset(__buf, 0, sizeof(__buf)); | ||
871 | count = min(count, sizeof(__buf) - 1); | ||
872 | if (copy_from_user(__buf, buf, count)) | ||
873 | return -EFAULT; | ||
874 | seccomp_mode = simple_strtoul(__buf, &end, 0); | ||
875 | if (*end == '\n') | ||
876 | end++; | ||
877 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { | ||
878 | tsk->seccomp.mode = seccomp_mode; | ||
879 | set_tsk_thread_flag(tsk, TIF_SECCOMP); | ||
880 | } else | ||
881 | return -EINVAL; | ||
882 | if (unlikely(!(end - __buf))) | ||
883 | return -EIO; | ||
884 | return end - __buf; | ||
885 | } | ||
886 | |||
/* File operations for the seccomp file. */
static struct file_operations proc_seccomp_operations = {
	.read = seccomp_read,
	.write = seccomp_write,
};
891 | #endif /* CONFIG_SECCOMP */ | ||
892 | |||
893 | static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
894 | { | ||
895 | struct inode *inode = dentry->d_inode; | ||
896 | int error = -EACCES; | ||
897 | |||
898 | /* We don't need a base pointer in the /proc filesystem */ | ||
899 | path_release(nd); | ||
900 | |||
901 | if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) | ||
902 | goto out; | ||
903 | error = proc_check_root(inode); | ||
904 | if (error) | ||
905 | goto out; | ||
906 | |||
907 | error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); | ||
908 | nd->last_type = LAST_BIND; | ||
909 | out: | ||
910 | return error; | ||
911 | } | ||
912 | |||
913 | static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, | ||
914 | char __user *buffer, int buflen) | ||
915 | { | ||
916 | struct inode * inode; | ||
917 | char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; | ||
918 | int len; | ||
919 | |||
920 | if (!tmp) | ||
921 | return -ENOMEM; | ||
922 | |||
923 | inode = dentry->d_inode; | ||
924 | path = d_path(dentry, mnt, tmp, PAGE_SIZE); | ||
925 | len = PTR_ERR(path); | ||
926 | if (IS_ERR(path)) | ||
927 | goto out; | ||
928 | len = tmp + PAGE_SIZE - 1 - path; | ||
929 | |||
930 | if (len > buflen) | ||
931 | len = buflen; | ||
932 | if (copy_to_user(buffer, path, len)) | ||
933 | len = -EFAULT; | ||
934 | out: | ||
935 | free_page((unsigned long)tmp); | ||
936 | return len; | ||
937 | } | ||
938 | |||
939 | static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) | ||
940 | { | ||
941 | int error = -EACCES; | ||
942 | struct inode *inode = dentry->d_inode; | ||
943 | struct dentry *de; | ||
944 | struct vfsmount *mnt = NULL; | ||
945 | |||
946 | lock_kernel(); | ||
947 | |||
948 | if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) | ||
949 | goto out; | ||
950 | error = proc_check_root(inode); | ||
951 | if (error) | ||
952 | goto out; | ||
953 | |||
954 | error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); | ||
955 | if (error) | ||
956 | goto out; | ||
957 | |||
958 | error = do_proc_readlink(de, mnt, buffer, buflen); | ||
959 | dput(de); | ||
960 | mntput(mnt); | ||
961 | out: | ||
962 | unlock_kernel(); | ||
963 | return error; | ||
964 | } | ||
965 | |||
966 | static struct inode_operations proc_pid_link_inode_operations = { | ||
967 | .readlink = proc_pid_readlink, | ||
968 | .follow_link = proc_pid_follow_link | ||
969 | }; | ||
970 | |||
971 | #define NUMBUF 10 | ||
972 | |||
973 | static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | ||
974 | { | ||
975 | struct inode *inode = filp->f_dentry->d_inode; | ||
976 | struct task_struct *p = proc_task(inode); | ||
977 | unsigned int fd, tid, ino; | ||
978 | int retval; | ||
979 | char buf[NUMBUF]; | ||
980 | struct files_struct * files; | ||
981 | |||
982 | retval = -ENOENT; | ||
983 | if (!pid_alive(p)) | ||
984 | goto out; | ||
985 | retval = 0; | ||
986 | tid = p->pid; | ||
987 | |||
988 | fd = filp->f_pos; | ||
989 | switch (fd) { | ||
990 | case 0: | ||
991 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
992 | goto out; | ||
993 | filp->f_pos++; | ||
994 | case 1: | ||
995 | ino = fake_ino(tid, PROC_TID_INO); | ||
996 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
997 | goto out; | ||
998 | filp->f_pos++; | ||
999 | default: | ||
1000 | files = get_files_struct(p); | ||
1001 | if (!files) | ||
1002 | goto out; | ||
1003 | spin_lock(&files->file_lock); | ||
1004 | for (fd = filp->f_pos-2; | ||
1005 | fd < files->max_fds; | ||
1006 | fd++, filp->f_pos++) { | ||
1007 | unsigned int i,j; | ||
1008 | |||
1009 | if (!fcheck_files(files, fd)) | ||
1010 | continue; | ||
1011 | spin_unlock(&files->file_lock); | ||
1012 | |||
1013 | j = NUMBUF; | ||
1014 | i = fd; | ||
1015 | do { | ||
1016 | j--; | ||
1017 | buf[j] = '0' + (i % 10); | ||
1018 | i /= 10; | ||
1019 | } while (i); | ||
1020 | |||
1021 | ino = fake_ino(tid, PROC_TID_FD_DIR + fd); | ||
1022 | if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { | ||
1023 | spin_lock(&files->file_lock); | ||
1024 | break; | ||
1025 | } | ||
1026 | spin_lock(&files->file_lock); | ||
1027 | } | ||
1028 | spin_unlock(&files->file_lock); | ||
1029 | put_files_struct(files); | ||
1030 | } | ||
1031 | out: | ||
1032 | return retval; | ||
1033 | } | ||
1034 | |||
1035 | static int proc_pident_readdir(struct file *filp, | ||
1036 | void *dirent, filldir_t filldir, | ||
1037 | struct pid_entry *ents, unsigned int nents) | ||
1038 | { | ||
1039 | int i; | ||
1040 | int pid; | ||
1041 | struct dentry *dentry = filp->f_dentry; | ||
1042 | struct inode *inode = dentry->d_inode; | ||
1043 | struct pid_entry *p; | ||
1044 | ino_t ino; | ||
1045 | int ret; | ||
1046 | |||
1047 | ret = -ENOENT; | ||
1048 | if (!pid_alive(proc_task(inode))) | ||
1049 | goto out; | ||
1050 | |||
1051 | ret = 0; | ||
1052 | pid = proc_task(inode)->pid; | ||
1053 | i = filp->f_pos; | ||
1054 | switch (i) { | ||
1055 | case 0: | ||
1056 | ino = inode->i_ino; | ||
1057 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
1058 | goto out; | ||
1059 | i++; | ||
1060 | filp->f_pos++; | ||
1061 | /* fall through */ | ||
1062 | case 1: | ||
1063 | ino = parent_ino(dentry); | ||
1064 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | ||
1065 | goto out; | ||
1066 | i++; | ||
1067 | filp->f_pos++; | ||
1068 | /* fall through */ | ||
1069 | default: | ||
1070 | i -= 2; | ||
1071 | if (i >= nents) { | ||
1072 | ret = 1; | ||
1073 | goto out; | ||
1074 | } | ||
1075 | p = ents + i; | ||
1076 | while (p->name) { | ||
1077 | if (filldir(dirent, p->name, p->len, filp->f_pos, | ||
1078 | fake_ino(pid, p->type), p->mode >> 12) < 0) | ||
1079 | goto out; | ||
1080 | filp->f_pos++; | ||
1081 | p++; | ||
1082 | } | ||
1083 | } | ||
1084 | |||
1085 | ret = 1; | ||
1086 | out: | ||
1087 | return ret; | ||
1088 | } | ||
1089 | |||
1090 | static int proc_tgid_base_readdir(struct file * filp, | ||
1091 | void * dirent, filldir_t filldir) | ||
1092 | { | ||
1093 | return proc_pident_readdir(filp,dirent,filldir, | ||
1094 | tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); | ||
1095 | } | ||
1096 | |||
1097 | static int proc_tid_base_readdir(struct file * filp, | ||
1098 | void * dirent, filldir_t filldir) | ||
1099 | { | ||
1100 | return proc_pident_readdir(filp,dirent,filldir, | ||
1101 | tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); | ||
1102 | } | ||
1103 | |||
1104 | /* building an inode */ | ||
1105 | |||
1106 | static int task_dumpable(struct task_struct *task) | ||
1107 | { | ||
1108 | int dumpable = 0; | ||
1109 | struct mm_struct *mm; | ||
1110 | |||
1111 | task_lock(task); | ||
1112 | mm = task->mm; | ||
1113 | if (mm) | ||
1114 | dumpable = mm->dumpable; | ||
1115 | task_unlock(task); | ||
1116 | return dumpable; | ||
1117 | } | ||
1118 | |||
1119 | |||
1120 | static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino) | ||
1121 | { | ||
1122 | struct inode * inode; | ||
1123 | struct proc_inode *ei; | ||
1124 | |||
1125 | /* We need a new inode */ | ||
1126 | |||
1127 | inode = new_inode(sb); | ||
1128 | if (!inode) | ||
1129 | goto out; | ||
1130 | |||
1131 | /* Common stuff */ | ||
1132 | ei = PROC_I(inode); | ||
1133 | ei->task = NULL; | ||
1134 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
1135 | inode->i_ino = fake_ino(task->pid, ino); | ||
1136 | |||
1137 | if (!pid_alive(task)) | ||
1138 | goto out_unlock; | ||
1139 | |||
1140 | /* | ||
1141 | * grab the reference to task. | ||
1142 | */ | ||
1143 | get_task_struct(task); | ||
1144 | ei->task = task; | ||
1145 | ei->type = ino; | ||
1146 | inode->i_uid = 0; | ||
1147 | inode->i_gid = 0; | ||
1148 | if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { | ||
1149 | inode->i_uid = task->euid; | ||
1150 | inode->i_gid = task->egid; | ||
1151 | } | ||
1152 | security_task_to_inode(task, inode); | ||
1153 | |||
1154 | out: | ||
1155 | return inode; | ||
1156 | |||
1157 | out_unlock: | ||
1158 | ei->pde = NULL; | ||
1159 | iput(inode); | ||
1160 | return NULL; | ||
1161 | } | ||
1162 | |||
1163 | /* dentry stuff */ | ||
1164 | |||
1165 | /* | ||
1166 | * Exceptional case: normally we are not allowed to unhash a busy | ||
1167 | * directory. In this case, however, we can do it - no aliasing problems | ||
1168 | * due to the way we treat inodes. | ||
1169 | * | ||
1170 | * Rewrite the inode's ownerships here because the owning task may have | ||
1171 | * performed a setuid(), etc. | ||
1172 | */ | ||
1173 | static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | ||
1174 | { | ||
1175 | struct inode *inode = dentry->d_inode; | ||
1176 | struct task_struct *task = proc_task(inode); | ||
1177 | if (pid_alive(task)) { | ||
1178 | if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { | ||
1179 | inode->i_uid = task->euid; | ||
1180 | inode->i_gid = task->egid; | ||
1181 | } else { | ||
1182 | inode->i_uid = 0; | ||
1183 | inode->i_gid = 0; | ||
1184 | } | ||
1185 | security_task_to_inode(task, inode); | ||
1186 | return 1; | ||
1187 | } | ||
1188 | d_drop(dentry); | ||
1189 | return 0; | ||
1190 | } | ||
1191 | |||
1192 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | ||
1193 | { | ||
1194 | struct inode *inode = dentry->d_inode; | ||
1195 | struct task_struct *task = proc_task(inode); | ||
1196 | int fd = proc_type(inode) - PROC_TID_FD_DIR; | ||
1197 | struct files_struct *files; | ||
1198 | |||
1199 | files = get_files_struct(task); | ||
1200 | if (files) { | ||
1201 | spin_lock(&files->file_lock); | ||
1202 | if (fcheck_files(files, fd)) { | ||
1203 | spin_unlock(&files->file_lock); | ||
1204 | put_files_struct(files); | ||
1205 | if (task_dumpable(task)) { | ||
1206 | inode->i_uid = task->euid; | ||
1207 | inode->i_gid = task->egid; | ||
1208 | } else { | ||
1209 | inode->i_uid = 0; | ||
1210 | inode->i_gid = 0; | ||
1211 | } | ||
1212 | security_task_to_inode(task, inode); | ||
1213 | return 1; | ||
1214 | } | ||
1215 | spin_unlock(&files->file_lock); | ||
1216 | put_files_struct(files); | ||
1217 | } | ||
1218 | d_drop(dentry); | ||
1219 | return 0; | ||
1220 | } | ||
1221 | |||
1222 | static void pid_base_iput(struct dentry *dentry, struct inode *inode) | ||
1223 | { | ||
1224 | struct task_struct *task = proc_task(inode); | ||
1225 | spin_lock(&task->proc_lock); | ||
1226 | if (task->proc_dentry == dentry) | ||
1227 | task->proc_dentry = NULL; | ||
1228 | spin_unlock(&task->proc_lock); | ||
1229 | iput(inode); | ||
1230 | } | ||
1231 | |||
1232 | static int pid_delete_dentry(struct dentry * dentry) | ||
1233 | { | ||
1234 | /* Is the task we represent dead? | ||
1235 | * If so, then don't put the dentry on the lru list, | ||
1236 | * kill it immediately. | ||
1237 | */ | ||
1238 | return !pid_alive(proc_task(dentry->d_inode)); | ||
1239 | } | ||
1240 | |||
1241 | static struct dentry_operations tid_fd_dentry_operations = | ||
1242 | { | ||
1243 | .d_revalidate = tid_fd_revalidate, | ||
1244 | .d_delete = pid_delete_dentry, | ||
1245 | }; | ||
1246 | |||
1247 | static struct dentry_operations pid_dentry_operations = | ||
1248 | { | ||
1249 | .d_revalidate = pid_revalidate, | ||
1250 | .d_delete = pid_delete_dentry, | ||
1251 | }; | ||
1252 | |||
1253 | static struct dentry_operations pid_base_dentry_operations = | ||
1254 | { | ||
1255 | .d_revalidate = pid_revalidate, | ||
1256 | .d_iput = pid_base_iput, | ||
1257 | .d_delete = pid_delete_dentry, | ||
1258 | }; | ||
1259 | |||
1260 | /* Lookups */ | ||
1261 | |||
1262 | static unsigned name_to_int(struct dentry *dentry) | ||
1263 | { | ||
1264 | const char *name = dentry->d_name.name; | ||
1265 | int len = dentry->d_name.len; | ||
1266 | unsigned n = 0; | ||
1267 | |||
1268 | if (len > 1 && *name == '0') | ||
1269 | goto out; | ||
1270 | while (len-- > 0) { | ||
1271 | unsigned c = *name++ - '0'; | ||
1272 | if (c > 9) | ||
1273 | goto out; | ||
1274 | if (n >= (~0U-9)/10) | ||
1275 | goto out; | ||
1276 | n *= 10; | ||
1277 | n += c; | ||
1278 | } | ||
1279 | return n; | ||
1280 | out: | ||
1281 | return ~0U; | ||
1282 | } | ||
1283 | |||
1284 | /* SMP-safe */ | ||
1285 | static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) | ||
1286 | { | ||
1287 | struct task_struct *task = proc_task(dir); | ||
1288 | unsigned fd = name_to_int(dentry); | ||
1289 | struct file * file; | ||
1290 | struct files_struct * files; | ||
1291 | struct inode *inode; | ||
1292 | struct proc_inode *ei; | ||
1293 | |||
1294 | if (fd == ~0U) | ||
1295 | goto out; | ||
1296 | if (!pid_alive(task)) | ||
1297 | goto out; | ||
1298 | |||
1299 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); | ||
1300 | if (!inode) | ||
1301 | goto out; | ||
1302 | ei = PROC_I(inode); | ||
1303 | files = get_files_struct(task); | ||
1304 | if (!files) | ||
1305 | goto out_unlock; | ||
1306 | inode->i_mode = S_IFLNK; | ||
1307 | spin_lock(&files->file_lock); | ||
1308 | file = fcheck_files(files, fd); | ||
1309 | if (!file) | ||
1310 | goto out_unlock2; | ||
1311 | if (file->f_mode & 1) | ||
1312 | inode->i_mode |= S_IRUSR | S_IXUSR; | ||
1313 | if (file->f_mode & 2) | ||
1314 | inode->i_mode |= S_IWUSR | S_IXUSR; | ||
1315 | spin_unlock(&files->file_lock); | ||
1316 | put_files_struct(files); | ||
1317 | inode->i_op = &proc_pid_link_inode_operations; | ||
1318 | inode->i_size = 64; | ||
1319 | ei->op.proc_get_link = proc_fd_link; | ||
1320 | dentry->d_op = &tid_fd_dentry_operations; | ||
1321 | d_add(dentry, inode); | ||
1322 | return NULL; | ||
1323 | |||
1324 | out_unlock2: | ||
1325 | spin_unlock(&files->file_lock); | ||
1326 | put_files_struct(files); | ||
1327 | out_unlock: | ||
1328 | iput(inode); | ||
1329 | out: | ||
1330 | return ERR_PTR(-ENOENT); | ||
1331 | } | ||
1332 | |||
1333 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); | ||
1334 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); | ||
1335 | |||
1336 | static struct file_operations proc_fd_operations = { | ||
1337 | .read = generic_read_dir, | ||
1338 | .readdir = proc_readfd, | ||
1339 | }; | ||
1340 | |||
1341 | static struct file_operations proc_task_operations = { | ||
1342 | .read = generic_read_dir, | ||
1343 | .readdir = proc_task_readdir, | ||
1344 | }; | ||
1345 | |||
1346 | /* | ||
1347 | * proc directories can do almost nothing.. | ||
1348 | */ | ||
1349 | static struct inode_operations proc_fd_inode_operations = { | ||
1350 | .lookup = proc_lookupfd, | ||
1351 | .permission = proc_permission, | ||
1352 | }; | ||
1353 | |||
1354 | static struct inode_operations proc_task_inode_operations = { | ||
1355 | .lookup = proc_task_lookup, | ||
1356 | .permission = proc_permission, | ||
1357 | }; | ||
1358 | |||
1359 | #ifdef CONFIG_SECURITY | ||
1360 | static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, | ||
1361 | size_t count, loff_t *ppos) | ||
1362 | { | ||
1363 | struct inode * inode = file->f_dentry->d_inode; | ||
1364 | unsigned long page; | ||
1365 | ssize_t length; | ||
1366 | struct task_struct *task = proc_task(inode); | ||
1367 | |||
1368 | if (count > PAGE_SIZE) | ||
1369 | count = PAGE_SIZE; | ||
1370 | if (!(page = __get_free_page(GFP_KERNEL))) | ||
1371 | return -ENOMEM; | ||
1372 | |||
1373 | length = security_getprocattr(task, | ||
1374 | (char*)file->f_dentry->d_name.name, | ||
1375 | (void*)page, count); | ||
1376 | if (length >= 0) | ||
1377 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); | ||
1378 | free_page(page); | ||
1379 | return length; | ||
1380 | } | ||
1381 | |||
1382 | static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, | ||
1383 | size_t count, loff_t *ppos) | ||
1384 | { | ||
1385 | struct inode * inode = file->f_dentry->d_inode; | ||
1386 | char *page; | ||
1387 | ssize_t length; | ||
1388 | struct task_struct *task = proc_task(inode); | ||
1389 | |||
1390 | if (count > PAGE_SIZE) | ||
1391 | count = PAGE_SIZE; | ||
1392 | if (*ppos != 0) { | ||
1393 | /* No partial writes. */ | ||
1394 | return -EINVAL; | ||
1395 | } | ||
1396 | page = (char*)__get_free_page(GFP_USER); | ||
1397 | if (!page) | ||
1398 | return -ENOMEM; | ||
1399 | length = -EFAULT; | ||
1400 | if (copy_from_user(page, buf, count)) | ||
1401 | goto out; | ||
1402 | |||
1403 | length = security_setprocattr(task, | ||
1404 | (char*)file->f_dentry->d_name.name, | ||
1405 | (void*)page, count); | ||
1406 | out: | ||
1407 | free_page((unsigned long) page); | ||
1408 | return length; | ||
1409 | } | ||
1410 | |||
1411 | static struct file_operations proc_pid_attr_operations = { | ||
1412 | .read = proc_pid_attr_read, | ||
1413 | .write = proc_pid_attr_write, | ||
1414 | }; | ||
1415 | |||
1416 | static struct file_operations proc_tid_attr_operations; | ||
1417 | static struct inode_operations proc_tid_attr_inode_operations; | ||
1418 | static struct file_operations proc_tgid_attr_operations; | ||
1419 | static struct inode_operations proc_tgid_attr_inode_operations; | ||
1420 | #endif | ||
1421 | |||
1422 | /* SMP-safe */ | ||
1423 | static struct dentry *proc_pident_lookup(struct inode *dir, | ||
1424 | struct dentry *dentry, | ||
1425 | struct pid_entry *ents) | ||
1426 | { | ||
1427 | struct inode *inode; | ||
1428 | int error; | ||
1429 | struct task_struct *task = proc_task(dir); | ||
1430 | struct pid_entry *p; | ||
1431 | struct proc_inode *ei; | ||
1432 | |||
1433 | error = -ENOENT; | ||
1434 | inode = NULL; | ||
1435 | |||
1436 | if (!pid_alive(task)) | ||
1437 | goto out; | ||
1438 | |||
1439 | for (p = ents; p->name; p++) { | ||
1440 | if (p->len != dentry->d_name.len) | ||
1441 | continue; | ||
1442 | if (!memcmp(dentry->d_name.name, p->name, p->len)) | ||
1443 | break; | ||
1444 | } | ||
1445 | if (!p->name) | ||
1446 | goto out; | ||
1447 | |||
1448 | error = -EINVAL; | ||
1449 | inode = proc_pid_make_inode(dir->i_sb, task, p->type); | ||
1450 | if (!inode) | ||
1451 | goto out; | ||
1452 | |||
1453 | ei = PROC_I(inode); | ||
1454 | inode->i_mode = p->mode; | ||
1455 | /* | ||
1456 | * Yes, it does not scale. And it should not. Don't add | ||
1457 | * new entries into /proc/<tgid>/ without very good reasons. | ||
1458 | */ | ||
1459 | switch(p->type) { | ||
1460 | case PROC_TGID_TASK: | ||
1461 | inode->i_nlink = 3; | ||
1462 | inode->i_op = &proc_task_inode_operations; | ||
1463 | inode->i_fop = &proc_task_operations; | ||
1464 | break; | ||
1465 | case PROC_TID_FD: | ||
1466 | case PROC_TGID_FD: | ||
1467 | inode->i_nlink = 2; | ||
1468 | inode->i_op = &proc_fd_inode_operations; | ||
1469 | inode->i_fop = &proc_fd_operations; | ||
1470 | break; | ||
1471 | case PROC_TID_EXE: | ||
1472 | case PROC_TGID_EXE: | ||
1473 | inode->i_op = &proc_pid_link_inode_operations; | ||
1474 | ei->op.proc_get_link = proc_exe_link; | ||
1475 | break; | ||
1476 | case PROC_TID_CWD: | ||
1477 | case PROC_TGID_CWD: | ||
1478 | inode->i_op = &proc_pid_link_inode_operations; | ||
1479 | ei->op.proc_get_link = proc_cwd_link; | ||
1480 | break; | ||
1481 | case PROC_TID_ROOT: | ||
1482 | case PROC_TGID_ROOT: | ||
1483 | inode->i_op = &proc_pid_link_inode_operations; | ||
1484 | ei->op.proc_get_link = proc_root_link; | ||
1485 | break; | ||
1486 | case PROC_TID_ENVIRON: | ||
1487 | case PROC_TGID_ENVIRON: | ||
1488 | inode->i_fop = &proc_info_file_operations; | ||
1489 | ei->op.proc_read = proc_pid_environ; | ||
1490 | break; | ||
1491 | case PROC_TID_AUXV: | ||
1492 | case PROC_TGID_AUXV: | ||
1493 | inode->i_fop = &proc_info_file_operations; | ||
1494 | ei->op.proc_read = proc_pid_auxv; | ||
1495 | break; | ||
1496 | case PROC_TID_STATUS: | ||
1497 | case PROC_TGID_STATUS: | ||
1498 | inode->i_fop = &proc_info_file_operations; | ||
1499 | ei->op.proc_read = proc_pid_status; | ||
1500 | break; | ||
1501 | case PROC_TID_STAT: | ||
1502 | inode->i_fop = &proc_info_file_operations; | ||
1503 | ei->op.proc_read = proc_tid_stat; | ||
1504 | break; | ||
1505 | case PROC_TGID_STAT: | ||
1506 | inode->i_fop = &proc_info_file_operations; | ||
1507 | ei->op.proc_read = proc_tgid_stat; | ||
1508 | break; | ||
1509 | case PROC_TID_CMDLINE: | ||
1510 | case PROC_TGID_CMDLINE: | ||
1511 | inode->i_fop = &proc_info_file_operations; | ||
1512 | ei->op.proc_read = proc_pid_cmdline; | ||
1513 | break; | ||
1514 | case PROC_TID_STATM: | ||
1515 | case PROC_TGID_STATM: | ||
1516 | inode->i_fop = &proc_info_file_operations; | ||
1517 | ei->op.proc_read = proc_pid_statm; | ||
1518 | break; | ||
1519 | case PROC_TID_MAPS: | ||
1520 | case PROC_TGID_MAPS: | ||
1521 | inode->i_fop = &proc_maps_operations; | ||
1522 | break; | ||
1523 | case PROC_TID_MEM: | ||
1524 | case PROC_TGID_MEM: | ||
1525 | inode->i_op = &proc_mem_inode_operations; | ||
1526 | inode->i_fop = &proc_mem_operations; | ||
1527 | break; | ||
1528 | #ifdef CONFIG_SECCOMP | ||
1529 | case PROC_TID_SECCOMP: | ||
1530 | case PROC_TGID_SECCOMP: | ||
1531 | inode->i_fop = &proc_seccomp_operations; | ||
1532 | break; | ||
1533 | #endif /* CONFIG_SECCOMP */ | ||
1534 | case PROC_TID_MOUNTS: | ||
1535 | case PROC_TGID_MOUNTS: | ||
1536 | inode->i_fop = &proc_mounts_operations; | ||
1537 | break; | ||
1538 | #ifdef CONFIG_SECURITY | ||
1539 | case PROC_TID_ATTR: | ||
1540 | inode->i_nlink = 2; | ||
1541 | inode->i_op = &proc_tid_attr_inode_operations; | ||
1542 | inode->i_fop = &proc_tid_attr_operations; | ||
1543 | break; | ||
1544 | case PROC_TGID_ATTR: | ||
1545 | inode->i_nlink = 2; | ||
1546 | inode->i_op = &proc_tgid_attr_inode_operations; | ||
1547 | inode->i_fop = &proc_tgid_attr_operations; | ||
1548 | break; | ||
1549 | case PROC_TID_ATTR_CURRENT: | ||
1550 | case PROC_TGID_ATTR_CURRENT: | ||
1551 | case PROC_TID_ATTR_PREV: | ||
1552 | case PROC_TGID_ATTR_PREV: | ||
1553 | case PROC_TID_ATTR_EXEC: | ||
1554 | case PROC_TGID_ATTR_EXEC: | ||
1555 | case PROC_TID_ATTR_FSCREATE: | ||
1556 | case PROC_TGID_ATTR_FSCREATE: | ||
1557 | inode->i_fop = &proc_pid_attr_operations; | ||
1558 | break; | ||
1559 | #endif | ||
1560 | #ifdef CONFIG_KALLSYMS | ||
1561 | case PROC_TID_WCHAN: | ||
1562 | case PROC_TGID_WCHAN: | ||
1563 | inode->i_fop = &proc_info_file_operations; | ||
1564 | ei->op.proc_read = proc_pid_wchan; | ||
1565 | break; | ||
1566 | #endif | ||
1567 | #ifdef CONFIG_SCHEDSTATS | ||
1568 | case PROC_TID_SCHEDSTAT: | ||
1569 | case PROC_TGID_SCHEDSTAT: | ||
1570 | inode->i_fop = &proc_info_file_operations; | ||
1571 | ei->op.proc_read = proc_pid_schedstat; | ||
1572 | break; | ||
1573 | #endif | ||
1574 | #ifdef CONFIG_CPUSETS | ||
1575 | case PROC_TID_CPUSET: | ||
1576 | case PROC_TGID_CPUSET: | ||
1577 | inode->i_fop = &proc_cpuset_operations; | ||
1578 | break; | ||
1579 | #endif | ||
1580 | case PROC_TID_OOM_SCORE: | ||
1581 | case PROC_TGID_OOM_SCORE: | ||
1582 | inode->i_fop = &proc_info_file_operations; | ||
1583 | ei->op.proc_read = proc_oom_score; | ||
1584 | break; | ||
1585 | case PROC_TID_OOM_ADJUST: | ||
1586 | case PROC_TGID_OOM_ADJUST: | ||
1587 | inode->i_fop = &proc_oom_adjust_operations; | ||
1588 | break; | ||
1589 | #ifdef CONFIG_AUDITSYSCALL | ||
1590 | case PROC_TID_LOGINUID: | ||
1591 | case PROC_TGID_LOGINUID: | ||
1592 | inode->i_fop = &proc_loginuid_operations; | ||
1593 | break; | ||
1594 | #endif | ||
1595 | default: | ||
1596 | printk("procfs: impossible type (%d)",p->type); | ||
1597 | iput(inode); | ||
1598 | return ERR_PTR(-EINVAL); | ||
1599 | } | ||
1600 | dentry->d_op = &pid_dentry_operations; | ||
1601 | d_add(dentry, inode); | ||
1602 | return NULL; | ||
1603 | |||
1604 | out: | ||
1605 | return ERR_PTR(error); | ||
1606 | } | ||
1607 | |||
1608 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ | ||
1609 | return proc_pident_lookup(dir, dentry, tgid_base_stuff); | ||
1610 | } | ||
1611 | |||
1612 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ | ||
1613 | return proc_pident_lookup(dir, dentry, tid_base_stuff); | ||
1614 | } | ||
1615 | |||
1616 | static struct file_operations proc_tgid_base_operations = { | ||
1617 | .read = generic_read_dir, | ||
1618 | .readdir = proc_tgid_base_readdir, | ||
1619 | }; | ||
1620 | |||
1621 | static struct file_operations proc_tid_base_operations = { | ||
1622 | .read = generic_read_dir, | ||
1623 | .readdir = proc_tid_base_readdir, | ||
1624 | }; | ||
1625 | |||
1626 | static struct inode_operations proc_tgid_base_inode_operations = { | ||
1627 | .lookup = proc_tgid_base_lookup, | ||
1628 | }; | ||
1629 | |||
1630 | static struct inode_operations proc_tid_base_inode_operations = { | ||
1631 | .lookup = proc_tid_base_lookup, | ||
1632 | }; | ||
1633 | |||
1634 | #ifdef CONFIG_SECURITY | ||
1635 | static int proc_tgid_attr_readdir(struct file * filp, | ||
1636 | void * dirent, filldir_t filldir) | ||
1637 | { | ||
1638 | return proc_pident_readdir(filp,dirent,filldir, | ||
1639 | tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff)); | ||
1640 | } | ||
1641 | |||
1642 | static int proc_tid_attr_readdir(struct file * filp, | ||
1643 | void * dirent, filldir_t filldir) | ||
1644 | { | ||
1645 | return proc_pident_readdir(filp,dirent,filldir, | ||
1646 | tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff)); | ||
1647 | } | ||
1648 | |||
1649 | static struct file_operations proc_tgid_attr_operations = { | ||
1650 | .read = generic_read_dir, | ||
1651 | .readdir = proc_tgid_attr_readdir, | ||
1652 | }; | ||
1653 | |||
1654 | static struct file_operations proc_tid_attr_operations = { | ||
1655 | .read = generic_read_dir, | ||
1656 | .readdir = proc_tid_attr_readdir, | ||
1657 | }; | ||
1658 | |||
1659 | static struct dentry *proc_tgid_attr_lookup(struct inode *dir, | ||
1660 | struct dentry *dentry, struct nameidata *nd) | ||
1661 | { | ||
1662 | return proc_pident_lookup(dir, dentry, tgid_attr_stuff); | ||
1663 | } | ||
1664 | |||
1665 | static struct dentry *proc_tid_attr_lookup(struct inode *dir, | ||
1666 | struct dentry *dentry, struct nameidata *nd) | ||
1667 | { | ||
1668 | return proc_pident_lookup(dir, dentry, tid_attr_stuff); | ||
1669 | } | ||
1670 | |||
1671 | static struct inode_operations proc_tgid_attr_inode_operations = { | ||
1672 | .lookup = proc_tgid_attr_lookup, | ||
1673 | }; | ||
1674 | |||
1675 | static struct inode_operations proc_tid_attr_inode_operations = { | ||
1676 | .lookup = proc_tid_attr_lookup, | ||
1677 | }; | ||
1678 | #endif | ||
1679 | |||
1680 | /* | ||
1681 | * /proc/self: | ||
1682 | */ | ||
1683 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, | ||
1684 | int buflen) | ||
1685 | { | ||
1686 | char tmp[30]; | ||
1687 | sprintf(tmp, "%d", current->tgid); | ||
1688 | return vfs_readlink(dentry,buffer,buflen,tmp); | ||
1689 | } | ||
1690 | |||
1691 | static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
1692 | { | ||
1693 | char tmp[30]; | ||
1694 | sprintf(tmp, "%d", current->tgid); | ||
1695 | return vfs_follow_link(nd,tmp); | ||
1696 | } | ||
1697 | |||
1698 | static struct inode_operations proc_self_inode_operations = { | ||
1699 | .readlink = proc_self_readlink, | ||
1700 | .follow_link = proc_self_follow_link, | ||
1701 | }; | ||
1702 | |||
1703 | /** | ||
1704 | * proc_pid_unhash - Unhash /proc/<pid> entry from the dcache. | ||
1705 | * @p: task that should be flushed. | ||
1706 | * | ||
1707 | * Drops the /proc/<pid> dcache entry from the hash chains. | ||
1708 | * | ||
1709 | * Dropping /proc/<pid> entries and detach_pid must be synchronous, | ||
1710 | * otherwise e.g. /proc/<pid>/exe might point to the wrong executable, | ||
1711 | * if the pid value is immediately reused. This is enforced by | ||
1712 | * - caller must acquire spin_lock(p->proc_lock) | ||
1713 | * - must be called before detach_pid() | ||
1714 | * - proc_pid_lookup acquires proc_lock, and checks that | ||
1715 | * the target is not dead by looking at the attach count | ||
1716 | * of PIDTYPE_PID. | ||
1717 | */ | ||
1718 | |||
1719 | struct dentry *proc_pid_unhash(struct task_struct *p) | ||
1720 | { | ||
1721 | struct dentry *proc_dentry; | ||
1722 | |||
1723 | proc_dentry = p->proc_dentry; | ||
1724 | if (proc_dentry != NULL) { | ||
1725 | |||
1726 | spin_lock(&dcache_lock); | ||
1727 | spin_lock(&proc_dentry->d_lock); | ||
1728 | if (!d_unhashed(proc_dentry)) { | ||
1729 | dget_locked(proc_dentry); | ||
1730 | __d_drop(proc_dentry); | ||
1731 | spin_unlock(&proc_dentry->d_lock); | ||
1732 | } else { | ||
1733 | spin_unlock(&proc_dentry->d_lock); | ||
1734 | proc_dentry = NULL; | ||
1735 | } | ||
1736 | spin_unlock(&dcache_lock); | ||
1737 | } | ||
1738 | return proc_dentry; | ||
1739 | } | ||
1740 | |||
1741 | /** | ||
1742 | * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries | ||
1743 | * @proc_dentry: directory to prune. | ||
1744 | * | ||
1745 | * Shrink the /proc directory that was used by the just killed thread. | ||
1746 | */ | ||
1747 | |||
1748 | void proc_pid_flush(struct dentry *proc_dentry) | ||
1749 | { | ||
1750 | might_sleep(); | ||
1751 | if(proc_dentry != NULL) { | ||
1752 | shrink_dcache_parent(proc_dentry); | ||
1753 | dput(proc_dentry); | ||
1754 | } | ||
1755 | } | ||
1756 | |||
1757 | /* SMP-safe */ | ||
1758 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | ||
1759 | { | ||
1760 | struct task_struct *task; | ||
1761 | struct inode *inode; | ||
1762 | struct proc_inode *ei; | ||
1763 | unsigned tgid; | ||
1764 | int died; | ||
1765 | |||
1766 | if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { | ||
1767 | inode = new_inode(dir->i_sb); | ||
1768 | if (!inode) | ||
1769 | return ERR_PTR(-ENOMEM); | ||
1770 | ei = PROC_I(inode); | ||
1771 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
1772 | inode->i_ino = fake_ino(0, PROC_TGID_INO); | ||
1773 | ei->pde = NULL; | ||
1774 | inode->i_mode = S_IFLNK|S_IRWXUGO; | ||
1775 | inode->i_uid = inode->i_gid = 0; | ||
1776 | inode->i_size = 64; | ||
1777 | inode->i_op = &proc_self_inode_operations; | ||
1778 | d_add(dentry, inode); | ||
1779 | return NULL; | ||
1780 | } | ||
1781 | tgid = name_to_int(dentry); | ||
1782 | if (tgid == ~0U) | ||
1783 | goto out; | ||
1784 | |||
1785 | read_lock(&tasklist_lock); | ||
1786 | task = find_task_by_pid(tgid); | ||
1787 | if (task) | ||
1788 | get_task_struct(task); | ||
1789 | read_unlock(&tasklist_lock); | ||
1790 | if (!task) | ||
1791 | goto out; | ||
1792 | |||
1793 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); | ||
1794 | |||
1795 | |||
1796 | if (!inode) { | ||
1797 | put_task_struct(task); | ||
1798 | goto out; | ||
1799 | } | ||
1800 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; | ||
1801 | inode->i_op = &proc_tgid_base_inode_operations; | ||
1802 | inode->i_fop = &proc_tgid_base_operations; | ||
1803 | inode->i_nlink = 3; | ||
1804 | inode->i_flags|=S_IMMUTABLE; | ||
1805 | |||
1806 | dentry->d_op = &pid_base_dentry_operations; | ||
1807 | |||
1808 | died = 0; | ||
1809 | d_add(dentry, inode); | ||
1810 | spin_lock(&task->proc_lock); | ||
1811 | task->proc_dentry = dentry; | ||
1812 | if (!pid_alive(task)) { | ||
1813 | dentry = proc_pid_unhash(task); | ||
1814 | died = 1; | ||
1815 | } | ||
1816 | spin_unlock(&task->proc_lock); | ||
1817 | |||
1818 | put_task_struct(task); | ||
1819 | if (died) { | ||
1820 | proc_pid_flush(dentry); | ||
1821 | goto out; | ||
1822 | } | ||
1823 | return NULL; | ||
1824 | out: | ||
1825 | return ERR_PTR(-ENOENT); | ||
1826 | } | ||
1827 | |||
1828 | /* SMP-safe */ | ||
1829 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | ||
1830 | { | ||
1831 | struct task_struct *task; | ||
1832 | struct task_struct *leader = proc_task(dir); | ||
1833 | struct inode *inode; | ||
1834 | unsigned tid; | ||
1835 | |||
1836 | tid = name_to_int(dentry); | ||
1837 | if (tid == ~0U) | ||
1838 | goto out; | ||
1839 | |||
1840 | read_lock(&tasklist_lock); | ||
1841 | task = find_task_by_pid(tid); | ||
1842 | if (task) | ||
1843 | get_task_struct(task); | ||
1844 | read_unlock(&tasklist_lock); | ||
1845 | if (!task) | ||
1846 | goto out; | ||
1847 | if (leader->tgid != task->tgid) | ||
1848 | goto out_drop_task; | ||
1849 | |||
1850 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); | ||
1851 | |||
1852 | |||
1853 | if (!inode) | ||
1854 | goto out_drop_task; | ||
1855 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; | ||
1856 | inode->i_op = &proc_tid_base_inode_operations; | ||
1857 | inode->i_fop = &proc_tid_base_operations; | ||
1858 | inode->i_nlink = 3; | ||
1859 | inode->i_flags|=S_IMMUTABLE; | ||
1860 | |||
1861 | dentry->d_op = &pid_base_dentry_operations; | ||
1862 | |||
1863 | d_add(dentry, inode); | ||
1864 | |||
1865 | put_task_struct(task); | ||
1866 | return NULL; | ||
1867 | out_drop_task: | ||
1868 | put_task_struct(task); | ||
1869 | out: | ||
1870 | return ERR_PTR(-ENOENT); | ||
1871 | } | ||
1872 | |||
1873 | #define PROC_NUMBUF 10 /* size of the on-stack char buffer the readdir routines use to render a pid as decimal digits */ | ||
1874 | #define PROC_MAXPIDS 20 /* capacity of the temporary id arrays filled per pass by get_tgid_list()/get_tid_list() */ | ||
1875 | |||
1876 | /* | ||
1877 | * Get a few tgid's to return for filldir - we need to hold the | ||
1878 | * tasklist lock while doing this, and we must release it before | ||
1879 | * we actually do the filldir itself, so we use a temp buffer.. | ||
1880 | */ | ||
1881 | static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) | ||
1882 | { | ||
1883 | struct task_struct *p; | ||
1884 | int nr_tgids = 0; | ||
1885 | |||
1886 | index--; | ||
1887 | read_lock(&tasklist_lock); | ||
1888 | p = NULL; | ||
1889 | if (version) { | ||
1890 | p = find_task_by_pid(version); | ||
1891 | if (p && !thread_group_leader(p)) | ||
1892 | p = NULL; | ||
1893 | } | ||
1894 | |||
1895 | if (p) | ||
1896 | index = 0; | ||
1897 | else | ||
1898 | p = next_task(&init_task); | ||
1899 | |||
1900 | for ( ; p != &init_task; p = next_task(p)) { | ||
1901 | int tgid = p->pid; | ||
1902 | if (!pid_alive(p)) | ||
1903 | continue; | ||
1904 | if (--index >= 0) | ||
1905 | continue; | ||
1906 | tgids[nr_tgids] = tgid; | ||
1907 | nr_tgids++; | ||
1908 | if (nr_tgids >= PROC_MAXPIDS) | ||
1909 | break; | ||
1910 | } | ||
1911 | read_unlock(&tasklist_lock); | ||
1912 | return nr_tgids; | ||
1913 | } | ||
1914 | |||
1915 | /* | ||
1916 | * Get a few tid's to return for filldir - we need to hold the | ||
1917 | * tasklist lock while doing this, and we must release it before | ||
1918 | * we actually do the filldir itself, so we use a temp buffer.. | ||
1919 | */ | ||
1920 | static int get_tid_list(int index, unsigned int *tids, struct inode *dir) | ||
1921 | { | ||
1922 | struct task_struct *leader_task = proc_task(dir); | ||
1923 | struct task_struct *task = leader_task; | ||
1924 | int nr_tids = 0; | ||
1925 | |||
1926 | index -= 2; | ||
1927 | read_lock(&tasklist_lock); | ||
1928 | /* | ||
1929 | * The starting point task (leader_task) might be an already | ||
1930 | * unlinked task, which cannot be used to access the task-list | ||
1931 | * via next_thread(). | ||
1932 | */ | ||
1933 | if (pid_alive(task)) do { | ||
1934 | int tid = task->pid; | ||
1935 | |||
1936 | if (--index >= 0) | ||
1937 | continue; | ||
1938 | tids[nr_tids] = tid; | ||
1939 | nr_tids++; | ||
1940 | if (nr_tids >= PROC_MAXPIDS) | ||
1941 | break; | ||
1942 | } while ((task = next_thread(task)) != leader_task); | ||
1943 | read_unlock(&tasklist_lock); | ||
1944 | return nr_tids; | ||
1945 | } | ||
1946 | |||
1947 | /* for the /proc/ directory itself, after non-process stuff has been done */ | ||
1948 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | ||
1949 | { | ||
1950 | unsigned int tgid_array[PROC_MAXPIDS]; | ||
1951 | char buf[PROC_NUMBUF]; | ||
1952 | unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; | ||
1953 | unsigned int nr_tgids, i; | ||
1954 | int next_tgid; | ||
1955 | |||
1956 | if (!nr) { | ||
1957 | ino_t ino = fake_ino(0,PROC_TGID_INO); | ||
1958 | if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) | ||
1959 | return 0; | ||
1960 | filp->f_pos++; | ||
1961 | nr++; | ||
1962 | } | ||
1963 | |||
1964 | /* f_version caches the tgid value that the last readdir call couldn't | ||
1965 | * return. lseek aka telldir automagically resets f_version to 0. | ||
1966 | */ | ||
1967 | next_tgid = filp->f_version; | ||
1968 | filp->f_version = 0; | ||
1969 | for (;;) { | ||
1970 | nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); | ||
1971 | if (!nr_tgids) { | ||
1972 | /* no more entries ! */ | ||
1973 | break; | ||
1974 | } | ||
1975 | next_tgid = 0; | ||
1976 | |||
1977 | /* do not use the last found pid, reserve it for next_tgid */ | ||
1978 | if (nr_tgids == PROC_MAXPIDS) { | ||
1979 | nr_tgids--; | ||
1980 | next_tgid = tgid_array[nr_tgids]; | ||
1981 | } | ||
1982 | |||
1983 | for (i=0;i<nr_tgids;i++) { | ||
1984 | int tgid = tgid_array[i]; | ||
1985 | ino_t ino = fake_ino(tgid,PROC_TGID_INO); | ||
1986 | unsigned long j = PROC_NUMBUF; | ||
1987 | |||
1988 | do | ||
1989 | buf[--j] = '0' + (tgid % 10); | ||
1990 | while ((tgid /= 10) != 0); | ||
1991 | |||
1992 | if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { | ||
1993 | /* returning this tgid failed, save it as the first | ||
1994 | * pid for the next readir call */ | ||
1995 | filp->f_version = tgid_array[i]; | ||
1996 | goto out; | ||
1997 | } | ||
1998 | filp->f_pos++; | ||
1999 | nr++; | ||
2000 | } | ||
2001 | } | ||
2002 | out: | ||
2003 | return 0; | ||
2004 | } | ||
2005 | |||
2006 | /* for the /proc/TGID/task/ directories */ | ||
2007 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) | ||
2008 | { | ||
2009 | unsigned int tid_array[PROC_MAXPIDS]; | ||
2010 | char buf[PROC_NUMBUF]; | ||
2011 | unsigned int nr_tids, i; | ||
2012 | struct dentry *dentry = filp->f_dentry; | ||
2013 | struct inode *inode = dentry->d_inode; | ||
2014 | int retval = -ENOENT; | ||
2015 | ino_t ino; | ||
2016 | unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ | ||
2017 | |||
2018 | if (!pid_alive(proc_task(inode))) | ||
2019 | goto out; | ||
2020 | retval = 0; | ||
2021 | |||
2022 | switch (pos) { | ||
2023 | case 0: | ||
2024 | ino = inode->i_ino; | ||
2025 | if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) | ||
2026 | goto out; | ||
2027 | pos++; | ||
2028 | /* fall through */ | ||
2029 | case 1: | ||
2030 | ino = parent_ino(dentry); | ||
2031 | if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) | ||
2032 | goto out; | ||
2033 | pos++; | ||
2034 | /* fall through */ | ||
2035 | } | ||
2036 | |||
2037 | nr_tids = get_tid_list(pos, tid_array, inode); | ||
2038 | |||
2039 | for (i = 0; i < nr_tids; i++) { | ||
2040 | unsigned long j = PROC_NUMBUF; | ||
2041 | int tid = tid_array[i]; | ||
2042 | |||
2043 | ino = fake_ino(tid,PROC_TID_INO); | ||
2044 | |||
2045 | do | ||
2046 | buf[--j] = '0' + (tid % 10); | ||
2047 | while ((tid /= 10) != 0); | ||
2048 | |||
2049 | if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) | ||
2050 | break; | ||
2051 | pos++; | ||
2052 | } | ||
2053 | out: | ||
2054 | filp->f_pos = pos; | ||
2055 | return retval; | ||
2056 | } | ||