diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2009-03-30 07:20:30 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2009-03-31 23:00:26 -0400 |
commit | 498052bba55ecaff58db6a1436b0e25bfd75a7ff (patch) | |
tree | bd3644ac60737e3733995a203acebd70cfd1b21b | |
parent | 3e93cd671813e204c258f1e6c797959920cf7772 (diff) |
New locking/refcounting for fs_struct
* all changes of current->fs are done under task_lock and write_lock of
old fs->lock
* refcount is not atomic anymore (same protection)
* its decrements are done when removing reference from current; at the
same time we decide whether to free it.
* put_fs_struct() is gone
* new field - ->in_exec. Set by check_unsafe_exec() if we are trying to do
execve() and only subthreads share fs_struct. Cleared when finishing exec
(success and failure alike). Makes CLONE_FS fail with -EAGAIN if set.
* check_unsafe_exec() may fail with -EAGAIN if another execve() from subthread
is in progress.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/compat.c | 16 | ||||
-rw-r--r-- | fs/exec.c | 31 | ||||
-rw-r--r-- | fs/fs_struct.c | 69 | ||||
-rw-r--r-- | fs/internal.h | 2 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 2 | ||||
-rw-r--r-- | include/linux/fs_struct.h | 8 | ||||
-rw-r--r-- | kernel/fork.c | 37 |
7 files changed, 121 insertions, 44 deletions
diff --git a/fs/compat.c b/fs/compat.c index 55efdfebdf5a..baabf203b847 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/poll.h> | 51 | #include <linux/poll.h> |
52 | #include <linux/mm.h> | 52 | #include <linux/mm.h> |
53 | #include <linux/eventpoll.h> | 53 | #include <linux/eventpoll.h> |
54 | #include <linux/fs_struct.h> | ||
54 | 55 | ||
55 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
56 | #include <asm/mmu_context.h> | 57 | #include <asm/mmu_context.h> |
@@ -1441,12 +1442,15 @@ int compat_do_execve(char * filename, | |||
1441 | bprm->cred = prepare_exec_creds(); | 1442 | bprm->cred = prepare_exec_creds(); |
1442 | if (!bprm->cred) | 1443 | if (!bprm->cred) |
1443 | goto out_unlock; | 1444 | goto out_unlock; |
1444 | check_unsafe_exec(bprm); | 1445 | |
1446 | retval = check_unsafe_exec(bprm); | ||
1447 | if (retval) | ||
1448 | goto out_unlock; | ||
1445 | 1449 | ||
1446 | file = open_exec(filename); | 1450 | file = open_exec(filename); |
1447 | retval = PTR_ERR(file); | 1451 | retval = PTR_ERR(file); |
1448 | if (IS_ERR(file)) | 1452 | if (IS_ERR(file)) |
1449 | goto out_unlock; | 1453 | goto out_unmark; |
1450 | 1454 | ||
1451 | sched_exec(); | 1455 | sched_exec(); |
1452 | 1456 | ||
@@ -1488,6 +1492,9 @@ int compat_do_execve(char * filename, | |||
1488 | goto out; | 1492 | goto out; |
1489 | 1493 | ||
1490 | /* execve succeeded */ | 1494 | /* execve succeeded */ |
1495 | write_lock(&current->fs->lock); | ||
1496 | current->fs->in_exec = 0; | ||
1497 | write_unlock(&current->fs->lock); | ||
1491 | current->in_execve = 0; | 1498 | current->in_execve = 0; |
1492 | mutex_unlock(&current->cred_exec_mutex); | 1499 | mutex_unlock(&current->cred_exec_mutex); |
1493 | acct_update_integrals(current); | 1500 | acct_update_integrals(current); |
@@ -1506,6 +1513,11 @@ out_file: | |||
1506 | fput(bprm->file); | 1513 | fput(bprm->file); |
1507 | } | 1514 | } |
1508 | 1515 | ||
1516 | out_unmark: | ||
1517 | write_lock(&current->fs->lock); | ||
1518 | current->fs->in_exec = 0; | ||
1519 | write_unlock(&current->fs->lock); | ||
1520 | |||
1509 | out_unlock: | 1521 | out_unlock: |
1510 | current->in_execve = 0; | 1522 | current->in_execve = 0; |
1511 | mutex_unlock(&current->cred_exec_mutex); | 1523 | mutex_unlock(&current->cred_exec_mutex); |
@@ -1056,16 +1056,18 @@ EXPORT_SYMBOL(install_exec_creds); | |||
1056 | * - the caller must hold current->cred_exec_mutex to protect against | 1056 | * - the caller must hold current->cred_exec_mutex to protect against |
1057 | * PTRACE_ATTACH | 1057 | * PTRACE_ATTACH |
1058 | */ | 1058 | */ |
1059 | void check_unsafe_exec(struct linux_binprm *bprm) | 1059 | int check_unsafe_exec(struct linux_binprm *bprm) |
1060 | { | 1060 | { |
1061 | struct task_struct *p = current, *t; | 1061 | struct task_struct *p = current, *t; |
1062 | unsigned long flags; | 1062 | unsigned long flags; |
1063 | unsigned n_fs, n_sighand; | 1063 | unsigned n_fs, n_sighand; |
1064 | int res = 0; | ||
1064 | 1065 | ||
1065 | bprm->unsafe = tracehook_unsafe_exec(p); | 1066 | bprm->unsafe = tracehook_unsafe_exec(p); |
1066 | 1067 | ||
1067 | n_fs = 1; | 1068 | n_fs = 1; |
1068 | n_sighand = 1; | 1069 | n_sighand = 1; |
1070 | write_lock(&p->fs->lock); | ||
1069 | lock_task_sighand(p, &flags); | 1071 | lock_task_sighand(p, &flags); |
1070 | for (t = next_thread(p); t != p; t = next_thread(t)) { | 1072 | for (t = next_thread(p); t != p; t = next_thread(t)) { |
1071 | if (t->fs == p->fs) | 1073 | if (t->fs == p->fs) |
@@ -1073,11 +1075,19 @@ void check_unsafe_exec(struct linux_binprm *bprm) | |||
1073 | n_sighand++; | 1075 | n_sighand++; |
1074 | } | 1076 | } |
1075 | 1077 | ||
1076 | if (atomic_read(&p->fs->count) > n_fs || | 1078 | if (p->fs->users > n_fs || |
1077 | atomic_read(&p->sighand->count) > n_sighand) | 1079 | atomic_read(&p->sighand->count) > n_sighand) { |
1078 | bprm->unsafe |= LSM_UNSAFE_SHARE; | 1080 | bprm->unsafe |= LSM_UNSAFE_SHARE; |
1081 | } else { | ||
1082 | if (p->fs->in_exec) | ||
1083 | res = -EAGAIN; | ||
1084 | p->fs->in_exec = 1; | ||
1085 | } | ||
1079 | 1086 | ||
1080 | unlock_task_sighand(p, &flags); | 1087 | unlock_task_sighand(p, &flags); |
1088 | write_unlock(&p->fs->lock); | ||
1089 | |||
1090 | return res; | ||
1081 | } | 1091 | } |
1082 | 1092 | ||
1083 | /* | 1093 | /* |
@@ -1296,12 +1306,15 @@ int do_execve(char * filename, | |||
1296 | bprm->cred = prepare_exec_creds(); | 1306 | bprm->cred = prepare_exec_creds(); |
1297 | if (!bprm->cred) | 1307 | if (!bprm->cred) |
1298 | goto out_unlock; | 1308 | goto out_unlock; |
1299 | check_unsafe_exec(bprm); | 1309 | |
1310 | retval = check_unsafe_exec(bprm); | ||
1311 | if (retval) | ||
1312 | goto out_unlock; | ||
1300 | 1313 | ||
1301 | file = open_exec(filename); | 1314 | file = open_exec(filename); |
1302 | retval = PTR_ERR(file); | 1315 | retval = PTR_ERR(file); |
1303 | if (IS_ERR(file)) | 1316 | if (IS_ERR(file)) |
1304 | goto out_unlock; | 1317 | goto out_unmark; |
1305 | 1318 | ||
1306 | sched_exec(); | 1319 | sched_exec(); |
1307 | 1320 | ||
@@ -1344,6 +1357,9 @@ int do_execve(char * filename, | |||
1344 | goto out; | 1357 | goto out; |
1345 | 1358 | ||
1346 | /* execve succeeded */ | 1359 | /* execve succeeded */ |
1360 | write_lock(&current->fs->lock); | ||
1361 | current->fs->in_exec = 0; | ||
1362 | write_unlock(&current->fs->lock); | ||
1347 | current->in_execve = 0; | 1363 | current->in_execve = 0; |
1348 | mutex_unlock(&current->cred_exec_mutex); | 1364 | mutex_unlock(&current->cred_exec_mutex); |
1349 | acct_update_integrals(current); | 1365 | acct_update_integrals(current); |
@@ -1362,6 +1378,11 @@ out_file: | |||
1362 | fput(bprm->file); | 1378 | fput(bprm->file); |
1363 | } | 1379 | } |
1364 | 1380 | ||
1381 | out_unmark: | ||
1382 | write_lock(&current->fs->lock); | ||
1383 | current->fs->in_exec = 0; | ||
1384 | write_unlock(&current->fs->lock); | ||
1385 | |||
1365 | out_unlock: | 1386 | out_unlock: |
1366 | current->in_execve = 0; | 1387 | current->in_execve = 0; |
1367 | mutex_unlock(&current->cred_exec_mutex); | 1388 | mutex_unlock(&current->cred_exec_mutex); |
diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 36e0a123bbf3..41cff72b377b 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c | |||
@@ -72,25 +72,27 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) | |||
72 | path_put(old_root); | 72 | path_put(old_root); |
73 | } | 73 | } |
74 | 74 | ||
75 | void put_fs_struct(struct fs_struct *fs) | 75 | void free_fs_struct(struct fs_struct *fs) |
76 | { | 76 | { |
77 | /* No need to hold fs->lock if we are killing it */ | 77 | path_put(&fs->root); |
78 | if (atomic_dec_and_test(&fs->count)) { | 78 | path_put(&fs->pwd); |
79 | path_put(&fs->root); | 79 | kmem_cache_free(fs_cachep, fs); |
80 | path_put(&fs->pwd); | ||
81 | kmem_cache_free(fs_cachep, fs); | ||
82 | } | ||
83 | } | 80 | } |
84 | 81 | ||
85 | void exit_fs(struct task_struct *tsk) | 82 | void exit_fs(struct task_struct *tsk) |
86 | { | 83 | { |
87 | struct fs_struct * fs = tsk->fs; | 84 | struct fs_struct *fs = tsk->fs; |
88 | 85 | ||
89 | if (fs) { | 86 | if (fs) { |
87 | int kill; | ||
90 | task_lock(tsk); | 88 | task_lock(tsk); |
89 | write_lock(&fs->lock); | ||
91 | tsk->fs = NULL; | 90 | tsk->fs = NULL; |
91 | kill = !--fs->users; | ||
92 | write_unlock(&fs->lock); | ||
92 | task_unlock(tsk); | 93 | task_unlock(tsk); |
93 | put_fs_struct(fs); | 94 | if (kill) |
95 | free_fs_struct(fs); | ||
94 | } | 96 | } |
95 | } | 97 | } |
96 | 98 | ||
@@ -99,7 +101,8 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) | |||
99 | struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); | 101 | struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); |
100 | /* We don't need to lock fs - think why ;-) */ | 102 | /* We don't need to lock fs - think why ;-) */ |
101 | if (fs) { | 103 | if (fs) { |
102 | atomic_set(&fs->count, 1); | 104 | fs->users = 1; |
105 | fs->in_exec = 0; | ||
103 | rwlock_init(&fs->lock); | 106 | rwlock_init(&fs->lock); |
104 | fs->umask = old->umask; | 107 | fs->umask = old->umask; |
105 | read_lock(&old->lock); | 108 | read_lock(&old->lock); |
@@ -114,28 +117,54 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) | |||
114 | 117 | ||
115 | int unshare_fs_struct(void) | 118 | int unshare_fs_struct(void) |
116 | { | 119 | { |
117 | struct fs_struct *fsp = copy_fs_struct(current->fs); | 120 | struct fs_struct *fs = current->fs; |
118 | if (!fsp) | 121 | struct fs_struct *new_fs = copy_fs_struct(fs); |
122 | int kill; | ||
123 | |||
124 | if (!new_fs) | ||
119 | return -ENOMEM; | 125 | return -ENOMEM; |
120 | exit_fs(current); | 126 | |
121 | current->fs = fsp; | 127 | task_lock(current); |
128 | write_lock(&fs->lock); | ||
129 | kill = !--fs->users; | ||
130 | current->fs = new_fs; | ||
131 | write_unlock(&fs->lock); | ||
132 | task_unlock(current); | ||
133 | |||
134 | if (kill) | ||
135 | free_fs_struct(fs); | ||
136 | |||
122 | return 0; | 137 | return 0; |
123 | } | 138 | } |
124 | EXPORT_SYMBOL_GPL(unshare_fs_struct); | 139 | EXPORT_SYMBOL_GPL(unshare_fs_struct); |
125 | 140 | ||
126 | /* to be mentioned only in INIT_TASK */ | 141 | /* to be mentioned only in INIT_TASK */ |
127 | struct fs_struct init_fs = { | 142 | struct fs_struct init_fs = { |
128 | .count = ATOMIC_INIT(1), | 143 | .users = 1, |
129 | .lock = __RW_LOCK_UNLOCKED(init_fs.lock), | 144 | .lock = __RW_LOCK_UNLOCKED(init_fs.lock), |
130 | .umask = 0022, | 145 | .umask = 0022, |
131 | }; | 146 | }; |
132 | 147 | ||
133 | void daemonize_fs_struct(void) | 148 | void daemonize_fs_struct(void) |
134 | { | 149 | { |
135 | struct fs_struct *fs; | 150 | struct fs_struct *fs = current->fs; |
151 | |||
152 | if (fs) { | ||
153 | int kill; | ||
154 | |||
155 | task_lock(current); | ||
136 | 156 | ||
137 | exit_fs(current); /* current->fs->count--; */ | 157 | write_lock(&init_fs.lock); |
138 | fs = &init_fs; | 158 | init_fs.users++; |
139 | current->fs = fs; | 159 | write_unlock(&init_fs.lock); |
140 | atomic_inc(&fs->count); | 160 | |
161 | write_lock(&fs->lock); | ||
162 | current->fs = &init_fs; | ||
163 | kill = !--fs->users; | ||
164 | write_unlock(&fs->lock); | ||
165 | |||
166 | task_unlock(current); | ||
167 | if (kill) | ||
168 | free_fs_struct(fs); | ||
169 | } | ||
141 | } | 170 | } |
diff --git a/fs/internal.h b/fs/internal.h index 477a105f8df3..b4dac4fb6b61 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -44,7 +44,7 @@ extern void __init chrdev_init(void); | |||
44 | /* | 44 | /* |
45 | * exec.c | 45 | * exec.c |
46 | */ | 46 | */ |
47 | extern void check_unsafe_exec(struct linux_binprm *); | 47 | extern int check_unsafe_exec(struct linux_binprm *); |
48 | 48 | ||
49 | /* | 49 | /* |
50 | * namespace.c | 50 | * namespace.c |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 343ea1216bc8..6ca01052c5bc 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -49,7 +49,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
49 | else | 49 | else |
50 | bytes += kobjsize(mm); | 50 | bytes += kobjsize(mm); |
51 | 51 | ||
52 | if (current->fs && atomic_read(&current->fs->count) > 1) | 52 | if (current->fs && current->fs->users > 1) |
53 | sbytes += kobjsize(current->fs); | 53 | sbytes += kobjsize(current->fs); |
54 | else | 54 | else |
55 | bytes += kobjsize(current->fs); | 55 | bytes += kobjsize(current->fs); |
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 298cef1c0793..78a05bfcd8eb 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h | |||
@@ -4,12 +4,10 @@ | |||
4 | #include <linux/path.h> | 4 | #include <linux/path.h> |
5 | 5 | ||
6 | struct fs_struct { | 6 | struct fs_struct { |
7 | atomic_t count; /* This usage count is used by check_unsafe_exec() for | 7 | int users; |
8 | * security checking purposes - therefore it may not be | ||
9 | * incremented, except by clone(CLONE_FS). | ||
10 | */ | ||
11 | rwlock_t lock; | 8 | rwlock_t lock; |
12 | int umask; | 9 | int umask; |
10 | int in_exec; | ||
13 | struct path root, pwd; | 11 | struct path root, pwd; |
14 | }; | 12 | }; |
15 | 13 | ||
@@ -19,7 +17,7 @@ extern void exit_fs(struct task_struct *); | |||
19 | extern void set_fs_root(struct fs_struct *, struct path *); | 17 | extern void set_fs_root(struct fs_struct *, struct path *); |
20 | extern void set_fs_pwd(struct fs_struct *, struct path *); | 18 | extern void set_fs_pwd(struct fs_struct *, struct path *); |
21 | extern struct fs_struct *copy_fs_struct(struct fs_struct *); | 19 | extern struct fs_struct *copy_fs_struct(struct fs_struct *); |
22 | extern void put_fs_struct(struct fs_struct *); | 20 | extern void free_fs_struct(struct fs_struct *); |
23 | extern void daemonize_fs_struct(void); | 21 | extern void daemonize_fs_struct(void); |
24 | extern int unshare_fs_struct(void); | 22 | extern int unshare_fs_struct(void); |
25 | 23 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index 05c02dc586b1..51f138a131de 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -683,11 +683,19 @@ fail_nomem: | |||
683 | 683 | ||
684 | static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) | 684 | static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) |
685 | { | 685 | { |
686 | struct fs_struct *fs = current->fs; | ||
686 | if (clone_flags & CLONE_FS) { | 687 | if (clone_flags & CLONE_FS) { |
687 | atomic_inc(&current->fs->count); | 688 | /* tsk->fs is already what we want */ |
689 | write_lock(&fs->lock); | ||
690 | if (fs->in_exec) { | ||
691 | write_unlock(&fs->lock); | ||
692 | return -EAGAIN; | ||
693 | } | ||
694 | fs->users++; | ||
695 | write_unlock(&fs->lock); | ||
688 | return 0; | 696 | return 0; |
689 | } | 697 | } |
690 | tsk->fs = copy_fs_struct(current->fs); | 698 | tsk->fs = copy_fs_struct(fs); |
691 | if (!tsk->fs) | 699 | if (!tsk->fs) |
692 | return -ENOMEM; | 700 | return -ENOMEM; |
693 | return 0; | 701 | return 0; |
@@ -1518,12 +1526,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) | |||
1518 | { | 1526 | { |
1519 | struct fs_struct *fs = current->fs; | 1527 | struct fs_struct *fs = current->fs; |
1520 | 1528 | ||
1521 | if ((unshare_flags & CLONE_FS) && | 1529 | if (!(unshare_flags & CLONE_FS) || !fs) |
1522 | (fs && atomic_read(&fs->count) > 1)) { | 1530 | return 0; |
1523 | *new_fsp = copy_fs_struct(current->fs); | 1531 | |
1524 | if (!*new_fsp) | 1532 | /* don't need lock here; in the worst case we'll do useless copy */ |
1525 | return -ENOMEM; | 1533 | if (fs->users == 1) |
1526 | } | 1534 | return 0; |
1535 | |||
1536 | *new_fsp = copy_fs_struct(fs); | ||
1537 | if (!*new_fsp) | ||
1538 | return -ENOMEM; | ||
1527 | 1539 | ||
1528 | return 0; | 1540 | return 0; |
1529 | } | 1541 | } |
@@ -1639,8 +1651,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1639 | 1651 | ||
1640 | if (new_fs) { | 1652 | if (new_fs) { |
1641 | fs = current->fs; | 1653 | fs = current->fs; |
1654 | write_lock(&fs->lock); | ||
1642 | current->fs = new_fs; | 1655 | current->fs = new_fs; |
1643 | new_fs = fs; | 1656 | if (--fs->users) |
1657 | new_fs = NULL; | ||
1658 | else | ||
1659 | new_fs = fs; | ||
1660 | write_unlock(&fs->lock); | ||
1644 | } | 1661 | } |
1645 | 1662 | ||
1646 | if (new_mm) { | 1663 | if (new_mm) { |
@@ -1679,7 +1696,7 @@ bad_unshare_cleanup_sigh: | |||
1679 | 1696 | ||
1680 | bad_unshare_cleanup_fs: | 1697 | bad_unshare_cleanup_fs: |
1681 | if (new_fs) | 1698 | if (new_fs) |
1682 | put_fs_struct(new_fs); | 1699 | free_fs_struct(new_fs); |
1683 | 1700 | ||
1684 | bad_unshare_cleanup_thread: | 1701 | bad_unshare_cleanup_thread: |
1685 | bad_unshare_out: | 1702 | bad_unshare_out: |