diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2015-01-10 17:53:21 -0500 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2015-01-25 23:17:28 -0500 |
commit | 59eda0e07f43c950d31756213b607af673e551f0 (patch) | |
tree | f40f7b67133576c36a65a4cba9aca5df68d00f34 | |
parent | fdab684d7202774bfd8762d4a656a553b787c8ec (diff) |
new fs_pin killing logics
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/fs_pin.c | 54 | ||||
-rw-r--r-- | include/linux/fs_pin.h | 13 | ||||
-rw-r--r-- | include/linux/pid_namespace.h | 4 | ||||
-rw-r--r-- | kernel/acct.c | 81 |
4 files changed, 96 insertions, 56 deletions
diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 50ef7d2ef03c..0c77bdc238b2 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <linux/fs.h> | 1 | #include <linux/fs.h> |
2 | #include <linux/sched.h> | ||
2 | #include <linux/slab.h> | 3 | #include <linux/slab.h> |
3 | #include <linux/fs_pin.h> | 4 | #include <linux/fs_pin.h> |
4 | #include "internal.h" | 5 | #include "internal.h" |
@@ -12,6 +13,10 @@ void pin_remove(struct fs_pin *pin) | |||
12 | hlist_del(&pin->m_list); | 13 | hlist_del(&pin->m_list); |
13 | hlist_del(&pin->s_list); | 14 | hlist_del(&pin->s_list); |
14 | spin_unlock(&pin_lock); | 15 | spin_unlock(&pin_lock); |
16 | spin_lock_irq(&pin->wait.lock); | ||
17 | pin->done = 1; | ||
18 | wake_up_locked(&pin->wait); | ||
19 | spin_unlock_irq(&pin->wait.lock); | ||
15 | } | 20 | } |
16 | 21 | ||
17 | void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) | 22 | void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) |
@@ -28,19 +33,58 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m) | |||
28 | pin_insert_group(pin, m, &m->mnt_sb->s_pins); | 33 | pin_insert_group(pin, m, &m->mnt_sb->s_pins); |
29 | } | 34 | } |
30 | 35 | ||
36 | void pin_kill(struct fs_pin *p) | ||
37 | { | ||
38 | wait_queue_t wait; | ||
39 | |||
40 | if (!p) { | ||
41 | rcu_read_unlock(); | ||
42 | return; | ||
43 | } | ||
44 | init_wait(&wait); | ||
45 | spin_lock_irq(&p->wait.lock); | ||
46 | if (likely(!p->done)) { | ||
47 | p->done = -1; | ||
48 | spin_unlock_irq(&p->wait.lock); | ||
49 | rcu_read_unlock(); | ||
50 | p->kill(p); | ||
51 | return; | ||
52 | } | ||
53 | if (p->done > 0) { | ||
54 | spin_unlock_irq(&p->wait.lock); | ||
55 | rcu_read_unlock(); | ||
56 | return; | ||
57 | } | ||
58 | __add_wait_queue(&p->wait, &wait); | ||
59 | while (1) { | ||
60 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
61 | spin_unlock_irq(&p->wait.lock); | ||
62 | rcu_read_unlock(); | ||
63 | schedule(); | ||
64 | rcu_read_lock(); | ||
65 | if (likely(list_empty(&wait.task_list))) | ||
66 | break; | ||
67 | /* OK, we know p couldn't have been freed yet */ | ||
68 | spin_lock_irq(&p->wait.lock); | ||
69 | if (p->done > 0) { | ||
70 | spin_unlock_irq(&p->wait.lock); | ||
71 | break; | ||
72 | } | ||
73 | } | ||
74 | rcu_read_unlock(); | ||
75 | } | ||
76 | |||
31 | void mnt_pin_kill(struct mount *m) | 77 | void mnt_pin_kill(struct mount *m) |
32 | { | 78 | { |
33 | while (1) { | 79 | while (1) { |
34 | struct hlist_node *p; | 80 | struct hlist_node *p; |
35 | struct fs_pin *pin; | ||
36 | rcu_read_lock(); | 81 | rcu_read_lock(); |
37 | p = ACCESS_ONCE(m->mnt_pins.first); | 82 | p = ACCESS_ONCE(m->mnt_pins.first); |
38 | if (!p) { | 83 | if (!p) { |
39 | rcu_read_unlock(); | 84 | rcu_read_unlock(); |
40 | break; | 85 | break; |
41 | } | 86 | } |
42 | pin = hlist_entry(p, struct fs_pin, m_list); | 87 | pin_kill(hlist_entry(p, struct fs_pin, m_list)); |
43 | pin->kill(pin); | ||
44 | } | 88 | } |
45 | } | 89 | } |
46 | 90 | ||
@@ -48,14 +92,12 @@ void group_pin_kill(struct hlist_head *p) | |||
48 | { | 92 | { |
49 | while (1) { | 93 | while (1) { |
50 | struct hlist_node *q; | 94 | struct hlist_node *q; |
51 | struct fs_pin *pin; | ||
52 | rcu_read_lock(); | 95 | rcu_read_lock(); |
53 | q = ACCESS_ONCE(p->first); | 96 | q = ACCESS_ONCE(p->first); |
54 | if (!q) { | 97 | if (!q) { |
55 | rcu_read_unlock(); | 98 | rcu_read_unlock(); |
56 | break; | 99 | break; |
57 | } | 100 | } |
58 | pin = hlist_entry(q, struct fs_pin, s_list); | 101 | pin_kill(hlist_entry(q, struct fs_pin, s_list)); |
59 | pin->kill(pin); | ||
60 | } | 102 | } |
61 | } | 103 | } |
diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index 2be38d1464ae..9dc4e0384bfb 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h | |||
@@ -1,11 +1,22 @@ | |||
1 | #include <linux/fs.h> | 1 | #include <linux/wait.h> |
2 | 2 | ||
3 | struct fs_pin { | 3 | struct fs_pin { |
4 | wait_queue_head_t wait; | ||
5 | int done; | ||
4 | struct hlist_node s_list; | 6 | struct hlist_node s_list; |
5 | struct hlist_node m_list; | 7 | struct hlist_node m_list; |
6 | void (*kill)(struct fs_pin *); | 8 | void (*kill)(struct fs_pin *); |
7 | }; | 9 | }; |
8 | 10 | ||
11 | struct vfsmount; | ||
12 | |||
13 | static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *)) | ||
14 | { | ||
15 | init_waitqueue_head(&p->wait); | ||
16 | p->kill = kill; | ||
17 | } | ||
18 | |||
9 | void pin_remove(struct fs_pin *); | 19 | void pin_remove(struct fs_pin *); |
10 | void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *); | 20 | void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *); |
11 | void pin_insert(struct fs_pin *, struct vfsmount *); | 21 | void pin_insert(struct fs_pin *, struct vfsmount *); |
22 | void pin_kill(struct fs_pin *); | ||
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index b9cf6c51b181..918b117a7cd3 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h | |||
@@ -19,7 +19,7 @@ struct pidmap { | |||
19 | #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) | 19 | #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) |
20 | #define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) | 20 | #define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) |
21 | 21 | ||
22 | struct bsd_acct_struct; | 22 | struct fs_pin; |
23 | 23 | ||
24 | struct pid_namespace { | 24 | struct pid_namespace { |
25 | struct kref kref; | 25 | struct kref kref; |
@@ -37,7 +37,7 @@ struct pid_namespace { | |||
37 | struct dentry *proc_thread_self; | 37 | struct dentry *proc_thread_self; |
38 | #endif | 38 | #endif |
39 | #ifdef CONFIG_BSD_PROCESS_ACCT | 39 | #ifdef CONFIG_BSD_PROCESS_ACCT |
40 | struct bsd_acct_struct *bacct; | 40 | struct fs_pin *bacct; |
41 | #endif | 41 | #endif |
42 | struct user_namespace *user_ns; | 42 | struct user_namespace *user_ns; |
43 | struct work_struct proc_work; | 43 | struct work_struct proc_work; |
diff --git a/kernel/acct.c b/kernel/acct.c index cf6588ab517b..e6c10d1a4058 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
@@ -76,7 +76,6 @@ int acct_parm[3] = {4, 2, 30}; | |||
76 | /* | 76 | /* |
77 | * External references and all of the globals. | 77 | * External references and all of the globals. |
78 | */ | 78 | */ |
79 | static void do_acct_process(struct bsd_acct_struct *acct); | ||
80 | 79 | ||
81 | struct bsd_acct_struct { | 80 | struct bsd_acct_struct { |
82 | struct fs_pin pin; | 81 | struct fs_pin pin; |
@@ -91,6 +90,8 @@ struct bsd_acct_struct { | |||
91 | struct completion done; | 90 | struct completion done; |
92 | }; | 91 | }; |
93 | 92 | ||
93 | static void do_acct_process(struct bsd_acct_struct *acct); | ||
94 | |||
94 | /* | 95 | /* |
95 | * Check the amount of free space and suspend/resume accordingly. | 96 | * Check the amount of free space and suspend/resume accordingly. |
96 | */ | 97 | */ |
@@ -132,13 +133,18 @@ static void acct_put(struct bsd_acct_struct *p) | |||
132 | kfree_rcu(p, rcu); | 133 | kfree_rcu(p, rcu); |
133 | } | 134 | } |
134 | 135 | ||
136 | static inline struct bsd_acct_struct *to_acct(struct fs_pin *p) | ||
137 | { | ||
138 | return p ? container_of(p, struct bsd_acct_struct, pin) : NULL; | ||
139 | } | ||
140 | |||
135 | static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) | 141 | static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) |
136 | { | 142 | { |
137 | struct bsd_acct_struct *res; | 143 | struct bsd_acct_struct *res; |
138 | again: | 144 | again: |
139 | smp_rmb(); | 145 | smp_rmb(); |
140 | rcu_read_lock(); | 146 | rcu_read_lock(); |
141 | res = ACCESS_ONCE(ns->bacct); | 147 | res = to_acct(ACCESS_ONCE(ns->bacct)); |
142 | if (!res) { | 148 | if (!res) { |
143 | rcu_read_unlock(); | 149 | rcu_read_unlock(); |
144 | return NULL; | 150 | return NULL; |
@@ -150,7 +156,7 @@ again: | |||
150 | } | 156 | } |
151 | rcu_read_unlock(); | 157 | rcu_read_unlock(); |
152 | mutex_lock(&res->lock); | 158 | mutex_lock(&res->lock); |
153 | if (!res->ns) { | 159 | if (res != to_acct(ACCESS_ONCE(ns->bacct))) { |
154 | mutex_unlock(&res->lock); | 160 | mutex_unlock(&res->lock); |
155 | acct_put(res); | 161 | acct_put(res); |
156 | goto again; | 162 | goto again; |
@@ -158,6 +164,19 @@ again: | |||
158 | return res; | 164 | return res; |
159 | } | 165 | } |
160 | 166 | ||
167 | static void acct_pin_kill(struct fs_pin *pin) | ||
168 | { | ||
169 | struct bsd_acct_struct *acct = to_acct(pin); | ||
170 | mutex_lock(&acct->lock); | ||
171 | do_acct_process(acct); | ||
172 | schedule_work(&acct->work); | ||
173 | wait_for_completion(&acct->done); | ||
174 | cmpxchg(&acct->ns->bacct, pin, NULL); | ||
175 | mutex_unlock(&acct->lock); | ||
176 | pin_remove(pin); | ||
177 | acct_put(acct); | ||
178 | } | ||
179 | |||
161 | static void close_work(struct work_struct *work) | 180 | static void close_work(struct work_struct *work) |
162 | { | 181 | { |
163 | struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); | 182 | struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); |
@@ -168,49 +187,13 @@ static void close_work(struct work_struct *work) | |||
168 | complete(&acct->done); | 187 | complete(&acct->done); |
169 | } | 188 | } |
170 | 189 | ||
171 | static void acct_kill(struct bsd_acct_struct *acct) | ||
172 | { | ||
173 | if (acct) { | ||
174 | struct pid_namespace *ns = acct->ns; | ||
175 | do_acct_process(acct); | ||
176 | INIT_WORK(&acct->work, close_work); | ||
177 | init_completion(&acct->done); | ||
178 | schedule_work(&acct->work); | ||
179 | wait_for_completion(&acct->done); | ||
180 | pin_remove(&acct->pin); | ||
181 | cmpxchg(&ns->bacct, acct, NULL); | ||
182 | acct->ns = NULL; | ||
183 | atomic_long_dec(&acct->count); | ||
184 | mutex_unlock(&acct->lock); | ||
185 | acct_put(acct); | ||
186 | } | ||
187 | } | ||
188 | |||
189 | static void acct_pin_kill(struct fs_pin *pin) | ||
190 | { | ||
191 | struct bsd_acct_struct *acct; | ||
192 | acct = container_of(pin, struct bsd_acct_struct, pin); | ||
193 | if (!atomic_long_inc_not_zero(&acct->count)) { | ||
194 | rcu_read_unlock(); | ||
195 | cpu_relax(); | ||
196 | return; | ||
197 | } | ||
198 | rcu_read_unlock(); | ||
199 | mutex_lock(&acct->lock); | ||
200 | if (!acct->ns) { | ||
201 | mutex_unlock(&acct->lock); | ||
202 | acct_put(acct); | ||
203 | acct = NULL; | ||
204 | } | ||
205 | acct_kill(acct); | ||
206 | } | ||
207 | |||
208 | static int acct_on(struct filename *pathname) | 190 | static int acct_on(struct filename *pathname) |
209 | { | 191 | { |
210 | struct file *file; | 192 | struct file *file; |
211 | struct vfsmount *mnt, *internal; | 193 | struct vfsmount *mnt, *internal; |
212 | struct pid_namespace *ns = task_active_pid_ns(current); | 194 | struct pid_namespace *ns = task_active_pid_ns(current); |
213 | struct bsd_acct_struct *acct, *old; | 195 | struct bsd_acct_struct *acct; |
196 | struct fs_pin *old; | ||
214 | int err; | 197 | int err; |
215 | 198 | ||
216 | acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); | 199 | acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); |
@@ -252,18 +235,20 @@ static int acct_on(struct filename *pathname) | |||
252 | file->f_path.mnt = internal; | 235 | file->f_path.mnt = internal; |
253 | 236 | ||
254 | atomic_long_set(&acct->count, 1); | 237 | atomic_long_set(&acct->count, 1); |
255 | acct->pin.kill = acct_pin_kill; | 238 | init_fs_pin(&acct->pin, acct_pin_kill); |
256 | acct->file = file; | 239 | acct->file = file; |
257 | acct->needcheck = jiffies; | 240 | acct->needcheck = jiffies; |
258 | acct->ns = ns; | 241 | acct->ns = ns; |
259 | mutex_init(&acct->lock); | 242 | mutex_init(&acct->lock); |
243 | INIT_WORK(&acct->work, close_work); | ||
244 | init_completion(&acct->done); | ||
260 | mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ | 245 | mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ |
261 | pin_insert(&acct->pin, mnt); | 246 | pin_insert(&acct->pin, mnt); |
262 | 247 | ||
263 | old = acct_get(ns); | 248 | rcu_read_lock(); |
264 | ns->bacct = acct; | 249 | old = xchg(&ns->bacct, &acct->pin); |
265 | acct_kill(old); | ||
266 | mutex_unlock(&acct->lock); | 250 | mutex_unlock(&acct->lock); |
251 | pin_kill(old); | ||
267 | mnt_drop_write(mnt); | 252 | mnt_drop_write(mnt); |
268 | mntput(mnt); | 253 | mntput(mnt); |
269 | return 0; | 254 | return 0; |
@@ -299,7 +284,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) | |||
299 | mutex_unlock(&acct_on_mutex); | 284 | mutex_unlock(&acct_on_mutex); |
300 | putname(tmp); | 285 | putname(tmp); |
301 | } else { | 286 | } else { |
302 | acct_kill(acct_get(task_active_pid_ns(current))); | 287 | rcu_read_lock(); |
288 | pin_kill(task_active_pid_ns(current)->bacct); | ||
303 | } | 289 | } |
304 | 290 | ||
305 | return error; | 291 | return error; |
@@ -307,7 +293,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) | |||
307 | 293 | ||
308 | void acct_exit_ns(struct pid_namespace *ns) | 294 | void acct_exit_ns(struct pid_namespace *ns) |
309 | { | 295 | { |
310 | acct_kill(acct_get(ns)); | 296 | rcu_read_lock(); |
297 | pin_kill(ns->bacct); | ||
311 | } | 298 | } |
312 | 299 | ||
313 | /* | 300 | /* |