about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Al Viro <viro@zeniv.linux.org.uk> 2015-01-10 17:53:21 -0500
committer Al Viro <viro@zeniv.linux.org.uk> 2015-01-25 23:17:28 -0500
commit 59eda0e07f43c950d31756213b607af673e551f0 (patch)
tree f40f7b67133576c36a65a4cba9aca5df68d00f34
parent fdab684d7202774bfd8762d4a656a553b787c8ec (diff)
new fs_pin killing logics
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- fs/fs_pin.c 54
-rw-r--r-- include/linux/fs_pin.h 13
-rw-r--r-- include/linux/pid_namespace.h 4
-rw-r--r-- kernel/acct.c 81
4 files changed, 96 insertions, 56 deletions
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index 50ef7d2ef03c..0c77bdc238b2 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -1,4 +1,5 @@
1#include <linux/fs.h> 1#include <linux/fs.h>
2#include <linux/sched.h>
2#include <linux/slab.h> 3#include <linux/slab.h>
3#include <linux/fs_pin.h> 4#include <linux/fs_pin.h>
4#include "internal.h" 5#include "internal.h"
@@ -12,6 +13,10 @@ void pin_remove(struct fs_pin *pin)
12 hlist_del(&pin->m_list); 13 hlist_del(&pin->m_list);
13 hlist_del(&pin->s_list); 14 hlist_del(&pin->s_list);
14 spin_unlock(&pin_lock); 15 spin_unlock(&pin_lock);
16 spin_lock_irq(&pin->wait.lock);
17 pin->done = 1;
18 wake_up_locked(&pin->wait);
19 spin_unlock_irq(&pin->wait.lock);
15} 20}
16 21
17void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) 22void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p)
@@ -28,19 +33,58 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m)
28 pin_insert_group(pin, m, &m->mnt_sb->s_pins); 33 pin_insert_group(pin, m, &m->mnt_sb->s_pins);
29} 34}
30 35
36void pin_kill(struct fs_pin *p)
37{
38 wait_queue_t wait;
39
40 if (!p) {
41 rcu_read_unlock();
42 return;
43 }
44 init_wait(&wait);
45 spin_lock_irq(&p->wait.lock);
46 if (likely(!p->done)) {
47 p->done = -1;
48 spin_unlock_irq(&p->wait.lock);
49 rcu_read_unlock();
50 p->kill(p);
51 return;
52 }
53 if (p->done > 0) {
54 spin_unlock_irq(&p->wait.lock);
55 rcu_read_unlock();
56 return;
57 }
58 __add_wait_queue(&p->wait, &wait);
59 while (1) {
60 set_current_state(TASK_UNINTERRUPTIBLE);
61 spin_unlock_irq(&p->wait.lock);
62 rcu_read_unlock();
63 schedule();
64 rcu_read_lock();
65 if (likely(list_empty(&wait.task_list)))
66 break;
67 /* OK, we know p couldn't have been freed yet */
68 spin_lock_irq(&p->wait.lock);
69 if (p->done > 0) {
70 spin_unlock_irq(&p->wait.lock);
71 break;
72 }
73 }
74 rcu_read_unlock();
75}
76
31void mnt_pin_kill(struct mount *m) 77void mnt_pin_kill(struct mount *m)
32{ 78{
33 while (1) { 79 while (1) {
34 struct hlist_node *p; 80 struct hlist_node *p;
35 struct fs_pin *pin;
36 rcu_read_lock(); 81 rcu_read_lock();
37 p = ACCESS_ONCE(m->mnt_pins.first); 82 p = ACCESS_ONCE(m->mnt_pins.first);
38 if (!p) { 83 if (!p) {
39 rcu_read_unlock(); 84 rcu_read_unlock();
40 break; 85 break;
41 } 86 }
42 pin = hlist_entry(p, struct fs_pin, m_list); 87 pin_kill(hlist_entry(p, struct fs_pin, m_list));
43 pin->kill(pin);
44 } 88 }
45} 89}
46 90
@@ -48,14 +92,12 @@ void group_pin_kill(struct hlist_head *p)
48{ 92{
49 while (1) { 93 while (1) {
50 struct hlist_node *q; 94 struct hlist_node *q;
51 struct fs_pin *pin;
52 rcu_read_lock(); 95 rcu_read_lock();
53 q = ACCESS_ONCE(p->first); 96 q = ACCESS_ONCE(p->first);
54 if (!q) { 97 if (!q) {
55 rcu_read_unlock(); 98 rcu_read_unlock();
56 break; 99 break;
57 } 100 }
58 pin = hlist_entry(q, struct fs_pin, s_list); 101 pin_kill(hlist_entry(q, struct fs_pin, s_list));
59 pin->kill(pin);
60 } 102 }
61} 103}
diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h
index 2be38d1464ae..9dc4e0384bfb 100644
--- a/include/linux/fs_pin.h
+++ b/include/linux/fs_pin.h
@@ -1,11 +1,22 @@
1#include <linux/fs.h> 1#include <linux/wait.h>
2 2
3struct fs_pin { 3struct fs_pin {
4 wait_queue_head_t wait;
5 int done;
4 struct hlist_node s_list; 6 struct hlist_node s_list;
5 struct hlist_node m_list; 7 struct hlist_node m_list;
6 void (*kill)(struct fs_pin *); 8 void (*kill)(struct fs_pin *);
7}; 9};
8 10
11struct vfsmount;
12
13static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *))
14{
15 init_waitqueue_head(&p->wait);
16 p->kill = kill;
17}
18
9void pin_remove(struct fs_pin *); 19void pin_remove(struct fs_pin *);
10void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *); 20void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *);
11void pin_insert(struct fs_pin *, struct vfsmount *); 21void pin_insert(struct fs_pin *, struct vfsmount *);
22void pin_kill(struct fs_pin *);
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index b9cf6c51b181..918b117a7cd3 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -19,7 +19,7 @@ struct pidmap {
19#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) 19#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
20#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) 20#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE)
21 21
22struct bsd_acct_struct; 22struct fs_pin;
23 23
24struct pid_namespace { 24struct pid_namespace {
25 struct kref kref; 25 struct kref kref;
@@ -37,7 +37,7 @@ struct pid_namespace {
37 struct dentry *proc_thread_self; 37 struct dentry *proc_thread_self;
38#endif 38#endif
39#ifdef CONFIG_BSD_PROCESS_ACCT 39#ifdef CONFIG_BSD_PROCESS_ACCT
40 struct bsd_acct_struct *bacct; 40 struct fs_pin *bacct;
41#endif 41#endif
42 struct user_namespace *user_ns; 42 struct user_namespace *user_ns;
43 struct work_struct proc_work; 43 struct work_struct proc_work;
diff --git a/kernel/acct.c b/kernel/acct.c
index cf6588ab517b..e6c10d1a4058 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -76,7 +76,6 @@ int acct_parm[3] = {4, 2, 30};
76/* 76/*
77 * External references and all of the globals. 77 * External references and all of the globals.
78 */ 78 */
79static void do_acct_process(struct bsd_acct_struct *acct);
80 79
81struct bsd_acct_struct { 80struct bsd_acct_struct {
82 struct fs_pin pin; 81 struct fs_pin pin;
@@ -91,6 +90,8 @@ struct bsd_acct_struct {
91 struct completion done; 90 struct completion done;
92}; 91};
93 92
93static void do_acct_process(struct bsd_acct_struct *acct);
94
94/* 95/*
95 * Check the amount of free space and suspend/resume accordingly. 96 * Check the amount of free space and suspend/resume accordingly.
96 */ 97 */
@@ -132,13 +133,18 @@ static void acct_put(struct bsd_acct_struct *p)
132 kfree_rcu(p, rcu); 133 kfree_rcu(p, rcu);
133} 134}
134 135
136static inline struct bsd_acct_struct *to_acct(struct fs_pin *p)
137{
138 return p ? container_of(p, struct bsd_acct_struct, pin) : NULL;
139}
140
135static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) 141static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
136{ 142{
137 struct bsd_acct_struct *res; 143 struct bsd_acct_struct *res;
138again: 144again:
139 smp_rmb(); 145 smp_rmb();
140 rcu_read_lock(); 146 rcu_read_lock();
141 res = ACCESS_ONCE(ns->bacct); 147 res = to_acct(ACCESS_ONCE(ns->bacct));
142 if (!res) { 148 if (!res) {
143 rcu_read_unlock(); 149 rcu_read_unlock();
144 return NULL; 150 return NULL;
@@ -150,7 +156,7 @@ again:
150 } 156 }
151 rcu_read_unlock(); 157 rcu_read_unlock();
152 mutex_lock(&res->lock); 158 mutex_lock(&res->lock);
153 if (!res->ns) { 159 if (res != to_acct(ACCESS_ONCE(ns->bacct))) {
154 mutex_unlock(&res->lock); 160 mutex_unlock(&res->lock);
155 acct_put(res); 161 acct_put(res);
156 goto again; 162 goto again;
@@ -158,6 +164,19 @@ again:
158 return res; 164 return res;
159} 165}
160 166
167static void acct_pin_kill(struct fs_pin *pin)
168{
169 struct bsd_acct_struct *acct = to_acct(pin);
170 mutex_lock(&acct->lock);
171 do_acct_process(acct);
172 schedule_work(&acct->work);
173 wait_for_completion(&acct->done);
174 cmpxchg(&acct->ns->bacct, pin, NULL);
175 mutex_unlock(&acct->lock);
176 pin_remove(pin);
177 acct_put(acct);
178}
179
161static void close_work(struct work_struct *work) 180static void close_work(struct work_struct *work)
162{ 181{
163 struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); 182 struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
@@ -168,49 +187,13 @@ static void close_work(struct work_struct *work)
168 complete(&acct->done); 187 complete(&acct->done);
169} 188}
170 189
171static void acct_kill(struct bsd_acct_struct *acct)
172{
173 if (acct) {
174 struct pid_namespace *ns = acct->ns;
175 do_acct_process(acct);
176 INIT_WORK(&acct->work, close_work);
177 init_completion(&acct->done);
178 schedule_work(&acct->work);
179 wait_for_completion(&acct->done);
180 pin_remove(&acct->pin);
181 cmpxchg(&ns->bacct, acct, NULL);
182 acct->ns = NULL;
183 atomic_long_dec(&acct->count);
184 mutex_unlock(&acct->lock);
185 acct_put(acct);
186 }
187}
188
189static void acct_pin_kill(struct fs_pin *pin)
190{
191 struct bsd_acct_struct *acct;
192 acct = container_of(pin, struct bsd_acct_struct, pin);
193 if (!atomic_long_inc_not_zero(&acct->count)) {
194 rcu_read_unlock();
195 cpu_relax();
196 return;
197 }
198 rcu_read_unlock();
199 mutex_lock(&acct->lock);
200 if (!acct->ns) {
201 mutex_unlock(&acct->lock);
202 acct_put(acct);
203 acct = NULL;
204 }
205 acct_kill(acct);
206}
207
208static int acct_on(struct filename *pathname) 190static int acct_on(struct filename *pathname)
209{ 191{
210 struct file *file; 192 struct file *file;
211 struct vfsmount *mnt, *internal; 193 struct vfsmount *mnt, *internal;
212 struct pid_namespace *ns = task_active_pid_ns(current); 194 struct pid_namespace *ns = task_active_pid_ns(current);
213 struct bsd_acct_struct *acct, *old; 195 struct bsd_acct_struct *acct;
196 struct fs_pin *old;
214 int err; 197 int err;
215 198
216 acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); 199 acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
@@ -252,18 +235,20 @@ static int acct_on(struct filename *pathname)
252 file->f_path.mnt = internal; 235 file->f_path.mnt = internal;
253 236
254 atomic_long_set(&acct->count, 1); 237 atomic_long_set(&acct->count, 1);
255 acct->pin.kill = acct_pin_kill; 238 init_fs_pin(&acct->pin, acct_pin_kill);
256 acct->file = file; 239 acct->file = file;
257 acct->needcheck = jiffies; 240 acct->needcheck = jiffies;
258 acct->ns = ns; 241 acct->ns = ns;
259 mutex_init(&acct->lock); 242 mutex_init(&acct->lock);
243 INIT_WORK(&acct->work, close_work);
244 init_completion(&acct->done);
260 mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ 245 mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */
261 pin_insert(&acct->pin, mnt); 246 pin_insert(&acct->pin, mnt);
262 247
263 old = acct_get(ns); 248 rcu_read_lock();
264 ns->bacct = acct; 249 old = xchg(&ns->bacct, &acct->pin);
265 acct_kill(old);
266 mutex_unlock(&acct->lock); 250 mutex_unlock(&acct->lock);
251 pin_kill(old);
267 mnt_drop_write(mnt); 252 mnt_drop_write(mnt);
268 mntput(mnt); 253 mntput(mnt);
269 return 0; 254 return 0;
@@ -299,7 +284,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
299 mutex_unlock(&acct_on_mutex); 284 mutex_unlock(&acct_on_mutex);
300 putname(tmp); 285 putname(tmp);
301 } else { 286 } else {
302 acct_kill(acct_get(task_active_pid_ns(current))); 287 rcu_read_lock();
288 pin_kill(task_active_pid_ns(current)->bacct);
303 } 289 }
304 290
305 return error; 291 return error;
@@ -307,7 +293,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
307 293
308void acct_exit_ns(struct pid_namespace *ns) 294void acct_exit_ns(struct pid_namespace *ns)
309{ 295{
310 acct_kill(acct_get(ns)); 296 rcu_read_lock();
297 pin_kill(ns->bacct);
311} 298}
312 299
313/* 300/*