author	Linus Torvalds <torvalds@linux-foundation.org>	2018-12-27 14:58:50 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-12-27 14:58:50 -0500
commit	047ce6d380e8e66cfb6cbc22e873af89dd0c216c (patch)
tree	725d5f911d34ed234a5df8b6ef07772ca6c678b9 /kernel/audit_tree.c
parent	a3b5c1065f3fb934a87dd07d23def99916023d6f (diff)
parent	d406db524c32ca35bd85cada28a547fff3115715 (diff)
Merge tag 'audit-pr-20181224' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit
Pull audit updates from Paul Moore:
 "In the finest of holiday traditions, I have a number of gifts to share
  today. While most of them are re-gifts from others, unlike the typical
  re-gift, these are things you will want in and around your tree; I
  promise.

  This pull request is perhaps a bit larger than our typical PR, but most
  of it comes from Jan's rework of audit's fanotify code; a very welcome
  improvement. We ran this through our normal regression tests, as well
  as some newly created stress tests, and everything looks good.

  Richard added a few patches, mostly cleaning up a few things and
  shortening some of the audit records that we send to userspace; a
  change the userspace folks are quite happy about.

  Finally, YueHaibing and I kick in a few patches to simplify things a
  bit and make the code less prone to errors.

  Lastly, I want to say thanks one more time to everyone who has
  contributed patches, testing, and code reviews for the audit subsystem
  over the past year. The project is what it is due to your help and
  contributions - thank you"

* tag 'audit-pr-20181224' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit: (22 commits)
  audit: remove duplicated include from audit.c
  audit: shorten PATH cap values when zero
  audit: use current whenever possible
  audit: minimize our use of audit_log_format()
  audit: remove WATCH and TREE config options
  audit: use session_info helper
  audit: localize audit_log_session_info prototype
  audit: Use 'mark' name for fsnotify_mark variables
  audit: Replace chunk attached to mark instead of replacing mark
  audit: Simplify locking around untag_chunk()
  audit: Drop all unused chunk nodes during deletion
  audit: Guarantee forward progress of chunk untagging
  audit: Allocate fsnotify mark independently of chunk
  audit: Provide helper for dropping mark's chunk reference
  audit: Remove pointless check in insert_hash()
  audit: Factor out chunk replacement code
  audit: Make hash table insertion safe against concurrent lookups
  audit: Embed key into chunk
  audit: Fix possible tagging failures
  audit: Fix possible spurious -ENOSPC error
  ...
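The core of Jan's rework visible in the diff below is a change of ownership between a chunk and its fsnotify mark: the mark is no longer embedded in struct audit_chunk; instead a small audit_tree_mark wrapper embeds the mark and points at the current chunk, and the chunk now carries its own hash key. The following standalone sketch illustrates that relationship and how the chunk behind a stable mark gets swapped; the types are simplified stand-ins and all locking/RCU is omitted, so this is not kernel code:

/*
 * Standalone sketch (not kernel code) of the mark<->chunk indirection
 * introduced by this series. Names mirror the patch; types are stubs.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fsnotify_mark {		/* stand-in for the real fsnotify type */
	int dummy;
};

struct audit_chunk {
	unsigned long key;		/* hash key, now stored in the chunk itself */
	struct fsnotify_mark *mark;	/* back-pointer, NULL once detached */
	int count;
};

struct audit_tree_mark {	/* wrapper introduced by the series */
	struct fsnotify_mark mark;
	struct audit_chunk *chunk;
};

static struct audit_tree_mark *audit_mark(struct fsnotify_mark *mark)
{
	return container_of(mark, struct audit_tree_mark, mark);
}

static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
{
	return audit_mark(mark)->chunk;
}

/* Swap the chunk behind a stable mark, as replace_mark_chunk() does. */
static void replace_mark_chunk(struct fsnotify_mark *mark,
			       struct audit_chunk *chunk)
{
	struct audit_chunk *old = mark_chunk(mark);

	audit_mark(mark)->chunk = chunk;
	if (chunk)
		chunk->mark = mark;
	if (old)
		old->mark = NULL;
}

int main(void)
{
	struct audit_tree_mark amark = { .chunk = NULL };
	struct audit_chunk c1 = { .key = 0x1000, .count = 1 };
	struct audit_chunk c2 = { .key = 0x1000, .count = 2 };

	replace_mark_chunk(&amark.mark, &c1);	/* initial tagging */
	replace_mark_chunk(&amark.mark, &c2);	/* grow: c2 replaces c1 */

	printf("mark now points at chunk with count=%d\n",
	       mark_chunk(&amark.mark)->count);
	printf("old chunk's mark pointer is %s\n",
	       c1.mark ? "still set (bug)" : "cleared");
	return 0;
}

In the real code the swap happens under hash_lock and audit_tree_group->mark_mutex, which is what makes the mark a stable handle for the current chunk.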
Diffstat (limited to 'kernel/audit_tree.c')
-rw-r--r--	kernel/audit_tree.c	498
1 file changed, 277 insertions, 221 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ea43181cde4a..d4af4d97f847 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -24,9 +24,9 @@ struct audit_tree {
 
 struct audit_chunk {
 	struct list_head hash;
-	struct fsnotify_mark mark;
+	unsigned long key;
+	struct fsnotify_mark *mark;
 	struct list_head trees;	/* with root here */
-	int dead;
 	int count;
 	atomic_long_t refs;
 	struct rcu_head head;
@@ -37,13 +37,25 @@ struct audit_chunk {
 	} owners[];
 };
 
+struct audit_tree_mark {
+	struct fsnotify_mark mark;
+	struct audit_chunk *chunk;
+};
+
 static LIST_HEAD(tree_list);
 static LIST_HEAD(prune_list);
 static struct task_struct *prune_thread;
 
 /*
- * One struct chunk is attached to each inode of interest.
- * We replace struct chunk on tagging/untagging.
+ * One struct chunk is attached to each inode of interest through
+ * audit_tree_mark (fsnotify mark). We replace struct chunk on tagging /
+ * untagging, the mark is stable as long as there is chunk attached. The
+ * association between mark and chunk is protected by hash_lock and
+ * audit_tree_group->mark_mutex. Thus as long as we hold
+ * audit_tree_group->mark_mutex and check that the mark is alive by
+ * FSNOTIFY_MARK_FLAG_ATTACHED flag check, we are sure the mark points to
+ * the current chunk.
+ *
  * Rules have pointer to struct audit_tree.
  * Rules have struct list_head rlist forming a list of rules over
  * the same tree.
@@ -62,8 +74,12 @@ static struct task_struct *prune_thread;
  * tree is refcounted; one reference for "some rules on rules_list refer to
  * it", one for each chunk with pointer to it.
  *
- * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
- * of watch contributes 1 to .refs).
+ * chunk is refcounted by embedded .refs. Mark associated with the chunk holds
+ * one chunk reference. This reference is dropped either when a mark is going
+ * to be freed (corresponding inode goes away) or when chunk attached to the
+ * mark gets replaced. This reference must be dropped using
+ * audit_mark_put_chunk() to make sure the reference is dropped only after RCU
+ * grace period as it protects RCU readers of the hash table.
  *
  * node.index allows to get from node.list to containing chunk.
  * MSB of that sucker is stolen to mark taggings that we might have to
@@ -72,6 +88,7 @@ static struct task_struct *prune_thread;
  */
 
 static struct fsnotify_group *audit_tree_group;
+static struct kmem_cache *audit_tree_mark_cachep __read_mostly;
 
 static struct audit_tree *alloc_tree(const char *s)
 {
@@ -131,12 +148,43 @@ static void __put_chunk(struct rcu_head *rcu)
 	audit_put_chunk(chunk);
 }
 
-static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
+/*
+ * Drop reference to the chunk that was held by the mark. This is the reference
+ * that gets dropped after we've removed the chunk from the hash table and we
+ * use it to make sure chunk cannot be freed before RCU grace period expires.
+ */
+static void audit_mark_put_chunk(struct audit_chunk *chunk)
 {
-	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
 	call_rcu(&chunk->head, __put_chunk);
 }
 
+static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *mark)
+{
+	return container_of(mark, struct audit_tree_mark, mark);
+}
+
+static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
+{
+	return audit_mark(mark)->chunk;
+}
+
+static void audit_tree_destroy_watch(struct fsnotify_mark *mark)
+{
+	kmem_cache_free(audit_tree_mark_cachep, audit_mark(mark));
+}
+
+static struct fsnotify_mark *alloc_mark(void)
+{
+	struct audit_tree_mark *amark;
+
+	amark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL);
+	if (!amark)
+		return NULL;
+	fsnotify_init_mark(&amark->mark, audit_tree_group);
+	amark->mark.mask = FS_IN_IGNORED;
+	return &amark->mark;
+}
+
 static struct audit_chunk *alloc_chunk(int count)
 {
 	struct audit_chunk *chunk;
@@ -156,8 +204,6 @@ static struct audit_chunk *alloc_chunk(int count)
 		INIT_LIST_HEAD(&chunk->owners[i].list);
 		chunk->owners[i].index = i;
 	}
-	fsnotify_init_mark(&chunk->mark, audit_tree_group);
-	chunk->mark.mask = FS_IN_IGNORED;
 	return chunk;
 }
 
@@ -172,36 +218,25 @@ static unsigned long inode_to_key(const struct inode *inode)
 	return (unsigned long)&inode->i_fsnotify_marks;
 }
 
-/*
- * Function to return search key in our hash from chunk. Key 0 is special and
- * should never be present in the hash.
- */
-static unsigned long chunk_to_key(struct audit_chunk *chunk)
-{
-	/*
-	 * We have a reference to the mark so it should be attached to a
-	 * connector.
-	 */
-	if (WARN_ON_ONCE(!chunk->mark.connector))
-		return 0;
-	return (unsigned long)chunk->mark.connector->obj;
-}
-
 static inline struct list_head *chunk_hash(unsigned long key)
 {
 	unsigned long n = key / L1_CACHE_BYTES;
 	return chunk_hash_heads + n % HASH_SIZE;
 }
 
-/* hash_lock & entry->lock is held by caller */
+/* hash_lock & mark->group->mark_mutex is held by caller */
 static void insert_hash(struct audit_chunk *chunk)
 {
-	unsigned long key = chunk_to_key(chunk);
 	struct list_head *list;
 
-	if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
-		return;
-	list = chunk_hash(key);
+	/*
+	 * Make sure chunk is fully initialized before making it visible in the
+	 * hash. Pairs with a data dependency barrier in READ_ONCE() in
+	 * audit_tree_lookup().
+	 */
+	smp_wmb();
+	WARN_ON_ONCE(!chunk->key);
+	list = chunk_hash(chunk->key);
 	list_add_rcu(&chunk->hash, list);
 }
 
@@ -213,7 +248,11 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
 	struct audit_chunk *p;
 
 	list_for_each_entry_rcu(p, list, hash) {
-		if (chunk_to_key(p) == key) {
+		/*
+		 * We use a data dependency barrier in READ_ONCE() to make sure
+		 * the chunk we see is fully initialized.
+		 */
+		if (READ_ONCE(p->key) == key) {
 			atomic_long_inc(&p->refs);
 			return p;
 		}
@@ -239,137 +278,159 @@ static struct audit_chunk *find_chunk(struct node *p)
 	return container_of(p, struct audit_chunk, owners[0]);
 }
 
-static void untag_chunk(struct node *p)
+static void replace_mark_chunk(struct fsnotify_mark *mark,
+			       struct audit_chunk *chunk)
+{
+	struct audit_chunk *old;
+
+	assert_spin_locked(&hash_lock);
+	old = mark_chunk(mark);
+	audit_mark(mark)->chunk = chunk;
+	if (chunk)
+		chunk->mark = mark;
+	if (old)
+		old->mark = NULL;
+}
+
+static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
 {
-	struct audit_chunk *chunk = find_chunk(p);
-	struct fsnotify_mark *entry = &chunk->mark;
-	struct audit_chunk *new = NULL;
 	struct audit_tree *owner;
-	int size = chunk->count - 1;
 	int i, j;
 
-	fsnotify_get_mark(entry);
+	new->key = old->key;
+	list_splice_init(&old->trees, &new->trees);
+	list_for_each_entry(owner, &new->trees, same_root)
+		owner->root = new;
+	for (i = j = 0; j < old->count; i++, j++) {
+		if (!old->owners[j].owner) {
+			i--;
+			continue;
+		}
+		owner = old->owners[j].owner;
+		new->owners[i].owner = owner;
+		new->owners[i].index = old->owners[j].index - j + i;
+		if (!owner) /* result of earlier fallback */
+			continue;
+		get_tree(owner);
+		list_replace_init(&old->owners[j].list, &new->owners[i].list);
+	}
+	replace_mark_chunk(old->mark, new);
+	/*
+	 * Make sure chunk is fully initialized before making it visible in the
+	 * hash. Pairs with a data dependency barrier in READ_ONCE() in
+	 * audit_tree_lookup().
+	 */
+	smp_wmb();
+	list_replace_rcu(&old->hash, &new->hash);
+}
 
-	spin_unlock(&hash_lock);
+static void remove_chunk_node(struct audit_chunk *chunk, struct node *p)
+{
+	struct audit_tree *owner = p->owner;
+
+	if (owner->root == chunk) {
+		list_del_init(&owner->same_root);
+		owner->root = NULL;
+	}
+	list_del_init(&p->list);
+	p->owner = NULL;
+	put_tree(owner);
+}
 
-	if (size)
-		new = alloc_chunk(size);
+static int chunk_count_trees(struct audit_chunk *chunk)
+{
+	int i;
+	int ret = 0;
 
-	mutex_lock(&entry->group->mark_mutex);
-	spin_lock(&entry->lock);
+	for (i = 0; i < chunk->count; i++)
+		if (chunk->owners[i].owner)
+			ret++;
+	return ret;
+}
+
+static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark)
+{
+	struct audit_chunk *new;
+	int size;
+
+	mutex_lock(&audit_tree_group->mark_mutex);
 	/*
-	 * mark_mutex protects mark from getting detached and thus also from
-	 * mark->connector->obj getting NULL.
+	 * mark_mutex stabilizes chunk attached to the mark so we can check
+	 * whether it didn't change while we've dropped hash_lock.
 	 */
-	if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
-		spin_unlock(&entry->lock);
-		mutex_unlock(&entry->group->mark_mutex);
-		if (new)
-			fsnotify_put_mark(&new->mark);
-		goto out;
-	}
-
-	owner = p->owner;
+	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
+	    mark_chunk(mark) != chunk)
+		goto out_mutex;
 
+	size = chunk_count_trees(chunk);
 	if (!size) {
-		chunk->dead = 1;
 		spin_lock(&hash_lock);
 		list_del_init(&chunk->trees);
-		if (owner->root == chunk)
-			owner->root = NULL;
-		list_del_init(&p->list);
 		list_del_rcu(&chunk->hash);
+		replace_mark_chunk(mark, NULL);
 		spin_unlock(&hash_lock);
-		spin_unlock(&entry->lock);
-		mutex_unlock(&entry->group->mark_mutex);
-		fsnotify_destroy_mark(entry, audit_tree_group);
-		goto out;
+		fsnotify_detach_mark(mark);
+		mutex_unlock(&audit_tree_group->mark_mutex);
+		audit_mark_put_chunk(chunk);
+		fsnotify_free_mark(mark);
+		return;
 	}
 
+	new = alloc_chunk(size);
 	if (!new)
-		goto Fallback;
+		goto out_mutex;
 
-	if (fsnotify_add_mark_locked(&new->mark, entry->connector->obj,
-				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
-		fsnotify_put_mark(&new->mark);
-		goto Fallback;
-	}
-
-	chunk->dead = 1;
 	spin_lock(&hash_lock);
-	list_replace_init(&chunk->trees, &new->trees);
-	if (owner->root == chunk) {
-		list_del_init(&owner->same_root);
-		owner->root = NULL;
-	}
-
-	for (i = j = 0; j <= size; i++, j++) {
-		struct audit_tree *s;
-		if (&chunk->owners[j] == p) {
-			list_del_init(&p->list);
-			i--;
-			continue;
-		}
-		s = chunk->owners[j].owner;
-		new->owners[i].owner = s;
-		new->owners[i].index = chunk->owners[j].index - j + i;
-		if (!s) /* result of earlier fallback */
-			continue;
-		get_tree(s);
-		list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
-	}
-
-	list_replace_rcu(&chunk->hash, &new->hash);
-	list_for_each_entry(owner, &new->trees, same_root)
-		owner->root = new;
-	spin_unlock(&hash_lock);
-	spin_unlock(&entry->lock);
-	mutex_unlock(&entry->group->mark_mutex);
-	fsnotify_destroy_mark(entry, audit_tree_group);
-	fsnotify_put_mark(&new->mark);	/* drop initial reference */
-	goto out;
-
-Fallback:
-	// do the best we can
-	spin_lock(&hash_lock);
-	if (owner->root == chunk) {
-		list_del_init(&owner->same_root);
-		owner->root = NULL;
-	}
-	list_del_init(&p->list);
-	p->owner = NULL;
-	put_tree(owner);
+	/*
+	 * This has to go last when updating chunk as once replace_chunk() is
+	 * called, new RCU readers can see the new chunk.
+	 */
+	replace_chunk(new, chunk);
 	spin_unlock(&hash_lock);
-	spin_unlock(&entry->lock);
-	mutex_unlock(&entry->group->mark_mutex);
-out:
-	fsnotify_put_mark(entry);
-	spin_lock(&hash_lock);
+	mutex_unlock(&audit_tree_group->mark_mutex);
+	audit_mark_put_chunk(chunk);
+	return;
+
+out_mutex:
+	mutex_unlock(&audit_tree_group->mark_mutex);
 }
 
+/* Call with group->mark_mutex held, releases it */
 static int create_chunk(struct inode *inode, struct audit_tree *tree)
 {
-	struct fsnotify_mark *entry;
+	struct fsnotify_mark *mark;
 	struct audit_chunk *chunk = alloc_chunk(1);
-	if (!chunk)
+
+	if (!chunk) {
+		mutex_unlock(&audit_tree_group->mark_mutex);
 		return -ENOMEM;
+	}
 
-	entry = &chunk->mark;
-	if (fsnotify_add_inode_mark(entry, inode, 0)) {
-		fsnotify_put_mark(entry);
+	mark = alloc_mark();
+	if (!mark) {
+		mutex_unlock(&audit_tree_group->mark_mutex);
+		kfree(chunk);
+		return -ENOMEM;
+	}
+
+	if (fsnotify_add_inode_mark_locked(mark, inode, 0)) {
+		mutex_unlock(&audit_tree_group->mark_mutex);
+		fsnotify_put_mark(mark);
+		kfree(chunk);
 		return -ENOSPC;
 	}
 
-	spin_lock(&entry->lock);
 	spin_lock(&hash_lock);
 	if (tree->goner) {
 		spin_unlock(&hash_lock);
-		chunk->dead = 1;
-		spin_unlock(&entry->lock);
-		fsnotify_destroy_mark(entry, audit_tree_group);
-		fsnotify_put_mark(entry);
+		fsnotify_detach_mark(mark);
+		mutex_unlock(&audit_tree_group->mark_mutex);
+		fsnotify_free_mark(mark);
+		fsnotify_put_mark(mark);
+		kfree(chunk);
 		return 0;
 	}
+	replace_mark_chunk(mark, chunk);
 	chunk->owners[0].index = (1U << 31);
 	chunk->owners[0].owner = tree;
 	get_tree(tree);
@@ -378,35 +439,49 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 		tree->root = chunk;
 		list_add(&tree->same_root, &chunk->trees);
 	}
+	chunk->key = inode_to_key(inode);
+	/*
+	 * Inserting into the hash table has to go last as once we do that RCU
+	 * readers can see the chunk.
+	 */
 	insert_hash(chunk);
 	spin_unlock(&hash_lock);
-	spin_unlock(&entry->lock);
-	fsnotify_put_mark(entry);	/* drop initial reference */
+	mutex_unlock(&audit_tree_group->mark_mutex);
+	/*
+	 * Drop our initial reference. When mark we point to is getting freed,
+	 * we get notification through ->freeing_mark callback and cleanup
+	 * chunk pointing to this mark.
+	 */
+	fsnotify_put_mark(mark);
 	return 0;
 }
 
 /* the first tagged inode becomes root of tree */
 static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 {
-	struct fsnotify_mark *old_entry, *chunk_entry;
-	struct audit_tree *owner;
+	struct fsnotify_mark *mark;
 	struct audit_chunk *chunk, *old;
 	struct node *p;
 	int n;
 
-	old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
-				       audit_tree_group);
-	if (!old_entry)
+	mutex_lock(&audit_tree_group->mark_mutex);
+	mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
+	if (!mark)
 		return create_chunk(inode, tree);
 
-	old = container_of(old_entry, struct audit_chunk, mark);
-
+	/*
+	 * Found mark is guaranteed to be attached and mark_mutex protects mark
+	 * from getting detached and thus it makes sure there is chunk attached
+	 * to the mark.
+	 */
 	/* are we already there? */
 	spin_lock(&hash_lock);
+	old = mark_chunk(mark);
 	for (n = 0; n < old->count; n++) {
 		if (old->owners[n].owner == tree) {
 			spin_unlock(&hash_lock);
-			fsnotify_put_mark(old_entry);
+			mutex_unlock(&audit_tree_group->mark_mutex);
+			fsnotify_put_mark(mark);
 			return 0;
 		}
 	}
@@ -414,83 +489,38 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 
 	chunk = alloc_chunk(old->count + 1);
 	if (!chunk) {
-		fsnotify_put_mark(old_entry);
+		mutex_unlock(&audit_tree_group->mark_mutex);
+		fsnotify_put_mark(mark);
 		return -ENOMEM;
 	}
 
-	chunk_entry = &chunk->mark;
-
-	mutex_lock(&old_entry->group->mark_mutex);
-	spin_lock(&old_entry->lock);
-	/*
-	 * mark_mutex protects mark from getting detached and thus also from
-	 * mark->connector->obj getting NULL.
-	 */
-	if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
-		/* old_entry is being shot, lets just lie */
-		spin_unlock(&old_entry->lock);
-		mutex_unlock(&old_entry->group->mark_mutex);
-		fsnotify_put_mark(old_entry);
-		fsnotify_put_mark(&chunk->mark);
-		return -ENOENT;
-	}
-
-	if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
-				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
-		spin_unlock(&old_entry->lock);
-		mutex_unlock(&old_entry->group->mark_mutex);
-		fsnotify_put_mark(chunk_entry);
-		fsnotify_put_mark(old_entry);
-		return -ENOSPC;
-	}
-
-	/* even though we hold old_entry->lock, this is safe since chunk_entry->lock could NEVER have been grabbed before */
-	spin_lock(&chunk_entry->lock);
 	spin_lock(&hash_lock);
-
-	/* we now hold old_entry->lock, chunk_entry->lock, and hash_lock */
 	if (tree->goner) {
 		spin_unlock(&hash_lock);
-		chunk->dead = 1;
-		spin_unlock(&chunk_entry->lock);
-		spin_unlock(&old_entry->lock);
-		mutex_unlock(&old_entry->group->mark_mutex);
-
-		fsnotify_destroy_mark(chunk_entry, audit_tree_group);
-
-		fsnotify_put_mark(chunk_entry);
-		fsnotify_put_mark(old_entry);
+		mutex_unlock(&audit_tree_group->mark_mutex);
+		fsnotify_put_mark(mark);
+		kfree(chunk);
 		return 0;
 	}
-	list_replace_init(&old->trees, &chunk->trees);
-	for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
-		struct audit_tree *s = old->owners[n].owner;
-		p->owner = s;
-		p->index = old->owners[n].index;
-		if (!s) /* result of fallback in untag */
-			continue;
-		get_tree(s);
-		list_replace_init(&old->owners[n].list, &p->list);
-	}
+	p = &chunk->owners[chunk->count - 1];
 	p->index = (chunk->count - 1) | (1U<<31);
 	p->owner = tree;
 	get_tree(tree);
 	list_add(&p->list, &tree->chunks);
-	list_replace_rcu(&old->hash, &chunk->hash);
-	list_for_each_entry(owner, &chunk->trees, same_root)
-		owner->root = chunk;
-	old->dead = 1;
 	if (!tree->root) {
 		tree->root = chunk;
 		list_add(&tree->same_root, &chunk->trees);
 	}
+	/*
+	 * This has to go last when updating chunk as once replace_chunk() is
+	 * called, new RCU readers can see the new chunk.
+	 */
+	replace_chunk(chunk, old);
 	spin_unlock(&hash_lock);
-	spin_unlock(&chunk_entry->lock);
-	spin_unlock(&old_entry->lock);
-	mutex_unlock(&old_entry->group->mark_mutex);
-	fsnotify_destroy_mark(old_entry, audit_tree_group);
-	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
-	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
+	mutex_unlock(&audit_tree_group->mark_mutex);
+	fsnotify_put_mark(mark); /* pair to fsnotify_find_mark */
+	audit_mark_put_chunk(old);
+
 	return 0;
 }
 
@@ -503,8 +533,7 @@ static void audit_tree_log_remove_rule(struct audit_krule *rule)
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
 	if (unlikely(!ab))
 		return;
-	audit_log_format(ab, "op=remove_rule");
-	audit_log_format(ab, " dir=");
+	audit_log_format(ab, "op=remove_rule dir=");
 	audit_log_untrustedstring(ab, rule->tree->pathname);
 	audit_log_key(ab, rule->filterkey);
 	audit_log_format(ab, " list=%d res=1", rule->listnr);
@@ -534,22 +563,48 @@ static void kill_rules(struct audit_tree *tree)
 }
 
 /*
- * finish killing struct audit_tree
+ * Remove tree from chunks. If 'tagged' is set, remove tree only from tagged
+ * chunks. The function expects tagged chunks are all at the beginning of the
+ * chunks list.
  */
-static void prune_one(struct audit_tree *victim)
+static void prune_tree_chunks(struct audit_tree *victim, bool tagged)
 {
 	spin_lock(&hash_lock);
 	while (!list_empty(&victim->chunks)) {
 		struct node *p;
+		struct audit_chunk *chunk;
+		struct fsnotify_mark *mark;
+
+		p = list_first_entry(&victim->chunks, struct node, list);
+		/* have we run out of marked? */
+		if (tagged && !(p->index & (1U<<31)))
+			break;
+		chunk = find_chunk(p);
+		mark = chunk->mark;
+		remove_chunk_node(chunk, p);
+		/* Racing with audit_tree_freeing_mark()? */
+		if (!mark)
+			continue;
+		fsnotify_get_mark(mark);
+		spin_unlock(&hash_lock);
 
-		p = list_entry(victim->chunks.next, struct node, list);
+		untag_chunk(chunk, mark);
+		fsnotify_put_mark(mark);
 
-		untag_chunk(p);
+		spin_lock(&hash_lock);
 	}
 	spin_unlock(&hash_lock);
 	put_tree(victim);
 }
 
+/*
+ * finish killing struct audit_tree
+ */
+static void prune_one(struct audit_tree *victim)
+{
+	prune_tree_chunks(victim, false);
+}
+
 /* trim the uncommitted chunks from tree */
 
 static void trim_marked(struct audit_tree *tree)
@@ -569,18 +624,11 @@ static void trim_marked(struct audit_tree *tree)
 			list_add(p, &tree->chunks);
 		}
 	}
+	spin_unlock(&hash_lock);
 
-	while (!list_empty(&tree->chunks)) {
-		struct node *node;
-
-		node = list_entry(tree->chunks.next, struct node, list);
-
-		/* have we run out of marked? */
-		if (!(node->index & (1U<<31)))
-			break;
+	prune_tree_chunks(tree, true);
 
-		untag_chunk(node);
-	}
+	spin_lock(&hash_lock);
 	if (!tree->root && !tree->goner) {
 		tree->goner = 1;
 		spin_unlock(&hash_lock);
@@ -661,7 +709,7 @@ void audit_trim_trees(void)
 		/* this could be NULL if the watch is dying else where... */
 		node->index |= 1U<<31;
 		if (iterate_mounts(compare_root,
-				   (void *)chunk_to_key(chunk),
+				   (void *)(chunk->key),
 				   root_mnt))
 			node->index &= ~(1U<<31);
 	}
@@ -959,10 +1007,6 @@ static void evict_chunk(struct audit_chunk *chunk)
 	int need_prune = 0;
 	int n;
 
-	if (chunk->dead)
-		return;
-
-	chunk->dead = 1;
 	mutex_lock(&audit_filter_mutex);
 	spin_lock(&hash_lock);
 	while (!list_empty(&chunk->trees)) {
@@ -999,17 +1043,27 @@ static int audit_tree_handle_event(struct fsnotify_group *group,
 	return 0;
 }
 
-static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
+static void audit_tree_freeing_mark(struct fsnotify_mark *mark,
+				    struct fsnotify_group *group)
 {
-	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
+	struct audit_chunk *chunk;
 
-	evict_chunk(chunk);
+	mutex_lock(&mark->group->mark_mutex);
+	spin_lock(&hash_lock);
+	chunk = mark_chunk(mark);
+	replace_mark_chunk(mark, NULL);
+	spin_unlock(&hash_lock);
+	mutex_unlock(&mark->group->mark_mutex);
+	if (chunk) {
+		evict_chunk(chunk);
+		audit_mark_put_chunk(chunk);
+	}
 
 	/*
 	 * We are guaranteed to have at least one reference to the mark from
 	 * either the inode or the caller of fsnotify_destroy_mark().
 	 */
-	BUG_ON(refcount_read(&entry->refcnt) < 1);
+	BUG_ON(refcount_read(&mark->refcnt) < 1);
 }
 
 static const struct fsnotify_ops audit_tree_ops = {
@@ -1022,6 +1076,8 @@ static int __init audit_tree_init(void)
 {
 	int i;
 
+	audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);
+
 	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
 	if (IS_ERR(audit_tree_group))
 		audit_panic("cannot initialize fsnotify group for rectree watches");
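
A recurring theme in the hunks above is the pairing between the smp_wmb() in insert_hash()/replace_chunk() and the READ_ONCE() in audit_tree_lookup(): a chunk's key must be written before the chunk becomes reachable from the hash, so RCU readers never see a half-initialized chunk. The sketch below is only a userspace approximation of that ordering idea; C11 release/acquire stands in for the kernel's smp_wmb()/READ_ONCE()-plus-RCU publication, and the single-pointer hash_head is a hypothetical stand-in for the real hash bucket list:

/*
 * Userspace approximation (not kernel code) of the publish/lookup ordering
 * described by the new comments in insert_hash() and audit_tree_lookup().
 */
#include <stdatomic.h>
#include <stdio.h>

struct chunk {
	unsigned long key;
	/* ... rest of the chunk ... */
};

static _Atomic(struct chunk *) hash_head;	/* stand-in for a hash bucket */

static void publish_chunk(struct chunk *c, unsigned long key)
{
	c->key = key;			/* initialize first ... */
	/* ... then publish; release pairs with the acquire load below */
	atomic_store_explicit(&hash_head, c, memory_order_release);
}

static struct chunk *lookup_chunk(unsigned long key)
{
	struct chunk *c = atomic_load_explicit(&hash_head,
						memory_order_acquire);

	if (c && c->key == key)		/* key is guaranteed initialized */
		return c;
	return NULL;
}

int main(void)
{
	static struct chunk c;

	publish_chunk(&c, 0x2a);
	printf("lookup(0x2a) %s\n", lookup_chunk(0x2a) ? "found" : "missed");
	return 0;
}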