aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/audit.c532
-rw-r--r--kernel/audit_fsnotify.c5
-rw-r--r--kernel/audit_tree.c3
-rw-r--r--kernel/audit_watch.c5
-rw-r--r--kernel/auditfilter.c5
-rw-r--r--kernel/auditsc.c12
-rw-r--r--kernel/capability.c36
-rw-r--r--kernel/fork.c9
-rw-r--r--kernel/padata.c4
-rw-r--r--kernel/ptrace.c70
-rw-r--r--kernel/seccomp.c7
12 files changed, 433 insertions, 257 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index eb26e12c6c2a..eaee9de224bd 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -115,8 +115,6 @@ obj-$(CONFIG_HAS_IOMEM) += memremap.o
115 115
116$(obj)/configs.o: $(obj)/config_data.h 116$(obj)/configs.o: $(obj)/config_data.h
117 117
118# config_data.h contains the same information as ikconfig.h but gzipped.
119# Info from config_data can be extracted from /proc/config*
120targets += config_data.gz 118targets += config_data.gz
121$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE 119$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
122 $(call if_changed,gzip) 120 $(call if_changed,gzip)
diff --git a/kernel/audit.c b/kernel/audit.c
index 67b9fbd871be..91bff3c0b368 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -107,7 +107,6 @@ static u32 audit_rate_limit;
107 * When set to zero, this means unlimited. */ 107 * When set to zero, this means unlimited. */
108static u32 audit_backlog_limit = 64; 108static u32 audit_backlog_limit = 64;
109#define AUDIT_BACKLOG_WAIT_TIME (60 * HZ) 109#define AUDIT_BACKLOG_WAIT_TIME (60 * HZ)
110static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME;
111static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME; 110static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
112 111
113/* The identity of the user shutting down the audit system. */ 112/* The identity of the user shutting down the audit system. */
@@ -138,11 +137,18 @@ static DEFINE_SPINLOCK(audit_freelist_lock);
138static int audit_freelist_count; 137static int audit_freelist_count;
139static LIST_HEAD(audit_freelist); 138static LIST_HEAD(audit_freelist);
140 139
141static struct sk_buff_head audit_skb_queue; 140/* queue msgs to send via kauditd_task */
142/* queue of skbs to send to auditd when/if it comes back */ 141static struct sk_buff_head audit_queue;
143static struct sk_buff_head audit_skb_hold_queue; 142/* queue msgs due to temporary unicast send problems */
143static struct sk_buff_head audit_retry_queue;
144/* queue msgs waiting for new auditd connection */
145static struct sk_buff_head audit_hold_queue;
146
147/* queue servicing thread */
144static struct task_struct *kauditd_task; 148static struct task_struct *kauditd_task;
145static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); 149static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
150
151/* waitqueue for callers who are blocked on the audit backlog */
146static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); 152static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
147 153
148static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION, 154static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
@@ -338,7 +344,7 @@ static int audit_set_backlog_limit(u32 limit)
338static int audit_set_backlog_wait_time(u32 timeout) 344static int audit_set_backlog_wait_time(u32 timeout)
339{ 345{
340 return audit_do_config_change("audit_backlog_wait_time", 346 return audit_do_config_change("audit_backlog_wait_time",
341 &audit_backlog_wait_time_master, timeout); 347 &audit_backlog_wait_time, timeout);
342} 348}
343 349
344static int audit_set_enabled(u32 state) 350static int audit_set_enabled(u32 state)
@@ -365,29 +371,10 @@ static int audit_set_failure(u32 state)
365} 371}
366 372
367/* 373/*
368 * Queue skbs to be sent to auditd when/if it comes back. These skbs should
369 * already have been sent via printk/syslog and so if these messages are dropped
370 * it is not a huge concern since we already passed the audit_log_lost()
371 * notification and stuff. This is just nice to get audit messages during
372 * boot before auditd is running or messages generated while auditd is stopped.
373 * This only holds messages if audit_default is set, aka booting with audit=1
374 * or building your kernel that way.
375 */
376static void audit_hold_skb(struct sk_buff *skb)
377{
378 if (audit_default &&
379 (!audit_backlog_limit ||
380 skb_queue_len(&audit_skb_hold_queue) < audit_backlog_limit))
381 skb_queue_tail(&audit_skb_hold_queue, skb);
382 else
383 kfree_skb(skb);
384}
385
386/*
387 * For one reason or another this nlh isn't getting delivered to the userspace 374 * For one reason or another this nlh isn't getting delivered to the userspace
388 * audit daemon, just send it to printk. 375 * audit daemon, just send it to printk.
389 */ 376 */
390static void audit_printk_skb(struct sk_buff *skb) 377static void kauditd_printk_skb(struct sk_buff *skb)
391{ 378{
392 struct nlmsghdr *nlh = nlmsg_hdr(skb); 379 struct nlmsghdr *nlh = nlmsg_hdr(skb);
393 char *data = nlmsg_data(nlh); 380 char *data = nlmsg_data(nlh);
@@ -398,58 +385,123 @@ static void audit_printk_skb(struct sk_buff *skb)
398 else 385 else
399 audit_log_lost("printk limit exceeded"); 386 audit_log_lost("printk limit exceeded");
400 } 387 }
388}
389
390/**
391 * kauditd_hold_skb - Queue an audit record, waiting for auditd
392 * @skb: audit record
393 *
394 * Description:
395 * Queue the audit record, waiting for an instance of auditd. When this
396 * function is called we haven't given up yet on sending the record, but things
397 * are not looking good. The first thing we want to do is try to write the
398 * record via printk and then see if we want to try and hold on to the record
399 * and queue it, if we have room. If we want to hold on to the record, but we
400 * don't have room, record a record lost message.
401 */
402static void kauditd_hold_skb(struct sk_buff *skb)
403{
404 /* at this point it is uncertain if we will ever send this to auditd so
405 * try to send the message via printk before we go any further */
406 kauditd_printk_skb(skb);
407
408 /* can we just silently drop the message? */
409 if (!audit_default) {
410 kfree_skb(skb);
411 return;
412 }
413
414 /* if we have room, queue the message */
415 if (!audit_backlog_limit ||
416 skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
417 skb_queue_tail(&audit_hold_queue, skb);
418 return;
419 }
401 420
402 audit_hold_skb(skb); 421 /* we have no other options - drop the message */
422 audit_log_lost("kauditd hold queue overflow");
423 kfree_skb(skb);
403} 424}
404 425
405static void kauditd_send_skb(struct sk_buff *skb) 426/**
427 * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
428 * @skb: audit record
429 *
430 * Description:
431 * Not as serious as kauditd_hold_skb() as we still have a connected auditd,
432 * but for some reason we are having problems sending it audit records so
433 * queue the given record and attempt to resend.
434 */
435static void kauditd_retry_skb(struct sk_buff *skb)
406{ 436{
407 int err; 437 /* NOTE: because records should only live in the retry queue for a
408 int attempts = 0; 438 * short period of time, before either being sent or moved to the hold
409#define AUDITD_RETRIES 5 439 * queue, we don't currently enforce a limit on this queue */
440 skb_queue_tail(&audit_retry_queue, skb);
441}
442
443/**
444 * auditd_reset - Disconnect the auditd connection
445 *
446 * Description:
447 * Break the auditd/kauditd connection and move all the records in the retry
448 * queue into the hold queue in case auditd reconnects. The audit_cmd_mutex
449 * must be held when calling this function.
450 */
451static void auditd_reset(void)
452{
453 struct sk_buff *skb;
454
455 /* break the connection */
456 if (audit_sock) {
457 sock_put(audit_sock);
458 audit_sock = NULL;
459 }
460 audit_pid = 0;
461 audit_nlk_portid = 0;
462
463 /* flush all of the retry queue to the hold queue */
464 while ((skb = skb_dequeue(&audit_retry_queue)))
465 kauditd_hold_skb(skb);
466}
467
468/**
469 * kauditd_send_unicast_skb - Send a record via unicast to auditd
470 * @skb: audit record
471 */
472static int kauditd_send_unicast_skb(struct sk_buff *skb)
473{
474 int rc;
410 475
411restart: 476 /* if we know nothing is connected, don't even try the netlink call */
412 /* take a reference in case we can't send it and we want to hold it */ 477 if (!audit_pid)
478 return -ECONNREFUSED;
479
480 /* get an extra skb reference in case we fail to send */
413 skb_get(skb); 481 skb_get(skb);
414 err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); 482 rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
415 if (err < 0) { 483 if (rc >= 0) {
416 pr_err("netlink_unicast sending to audit_pid=%d returned error: %d\n",
417 audit_pid, err);
418 if (audit_pid) {
419 if (err == -ECONNREFUSED || err == -EPERM
420 || ++attempts >= AUDITD_RETRIES) {
421 char s[32];
422
423 snprintf(s, sizeof(s), "audit_pid=%d reset", audit_pid);
424 audit_log_lost(s);
425 audit_pid = 0;
426 audit_sock = NULL;
427 } else {
428 pr_warn("re-scheduling(#%d) write to audit_pid=%d\n",
429 attempts, audit_pid);
430 set_current_state(TASK_INTERRUPTIBLE);
431 schedule();
432 goto restart;
433 }
434 }
435 /* we might get lucky and get this in the next auditd */
436 audit_hold_skb(skb);
437 } else
438 /* drop the extra reference if sent ok */
439 consume_skb(skb); 484 consume_skb(skb);
485 rc = 0;
486 }
487
488 return rc;
440} 489}
441 490
442/* 491/*
443 * kauditd_send_multicast_skb - send the skb to multicast userspace listeners 492 * kauditd_send_multicast_skb - Send a record to any multicast listeners
493 * @skb: audit record
444 * 494 *
495 * Description:
445 * This function doesn't consume an skb as might be expected since it has to 496 * This function doesn't consume an skb as might be expected since it has to
446 * copy it anyways. 497 * copy it anyways.
447 */ 498 */
448static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask) 499static void kauditd_send_multicast_skb(struct sk_buff *skb)
449{ 500{
450 struct sk_buff *copy; 501 struct sk_buff *copy;
451 struct audit_net *aunet = net_generic(&init_net, audit_net_id); 502 struct audit_net *aunet = net_generic(&init_net, audit_net_id);
452 struct sock *sock = aunet->nlsk; 503 struct sock *sock = aunet->nlsk;
504 struct nlmsghdr *nlh;
453 505
454 if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG)) 506 if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
455 return; 507 return;
@@ -464,74 +516,161 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
464 * no reason for new multicast clients to continue with this 516 * no reason for new multicast clients to continue with this
465 * non-compliance. 517 * non-compliance.
466 */ 518 */
467 copy = skb_copy(skb, gfp_mask); 519 copy = skb_copy(skb, GFP_KERNEL);
468 if (!copy) 520 if (!copy)
469 return; 521 return;
522 nlh = nlmsg_hdr(copy);
523 nlh->nlmsg_len = skb->len;
470 524
471 nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask); 525 nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
472} 526}
473 527
474/* 528/**
475 * flush_hold_queue - empty the hold queue if auditd appears 529 * kauditd_wake_condition - Return true when it is time to wake kauditd_thread
476 *
477 * If auditd just started, drain the queue of messages already
478 * sent to syslog/printk. Remember loss here is ok. We already
479 * called audit_log_lost() if it didn't go out normally. so the
480 * race between the skb_dequeue and the next check for audit_pid
481 * doesn't matter.
482 * 530 *
483 * If you ever find kauditd to be too slow we can get a perf win 531 * Description:
484 * by doing our own locking and keeping better track if there 532 * This function is for use by the wait_event_freezable() call in
485 * are messages in this queue. I don't see the need now, but 533 * kauditd_thread().
486 * in 5 years when I want to play with this again I'll see this
487 * note and still have no friggin idea what i'm thinking today.
488 */ 534 */
489static void flush_hold_queue(void) 535static int kauditd_wake_condition(void)
490{ 536{
491 struct sk_buff *skb; 537 static int pid_last = 0;
492 538 int rc;
493 if (!audit_default || !audit_pid) 539 int pid = audit_pid;
494 return;
495
496 skb = skb_dequeue(&audit_skb_hold_queue);
497 if (likely(!skb))
498 return;
499 540
500 while (skb && audit_pid) { 541 /* wake on new messages or a change in the connected auditd */
501 kauditd_send_skb(skb); 542 rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last);
502 skb = skb_dequeue(&audit_skb_hold_queue); 543 if (rc)
503 } 544 pid_last = pid;
504 545
505 /* 546 return rc;
506 * if auditd just disappeared but we
507 * dequeued an skb we need to drop ref
508 */
509 consume_skb(skb);
510} 547}
511 548
512static int kauditd_thread(void *dummy) 549static int kauditd_thread(void *dummy)
513{ 550{
551 int rc;
552 int auditd = 0;
553 int reschedule = 0;
554 struct sk_buff *skb;
555 struct nlmsghdr *nlh;
556
557#define UNICAST_RETRIES 5
558#define AUDITD_BAD(x,y) \
559 ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES)
560
561 /* NOTE: we do invalidate the auditd connection flag on any sending
562 * errors, but we only "restore" the connection flag at specific places
563 * in the loop in order to help ensure proper ordering of audit
564 * records */
565
514 set_freezable(); 566 set_freezable();
515 while (!kthread_should_stop()) { 567 while (!kthread_should_stop()) {
516 struct sk_buff *skb; 568 /* NOTE: possible area for future improvement is to look at
517 569 * the hold and retry queues, since only this thread
518 flush_hold_queue(); 570 * has access to these queues we might be able to do
571 * our own queuing and skip some/all of the locking */
572
573 /* NOTE: it might be a fun experiment to split the hold and
574 * retry queue handling to another thread, but the
575 * synchronization issues and other overhead might kill
576 * any performance gains */
577
578 /* attempt to flush the hold queue */
579 while (auditd && (skb = skb_dequeue(&audit_hold_queue))) {
580 rc = kauditd_send_unicast_skb(skb);
581 if (rc) {
582 /* requeue to the same spot */
583 skb_queue_head(&audit_hold_queue, skb);
584
585 auditd = 0;
586 if (AUDITD_BAD(rc, reschedule)) {
587 mutex_lock(&audit_cmd_mutex);
588 auditd_reset();
589 mutex_unlock(&audit_cmd_mutex);
590 reschedule = 0;
591 }
592 } else
593 /* we were able to send successfully */
594 reschedule = 0;
595 }
519 596
520 skb = skb_dequeue(&audit_skb_queue); 597 /* attempt to flush the retry queue */
598 while (auditd && (skb = skb_dequeue(&audit_retry_queue))) {
599 rc = kauditd_send_unicast_skb(skb);
600 if (rc) {
601 auditd = 0;
602 if (AUDITD_BAD(rc, reschedule)) {
603 kauditd_hold_skb(skb);
604 mutex_lock(&audit_cmd_mutex);
605 auditd_reset();
606 mutex_unlock(&audit_cmd_mutex);
607 reschedule = 0;
608 } else
609 /* temporary problem (we hope), queue
610 * to the same spot and retry */
611 skb_queue_head(&audit_retry_queue, skb);
612 } else
613 /* we were able to send successfully */
614 reschedule = 0;
615 }
521 616
617 /* standard queue processing, try to be as quick as possible */
618quick_loop:
619 skb = skb_dequeue(&audit_queue);
522 if (skb) { 620 if (skb) {
523 if (!audit_backlog_limit || 621 /* setup the netlink header, see the comments in
524 (skb_queue_len(&audit_skb_queue) <= audit_backlog_limit)) 622 * kauditd_send_multicast_skb() for length quirks */
525 wake_up(&audit_backlog_wait); 623 nlh = nlmsg_hdr(skb);
526 if (audit_pid) 624 nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
527 kauditd_send_skb(skb); 625
626 /* attempt to send to any multicast listeners */
627 kauditd_send_multicast_skb(skb);
628
629 /* attempt to send to auditd, queue on failure */
630 if (auditd) {
631 rc = kauditd_send_unicast_skb(skb);
632 if (rc) {
633 auditd = 0;
634 if (AUDITD_BAD(rc, reschedule)) {
635 mutex_lock(&audit_cmd_mutex);
636 auditd_reset();
637 mutex_unlock(&audit_cmd_mutex);
638 reschedule = 0;
639 }
640
641 /* move to the retry queue */
642 kauditd_retry_skb(skb);
643 } else
644 /* everything is working so go fast! */
645 goto quick_loop;
646 } else if (reschedule)
647 /* we are currently having problems, move to
648 * the retry queue */
649 kauditd_retry_skb(skb);
528 else 650 else
529 audit_printk_skb(skb); 651 /* dump the message via printk and hold it */
530 continue; 652 kauditd_hold_skb(skb);
531 } 653 } else {
654 /* we have flushed the backlog so wake everyone */
655 wake_up(&audit_backlog_wait);
656
657 /* if everything is okay with auditd (if present), go
658 * to sleep until there is something new in the queue
659 * or we have a change in the connected auditd;
660 * otherwise simply reschedule to give things a chance
661 * to recover */
662 if (reschedule) {
663 set_current_state(TASK_INTERRUPTIBLE);
664 schedule();
665 } else
666 wait_event_freezable(kauditd_wait,
667 kauditd_wake_condition());
532 668
533 wait_event_freezable(kauditd_wait, skb_queue_len(&audit_skb_queue)); 669 /* update the auditd connection status */
670 auditd = (audit_pid ? 1 : 0);
671 }
534 } 672 }
673
535 return 0; 674 return 0;
536} 675}
537 676
@@ -596,6 +735,7 @@ static int audit_send_reply_thread(void *arg)
596 kfree(reply); 735 kfree(reply);
597 return 0; 736 return 0;
598} 737}
738
599/** 739/**
600 * audit_send_reply - send an audit reply message via netlink 740 * audit_send_reply - send an audit reply message via netlink
601 * @request_skb: skb of request we are replying to (used to target the reply) 741 * @request_skb: skb of request we are replying to (used to target the reply)
@@ -832,16 +972,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
832 if (err) 972 if (err)
833 return err; 973 return err;
834 974
835 /* As soon as there's any sign of userspace auditd,
836 * start kauditd to talk to it */
837 if (!kauditd_task) {
838 kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
839 if (IS_ERR(kauditd_task)) {
840 err = PTR_ERR(kauditd_task);
841 kauditd_task = NULL;
842 return err;
843 }
844 }
845 seq = nlh->nlmsg_seq; 975 seq = nlh->nlmsg_seq;
846 data = nlmsg_data(nlh); 976 data = nlmsg_data(nlh);
847 977
@@ -855,9 +985,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
855 s.rate_limit = audit_rate_limit; 985 s.rate_limit = audit_rate_limit;
856 s.backlog_limit = audit_backlog_limit; 986 s.backlog_limit = audit_backlog_limit;
857 s.lost = atomic_read(&audit_lost); 987 s.lost = atomic_read(&audit_lost);
858 s.backlog = skb_queue_len(&audit_skb_queue); 988 s.backlog = skb_queue_len(&audit_queue);
859 s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; 989 s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL;
860 s.backlog_wait_time = audit_backlog_wait_time_master; 990 s.backlog_wait_time = audit_backlog_wait_time;
861 audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); 991 audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
862 break; 992 break;
863 } 993 }
@@ -897,9 +1027,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
897 } 1027 }
898 if (audit_enabled != AUDIT_OFF) 1028 if (audit_enabled != AUDIT_OFF)
899 audit_log_config_change("audit_pid", new_pid, audit_pid, 1); 1029 audit_log_config_change("audit_pid", new_pid, audit_pid, 1);
900 audit_pid = new_pid; 1030 if (new_pid) {
901 audit_nlk_portid = NETLINK_CB(skb).portid; 1031 if (audit_sock)
902 audit_sock = skb->sk; 1032 sock_put(audit_sock);
1033 audit_pid = new_pid;
1034 audit_nlk_portid = NETLINK_CB(skb).portid;
1035 sock_hold(skb->sk);
1036 audit_sock = skb->sk;
1037 } else {
1038 auditd_reset();
1039 }
1040 wake_up_interruptible(&kauditd_wait);
903 } 1041 }
904 if (s.mask & AUDIT_STATUS_RATE_LIMIT) { 1042 if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
905 err = audit_set_rate_limit(s.rate_limit); 1043 err = audit_set_rate_limit(s.rate_limit);
@@ -1167,10 +1305,10 @@ static void __net_exit audit_net_exit(struct net *net)
1167{ 1305{
1168 struct audit_net *aunet = net_generic(net, audit_net_id); 1306 struct audit_net *aunet = net_generic(net, audit_net_id);
1169 struct sock *sock = aunet->nlsk; 1307 struct sock *sock = aunet->nlsk;
1170 if (sock == audit_sock) { 1308 mutex_lock(&audit_cmd_mutex);
1171 audit_pid = 0; 1309 if (sock == audit_sock)
1172 audit_sock = NULL; 1310 auditd_reset();
1173 } 1311 mutex_unlock(&audit_cmd_mutex);
1174 1312
1175 netlink_kernel_release(sock); 1313 netlink_kernel_release(sock);
1176 aunet->nlsk = NULL; 1314 aunet->nlsk = NULL;
@@ -1195,17 +1333,24 @@ static int __init audit_init(void)
1195 audit_default ? "enabled" : "disabled"); 1333 audit_default ? "enabled" : "disabled");
1196 register_pernet_subsys(&audit_net_ops); 1334 register_pernet_subsys(&audit_net_ops);
1197 1335
1198 skb_queue_head_init(&audit_skb_queue); 1336 skb_queue_head_init(&audit_queue);
1199 skb_queue_head_init(&audit_skb_hold_queue); 1337 skb_queue_head_init(&audit_retry_queue);
1338 skb_queue_head_init(&audit_hold_queue);
1200 audit_initialized = AUDIT_INITIALIZED; 1339 audit_initialized = AUDIT_INITIALIZED;
1201 audit_enabled = audit_default; 1340 audit_enabled = audit_default;
1202 audit_ever_enabled |= !!audit_default; 1341 audit_ever_enabled |= !!audit_default;
1203 1342
1204 audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
1205
1206 for (i = 0; i < AUDIT_INODE_BUCKETS; i++) 1343 for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
1207 INIT_LIST_HEAD(&audit_inode_hash[i]); 1344 INIT_LIST_HEAD(&audit_inode_hash[i]);
1208 1345
1346 kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
1347 if (IS_ERR(kauditd_task)) {
1348 int err = PTR_ERR(kauditd_task);
1349 panic("audit: failed to start the kauditd thread (%d)\n", err);
1350 }
1351
1352 audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
1353
1209 return 0; 1354 return 0;
1210} 1355}
1211__initcall(audit_init); 1356__initcall(audit_init);
@@ -1338,24 +1483,6 @@ static inline void audit_get_stamp(struct audit_context *ctx,
1338 } 1483 }
1339} 1484}
1340 1485
1341/*
1342 * Wait for auditd to drain the queue a little
1343 */
1344static long wait_for_auditd(long sleep_time)
1345{
1346 DECLARE_WAITQUEUE(wait, current);
1347
1348 if (audit_backlog_limit &&
1349 skb_queue_len(&audit_skb_queue) > audit_backlog_limit) {
1350 add_wait_queue_exclusive(&audit_backlog_wait, &wait);
1351 set_current_state(TASK_UNINTERRUPTIBLE);
1352 sleep_time = schedule_timeout(sleep_time);
1353 remove_wait_queue(&audit_backlog_wait, &wait);
1354 }
1355
1356 return sleep_time;
1357}
1358
1359/** 1486/**
1360 * audit_log_start - obtain an audit buffer 1487 * audit_log_start - obtain an audit buffer
1361 * @ctx: audit_context (may be NULL) 1488 * @ctx: audit_context (may be NULL)
@@ -1374,12 +1501,9 @@ static long wait_for_auditd(long sleep_time)
1374struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, 1501struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
1375 int type) 1502 int type)
1376{ 1503{
1377 struct audit_buffer *ab = NULL; 1504 struct audit_buffer *ab;
1378 struct timespec t; 1505 struct timespec t;
1379 unsigned int uninitialized_var(serial); 1506 unsigned int uninitialized_var(serial);
1380 int reserve = 5; /* Allow atomic callers to go up to five
1381 entries over the normal backlog limit */
1382 unsigned long timeout_start = jiffies;
1383 1507
1384 if (audit_initialized != AUDIT_INITIALIZED) 1508 if (audit_initialized != AUDIT_INITIALIZED)
1385 return NULL; 1509 return NULL;
@@ -1387,38 +1511,48 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
1387 if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE))) 1511 if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE)))
1388 return NULL; 1512 return NULL;
1389 1513
1390 if (gfp_mask & __GFP_DIRECT_RECLAIM) { 1514 /* don't ever fail/sleep on these two conditions:
1391 if (audit_pid && audit_pid == current->tgid) 1515 * 1. auditd generated record - since we need auditd to drain the
1392 gfp_mask &= ~__GFP_DIRECT_RECLAIM; 1516 * queue; also, when we are checking for auditd, compare PIDs using
1393 else 1517 * task_tgid_vnr() since audit_pid is set in audit_receive_msg()
1394 reserve = 0; 1518 * using a PID anchored in the caller's namespace
1395 } 1519 * 2. audit command message - record types 1000 through 1099 inclusive
1396 1520 * are command messages/records used to manage the kernel subsystem
1397 while (audit_backlog_limit 1521 * and the audit userspace, blocking on these messages could cause
1398 && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) { 1522 * problems under load so don't do it (note: not all of these
1399 if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) { 1523 * command types are valid as record types, but it is quicker to
1400 long sleep_time; 1524 * just check two ints than a series of ints in an if/switch stmt) */
1525 if (!((audit_pid && audit_pid == task_tgid_vnr(current)) ||
1526 (type >= 1000 && type <= 1099))) {
1527 long sleep_time = audit_backlog_wait_time;
1528
1529 while (audit_backlog_limit &&
1530 (skb_queue_len(&audit_queue) > audit_backlog_limit)) {
1531 /* wake kauditd to try and flush the queue */
1532 wake_up_interruptible(&kauditd_wait);
1401 1533
1402 sleep_time = timeout_start + audit_backlog_wait_time - jiffies; 1534 /* sleep if we are allowed and we haven't exhausted our
1403 if (sleep_time > 0) { 1535 * backlog wait limit */
1404 sleep_time = wait_for_auditd(sleep_time); 1536 if ((gfp_mask & __GFP_DIRECT_RECLAIM) &&
1405 if (sleep_time > 0) 1537 (sleep_time > 0)) {
1406 continue; 1538 DECLARE_WAITQUEUE(wait, current);
1539
1540 add_wait_queue_exclusive(&audit_backlog_wait,
1541 &wait);
1542 set_current_state(TASK_UNINTERRUPTIBLE);
1543 sleep_time = schedule_timeout(sleep_time);
1544 remove_wait_queue(&audit_backlog_wait, &wait);
1545 } else {
1546 if (audit_rate_check() && printk_ratelimit())
1547 pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
1548 skb_queue_len(&audit_queue),
1549 audit_backlog_limit);
1550 audit_log_lost("backlog limit exceeded");
1551 return NULL;
1407 } 1552 }
1408 } 1553 }
1409 if (audit_rate_check() && printk_ratelimit())
1410 pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
1411 skb_queue_len(&audit_skb_queue),
1412 audit_backlog_limit);
1413 audit_log_lost("backlog limit exceeded");
1414 audit_backlog_wait_time = 0;
1415 wake_up(&audit_backlog_wait);
1416 return NULL;
1417 } 1554 }
1418 1555
1419 if (!reserve && !audit_backlog_wait_time)
1420 audit_backlog_wait_time = audit_backlog_wait_time_master;
1421
1422 ab = audit_buffer_alloc(ctx, gfp_mask, type); 1556 ab = audit_buffer_alloc(ctx, gfp_mask, type);
1423 if (!ab) { 1557 if (!ab) {
1424 audit_log_lost("out of memory in audit_log_start"); 1558 audit_log_lost("out of memory in audit_log_start");
@@ -1426,9 +1560,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
1426 } 1560 }
1427 1561
1428 audit_get_stamp(ab->ctx, &t, &serial); 1562 audit_get_stamp(ab->ctx, &t, &serial);
1429
1430 audit_log_format(ab, "audit(%lu.%03lu:%u): ", 1563 audit_log_format(ab, "audit(%lu.%03lu:%u): ",
1431 t.tv_sec, t.tv_nsec/1000000, serial); 1564 t.tv_sec, t.tv_nsec/1000000, serial);
1565
1432 return ab; 1566 return ab;
1433} 1567}
1434 1568
@@ -1978,10 +2112,10 @@ out:
1978 * audit_log_end - end one audit record 2112 * audit_log_end - end one audit record
1979 * @ab: the audit_buffer 2113 * @ab: the audit_buffer
1980 * 2114 *
1981 * netlink_unicast() cannot be called inside an irq context because it blocks 2115 * We can not do a netlink send inside an irq context because it blocks (last
1982 * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed 2116 * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a
1983 * on a queue and a tasklet is scheduled to remove them from the queue outside 2117 * queue and a tasklet is scheduled to remove them from the queue outside the
1984 * the irq context. May be called in any context. 2118 * irq context. May be called in any context.
1985 */ 2119 */
1986void audit_log_end(struct audit_buffer *ab) 2120void audit_log_end(struct audit_buffer *ab)
1987{ 2121{
@@ -1990,28 +2124,8 @@ void audit_log_end(struct audit_buffer *ab)
1990 if (!audit_rate_check()) { 2124 if (!audit_rate_check()) {
1991 audit_log_lost("rate limit exceeded"); 2125 audit_log_lost("rate limit exceeded");
1992 } else { 2126 } else {
1993 struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); 2127 skb_queue_tail(&audit_queue, ab->skb);
1994 2128 wake_up_interruptible(&kauditd_wait);
1995 nlh->nlmsg_len = ab->skb->len;
1996 kauditd_send_multicast_skb(ab->skb, ab->gfp_mask);
1997
1998 /*
1999 * The original kaudit unicast socket sends up messages with
2000 * nlmsg_len set to the payload length rather than the entire
2001 * message length. This breaks the standard set by netlink.
2002 * The existing auditd daemon assumes this breakage. Fixing
2003 * this would require co-ordinating a change in the established
2004 * protocol between the kaudit kernel subsystem and the auditd
2005 * userspace code.
2006 */
2007 nlh->nlmsg_len -= NLMSG_HDRLEN;
2008
2009 if (audit_pid) {
2010 skb_queue_tail(&audit_skb_queue, ab->skb);
2011 wake_up_interruptible(&kauditd_wait);
2012 } else {
2013 audit_printk_skb(ab->skb);
2014 }
2015 ab->skb = NULL; 2129 ab->skb = NULL;
2016 } 2130 }
2017 audit_buffer_free(ab); 2131 audit_buffer_free(ab);
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index f84f8d06e1f6..f75154889aa9 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -130,10 +130,9 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c
130 ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); 130 ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
131 if (unlikely(!ab)) 131 if (unlikely(!ab))
132 return; 132 return;
133 audit_log_format(ab, "auid=%u ses=%u op=", 133 audit_log_format(ab, "auid=%u ses=%u op=%s",
134 from_kuid(&init_user_ns, audit_get_loginuid(current)), 134 from_kuid(&init_user_ns, audit_get_loginuid(current)),
135 audit_get_sessionid(current)); 135 audit_get_sessionid(current), op);
136 audit_log_string(ab, op);
137 audit_log_format(ab, " path="); 136 audit_log_format(ab, " path=");
138 audit_log_untrustedstring(ab, audit_mark->path); 137 audit_log_untrustedstring(ab, audit_mark->path);
139 audit_log_key(ab, rule->filterkey); 138 audit_log_key(ab, rule->filterkey);
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 25772476fa4a..055f11b0a50f 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -458,8 +458,7 @@ static void audit_tree_log_remove_rule(struct audit_krule *rule)
458 ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); 458 ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
459 if (unlikely(!ab)) 459 if (unlikely(!ab))
460 return; 460 return;
461 audit_log_format(ab, "op="); 461 audit_log_format(ab, "op=remove_rule");
462 audit_log_string(ab, "remove_rule");
463 audit_log_format(ab, " dir="); 462 audit_log_format(ab, " dir=");
464 audit_log_untrustedstring(ab, rule->tree->pathname); 463 audit_log_untrustedstring(ab, rule->tree->pathname);
465 audit_log_key(ab, rule->filterkey); 464 audit_log_key(ab, rule->filterkey);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 0d302a87f21b..686e068ec3da 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -242,10 +242,9 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc
242 ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); 242 ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
243 if (unlikely(!ab)) 243 if (unlikely(!ab))
244 return; 244 return;
245 audit_log_format(ab, "auid=%u ses=%u op=", 245 audit_log_format(ab, "auid=%u ses=%u op=%s",
246 from_kuid(&init_user_ns, audit_get_loginuid(current)), 246 from_kuid(&init_user_ns, audit_get_loginuid(current)),
247 audit_get_sessionid(current)); 247 audit_get_sessionid(current), op);
248 audit_log_string(ab, op);
249 audit_log_format(ab, " path="); 248 audit_log_format(ab, " path=");
250 audit_log_untrustedstring(ab, w->path); 249 audit_log_untrustedstring(ab, w->path);
251 audit_log_key(ab, r->filterkey); 250 audit_log_key(ab, r->filterkey);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 85d9cac497e4..880519d6cf2a 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -363,6 +363,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
363 case AUDIT_EXIT: 363 case AUDIT_EXIT:
364 case AUDIT_SUCCESS: 364 case AUDIT_SUCCESS:
365 case AUDIT_INODE: 365 case AUDIT_INODE:
366 case AUDIT_SESSIONID:
366 /* bit ops are only useful on syscall args */ 367 /* bit ops are only useful on syscall args */
367 if (f->op == Audit_bitmask || f->op == Audit_bittest) 368 if (f->op == Audit_bitmask || f->op == Audit_bittest)
368 return -EINVAL; 369 return -EINVAL;
@@ -476,6 +477,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
476 if (!gid_valid(f->gid)) 477 if (!gid_valid(f->gid))
477 goto exit_free; 478 goto exit_free;
478 break; 479 break;
480 case AUDIT_SESSIONID:
479 case AUDIT_ARCH: 481 case AUDIT_ARCH:
480 entry->rule.arch_f = f; 482 entry->rule.arch_f = f;
481 break; 483 break;
@@ -1074,8 +1076,7 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re
1074 return; 1076 return;
1075 audit_log_format(ab, "auid=%u ses=%u" ,loginuid, sessionid); 1077 audit_log_format(ab, "auid=%u ses=%u" ,loginuid, sessionid);
1076 audit_log_task_context(ab); 1078 audit_log_task_context(ab);
1077 audit_log_format(ab, " op="); 1079 audit_log_format(ab, " op=%s", action);
1078 audit_log_string(ab, action);
1079 audit_log_key(ab, rule->filterkey); 1080 audit_log_key(ab, rule->filterkey);
1080 audit_log_format(ab, " list=%d res=%d", rule->listnr, res); 1081 audit_log_format(ab, " list=%d res=%d", rule->listnr, res);
1081 audit_log_end(ab); 1082 audit_log_end(ab);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2cd5256dbff7..cf1fa43512c1 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -446,6 +446,7 @@ static int audit_filter_rules(struct task_struct *tsk,
446 const struct cred *cred; 446 const struct cred *cred;
447 int i, need_sid = 1; 447 int i, need_sid = 1;
448 u32 sid; 448 u32 sid;
449 unsigned int sessionid;
449 450
450 cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation); 451 cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation);
451 452
@@ -508,6 +509,10 @@ static int audit_filter_rules(struct task_struct *tsk,
508 case AUDIT_FSGID: 509 case AUDIT_FSGID:
509 result = audit_gid_comparator(cred->fsgid, f->op, f->gid); 510 result = audit_gid_comparator(cred->fsgid, f->op, f->gid);
510 break; 511 break;
512 case AUDIT_SESSIONID:
513 sessionid = audit_get_sessionid(current);
514 result = audit_comparator(sessionid, f->op, f->val);
515 break;
511 case AUDIT_PERS: 516 case AUDIT_PERS:
512 result = audit_comparator(tsk->personality, f->op, f->val); 517 result = audit_comparator(tsk->personality, f->op, f->val);
513 break; 518 break;
@@ -1000,7 +1005,7 @@ static void audit_log_execve_info(struct audit_context *context,
1000 long len_rem; 1005 long len_rem;
1001 long len_full; 1006 long len_full;
1002 long len_buf; 1007 long len_buf;
1003 long len_abuf; 1008 long len_abuf = 0;
1004 long len_tmp; 1009 long len_tmp;
1005 bool require_data; 1010 bool require_data;
1006 bool encode; 1011 bool encode;
@@ -2025,8 +2030,11 @@ int audit_set_loginuid(kuid_t loginuid)
2025 goto out; 2030 goto out;
2026 2031
2027 /* are we setting or clearing? */ 2032 /* are we setting or clearing? */
2028 if (uid_valid(loginuid)) 2033 if (uid_valid(loginuid)) {
2029 sessionid = (unsigned int)atomic_inc_return(&session_id); 2034 sessionid = (unsigned int)atomic_inc_return(&session_id);
2035 if (unlikely(sessionid == (unsigned int)-1))
2036 sessionid = (unsigned int)atomic_inc_return(&session_id);
2037 }
2030 2038
2031 task->sessionid = sessionid; 2039 task->sessionid = sessionid;
2032 task->loginuid = loginuid; 2040 task->loginuid = loginuid;
diff --git a/kernel/capability.c b/kernel/capability.c
index 00411c82dac5..4984e1f552eb 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -457,6 +457,19 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns,
457EXPORT_SYMBOL(file_ns_capable); 457EXPORT_SYMBOL(file_ns_capable);
458 458
459/** 459/**
460 * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode?
461 * @ns: The user namespace in question
462 * @inode: The inode in question
463 *
464 * Return true if the inode uid and gid are within the namespace.
465 */
466bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode)
467{
468 return kuid_has_mapping(ns, inode->i_uid) &&
469 kgid_has_mapping(ns, inode->i_gid);
470}
471
472/**
460 * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped 473 * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
461 * @inode: The inode in question 474 * @inode: The inode in question
462 * @cap: The capability in question 475 * @cap: The capability in question
@@ -469,7 +482,26 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
469{ 482{
470 struct user_namespace *ns = current_user_ns(); 483 struct user_namespace *ns = current_user_ns();
471 484
472 return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) && 485 return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode);
473 kgid_has_mapping(ns, inode->i_gid);
474} 486}
475EXPORT_SYMBOL(capable_wrt_inode_uidgid); 487EXPORT_SYMBOL(capable_wrt_inode_uidgid);
488
489/**
490 * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace
491 * @tsk: The task that may be ptraced
492 * @ns: The user namespace to search for CAP_SYS_PTRACE in
493 *
494 * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE
495 * in the specified user namespace.
496 */
497bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns)
498{
499 int ret = 0; /* An absent tracer adds no restrictions */
500 const struct cred *cred;
501 rcu_read_lock();
502 cred = rcu_dereference(tsk->ptracer_cred);
503 if (cred)
504 ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE);
505 rcu_read_unlock();
506 return (ret == 0);
507}
diff --git a/kernel/fork.c b/kernel/fork.c
index a439ac429669..869b8ccc00bf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -747,7 +747,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
747#endif 747#endif
748} 748}
749 749
750static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) 750static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
751 struct user_namespace *user_ns)
751{ 752{
752 mm->mmap = NULL; 753 mm->mmap = NULL;
753 mm->mm_rb = RB_ROOT; 754 mm->mm_rb = RB_ROOT;
@@ -787,6 +788,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
787 if (init_new_context(p, mm)) 788 if (init_new_context(p, mm))
788 goto fail_nocontext; 789 goto fail_nocontext;
789 790
791 mm->user_ns = get_user_ns(user_ns);
790 return mm; 792 return mm;
791 793
792fail_nocontext: 794fail_nocontext:
@@ -832,7 +834,7 @@ struct mm_struct *mm_alloc(void)
832 return NULL; 834 return NULL;
833 835
834 memset(mm, 0, sizeof(*mm)); 836 memset(mm, 0, sizeof(*mm));
835 return mm_init(mm, current); 837 return mm_init(mm, current, current_user_ns());
836} 838}
837 839
838/* 840/*
@@ -847,6 +849,7 @@ void __mmdrop(struct mm_struct *mm)
847 destroy_context(mm); 849 destroy_context(mm);
848 mmu_notifier_mm_destroy(mm); 850 mmu_notifier_mm_destroy(mm);
849 check_mm(mm); 851 check_mm(mm);
852 put_user_ns(mm->user_ns);
850 free_mm(mm); 853 free_mm(mm);
851} 854}
852EXPORT_SYMBOL_GPL(__mmdrop); 855EXPORT_SYMBOL_GPL(__mmdrop);
@@ -1128,7 +1131,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
1128 1131
1129 memcpy(mm, oldmm, sizeof(*mm)); 1132 memcpy(mm, oldmm, sizeof(*mm));
1130 1133
1131 if (!mm_init(mm, tsk)) 1134 if (!mm_init(mm, tsk, mm->user_ns))
1132 goto fail_nomem; 1135 goto fail_nomem;
1133 1136
1134 err = dup_mmap(mm, oldmm); 1137 err = dup_mmap(mm, oldmm);
diff --git a/kernel/padata.c b/kernel/padata.c
index 7848f0566403..05316c9f32da 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -64,15 +64,11 @@ static int padata_cpu_hash(struct parallel_data *pd)
64static void padata_parallel_worker(struct work_struct *parallel_work) 64static void padata_parallel_worker(struct work_struct *parallel_work)
65{ 65{
66 struct padata_parallel_queue *pqueue; 66 struct padata_parallel_queue *pqueue;
67 struct parallel_data *pd;
68 struct padata_instance *pinst;
69 LIST_HEAD(local_list); 67 LIST_HEAD(local_list);
70 68
71 local_bh_disable(); 69 local_bh_disable();
72 pqueue = container_of(parallel_work, 70 pqueue = container_of(parallel_work,
73 struct padata_parallel_queue, work); 71 struct padata_parallel_queue, work);
74 pd = pqueue->pd;
75 pinst = pd->pinst;
76 72
77 spin_lock(&pqueue->parallel.lock); 73 spin_lock(&pqueue->parallel.lock);
78 list_replace_init(&pqueue->parallel.list, &local_list); 74 list_replace_init(&pqueue->parallel.list, &local_list);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e6474f7272ec..49ba7c1ade9d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -27,6 +27,35 @@
27#include <linux/cn_proc.h> 27#include <linux/cn_proc.h>
28#include <linux/compat.h> 28#include <linux/compat.h>
29 29
30/*
31 * Access another process' address space via ptrace.
32 * Source/target buffer must be kernel space,
33 * Do not walk the page table directly, use get_user_pages
34 */
35int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
36 void *buf, int len, unsigned int gup_flags)
37{
38 struct mm_struct *mm;
39 int ret;
40
41 mm = get_task_mm(tsk);
42 if (!mm)
43 return 0;
44
45 if (!tsk->ptrace ||
46 (current != tsk->parent) ||
47 ((get_dumpable(mm) != SUID_DUMP_USER) &&
48 !ptracer_capable(tsk, mm->user_ns))) {
49 mmput(mm);
50 return 0;
51 }
52
53 ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
54 mmput(mm);
55
56 return ret;
57}
58
30 59
31/* 60/*
32 * ptrace a task: make the debugger its new parent and 61 * ptrace a task: make the debugger its new parent and
@@ -39,6 +68,9 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
39 BUG_ON(!list_empty(&child->ptrace_entry)); 68 BUG_ON(!list_empty(&child->ptrace_entry));
40 list_add(&child->ptrace_entry, &new_parent->ptraced); 69 list_add(&child->ptrace_entry, &new_parent->ptraced);
41 child->parent = new_parent; 70 child->parent = new_parent;
71 rcu_read_lock();
72 child->ptracer_cred = get_cred(__task_cred(new_parent));
73 rcu_read_unlock();
42} 74}
43 75
44/** 76/**
@@ -71,12 +103,16 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
71 */ 103 */
72void __ptrace_unlink(struct task_struct *child) 104void __ptrace_unlink(struct task_struct *child)
73{ 105{
106 const struct cred *old_cred;
74 BUG_ON(!child->ptrace); 107 BUG_ON(!child->ptrace);
75 108
76 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); 109 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
77 110
78 child->parent = child->real_parent; 111 child->parent = child->real_parent;
79 list_del_init(&child->ptrace_entry); 112 list_del_init(&child->ptrace_entry);
113 old_cred = child->ptracer_cred;
114 child->ptracer_cred = NULL;
115 put_cred(old_cred);
80 116
81 spin_lock(&child->sighand->siglock); 117 spin_lock(&child->sighand->siglock);
82 child->ptrace = 0; 118 child->ptrace = 0;
@@ -220,7 +256,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
220static int __ptrace_may_access(struct task_struct *task, unsigned int mode) 256static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
221{ 257{
222 const struct cred *cred = current_cred(), *tcred; 258 const struct cred *cred = current_cred(), *tcred;
223 int dumpable = 0; 259 struct mm_struct *mm;
224 kuid_t caller_uid; 260 kuid_t caller_uid;
225 kgid_t caller_gid; 261 kgid_t caller_gid;
226 262
@@ -271,16 +307,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
271 return -EPERM; 307 return -EPERM;
272ok: 308ok:
273 rcu_read_unlock(); 309 rcu_read_unlock();
274 smp_rmb(); 310 mm = task->mm;
275 if (task->mm) 311 if (mm &&
276 dumpable = get_dumpable(task->mm); 312 ((get_dumpable(mm) != SUID_DUMP_USER) &&
277 rcu_read_lock(); 313 !ptrace_has_cap(mm->user_ns, mode)))
278 if (dumpable != SUID_DUMP_USER && 314 return -EPERM;
279 !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
280 rcu_read_unlock();
281 return -EPERM;
282 }
283 rcu_read_unlock();
284 315
285 return security_ptrace_access_check(task, mode); 316 return security_ptrace_access_check(task, mode);
286} 317}
@@ -344,10 +375,6 @@ static int ptrace_attach(struct task_struct *task, long request,
344 375
345 if (seize) 376 if (seize)
346 flags |= PT_SEIZED; 377 flags |= PT_SEIZED;
347 rcu_read_lock();
348 if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
349 flags |= PT_PTRACE_CAP;
350 rcu_read_unlock();
351 task->ptrace = flags; 378 task->ptrace = flags;
352 379
353 __ptrace_link(task, current); 380 __ptrace_link(task, current);
@@ -537,7 +564,8 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
537 int this_len, retval; 564 int this_len, retval;
538 565
539 this_len = (len > sizeof(buf)) ? sizeof(buf) : len; 566 this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
540 retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE); 567 retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE);
568
541 if (!retval) { 569 if (!retval) {
542 if (copied) 570 if (copied)
543 break; 571 break;
@@ -564,7 +592,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
564 this_len = (len > sizeof(buf)) ? sizeof(buf) : len; 592 this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
565 if (copy_from_user(buf, src, this_len)) 593 if (copy_from_user(buf, src, this_len))
566 return -EFAULT; 594 return -EFAULT;
567 retval = access_process_vm(tsk, dst, buf, this_len, 595 retval = ptrace_access_vm(tsk, dst, buf, this_len,
568 FOLL_FORCE | FOLL_WRITE); 596 FOLL_FORCE | FOLL_WRITE);
569 if (!retval) { 597 if (!retval) {
570 if (copied) 598 if (copied)
@@ -1128,7 +1156,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
1128 unsigned long tmp; 1156 unsigned long tmp;
1129 int copied; 1157 int copied;
1130 1158
1131 copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); 1159 copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
1132 if (copied != sizeof(tmp)) 1160 if (copied != sizeof(tmp))
1133 return -EIO; 1161 return -EIO;
1134 return put_user(tmp, (unsigned long __user *)data); 1162 return put_user(tmp, (unsigned long __user *)data);
@@ -1139,7 +1167,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
1139{ 1167{
1140 int copied; 1168 int copied;
1141 1169
1142 copied = access_process_vm(tsk, addr, &data, sizeof(data), 1170 copied = ptrace_access_vm(tsk, addr, &data, sizeof(data),
1143 FOLL_FORCE | FOLL_WRITE); 1171 FOLL_FORCE | FOLL_WRITE);
1144 return (copied == sizeof(data)) ? 0 : -EIO; 1172 return (copied == sizeof(data)) ? 0 : -EIO;
1145} 1173}
@@ -1157,7 +1185,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
1157 switch (request) { 1185 switch (request) {
1158 case PTRACE_PEEKTEXT: 1186 case PTRACE_PEEKTEXT:
1159 case PTRACE_PEEKDATA: 1187 case PTRACE_PEEKDATA:
1160 ret = access_process_vm(child, addr, &word, sizeof(word), 1188 ret = ptrace_access_vm(child, addr, &word, sizeof(word),
1161 FOLL_FORCE); 1189 FOLL_FORCE);
1162 if (ret != sizeof(word)) 1190 if (ret != sizeof(word))
1163 ret = -EIO; 1191 ret = -EIO;
@@ -1167,7 +1195,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
1167 1195
1168 case PTRACE_POKETEXT: 1196 case PTRACE_POKETEXT:
1169 case PTRACE_POKEDATA: 1197 case PTRACE_POKEDATA:
1170 ret = access_process_vm(child, addr, &data, sizeof(data), 1198 ret = ptrace_access_vm(child, addr, &data, sizeof(data),
1171 FOLL_FORCE | FOLL_WRITE); 1199 FOLL_FORCE | FOLL_WRITE);
1172 ret = (ret != sizeof(data) ? -EIO : 0); 1200 ret = (ret != sizeof(data) ? -EIO : 0);
1173 break; 1201 break;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index bff9c774987a..f7ce79a46050 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -41,8 +41,7 @@
41 * outside of a lifetime-guarded section. In general, this 41 * outside of a lifetime-guarded section. In general, this
42 * is only needed for handling filters shared across tasks. 42 * is only needed for handling filters shared across tasks.
43 * @prev: points to a previously installed, or inherited, filter 43 * @prev: points to a previously installed, or inherited, filter
44 * @len: the number of instructions in the program 44 * @prog: the BPF program to evaluate
45 * @insnsi: the BPF program instructions to evaluate
46 * 45 *
47 * seccomp_filter objects are organized in a tree linked via the @prev 46 * seccomp_filter objects are organized in a tree linked via the @prev
48 * pointer. For any task, it appears to be a singly-linked list starting 47 * pointer. For any task, it appears to be a singly-linked list starting
@@ -168,8 +167,8 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
168} 167}
169 168
170/** 169/**
171 * seccomp_run_filters - evaluates all seccomp filters against @syscall 170 * seccomp_run_filters - evaluates all seccomp filters against @sd
172 * @syscall: number of the current system call 171 * @sd: optional seccomp data to be passed to filters
173 * 172 *
174 * Returns valid seccomp BPF response codes. 173 * Returns valid seccomp BPF response codes.
175 */ 174 */