author		Ingo Molnar <mingo@kernel.org>	2013-06-19 06:55:31 -0400
committer	Ingo Molnar <mingo@kernel.org>	2013-06-19 06:55:31 -0400
commit		d81344c50824a4d28a9397e97135d60075ac37ff (patch)
tree		d25c443fb4a764cd788db857c49dd3d3f8f722d3 /kernel
parent		0de358f1c2642710d41190b73fbc295e675c4ab8 (diff)
parent		29bb9e5a75684106a37593ad75ec75ff8312731b (diff)

Merge branch 'sched/urgent' into sched/core

Merge in fixes before applying ongoing new work.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/audit.c			516
-rw-r--r--	kernel/audit.h			156
-rw-r--r--	kernel/auditfilter.c		363
-rw-r--r--	kernel/auditsc.c		407
-rw-r--r--	kernel/context_tracking.c	40
-rw-r--r--	kernel/cpu/idle.c		2
-rw-r--r--	kernel/events/core.c		240
-rw-r--r--	kernel/fork.c			5
-rw-r--r--	kernel/irq/irqdomain.c		20
-rw-r--r--	kernel/kmod.c			5
-rw-r--r--	kernel/lockdep.c		1
-rw-r--r--	kernel/module.c			21
-rw-r--r--	kernel/params.c			5
-rw-r--r--	kernel/printk.c			1
-rw-r--r--	kernel/ptrace.c			1
-rw-r--r--	kernel/rcutree_plugin.h		4
-rw-r--r--	kernel/relay.c			2
-rw-r--r--	kernel/rwsem.c			16
-rw-r--r--	kernel/sched/core.c		21
-rw-r--r--	kernel/sys_ni.c			1
-rw-r--r--	kernel/sysctl_binary.c		4
-rw-r--r--	kernel/time/Kconfig		5
-rw-r--r--	kernel/time/tick-broadcast.c	10
-rw-r--r--	kernel/time/tick-sched.c	3
-rw-r--r--	kernel/timer.c			2
-rw-r--r--	kernel/trace/Kconfig		2
-rw-r--r--	kernel/trace/blktrace.c		1
-rw-r--r--	kernel/trace/ftrace.c		126
-rw-r--r--	kernel/trace/ring_buffer.c	3
-rw-r--r--	kernel/trace/trace.c		9
-rw-r--r--	kernel/trace/trace_events.c	58
-rw-r--r--	kernel/trace/trace_events_filter.c	4
-rw-r--r--	kernel/trace/trace_kprobe.c	320
-rw-r--r--	kernel/workqueue.c		19
34 files changed, 1297 insertions, 1096 deletions
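
A pattern worth noting before reading the audit diffs below: hunk after hunk drops explicit (loginuid, sessionid, sid) parameters in favor of helpers that derive the same attribution from the current task at logging time. A minimal userspace sketch of that calling-convention change follows; every name in it is an illustrative stand-in, not the kernel's API.

/* Userspace model only: "current_task" stands in for the kernel's current,
 * and the functions approximate audit_log_session_info()/audit_set_*(). */
#include <stdio.h>

struct task { unsigned int auid, ses; };
static struct task current_task = { 1000, 42 };	/* models current */

static void log_session_info(void)	/* models audit_log_session_info() */
{
	printf(" auid=%u ses=%u", current_task.auid, current_task.ses);
}

/* After the change a setter takes only its own argument ... */
static int set_rate_limit(int limit)
{
	printf("audit_rate_limit=%d old=%d", limit, 0);
	log_session_info();	/* ... attribution is derived here, not passed in */
	printf(" res=1\n");
	return 0;
}

int main(void)
{
	return set_rate_limit(100);
}

The payoff is visible throughout the diff: one attribution-logging line replaces a three-argument tail that was threaded through every setter.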
diff --git a/kernel/audit.c b/kernel/audit.c
index 0b084fa44b1f..21c7fa615bd3 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -49,6 +49,8 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/kthread.h>
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
 
 #include <linux/audit.h>
 
@@ -265,7 +267,6 @@ void audit_log_lost(const char *message)
 }
 
 static int audit_log_config_change(char *function_name, int new, int old,
-				   kuid_t loginuid, u32 sessionid, u32 sid,
 				   int allow_changes)
 {
 	struct audit_buffer *ab;
@@ -274,29 +275,17 @@ static int audit_log_config_change(char *function_name, int new, int old,
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
 	if (unlikely(!ab))
 		return rc;
-	audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new,
-			 old, from_kuid(&init_user_ns, loginuid), sessionid);
-	if (sid) {
-		char *ctx = NULL;
-		u32 len;
-
-		rc = security_secid_to_secctx(sid, &ctx, &len);
-		if (rc) {
-			audit_log_format(ab, " sid=%u", sid);
-			allow_changes = 0; /* Something weird, deny request */
-		} else {
-			audit_log_format(ab, " subj=%s", ctx);
-			security_release_secctx(ctx, len);
-		}
-	}
+	audit_log_format(ab, "%s=%d old=%d", function_name, new, old);
+	audit_log_session_info(ab);
+	rc = audit_log_task_context(ab);
+	if (rc)
+		allow_changes = 0; /* Something weird, deny request */
 	audit_log_format(ab, " res=%d", allow_changes);
 	audit_log_end(ab);
 	return rc;
 }
 
-static int audit_do_config_change(char *function_name, int *to_change,
-				  int new, kuid_t loginuid, u32 sessionid,
-				  u32 sid)
+static int audit_do_config_change(char *function_name, int *to_change, int new)
 {
 	int allow_changes, rc = 0, old = *to_change;
 
@@ -307,8 +296,7 @@ static int audit_do_config_change(char *function_name, int *to_change,
 	allow_changes = 1;
 
 	if (audit_enabled != AUDIT_OFF) {
-		rc = audit_log_config_change(function_name, new, old, loginuid,
-					     sessionid, sid, allow_changes);
+		rc = audit_log_config_change(function_name, new, old, allow_changes);
 		if (rc)
 			allow_changes = 0;
 	}
@@ -322,44 +310,37 @@ static int audit_do_config_change(char *function_name, int *to_change,
 	return rc;
 }
 
-static int audit_set_rate_limit(int limit, kuid_t loginuid, u32 sessionid,
-				u32 sid)
+static int audit_set_rate_limit(int limit)
 {
-	return audit_do_config_change("audit_rate_limit", &audit_rate_limit,
-				      limit, loginuid, sessionid, sid);
+	return audit_do_config_change("audit_rate_limit", &audit_rate_limit, limit);
 }
 
-static int audit_set_backlog_limit(int limit, kuid_t loginuid, u32 sessionid,
-				   u32 sid)
+static int audit_set_backlog_limit(int limit)
 {
-	return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit,
-				      limit, loginuid, sessionid, sid);
+	return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit, limit);
 }
 
-static int audit_set_enabled(int state, kuid_t loginuid, u32 sessionid, u32 sid)
+static int audit_set_enabled(int state)
 {
 	int rc;
 	if (state < AUDIT_OFF || state > AUDIT_LOCKED)
 		return -EINVAL;
 
-	rc = audit_do_config_change("audit_enabled", &audit_enabled, state,
-				    loginuid, sessionid, sid);
-
+	rc = audit_do_config_change("audit_enabled", &audit_enabled, state);
 	if (!rc)
 		audit_ever_enabled |= !!state;
 
 	return rc;
 }
 
-static int audit_set_failure(int state, kuid_t loginuid, u32 sessionid, u32 sid)
+static int audit_set_failure(int state)
 {
 	if (state != AUDIT_FAIL_SILENT
 	    && state != AUDIT_FAIL_PRINTK
 	    && state != AUDIT_FAIL_PANIC)
 		return -EINVAL;
 
-	return audit_do_config_change("audit_failure", &audit_failure, state,
-				      loginuid, sessionid, sid);
+	return audit_do_config_change("audit_failure", &audit_failure, state);
 }
 
 /*
@@ -417,34 +398,53 @@ static void kauditd_send_skb(struct sk_buff *skb)
 	consume_skb(skb);
 }
 
-static int kauditd_thread(void *dummy)
+/*
+ * flush_hold_queue - empty the hold queue if auditd appears
+ *
+ * If auditd just started, drain the queue of messages already
+ * sent to syslog/printk. Remember loss here is ok. We already
+ * called audit_log_lost() if it didn't go out normally. so the
+ * race between the skb_dequeue and the next check for audit_pid
+ * doesn't matter.
+ *
+ * If you ever find kauditd to be too slow we can get a perf win
+ * by doing our own locking and keeping better track if there
+ * are messages in this queue. I don't see the need now, but
+ * in 5 years when I want to play with this again I'll see this
+ * note and still have no friggin idea what i'm thinking today.
+ */
+static void flush_hold_queue(void)
 {
 	struct sk_buff *skb;
 
+	if (!audit_default || !audit_pid)
+		return;
+
+	skb = skb_dequeue(&audit_skb_hold_queue);
+	if (likely(!skb))
+		return;
+
+	while (skb && audit_pid) {
+		kauditd_send_skb(skb);
+		skb = skb_dequeue(&audit_skb_hold_queue);
+	}
+
+	/*
+	 * if auditd just disappeared but we
+	 * dequeued an skb we need to drop ref
+	 */
+	if (skb)
+		consume_skb(skb);
+}
+
+static int kauditd_thread(void *dummy)
+{
 	set_freezable();
 	while (!kthread_should_stop()) {
-		/*
-		 * if auditd just started drain the queue of messages already
-		 * sent to syslog/printk. remember loss here is ok. we already
-		 * called audit_log_lost() if it didn't go out normally. so the
-		 * race between the skb_dequeue and the next check for audit_pid
-		 * doesn't matter.
-		 *
-		 * if you ever find kauditd to be too slow we can get a perf win
-		 * by doing our own locking and keeping better track if there
-		 * are messages in this queue. I don't see the need now, but
-		 * in 5 years when I want to play with this again I'll see this
-		 * note and still have no friggin idea what i'm thinking today.
-		 */
-		if (audit_default && audit_pid) {
-			skb = skb_dequeue(&audit_skb_hold_queue);
-			if (unlikely(skb)) {
-				while (skb && audit_pid) {
-					kauditd_send_skb(skb);
-					skb = skb_dequeue(&audit_skb_hold_queue);
-				}
-			}
-		}
+		struct sk_buff *skb;
+		DECLARE_WAITQUEUE(wait, current);
+
+		flush_hold_queue();
 
 		skb = skb_dequeue(&audit_skb_queue);
 		wake_up(&audit_backlog_wait);
@@ -453,19 +453,18 @@ static int kauditd_thread(void *dummy)
 				kauditd_send_skb(skb);
 			else
 				audit_printk_skb(skb);
-		} else {
-			DECLARE_WAITQUEUE(wait, current);
-			set_current_state(TASK_INTERRUPTIBLE);
-			add_wait_queue(&kauditd_wait, &wait);
-
-			if (!skb_queue_len(&audit_skb_queue)) {
-				try_to_freeze();
-				schedule();
-			}
+			continue;
+		}
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&kauditd_wait, &wait);
 
-			__set_current_state(TASK_RUNNING);
-			remove_wait_queue(&kauditd_wait, &wait);
+		if (!skb_queue_len(&audit_skb_queue)) {
+			try_to_freeze();
+			schedule();
 		}
+
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(&kauditd_wait, &wait);
 	}
 	return 0;
 }
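
The flush_hold_queue() logic extracted above is a small drain loop with one subtlety: the consumer (auditd, tracked via audit_pid) can vanish between dequeues, leaving one dequeued buffer that must still be released. A rough userspace model, with a plain linked-list queue standing in for the kernel's sk_buff queue and all names invented for illustration:

#include <stdio.h>
#include <stdlib.h>

struct skb { int id; struct skb *next; };
static struct skb *hold_queue;
static int audit_pid = 1234;		/* nonzero while the consumer is alive */

static struct skb *dequeue(void)
{
	struct skb *s = hold_queue;
	if (s)
		hold_queue = s->next;
	return s;
}

static void flush_hold_queue(void)
{
	struct skb *skb;

	if (!audit_pid)
		return;

	skb = dequeue();
	while (skb && audit_pid) {
		printf("sent skb %d\n", skb->id);	/* models kauditd_send_skb() */
		free(skb);
		skb = dequeue();
	}
	/* consumer disappeared after we dequeued one: drop that reference */
	free(skb);
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct skb *s = malloc(sizeof(*s));
		s->id = i;
		s->next = hold_queue;
		hold_queue = s;
	}
	flush_hold_queue();
	return 0;
}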
@@ -579,13 +578,14 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 		return -EPERM;
 
 	switch (msg_type) {
-	case AUDIT_GET:
 	case AUDIT_LIST:
-	case AUDIT_LIST_RULES:
-	case AUDIT_SET:
 	case AUDIT_ADD:
-	case AUDIT_ADD_RULE:
 	case AUDIT_DEL:
+		return -EOPNOTSUPP;
+	case AUDIT_GET:
+	case AUDIT_SET:
+	case AUDIT_LIST_RULES:
+	case AUDIT_ADD_RULE:
 	case AUDIT_DEL_RULE:
 	case AUDIT_SIGNAL_INFO:
 	case AUDIT_TTY_GET:
@@ -608,12 +608,10 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 	return err;
 }
 
-static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
-				     kuid_t auid, u32 ses, u32 sid)
+static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
 {
 	int rc = 0;
-	char *ctx = NULL;
-	u32 len;
+	uid_t uid = from_kuid(&init_user_ns, current_uid());
 
 	if (!audit_enabled) {
 		*ab = NULL;
@@ -623,33 +621,21 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
 	*ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
 	if (unlikely(!*ab))
 		return rc;
-	audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u",
-			 task_tgid_vnr(current),
-			 from_kuid(&init_user_ns, current_uid()),
-			 from_kuid(&init_user_ns, auid), ses);
-	if (sid) {
-		rc = security_secid_to_secctx(sid, &ctx, &len);
-		if (rc)
-			audit_log_format(*ab, " ssid=%u", sid);
-		else {
-			audit_log_format(*ab, " subj=%s", ctx);
-			security_release_secctx(ctx, len);
-		}
-	}
+	audit_log_format(*ab, "pid=%d uid=%u", task_tgid_vnr(current), uid);
+	audit_log_session_info(*ab);
+	audit_log_task_context(*ab);
 
 	return rc;
 }
 
 static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	u32 seq, sid;
+	u32 seq;
 	void *data;
 	struct audit_status *status_get, status_set;
 	int err;
 	struct audit_buffer *ab;
 	u16 msg_type = nlh->nlmsg_type;
-	kuid_t loginuid; /* loginuid of sender */
-	u32 sessionid;
 	struct audit_sig_info *sig_data;
 	char *ctx = NULL;
 	u32 len;
@@ -668,9 +654,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			return err;
 		}
 	}
-	loginuid = audit_get_loginuid(current);
-	sessionid = audit_get_sessionid(current);
-	security_task_getsecid(current, &sid);
 	seq  = nlh->nlmsg_seq;
 	data = nlmsg_data(nlh);
 
@@ -691,14 +674,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			return -EINVAL;
 		status_get   = (struct audit_status *)data;
 		if (status_get->mask & AUDIT_STATUS_ENABLED) {
-			err = audit_set_enabled(status_get->enabled,
-						loginuid, sessionid, sid);
+			err = audit_set_enabled(status_get->enabled);
 			if (err < 0)
 				return err;
 		}
 		if (status_get->mask & AUDIT_STATUS_FAILURE) {
-			err = audit_set_failure(status_get->failure,
-						loginuid, sessionid, sid);
+			err = audit_set_failure(status_get->failure);
 			if (err < 0)
 				return err;
 		}
@@ -706,22 +687,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			int new_pid = status_get->pid;
 
 			if (audit_enabled != AUDIT_OFF)
-				audit_log_config_change("audit_pid", new_pid,
-							audit_pid, loginuid,
-							sessionid, sid, 1);
-
+				audit_log_config_change("audit_pid", new_pid, audit_pid, 1);
 			audit_pid = new_pid;
 			audit_nlk_portid = NETLINK_CB(skb).portid;
 		}
 		if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) {
-			err = audit_set_rate_limit(status_get->rate_limit,
-						   loginuid, sessionid, sid);
+			err = audit_set_rate_limit(status_get->rate_limit);
 			if (err < 0)
 				return err;
 		}
 		if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
-			err = audit_set_backlog_limit(status_get->backlog_limit,
-						      loginuid, sessionid, sid);
+			err = audit_set_backlog_limit(status_get->backlog_limit);
 		break;
 	case AUDIT_USER:
 	case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
@@ -729,25 +705,22 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (!audit_enabled && msg_type != AUDIT_USER_AVC)
 			return 0;
 
-		err = audit_filter_user();
+		err = audit_filter_user(msg_type);
 		if (err == 1) {
 			err = 0;
 			if (msg_type == AUDIT_USER_TTY) {
-				err = tty_audit_push_task(current, loginuid,
-							  sessionid);
+				err = tty_audit_push_current();
 				if (err)
 					break;
 			}
-			audit_log_common_recv_msg(&ab, msg_type,
-						  loginuid, sessionid, sid);
-
+			audit_log_common_recv_msg(&ab, msg_type);
 			if (msg_type != AUDIT_USER_TTY)
 				audit_log_format(ab, " msg='%.1024s'",
 						 (char *)data);
 			else {
 				int size;
 
-				audit_log_format(ab, " msg=");
+				audit_log_format(ab, " data=");
 				size = nlmsg_len(nlh);
 				if (size > 0 &&
 				    ((unsigned char *)data)[size - 1] == '\0')
@@ -758,50 +731,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			audit_log_end(ab);
 		}
 		break;
-	case AUDIT_ADD:
-	case AUDIT_DEL:
-		if (nlmsg_len(nlh) < sizeof(struct audit_rule))
-			return -EINVAL;
-		if (audit_enabled == AUDIT_LOCKED) {
-			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE,
-						  loginuid, sessionid, sid);
-
-			audit_log_format(ab, " audit_enabled=%d res=0",
-					 audit_enabled);
-			audit_log_end(ab);
-			return -EPERM;
-		}
-		/* fallthrough */
-	case AUDIT_LIST:
-		err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid,
-					   seq, data, nlmsg_len(nlh),
-					   loginuid, sessionid, sid);
-		break;
 	case AUDIT_ADD_RULE:
 	case AUDIT_DEL_RULE:
 		if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
 			return -EINVAL;
 		if (audit_enabled == AUDIT_LOCKED) {
-			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE,
-						  loginuid, sessionid, sid);
-
-			audit_log_format(ab, " audit_enabled=%d res=0",
-					 audit_enabled);
+			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+			audit_log_format(ab, " audit_enabled=%d res=0", audit_enabled);
 			audit_log_end(ab);
 			return -EPERM;
 		}
 		/* fallthrough */
 	case AUDIT_LIST_RULES:
 		err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid,
-					   seq, data, nlmsg_len(nlh),
-					   loginuid, sessionid, sid);
+					   seq, data, nlmsg_len(nlh));
 		break;
 	case AUDIT_TRIM:
 		audit_trim_trees();
-
-		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE,
-					  loginuid, sessionid, sid);
-
+		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
 		audit_log_format(ab, " op=trim res=1");
 		audit_log_end(ab);
 		break;
@@ -831,8 +778,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		/* OK, here comes... */
 		err = audit_tag_tree(old, new);
 
-		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE,
-					  loginuid, sessionid, sid);
+		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
 
 		audit_log_format(ab, " op=make_equiv old=");
 		audit_log_untrustedstring(ab, old);
@@ -871,27 +817,30 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		struct audit_tty_status s;
 		struct task_struct *tsk = current;
 
-		spin_lock_irq(&tsk->sighand->siglock);
+		spin_lock(&tsk->sighand->siglock);
 		s.enabled = tsk->signal->audit_tty != 0;
-		spin_unlock_irq(&tsk->sighand->siglock);
+		s.log_passwd = tsk->signal->audit_tty_log_passwd;
+		spin_unlock(&tsk->sighand->siglock);
 
 		audit_send_reply(NETLINK_CB(skb).portid, seq,
 				 AUDIT_TTY_GET, 0, 0, &s, sizeof(s));
 		break;
 	}
 	case AUDIT_TTY_SET: {
-		struct audit_tty_status *s;
+		struct audit_tty_status s;
 		struct task_struct *tsk = current;
 
-		if (nlh->nlmsg_len < sizeof(struct audit_tty_status))
-			return -EINVAL;
-		s = data;
-		if (s->enabled != 0 && s->enabled != 1)
+		memset(&s, 0, sizeof(s));
+		/* guard against past and future API changes */
+		memcpy(&s, data, min(sizeof(s), (size_t)nlh->nlmsg_len));
+		if ((s.enabled != 0 && s.enabled != 1) ||
+		    (s.log_passwd != 0 && s.log_passwd != 1))
 			return -EINVAL;
 
-		spin_lock_irq(&tsk->sighand->siglock);
-		tsk->signal->audit_tty = s->enabled != 0;
-		spin_unlock_irq(&tsk->sighand->siglock);
+		spin_lock(&tsk->sighand->siglock);
+		tsk->signal->audit_tty = s.enabled;
+		tsk->signal->audit_tty_log_passwd = s.log_passwd;
+		spin_unlock(&tsk->sighand->siglock);
 		break;
 	}
 	default:
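
The "guard against past and future API changes" copy in AUDIT_TTY_SET above is a general trick for versioned userspace structs: zero the kernel-side struct first, then copy at most sizeof(s) bytes and no more than the sender supplied. Old userspace without the new log_passwd field then yields log_passwd == 0, and extra trailing bytes from newer userspace are ignored. A hedged, self-contained sketch of just that copy (struct layout and names are simplified stand-ins):

#include <stdio.h>
#include <string.h>

struct tty_status { unsigned int enabled; unsigned int log_passwd; };

static size_t min_size(size_t a, size_t b) { return a < b ? a : b; }

static int set_tty_status(const void *data, size_t len)
{
	struct tty_status s;

	memset(&s, 0, sizeof(s));			/* unsent fields default to 0 */
	memcpy(&s, data, min_size(sizeof(s), len));	/* never over-read or overflow */
	if ((s.enabled != 0 && s.enabled != 1) ||
	    (s.log_passwd != 0 && s.log_passwd != 1))
		return -1;				/* models -EINVAL */
	printf("enabled=%u log_passwd=%u\n", s.enabled, s.log_passwd);
	return 0;
}

int main(void)
{
	unsigned int legacy = 1;	/* old-format struct: enabled field only */
	return set_tty_status(&legacy, sizeof(legacy));
}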
@@ -1434,6 +1383,14 @@ void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
 	kfree(pathname);
 }
 
+void audit_log_session_info(struct audit_buffer *ab)
+{
+	u32 sessionid = audit_get_sessionid(current);
+	uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current));
+
+	audit_log_format(ab, " auid=%u ses=%u\n", auid, sessionid);
+}
+
 void audit_log_key(struct audit_buffer *ab, char *key)
 {
 	audit_log_format(ab, " key=");
@@ -1443,6 +1400,224 @@ void audit_log_key(struct audit_buffer *ab, char *key)
 		audit_log_format(ab, "(null)");
 }
 
+void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap)
+{
+	int i;
+
+	audit_log_format(ab, " %s=", prefix);
+	CAP_FOR_EACH_U32(i) {
+		audit_log_format(ab, "%08x",
+				 cap->cap[(_KERNEL_CAPABILITY_U32S-1) - i]);
+	}
+}
+
+void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
+{
+	kernel_cap_t *perm = &name->fcap.permitted;
+	kernel_cap_t *inh = &name->fcap.inheritable;
+	int log = 0;
+
+	if (!cap_isclear(*perm)) {
+		audit_log_cap(ab, "cap_fp", perm);
+		log = 1;
+	}
+	if (!cap_isclear(*inh)) {
+		audit_log_cap(ab, "cap_fi", inh);
+		log = 1;
+	}
+
+	if (log)
+		audit_log_format(ab, " cap_fe=%d cap_fver=%x",
+				 name->fcap.fE, name->fcap_ver);
+}
+
+static inline int audit_copy_fcaps(struct audit_names *name,
+				   const struct dentry *dentry)
+{
+	struct cpu_vfs_cap_data caps;
+	int rc;
+
+	if (!dentry)
+		return 0;
+
+	rc = get_vfs_caps_from_disk(dentry, &caps);
+	if (rc)
+		return rc;
+
+	name->fcap.permitted = caps.permitted;
+	name->fcap.inheritable = caps.inheritable;
+	name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
+	name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >>
+			 VFS_CAP_REVISION_SHIFT;
+
+	return 0;
+}
+
+/* Copy inode data into an audit_names. */
+void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
+		      const struct inode *inode)
+{
+	name->ino = inode->i_ino;
+	name->dev = inode->i_sb->s_dev;
+	name->mode = inode->i_mode;
+	name->uid = inode->i_uid;
+	name->gid = inode->i_gid;
+	name->rdev = inode->i_rdev;
+	security_inode_getsecid(inode, &name->osid);
+	audit_copy_fcaps(name, dentry);
+}
+
+/**
+ * audit_log_name - produce AUDIT_PATH record from struct audit_names
+ * @context: audit_context for the task
+ * @n: audit_names structure with reportable details
+ * @path: optional path to report instead of audit_names->name
+ * @record_num: record number to report when handling a list of names
+ * @call_panic: optional pointer to int that will be updated if secid fails
+ */
+void audit_log_name(struct audit_context *context, struct audit_names *n,
+		    struct path *path, int record_num, int *call_panic)
+{
+	struct audit_buffer *ab;
+	ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
+	if (!ab)
+		return;
+
+	audit_log_format(ab, "item=%d", record_num);
+
+	if (path)
+		audit_log_d_path(ab, " name=", path);
+	else if (n->name) {
+		switch (n->name_len) {
+		case AUDIT_NAME_FULL:
+			/* log the full path */
+			audit_log_format(ab, " name=");
+			audit_log_untrustedstring(ab, n->name->name);
+			break;
+		case 0:
+			/* name was specified as a relative path and the
+			 * directory component is the cwd */
+			audit_log_d_path(ab, " name=", &context->pwd);
+			break;
+		default:
+			/* log the name's directory component */
+			audit_log_format(ab, " name=");
+			audit_log_n_untrustedstring(ab, n->name->name,
+						    n->name_len);
+		}
+	} else
+		audit_log_format(ab, " name=(null)");
+
+	if (n->ino != (unsigned long)-1) {
+		audit_log_format(ab, " inode=%lu"
+				 " dev=%02x:%02x mode=%#ho"
+				 " ouid=%u ogid=%u rdev=%02x:%02x",
+				 n->ino,
+				 MAJOR(n->dev),
+				 MINOR(n->dev),
+				 n->mode,
+				 from_kuid(&init_user_ns, n->uid),
+				 from_kgid(&init_user_ns, n->gid),
+				 MAJOR(n->rdev),
+				 MINOR(n->rdev));
+	}
+	if (n->osid != 0) {
+		char *ctx = NULL;
+		u32 len;
+		if (security_secid_to_secctx(
+			n->osid, &ctx, &len)) {
+			audit_log_format(ab, " osid=%u", n->osid);
+			if (call_panic)
+				*call_panic = 2;
+		} else {
+			audit_log_format(ab, " obj=%s", ctx);
+			security_release_secctx(ctx, len);
+		}
+	}
+
+	audit_log_fcaps(ab, n);
+	audit_log_end(ab);
+}
+
+int audit_log_task_context(struct audit_buffer *ab)
+{
+	char *ctx = NULL;
+	unsigned len;
+	int error;
+	u32 sid;
+
+	security_task_getsecid(current, &sid);
+	if (!sid)
+		return 0;
+
+	error = security_secid_to_secctx(sid, &ctx, &len);
+	if (error) {
+		if (error != -EINVAL)
+			goto error_path;
+		return 0;
+	}
+
+	audit_log_format(ab, " subj=%s", ctx);
+	security_release_secctx(ctx, len);
+	return 0;
+
+error_path:
+	audit_panic("error in audit_log_task_context");
+	return error;
+}
+EXPORT_SYMBOL(audit_log_task_context);
+
+void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
+{
+	const struct cred *cred;
+	char name[sizeof(tsk->comm)];
+	struct mm_struct *mm = tsk->mm;
+	char *tty;
+
+	if (!ab)
+		return;
+
+	/* tsk == current */
+	cred = current_cred();
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
+		tty = tsk->signal->tty->name;
+	else
+		tty = "(none)";
+	spin_unlock_irq(&tsk->sighand->siglock);
+
+	audit_log_format(ab,
+			 " ppid=%ld pid=%d auid=%u uid=%u gid=%u"
+			 " euid=%u suid=%u fsuid=%u"
+			 " egid=%u sgid=%u fsgid=%u ses=%u tty=%s",
+			 sys_getppid(),
+			 tsk->pid,
+			 from_kuid(&init_user_ns, audit_get_loginuid(tsk)),
+			 from_kuid(&init_user_ns, cred->uid),
+			 from_kgid(&init_user_ns, cred->gid),
+			 from_kuid(&init_user_ns, cred->euid),
+			 from_kuid(&init_user_ns, cred->suid),
+			 from_kuid(&init_user_ns, cred->fsuid),
+			 from_kgid(&init_user_ns, cred->egid),
+			 from_kgid(&init_user_ns, cred->sgid),
+			 from_kgid(&init_user_ns, cred->fsgid),
+			 audit_get_sessionid(tsk), tty);
+
+	get_task_comm(name, tsk);
+	audit_log_format(ab, " comm=");
+	audit_log_untrustedstring(ab, name);
+
+	if (mm) {
+		down_read(&mm->mmap_sem);
+		if (mm->exe_file)
+			audit_log_d_path(ab, " exe=", &mm->exe_file->f_path);
+		up_read(&mm->mmap_sem);
+	}
+	audit_log_task_context(ab);
+}
+EXPORT_SYMBOL(audit_log_task_info);
+
 /**
  * audit_log_link_denied - report a link restriction denial
  * @operation: specific link operation
@@ -1451,19 +1626,28 @@ void audit_log_key(struct audit_buffer *ab, char *key)
 void audit_log_link_denied(const char *operation, struct path *link)
 {
 	struct audit_buffer *ab;
+	struct audit_names *name;
+
+	name = kzalloc(sizeof(*name), GFP_NOFS);
+	if (!name)
+		return;
 
+	/* Generate AUDIT_ANOM_LINK with subject, operation, outcome. */
 	ab = audit_log_start(current->audit_context, GFP_KERNEL,
 			     AUDIT_ANOM_LINK);
 	if (!ab)
-		return;
-	audit_log_format(ab, "op=%s action=denied", operation);
-	audit_log_format(ab, " pid=%d comm=", current->pid);
-	audit_log_untrustedstring(ab, current->comm);
-	audit_log_d_path(ab, " path=", link);
-	audit_log_format(ab, " dev=");
-	audit_log_untrustedstring(ab, link->dentry->d_inode->i_sb->s_id);
-	audit_log_format(ab, " ino=%lu", link->dentry->d_inode->i_ino);
+		goto out;
+	audit_log_format(ab, "op=%s", operation);
+	audit_log_task_info(ab, current);
+	audit_log_format(ab, " res=0");
 	audit_log_end(ab);
+
+	/* Generate AUDIT_PATH record with object. */
+	name->type = AUDIT_TYPE_NORMAL;
+	audit_copy_inode(name, link->dentry, link->dentry->d_inode);
+	audit_log_name(current->audit_context, name, link, 0, NULL);
+out:
+	kfree(name);
 }
 
 /**
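
One detail of the new audit_log_cap() above is easy to miss: capability mask words are emitted highest word first, so the concatenated %08x chunks read as a single big-endian hex number. A hedged standalone sketch of just that ordering, where NR_CAP_U32 stands in for the kernel's _KERNEL_CAPABILITY_U32S (2 at the time of this patch) and the demo data is invented:

#include <stdio.h>

#define NR_CAP_U32 2

static void log_cap(const char *prefix, const unsigned int cap[NR_CAP_U32])
{
	printf(" %s=", prefix);
	for (int i = 0; i < NR_CAP_U32; i++)
		printf("%08x", cap[(NR_CAP_U32 - 1) - i]);	/* high word first */
	printf("\n");
}

int main(void)
{
	/* low word plus one high-word bit, purely as demo data */
	unsigned int caps[NR_CAP_U32] = { 0x00001000, 0x00000002 };
	log_cap("cap_fp", caps);	/* prints " cap_fp=0000000200001000" */
	return 0;
}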
diff --git a/kernel/audit.h b/kernel/audit.h
index 11468d99dad0..1c95131ef760 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -22,6 +22,7 @@
 #include <linux/fs.h>
 #include <linux/audit.h>
 #include <linux/skbuff.h>
+#include <uapi/linux/mqueue.h>
 
 /* 0 = no checking
    1 = put_count checking
@@ -29,6 +30,11 @@
 */
 #define AUDIT_DEBUG 0
 
+/* AUDIT_NAMES is the number of slots we reserve in the audit_context
+ * for saving names from getname().  If we get more names we will allocate
+ * a name dynamically and also add those to the list anchored by names_list. */
+#define AUDIT_NAMES	5
+
 /* At task start time, the audit_state is set in the audit_context using
    a per-task filter.  At syscall entry, the audit_state is augmented by
    the syscall filter. */
@@ -59,8 +65,158 @@ struct audit_entry {
 	struct audit_krule rule;
 };
 
+struct audit_cap_data {
+	kernel_cap_t		permitted;
+	kernel_cap_t		inheritable;
+	union {
+		unsigned int	fE;		/* effective bit of file cap */
+		kernel_cap_t	effective;	/* effective set of process */
+	};
+};
+
+/* When fs/namei.c:getname() is called, we store the pointer in name and
+ * we don't let putname() free it (instead we free all of the saved
+ * pointers at syscall exit time).
+ *
+ * Further, in fs/namei.c:path_lookup() we store the inode and device.
+ */
+struct audit_names {
+	struct list_head	list;		/* audit_context->names_list */
+
+	struct filename		*name;
+	int			name_len;	/* number of chars to log */
+	bool			name_put;	/* call __putname()? */
+
+	unsigned long		ino;
+	dev_t			dev;
+	umode_t			mode;
+	kuid_t			uid;
+	kgid_t			gid;
+	dev_t			rdev;
+	u32			osid;
+	struct audit_cap_data	fcap;
+	unsigned int		fcap_ver;
+
+	unsigned char		type;		/* record type */
+	/*
+	 * This was an allocated audit_names and not from the array of
+	 * names allocated in the task audit context.  Thus this name
+	 * should be freed on syscall exit.
+	 */
+	bool			should_free;
+};
+
+/* The per-task audit context. */
+struct audit_context {
+	int		    dummy;	/* must be the first element */
+	int		    in_syscall;	/* 1 if task is in a syscall */
+	enum audit_state    state, current_state;
+	unsigned int	    serial;     /* serial number for record */
+	int		    major;      /* syscall number */
+	struct timespec	    ctime;      /* time of syscall entry */
+	unsigned long	    argv[4];    /* syscall arguments */
+	long		    return_code;/* syscall return code */
+	u64		    prio;
+	int		    return_valid; /* return code is valid */
+	/*
+	 * The names_list is the list of all audit_names collected during this
+	 * syscall.  The first AUDIT_NAMES entries in the names_list will
+	 * actually be from the preallocated_names array for performance
+	 * reasons.  Except during allocation they should never be referenced
+	 * through the preallocated_names array and should only be found/used
+	 * by running the names_list.
+	 */
+	struct audit_names  preallocated_names[AUDIT_NAMES];
+	int		    name_count; /* total records in names_list */
+	struct list_head    names_list; /* struct audit_names->list anchor */
+	char		    *filterkey;	/* key for rule that triggered record */
+	struct path	    pwd;
+	struct audit_aux_data *aux;
+	struct audit_aux_data *aux_pids;
+	struct sockaddr_storage *sockaddr;
+	size_t		    sockaddr_len;
+	/* Save things to print about task_struct */
+	pid_t		    pid, ppid;
+	kuid_t		    uid, euid, suid, fsuid;
+	kgid_t		    gid, egid, sgid, fsgid;
+	unsigned long	    personality;
+	int		    arch;
+
+	pid_t		    target_pid;
+	kuid_t		    target_auid;
+	kuid_t		    target_uid;
+	unsigned int	    target_sessionid;
+	u32		    target_sid;
+	char		    target_comm[TASK_COMM_LEN];
+
+	struct audit_tree_refs *trees, *first_trees;
+	struct list_head    killed_trees;
+	int		    tree_count;
+
+	int		    type;
+	union {
+		struct {
+			int nargs;
+			long args[6];
+		} socketcall;
+		struct {
+			kuid_t			uid;
+			kgid_t			gid;
+			umode_t			mode;
+			u32			osid;
+			int			has_perm;
+			uid_t			perm_uid;
+			gid_t			perm_gid;
+			umode_t			perm_mode;
+			unsigned long		qbytes;
+		} ipc;
+		struct {
+			mqd_t			mqdes;
+			struct mq_attr		mqstat;
+		} mq_getsetattr;
+		struct {
+			mqd_t			mqdes;
+			int			sigev_signo;
+		} mq_notify;
+		struct {
+			mqd_t			mqdes;
+			size_t			msg_len;
+			unsigned int		msg_prio;
+			struct timespec		abs_timeout;
+		} mq_sendrecv;
+		struct {
+			int			oflag;
+			umode_t			mode;
+			struct mq_attr		attr;
+		} mq_open;
+		struct {
+			pid_t			pid;
+			struct audit_cap_data	cap;
+		} capset;
+		struct {
+			int			fd;
+			int			flags;
+		} mmap;
+	};
+	int fds[2];
+
+#if AUDIT_DEBUG
+	int		    put_count;
+	int		    ino_count;
+#endif
+};
+
 extern int audit_ever_enabled;
 
+extern void audit_copy_inode(struct audit_names *name,
+			     const struct dentry *dentry,
+			     const struct inode *inode);
+extern void audit_log_cap(struct audit_buffer *ab, char *prefix,
+			  kernel_cap_t *cap);
+extern void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name);
+extern void audit_log_name(struct audit_context *context,
+			   struct audit_names *n, struct path *path,
+			   int record_num, int *call_panic);
+
 extern int audit_pid;
 
 #define AUDIT_INODE_BUCKETS 32
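
The comments moved into audit.h above describe a two-tier allocation scheme: the first AUDIT_NAMES entries come from a fixed array embedded in the context, and only overflow entries are heap-allocated with should_free set, so only those are freed individually at syscall exit. A loose userspace model of that idea (a sketch under simplified assumptions, not the kernel's list-based bookkeeping):

#include <stdio.h>
#include <stdlib.h>

#define AUDIT_NAMES 5

struct name_entry { int should_free; const char *name; };

struct ctx {
	struct name_entry prealloc[AUDIT_NAMES];
	int name_count;
};

static struct name_entry *get_name(struct ctx *c, const char *n)
{
	struct name_entry *e;

	if (c->name_count < AUDIT_NAMES) {
		e = &c->prealloc[c->name_count];	/* fast path: no allocation */
		e->should_free = 0;
	} else {
		e = calloc(1, sizeof(*e));		/* overflow path */
		if (!e)
			return NULL;
		e->should_free = 1;
	}
	e->name = n;
	c->name_count++;
	return e;
}

int main(void)
{
	struct ctx c = { .name_count = 0 };
	for (int i = 0; i < 7; i++) {
		struct name_entry *e = get_name(&c, "path");
		printf("entry %d heap=%d\n", i, e->should_free);
		if (e->should_free)
			free(e);	/* the real code defers this to syscall exit */
	}
	return 0;
}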
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 267436826c3b..6bd4a90d1991 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -310,121 +310,83 @@ static u32 audit_to_op(u32 op)
 	return n;
 }
 
-
-/* Translate struct audit_rule to kernel's rule respresentation.
- * Exists for backward compatibility with userspace. */
-static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
-{
-	struct audit_entry *entry;
-	int err = 0;
-	int i;
-
-	entry = audit_to_entry_common(rule);
-	if (IS_ERR(entry))
-		goto exit_nofree;
-
-	for (i = 0; i < rule->field_count; i++) {
-		struct audit_field *f = &entry->rule.fields[i];
-		u32 n;
-
-		n = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS);
-
-		/* Support for legacy operators where
-		 * AUDIT_NEGATE bit signifies != and otherwise assumes == */
-		if (n & AUDIT_NEGATE)
-			f->op = Audit_not_equal;
-		else if (!n)
-			f->op = Audit_equal;
-		else
-			f->op = audit_to_op(n);
-
-		entry->rule.vers_ops = (n & AUDIT_OPERATORS) ? 2 : 1;
-
-		f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
-		f->val = rule->values[i];
-		f->uid = INVALID_UID;
-		f->gid = INVALID_GID;
-
-		err = -EINVAL;
-		if (f->op == Audit_bad)
-			goto exit_free;
-
-		switch(f->type) {
-		default:
-			goto exit_free;
-		case AUDIT_UID:
-		case AUDIT_EUID:
-		case AUDIT_SUID:
-		case AUDIT_FSUID:
-		case AUDIT_LOGINUID:
-			/* bit ops not implemented for uid comparisons */
-			if (f->op == Audit_bitmask || f->op == Audit_bittest)
-				goto exit_free;
-
-			f->uid = make_kuid(current_user_ns(), f->val);
-			if (!uid_valid(f->uid))
-				goto exit_free;
-			break;
-		case AUDIT_GID:
-		case AUDIT_EGID:
-		case AUDIT_SGID:
-		case AUDIT_FSGID:
-			/* bit ops not implemented for gid comparisons */
-			if (f->op == Audit_bitmask || f->op == Audit_bittest)
-				goto exit_free;
-
-			f->gid = make_kgid(current_user_ns(), f->val);
-			if (!gid_valid(f->gid))
-				goto exit_free;
-			break;
-		case AUDIT_PID:
-		case AUDIT_PERS:
-		case AUDIT_MSGTYPE:
-		case AUDIT_PPID:
-		case AUDIT_DEVMAJOR:
-		case AUDIT_DEVMINOR:
-		case AUDIT_EXIT:
-		case AUDIT_SUCCESS:
-			/* bit ops are only useful on syscall args */
-			if (f->op == Audit_bitmask || f->op == Audit_bittest)
-				goto exit_free;
-			break;
-		case AUDIT_ARG0:
-		case AUDIT_ARG1:
-		case AUDIT_ARG2:
-		case AUDIT_ARG3:
-			break;
-		/* arch is only allowed to be = or != */
-		case AUDIT_ARCH:
-			if (f->op != Audit_not_equal && f->op != Audit_equal)
-				goto exit_free;
-			entry->rule.arch_f = f;
-			break;
-		case AUDIT_PERM:
-			if (f->val & ~15)
-				goto exit_free;
-			break;
-		case AUDIT_FILETYPE:
-			if (f->val & ~S_IFMT)
-				goto exit_free;
-			break;
-		case AUDIT_INODE:
-			err = audit_to_inode(&entry->rule, f);
-			if (err)
-				goto exit_free;
-			break;
-		}
-	}
-
-	if (entry->rule.inode_f && entry->rule.inode_f->op == Audit_not_equal)
-		entry->rule.inode_f = NULL;
-
-exit_nofree:
-	return entry;
+/* check if an audit field is valid */
+static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
+{
+	switch(f->type) {
+	case AUDIT_MSGTYPE:
+		if (entry->rule.listnr != AUDIT_FILTER_TYPE &&
+		    entry->rule.listnr != AUDIT_FILTER_USER)
+			return -EINVAL;
+		break;
+	};
 
-exit_free:
-	audit_free_rule(entry);
-	return ERR_PTR(err);
+	switch(f->type) {
+	default:
+		return -EINVAL;
+	case AUDIT_UID:
+	case AUDIT_EUID:
+	case AUDIT_SUID:
+	case AUDIT_FSUID:
+	case AUDIT_LOGINUID:
+	case AUDIT_OBJ_UID:
+	case AUDIT_GID:
+	case AUDIT_EGID:
+	case AUDIT_SGID:
+	case AUDIT_FSGID:
+	case AUDIT_OBJ_GID:
+	case AUDIT_PID:
+	case AUDIT_PERS:
+	case AUDIT_MSGTYPE:
+	case AUDIT_PPID:
+	case AUDIT_DEVMAJOR:
+	case AUDIT_DEVMINOR:
+	case AUDIT_EXIT:
+	case AUDIT_SUCCESS:
+		/* bit ops are only useful on syscall args */
+		if (f->op == Audit_bitmask || f->op == Audit_bittest)
+			return -EINVAL;
+		break;
+	case AUDIT_ARG0:
+	case AUDIT_ARG1:
+	case AUDIT_ARG2:
+	case AUDIT_ARG3:
+	case AUDIT_SUBJ_USER:
+	case AUDIT_SUBJ_ROLE:
+	case AUDIT_SUBJ_TYPE:
+	case AUDIT_SUBJ_SEN:
+	case AUDIT_SUBJ_CLR:
+	case AUDIT_OBJ_USER:
+	case AUDIT_OBJ_ROLE:
+	case AUDIT_OBJ_TYPE:
+	case AUDIT_OBJ_LEV_LOW:
+	case AUDIT_OBJ_LEV_HIGH:
+	case AUDIT_WATCH:
+	case AUDIT_DIR:
+	case AUDIT_FILTERKEY:
+		break;
+	case AUDIT_LOGINUID_SET:
+		if ((f->val != 0) && (f->val != 1))
+			return -EINVAL;
+		/* FALL THROUGH */
+	case AUDIT_ARCH:
+		if (f->op != Audit_not_equal && f->op != Audit_equal)
+			return -EINVAL;
+		break;
+	case AUDIT_PERM:
+		if (f->val & ~15)
+			return -EINVAL;
+		break;
+	case AUDIT_FILETYPE:
+		if (f->val & ~S_IFMT)
+			return -EINVAL;
+		break;
+	case AUDIT_FIELD_COMPARE:
+		if (f->val > AUDIT_MAX_FIELD_COMPARE)
+			return -EINVAL;
+		break;
+	};
+	return 0;
 }
 
 /* Translate struct audit_rule_data to kernel's rule respresentation. */
@@ -459,17 +421,25 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		f->gid = INVALID_GID;
 		f->lsm_str = NULL;
 		f->lsm_rule = NULL;
-		switch(f->type) {
+
+		/* Support legacy tests for a valid loginuid */
+		if ((f->type == AUDIT_LOGINUID) && (f->val == 4294967295)) {
+			f->type = AUDIT_LOGINUID_SET;
+			f->val = 0;
+		}
+
+		err = audit_field_valid(entry, f);
+		if (err)
+			goto exit_free;
+
+		err = -EINVAL;
+		switch (f->type) {
+		case AUDIT_LOGINUID:
 		case AUDIT_UID:
 		case AUDIT_EUID:
 		case AUDIT_SUID:
 		case AUDIT_FSUID:
-		case AUDIT_LOGINUID:
 		case AUDIT_OBJ_UID:
-			/* bit ops not implemented for uid comparisons */
-			if (f->op == Audit_bitmask || f->op == Audit_bittest)
-				goto exit_free;
-
 			f->uid = make_kuid(current_user_ns(), f->val);
 			if (!uid_valid(f->uid))
 				goto exit_free;
@@ -479,27 +449,10 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_SGID:
 		case AUDIT_FSGID:
 		case AUDIT_OBJ_GID:
-			/* bit ops not implemented for gid comparisons */
-			if (f->op == Audit_bitmask || f->op == Audit_bittest)
-				goto exit_free;
-
 			f->gid = make_kgid(current_user_ns(), f->val);
 			if (!gid_valid(f->gid))
 				goto exit_free;
 			break;
-		case AUDIT_PID:
-		case AUDIT_PERS:
-		case AUDIT_MSGTYPE:
-		case AUDIT_PPID:
-		case AUDIT_DEVMAJOR:
-		case AUDIT_DEVMINOR:
-		case AUDIT_EXIT:
-		case AUDIT_SUCCESS:
-		case AUDIT_ARG0:
-		case AUDIT_ARG1:
-		case AUDIT_ARG2:
-		case AUDIT_ARG3:
-			break;
 		case AUDIT_ARCH:
 			entry->rule.arch_f = f;
 			break;
@@ -570,20 +523,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 			entry->rule.buflen += f->val;
 			entry->rule.filterkey = str;
 			break;
-		case AUDIT_PERM:
-			if (f->val & ~15)
-				goto exit_free;
-			break;
-		case AUDIT_FILETYPE:
-			if (f->val & ~S_IFMT)
-				goto exit_free;
-			break;
-		case AUDIT_FIELD_COMPARE:
-			if (f->val > AUDIT_MAX_FIELD_COMPARE)
-				goto exit_free;
-			break;
-		default:
-			goto exit_free;
 		}
 	}
 
@@ -613,36 +552,6 @@ static inline size_t audit_pack_string(void **bufp, const char *str)
 	return len;
 }
 
-/* Translate kernel rule respresentation to struct audit_rule.
- * Exists for backward compatibility with userspace. */
-static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule)
-{
-	struct audit_rule *rule;
-	int i;
-
-	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
-	if (unlikely(!rule))
-		return NULL;
-
-	rule->flags = krule->flags | krule->listnr;
-	rule->action = krule->action;
-	rule->field_count = krule->field_count;
-	for (i = 0; i < rule->field_count; i++) {
-		rule->values[i] = krule->fields[i].val;
-		rule->fields[i] = krule->fields[i].type;
-
-		if (krule->vers_ops == 1) {
-			if (krule->fields[i].op == Audit_not_equal)
-				rule->fields[i] |= AUDIT_NEGATE;
-		} else {
-			rule->fields[i] |= audit_ops[krule->fields[i].op];
-		}
-	}
-	for (i = 0; i < AUDIT_BITMASK_SIZE; i++) rule->mask[i] = krule->mask[i];
-
-	return rule;
-}
-
 /* Translate kernel rule respresentation to struct audit_rule_data. */
 static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
 {
@@ -1055,35 +964,6 @@ out:
 	return ret;
 }
 
-/* List rules using struct audit_rule.  Exists for backward
- * compatibility with userspace. */
-static void audit_list(int pid, int seq, struct sk_buff_head *q)
-{
-	struct sk_buff *skb;
-	struct audit_krule *r;
-	int i;
-
-	/* This is a blocking read, so use audit_filter_mutex instead of rcu
-	 * iterator to sync with list writers. */
-	for (i=0; i<AUDIT_NR_FILTERS; i++) {
-		list_for_each_entry(r, &audit_rules_list[i], list) {
-			struct audit_rule *rule;
-
-			rule = audit_krule_to_rule(r);
-			if (unlikely(!rule))
-				break;
-			skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
-					       rule, sizeof(*rule));
-			if (skb)
-				skb_queue_tail(q, skb);
-			kfree(rule);
-		}
-	}
-	skb = audit_make_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0);
-	if (skb)
-		skb_queue_tail(q, skb);
-}
-
 /* List rules using struct audit_rule_data. */
 static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
 {
@@ -1113,11 +993,11 @@ static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
 }
 
 /* Log rule additions and removals */
-static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid,
-				  char *action, struct audit_krule *rule,
-				  int res)
+static void audit_log_rule_change(char *action, struct audit_krule *rule, int res)
 {
 	struct audit_buffer *ab;
+	uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(current));
+	u32 sessionid = audit_get_sessionid(current);
 
 	if (!audit_enabled)
 		return;
@@ -1125,18 +1005,8 @@ static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid,
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
 	if (!ab)
 		return;
-	audit_log_format(ab, "auid=%u ses=%u",
-			 from_kuid(&init_user_ns, loginuid), sessionid);
-	if (sid) {
-		char *ctx = NULL;
-		u32 len;
-		if (security_secid_to_secctx(sid, &ctx, &len))
-			audit_log_format(ab, " ssid=%u", sid);
-		else {
-			audit_log_format(ab, " subj=%s", ctx);
-			security_release_secctx(ctx, len);
-		}
-	}
+	audit_log_format(ab, "auid=%u ses=%u" ,loginuid, sessionid);
+	audit_log_task_context(ab);
 	audit_log_format(ab, " op=");
 	audit_log_string(ab, action);
 	audit_log_key(ab, rule->filterkey);
@@ -1151,12 +1021,8 @@ static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid,
  * @seq: netlink audit message sequence (serial) number
  * @data: payload data
  * @datasz: size of payload data
- * @loginuid: loginuid of sender
- * @sessionid: sessionid for netlink audit message
- * @sid: SE Linux Security ID of sender
  */
-int audit_receive_filter(int type, int pid, int seq, void *data,
-			 size_t datasz, kuid_t loginuid, u32 sessionid, u32 sid)
+int audit_receive_filter(int type, int pid, int seq, void *data, size_t datasz)
 {
 	struct task_struct *tsk;
 	struct audit_netlink_list *dest;
@@ -1164,7 +1030,6 @@ int audit_receive_filter(int type, int pid, int seq, void *data,
 	struct audit_entry *entry;
 
 	switch (type) {
-	case AUDIT_LIST:
 	case AUDIT_LIST_RULES:
 		/* We can't just spew out the rules here because we might fill
 		 * the available socket buffer space and deadlock waiting for
@@ -1179,10 +1044,7 @@ int audit_receive_filter(int type, int pid, int seq, void *data,
 		skb_queue_head_init(&dest->q);
 
 		mutex_lock(&audit_filter_mutex);
-		if (type == AUDIT_LIST)
-			audit_list(pid, seq, &dest->q);
-		else
-			audit_list_rules(pid, seq, &dest->q);
+		audit_list_rules(pid, seq, &dest->q);
 		mutex_unlock(&audit_filter_mutex);
 
 		tsk = kthread_run(audit_send_list, dest, "audit_send_list");
@@ -1192,35 +1054,23 @@ int audit_receive_filter(int type, int pid, int seq, void *data,
 			err = PTR_ERR(tsk);
 		}
 		break;
-	case AUDIT_ADD:
 	case AUDIT_ADD_RULE:
-		if (type == AUDIT_ADD)
-			entry = audit_rule_to_entry(data);
-		else
-			entry = audit_data_to_entry(data, datasz);
+		entry = audit_data_to_entry(data, datasz);
 		if (IS_ERR(entry))
 			return PTR_ERR(entry);
 
 		err = audit_add_rule(entry);
-		audit_log_rule_change(loginuid, sessionid, sid, "add rule",
-				      &entry->rule, !err);
-
+		audit_log_rule_change("add rule", &entry->rule, !err);
 		if (err)
 			audit_free_rule(entry);
 		break;
-	case AUDIT_DEL:
 	case AUDIT_DEL_RULE:
-		if (type == AUDIT_DEL)
-			entry = audit_rule_to_entry(data);
-		else
-			entry = audit_data_to_entry(data, datasz);
+		entry = audit_data_to_entry(data, datasz);
 		if (IS_ERR(entry))
 			return PTR_ERR(entry);
 
 		err = audit_del_rule(entry);
-		audit_log_rule_change(loginuid, sessionid, sid, "remove rule",
-				      &entry->rule, !err);
-
+		audit_log_rule_change("remove rule", &entry->rule, !err);
 		audit_free_rule(entry);
 		break;
 	default:
@@ -1358,7 +1208,7 @@ int audit_compare_dname_path(const char *dname, const char *path, int parentlen)
 	return strncmp(p, dname, dlen);
 }
 
-static int audit_filter_user_rules(struct audit_krule *rule,
+static int audit_filter_user_rules(struct audit_krule *rule, int type,
 				   enum audit_state *state)
 {
 	int i;
@@ -1382,6 +1232,13 @@ static int audit_filter_user_rules(struct audit_krule *rule,
 			result = audit_uid_comparator(audit_get_loginuid(current),
 						      f->op, f->uid);
 			break;
+		case AUDIT_LOGINUID_SET:
+			result = audit_comparator(audit_loginuid_set(current),
+						  f->op, f->val);
+			break;
+		case AUDIT_MSGTYPE:
+			result = audit_comparator(type, f->op, f->val);
+			break;
 		case AUDIT_SUBJ_USER:
 		case AUDIT_SUBJ_ROLE:
 		case AUDIT_SUBJ_TYPE:
@@ -1408,7 +1265,7 @@ static int audit_filter_user_rules(struct audit_krule *rule,
 	return 1;
 }
 
-int audit_filter_user(void)
+int audit_filter_user(int type)
 {
 	enum audit_state state = AUDIT_DISABLED;
 	struct audit_entry *e;
@@ -1416,7 +1273,7 @@ int audit_filter_user(void)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_USER], list) {
-		if (audit_filter_user_rules(&e->rule, &state)) {
+		if (audit_filter_user_rules(&e->rule, type, &state)) {
 			if (state == AUDIT_DISABLED)
 				ret = 0;
 			break;
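
Threading the message type into audit_filter_user() above is what makes AUDIT_MSGTYPE rules work on user messages, so specific record types can be suppressed before they are logged. A simplified sketch of that effect, reduced to the one comparator that matters here (the real code also evaluates rule actions and other fields):

#include <stdio.h>

enum op { OP_EQUAL, OP_NOT_EQUAL };

struct rule { int field_is_msgtype; enum op op; int val; };

static int comparator(int left, enum op op, int right)
{
	return op == OP_EQUAL ? left == right : left != right;
}

/* returns 1 if the message should be audited, 0 if a rule filters it out */
static int filter_user(const struct rule *rules, int n, int type)
{
	for (int i = 0; i < n; i++)
		if (rules[i].field_is_msgtype &&
		    comparator(type, rules[i].op, rules[i].val))
			return 0;	/* matched a "never audit"-style rule */
	return 1;
}

int main(void)
{
	struct rule drop_1130 = { 1, OP_EQUAL, 1130 };
	printf("%d %d\n", filter_user(&drop_1130, 1, 1130),
			  filter_user(&drop_1130, 1, 1100));	/* 0 1 */
	return 0;
}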
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c68229411a7c..3c8a601324a2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -76,11 +76,6 @@
 #define AUDITSC_SUCCESS 1
 #define AUDITSC_FAILURE 2
 
-/* AUDIT_NAMES is the number of slots we reserve in the audit_context
- * for saving names from getname().  If we get more names we will allocate
- * a name dynamically and also add those to the list anchored by names_list. */
-#define AUDIT_NAMES	5
-
 /* no execve audit message should be longer than this (userspace limits) */
 #define MAX_EXECVE_AUDIT_LEN 7500
 
@@ -90,44 +85,6 @@ int audit_n_rules;
 /* determines whether we collect data for signals sent */
 int audit_signals;
 
-struct audit_cap_data {
-	kernel_cap_t		permitted;
-	kernel_cap_t		inheritable;
-	union {
-		unsigned int	fE;		/* effective bit of a file capability */
-		kernel_cap_t	effective;	/* effective set of a process */
-	};
-};
-
-/* When fs/namei.c:getname() is called, we store the pointer in name and
- * we don't let putname() free it (instead we free all of the saved
- * pointers at syscall exit time).
- *
- * Further, in fs/namei.c:path_lookup() we store the inode and device.
- */
-struct audit_names {
-	struct list_head	list;		/* audit_context->names_list */
-	struct filename		*name;
-	unsigned long		ino;
-	dev_t			dev;
-	umode_t			mode;
-	kuid_t			uid;
-	kgid_t			gid;
-	dev_t			rdev;
-	u32			osid;
-	struct audit_cap_data	fcap;
-	unsigned int		fcap_ver;
-	int			name_len;	/* number of name's characters to log */
-	unsigned char		type;		/* record type */
-	bool			name_put;	/* call __putname() for this name */
-	/*
-	 * This was an allocated audit_names and not from the array of
-	 * names allocated in the task audit context.  Thus this name
-	 * should be freed on syscall exit
-	 */
-	bool			should_free;
-};
-
 struct audit_aux_data {
 	struct audit_aux_data *next;
 	int type;
@@ -175,106 +132,6 @@ struct audit_tree_refs {
 	struct audit_chunk *c[31];
 };
 
-/* The per-task audit context. */
-struct audit_context {
-	int		    dummy;	/* must be the first element */
-	int		    in_syscall;	/* 1 if task is in a syscall */
-	enum audit_state    state, current_state;
-	unsigned int	    serial;     /* serial number for record */
-	int		    major;      /* syscall number */
-	struct timespec	    ctime;      /* time of syscall entry */
-	unsigned long	    argv[4];    /* syscall arguments */
-	long		    return_code;/* syscall return code */
-	u64		    prio;
-	int		    return_valid; /* return code is valid */
-	/*
-	 * The names_list is the list of all audit_names collected during this
-	 * syscall.  The first AUDIT_NAMES entries in the names_list will
-	 * actually be from the preallocated_names array for performance
-	 * reasons.  Except during allocation they should never be referenced
-	 * through the preallocated_names array and should only be found/used
-	 * by running the names_list.
-	 */
-	struct audit_names  preallocated_names[AUDIT_NAMES];
-	int		    name_count; /* total records in names_list */
-	struct list_head    names_list; /* anchor for struct audit_names->list */
-	char		    *filterkey;	/* key for rule that triggered record */
-	struct path	    pwd;
-	struct audit_aux_data *aux;
-	struct audit_aux_data *aux_pids;
-	struct sockaddr_storage *sockaddr;
-	size_t		    sockaddr_len;
-	/* Save things to print about task_struct */
-	pid_t		    pid, ppid;
-	kuid_t		    uid, euid, suid, fsuid;
-	kgid_t		    gid, egid, sgid, fsgid;
-	unsigned long	    personality;
-	int		    arch;
-
-	pid_t		    target_pid;
-	kuid_t		    target_auid;
-	kuid_t		    target_uid;
-	unsigned int	    target_sessionid;
-	u32		    target_sid;
-	char		    target_comm[TASK_COMM_LEN];
-
-	struct audit_tree_refs *trees, *first_trees;
-	struct list_head    killed_trees;
-	int		    tree_count;
-
-	int		    type;
-	union {
-		struct {
-			int nargs;
-			long args[6];
-		} socketcall;
-		struct {
-			kuid_t			uid;
-			kgid_t			gid;
-			umode_t			mode;
-			u32			osid;
-			int			has_perm;
-			uid_t			perm_uid;
-			gid_t			perm_gid;
-			umode_t			perm_mode;
-			unsigned long		qbytes;
-		} ipc;
-		struct {
-			mqd_t			mqdes;
-			struct mq_attr		mqstat;
-		} mq_getsetattr;
-		struct {
-			mqd_t			mqdes;
-			int			sigev_signo;
-		} mq_notify;
-		struct {
-			mqd_t			mqdes;
-			size_t			msg_len;
-			unsigned int		msg_prio;
-			struct timespec		abs_timeout;
-		} mq_sendrecv;
-		struct {
-			int			oflag;
-			umode_t			mode;
-			struct mq_attr		attr;
-		} mq_open;
-		struct {
-			pid_t			pid;
-			struct audit_cap_data	cap;
-		} capset;
-		struct {
-			int			fd;
-			int			flags;
-		} mmap;
-	};
-	int fds[2];
-
-#if AUDIT_DEBUG
-	int		    put_count;
-	int		    ino_count;
-#endif
-};
-
 static inline int open_arg(int flags, int mask)
 {
 	int n = ACC_MODE(flags);
@@ -633,9 +490,23 @@ static int audit_filter_rules(struct task_struct *tsk,
633 break; 490 break;
634 case AUDIT_GID: 491 case AUDIT_GID:
635 result = audit_gid_comparator(cred->gid, f->op, f->gid); 492 result = audit_gid_comparator(cred->gid, f->op, f->gid);
493 if (f->op == Audit_equal) {
494 if (!result)
495 result = in_group_p(f->gid);
496 } else if (f->op == Audit_not_equal) {
497 if (result)
498 result = !in_group_p(f->gid);
499 }
636 break; 500 break;
637 case AUDIT_EGID: 501 case AUDIT_EGID:
638 result = audit_gid_comparator(cred->egid, f->op, f->gid); 502 result = audit_gid_comparator(cred->egid, f->op, f->gid);
503 if (f->op == Audit_equal) {
504 if (!result)
505 result = in_egroup_p(f->gid);
506 } else if (f->op == Audit_not_equal) {
507 if (result)
508 result = !in_egroup_p(f->gid);
509 }
639 break; 510 break;
640 case AUDIT_SGID: 511 case AUDIT_SGID:
641 result = audit_gid_comparator(cred->sgid, f->op, f->gid); 512 result = audit_gid_comparator(cred->sgid, f->op, f->gid);
@@ -742,6 +613,9 @@ static int audit_filter_rules(struct task_struct *tsk,
742 if (ctx) 613 if (ctx)
743 result = audit_uid_comparator(tsk->loginuid, f->op, f->uid); 614 result = audit_uid_comparator(tsk->loginuid, f->op, f->uid);
744 break; 615 break;
616 case AUDIT_LOGINUID_SET:
617 result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
618 break;
745 case AUDIT_SUBJ_USER: 619 case AUDIT_SUBJ_USER:
746 case AUDIT_SUBJ_ROLE: 620 case AUDIT_SUBJ_ROLE:
747 case AUDIT_SUBJ_TYPE: 621 case AUDIT_SUBJ_TYPE:
@@ -987,6 +861,8 @@ static inline void audit_free_names(struct audit_context *context)
987 861
988#if AUDIT_DEBUG == 2 862#if AUDIT_DEBUG == 2
989 if (context->put_count + context->ino_count != context->name_count) { 863 if (context->put_count + context->ino_count != context->name_count) {
864 int i = 0;
865
990 printk(KERN_ERR "%s:%d(:%d): major=%d in_syscall=%d" 866 printk(KERN_ERR "%s:%d(:%d): major=%d in_syscall=%d"
991 " name_count=%d put_count=%d" 867 " name_count=%d put_count=%d"
992 " ino_count=%d [NOT freeing]\n", 868 " ino_count=%d [NOT freeing]\n",
@@ -995,7 +871,7 @@ static inline void audit_free_names(struct audit_context *context)
995 context->name_count, context->put_count, 871 context->name_count, context->put_count,
996 context->ino_count); 872 context->ino_count);
997 list_for_each_entry(n, &context->names_list, list) { 873 list_for_each_entry(n, &context->names_list, list) {
998 printk(KERN_ERR "names[%d] = %p = %s\n", i, 874 printk(KERN_ERR "names[%d] = %p = %s\n", i++,
999 n->name, n->name->name ?: "(null)"); 875 n->name, n->name->name ?: "(null)");
1000 } 876 }
1001 dump_stack(); 877 dump_stack();
@@ -1010,7 +886,7 @@ static inline void audit_free_names(struct audit_context *context)
1010 list_for_each_entry_safe(n, next, &context->names_list, list) { 886 list_for_each_entry_safe(n, next, &context->names_list, list) {
1011 list_del(&n->list); 887 list_del(&n->list);
1012 if (n->name && n->name_put) 888 if (n->name && n->name_put)
1013 __putname(n->name); 889 final_putname(n->name);
1014 if (n->should_free) 890 if (n->should_free)
1015 kfree(n); 891 kfree(n);
1016 } 892 }
@@ -1093,88 +969,6 @@ static inline void audit_free_context(struct audit_context *context)
1093 kfree(context); 969 kfree(context);
1094} 970}
1095 971
1096void audit_log_task_context(struct audit_buffer *ab)
1097{
1098 char *ctx = NULL;
1099 unsigned len;
1100 int error;
1101 u32 sid;
1102
1103 security_task_getsecid(current, &sid);
1104 if (!sid)
1105 return;
1106
1107 error = security_secid_to_secctx(sid, &ctx, &len);
1108 if (error) {
1109 if (error != -EINVAL)
1110 goto error_path;
1111 return;
1112 }
1113
1114 audit_log_format(ab, " subj=%s", ctx);
1115 security_release_secctx(ctx, len);
1116 return;
1117
1118error_path:
1119 audit_panic("error in audit_log_task_context");
1120 return;
1121}
1122
1123EXPORT_SYMBOL(audit_log_task_context);
1124
1125void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
1126{
1127 const struct cred *cred;
1128 char name[sizeof(tsk->comm)];
1129 struct mm_struct *mm = tsk->mm;
1130 char *tty;
1131
1132 if (!ab)
1133 return;
1134
1135 /* tsk == current */
1136 cred = current_cred();
1137
1138 spin_lock_irq(&tsk->sighand->siglock);
1139 if (tsk->signal && tsk->signal->tty)
1140 tty = tsk->signal->tty->name;
1141 else
1142 tty = "(none)";
1143 spin_unlock_irq(&tsk->sighand->siglock);
1144
1145
1146 audit_log_format(ab,
1147 " ppid=%ld pid=%d auid=%u uid=%u gid=%u"
1148 " euid=%u suid=%u fsuid=%u"
1149 " egid=%u sgid=%u fsgid=%u ses=%u tty=%s",
1150 sys_getppid(),
1151 tsk->pid,
1152 from_kuid(&init_user_ns, tsk->loginuid),
1153 from_kuid(&init_user_ns, cred->uid),
1154 from_kgid(&init_user_ns, cred->gid),
1155 from_kuid(&init_user_ns, cred->euid),
1156 from_kuid(&init_user_ns, cred->suid),
1157 from_kuid(&init_user_ns, cred->fsuid),
1158 from_kgid(&init_user_ns, cred->egid),
1159 from_kgid(&init_user_ns, cred->sgid),
1160 from_kgid(&init_user_ns, cred->fsgid),
1161 tsk->sessionid, tty);
1162
1163 get_task_comm(name, tsk);
1164 audit_log_format(ab, " comm=");
1165 audit_log_untrustedstring(ab, name);
1166
1167 if (mm) {
1168 down_read(&mm->mmap_sem);
1169 if (mm->exe_file)
1170 audit_log_d_path(ab, " exe=", &mm->exe_file->f_path);
1171 up_read(&mm->mmap_sem);
1172 }
1173 audit_log_task_context(ab);
1174}
1175
1176EXPORT_SYMBOL(audit_log_task_info);
1177
1178static int audit_log_pid_context(struct audit_context *context, pid_t pid, 972static int audit_log_pid_context(struct audit_context *context, pid_t pid,
1179 kuid_t auid, kuid_t uid, unsigned int sessionid, 973 kuid_t auid, kuid_t uid, unsigned int sessionid,
1180 u32 sid, char *comm) 974 u32 sid, char *comm)
@@ -1191,12 +985,14 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid,
1191 audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, 985 audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid,
1192 from_kuid(&init_user_ns, auid), 986 from_kuid(&init_user_ns, auid),
1193 from_kuid(&init_user_ns, uid), sessionid); 987 from_kuid(&init_user_ns, uid), sessionid);
1194 if (security_secid_to_secctx(sid, &ctx, &len)) { 988 if (sid) {
1195 audit_log_format(ab, " obj=(none)"); 989 if (security_secid_to_secctx(sid, &ctx, &len)) {
1196 rc = 1; 990 audit_log_format(ab, " obj=(none)");
1197 } else { 991 rc = 1;
1198 audit_log_format(ab, " obj=%s", ctx); 992 } else {
1199 security_release_secctx(ctx, len); 993 audit_log_format(ab, " obj=%s", ctx);
994 security_release_secctx(ctx, len);
995 }
1200 } 996 }
1201 audit_log_format(ab, " ocomm="); 997 audit_log_format(ab, " ocomm=");
1202 audit_log_untrustedstring(ab, comm); 998 audit_log_untrustedstring(ab, comm);
@@ -1390,35 +1186,6 @@ static void audit_log_execve_info(struct audit_context *context,
1390 kfree(buf); 1186 kfree(buf);
1391} 1187}
1392 1188
1393static void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap)
1394{
1395 int i;
1396
1397 audit_log_format(ab, " %s=", prefix);
1398 CAP_FOR_EACH_U32(i) {
1399 audit_log_format(ab, "%08x", cap->cap[(_KERNEL_CAPABILITY_U32S-1) - i]);
1400 }
1401}
1402
1403static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
1404{
1405 kernel_cap_t *perm = &name->fcap.permitted;
1406 kernel_cap_t *inh = &name->fcap.inheritable;
1407 int log = 0;
1408
1409 if (!cap_isclear(*perm)) {
1410 audit_log_cap(ab, "cap_fp", perm);
1411 log = 1;
1412 }
1413 if (!cap_isclear(*inh)) {
1414 audit_log_cap(ab, "cap_fi", inh);
1415 log = 1;
1416 }
1417
1418 if (log)
1419 audit_log_format(ab, " cap_fe=%d cap_fver=%x", name->fcap.fE, name->fcap_ver);
1420}
1421
1422static void show_special(struct audit_context *context, int *call_panic) 1189static void show_special(struct audit_context *context, int *call_panic)
1423{ 1190{
1424 struct audit_buffer *ab; 1191 struct audit_buffer *ab;
@@ -1516,68 +1283,6 @@ static void show_special(struct audit_context *context, int *call_panic)
1516 audit_log_end(ab); 1283 audit_log_end(ab);
1517} 1284}
1518 1285
1519static void audit_log_name(struct audit_context *context, struct audit_names *n,
1520 int record_num, int *call_panic)
1521{
1522 struct audit_buffer *ab;
1523 ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
1524 if (!ab)
1525 return; /* audit_panic has been called */
1526
1527 audit_log_format(ab, "item=%d", record_num);
1528
1529 if (n->name) {
1530 switch (n->name_len) {
1531 case AUDIT_NAME_FULL:
1532 /* log the full path */
1533 audit_log_format(ab, " name=");
1534 audit_log_untrustedstring(ab, n->name->name);
1535 break;
1536 case 0:
1537 /* name was specified as a relative path and the
1538 * directory component is the cwd */
1539 audit_log_d_path(ab, " name=", &context->pwd);
1540 break;
1541 default:
1542 /* log the name's directory component */
1543 audit_log_format(ab, " name=");
1544 audit_log_n_untrustedstring(ab, n->name->name,
1545 n->name_len);
1546 }
1547 } else
1548 audit_log_format(ab, " name=(null)");
1549
1550 if (n->ino != (unsigned long)-1) {
1551 audit_log_format(ab, " inode=%lu"
1552 " dev=%02x:%02x mode=%#ho"
1553 " ouid=%u ogid=%u rdev=%02x:%02x",
1554 n->ino,
1555 MAJOR(n->dev),
1556 MINOR(n->dev),
1557 n->mode,
1558 from_kuid(&init_user_ns, n->uid),
1559 from_kgid(&init_user_ns, n->gid),
1560 MAJOR(n->rdev),
1561 MINOR(n->rdev));
1562 }
1563 if (n->osid != 0) {
1564 char *ctx = NULL;
1565 u32 len;
1566 if (security_secid_to_secctx(
1567 n->osid, &ctx, &len)) {
1568 audit_log_format(ab, " osid=%u", n->osid);
1569 *call_panic = 2;
1570 } else {
1571 audit_log_format(ab, " obj=%s", ctx);
1572 security_release_secctx(ctx, len);
1573 }
1574 }
1575
1576 audit_log_fcaps(ab, n);
1577
1578 audit_log_end(ab);
1579}
1580
1581static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) 1286static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
1582{ 1287{
1583 int i, call_panic = 0; 1288 int i, call_panic = 0;
@@ -1695,7 +1400,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
1695 1400
1696 i = 0; 1401 i = 0;
1697 list_for_each_entry(n, &context->names_list, list) 1402 list_for_each_entry(n, &context->names_list, list)
1698 audit_log_name(context, n, i++, &call_panic); 1403 audit_log_name(context, n, NULL, i++, &call_panic);
1699 1404
1700 /* Send end of event record to help user space know we are finished */ 1405 /* Send end of event record to help user space know we are finished */
1701 ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); 1406 ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
@@ -2030,18 +1735,18 @@ void audit_putname(struct filename *name)
2030 BUG_ON(!context); 1735 BUG_ON(!context);
2031 if (!context->in_syscall) { 1736 if (!context->in_syscall) {
2032#if AUDIT_DEBUG == 2 1737#if AUDIT_DEBUG == 2
2033 printk(KERN_ERR "%s:%d(:%d): __putname(%p)\n", 1738 printk(KERN_ERR "%s:%d(:%d): final_putname(%p)\n",
2034 __FILE__, __LINE__, context->serial, name); 1739 __FILE__, __LINE__, context->serial, name);
2035 if (context->name_count) { 1740 if (context->name_count) {
2036 struct audit_names *n; 1741 struct audit_names *n;
2037 int i; 1742 int i = 0;
2038 1743
2039 list_for_each_entry(n, &context->names_list, list) 1744 list_for_each_entry(n, &context->names_list, list)
2040 printk(KERN_ERR "name[%d] = %p = %s\n", i, 1745 printk(KERN_ERR "name[%d] = %p = %s\n", i++,
2041 n->name, n->name->name ?: "(null)"); 1746 n->name, n->name->name ?: "(null)");
2042 } 1747 }
2043#endif 1748#endif
2044 __putname(name); 1749 final_putname(name);
2045 } 1750 }
2046#if AUDIT_DEBUG 1751#if AUDIT_DEBUG
2047 else { 1752 else {
@@ -2060,41 +1765,6 @@ void audit_putname(struct filename *name)
2060#endif 1765#endif
2061} 1766}
2062 1767
2063static inline int audit_copy_fcaps(struct audit_names *name, const struct dentry *dentry)
2064{
2065 struct cpu_vfs_cap_data caps;
2066 int rc;
2067
2068 if (!dentry)
2069 return 0;
2070
2071 rc = get_vfs_caps_from_disk(dentry, &caps);
2072 if (rc)
2073 return rc;
2074
2075 name->fcap.permitted = caps.permitted;
2076 name->fcap.inheritable = caps.inheritable;
2077 name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
2078 name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT;
2079
2080 return 0;
2081}
2082
2083
2084/* Copy inode data into an audit_names. */
2085static void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
2086 const struct inode *inode)
2087{
2088 name->ino = inode->i_ino;
2089 name->dev = inode->i_sb->s_dev;
2090 name->mode = inode->i_mode;
2091 name->uid = inode->i_uid;
2092 name->gid = inode->i_gid;
2093 name->rdev = inode->i_rdev;
2094 security_inode_getsecid(inode, &name->osid);
2095 audit_copy_fcaps(name, dentry);
2096}
2097
2098/** 1768/**
2099 * __audit_inode - store the inode and device from a lookup 1769 * __audit_inode - store the inode and device from a lookup
2100 * @name: name being audited 1770 * @name: name being audited
@@ -2303,7 +1973,7 @@ int audit_set_loginuid(kuid_t loginuid)
2303 unsigned int sessionid; 1973 unsigned int sessionid;
2304 1974
2305#ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE 1975#ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE
2306 if (uid_valid(task->loginuid)) 1976 if (audit_loginuid_set(task))
2307 return -EPERM; 1977 return -EPERM;
2308#else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ 1978#else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
2309 if (!capable(CAP_AUDIT_CONTROL)) 1979 if (!capable(CAP_AUDIT_CONTROL))
@@ -2471,17 +2141,20 @@ int __audit_bprm(struct linux_binprm *bprm)
2471 2141
2472/** 2142/**
2473 * audit_socketcall - record audit data for sys_socketcall 2143 * audit_socketcall - record audit data for sys_socketcall
2474 * @nargs: number of args 2144 * @nargs: number of args, which should not be more than AUDITSC_ARGS.
2475 * @args: args array 2145 * @args: args array
2476 * 2146 *
2477 */ 2147 */
2478void __audit_socketcall(int nargs, unsigned long *args) 2148int __audit_socketcall(int nargs, unsigned long *args)
2479{ 2149{
2480 struct audit_context *context = current->audit_context; 2150 struct audit_context *context = current->audit_context;
2481 2151
2152 if (nargs <= 0 || nargs > AUDITSC_ARGS || !args)
2153 return -EINVAL;
2482 context->type = AUDIT_SOCKETCALL; 2154 context->type = AUDIT_SOCKETCALL;
2483 context->socketcall.nargs = nargs; 2155 context->socketcall.nargs = nargs;
2484 memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long)); 2156 memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long));
2157 return 0;
2485} 2158}
2486 2159
2487/** 2160/**
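With the return type change from void to int, a call site now has to check the result before doing any real work. A minimal sketch of the expected pattern (hypothetical caller; the audit_socketcall() wrapper and the AUDITSC_ARGS bound come from the header side of this patch, which is not shown here):

static long do_socketcall_example(int nr_args, unsigned long *args)
{
	int err;

	err = audit_socketcall(nr_args, args);	/* record args for auditing */
	if (err)
		return err;	/* -EINVAL: nr_args outside (0, AUDITSC_ARGS]
				 * or args == NULL; nothing was recorded */
	/* ... only now dispatch the requested socket operation ... */
	return 0;
}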
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 65349f07b878..66677003e223 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -71,6 +71,46 @@ void user_enter(void)
71 local_irq_restore(flags); 71 local_irq_restore(flags);
72} 72}
73 73
74#ifdef CONFIG_PREEMPT
75/**
76 * preempt_schedule_context - preempt_schedule called by tracing
77 *
78 * The tracing infrastructure uses preempt_enable_notrace to prevent
79 * recursion and tracing preempt enabling caused by the tracing
80 * infrastructure itself. But as tracing can happen in areas coming
81 * from userspace or just about to enter userspace, a preempt enable
82 * can occur before user_exit() is called. This will cause the scheduler
83 * to be called when the system is still in usermode.
84 *
85 * To prevent this, the preempt_enable_notrace will use this function
86 * instead of preempt_schedule() to exit user context if needed before
87 * calling the scheduler.
88 */
89void __sched notrace preempt_schedule_context(void)
90{
91 struct thread_info *ti = current_thread_info();
92 enum ctx_state prev_ctx;
93
94 if (likely(ti->preempt_count || irqs_disabled()))
95 return;
96
97 /*
98 * Need to disable preemption in case user_exit() is traced
99 * and the tracer calls preempt_enable_notrace() causing
100 * an infinite recursion.
101 */
102 preempt_disable_notrace();
103 prev_ctx = exception_enter();
104 preempt_enable_no_resched_notrace();
105
106 preempt_schedule();
107
108 preempt_disable_notrace();
109 exception_exit(prev_ctx);
110 preempt_enable_notrace();
111}
112EXPORT_SYMBOL_GPL(preempt_schedule_context);
113#endif /* CONFIG_PREEMPT */
74 114
75/** 115/**
76 * user_exit - Inform the context tracking that the CPU is 116 * user_exit - Inform the context tracking that the CPU is
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c
index 8b86c0c68edf..d5585f5e038e 100644
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -40,11 +40,13 @@ __setup("hlt", cpu_idle_nopoll_setup);
40 40
41static inline int cpu_idle_poll(void) 41static inline int cpu_idle_poll(void)
42{ 42{
43 rcu_idle_enter();
43 trace_cpu_idle_rcuidle(0, smp_processor_id()); 44 trace_cpu_idle_rcuidle(0, smp_processor_id());
44 local_irq_enable(); 45 local_irq_enable();
45 while (!need_resched()) 46 while (!need_resched())
46 cpu_relax(); 47 cpu_relax();
47 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); 48 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
49 rcu_idle_exit();
48 return 1; 50 return 1;
49} 51}
50 52
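For reference, the resulting shape of cpu_idle_poll() with the RCU bracketing in place, reconstructed from the hunk above:

static inline int cpu_idle_poll(void)
{
	rcu_idle_enter();	/* RCU may treat this CPU as quiescent ... */
	trace_cpu_idle_rcuidle(0, smp_processor_id());
	local_irq_enable();
	while (!need_resched())
		cpu_relax();	/* spin without disturbing RCU */
	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
	rcu_idle_exit();	/* ... until polling ends here */
	return 1;
}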
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6b41c1899a8b..9dc297faf7c0 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4394,6 +4394,64 @@ perf_event_read_event(struct perf_event *event,
4394 perf_output_end(&handle); 4394 perf_output_end(&handle);
4395} 4395}
4396 4396
4397typedef int (perf_event_aux_match_cb)(struct perf_event *event, void *data);
4398typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
4399
4400static void
4401perf_event_aux_ctx(struct perf_event_context *ctx,
4402 perf_event_aux_match_cb match,
4403 perf_event_aux_output_cb output,
4404 void *data)
4405{
4406 struct perf_event *event;
4407
4408 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
4409 if (event->state < PERF_EVENT_STATE_INACTIVE)
4410 continue;
4411 if (!event_filter_match(event))
4412 continue;
4413 if (match(event, data))
4414 output(event, data);
4415 }
4416}
4417
4418static void
4419perf_event_aux(perf_event_aux_match_cb match,
4420 perf_event_aux_output_cb output,
4421 void *data,
4422 struct perf_event_context *task_ctx)
4423{
4424 struct perf_cpu_context *cpuctx;
4425 struct perf_event_context *ctx;
4426 struct pmu *pmu;
4427 int ctxn;
4428
4429 rcu_read_lock();
4430 list_for_each_entry_rcu(pmu, &pmus, entry) {
4431 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4432 if (cpuctx->unique_pmu != pmu)
4433 goto next;
4434 perf_event_aux_ctx(&cpuctx->ctx, match, output, data);
4435 if (task_ctx)
4436 goto next;
4437 ctxn = pmu->task_ctx_nr;
4438 if (ctxn < 0)
4439 goto next;
4440 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
4441 if (ctx)
4442 perf_event_aux_ctx(ctx, match, output, data);
4443next:
4444 put_cpu_ptr(pmu->pmu_cpu_context);
4445 }
4446
4447 if (task_ctx) {
4448 preempt_disable();
4449 perf_event_aux_ctx(task_ctx, match, output, data);
4450 preempt_enable();
4451 }
4452 rcu_read_unlock();
4453}
4454
4397/* 4455/*
4398 * task tracking -- fork/exit 4456 * task tracking -- fork/exit
4399 * 4457 *
@@ -4416,8 +4474,9 @@ struct perf_task_event {
4416}; 4474};
4417 4475
4418static void perf_event_task_output(struct perf_event *event, 4476static void perf_event_task_output(struct perf_event *event,
4419 struct perf_task_event *task_event) 4477 void *data)
4420{ 4478{
4479 struct perf_task_event *task_event = data;
4421 struct perf_output_handle handle; 4480 struct perf_output_handle handle;
4422 struct perf_sample_data sample; 4481 struct perf_sample_data sample;
4423 struct task_struct *task = task_event->task; 4482 struct task_struct *task = task_event->task;
@@ -4445,62 +4504,11 @@ out:
4445 task_event->event_id.header.size = size; 4504 task_event->event_id.header.size = size;
4446} 4505}
4447 4506
4448static int perf_event_task_match(struct perf_event *event) 4507static int perf_event_task_match(struct perf_event *event,
4449{ 4508 void *data __maybe_unused)
4450 if (event->state < PERF_EVENT_STATE_INACTIVE)
4451 return 0;
4452
4453 if (!event_filter_match(event))
4454 return 0;
4455
4456 if (event->attr.comm || event->attr.mmap ||
4457 event->attr.mmap_data || event->attr.task)
4458 return 1;
4459
4460 return 0;
4461}
4462
4463static void perf_event_task_ctx(struct perf_event_context *ctx,
4464 struct perf_task_event *task_event)
4465{ 4509{
4466 struct perf_event *event; 4510 return event->attr.comm || event->attr.mmap ||
4467 4511 event->attr.mmap_data || event->attr.task;
4468 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
4469 if (perf_event_task_match(event))
4470 perf_event_task_output(event, task_event);
4471 }
4472}
4473
4474static void perf_event_task_event(struct perf_task_event *task_event)
4475{
4476 struct perf_cpu_context *cpuctx;
4477 struct perf_event_context *ctx;
4478 struct pmu *pmu;
4479 int ctxn;
4480
4481 rcu_read_lock();
4482 list_for_each_entry_rcu(pmu, &pmus, entry) {
4483 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4484 if (cpuctx->unique_pmu != pmu)
4485 goto next;
4486 perf_event_task_ctx(&cpuctx->ctx, task_event);
4487
4488 ctx = task_event->task_ctx;
4489 if (!ctx) {
4490 ctxn = pmu->task_ctx_nr;
4491 if (ctxn < 0)
4492 goto next;
4493 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
4494 if (ctx)
4495 perf_event_task_ctx(ctx, task_event);
4496 }
4497next:
4498 put_cpu_ptr(pmu->pmu_cpu_context);
4499 }
4500 if (task_event->task_ctx)
4501 perf_event_task_ctx(task_event->task_ctx, task_event);
4502
4503 rcu_read_unlock();
4504} 4512}
4505 4513
4506static void perf_event_task(struct task_struct *task, 4514static void perf_event_task(struct task_struct *task,
@@ -4531,7 +4539,10 @@ static void perf_event_task(struct task_struct *task,
4531 }, 4539 },
4532 }; 4540 };
4533 4541
4534 perf_event_task_event(&task_event); 4542 perf_event_aux(perf_event_task_match,
4543 perf_event_task_output,
4544 &task_event,
4545 task_ctx);
4535} 4546}
4536 4547
4537void perf_event_fork(struct task_struct *task) 4548void perf_event_fork(struct task_struct *task)
@@ -4557,8 +4568,9 @@ struct perf_comm_event {
4557}; 4568};
4558 4569
4559static void perf_event_comm_output(struct perf_event *event, 4570static void perf_event_comm_output(struct perf_event *event,
4560 struct perf_comm_event *comm_event) 4571 void *data)
4561{ 4572{
4573 struct perf_comm_event *comm_event = data;
4562 struct perf_output_handle handle; 4574 struct perf_output_handle handle;
4563 struct perf_sample_data sample; 4575 struct perf_sample_data sample;
4564 int size = comm_event->event_id.header.size; 4576 int size = comm_event->event_id.header.size;
@@ -4585,39 +4597,16 @@ out:
4585 comm_event->event_id.header.size = size; 4597 comm_event->event_id.header.size = size;
4586} 4598}
4587 4599
4588static int perf_event_comm_match(struct perf_event *event) 4600static int perf_event_comm_match(struct perf_event *event,
4589{ 4601 void *data __maybe_unused)
4590 if (event->state < PERF_EVENT_STATE_INACTIVE)
4591 return 0;
4592
4593 if (!event_filter_match(event))
4594 return 0;
4595
4596 if (event->attr.comm)
4597 return 1;
4598
4599 return 0;
4600}
4601
4602static void perf_event_comm_ctx(struct perf_event_context *ctx,
4603 struct perf_comm_event *comm_event)
4604{ 4602{
4605 struct perf_event *event; 4603 return event->attr.comm;
4606
4607 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
4608 if (perf_event_comm_match(event))
4609 perf_event_comm_output(event, comm_event);
4610 }
4611} 4604}
4612 4605
4613static void perf_event_comm_event(struct perf_comm_event *comm_event) 4606static void perf_event_comm_event(struct perf_comm_event *comm_event)
4614{ 4607{
4615 struct perf_cpu_context *cpuctx;
4616 struct perf_event_context *ctx;
4617 char comm[TASK_COMM_LEN]; 4608 char comm[TASK_COMM_LEN];
4618 unsigned int size; 4609 unsigned int size;
4619 struct pmu *pmu;
4620 int ctxn;
4621 4610
4622 memset(comm, 0, sizeof(comm)); 4611 memset(comm, 0, sizeof(comm));
4623 strlcpy(comm, comm_event->task->comm, sizeof(comm)); 4612 strlcpy(comm, comm_event->task->comm, sizeof(comm));
@@ -4627,24 +4616,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
4627 comm_event->comm_size = size; 4616 comm_event->comm_size = size;
4628 4617
4629 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; 4618 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
4630 rcu_read_lock();
4631 list_for_each_entry_rcu(pmu, &pmus, entry) {
4632 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4633 if (cpuctx->unique_pmu != pmu)
4634 goto next;
4635 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
4636 4619
4637 ctxn = pmu->task_ctx_nr; 4620 perf_event_aux(perf_event_comm_match,
4638 if (ctxn < 0) 4621 perf_event_comm_output,
4639 goto next; 4622 comm_event,
4640 4623 NULL);
4641 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
4642 if (ctx)
4643 perf_event_comm_ctx(ctx, comm_event);
4644next:
4645 put_cpu_ptr(pmu->pmu_cpu_context);
4646 }
4647 rcu_read_unlock();
4648} 4624}
4649 4625
4650void perf_event_comm(struct task_struct *task) 4626void perf_event_comm(struct task_struct *task)
@@ -4706,8 +4682,9 @@ struct perf_mmap_event {
4706}; 4682};
4707 4683
4708static void perf_event_mmap_output(struct perf_event *event, 4684static void perf_event_mmap_output(struct perf_event *event,
4709 struct perf_mmap_event *mmap_event) 4685 void *data)
4710{ 4686{
4687 struct perf_mmap_event *mmap_event = data;
4711 struct perf_output_handle handle; 4688 struct perf_output_handle handle;
4712 struct perf_sample_data sample; 4689 struct perf_sample_data sample;
4713 int size = mmap_event->event_id.header.size; 4690 int size = mmap_event->event_id.header.size;
@@ -4734,46 +4711,24 @@ out:
4734} 4711}
4735 4712
4736static int perf_event_mmap_match(struct perf_event *event, 4713static int perf_event_mmap_match(struct perf_event *event,
4737 struct perf_mmap_event *mmap_event, 4714 void *data)
4738 int executable)
4739{
4740 if (event->state < PERF_EVENT_STATE_INACTIVE)
4741 return 0;
4742
4743 if (!event_filter_match(event))
4744 return 0;
4745
4746 if ((!executable && event->attr.mmap_data) ||
4747 (executable && event->attr.mmap))
4748 return 1;
4749
4750 return 0;
4751}
4752
4753static void perf_event_mmap_ctx(struct perf_event_context *ctx,
4754 struct perf_mmap_event *mmap_event,
4755 int executable)
4756{ 4715{
4757 struct perf_event *event; 4716 struct perf_mmap_event *mmap_event = data;
4717 struct vm_area_struct *vma = mmap_event->vma;
4718 int executable = vma->vm_flags & VM_EXEC;
4758 4719
4759 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 4720 return (!executable && event->attr.mmap_data) ||
4760 if (perf_event_mmap_match(event, mmap_event, executable)) 4721 (executable && event->attr.mmap);
4761 perf_event_mmap_output(event, mmap_event);
4762 }
4763} 4722}
4764 4723
4765static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) 4724static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
4766{ 4725{
4767 struct perf_cpu_context *cpuctx;
4768 struct perf_event_context *ctx;
4769 struct vm_area_struct *vma = mmap_event->vma; 4726 struct vm_area_struct *vma = mmap_event->vma;
4770 struct file *file = vma->vm_file; 4727 struct file *file = vma->vm_file;
4771 unsigned int size; 4728 unsigned int size;
4772 char tmp[16]; 4729 char tmp[16];
4773 char *buf = NULL; 4730 char *buf = NULL;
4774 const char *name; 4731 const char *name;
4775 struct pmu *pmu;
4776 int ctxn;
4777 4732
4778 memset(tmp, 0, sizeof(tmp)); 4733 memset(tmp, 0, sizeof(tmp));
4779 4734
@@ -4829,27 +4784,10 @@ got_name:
4829 4784
4830 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; 4785 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
4831 4786
4832 rcu_read_lock(); 4787 perf_event_aux(perf_event_mmap_match,
4833 list_for_each_entry_rcu(pmu, &pmus, entry) { 4788 perf_event_mmap_output,
4834 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 4789 mmap_event,
4835 if (cpuctx->unique_pmu != pmu) 4790 NULL);
4836 goto next;
4837 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
4838 vma->vm_flags & VM_EXEC);
4839
4840 ctxn = pmu->task_ctx_nr;
4841 if (ctxn < 0)
4842 goto next;
4843
4844 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
4845 if (ctx) {
4846 perf_event_mmap_ctx(ctx, mmap_event,
4847 vma->vm_flags & VM_EXEC);
4848 }
4849next:
4850 put_cpu_ptr(pmu->pmu_cpu_context);
4851 }
4852 rcu_read_unlock();
4853 4791
4854 kfree(buf); 4792 kfree(buf);
4855} 4793}
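After this refactor the three duplicated iterate-all-contexts loops collapse into the single perf_event_aux() walker; a side-band event type only supplies a match predicate and an output emitter. State and event_filter_match() checks are done centrally in perf_event_aux_ctx(), so the predicate only tests attribute bits. A minimal sketch (hypothetical "foo" event reusing the comm payload and emitter from above):

static int perf_event_foo_match(struct perf_event *event, void *data)
{
	return event->attr.comm;		/* deliver to comm listeners only */
}

static void perf_event_foo_output(struct perf_event *event, void *data)
{
	struct perf_comm_event *foo = data;	/* cast the payload back */
	perf_event_comm_output(event, foo);	/* emit via the existing writer */
}

static void perf_event_foo(struct perf_comm_event *foo_event)
{
	/* one walker visits every eligible event in every context */
	perf_event_aux(perf_event_foo_match, perf_event_foo_output,
		       foo_event, NULL);
}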
diff --git a/kernel/fork.c b/kernel/fork.c
index 7d40687b1434..987b28a1f01b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -70,6 +70,7 @@
70#include <linux/khugepaged.h> 70#include <linux/khugepaged.h>
71#include <linux/signalfd.h> 71#include <linux/signalfd.h>
72#include <linux/uprobes.h> 72#include <linux/uprobes.h>
73#include <linux/aio.h>
73 74
74#include <asm/pgtable.h> 75#include <asm/pgtable.h>
75#include <asm/pgalloc.h> 76#include <asm/pgalloc.h>
@@ -1303,6 +1304,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1303 p->memcg_batch.do_batch = 0; 1304 p->memcg_batch.do_batch = 0;
1304 p->memcg_batch.memcg = NULL; 1305 p->memcg_batch.memcg = NULL;
1305#endif 1306#endif
1307#ifdef CONFIG_BCACHE
1308 p->sequential_io = 0;
1309 p->sequential_io_avg = 0;
1310#endif
1306 1311
1307 /* Perform scheduler related setup. Assign this task to a CPU. */ 1312 /* Perform scheduler related setup. Assign this task to a CPU. */
1308 sched_fork(p); 1313 sched_fork(p);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 96f3a1d9c379..5a83dde8ca0c 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -462,9 +462,23 @@ int irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
462 if (domain->ops->map) { 462 if (domain->ops->map) {
463 ret = domain->ops->map(domain, virq, hwirq); 463 ret = domain->ops->map(domain, virq, hwirq);
464 if (ret != 0) { 464 if (ret != 0) {
465 pr_err("irq-%i==>hwirq-0x%lx mapping failed: %d\n", 465 /*
466 virq, hwirq, ret); 466 * If map() returns -EPERM, this interrupt is protected
467 WARN_ON(1); 467 * by the firmware or some other service and shall not
468 * be mapped.
469 *
 470 * Since on some platforms we blindly try to map everything,
 471 * we end up with a log full of backtraces.
472 *
473 * So instead, we silently fail on -EPERM, it is the
474 * responsibility of the PIC driver to display a relevant
475 * message if needed.
476 */
477 if (ret != -EPERM) {
478 pr_err("irq-%i==>hwirq-0x%lx mapping failed: %d\n",
479 virq, hwirq, ret);
480 WARN_ON(1);
481 }
468 irq_data->domain = NULL; 482 irq_data->domain = NULL;
469 irq_data->hwirq = 0; 483 irq_data->hwirq = 0;
470 goto err_unmap; 484 goto err_unmap;
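The new contract lets an interrupt controller driver veto individual mappings without spamming the log. A sketch of a map() callback using it (foo_* names and the firmware check are hypothetical; irq_set_chip_and_handler() and handle_level_irq are the usual genirq helpers):

static int foo_pic_map(struct irq_domain *d, unsigned int virq,
		       irq_hw_number_t hwirq)
{
	if (foo_hwirq_is_secure(hwirq))	/* assumed firmware-ownership check */
		return -EPERM;		/* core now fails this mapping quietly */

	irq_set_chip_and_handler(virq, &foo_irq_chip, handle_level_irq);
	return 0;
}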
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 1296e72e4161..8241906c4b61 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -569,6 +569,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
569 int retval = 0; 569 int retval = 0;
570 570
571 helper_lock(); 571 helper_lock();
572 if (!sub_info->path) {
573 retval = -EINVAL;
574 goto out;
575 }
576
572 if (sub_info->path[0] == '\0') 577 if (sub_info->path[0] == '\0')
573 goto out; 578 goto out;
574 579
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 6a3bccba7e7d..1f3186b37fd5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2998,6 +2998,7 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
2998EXPORT_SYMBOL_GPL(lockdep_init_map); 2998EXPORT_SYMBOL_GPL(lockdep_init_map);
2999 2999
3000struct lock_class_key __lockdep_no_validate__; 3000struct lock_class_key __lockdep_no_validate__;
3001EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
3001 3002
3002static int 3003static int
3003print_lock_nested_lock_not_held(struct task_struct *curr, 3004print_lock_nested_lock_not_held(struct task_struct *curr,
diff --git a/kernel/module.c b/kernel/module.c
index b049939177f6..cab4bce49c23 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2431,10 +2431,10 @@ static void kmemleak_load_module(const struct module *mod,
2431 kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); 2431 kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
2432 2432
2433 for (i = 1; i < info->hdr->e_shnum; i++) { 2433 for (i = 1; i < info->hdr->e_shnum; i++) {
2434 const char *name = info->secstrings + info->sechdrs[i].sh_name; 2434 /* Scan all writable sections that are not executable */
2435 if (!(info->sechdrs[i].sh_flags & SHF_ALLOC)) 2435 if (!(info->sechdrs[i].sh_flags & SHF_ALLOC) ||
2436 continue; 2436 !(info->sechdrs[i].sh_flags & SHF_WRITE) ||
2437 if (!strstarts(name, ".data") && !strstarts(name, ".bss")) 2437 (info->sechdrs[i].sh_flags & SHF_EXECINSTR))
2438 continue; 2438 continue;
2439 2439
2440 kmemleak_scan_area((void *)info->sechdrs[i].sh_addr, 2440 kmemleak_scan_area((void *)info->sechdrs[i].sh_addr,
@@ -2769,24 +2769,11 @@ static void find_module_sections(struct module *mod, struct load_info *info)
2769 mod->trace_events = section_objs(info, "_ftrace_events", 2769 mod->trace_events = section_objs(info, "_ftrace_events",
2770 sizeof(*mod->trace_events), 2770 sizeof(*mod->trace_events),
2771 &mod->num_trace_events); 2771 &mod->num_trace_events);
2772 /*
2773 * This section contains pointers to allocated objects in the trace
2774 * code and not scanning it leads to false positives.
2775 */
2776 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
2777 mod->num_trace_events, GFP_KERNEL);
2778#endif 2772#endif
2779#ifdef CONFIG_TRACING 2773#ifdef CONFIG_TRACING
2780 mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", 2774 mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
2781 sizeof(*mod->trace_bprintk_fmt_start), 2775 sizeof(*mod->trace_bprintk_fmt_start),
2782 &mod->num_trace_bprintk_fmt); 2776 &mod->num_trace_bprintk_fmt);
2783 /*
2784 * This section contains pointers to allocated objects in the trace
2785 * code and not scanning it leads to false positives.
2786 */
2787 kmemleak_scan_area(mod->trace_bprintk_fmt_start,
2788 sizeof(*mod->trace_bprintk_fmt_start) *
2789 mod->num_trace_bprintk_fmt, GFP_KERNEL);
2790#endif 2777#endif
2791#ifdef CONFIG_FTRACE_MCOUNT_RECORD 2778#ifdef CONFIG_FTRACE_MCOUNT_RECORD
2792 /* sechdrs[0].sh_size is always zero */ 2779 /* sechdrs[0].sh_size is always zero */
diff --git a/kernel/params.c b/kernel/params.c
index ed35345be536..53b958fcd639 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -613,10 +613,13 @@ static __modinit int add_sysfs_param(struct module_kobject *mk,
613 sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1), 613 sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1),
614 GFP_KERNEL); 614 GFP_KERNEL);
615 if (!new) { 615 if (!new) {
616 kfree(mk->mp); 616 kfree(attrs);
617 err = -ENOMEM; 617 err = -ENOMEM;
618 goto fail; 618 goto fail;
619 } 619 }
620 /* Despite looking like the typical realloc() bug, this is safe.
621 * We *want* the old 'attrs' to be freed either way, and we'll store
622 * the new one in the success case. */
620 attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL); 623 attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL);
621 if (!attrs) { 624 if (!attrs) {
622 err = -ENOMEM; 625 err = -ENOMEM;
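The code above is safe only because, as the new comment says, the old attrs buffer is meant to be freed on both the success and failure paths. For contrast, the anti-pattern it superficially resembles (a sketch, not this code) really does leak:

/* classic realloc() bug: on failure the old block leaks,
 * because its only pointer has just been overwritten with NULL */
static char *grow_buggy(char *buf, size_t n)
{
	buf = krealloc(buf, n, GFP_KERNEL);
	return buf;
}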
diff --git a/kernel/printk.c b/kernel/printk.c
index 96dcfcd9a2d4..fa36e1494420 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -32,6 +32,7 @@
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/memblock.h> 34#include <linux/memblock.h>
35#include <linux/aio.h>
35#include <linux/syscalls.h> 36#include <linux/syscalls.h>
36#include <linux/kexec.h> 37#include <linux/kexec.h>
37#include <linux/kdb.h> 38#include <linux/kdb.h>
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 17ae54da0ec2..aed981a3f69c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -17,6 +17,7 @@
17#include <linux/ptrace.h> 17#include <linux/ptrace.h>
18#include <linux/security.h> 18#include <linux/security.h>
19#include <linux/signal.h> 19#include <linux/signal.h>
20#include <linux/uio.h>
20#include <linux/audit.h> 21#include <linux/audit.h>
21#include <linux/pid_namespace.h> 22#include <linux/pid_namespace.h>
22#include <linux/syscalls.h> 23#include <linux/syscalls.h>
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 170814dc418f..3db5a375d8dd 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -88,7 +88,7 @@ static void __init rcu_bootup_announce_oddness(void)
88#ifdef CONFIG_RCU_NOCB_CPU 88#ifdef CONFIG_RCU_NOCB_CPU
89#ifndef CONFIG_RCU_NOCB_CPU_NONE 89#ifndef CONFIG_RCU_NOCB_CPU_NONE
90 if (!have_rcu_nocb_mask) { 90 if (!have_rcu_nocb_mask) {
91 alloc_bootmem_cpumask_var(&rcu_nocb_mask); 91 zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
92 have_rcu_nocb_mask = true; 92 have_rcu_nocb_mask = true;
93 } 93 }
94#ifdef CONFIG_RCU_NOCB_CPU_ZERO 94#ifdef CONFIG_RCU_NOCB_CPU_ZERO
@@ -1667,7 +1667,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj)
1667 rdtp->last_accelerate = jiffies; 1667 rdtp->last_accelerate = jiffies;
1668 1668
1669 /* Request timer delay depending on laziness, and round. */ 1669 /* Request timer delay depending on laziness, and round. */
1670 if (rdtp->all_lazy) { 1670 if (!rdtp->all_lazy) {
1671 *dj = round_up(rcu_idle_gp_delay + jiffies, 1671 *dj = round_up(rcu_idle_gp_delay + jiffies,
1672 rcu_idle_gp_delay) - jiffies; 1672 rcu_idle_gp_delay) - jiffies;
1673 } else { 1673 } else {
diff --git a/kernel/relay.c b/kernel/relay.c
index eef0d113b79e..b91488ba2e5a 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -234,7 +234,6 @@ static void relay_destroy_buf(struct rchan_buf *buf)
234static void relay_remove_buf(struct kref *kref) 234static void relay_remove_buf(struct kref *kref)
235{ 235{
236 struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); 236 struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
237 buf->chan->cb->remove_buf_file(buf->dentry);
238 relay_destroy_buf(buf); 237 relay_destroy_buf(buf);
239} 238}
240 239
@@ -484,6 +483,7 @@ static void relay_close_buf(struct rchan_buf *buf)
484{ 483{
485 buf->finalized = 1; 484 buf->finalized = 1;
486 del_timer_sync(&buf->timer); 485 del_timer_sync(&buf->timer);
486 buf->chan->cb->remove_buf_file(buf->dentry);
487 kref_put(&buf->kref, relay_remove_buf); 487 kref_put(&buf->kref, relay_remove_buf);
488} 488}
489 489
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index b3c6c3fcd847..cfff1435bdfb 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -126,6 +126,15 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
126 126
127EXPORT_SYMBOL(_down_write_nest_lock); 127EXPORT_SYMBOL(_down_write_nest_lock);
128 128
129void down_read_non_owner(struct rw_semaphore *sem)
130{
131 might_sleep();
132
133 __down_read(sem);
134}
135
136EXPORT_SYMBOL(down_read_non_owner);
137
129void down_write_nested(struct rw_semaphore *sem, int subclass) 138void down_write_nested(struct rw_semaphore *sem, int subclass)
130{ 139{
131 might_sleep(); 140 might_sleep();
@@ -136,6 +145,13 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
136 145
137EXPORT_SYMBOL(down_write_nested); 146EXPORT_SYMBOL(down_write_nested);
138 147
148void up_read_non_owner(struct rw_semaphore *sem)
149{
150 __up_read(sem);
151}
152
153EXPORT_SYMBOL(up_read_non_owner);
154
139#endif 155#endif
140 156
141 157
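A usage sketch for the new non-owner primitives (hypothetical foo_* caller): they allow a read lock to be taken in one task and released in another, e.g. across an async hand-off, bypassing lockdep's strict owner tracking, which is why they live behind the lockdep-enabled build:

static DECLARE_RWSEM(foo_sem);

static void foo_submit(void)
{
	down_read_non_owner(&foo_sem);	/* taken by the submitter ... */
	/* ... queue async work that will release it ... */
}

static void foo_complete(void)		/* may run in a different task */
{
	up_read_non_owner(&foo_sem);	/* ... released on its behalf */
}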
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 36f85be2932b..d8f071cc9f51 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -633,7 +633,19 @@ void wake_up_nohz_cpu(int cpu)
633static inline bool got_nohz_idle_kick(void) 633static inline bool got_nohz_idle_kick(void)
634{ 634{
635 int cpu = smp_processor_id(); 635 int cpu = smp_processor_id();
636 return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); 636
637 if (!test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)))
638 return false;
639
640 if (idle_cpu(cpu) && !need_resched())
641 return true;
642
643 /*
 644 * We can't run the idle load balance on this CPU at this time, so we
 645 * cancel it and clear NOHZ_BALANCE_KICK.
646 */
647 clear_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
648 return false;
637} 649}
638 650
639#else /* CONFIG_NO_HZ_COMMON */ 651#else /* CONFIG_NO_HZ_COMMON */
@@ -1395,8 +1407,9 @@ static void sched_ttwu_pending(void)
1395 1407
1396void scheduler_ipi(void) 1408void scheduler_ipi(void)
1397{ 1409{
1398 if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick() 1410 if (llist_empty(&this_rq()->wake_list)
1399 && !tick_nohz_full_cpu(smp_processor_id())) 1411 && !tick_nohz_full_cpu(smp_processor_id())
1412 && !got_nohz_idle_kick())
1400 return; 1413 return;
1401 1414
1402 /* 1415 /*
@@ -1419,7 +1432,7 @@ void scheduler_ipi(void)
1419 /* 1432 /*
1420 * Check if someone kicked us for doing the nohz idle load balance. 1433 * Check if someone kicked us for doing the nohz idle load balance.
1421 */ 1434 */
1422 if (unlikely(got_nohz_idle_kick() && !need_resched())) { 1435 if (unlikely(got_nohz_idle_kick())) {
1423 this_rq()->idle_balance = 1; 1436 this_rq()->idle_balance = 1;
1424 raise_softirq_irqoff(SCHED_SOFTIRQ); 1437 raise_softirq_irqoff(SCHED_SOFTIRQ);
1425 } 1438 }
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index bfd6787b355a..7078052284fd 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -200,6 +200,7 @@ cond_syscall(sys_perf_event_open);
200/* fanotify! */ 200/* fanotify! */
201cond_syscall(sys_fanotify_init); 201cond_syscall(sys_fanotify_init);
202cond_syscall(sys_fanotify_mark); 202cond_syscall(sys_fanotify_mark);
203cond_syscall(compat_sys_fanotify_mark);
203 204
204/* open by handle */ 205/* open by handle */
205cond_syscall(sys_name_to_handle_at); 206cond_syscall(sys_name_to_handle_at);
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index ebf72358e86a..aea4a9ea6fc8 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -15,6 +15,7 @@
15#include <linux/netdevice.h> 15#include <linux/netdevice.h>
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/compat.h>
18 19
19#ifdef CONFIG_SYSCTL_SYSCALL 20#ifdef CONFIG_SYSCTL_SYSCALL
20 21
@@ -1447,7 +1448,6 @@ SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
1447 1448
1448 1449
1449#ifdef CONFIG_COMPAT 1450#ifdef CONFIG_COMPAT
1450#include <asm/compat.h>
1451 1451
1452struct compat_sysctl_args { 1452struct compat_sysctl_args {
1453 compat_uptr_t name; 1453 compat_uptr_t name;
@@ -1459,7 +1459,7 @@ struct compat_sysctl_args {
1459 compat_ulong_t __unused[4]; 1459 compat_ulong_t __unused[4];
1460}; 1460};
1461 1461
1462asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args) 1462COMPAT_SYSCALL_DEFINE1(sysctl, struct compat_sysctl_args __user *, args)
1463{ 1463{
1464 struct compat_sysctl_args tmp; 1464 struct compat_sysctl_args tmp;
1465 compat_size_t __user *compat_oldlenp; 1465 compat_size_t __user *compat_oldlenp;
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index e4c07b0692bb..70f27e89012b 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -12,11 +12,6 @@ config CLOCKSOURCE_WATCHDOG
12config ARCH_CLOCKSOURCE_DATA 12config ARCH_CLOCKSOURCE_DATA
13 bool 13 bool
14 14
15# Platforms has a persistent clock
16config ALWAYS_USE_PERSISTENT_CLOCK
17 bool
18 default n
19
20# Timekeeping vsyscall support 15# Timekeeping vsyscall support
21config GENERIC_TIME_VSYSCALL 16config GENERIC_TIME_VSYSCALL
22 bool 17 bool
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 206bbfb34e09..24938d577669 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -786,11 +786,11 @@ bool tick_broadcast_oneshot_available(void)
786 786
787void __init tick_broadcast_init(void) 787void __init tick_broadcast_init(void)
788{ 788{
789 alloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT); 789 zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
790 alloc_cpumask_var(&tmpmask, GFP_NOWAIT); 790 zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
791#ifdef CONFIG_TICK_ONESHOT 791#ifdef CONFIG_TICK_ONESHOT
792 alloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); 792 zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
793 alloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT); 793 zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
794 alloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT); 794 zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
795#endif 795#endif
796} 796}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index bc67d4245e1d..f4208138fbf4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -717,6 +717,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
717 if (unlikely(!cpu_online(cpu))) { 717 if (unlikely(!cpu_online(cpu))) {
718 if (cpu == tick_do_timer_cpu) 718 if (cpu == tick_do_timer_cpu)
719 tick_do_timer_cpu = TICK_DO_TIMER_NONE; 719 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
720 return false;
720 } 721 }
721 722
722 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) 723 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
@@ -1168,7 +1169,7 @@ void tick_cancel_sched_timer(int cpu)
1168 hrtimer_cancel(&ts->sched_timer); 1169 hrtimer_cancel(&ts->sched_timer);
1169# endif 1170# endif
1170 1171
1171 ts->nohz_mode = NOHZ_MODE_INACTIVE; 1172 memset(ts, 0, sizeof(*ts));
1172} 1173}
1173#endif 1174#endif
1174 1175
diff --git a/kernel/timer.c b/kernel/timer.c
index a860bba34412..15ffdb3f1948 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1539,12 +1539,12 @@ static int __cpuinit init_timers_cpu(int cpu)
1539 boot_done = 1; 1539 boot_done = 1;
1540 base = &boot_tvec_bases; 1540 base = &boot_tvec_bases;
1541 } 1541 }
1542 spin_lock_init(&base->lock);
1542 tvec_base_done[cpu] = 1; 1543 tvec_base_done[cpu] = 1;
1543 } else { 1544 } else {
1544 base = per_cpu(tvec_bases, cpu); 1545 base = per_cpu(tvec_bases, cpu);
1545 } 1546 }
1546 1547
1547 spin_lock_init(&base->lock);
1548 1548
1549 for (j = 0; j < TVN_SIZE; j++) { 1549 for (j = 0; j < TVN_SIZE; j++) {
1550 INIT_LIST_HEAD(base->tv5.vec + j); 1550 INIT_LIST_HEAD(base->tv5.vec + j);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5e9efd4b83a4..015f85aaca08 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -71,6 +71,7 @@ config TRACE_CLOCK
71config RING_BUFFER 71config RING_BUFFER
72 bool 72 bool
73 select TRACE_CLOCK 73 select TRACE_CLOCK
74 select IRQ_WORK
74 75
75config FTRACE_NMI_ENTER 76config FTRACE_NMI_ENTER
76 bool 77 bool
@@ -107,7 +108,6 @@ config TRACING
107 select BINARY_PRINTF 108 select BINARY_PRINTF
108 select EVENT_TRACING 109 select EVENT_TRACING
109 select TRACE_CLOCK 110 select TRACE_CLOCK
110 select IRQ_WORK
111 111
112config GENERIC_TRACER 112config GENERIC_TRACER
113 bool 113 bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index ed58a3216a6d..b8b8560bfb95 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1808,6 +1808,7 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1808 1808
1809 rwbs[i] = '\0'; 1809 rwbs[i] = '\0';
1810} 1810}
1811EXPORT_SYMBOL_GPL(blk_fill_rwbs);
1811 1812
1812#endif /* CONFIG_EVENT_TRACING */ 1813#endif /* CONFIG_EVENT_TRACING */
1813 1814
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8a5c017bb50c..b549b0f5b977 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -64,6 +64,13 @@
64 64
65#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) 65#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
66 66
67#ifdef CONFIG_DYNAMIC_FTRACE
68#define INIT_REGEX_LOCK(opsname) \
69 .regex_lock = __MUTEX_INITIALIZER(opsname.regex_lock),
70#else
71#define INIT_REGEX_LOCK(opsname)
72#endif
73
67static struct ftrace_ops ftrace_list_end __read_mostly = { 74static struct ftrace_ops ftrace_list_end __read_mostly = {
68 .func = ftrace_stub, 75 .func = ftrace_stub,
69 .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, 76 .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
@@ -131,6 +138,16 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
131 while (likely(op = rcu_dereference_raw((op)->next)) && \ 138 while (likely(op = rcu_dereference_raw((op)->next)) && \
132 unlikely((op) != &ftrace_list_end)) 139 unlikely((op) != &ftrace_list_end))
133 140
141static inline void ftrace_ops_init(struct ftrace_ops *ops)
142{
143#ifdef CONFIG_DYNAMIC_FTRACE
144 if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) {
145 mutex_init(&ops->regex_lock);
146 ops->flags |= FTRACE_OPS_FL_INITIALIZED;
147 }
148#endif
149}
150
134/** 151/**
135 * ftrace_nr_registered_ops - return number of ops registered 152 * ftrace_nr_registered_ops - return number of ops registered
136 * 153 *
@@ -907,7 +924,8 @@ static void unregister_ftrace_profiler(void)
907#else 924#else
908static struct ftrace_ops ftrace_profile_ops __read_mostly = { 925static struct ftrace_ops ftrace_profile_ops __read_mostly = {
909 .func = function_profile_call, 926 .func = function_profile_call,
910 .flags = FTRACE_OPS_FL_RECURSION_SAFE, 927 .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
928 INIT_REGEX_LOCK(ftrace_profile_ops)
911}; 929};
912 930
913static int register_ftrace_profiler(void) 931static int register_ftrace_profiler(void)
@@ -1103,11 +1121,10 @@ static struct ftrace_ops global_ops = {
1103 .func = ftrace_stub, 1121 .func = ftrace_stub,
1104 .notrace_hash = EMPTY_HASH, 1122 .notrace_hash = EMPTY_HASH,
1105 .filter_hash = EMPTY_HASH, 1123 .filter_hash = EMPTY_HASH,
1106 .flags = FTRACE_OPS_FL_RECURSION_SAFE, 1124 .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
1125 INIT_REGEX_LOCK(global_ops)
1107}; 1126};
1108 1127
1109static DEFINE_MUTEX(ftrace_regex_lock);
1110
1111struct ftrace_page { 1128struct ftrace_page {
1112 struct ftrace_page *next; 1129 struct ftrace_page *next;
1113 struct dyn_ftrace *records; 1130 struct dyn_ftrace *records;
@@ -1247,6 +1264,7 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
1247 1264
1248void ftrace_free_filter(struct ftrace_ops *ops) 1265void ftrace_free_filter(struct ftrace_ops *ops)
1249{ 1266{
1267 ftrace_ops_init(ops);
1250 free_ftrace_hash(ops->filter_hash); 1268 free_ftrace_hash(ops->filter_hash);
1251 free_ftrace_hash(ops->notrace_hash); 1269 free_ftrace_hash(ops->notrace_hash);
1252} 1270}
@@ -2441,7 +2459,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
2441 !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) || 2459 !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) ||
2442 2460
2443 ((iter->flags & FTRACE_ITER_ENABLED) && 2461 ((iter->flags & FTRACE_ITER_ENABLED) &&
2444 !(rec->flags & ~FTRACE_FL_MASK))) { 2462 !(rec->flags & FTRACE_FL_ENABLED))) {
2445 2463
2446 rec = NULL; 2464 rec = NULL;
2447 goto retry; 2465 goto retry;
@@ -2624,6 +2642,8 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
2624 struct ftrace_hash *hash; 2642 struct ftrace_hash *hash;
2625 int ret = 0; 2643 int ret = 0;
2626 2644
2645 ftrace_ops_init(ops);
2646
2627 if (unlikely(ftrace_disabled)) 2647 if (unlikely(ftrace_disabled))
2628 return -ENODEV; 2648 return -ENODEV;
2629 2649
@@ -2636,28 +2656,26 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
2636 return -ENOMEM; 2656 return -ENOMEM;
2637 } 2657 }
2638 2658
2659 iter->ops = ops;
2660 iter->flags = flag;
2661
2662 mutex_lock(&ops->regex_lock);
2663
2639 if (flag & FTRACE_ITER_NOTRACE) 2664 if (flag & FTRACE_ITER_NOTRACE)
2640 hash = ops->notrace_hash; 2665 hash = ops->notrace_hash;
2641 else 2666 else
2642 hash = ops->filter_hash; 2667 hash = ops->filter_hash;
2643 2668
2644 iter->ops = ops;
2645 iter->flags = flag;
2646
2647 if (file->f_mode & FMODE_WRITE) { 2669 if (file->f_mode & FMODE_WRITE) {
2648 mutex_lock(&ftrace_lock);
2649 iter->hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, hash); 2670 iter->hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, hash);
2650 mutex_unlock(&ftrace_lock);
2651
2652 if (!iter->hash) { 2671 if (!iter->hash) {
2653 trace_parser_put(&iter->parser); 2672 trace_parser_put(&iter->parser);
2654 kfree(iter); 2673 kfree(iter);
2655 return -ENOMEM; 2674 ret = -ENOMEM;
2675 goto out_unlock;
2656 } 2676 }
2657 } 2677 }
2658 2678
2659 mutex_lock(&ftrace_regex_lock);
2660
2661 if ((file->f_mode & FMODE_WRITE) && 2679 if ((file->f_mode & FMODE_WRITE) &&
2662 (file->f_flags & O_TRUNC)) 2680 (file->f_flags & O_TRUNC))
2663 ftrace_filter_reset(iter->hash); 2681 ftrace_filter_reset(iter->hash);
@@ -2677,7 +2695,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
2677 } 2695 }
2678 } else 2696 } else
2679 file->private_data = iter; 2697 file->private_data = iter;
2680 mutex_unlock(&ftrace_regex_lock); 2698
2699 out_unlock:
2700 mutex_unlock(&ops->regex_lock);
2681 2701
2682 return ret; 2702 return ret;
2683} 2703}
@@ -2910,6 +2930,8 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
2910static struct ftrace_ops trace_probe_ops __read_mostly = 2930static struct ftrace_ops trace_probe_ops __read_mostly =
2911{ 2931{
2912 .func = function_trace_probe_call, 2932 .func = function_trace_probe_call,
2933 .flags = FTRACE_OPS_FL_INITIALIZED,
2934 INIT_REGEX_LOCK(trace_probe_ops)
2913}; 2935};
2914 2936
2915static int ftrace_probe_registered; 2937static int ftrace_probe_registered;
@@ -2919,8 +2941,12 @@ static void __enable_ftrace_function_probe(void)
2919 int ret; 2941 int ret;
2920 int i; 2942 int i;
2921 2943
2922 if (ftrace_probe_registered) 2944 if (ftrace_probe_registered) {
2945 /* still need to update the function call sites */
2946 if (ftrace_enabled)
2947 ftrace_run_update_code(FTRACE_UPDATE_CALLS);
2923 return; 2948 return;
2949 }
2924 2950
2925 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { 2951 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
2926 struct hlist_head *hhd = &ftrace_func_hash[i]; 2952 struct hlist_head *hhd = &ftrace_func_hash[i];
@@ -2990,19 +3016,21 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
2990 if (WARN_ON(not)) 3016 if (WARN_ON(not))
2991 return -EINVAL; 3017 return -EINVAL;
2992 3018
2993 mutex_lock(&ftrace_lock); 3019 mutex_lock(&trace_probe_ops.regex_lock);
2994 3020
2995 hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); 3021 hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
2996 if (!hash) { 3022 if (!hash) {
2997 count = -ENOMEM; 3023 count = -ENOMEM;
2998 goto out_unlock; 3024 goto out;
2999 } 3025 }
3000 3026
3001 if (unlikely(ftrace_disabled)) { 3027 if (unlikely(ftrace_disabled)) {
3002 count = -ENODEV; 3028 count = -ENODEV;
3003 goto out_unlock; 3029 goto out;
3004 } 3030 }
3005 3031
3032 mutex_lock(&ftrace_lock);
3033
3006 do_for_each_ftrace_rec(pg, rec) { 3034 do_for_each_ftrace_rec(pg, rec) {
3007 3035
3008 if (!ftrace_match_record(rec, NULL, search, len, type)) 3036 if (!ftrace_match_record(rec, NULL, search, len, type))
@@ -3056,6 +3084,8 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3056 3084
3057 out_unlock: 3085 out_unlock:
3058 mutex_unlock(&ftrace_lock); 3086 mutex_unlock(&ftrace_lock);
3087 out:
3088 mutex_unlock(&trace_probe_ops.regex_lock);
3059 free_ftrace_hash(hash); 3089 free_ftrace_hash(hash);
3060 3090
3061 return count; 3091 return count;
@@ -3095,7 +3125,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3095 return; 3125 return;
3096 } 3126 }
3097 3127
3098 mutex_lock(&ftrace_lock); 3128 mutex_lock(&trace_probe_ops.regex_lock);
3099 3129
3100 hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); 3130 hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
3101 if (!hash) 3131 if (!hash)
@@ -3133,6 +3163,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3133 list_add(&entry->free_list, &free_list); 3163 list_add(&entry->free_list, &free_list);
3134 } 3164 }
3135 } 3165 }
3166 mutex_lock(&ftrace_lock);
3136 __disable_ftrace_function_probe(); 3167 __disable_ftrace_function_probe();
3137 /* 3168 /*
3138 * Remove after the disable is called. Otherwise, if the last 3169 * Remove after the disable is called. Otherwise, if the last
@@ -3144,9 +3175,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3144 list_del(&entry->free_list); 3175 list_del(&entry->free_list);
3145 ftrace_free_entry(entry); 3176 ftrace_free_entry(entry);
3146 } 3177 }
3178 mutex_unlock(&ftrace_lock);
3147 3179
3148 out_unlock: 3180 out_unlock:
3149 mutex_unlock(&ftrace_lock); 3181 mutex_unlock(&trace_probe_ops.regex_lock);
3150 free_ftrace_hash(hash); 3182 free_ftrace_hash(hash);
3151} 3183}
3152 3184
@@ -3256,18 +3288,17 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
3256 if (!cnt) 3288 if (!cnt)
3257 return 0; 3289 return 0;
3258 3290
3259 mutex_lock(&ftrace_regex_lock);
3260
3261 ret = -ENODEV;
3262 if (unlikely(ftrace_disabled))
3263 goto out_unlock;
3264
3265 if (file->f_mode & FMODE_READ) { 3291 if (file->f_mode & FMODE_READ) {
3266 struct seq_file *m = file->private_data; 3292 struct seq_file *m = file->private_data;
3267 iter = m->private; 3293 iter = m->private;
3268 } else 3294 } else
3269 iter = file->private_data; 3295 iter = file->private_data;
3270 3296
3297 if (unlikely(ftrace_disabled))
3298 return -ENODEV;
3299
3300 /* iter->hash is a local copy, so we don't need regex_lock */
3301
3271 parser = &iter->parser; 3302 parser = &iter->parser;
3272 read = trace_get_user(parser, ubuf, cnt, ppos); 3303 read = trace_get_user(parser, ubuf, cnt, ppos);
3273 3304
@@ -3276,14 +3307,12 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
3276 ret = ftrace_process_regex(iter->hash, parser->buffer, 3307 ret = ftrace_process_regex(iter->hash, parser->buffer,
3277 parser->idx, enable); 3308 parser->idx, enable);
3278 trace_parser_clear(parser); 3309 trace_parser_clear(parser);
3279 if (ret) 3310 if (ret < 0)
3280 goto out_unlock; 3311 goto out;
3281 } 3312 }
3282 3313
3283 ret = read; 3314 ret = read;
3284out_unlock: 3315 out:
3285 mutex_unlock(&ftrace_regex_lock);
3286
3287 return ret; 3316 return ret;
3288} 3317}
3289 3318
@@ -3335,16 +3364,19 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
3335 if (unlikely(ftrace_disabled)) 3364 if (unlikely(ftrace_disabled))
3336 return -ENODEV; 3365 return -ENODEV;
3337 3366
3367 mutex_lock(&ops->regex_lock);
3368
3338 if (enable) 3369 if (enable)
3339 orig_hash = &ops->filter_hash; 3370 orig_hash = &ops->filter_hash;
3340 else 3371 else
3341 orig_hash = &ops->notrace_hash; 3372 orig_hash = &ops->notrace_hash;
3342 3373
3343 hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); 3374 hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
3344 if (!hash) 3375 if (!hash) {
3345 return -ENOMEM; 3376 ret = -ENOMEM;
3377 goto out_regex_unlock;
3378 }
3346 3379
3347 mutex_lock(&ftrace_regex_lock);
3348 if (reset) 3380 if (reset)
3349 ftrace_filter_reset(hash); 3381 ftrace_filter_reset(hash);
3350 if (buf && !ftrace_match_records(hash, buf, len)) { 3382 if (buf && !ftrace_match_records(hash, buf, len)) {
@@ -3366,7 +3398,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
3366 mutex_unlock(&ftrace_lock); 3398 mutex_unlock(&ftrace_lock);
3367 3399
3368 out_regex_unlock: 3400 out_regex_unlock:
3369 mutex_unlock(&ftrace_regex_lock); 3401 mutex_unlock(&ops->regex_lock);
3370 3402
3371 free_ftrace_hash(hash); 3403 free_ftrace_hash(hash);
3372 return ret; 3404 return ret;
@@ -3392,6 +3424,7 @@ ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
3392int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, 3424int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
3393 int remove, int reset) 3425 int remove, int reset)
3394{ 3426{
3427 ftrace_ops_init(ops);
3395 return ftrace_set_addr(ops, ip, remove, reset, 1); 3428 return ftrace_set_addr(ops, ip, remove, reset, 1);
3396} 3429}
3397EXPORT_SYMBOL_GPL(ftrace_set_filter_ip); 3430EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);
@@ -3416,6 +3449,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
3416int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, 3449int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
3417 int len, int reset) 3450 int len, int reset)
3418{ 3451{
3452 ftrace_ops_init(ops);
3419 return ftrace_set_regex(ops, buf, len, reset, 1); 3453 return ftrace_set_regex(ops, buf, len, reset, 1);
3420} 3454}
3421EXPORT_SYMBOL_GPL(ftrace_set_filter); 3455EXPORT_SYMBOL_GPL(ftrace_set_filter);
@@ -3434,6 +3468,7 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter);
3434int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, 3468int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
3435 int len, int reset) 3469 int len, int reset)
3436{ 3470{
3471 ftrace_ops_init(ops);
3437 return ftrace_set_regex(ops, buf, len, reset, 0); 3472 return ftrace_set_regex(ops, buf, len, reset, 0);
3438} 3473}
3439EXPORT_SYMBOL_GPL(ftrace_set_notrace); 3474EXPORT_SYMBOL_GPL(ftrace_set_notrace);
@@ -3524,6 +3559,8 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
3524{ 3559{
3525 char *func; 3560 char *func;
3526 3561
3562 ftrace_ops_init(ops);
3563
3527 while (buf) { 3564 while (buf) {
3528 func = strsep(&buf, ","); 3565 func = strsep(&buf, ",");
3529 ftrace_set_regex(ops, func, strlen(func), 0, enable); 3566 ftrace_set_regex(ops, func, strlen(func), 0, enable);
@@ -3551,10 +3588,8 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
3551 int filter_hash; 3588 int filter_hash;
3552 int ret; 3589 int ret;
3553 3590
3554 mutex_lock(&ftrace_regex_lock);
3555 if (file->f_mode & FMODE_READ) { 3591 if (file->f_mode & FMODE_READ) {
3556 iter = m->private; 3592 iter = m->private;
3557
3558 seq_release(inode, file); 3593 seq_release(inode, file);
3559 } else 3594 } else
3560 iter = file->private_data; 3595 iter = file->private_data;
@@ -3567,6 +3602,8 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
3567 3602
3568 trace_parser_put(parser); 3603 trace_parser_put(parser);
3569 3604
3605 mutex_lock(&iter->ops->regex_lock);
3606
3570 if (file->f_mode & FMODE_WRITE) { 3607 if (file->f_mode & FMODE_WRITE) {
3571 filter_hash = !!(iter->flags & FTRACE_ITER_FILTER); 3608 filter_hash = !!(iter->flags & FTRACE_ITER_FILTER);
3572 3609
@@ -3584,10 +3621,11 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
3584 3621
3585 mutex_unlock(&ftrace_lock); 3622 mutex_unlock(&ftrace_lock);
3586 } 3623 }
3624
3625 mutex_unlock(&iter->ops->regex_lock);
3587 free_ftrace_hash(iter->hash); 3626 free_ftrace_hash(iter->hash);
3588 kfree(iter); 3627 kfree(iter);
3589 3628
3590 mutex_unlock(&ftrace_regex_lock);
3591 return 0; 3629 return 0;
3592} 3630}
3593 3631
@@ -4126,7 +4164,8 @@ void __init ftrace_init(void)
4126 4164
4127static struct ftrace_ops global_ops = { 4165static struct ftrace_ops global_ops = {
4128 .func = ftrace_stub, 4166 .func = ftrace_stub,
4129 .flags = FTRACE_OPS_FL_RECURSION_SAFE, 4167 .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
4168 INIT_REGEX_LOCK(global_ops)
4130}; 4169};
4131 4170
4132static int __init ftrace_nodyn_init(void) 4171static int __init ftrace_nodyn_init(void)
@@ -4180,8 +4219,9 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
4180} 4219}
4181 4220
4182static struct ftrace_ops control_ops = { 4221static struct ftrace_ops control_ops = {
4183 .func = ftrace_ops_control_func, 4222 .func = ftrace_ops_control_func,
4184 .flags = FTRACE_OPS_FL_RECURSION_SAFE, 4223 .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
4224 INIT_REGEX_LOCK(control_ops)
4185}; 4225};
4186 4226
4187static inline void 4227static inline void
@@ -4539,6 +4579,8 @@ int register_ftrace_function(struct ftrace_ops *ops)
4539{ 4579{
4540 int ret = -1; 4580 int ret = -1;
4541 4581
4582 ftrace_ops_init(ops);
4583
4542 mutex_lock(&ftrace_lock); 4584 mutex_lock(&ftrace_lock);
4543 4585
4544 ret = __register_ftrace_function(ops); 4586 ret = __register_ftrace_function(ops);
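Taken together, the ftrace.c hunks above replace the old global ftrace_regex_lock with a mutex embedded in each ftrace_ops. Statically defined ops get the lock at build time (INIT_REGEX_LOCK plus the FTRACE_OPS_FL_INITIALIZED flag), while other ops are initialized lazily by ftrace_ops_init() at each exported entry point. A minimal userspace sketch of that init-once pattern, with pthreads standing in for kernel mutexes and every name hypothetical:

#include <pthread.h>
#include <stdio.h>

#define OPS_FL_INITIALIZED 0x1

struct my_ops {
        unsigned int flags;
        pthread_mutex_t regex_lock;
};

/* Build-time initializer, analogous to INIT_REGEX_LOCK(). */
#define INIT_MY_OPS { .flags = OPS_FL_INITIALIZED, \
                      .regex_lock = PTHREAD_MUTEX_INITIALIZER }

/* Lazy init for ops declared without the macro
 * (cf. ftrace_ops_init() guarding on FTRACE_OPS_FL_INITIALIZED). */
static void my_ops_init(struct my_ops *ops)
{
        if (!(ops->flags & OPS_FL_INITIALIZED)) {
                pthread_mutex_init(&ops->regex_lock, NULL);
                ops->flags |= OPS_FL_INITIALIZED;
        }
}

static struct my_ops static_ops = INIT_MY_OPS;

int main(void)
{
        struct my_ops dynamic_ops = { 0 };

        my_ops_init(&dynamic_ops);      /* first entry point initializes */
        my_ops_init(&static_ops);       /* no-op: already initialized */

        pthread_mutex_lock(&dynamic_ops.regex_lock);
        puts("per-ops lock held");
        pthread_mutex_unlock(&dynamic_ops.regex_lock);
        return 0;
}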
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b59aea2c48c2..e444ff88f0a4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -620,6 +620,9 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
620 if (cpu == RING_BUFFER_ALL_CPUS) 620 if (cpu == RING_BUFFER_ALL_CPUS)
621 work = &buffer->irq_work; 621 work = &buffer->irq_work;
622 else { 622 else {
623 if (!cpumask_test_cpu(cpu, buffer->cpumask))
624 return -EINVAL;
625
623 cpu_buffer = buffer->buffers[cpu]; 626 cpu_buffer = buffer->buffers[cpu];
624 work = &cpu_buffer->irq_work; 627 work = &cpu_buffer->irq_work;
625 } 628 }
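The ring_buffer.c hunk makes ring_buffer_poll_wait() fail with -EINVAL on a CPU that is not in the buffer's cpumask, instead of indexing past the allocated per-cpu buffers. The same validate-before-index guard in a small hedged sketch (struct and names invented for illustration):

#include <errno.h>
#include <stdio.h>

#define NR_IDS 8

struct pool {
        unsigned long present;  /* which slots exist (cf. buffer->cpumask) */
        int values[NR_IDS];     /* per-id data (cf. buffer->buffers[cpu]) */
};

/* Check membership before indexing rather than trusting the caller. */
static int pool_read(const struct pool *p, int id, int *out)
{
        if (id < 0 || id >= NR_IDS || !(p->present & (1UL << id)))
                return -EINVAL;
        *out = p->values[id];
        return 0;
}

int main(void)
{
        struct pool p = { .present = 1UL << 3 };
        int v;

        p.values[3] = 42;
        if (pool_read(&p, 3, &v) == 0)
                printf("slot 3: %d\n", v);
        printf("slot 5: ret=%d\n", pool_read(&p, 5, &v)); /* -EINVAL */
        return 0;
}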
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ae6fa2d1cdf7..4d79485b3237 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6216,10 +6216,15 @@ __init static int tracer_alloc_buffers(void)
6216 6216
6217 trace_init_cmdlines(); 6217 trace_init_cmdlines();
6218 6218
6219 register_tracer(&nop_trace); 6219 /*
6220 6220 * register_tracer() might reference current_trace, so it
6221 * needs to be set before we register anything. This is
6222 * just a bootstrap of current_trace anyway.
6223 */
6221 global_trace.current_trace = &nop_trace; 6224 global_trace.current_trace = &nop_trace;
6222 6225
6226 register_tracer(&nop_trace);
6227
6223 /* All seems OK, enable tracing */ 6228 /* All seems OK, enable tracing */
6224 tracing_disabled = 0; 6229 tracing_disabled = 0;
6225 6230
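The trace.c hunk is purely an ordering fix: global_trace.current_trace must point at nop_trace before register_tracer() runs, because registration may dereference the current tracer. A toy illustration of why the bootstrap assignment has to come first (all names hypothetical):

#include <stdio.h>

struct tracer { const char *name; };

static struct tracer nop = { "nop" };
static struct tracer *current_tracer;   /* cf. global_trace.current_trace */

/* Registration paths may consult the current tracer; if it is still
 * NULL at this point, we crash or misbehave. */
static void register_tracer(struct tracer *t)
{
        printf("registering %s (current: %s)\n", t->name,
               current_tracer ? current_tracer->name : "(null!)");
}

int main(void)
{
        current_tracer = &nop;  /* bootstrap first... */
        register_tracer(&nop);  /* ...then register */
        return 0;
}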
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53582e982e51..27963e2bf4bf 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -251,7 +251,8 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
251 switch (enable) { 251 switch (enable) {
252 case 0: 252 case 0:
253 /* 253 /*
254 * When soft_disable is set and enable is cleared, we want 254 * When soft_disable is set and enable is cleared, the sm_ref
255 * reference counter is decremented. If it reaches 0, we want
255 * to clear the SOFT_DISABLED flag but leave the event in the 256 * to clear the SOFT_DISABLED flag but leave the event in the
256 * state that it was. That is, if the event was enabled and 257 * state that it was. That is, if the event was enabled and
257 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED 258 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
@@ -263,6 +264,8 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
263 * "soft enable"s (clearing the SOFT_DISABLED bit) wont work. 264 * "soft enable"s (clearing the SOFT_DISABLED bit) wont work.
264 */ 265 */
265 if (soft_disable) { 266 if (soft_disable) {
267 if (atomic_dec_return(&file->sm_ref) > 0)
268 break;
266 disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED; 269 disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED;
267 clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); 270 clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags);
268 } else 271 } else
@@ -291,8 +294,11 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
291 */ 294 */
292 if (!soft_disable) 295 if (!soft_disable)
293 clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); 296 clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
294 else 297 else {
298 if (atomic_inc_return(&file->sm_ref) > 1)
299 break;
295 set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); 300 set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags);
301 }
296 302
297 if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) { 303 if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) {
298 304
@@ -623,6 +629,8 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
623 if (file->flags & FTRACE_EVENT_FL_ENABLED) { 629 if (file->flags & FTRACE_EVENT_FL_ENABLED) {
624 if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED) 630 if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)
625 buf = "0*\n"; 631 buf = "0*\n";
632 else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE)
633 buf = "1*\n";
626 else 634 else
627 buf = "1\n"; 635 buf = "1\n";
628 } else 636 } else
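For reference, the user-visible encoding that event_enable_read() produces after this hunk can be sketched as follows; the FL_* constants stand in for the real FTRACE_EVENT_FL_* bits, and the trailing '*' marks a state controlled by soft mode (triggers) rather than the user:

#include <stdio.h>

#define FL_ENABLED       0x1
#define FL_SOFT_DISABLED 0x2
#define FL_SOFT_MODE     0x4

static const char *enable_str(unsigned long flags)
{
        if (flags & FL_ENABLED) {
                if (flags & FL_SOFT_DISABLED)
                        return "0*\n";          /* on, but soft-disabled */
                if (flags & FL_SOFT_MODE)
                        return "1*\n";          /* on, under soft control */
                return "1\n";
        }
        return "0\n";
}

int main(void)
{
        printf("%s", enable_str(FL_ENABLED | FL_SOFT_MODE));   /* "1*" */
        return 0;
}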
@@ -1521,6 +1529,24 @@ __register_event(struct ftrace_event_call *call, struct module *mod)
1521 return 0; 1529 return 0;
1522} 1530}
1523 1531
1532static struct ftrace_event_file *
1533trace_create_new_event(struct ftrace_event_call *call,
1534 struct trace_array *tr)
1535{
1536 struct ftrace_event_file *file;
1537
1538 file = kmem_cache_alloc(file_cachep, GFP_TRACE);
1539 if (!file)
1540 return NULL;
1541
1542 file->event_call = call;
1543 file->tr = tr;
1544 atomic_set(&file->sm_ref, 0);
1545 list_add(&file->list, &tr->events);
1546
1547 return file;
1548}
1549
1524/* Add an event to a trace directory */ 1550/* Add an event to a trace directory */
1525static int 1551static int
1526__trace_add_new_event(struct ftrace_event_call *call, 1552__trace_add_new_event(struct ftrace_event_call *call,
@@ -1532,14 +1558,10 @@ __trace_add_new_event(struct ftrace_event_call *call,
1532{ 1558{
1533 struct ftrace_event_file *file; 1559 struct ftrace_event_file *file;
1534 1560
1535 file = kmem_cache_alloc(file_cachep, GFP_TRACE); 1561 file = trace_create_new_event(call, tr);
1536 if (!file) 1562 if (!file)
1537 return -ENOMEM; 1563 return -ENOMEM;
1538 1564
1539 file->event_call = call;
1540 file->tr = tr;
1541 list_add(&file->list, &tr->events);
1542
1543 return event_create_dir(tr->event_dir, file, id, enable, filter, format); 1565 return event_create_dir(tr->event_dir, file, id, enable, filter, format);
1544} 1566}
1545 1567
@@ -1554,14 +1576,10 @@ __trace_early_add_new_event(struct ftrace_event_call *call,
1554{ 1576{
1555 struct ftrace_event_file *file; 1577 struct ftrace_event_file *file;
1556 1578
1557 file = kmem_cache_alloc(file_cachep, GFP_TRACE); 1579 file = trace_create_new_event(call, tr);
1558 if (!file) 1580 if (!file)
1559 return -ENOMEM; 1581 return -ENOMEM;
1560 1582
1561 file->event_call = call;
1562 file->tr = tr;
1563 list_add(&file->list, &tr->events);
1564
1565 return 0; 1583 return 0;
1566} 1584}
1567 1585
@@ -2054,15 +2072,27 @@ event_enable_func(struct ftrace_hash *hash,
2054 out_reg: 2072 out_reg:
2055 /* Don't let event modules unload while probe registered */ 2073 /* Don't let event modules unload while probe registered */
2056 ret = try_module_get(file->event_call->mod); 2074 ret = try_module_get(file->event_call->mod);
2057 if (!ret) 2075 if (!ret) {
2076 ret = -EBUSY;
2058 goto out_free; 2077 goto out_free;
2078 }
2059 2079
2060 ret = __ftrace_event_enable_disable(file, 1, 1); 2080 ret = __ftrace_event_enable_disable(file, 1, 1);
2061 if (ret < 0) 2081 if (ret < 0)
2062 goto out_put; 2082 goto out_put;
2063 ret = register_ftrace_function_probe(glob, ops, data); 2083 ret = register_ftrace_function_probe(glob, ops, data);
2064 if (!ret) 2084 /*
 2085 * On success, the above returns the number of functions enabled,
2086 * but if it didn't find any functions it returns zero.
2087 * Consider no functions a failure too.
2088 */
2089 if (!ret) {
2090 ret = -ENOENT;
2091 goto out_disable;
2092 } else if (ret < 0)
2065 goto out_disable; 2093 goto out_disable;
2094 /* Just return zero, not the number of enabled functions */
2095 ret = 0;
2066 out: 2096 out:
2067 mutex_unlock(&event_mutex); 2097 mutex_unlock(&event_mutex);
2068 return ret; 2098 return ret;
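The sm_ref counter threaded through the trace_events.c hunks makes soft enable/disable reference counted: only the 0 to 1 transition arms SOFT_MODE and only the 1 to 0 transition disarms it, so several triggers sharing one event do not fight over the bit. A minimal single-threaded sketch with C11 atomics (names hypothetical):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int sm_ref;       /* soft-mode reference count */
static bool soft_mode;

/* Only the first user arms soft mode (cf. atomic_inc_return() > 1). */
static void soft_enable(void)
{
        if (atomic_fetch_add(&sm_ref, 1) + 1 > 1)
                return;
        soft_mode = true;
}

/* Only the last user disarms it (cf. atomic_dec_return() > 0). */
static void soft_disable(void)
{
        if (atomic_fetch_sub(&sm_ref, 1) - 1 > 0)
                return;
        soft_mode = false;
}

int main(void)
{
        soft_enable();
        soft_enable();
        soft_disable();
        printf("soft_mode=%d (one user left)\n", soft_mode);
        soft_disable();
        printf("soft_mode=%d (last user gone)\n", soft_mode);
        return 0;
}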
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index a6361178de5a..e1b653f7e1ca 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -750,7 +750,11 @@ static int filter_set_pred(struct event_filter *filter,
750 750
751static void __free_preds(struct event_filter *filter) 751static void __free_preds(struct event_filter *filter)
752{ 752{
753 int i;
754
753 if (filter->preds) { 755 if (filter->preds) {
756 for (i = 0; i < filter->n_preds; i++)
757 kfree(filter->preds[i].ops);
754 kfree(filter->preds); 758 kfree(filter->preds);
755 filter->preds = NULL; 759 filter->preds = NULL;
756 } 760 }
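The trace_events_filter.c fix plugs a leak: each predicate owns an ops allocation that must be freed before the array holding the predicates is freed. The shape of the fix as a standalone hedged sketch (struct layout simplified):

#include <stdlib.h>

struct pred {
        char *ops;      /* per-predicate allocation (cf. preds[i].ops) */
};

struct filter {
        struct pred *preds;
        int n_preds;
};

/* Free what each element owns first, then the array itself. */
static void filter_free(struct filter *f)
{
        if (f->preds) {
                for (int i = 0; i < f->n_preds; i++)
                        free(f->preds[i].ops);
                free(f->preds);
                f->preds = NULL;
        }
        f->n_preds = 0;
}

int main(void)
{
        struct filter f = { .n_preds = 2 };

        f.preds = calloc(f.n_preds, sizeof(*f.preds));
        if (!f.preds)
                return 1;
        f.preds[0].ops = malloc(16);
        f.preds[1].ops = malloc(16);
        filter_free(&f);        /* frees both ops, then the array */
        return 0;
}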
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 1865d5f76538..9f46e98ba8f2 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -27,7 +27,6 @@
27/** 27/**
28 * Kprobe event core functions 28 * Kprobe event core functions
29 */ 29 */
30
31struct trace_probe { 30struct trace_probe {
32 struct list_head list; 31 struct list_head list;
33 struct kretprobe rp; /* Use rp.kp for kprobe use */ 32 struct kretprobe rp; /* Use rp.kp for kprobe use */
@@ -36,6 +35,7 @@ struct trace_probe {
36 const char *symbol; /* symbol name */ 35 const char *symbol; /* symbol name */
37 struct ftrace_event_class class; 36 struct ftrace_event_class class;
38 struct ftrace_event_call call; 37 struct ftrace_event_call call;
38 struct ftrace_event_file * __rcu *files;
39 ssize_t size; /* trace entry size */ 39 ssize_t size; /* trace entry size */
40 unsigned int nr_args; 40 unsigned int nr_args;
41 struct probe_arg args[]; 41 struct probe_arg args[];
@@ -46,7 +46,7 @@ struct trace_probe {
46 (sizeof(struct probe_arg) * (n))) 46 (sizeof(struct probe_arg) * (n)))
47 47
48 48
49static __kprobes int trace_probe_is_return(struct trace_probe *tp) 49static __kprobes bool trace_probe_is_return(struct trace_probe *tp)
50{ 50{
51 return tp->rp.handler != NULL; 51 return tp->rp.handler != NULL;
52} 52}
@@ -183,12 +183,63 @@ static struct trace_probe *find_trace_probe(const char *event,
183 return NULL; 183 return NULL;
184} 184}
185 185
186/* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */ 186static int trace_probe_nr_files(struct trace_probe *tp)
187static int enable_trace_probe(struct trace_probe *tp, int flag) 187{
188 struct ftrace_event_file **file;
189 int ret = 0;
190
191 /*
 192 * Since all tp->files updates are protected by probe_enable_lock,
 193 * we don't need to take rcu_read_lock here.
194 */
195 file = rcu_dereference_raw(tp->files);
196 if (file)
197 while (*(file++))
198 ret++;
199
200 return ret;
201}
202
203static DEFINE_MUTEX(probe_enable_lock);
204
205/*
206 * Enable trace_probe
 207 * if the file is NULL, enable the "perf" handler; otherwise enable the "trace" handler.
208 */
209static int
210enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
188{ 211{
189 int ret = 0; 212 int ret = 0;
190 213
191 tp->flags |= flag; 214 mutex_lock(&probe_enable_lock);
215
216 if (file) {
217 struct ftrace_event_file **new, **old;
218 int n = trace_probe_nr_files(tp);
219
220 old = rcu_dereference_raw(tp->files);
 221 /* one slot for the new file and one for the NULL stopper */
222 new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *),
223 GFP_KERNEL);
224 if (!new) {
225 ret = -ENOMEM;
226 goto out_unlock;
227 }
228 memcpy(new, old, n * sizeof(struct ftrace_event_file *));
229 new[n] = file;
230 /* The last one keeps a NULL */
231
232 rcu_assign_pointer(tp->files, new);
233 tp->flags |= TP_FLAG_TRACE;
234
235 if (old) {
236 /* Make sure the probe is done with old files */
237 synchronize_sched();
238 kfree(old);
239 }
240 } else
241 tp->flags |= TP_FLAG_PROFILE;
242
192 if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) && 243 if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) &&
193 !trace_probe_has_gone(tp)) { 244 !trace_probe_has_gone(tp)) {
194 if (trace_probe_is_return(tp)) 245 if (trace_probe_is_return(tp))
@@ -197,19 +248,90 @@ static int enable_trace_probe(struct trace_probe *tp, int flag)
197 ret = enable_kprobe(&tp->rp.kp); 248 ret = enable_kprobe(&tp->rp.kp);
198 } 249 }
199 250
251 out_unlock:
252 mutex_unlock(&probe_enable_lock);
253
200 return ret; 254 return ret;
201} 255}
202 256
203/* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */ 257static int
204static void disable_trace_probe(struct trace_probe *tp, int flag) 258trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file)
259{
260 struct ftrace_event_file **files;
261 int i;
262
263 /*
 264 * Since all tp->files updates are protected by probe_enable_lock,
 265 * we don't need to take rcu_read_lock here.
266 */
267 files = rcu_dereference_raw(tp->files);
268 if (files) {
269 for (i = 0; files[i]; i++)
270 if (files[i] == file)
271 return i;
272 }
273
274 return -1;
275}
276
277/*
278 * Disable trace_probe
 279 * if the file is NULL, disable the "perf" handler; otherwise disable the "trace" handler.
280 */
281static int
282disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
205{ 283{
206 tp->flags &= ~flag; 284 int ret = 0;
285
286 mutex_lock(&probe_enable_lock);
287
288 if (file) {
289 struct ftrace_event_file **new, **old;
290 int n = trace_probe_nr_files(tp);
291 int i, j;
292
293 old = rcu_dereference_raw(tp->files);
294 if (n == 0 || trace_probe_file_index(tp, file) < 0) {
295 ret = -EINVAL;
296 goto out_unlock;
297 }
298
299 if (n == 1) { /* Remove the last file */
300 tp->flags &= ~TP_FLAG_TRACE;
301 new = NULL;
302 } else {
303 new = kzalloc(n * sizeof(struct ftrace_event_file *),
304 GFP_KERNEL);
305 if (!new) {
306 ret = -ENOMEM;
307 goto out_unlock;
308 }
309
310 /* This copy & check loop copies the NULL stopper too */
311 for (i = 0, j = 0; j < n && i < n + 1; i++)
312 if (old[i] != file)
313 new[j++] = old[i];
314 }
315
316 rcu_assign_pointer(tp->files, new);
317
318 /* Make sure the probe is done with old files */
319 synchronize_sched();
320 kfree(old);
321 } else
322 tp->flags &= ~TP_FLAG_PROFILE;
323
207 if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) { 324 if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) {
208 if (trace_probe_is_return(tp)) 325 if (trace_probe_is_return(tp))
209 disable_kretprobe(&tp->rp); 326 disable_kretprobe(&tp->rp);
210 else 327 else
211 disable_kprobe(&tp->rp.kp); 328 disable_kprobe(&tp->rp.kp);
212 } 329 }
330
331 out_unlock:
332 mutex_unlock(&probe_enable_lock);
333
334 return ret;
213} 335}
214 336
215/* Internal register function - just handle k*probes and flags */ 337/* Internal register function - just handle k*probes and flags */
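enable_trace_probe() and disable_trace_probe() above manage tp->files as an RCU-published, NULL-terminated pointer array: writers serialize on probe_enable_lock, build a fresh copy, publish it with rcu_assign_pointer(), and free the old array only after synchronize_sched() guarantees no handler still walks it. A single-threaded userspace sketch of the add path, with the RCU steps reduced to comments and every name invented:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* NULL-terminated array handed to lockless readers (cf. tp->files). */
static const char **files;

static int nr_files(void)
{
        int n = 0;

        if (files)
                while (files[n])
                        n++;
        return n;
}

static int add_file(const char *file)
{
        int n = nr_files();
        /* one slot for the new entry and one for the NULL stopper */
        const char **new = calloc(n + 2, sizeof(*new));
        const char **old = files;

        if (!new)
                return -1;
        if (old)
                memcpy(new, old, n * sizeof(*new));
        new[n] = file;

        files = new;    /* kernel: rcu_assign_pointer(tp->files, new) */
        free(old);      /* kernel: free only after synchronize_sched() */
        return 0;
}

int main(void)
{
        add_file("instance-a");
        add_file("instance-b");
        for (const char **f = files; *f; f++)
                puts(*f);
        return 0;
}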
@@ -723,9 +845,10 @@ static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
723} 845}
724 846
725/* Kprobe handler */ 847/* Kprobe handler */
726static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 848static __kprobes void
849__kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
850 struct ftrace_event_file *ftrace_file)
727{ 851{
728 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
729 struct kprobe_trace_entry_head *entry; 852 struct kprobe_trace_entry_head *entry;
730 struct ring_buffer_event *event; 853 struct ring_buffer_event *event;
731 struct ring_buffer *buffer; 854 struct ring_buffer *buffer;
@@ -733,7 +856,10 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
733 unsigned long irq_flags; 856 unsigned long irq_flags;
734 struct ftrace_event_call *call = &tp->call; 857 struct ftrace_event_call *call = &tp->call;
735 858
736 tp->nhit++; 859 WARN_ON(call != ftrace_file->event_call);
860
861 if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
862 return;
737 863
738 local_save_flags(irq_flags); 864 local_save_flags(irq_flags);
739 pc = preempt_count(); 865 pc = preempt_count();
@@ -741,13 +867,14 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
741 dsize = __get_data_size(tp, regs); 867 dsize = __get_data_size(tp, regs);
742 size = sizeof(*entry) + tp->size + dsize; 868 size = sizeof(*entry) + tp->size + dsize;
743 869
744 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 870 event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
745 size, irq_flags, pc); 871 call->event.type,
872 size, irq_flags, pc);
746 if (!event) 873 if (!event)
747 return; 874 return;
748 875
749 entry = ring_buffer_event_data(event); 876 entry = ring_buffer_event_data(event);
750 entry->ip = (unsigned long)kp->addr; 877 entry->ip = (unsigned long)tp->rp.kp.addr;
751 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 878 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
752 879
753 if (!filter_current_check_discard(buffer, call, entry, event)) 880 if (!filter_current_check_discard(buffer, call, entry, event))
@@ -755,11 +882,31 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
755 irq_flags, pc, regs); 882 irq_flags, pc, regs);
756} 883}
757 884
885static __kprobes void
886kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs)
887{
888 /*
889 * Note: preempt is already disabled around the kprobe handler.
890 * However, we still need an smp_read_barrier_depends() corresponding
891 * to smp_wmb() in rcu_assign_pointer() to access the pointer.
892 */
893 struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
894
895 if (unlikely(!file))
896 return;
897
898 while (*file) {
899 __kprobe_trace_func(tp, regs, *file);
900 file++;
901 }
902}
903
758/* Kretprobe handler */ 904/* Kretprobe handler */
759static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, 905static __kprobes void
760 struct pt_regs *regs) 906__kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
907 struct pt_regs *regs,
908 struct ftrace_event_file *ftrace_file)
761{ 909{
762 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
763 struct kretprobe_trace_entry_head *entry; 910 struct kretprobe_trace_entry_head *entry;
764 struct ring_buffer_event *event; 911 struct ring_buffer_event *event;
765 struct ring_buffer *buffer; 912 struct ring_buffer *buffer;
@@ -767,14 +914,20 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
767 unsigned long irq_flags; 914 unsigned long irq_flags;
768 struct ftrace_event_call *call = &tp->call; 915 struct ftrace_event_call *call = &tp->call;
769 916
917 WARN_ON(call != ftrace_file->event_call);
918
919 if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
920 return;
921
770 local_save_flags(irq_flags); 922 local_save_flags(irq_flags);
771 pc = preempt_count(); 923 pc = preempt_count();
772 924
773 dsize = __get_data_size(tp, regs); 925 dsize = __get_data_size(tp, regs);
774 size = sizeof(*entry) + tp->size + dsize; 926 size = sizeof(*entry) + tp->size + dsize;
775 927
776 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 928 event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
777 size, irq_flags, pc); 929 call->event.type,
930 size, irq_flags, pc);
778 if (!event) 931 if (!event)
779 return; 932 return;
780 933
@@ -788,8 +941,28 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
788 irq_flags, pc, regs); 941 irq_flags, pc, regs);
789} 942}
790 943
944static __kprobes void
945kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
946 struct pt_regs *regs)
947{
948 /*
949 * Note: preempt is already disabled around the kprobe handler.
950 * However, we still need an smp_read_barrier_depends() corresponding
951 * to smp_wmb() in rcu_assign_pointer() to access the pointer.
952 */
953 struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
954
955 if (unlikely(!file))
956 return;
957
958 while (*file) {
959 __kretprobe_trace_func(tp, ri, regs, *file);
960 file++;
961 }
962}
963
791/* Event entry printers */ 964/* Event entry printers */
792enum print_line_t 965static enum print_line_t
793print_kprobe_event(struct trace_iterator *iter, int flags, 966print_kprobe_event(struct trace_iterator *iter, int flags,
794 struct trace_event *event) 967 struct trace_event *event)
795{ 968{
@@ -825,7 +998,7 @@ partial:
825 return TRACE_TYPE_PARTIAL_LINE; 998 return TRACE_TYPE_PARTIAL_LINE;
826} 999}
827 1000
828enum print_line_t 1001static enum print_line_t
829print_kretprobe_event(struct trace_iterator *iter, int flags, 1002print_kretprobe_event(struct trace_iterator *iter, int flags,
830 struct trace_event *event) 1003 struct trace_event *event)
831{ 1004{
@@ -975,10 +1148,9 @@ static int set_print_fmt(struct trace_probe *tp)
975#ifdef CONFIG_PERF_EVENTS 1148#ifdef CONFIG_PERF_EVENTS
976 1149
977/* Kprobe profile handler */ 1150/* Kprobe profile handler */
978static __kprobes void kprobe_perf_func(struct kprobe *kp, 1151static __kprobes void
979 struct pt_regs *regs) 1152kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
980{ 1153{
981 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
982 struct ftrace_event_call *call = &tp->call; 1154 struct ftrace_event_call *call = &tp->call;
983 struct kprobe_trace_entry_head *entry; 1155 struct kprobe_trace_entry_head *entry;
984 struct hlist_head *head; 1156 struct hlist_head *head;
@@ -997,7 +1169,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
997 if (!entry) 1169 if (!entry)
998 return; 1170 return;
999 1171
1000 entry->ip = (unsigned long)kp->addr; 1172 entry->ip = (unsigned long)tp->rp.kp.addr;
1001 memset(&entry[1], 0, dsize); 1173 memset(&entry[1], 0, dsize);
1002 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1174 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1003 1175
@@ -1007,10 +1179,10 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1007} 1179}
1008 1180
1009/* Kretprobe profile handler */ 1181/* Kretprobe profile handler */
1010static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, 1182static __kprobes void
1011 struct pt_regs *regs) 1183kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
1184 struct pt_regs *regs)
1012{ 1185{
1013 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1014 struct ftrace_event_call *call = &tp->call; 1186 struct ftrace_event_call *call = &tp->call;
1015 struct kretprobe_trace_entry_head *entry; 1187 struct kretprobe_trace_entry_head *entry;
1016 struct hlist_head *head; 1188 struct hlist_head *head;
@@ -1044,20 +1216,19 @@ int kprobe_register(struct ftrace_event_call *event,
1044 enum trace_reg type, void *data) 1216 enum trace_reg type, void *data)
1045{ 1217{
1046 struct trace_probe *tp = (struct trace_probe *)event->data; 1218 struct trace_probe *tp = (struct trace_probe *)event->data;
1219 struct ftrace_event_file *file = data;
1047 1220
1048 switch (type) { 1221 switch (type) {
1049 case TRACE_REG_REGISTER: 1222 case TRACE_REG_REGISTER:
1050 return enable_trace_probe(tp, TP_FLAG_TRACE); 1223 return enable_trace_probe(tp, file);
1051 case TRACE_REG_UNREGISTER: 1224 case TRACE_REG_UNREGISTER:
1052 disable_trace_probe(tp, TP_FLAG_TRACE); 1225 return disable_trace_probe(tp, file);
1053 return 0;
1054 1226
1055#ifdef CONFIG_PERF_EVENTS 1227#ifdef CONFIG_PERF_EVENTS
1056 case TRACE_REG_PERF_REGISTER: 1228 case TRACE_REG_PERF_REGISTER:
1057 return enable_trace_probe(tp, TP_FLAG_PROFILE); 1229 return enable_trace_probe(tp, NULL);
1058 case TRACE_REG_PERF_UNREGISTER: 1230 case TRACE_REG_PERF_UNREGISTER:
1059 disable_trace_probe(tp, TP_FLAG_PROFILE); 1231 return disable_trace_probe(tp, NULL);
1060 return 0;
1061 case TRACE_REG_PERF_OPEN: 1232 case TRACE_REG_PERF_OPEN:
1062 case TRACE_REG_PERF_CLOSE: 1233 case TRACE_REG_PERF_CLOSE:
1063 case TRACE_REG_PERF_ADD: 1234 case TRACE_REG_PERF_ADD:
@@ -1073,11 +1244,13 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1073{ 1244{
1074 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1245 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1075 1246
1247 tp->nhit++;
1248
1076 if (tp->flags & TP_FLAG_TRACE) 1249 if (tp->flags & TP_FLAG_TRACE)
1077 kprobe_trace_func(kp, regs); 1250 kprobe_trace_func(tp, regs);
1078#ifdef CONFIG_PERF_EVENTS 1251#ifdef CONFIG_PERF_EVENTS
1079 if (tp->flags & TP_FLAG_PROFILE) 1252 if (tp->flags & TP_FLAG_PROFILE)
1080 kprobe_perf_func(kp, regs); 1253 kprobe_perf_func(tp, regs);
1081#endif 1254#endif
 1082 return 0; /* We don't tweak the kernel, so just return 0 */ 1255 return 0; /* We don't tweak the kernel, so just return 0 */
1083} 1256}
@@ -1087,11 +1260,13 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1087{ 1260{
1088 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1261 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1089 1262
1263 tp->nhit++;
1264
1090 if (tp->flags & TP_FLAG_TRACE) 1265 if (tp->flags & TP_FLAG_TRACE)
1091 kretprobe_trace_func(ri, regs); 1266 kretprobe_trace_func(tp, ri, regs);
1092#ifdef CONFIG_PERF_EVENTS 1267#ifdef CONFIG_PERF_EVENTS
1093 if (tp->flags & TP_FLAG_PROFILE) 1268 if (tp->flags & TP_FLAG_PROFILE)
1094 kretprobe_perf_func(ri, regs); 1269 kretprobe_perf_func(tp, ri, regs);
1095#endif 1270#endif
 1096 return 0; /* We don't tweak the kernel, so just return 0 */ 1271 return 0; /* We don't tweak the kernel, so just return 0 */
1097} 1272}
@@ -1189,11 +1364,24 @@ static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
1189 return a1 + a2 + a3 + a4 + a5 + a6; 1364 return a1 + a2 + a3 + a4 + a5 + a6;
1190} 1365}
1191 1366
1367static struct ftrace_event_file *
1368find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr)
1369{
1370 struct ftrace_event_file *file;
1371
1372 list_for_each_entry(file, &tr->events, list)
1373 if (file->event_call == &tp->call)
1374 return file;
1375
1376 return NULL;
1377}
1378
1192static __init int kprobe_trace_self_tests_init(void) 1379static __init int kprobe_trace_self_tests_init(void)
1193{ 1380{
1194 int ret, warn = 0; 1381 int ret, warn = 0;
1195 int (*target)(int, int, int, int, int, int); 1382 int (*target)(int, int, int, int, int, int);
1196 struct trace_probe *tp; 1383 struct trace_probe *tp;
1384 struct ftrace_event_file *file;
1197 1385
1198 target = kprobe_trace_selftest_target; 1386 target = kprobe_trace_selftest_target;
1199 1387
@@ -1203,31 +1391,43 @@ static __init int kprobe_trace_self_tests_init(void)
1203 "$stack $stack0 +0($stack)", 1391 "$stack $stack0 +0($stack)",
1204 create_trace_probe); 1392 create_trace_probe);
1205 if (WARN_ON_ONCE(ret)) { 1393 if (WARN_ON_ONCE(ret)) {
1206 pr_warning("error on probing function entry.\n"); 1394 pr_warn("error on probing function entry.\n");
1207 warn++; 1395 warn++;
1208 } else { 1396 } else {
1209 /* Enable trace point */ 1397 /* Enable trace point */
1210 tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM); 1398 tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
1211 if (WARN_ON_ONCE(tp == NULL)) { 1399 if (WARN_ON_ONCE(tp == NULL)) {
1212 pr_warning("error on getting new probe.\n"); 1400 pr_warn("error on getting new probe.\n");
1213 warn++; 1401 warn++;
1214 } else 1402 } else {
1215 enable_trace_probe(tp, TP_FLAG_TRACE); 1403 file = find_trace_probe_file(tp, top_trace_array());
1404 if (WARN_ON_ONCE(file == NULL)) {
1405 pr_warn("error on getting probe file.\n");
1406 warn++;
1407 } else
1408 enable_trace_probe(tp, file);
1409 }
1216 } 1410 }
1217 1411
1218 ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target " 1412 ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
1219 "$retval", create_trace_probe); 1413 "$retval", create_trace_probe);
1220 if (WARN_ON_ONCE(ret)) { 1414 if (WARN_ON_ONCE(ret)) {
1221 pr_warning("error on probing function return.\n"); 1415 pr_warn("error on probing function return.\n");
1222 warn++; 1416 warn++;
1223 } else { 1417 } else {
1224 /* Enable trace point */ 1418 /* Enable trace point */
1225 tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM); 1419 tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
1226 if (WARN_ON_ONCE(tp == NULL)) { 1420 if (WARN_ON_ONCE(tp == NULL)) {
1227 pr_warning("error on getting new probe.\n"); 1421 pr_warn("error on getting 2nd new probe.\n");
1228 warn++; 1422 warn++;
1229 } else 1423 } else {
1230 enable_trace_probe(tp, TP_FLAG_TRACE); 1424 file = find_trace_probe_file(tp, top_trace_array());
1425 if (WARN_ON_ONCE(file == NULL)) {
1426 pr_warn("error on getting probe file.\n");
1427 warn++;
1428 } else
1429 enable_trace_probe(tp, file);
1430 }
1231 } 1431 }
1232 1432
1233 if (warn) 1433 if (warn)
@@ -1238,27 +1438,39 @@ static __init int kprobe_trace_self_tests_init(void)
1238 /* Disable trace points before removing it */ 1438 /* Disable trace points before removing it */
1239 tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM); 1439 tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
1240 if (WARN_ON_ONCE(tp == NULL)) { 1440 if (WARN_ON_ONCE(tp == NULL)) {
1241 pr_warning("error on getting test probe.\n"); 1441 pr_warn("error on getting test probe.\n");
1242 warn++; 1442 warn++;
1243 } else 1443 } else {
1244 disable_trace_probe(tp, TP_FLAG_TRACE); 1444 file = find_trace_probe_file(tp, top_trace_array());
1445 if (WARN_ON_ONCE(file == NULL)) {
1446 pr_warn("error on getting probe file.\n");
1447 warn++;
1448 } else
1449 disable_trace_probe(tp, file);
1450 }
1245 1451
1246 tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM); 1452 tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
1247 if (WARN_ON_ONCE(tp == NULL)) { 1453 if (WARN_ON_ONCE(tp == NULL)) {
1248 pr_warning("error on getting 2nd test probe.\n"); 1454 pr_warn("error on getting 2nd test probe.\n");
1249 warn++; 1455 warn++;
1250 } else 1456 } else {
1251 disable_trace_probe(tp, TP_FLAG_TRACE); 1457 file = find_trace_probe_file(tp, top_trace_array());
1458 if (WARN_ON_ONCE(file == NULL)) {
1459 pr_warn("error on getting probe file.\n");
1460 warn++;
1461 } else
1462 disable_trace_probe(tp, file);
1463 }
1252 1464
1253 ret = traceprobe_command("-:testprobe", create_trace_probe); 1465 ret = traceprobe_command("-:testprobe", create_trace_probe);
1254 if (WARN_ON_ONCE(ret)) { 1466 if (WARN_ON_ONCE(ret)) {
1255 pr_warning("error on deleting a probe.\n"); 1467 pr_warn("error on deleting a probe.\n");
1256 warn++; 1468 warn++;
1257 } 1469 }
1258 1470
1259 ret = traceprobe_command("-:testprobe2", create_trace_probe); 1471 ret = traceprobe_command("-:testprobe2", create_trace_probe);
1260 if (WARN_ON_ONCE(ret)) { 1472 if (WARN_ON_ONCE(ret)) {
1261 pr_warning("error on deleting a probe.\n"); 1473 pr_warn("error on deleting a probe.\n");
1262 warn++; 1474 warn++;
1263 } 1475 }
1264 1476
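One more pattern worth pulling out of the trace_kprobe.c diff: the nhit counter moved from the trace handler into the shared dispatchers, which then fan out to whichever consumers (ftrace, perf) the flags enable, so a hit is counted once regardless of how many consumers run. Roughly, with invented names:

#include <stdio.h>

#define TP_FLAG_TRACE   0x1
#define TP_FLAG_PROFILE 0x2

struct probe {
        unsigned int flags;
        unsigned long nhit;
};

/* Count the hit once in the dispatcher, then fan out per consumer
 * (cf. kprobe_dispatcher() after this change). */
static int dispatcher(struct probe *p)
{
        p->nhit++;
        if (p->flags & TP_FLAG_TRACE)
                puts("trace handler");
        if (p->flags & TP_FLAG_PROFILE)
                puts("perf handler");
        return 0;
}

int main(void)
{
        struct probe p = { .flags = TP_FLAG_TRACE | TP_FLAG_PROFILE };

        dispatcher(&p);
        printf("nhit=%lu\n", p.nhit);
        return 0;
}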
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4aa9f5bc6b2d..ee8e29a2320c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -296,7 +296,7 @@ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
296static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; 296static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
297 297
298struct workqueue_struct *system_wq __read_mostly; 298struct workqueue_struct *system_wq __read_mostly;
299EXPORT_SYMBOL_GPL(system_wq); 299EXPORT_SYMBOL(system_wq);
300struct workqueue_struct *system_highpri_wq __read_mostly; 300struct workqueue_struct *system_highpri_wq __read_mostly;
301EXPORT_SYMBOL_GPL(system_highpri_wq); 301EXPORT_SYMBOL_GPL(system_highpri_wq);
302struct workqueue_struct *system_long_wq __read_mostly; 302struct workqueue_struct *system_long_wq __read_mostly;
@@ -1411,7 +1411,7 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
1411 local_irq_restore(flags); 1411 local_irq_restore(flags);
1412 return ret; 1412 return ret;
1413} 1413}
1414EXPORT_SYMBOL_GPL(queue_work_on); 1414EXPORT_SYMBOL(queue_work_on);
1415 1415
1416void delayed_work_timer_fn(unsigned long __data) 1416void delayed_work_timer_fn(unsigned long __data)
1417{ 1417{
@@ -1485,7 +1485,7 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1485 local_irq_restore(flags); 1485 local_irq_restore(flags);
1486 return ret; 1486 return ret;
1487} 1487}
1488EXPORT_SYMBOL_GPL(queue_delayed_work_on); 1488EXPORT_SYMBOL(queue_delayed_work_on);
1489 1489
1490/** 1490/**
1491 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU 1491 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
@@ -2059,6 +2059,7 @@ static bool manage_workers(struct worker *worker)
2059 if (unlikely(!mutex_trylock(&pool->manager_mutex))) { 2059 if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
2060 spin_unlock_irq(&pool->lock); 2060 spin_unlock_irq(&pool->lock);
2061 mutex_lock(&pool->manager_mutex); 2061 mutex_lock(&pool->manager_mutex);
2062 spin_lock_irq(&pool->lock);
2062 ret = true; 2063 ret = true;
2063 } 2064 }
2064 2065
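The one-line workqueue.c change restores a locking invariant in manage_workers(): on the contended path the function drops pool->lock to sleep on manager_mutex, and it must re-take pool->lock before returning because callers assume it is still held. A pthreads sketch of that drop-and-reacquire shape (locks renamed, caller holds pool_lock on entry):

#include <pthread.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t manager_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returns with BOTH locks held, whichever path was taken. */
static int become_manager(void)
{
        int contended = 0;

        if (pthread_mutex_trylock(&manager_lock) != 0) {
                pthread_mutex_unlock(&pool_lock);   /* can't sleep holding it */
                pthread_mutex_lock(&manager_lock);
                pthread_mutex_lock(&pool_lock);     /* the line the fix adds */
                contended = 1;
        }
        return contended;
}

int main(void)
{
        pthread_mutex_lock(&pool_lock);
        become_manager();
        pthread_mutex_unlock(&manager_lock);
        pthread_mutex_unlock(&pool_lock);
        return 0;
}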
@@ -4311,6 +4312,12 @@ bool current_is_workqueue_rescuer(void)
4311 * no synchronization around this function and the test result is 4312 * no synchronization around this function and the test result is
4312 * unreliable and only useful as advisory hints or for debugging. 4313 * unreliable and only useful as advisory hints or for debugging.
4313 * 4314 *
4315 * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
4316 * Note that both per-cpu and unbound workqueues may be associated with
4317 * multiple pool_workqueues which have separate congested states. A
4318 * workqueue being congested on one CPU doesn't mean the workqueue is also
 4319 * congested on other CPUs / NUMA nodes.
4320 *
4314 * RETURNS: 4321 * RETURNS:
4315 * %true if congested, %false otherwise. 4322 * %true if congested, %false otherwise.
4316 */ 4323 */
@@ -4321,6 +4328,9 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
4321 4328
4322 rcu_read_lock_sched(); 4329 rcu_read_lock_sched();
4323 4330
4331 if (cpu == WORK_CPU_UNBOUND)
4332 cpu = smp_processor_id();
4333
4324 if (!(wq->flags & WQ_UNBOUND)) 4334 if (!(wq->flags & WQ_UNBOUND))
4325 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); 4335 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
4326 else 4336 else
@@ -4895,7 +4905,8 @@ static void __init wq_numa_init(void)
4895 BUG_ON(!tbl); 4905 BUG_ON(!tbl);
4896 4906
4897 for_each_node(node) 4907 for_each_node(node)
4898 BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, node)); 4908 BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
4909 node_online(node) ? node : NUMA_NO_NODE));
4899 4910
4900 for_each_possible_cpu(cpu) { 4911 for_each_possible_cpu(cpu) {
4901 node = cpu_to_node(cpu); 4912 node = cpu_to_node(cpu);