Diffstat (limited to 'kernel'): 40 files changed, 632 insertions(+), 375 deletions(-)
diff --git a/kernel/.gitignore b/kernel/.gitignore
index b3097bde4e9c..790d83c7d160 100644
--- a/kernel/.gitignore
+++ b/kernel/.gitignore
@@ -5,3 +5,4 @@ config_data.h
 config_data.gz
 timeconst.h
 hz.bc
+x509_certificate_list
diff --git a/kernel/Makefile b/kernel/Makefile
index 09a9c94f42bd..bbaf7d59c1bb 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -41,8 +41,9 @@ ifneq ($(CONFIG_SMP),y)
 obj-y += up.o
 endif
 obj-$(CONFIG_UID16) += uid16.o
+obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o
 obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o
+obj-$(CONFIG_MODULE_SIG) += module_signing.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
@@ -122,19 +123,52 @@ targets += timeconst.h
 $(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
 	$(call if_changed,bc)
 
-ifeq ($(CONFIG_MODULE_SIG),y)
+###############################################################################
+#
+# Roll all the X.509 certificates that we can find together and pull them into
+# the kernel so that they get loaded into the system trusted keyring during
+# boot.
 #
-# Pull the signing certificate and any extra certificates into the kernel
+# We look in the source root and the build root for all files whose name ends
+# in ".x509".  Unfortunately, this will generate duplicate filenames, so we
+# have make canonicalise the pathnames and then sort them to discard the
+# duplicates.
 #
+###############################################################################
+ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y)
+X509_CERTIFICATES-y := $(wildcard *.x509) $(wildcard $(srctree)/*.x509)
+X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += signing_key.x509
+X509_CERTIFICATES := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \
+				$(or $(realpath $(CERT)),$(CERT))))
+
+ifeq ($(X509_CERTIFICATES),)
+$(warning *** No X.509 certificates found ***)
+endif
+
+ifneq ($(wildcard $(obj)/.x509.list),)
+ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES))
+$(info X.509 certificate list changed)
+$(shell rm $(obj)/.x509.list)
+endif
+endif
+
+kernel/system_certificates.o: $(obj)/x509_certificate_list
 
-quiet_cmd_touch = TOUCH   $@
-      cmd_touch = touch   $@
+quiet_cmd_x509certs = CERTS   $@
+      cmd_x509certs = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; echo "  - Including cert $(X509)")
 
-extra_certificates:
-	$(call cmd,touch)
+targets += $(obj)/x509_certificate_list
+$(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list
+	$(call if_changed,x509certs)
 
-kernel/modsign_certificate.o: signing_key.x509 extra_certificates
+targets += $(obj)/.x509.list
+$(obj)/.x509.list:
+	@echo $(X509_CERTIFICATES) >$@
 
+clean-files := x509_certificate_list .x509.list
+endif
+
+ifeq ($(CONFIG_MODULE_SIG),y)
###############################################################################
 #
 # If module signing is requested, say by allyesconfig, but a key has not been
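The generated x509_certificate_list is simply the DER encodings of every certificate concatenated back to back: each certificate declares its own length in its leading ASN.1 SEQUENCE header, which is how the boot-time keyring code (and the modsign_pubkey.c parser this series deletes below) finds the boundary of each cert. A minimal user-space sketch of the same walk, assuming the list file sits in the current directory; the file name, output and error handling are illustrative only:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned char *buf;
	long len, off = 0;
	FILE *f = fopen("x509_certificate_list", "rb");

	if (!f)
		return 1;
	fseek(f, 0, SEEK_END);
	len = ftell(f);
	rewind(f);
	buf = malloc(len);
	if (!buf || fread(buf, 1, len, f) != (size_t)len)
		return 1;

	while (off + 4 <= len) {
		unsigned char *p = buf + off;
		long plen;

		/* every cert opens with 0x30 0x82: a SEQUENCE with a two-byte length */
		if (p[0] != 0x30 || p[1] != 0x82)
			break;
		plen = ((p[2] << 8) | p[3]) + 4;	/* header + body */
		if (plen > len - off)
			break;
		printf("certificate at offset %ld, %ld bytes\n", off, plen);
		off += plen;
	}
	free(buf);
	fclose(f);
	return 0;
}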
diff --git a/kernel/audit.c b/kernel/audit.c
index 7b0e23a740ce..906ae5a0233a 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -60,7 +60,6 @@
 #ifdef CONFIG_SECURITY
 #include <linux/security.h>
 #endif
-#include <net/netlink.h>
 #include <linux/freezer.h>
 #include <linux/tty.h>
 #include <linux/pid_namespace.h>
@@ -140,6 +139,17 @@ static struct task_struct *kauditd_task;
 static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
 static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
 
+static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
+				   .mask = -1,
+				   .features = 0,
+				   .lock = 0,};
+
+static char *audit_feature_names[2] = {
+	"only_unset_loginuid",
+	"loginuid_immutable",
+};
+
+
 /* Serialize requests from userspace. */
 DEFINE_MUTEX(audit_cmd_mutex);
 
@@ -584,6 +594,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 		return -EOPNOTSUPP;
 	case AUDIT_GET:
 	case AUDIT_SET:
+	case AUDIT_GET_FEATURE:
+	case AUDIT_SET_FEATURE:
 	case AUDIT_LIST_RULES:
 	case AUDIT_ADD_RULE:
 	case AUDIT_DEL_RULE:
@@ -613,7 +625,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
 	int rc = 0;
 	uid_t uid = from_kuid(&init_user_ns, current_uid());
 
-	if (!audit_enabled) {
+	if (!audit_enabled && msg_type != AUDIT_USER_AVC) {
 		*ab = NULL;
 		return rc;
 	}
@@ -628,6 +640,94 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
 	return rc;
 }
 
+int is_audit_feature_set(int i)
+{
+	return af.features & AUDIT_FEATURE_TO_MASK(i);
+}
+
+
+static int audit_get_feature(struct sk_buff *skb)
+{
+	u32 seq;
+
+	seq = nlmsg_hdr(skb)->nlmsg_seq;
+
+	audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0,
+			 &af, sizeof(af));
+
+	return 0;
+}
+
+static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature,
+				     u32 old_lock, u32 new_lock, int res)
+{
+	struct audit_buffer *ab;
+
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
+	audit_log_format(ab, "feature=%s new=%d old=%d old_lock=%d new_lock=%d res=%d",
+			 audit_feature_names[which], !!old_feature, !!new_feature,
+			 !!old_lock, !!new_lock, res);
+	audit_log_end(ab);
+}
+
+static int audit_set_feature(struct sk_buff *skb)
+{
+	struct audit_features *uaf;
+	int i;
+
+	BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > sizeof(audit_feature_names)/sizeof(audit_feature_names[0]));
+	uaf = nlmsg_data(nlmsg_hdr(skb));
+
+	/* if there is ever a version 2 we should handle that here */
+
+	for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
+		u32 feature = AUDIT_FEATURE_TO_MASK(i);
+		u32 old_feature, new_feature, old_lock, new_lock;
+
+		/* if we are not changing this feature, move along */
+		if (!(feature & uaf->mask))
+			continue;
+
+		old_feature = af.features & feature;
+		new_feature = uaf->features & feature;
+		new_lock = (uaf->lock | af.lock) & feature;
+		old_lock = af.lock & feature;
+
+		/* are we changing a locked feature? */
+		if ((af.lock & feature) && (new_feature != old_feature)) {
+			audit_log_feature_change(i, old_feature, new_feature,
+						 old_lock, new_lock, 0);
+			return -EPERM;
+		}
+	}
+	/* nothing invalid, do the changes */
+	for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
+		u32 feature = AUDIT_FEATURE_TO_MASK(i);
+		u32 old_feature, new_feature, old_lock, new_lock;
+
+		/* if we are not changing this feature, move along */
+		if (!(feature & uaf->mask))
+			continue;
+
+		old_feature = af.features & feature;
+		new_feature = uaf->features & feature;
+		old_lock = af.lock & feature;
+		new_lock = (uaf->lock | af.lock) & feature;
+
+		if (new_feature != old_feature)
+			audit_log_feature_change(i, old_feature, new_feature,
+						 old_lock, new_lock, 1);
+
+		if (new_feature)
+			af.features |= feature;
+		else
+			af.features &= ~feature;
+		af.lock |= new_lock;
+	}
+
+	return 0;
+}
+
 static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	u32 seq;
@@ -659,6 +759,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 	switch (msg_type) {
 	case AUDIT_GET:
+		memset(&status_set, 0, sizeof(status_set));
 		status_set.enabled = audit_enabled;
 		status_set.failure = audit_failure;
 		status_set.pid = audit_pid;
@@ -670,7 +771,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				 &status_set, sizeof(status_set));
 		break;
 	case AUDIT_SET:
-		if (nlh->nlmsg_len < sizeof(struct audit_status))
+		if (nlmsg_len(nlh) < sizeof(struct audit_status))
 			return -EINVAL;
 		status_get = (struct audit_status *)data;
 		if (status_get->mask & AUDIT_STATUS_ENABLED) {
@@ -699,6 +800,16 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
 			err = audit_set_backlog_limit(status_get->backlog_limit);
 		break;
+	case AUDIT_GET_FEATURE:
+		err = audit_get_feature(skb);
+		if (err)
+			return err;
+		break;
+	case AUDIT_SET_FEATURE:
+		err = audit_set_feature(skb);
+		if (err)
+			return err;
+		break;
 	case AUDIT_USER:
 	case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
 	case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
@@ -715,7 +826,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		}
 		audit_log_common_recv_msg(&ab, msg_type);
 		if (msg_type != AUDIT_USER_TTY)
-			audit_log_format(ab, " msg='%.1024s'",
+			audit_log_format(ab, " msg='%.*s'",
+					 AUDIT_MESSAGE_TEXT_MAX,
 					 (char *)data);
 		else {
 			int size;
@@ -818,7 +930,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		struct task_struct *tsk = current;
 
 		spin_lock(&tsk->sighand->siglock);
-		s.enabled = tsk->signal->audit_tty != 0;
+		s.enabled = tsk->signal->audit_tty;
 		s.log_passwd = tsk->signal->audit_tty_log_passwd;
 		spin_unlock(&tsk->sighand->siglock);
 
@@ -832,7 +944,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 		memset(&s, 0, sizeof(s));
 		/* guard against past and future API changes */
-		memcpy(&s, data, min(sizeof(s), (size_t)nlh->nlmsg_len));
+		memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
 		if ((s.enabled != 0 && s.enabled != 1) ||
 		    (s.log_passwd != 0 && s.log_passwd != 1))
 			return -EINVAL;
@@ -1067,13 +1179,6 @@ static void wait_for_auditd(unsigned long sleep_time)
 	remove_wait_queue(&audit_backlog_wait, &wait);
 }
 
-/* Obtain an audit buffer. This routine does locking to obtain the
- * audit buffer, but then no locking is required for calls to
- * audit_log_*format. If the tsk is a task that is currently in a
- * syscall, then the syscall is marked as auditable and an audit record
- * will be written at syscall exit. If there is no associated task, tsk
- * should be NULL. */
-
 /**
  * audit_log_start - obtain an audit buffer
  * @ctx: audit_context (may be NULL)
@@ -1389,7 +1494,7 @@ void audit_log_session_info(struct audit_buffer *ab)
 	u32 sessionid = audit_get_sessionid(current);
 	uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current));
 
-	audit_log_format(ab, " auid=%u ses=%u\n", auid, sessionid);
+	audit_log_format(ab, " auid=%u ses=%u", auid, sessionid);
 }
 
 void audit_log_key(struct audit_buffer *ab, char *key)
@@ -1536,6 +1641,26 @@ void audit_log_name(struct audit_context *context, struct audit_names *n,
 		}
 	}
 
+	/* log the audit_names record type */
+	audit_log_format(ab, " nametype=");
+	switch(n->type) {
+	case AUDIT_TYPE_NORMAL:
+		audit_log_format(ab, "NORMAL");
+		break;
+	case AUDIT_TYPE_PARENT:
+		audit_log_format(ab, "PARENT");
+		break;
+	case AUDIT_TYPE_CHILD_DELETE:
+		audit_log_format(ab, "DELETE");
+		break;
+	case AUDIT_TYPE_CHILD_CREATE:
+		audit_log_format(ab, "CREATE");
+		break;
+	default:
+		audit_log_format(ab, "UNKNOWN");
+		break;
+	}
+
 	audit_log_fcaps(ab, n);
 	audit_log_end(ab);
 }
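AUDIT_GET_FEATURE and AUDIT_SET_FEATURE travel as ordinary NETLINK_AUDIT messages whose payload is the struct audit_features shown above (vers, mask, features, lock); note that in this version audit_get_feature() tags its reply AUDIT_GET rather than AUDIT_GET_FEATURE. A hedged user-space sketch of reading the feature word — the message constant and the struct layout here are assumptions to be checked against <linux/audit.h>:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/audit.h>

#ifndef AUDIT_GET_FEATURE
#define AUDIT_GET_FEATURE 1019	/* value believed to match uapi <linux/audit.h>; verify */
#endif

/* mirrors the audit_features fields in the diff; real code should use the header */
struct audit_features_msg {
	unsigned int vers;
	unsigned int mask;
	unsigned int features;
	unsigned int lock;
};

int main(void)
{
	struct sockaddr_nl kaddr = { .nl_family = AF_NETLINK };
	struct nlmsghdr req;
	char buf[1024];
	struct nlmsghdr *rep = (struct nlmsghdr *)buf;
	ssize_t n;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_AUDIT);

	if (fd < 0)
		return 1;
	memset(&req, 0, sizeof(req));
	req.nlmsg_len = NLMSG_LENGTH(0);	/* the GET carries no payload */
	req.nlmsg_type = AUDIT_GET_FEATURE;	/* needs CAP_AUDIT_CONTROL */
	req.nlmsg_flags = NLM_F_REQUEST;
	if (sendto(fd, &req, req.nlmsg_len, 0,
		   (struct sockaddr *)&kaddr, sizeof(kaddr)) < 0)
		return 1;
	n = recv(fd, buf, sizeof(buf), 0);
	if (n > 0 && NLMSG_OK(rep, (unsigned int)n) &&
	    rep->nlmsg_type != NLMSG_ERROR) {
		struct audit_features_msg *f = NLMSG_DATA(rep);

		printf("vers=%u features=%#x lock=%#x\n",
		       f->vers, f->features, f->lock);
	}
	close(fd);
	return 0;
}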
diff --git a/kernel/audit.h b/kernel/audit.h
index 123c9b7c3979..b779642b29af 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -197,6 +197,9 @@ struct audit_context {
 			int			fd;
 			int			flags;
 		} mmap;
+		struct {
+			int			argc;
+		} execve;
 	};
 	int fds[2];
 
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index f7aee8be7fb2..51f3fd4c1ed3 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -343,6 +343,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
 	case AUDIT_DEVMINOR:
 	case AUDIT_EXIT:
 	case AUDIT_SUCCESS:
+	case AUDIT_INODE:
 		/* bit ops are only useful on syscall args */
 		if (f->op == Audit_bitmask || f->op == Audit_bittest)
 			return -EINVAL;
@@ -423,7 +424,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		f->lsm_rule = NULL;
 
 		/* Support legacy tests for a valid loginuid */
-		if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) {
+		if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) {
 			f->type = AUDIT_LOGINUID_SET;
 			f->val = 0;
 		}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 9845cb32b60a..90594c9f7552 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -95,13 +95,6 @@ struct audit_aux_data {
 /* Number of target pids per aux struct. */
 #define AUDIT_AUX_PIDS	16
 
-struct audit_aux_data_execve {
-	struct audit_aux_data	d;
-	int argc;
-	int envc;
-	struct mm_struct *mm;
-};
-
 struct audit_aux_data_pids {
 	struct audit_aux_data	d;
 	pid_t			target_pid[AUDIT_AUX_PIDS];
@@ -121,12 +114,6 @@ struct audit_aux_data_bprm_fcaps {
 	struct audit_cap_data	new_pcap;
 };
 
-struct audit_aux_data_capset {
-	struct audit_aux_data	d;
-	pid_t			pid;
-	struct audit_cap_data	cap;
-};
-
 struct audit_tree_refs {
 	struct audit_tree_refs *next;
 	struct audit_chunk *c[31];
@@ -566,7 +553,7 @@ static int audit_filter_rules(struct task_struct *tsk,
 			break;
 		case AUDIT_INODE:
 			if (name)
-				result = (name->ino == f->val);
+				result = audit_comparator(name->ino, f->op, f->val);
 			else if (ctx) {
 				list_for_each_entry(n, &ctx->names_list, list) {
 					if (audit_comparator(n->ino, f->op, f->val)) {
@@ -943,8 +930,10 @@ int audit_alloc(struct task_struct *tsk)
 		return 0; /* Return if not auditing. */
 
 	state = audit_filter_task(tsk, &key);
-	if (state == AUDIT_DISABLED)
+	if (state == AUDIT_DISABLED) {
+		clear_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT);
 		return 0;
+	}
 
 	if (!(context = audit_alloc_context(state))) {
 		kfree(key);
@@ -1149,20 +1138,16 @@ static int audit_log_single_execve_arg(struct audit_context *context,
 }
 
 static void audit_log_execve_info(struct audit_context *context,
-				  struct audit_buffer **ab,
-				  struct audit_aux_data_execve *axi)
+				  struct audit_buffer **ab)
 {
 	int i, len;
 	size_t len_sent = 0;
 	const char __user *p;
 	char *buf;
 
-	if (axi->mm != current->mm)
-		return; /* execve failed, no additional info */
-
-	p = (const char __user *)axi->mm->arg_start;
+	p = (const char __user *)current->mm->arg_start;
 
-	audit_log_format(*ab, "argc=%d", axi->argc);
+	audit_log_format(*ab, "argc=%d", context->execve.argc);
 
 	/*
 	 * we need some kernel buffer to hold the userspace args. Just
@@ -1176,7 +1161,7 @@ static void audit_log_execve_info(struct audit_context *context,
 		return;
 	}
 
-	for (i = 0; i < axi->argc; i++) {
+	for (i = 0; i < context->execve.argc; i++) {
 		len = audit_log_single_execve_arg(context, ab, i,
 						  &len_sent, p, buf);
 		if (len <= 0)
@@ -1279,6 +1264,9 @@ static void show_special(struct audit_context *context, int *call_panic)
 		audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd,
 				 context->mmap.flags);
 		break; }
+	case AUDIT_EXECVE: {
+		audit_log_execve_info(context, &ab);
+		break; }
 	}
 	audit_log_end(ab);
 }
@@ -1325,11 +1313,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 
 		switch (aux->type) {
 
-		case AUDIT_EXECVE: {
-			struct audit_aux_data_execve *axi = (void *)aux;
-			audit_log_execve_info(context, &ab, axi);
-			break; }
-
 		case AUDIT_BPRM_FCAPS: {
 			struct audit_aux_data_bprm_fcaps *axs = (void *)aux;
 			audit_log_format(ab, "fver=%x", axs->fcap_ver);
@@ -1964,6 +1947,43 @@ int auditsc_get_stamp(struct audit_context *ctx,
 /* global counter which is incremented every time something logs in */
 static atomic_t session_id = ATOMIC_INIT(0);
 
+static int audit_set_loginuid_perm(kuid_t loginuid)
+{
+	/* if we are unset, we don't need privs */
+	if (!audit_loginuid_set(current))
+		return 0;
+	/* AUDIT_FEATURE_LOGINUID_IMMUTABLE means never allow a change */
+	if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
+		return -EPERM;
+	/* it is set, you need permission */
+	if (!capable(CAP_AUDIT_CONTROL))
+		return -EPERM;
+	/* reject if this is not an unset and we don't allow that */
+	if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid))
+		return -EPERM;
+	return 0;
+}
+
+static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
+				   unsigned int oldsessionid, unsigned int sessionid,
+				   int rc)
+{
+	struct audit_buffer *ab;
+	uid_t uid, ologinuid, nloginuid;
+
+	uid = from_kuid(&init_user_ns, task_uid(current));
+	ologinuid = from_kuid(&init_user_ns, koldloginuid);
+	nloginuid = from_kuid(&init_user_ns, kloginuid);
+
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+	if (!ab)
+		return;
+	audit_log_format(ab, "pid=%d uid=%u old auid=%u new auid=%u old "
+			 "ses=%u new ses=%u res=%d", current->pid, uid, ologinuid,
+			 nloginuid, oldsessionid, sessionid, !rc);
+	audit_log_end(ab);
+}
+
 /**
  * audit_set_loginuid - set current task's audit_context loginuid
  * @loginuid: loginuid value
@@ -1975,37 +1995,26 @@ static atomic_t session_id = ATOMIC_INIT(0);
 int audit_set_loginuid(kuid_t loginuid)
 {
 	struct task_struct *task = current;
-	struct audit_context *context = task->audit_context;
-	unsigned int sessionid;
+	unsigned int oldsessionid, sessionid = (unsigned int)-1;
+	kuid_t oldloginuid;
+	int rc;
 
-#ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE
-	if (audit_loginuid_set(task))
-		return -EPERM;
-#else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
-	if (!capable(CAP_AUDIT_CONTROL))
-		return -EPERM;
-#endif /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
+	oldloginuid = audit_get_loginuid(current);
+	oldsessionid = audit_get_sessionid(current);
 
-	sessionid = atomic_inc_return(&session_id);
-	if (context && context->in_syscall) {
-		struct audit_buffer *ab;
+	rc = audit_set_loginuid_perm(loginuid);
+	if (rc)
+		goto out;
+
+	/* are we setting or clearing? */
+	if (uid_valid(loginuid))
+		sessionid = atomic_inc_return(&session_id);
 
-		ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
-		if (ab) {
-			audit_log_format(ab, "login pid=%d uid=%u "
-				"old auid=%u new auid=%u"
-				" old ses=%u new ses=%u",
-				task->pid,
-				from_kuid(&init_user_ns, task_uid(task)),
-				from_kuid(&init_user_ns, task->loginuid),
-				from_kuid(&init_user_ns, loginuid),
-				task->sessionid, sessionid);
-			audit_log_end(ab);
-		}
-	}
 	task->sessionid = sessionid;
 	task->loginuid = loginuid;
-	return 0;
+out:
+	audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc);
+	return rc;
 }
 
 /**
@@ -2126,22 +2135,12 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo
 	context->ipc.has_perm = 1;
 }
 
-int __audit_bprm(struct linux_binprm *bprm)
+void __audit_bprm(struct linux_binprm *bprm)
 {
-	struct audit_aux_data_execve *ax;
 	struct audit_context *context = current->audit_context;
 
-	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->argc = bprm->argc;
-	ax->envc = bprm->envc;
-	ax->mm = bprm->mm;
-	ax->d.type = AUDIT_EXECVE;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->type = AUDIT_EXECVE;
+	context->execve.argc = bprm->argc;
 }
 
 
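audit_set_loginuid() is reached through writes to /proc/<pid>/loginuid, which pam_loginuid performs once per login. A small sketch of that path under the new feature semantics above: with loginuid_immutable set the second write fails with EPERM, and with only_unset_loginuid set only a reset to the unset value (4294967295) remains possible. The uids below are illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* write a decimal uid to /proc/self/loginuid, as pam_loginuid does */
static int write_loginuid(unsigned int uid)
{
	char buf[16];
	int n, ok;
	int fd = open("/proc/self/loginuid", O_WRONLY);

	if (fd < 0)
		return -1;
	n = snprintf(buf, sizeof(buf), "%u", uid);
	ok = write(fd, buf, n) == n;
	close(fd);
	return ok ? 0 : -1;
}

int main(void)
{
	if (write_loginuid(1000))
		perror("first set");	/* needs CAP_AUDIT_CONTROL once set */
	if (write_loginuid(1001))
		perror("second set");	/* EPERM when loginuid_immutable */
	return 0;
}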
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 578782ef6ae1..5253204afdca 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -11,7 +11,7 @@
 #include <linux/kbuild.h>
 #include <linux/page_cgroup.h>
 #include <linux/log2.h>
-#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
 
 void foo(void)
 {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e0839bcd48c8..8b729c278b64 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -90,6 +90,14 @@ static DEFINE_MUTEX(cgroup_mutex);
 static DEFINE_MUTEX(cgroup_root_mutex);
 
 /*
+ * cgroup destruction makes heavy use of work items and there can be a lot
+ * of concurrent destructions.  Use a separate workqueue so that cgroup
+ * destruction work items don't end up filling up max_active of system_wq
+ * which may lead to deadlock.
+ */
+static struct workqueue_struct *cgroup_destroy_wq;
+
+/*
  * Generate an array of cgroup subsystem pointers. At boot time, this is
  * populated with the built in subsystems, and modular subsystems are
  * registered after that. The mutable section of this array is protected by
@@ -191,6 +199,7 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 			      bool is_add);
+static int cgroup_file_release(struct inode *inode, struct file *file);
 
 /**
  * cgroup_css - obtain a cgroup's css for the specified subsystem
@@ -871,7 +880,7 @@ static void cgroup_free_rcu(struct rcu_head *head)
 	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
 
 	INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
-	schedule_work(&cgrp->destroy_work);
+	queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
 }
 
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -895,11 +904,6 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 	iput(inode);
 }
 
-static int cgroup_delete(const struct dentry *d)
-{
-	return 1;
-}
-
 static void remove_dir(struct dentry *d)
 {
 	struct dentry *parent = dget(d->d_parent);
@@ -1486,7 +1490,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
 {
 	static const struct dentry_operations cgroup_dops = {
 		.d_iput = cgroup_diput,
-		.d_delete = cgroup_delete,
+		.d_delete = always_delete_dentry,
 	};
 
 	struct inode *inode =
@@ -2426,7 +2430,7 @@ static const struct file_operations cgroup_seqfile_operations = {
 	.read = seq_read,
 	.write = cgroup_file_write,
 	.llseek = seq_lseek,
-	.release = single_release,
+	.release = cgroup_file_release,
 };
 
 static int cgroup_file_open(struct inode *inode, struct file *file)
@@ -2487,6 +2491,8 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
 		ret = cft->release(inode, file);
 	if (css->ss)
 		css_put(css);
+	if (file->f_op == &cgroup_seqfile_operations)
+		single_release(inode, file);
 	return ret;
 }
 
@@ -4254,7 +4260,7 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
 	 * css_put().  dput() requires process context which we don't have.
 	 */
 	INIT_WORK(&css->destroy_work, css_free_work_fn);
-	schedule_work(&css->destroy_work);
+	queue_work(cgroup_destroy_wq, &css->destroy_work);
 }
 
 static void css_release(struct percpu_ref *ref)
@@ -4544,7 +4550,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
 		container_of(ref, struct cgroup_subsys_state, refcnt);
 
 	INIT_WORK(&css->destroy_work, css_killed_work_fn);
-	schedule_work(&css->destroy_work);
+	queue_work(cgroup_destroy_wq, &css->destroy_work);
 }
 
 /**
@@ -5068,6 +5074,22 @@ out:
 	return err;
 }
 
+static int __init cgroup_wq_init(void)
+{
+	/*
+	 * There isn't much point in executing destruction path in
+	 * parallel.  Good chunk is serialized with cgroup_mutex anyway.
+	 * Use 1 for @max_active.
+	 *
+	 * We would prefer to do this in cgroup_init() above, but that
+	 * is called before init_workqueues(): so leave this until after.
+	 */
+	cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
+	BUG_ON(!cgroup_destroy_wq);
+	return 0;
+}
+core_initcall(cgroup_wq_init);
+
 /*
  * proc_cgroup_show()
  *  - Print task's cgroup paths into seq_file, one line for each hierarchy
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6bf981e13c43..4772034b4b17 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1033,8 +1033,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 	need_loop = task_has_mempolicy(tsk) ||
 			!nodes_intersects(*newmems, tsk->mems_allowed);
 
-	if (need_loop)
+	if (need_loop) {
+		local_irq_disable();
 		write_seqcount_begin(&tsk->mems_allowed_seq);
+	}
 
 	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
 	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
@@ -1042,8 +1044,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
 	tsk->mems_allowed = *newmems;
 
-	if (need_loop)
+	if (need_loop) {
 		write_seqcount_end(&tsk->mems_allowed_seq);
+		local_irq_enable();
+	}
 
 	task_unlock(tsk);
 }
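The interrupt disabling matters because the seqcount has a spinning read side: a reader samples the counter, copies the mask, and retries if a write was in flight. If such a reader ran from an interrupt that landed between write_seqcount_begin() and write_seqcount_end() on the same CPU, it would retry forever, since the writer it waits for is the very context it interrupted. A simplified sketch of that read side, modeled on the mems_allowed readers rather than copied from them:

#include <linux/nodemask.h>
#include <linux/sched.h>
#include <linux/seqlock.h>

/* snapshot tsk->mems_allowed consistently against concurrent updates */
static nodemask_t read_mems_allowed(struct task_struct *tsk)
{
	nodemask_t mask;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&tsk->mems_allowed_seq);
		mask = tsk->mems_allowed;
	} while (read_seqcount_retry(&tsk->mems_allowed_seq, seq));

	return mask;
}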
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d724e7757cd1..72348dc192c1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5680,11 +5680,6 @@ static void swevent_hlist_put(struct perf_event *event)
 {
 	int cpu;
 
-	if (event->cpu != -1) {
-		swevent_hlist_put_cpu(event, event->cpu);
-		return;
-	}
-
 	for_each_possible_cpu(cpu)
 		swevent_hlist_put_cpu(event, cpu);
 }
@@ -5718,9 +5713,6 @@ static int swevent_hlist_get(struct perf_event *event)
 	int err;
 	int cpu, failed_cpu;
 
-	if (event->cpu != -1)
-		return swevent_hlist_get_cpu(event, event->cpu);
-
 	get_online_cpus();
 	for_each_possible_cpu(cpu) {
 		err = swevent_hlist_get_cpu(event, cpu);
diff --git a/kernel/extable.c b/kernel/extable.c
index 832cb28105bb..763faf037ec1 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -61,7 +61,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
 static inline int init_kernel_text(unsigned long addr)
 {
 	if (addr >= (unsigned long)_sinittext &&
-	    addr <= (unsigned long)_einittext)
+	    addr < (unsigned long)_einittext)
 		return 1;
 	return 0;
 }
@@ -69,7 +69,7 @@ static inline int init_kernel_text(unsigned long addr)
 int core_kernel_text(unsigned long addr)
 {
 	if (addr >= (unsigned long)_stext &&
-	    addr <= (unsigned long)_etext)
+	    addr < (unsigned long)_etext)
 		return 1;
 
 	if (system_state == SYSTEM_BOOTING &&
diff --git a/kernel/futex.c b/kernel/futex.c
index 80ba086f021d..f6ff0191ecf7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -251,6 +251,9 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 		return -EINVAL;
 	address -= key->both.offset;
 
+	if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
+		return -EFAULT;
+
 	/*
 	 * PROCESS_PRIVATE futexes are fast.
 	 * As the mm cannot disappear under us and the 'key' only needs
@@ -259,8 +262,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	 * but access_ok() should be faster than find_vma()
 	 */
 	if (!fshared) {
-		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
-			return -EFAULT;
 		key->private.mm = mm;
 		key->private.address = address;
 		get_futex_key_refs(key);
@@ -288,7 +289,7 @@ again:
 	put_page(page);
 	/* serialize against __split_huge_page_splitting() */
 	local_irq_disable();
-	if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
+	if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) {
 		page_head = compound_head(page);
 		/*
 		 * page_head is valid pointer but we must pin
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index cb228bf21760..abcd6ca86cb7 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -50,7 +50,7 @@ static void resume_irqs(bool want_early)
 		bool is_early = desc->action &&
 			desc->action->flags & IRQF_EARLY_RESUME;
 
-		if (is_early != want_early)
+		if (!is_early && want_early)
 			continue;
 
 		raw_spin_lock_irqsave(&desc->lock, flags);
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index 1162f1030f18..3320b84cc60f 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -14,6 +14,7 @@ enum {
 	_IRQ_NO_BALANCING	= IRQ_NO_BALANCING,
 	_IRQ_NESTED_THREAD	= IRQ_NESTED_THREAD,
 	_IRQ_PER_CPU_DEVID	= IRQ_PER_CPU_DEVID,
+	_IRQ_IS_POLLED		= IRQ_IS_POLLED,
 	_IRQF_MODIFY_MASK	= IRQF_MODIFY_MASK,
 };
 
@@ -26,6 +27,7 @@ enum {
 #define IRQ_NOAUTOEN		GOT_YOU_MORON
 #define IRQ_NESTED_THREAD	GOT_YOU_MORON
 #define IRQ_PER_CPU_DEVID	GOT_YOU_MORON
+#define IRQ_IS_POLLED		GOT_YOU_MORON
 #undef IRQF_MODIFY_MASK
 #define IRQF_MODIFY_MASK	GOT_YOU_MORON
 
@@ -147,3 +149,8 @@ static inline bool irq_settings_is_nested_thread(struct irq_desc *desc)
 {
 	return desc->status_use_accessors & _IRQ_NESTED_THREAD;
 }
+
+static inline bool irq_settings_is_polled(struct irq_desc *desc)
+{
+	return desc->status_use_accessors & _IRQ_IS_POLLED;
+}
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 7b5f012bde9d..a1d8cc63b56e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -67,8 +67,13 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
 
 	raw_spin_lock(&desc->lock);
 
-	/* PER_CPU and nested thread interrupts are never polled */
-	if (irq_settings_is_per_cpu(desc) || irq_settings_is_nested_thread(desc))
+	/*
+	 * PER_CPU, nested thread interrupts and interrupts explicitly
+	 * marked polled are excluded from polling.
+	 */
+	if (irq_settings_is_per_cpu(desc) ||
+	    irq_settings_is_nested_thread(desc) ||
+	    irq_settings_is_polled(desc))
 		goto out;
 
 	/*
@@ -268,7 +273,8 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
 void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		    irqreturn_t action_ret)
 {
-	if (desc->istate & IRQS_POLL_INPROGRESS)
+	if (desc->istate & IRQS_POLL_INPROGRESS ||
+	    irq_settings_is_polled(desc))
 		return;
 
 	/* we get here again via the threaded handler */
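The new flag is for drivers that invoke their handler from a poll routine rather than from a real interrupt, where the usual spurious-IRQ accounting would only produce false positives. A hedged sketch of a driver opting out this way; it assumes IRQ_IS_POLLED is settable through irq_set_status_flags(), which the matching include/linux/irq.h change (not shown in this excerpt) arranges by adding the flag to IRQF_MODIFY_MASK:

#include <linux/interrupt.h>
#include <linux/irq.h>

static irqreturn_t demo_handler(int irq, void *dev_id)
{
	/* called from the driver's poll timer as well as the hard IRQ */
	return IRQ_HANDLED;
}

static int demo_setup_irq(unsigned int irq)
{
	int ret = request_irq(irq, demo_handler, 0, "demo", NULL);

	if (ret)
		return ret;
	/* keep note_interrupt() and the misrouted-IRQ poller away */
	irq_set_status_flags(irq, IRQ_IS_POLLED);
	return 0;
}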
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 490afc03627e..d0d8fca54065 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -47,6 +47,9 @@ u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 size_t vmcoreinfo_size;
 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
 
+/* Flag to indicate we are going to kexec a new kernel */
+bool kexec_in_progress = false;
+
 /* Location of the reserved area for the crash kernel */
 struct resource crashk_res = {
 	.name  = "Crash kernel",
@@ -1675,6 +1678,7 @@ int kernel_kexec(void)
 	} else
 #endif
 	{
+		kexec_in_progress = true;
 		kernel_restart_prepare(NULL);
 		printk(KERN_EMERG "Starting new kernel\n");
 		machine_shutdown();
diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S
deleted file mode 100644
index 4a9a86d12c8b..000000000000
--- a/kernel/modsign_certificate.S
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <linux/export.h>
-
-#define GLOBAL(name)	\
-	.globl VMLINUX_SYMBOL(name);	\
-	VMLINUX_SYMBOL(name):
-
-.section ".init.data","aw"
-
-GLOBAL(modsign_certificate_list)
-	.incbin "signing_key.x509"
-	.incbin "extra_certificates"
-GLOBAL(modsign_certificate_list_end)
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
deleted file mode 100644
index 7cbd4507a7e6..000000000000
--- a/kernel/modsign_pubkey.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/* Public keys for module signature verification
- *
- * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/cred.h>
-#include <linux/err.h>
-#include <keys/asymmetric-type.h>
-#include "module-internal.h"
-
-struct key *modsign_keyring;
-
-extern __initconst const u8 modsign_certificate_list[];
-extern __initconst const u8 modsign_certificate_list_end[];
-
-/*
- * We need to make sure ccache doesn't cache the .o file as it doesn't notice
- * if modsign.pub changes.
- */
-static __initconst const char annoy_ccache[] = __TIME__ "foo";
-
-/*
- * Load the compiled-in keys
- */
-static __init int module_verify_init(void)
-{
-	pr_notice("Initialise module verification\n");
-
-	modsign_keyring = keyring_alloc(".module_sign",
-					KUIDT_INIT(0), KGIDT_INIT(0),
-					current_cred(),
-					((KEY_POS_ALL & ~KEY_POS_SETATTR) |
-					 KEY_USR_VIEW | KEY_USR_READ),
-					KEY_ALLOC_NOT_IN_QUOTA, NULL);
-	if (IS_ERR(modsign_keyring))
-		panic("Can't allocate module signing keyring\n");
-
-	return 0;
-}
-
-/*
- * Must be initialised before we try and load the keys into the keyring.
- */
-device_initcall(module_verify_init);
-
-/*
- * Load the compiled-in keys
- */
-static __init int load_module_signing_keys(void)
-{
-	key_ref_t key;
-	const u8 *p, *end;
-	size_t plen;
-
-	pr_notice("Loading module verification certificates\n");
-
-	end = modsign_certificate_list_end;
-	p = modsign_certificate_list;
-	while (p < end) {
-		/* Each cert begins with an ASN.1 SEQUENCE tag and must be more
-		 * than 256 bytes in size.
-		 */
-		if (end - p < 4)
-			goto dodgy_cert;
-		if (p[0] != 0x30 &&
-		    p[1] != 0x82)
-			goto dodgy_cert;
-		plen = (p[2] << 8) | p[3];
-		plen += 4;
-		if (plen > end - p)
-			goto dodgy_cert;
-
-		key = key_create_or_update(make_key_ref(modsign_keyring, 1),
-					   "asymmetric",
-					   NULL,
-					   p,
-					   plen,
-					   (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-					   KEY_USR_VIEW,
-					   KEY_ALLOC_NOT_IN_QUOTA);
-		if (IS_ERR(key))
-			pr_err("MODSIGN: Problem loading in-kernel X.509 certificate (%ld)\n",
-			       PTR_ERR(key));
-		else
-			pr_notice("MODSIGN: Loaded cert '%s'\n",
-				  key_ref_to_ptr(key)->description);
-		p += plen;
-	}
-
-	return 0;
-
-dodgy_cert:
-	pr_err("MODSIGN: Problem parsing in-kernel X.509 certificate list\n");
-	return 0;
-}
-late_initcall(load_module_signing_keys);
diff --git a/kernel/module-internal.h b/kernel/module-internal.h
index 24f9247b7d02..915e123a430f 100644
--- a/kernel/module-internal.h
+++ b/kernel/module-internal.h
@@ -9,6 +9,4 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
-extern struct key *modsign_keyring;
-
 extern int mod_verify_sig(const void *mod, unsigned long *_modlen);
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index f2970bddc5ea..be5b8fac4bd0 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -14,6 +14,7 @@
 #include <crypto/public_key.h>
 #include <crypto/hash.h>
 #include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
 #include "module-internal.h"
 
 /*
@@ -28,7 +29,7 @@
 */
 struct module_signature {
 	u8	algo;		/* Public-key crypto algorithm [enum pkey_algo] */
-	u8	hash;		/* Digest algorithm [enum pkey_hash_algo] */
+	u8	hash;		/* Digest algorithm [enum hash_algo] */
 	u8	id_type;	/* Key identifier type [enum pkey_id_type] */
 	u8	signer_len;	/* Length of signer's name */
 	u8	key_id_len;	/* Length of key identifier */
@@ -39,7 +40,7 @@ struct module_signature {
 /*
 * Digest the module contents.
 */
-static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash,
+static struct public_key_signature *mod_make_digest(enum hash_algo hash,
						    const void *mod,
						    unsigned long modlen)
{
@@ -54,7 +55,7 @@ static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash,
	/* Allocate the hashing algorithm we're going to need and find out how
	 * big the hash operational data will be.
	 */
-	tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0);
+	tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0);
	if (IS_ERR(tfm))
		return (PTR_ERR(tfm) == -ENOENT) ? ERR_PTR(-ENOPKG) : ERR_CAST(tfm);
 
@@ -157,7 +158,7 @@ static struct key *request_asymmetric_key(const char *signer, size_t signer_len,
 
	pr_debug("Look up: \"%s\"\n", id);
 
-	key = keyring_search(make_key_ref(modsign_keyring, 1),
+	key = keyring_search(make_key_ref(system_trusted_keyring, 1),
			     &key_type_asymmetric, id);
	if (IS_ERR(key))
		pr_warn("Request for unknown module key '%s' err %ld\n",
@@ -217,7 +218,7 @@ int mod_verify_sig(const void *mod, unsigned long *_modlen)
		return -ENOPKG;
 
	if (ms.hash >= PKEY_HASH__LAST ||
-	    !pkey_hash_algo[ms.hash])
+	    !hash_algo_name[ms.hash])
		return -ENOPKG;
 
	key = request_asymmetric_key(sig, ms.signer_len,
diff --git a/kernel/padata.c b/kernel/padata.c index 07af2c95dcfe..2abd25d79cc8 100644 --- a/kernel/padata.c +++ b/kernel/padata.c | |||
| @@ -46,6 +46,7 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) | |||
| 46 | 46 | ||
| 47 | static int padata_cpu_hash(struct parallel_data *pd) | 47 | static int padata_cpu_hash(struct parallel_data *pd) |
| 48 | { | 48 | { |
| 49 | unsigned int seq_nr; | ||
| 49 | int cpu_index; | 50 | int cpu_index; |
| 50 | 51 | ||
| 51 | /* | 52 | /* |
| @@ -53,10 +54,8 @@ static int padata_cpu_hash(struct parallel_data *pd) | |||
| 53 | * seq_nr mod. number of cpus in use. | 54 | * seq_nr mod. number of cpus in use. |
| 54 | */ | 55 | */ |
| 55 | 56 | ||
| 56 | spin_lock(&pd->seq_lock); | 57 | seq_nr = atomic_inc_return(&pd->seq_nr); |
| 57 | cpu_index = pd->seq_nr % cpumask_weight(pd->cpumask.pcpu); | 58 | cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu); |
| 58 | pd->seq_nr++; | ||
| 59 | spin_unlock(&pd->seq_lock); | ||
| 60 | 59 | ||
| 61 | return padata_index_to_cpu(pd, cpu_index); | 60 | return padata_index_to_cpu(pd, cpu_index); |
| 62 | } | 61 | } |
| @@ -429,7 +428,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, | |||
| 429 | padata_init_pqueues(pd); | 428 | padata_init_pqueues(pd); |
| 430 | padata_init_squeues(pd); | 429 | padata_init_squeues(pd); |
| 431 | setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); | 430 | setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); |
| 432 | pd->seq_nr = 0; | 431 | atomic_set(&pd->seq_nr, -1); |
| 433 | atomic_set(&pd->reorder_objects, 0); | 432 | atomic_set(&pd->reorder_objects, 0); |
| 434 | atomic_set(&pd->refcnt, 0); | 433 | atomic_set(&pd->refcnt, 0); |
| 435 | pd->pinst = pinst; | 434 | pd->pinst = pinst; |
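
The padata hunk above replaces a spinlock-protected sequence counter with atomic_inc_return(); seeding the counter with -1 keeps the first returned value at 0, so round-robin CPU selection still starts at index 0. A user-space C sketch of the same lock-free pattern (all names here are illustrative, not padata code):

	#include <stdatomic.h>
	#include <stdio.h>

	#define NR_CPUS_IN_USE 4

	/* seeded with -1 so the first increment yields 0, as in the hunk */
	static _Atomic unsigned int seq_nr = (unsigned int)-1;

	static int next_cpu_index(void)
	{
		/* atomic_inc_return() equivalent: add 1, use the new value */
		unsigned int nr = atomic_fetch_add(&seq_nr, 1) + 1;

		return nr % NR_CPUS_IN_USE;
	}

	int main(void)
	{
		for (int i = 0; i < 6; i++)
			printf("obj %d -> cpu index %d\n", i, next_cpu_index());
		return 0;	/* prints indices 0 1 2 3 0 1 */
	}
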
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 10c22cae83a0..b38109e204af 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
| @@ -792,7 +792,8 @@ void free_basic_memory_bitmaps(void) | |||
| 792 | { | 792 | { |
| 793 | struct memory_bitmap *bm1, *bm2; | 793 | struct memory_bitmap *bm1, *bm2; |
| 794 | 794 | ||
| 795 | BUG_ON(!(forbidden_pages_map && free_pages_map)); | 795 | if (WARN_ON(!(forbidden_pages_map && free_pages_map))) |
| 796 | return; | ||
| 796 | 797 | ||
| 797 | bm1 = forbidden_pages_map; | 798 | bm1 = forbidden_pages_map; |
| 798 | bm2 = free_pages_map; | 799 | bm2 = free_pages_map; |
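
The snapshot.c change swaps a hard BUG_ON() crash for a logged, recoverable bail-out; it works because WARN_ON() evaluates to the tested condition and can therefore gate an early return. A standalone sketch of the idiom with a stand-in WARN_ON() (the real kernel macro likewise returns the condition):

	#include <stdio.h>

	/* stand-in for the kernel macro: log, then yield the condition */
	#define WARN_ON(cond) \
		({ int __c = !!(cond); \
		   if (__c) fprintf(stderr, "WARN: %s\n", #cond); \
		   __c; })

	static void free_maps(int *forbidden_map, int *free_map)
	{
		if (WARN_ON(!(forbidden_map && free_map)))
			return;		/* warn and bail instead of crashing */

		puts("freeing both bitmaps");
	}

	int main(void)
	{
		int a = 0, b = 0;

		free_maps(NULL, NULL);	/* warns, returns */
		free_maps(&a, &b);	/* proceeds */
		return 0;
	}
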
diff --git a/kernel/power/user.c b/kernel/power/user.c index 24850270c802..98d357584cd6 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
| @@ -70,6 +70,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) | |||
| 70 | data->swap = swsusp_resume_device ? | 70 | data->swap = swsusp_resume_device ? |
| 71 | swap_type_of(swsusp_resume_device, 0, NULL) : -1; | 71 | swap_type_of(swsusp_resume_device, 0, NULL) : -1; |
| 72 | data->mode = O_RDONLY; | 72 | data->mode = O_RDONLY; |
| 73 | data->free_bitmaps = false; | ||
| 73 | error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); | 74 | error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); |
| 74 | if (error) | 75 | if (error) |
| 75 | pm_notifier_call_chain(PM_POST_HIBERNATION); | 76 | pm_notifier_call_chain(PM_POST_HIBERNATION); |
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 6abb03dff5c0..08a765232432 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h | |||
| @@ -1632,7 +1632,7 @@ module_param(rcu_idle_gp_delay, int, 0644); | |||
| 1632 | static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; | 1632 | static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; |
| 1633 | module_param(rcu_idle_lazy_gp_delay, int, 0644); | 1633 | module_param(rcu_idle_lazy_gp_delay, int, 0644); |
| 1634 | 1634 | ||
| 1635 | extern int tick_nohz_enabled; | 1635 | extern int tick_nohz_active; |
| 1636 | 1636 | ||
| 1637 | /* | 1637 | /* |
| 1638 | * Try to advance callbacks for all flavors of RCU on the current CPU, but | 1638 | * Try to advance callbacks for all flavors of RCU on the current CPU, but |
| @@ -1729,7 +1729,7 @@ static void rcu_prepare_for_idle(int cpu) | |||
| 1729 | int tne; | 1729 | int tne; |
| 1730 | 1730 | ||
| 1731 | /* Handle nohz enablement switches conservatively. */ | 1731 | /* Handle nohz enablement switches conservatively. */ |
| 1732 | tne = ACCESS_ONCE(tick_nohz_enabled); | 1732 | tne = ACCESS_ONCE(tick_nohz_active); |
| 1733 | if (tne != rdtp->tick_nohz_enabled_snap) { | 1733 | if (tne != rdtp->tick_nohz_enabled_snap) { |
| 1734 | if (rcu_cpu_has_callbacks(cpu, NULL)) | 1734 | if (rcu_cpu_has_callbacks(cpu, NULL)) |
| 1735 | invoke_rcu_core(); /* force nohz to see update. */ | 1735 | invoke_rcu_core(); /* force nohz to see update. */ |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c1808606ee5f..e85cda20ab2b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -2660,6 +2660,7 @@ asmlinkage void __sched notrace preempt_schedule(void) | |||
| 2660 | } while (need_resched()); | 2660 | } while (need_resched()); |
| 2661 | } | 2661 | } |
| 2662 | EXPORT_SYMBOL(preempt_schedule); | 2662 | EXPORT_SYMBOL(preempt_schedule); |
| 2663 | #endif /* CONFIG_PREEMPT */ | ||
| 2663 | 2664 | ||
| 2664 | /* | 2665 | /* |
| 2665 | * this is the entry point to schedule() from kernel preemption | 2666 | * this is the entry point to schedule() from kernel preemption |
| @@ -2693,8 +2694,6 @@ asmlinkage void __sched preempt_schedule_irq(void) | |||
| 2693 | exception_exit(prev_state); | 2694 | exception_exit(prev_state); |
| 2694 | } | 2695 | } |
| 2695 | 2696 | ||
| 2696 | #endif /* CONFIG_PREEMPT */ | ||
| 2697 | |||
| 2698 | int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, | 2697 | int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, |
| 2699 | void *key) | 2698 | void *key) |
| 2700 | { | 2699 | { |
| @@ -4762,7 +4761,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
| 4762 | cpumask_clear_cpu(rq->cpu, old_rd->span); | 4761 | cpumask_clear_cpu(rq->cpu, old_rd->span); |
| 4763 | 4762 | ||
| 4764 | /* | 4763 | /* |
| 4765 | * If we dont want to free the old_rt yet then | 4764 | * If we don't want to free the old_rd yet then |
| 4766 | * set old_rd to NULL to skip the freeing later | 4765 | * set old_rd to NULL to skip the freeing later |
| 4767 | * in this function: | 4766 | * in this function: |
| 4768 | */ | 4767 | */ |
| @@ -4910,8 +4909,9 @@ static void update_top_cache_domain(int cpu) | |||
| 4910 | if (sd) { | 4909 | if (sd) { |
| 4911 | id = cpumask_first(sched_domain_span(sd)); | 4910 | id = cpumask_first(sched_domain_span(sd)); |
| 4912 | size = cpumask_weight(sched_domain_span(sd)); | 4911 | size = cpumask_weight(sched_domain_span(sd)); |
| 4913 | rcu_assign_pointer(per_cpu(sd_busy, cpu), sd->parent); | 4912 | sd = sd->parent; /* sd_busy */ |
| 4914 | } | 4913 | } |
| 4914 | rcu_assign_pointer(per_cpu(sd_busy, cpu), sd); | ||
| 4915 | 4915 | ||
| 4916 | rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); | 4916 | rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); |
| 4917 | per_cpu(sd_llc_size, cpu) = size; | 4917 | per_cpu(sd_llc_size, cpu) = size; |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e8b652ebe027..fd773ade1a31 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
| @@ -5379,10 +5379,31 @@ void update_group_power(struct sched_domain *sd, int cpu) | |||
| 5379 | */ | 5379 | */ |
| 5380 | 5380 | ||
| 5381 | for_each_cpu(cpu, sched_group_cpus(sdg)) { | 5381 | for_each_cpu(cpu, sched_group_cpus(sdg)) { |
| 5382 | struct sched_group *sg = cpu_rq(cpu)->sd->groups; | 5382 | struct sched_group_power *sgp; |
| 5383 | struct rq *rq = cpu_rq(cpu); | ||
| 5383 | 5384 | ||
| 5384 | power_orig += sg->sgp->power_orig; | 5385 | /* |
| 5385 | power += sg->sgp->power; | 5386 | * build_sched_domains() -> init_sched_groups_power() |
| 5387 | * gets here before we've attached the domains to the | ||
| 5388 | * runqueues. | ||
| 5389 | * | ||
| 5390 | * Use power_of(), which is set irrespective of domains | ||
| 5391 | * in update_cpu_power(). | ||
| 5392 | * | ||
| 5393 | * This prevents power/power_orig from being 0 and | ||
| 5394 | * causing divide-by-zero issues on boot. | ||
| 5395 | * | ||
| 5396 | * Runtime updates will correct power_orig. | ||
| 5397 | */ | ||
| 5398 | if (unlikely(!rq->sd)) { | ||
| 5399 | power_orig += power_of(cpu); | ||
| 5400 | power += power_of(cpu); | ||
| 5401 | continue; | ||
| 5402 | } | ||
| 5403 | |||
| 5404 | sgp = rq->sd->groups->sgp; | ||
| 5405 | power_orig += sgp->power_orig; | ||
| 5406 | power += sgp->power; | ||
| 5386 | } | 5407 | } |
| 5387 | } else { | 5408 | } else { |
| 5388 | /* | 5409 | /* |
diff --git a/kernel/system_certificates.S b/kernel/system_certificates.S new file mode 100644 index 000000000000..3e9868d47535 --- /dev/null +++ b/kernel/system_certificates.S | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | #include <linux/export.h> | ||
| 2 | #include <linux/init.h> | ||
| 3 | |||
| 4 | __INITRODATA | ||
| 5 | |||
| 6 | .align 8 | ||
| 7 | .globl VMLINUX_SYMBOL(system_certificate_list) | ||
| 8 | VMLINUX_SYMBOL(system_certificate_list): | ||
| 9 | __cert_list_start: | ||
| 10 | .incbin "kernel/x509_certificate_list" | ||
| 11 | __cert_list_end: | ||
| 12 | |||
| 13 | .align 8 | ||
| 14 | .globl VMLINUX_SYMBOL(system_certificate_list_size) | ||
| 15 | VMLINUX_SYMBOL(system_certificate_list_size): | ||
| 16 | #ifdef CONFIG_64BIT | ||
| 17 | .quad __cert_list_end - __cert_list_start | ||
| 18 | #else | ||
| 19 | .long __cert_list_end - __cert_list_start | ||
| 20 | #endif | ||
diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c new file mode 100644 index 000000000000..52ebc70263f4 --- /dev/null +++ b/kernel/system_keyring.c | |||
| @@ -0,0 +1,105 @@ | |||
| 1 | /* System trusted keyring for trusted public keys | ||
| 2 | * | ||
| 3 | * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public Licence | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the Licence, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/export.h> | ||
| 13 | #include <linux/kernel.h> | ||
| 14 | #include <linux/sched.h> | ||
| 15 | #include <linux/cred.h> | ||
| 16 | #include <linux/err.h> | ||
| 17 | #include <keys/asymmetric-type.h> | ||
| 18 | #include <keys/system_keyring.h> | ||
| 19 | #include "module-internal.h" | ||
| 20 | |||
| 21 | struct key *system_trusted_keyring; | ||
| 22 | EXPORT_SYMBOL_GPL(system_trusted_keyring); | ||
| 23 | |||
| 24 | extern __initconst const u8 system_certificate_list[]; | ||
| 25 | extern __initconst const unsigned long system_certificate_list_size; | ||
| 26 | |||
| 27 | /* | ||
| 28 | * Load the compiled-in keys | ||
| 29 | */ | ||
| 30 | static __init int system_trusted_keyring_init(void) | ||
| 31 | { | ||
| 32 | pr_notice("Initialise system trusted keyring\n"); | ||
| 33 | |||
| 34 | system_trusted_keyring = | ||
| 35 | keyring_alloc(".system_keyring", | ||
| 36 | KUIDT_INIT(0), KGIDT_INIT(0), current_cred(), | ||
| 37 | ((KEY_POS_ALL & ~KEY_POS_SETATTR) | | ||
| 38 | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH), | ||
| 39 | KEY_ALLOC_NOT_IN_QUOTA, NULL); | ||
| 40 | if (IS_ERR(system_trusted_keyring)) | ||
| 41 | panic("Can't allocate system trusted keyring\n"); | ||
| 42 | |||
| 43 | set_bit(KEY_FLAG_TRUSTED_ONLY, &system_trusted_keyring->flags); | ||
| 44 | return 0; | ||
| 45 | } | ||
| 46 | |||
| 47 | /* | ||
| 48 | * Must be initialised before we try to load the keys into the keyring. | ||
| 49 | */ | ||
| 50 | device_initcall(system_trusted_keyring_init); | ||
| 51 | |||
| 52 | /* | ||
| 53 | * Load the compiled-in list of X.509 certificates. | ||
| 54 | */ | ||
| 55 | static __init int load_system_certificate_list(void) | ||
| 56 | { | ||
| 57 | key_ref_t key; | ||
| 58 | const u8 *p, *end; | ||
| 59 | size_t plen; | ||
| 60 | |||
| 61 | pr_notice("Loading compiled-in X.509 certificates\n"); | ||
| 62 | |||
| 63 | p = system_certificate_list; | ||
| 64 | end = p + system_certificate_list_size; | ||
| 65 | while (p < end) { | ||
| 66 | /* Each cert begins with an ASN.1 SEQUENCE tag and must be more | ||
| 67 | * than 256 bytes in size. | ||
| 68 | */ | ||
| 69 | if (end - p < 4) | ||
| 70 | goto dodgy_cert; | ||
| 71 | if (p[0] != 0x30 && | ||
| 72 | p[1] != 0x82) | ||
| 73 | goto dodgy_cert; | ||
| 74 | plen = (p[2] << 8) | p[3]; | ||
| 75 | plen += 4; | ||
| 76 | if (plen > end - p) | ||
| 77 | goto dodgy_cert; | ||
| 78 | |||
| 79 | key = key_create_or_update(make_key_ref(system_trusted_keyring, 1), | ||
| 80 | "asymmetric", | ||
| 81 | NULL, | ||
| 82 | p, | ||
| 83 | plen, | ||
| 84 | ((KEY_POS_ALL & ~KEY_POS_SETATTR) | | ||
| 85 | KEY_USR_VIEW | KEY_USR_READ), | ||
| 86 | KEY_ALLOC_NOT_IN_QUOTA | | ||
| 87 | KEY_ALLOC_TRUSTED); | ||
| 88 | if (IS_ERR(key)) { | ||
| 89 | pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", | ||
| 90 | PTR_ERR(key)); | ||
| 91 | } else { | ||
| 92 | pr_notice("Loaded X.509 cert '%s'\n", | ||
| 93 | key_ref_to_ptr(key)->description); | ||
| 94 | key_ref_put(key); | ||
| 95 | } | ||
| 96 | p += plen; | ||
| 97 | } | ||
| 98 | |||
| 99 | return 0; | ||
| 100 | |||
| 101 | dodgy_cert: | ||
| 102 | pr_err("Problem parsing in-kernel X.509 certificate list\n"); | ||
| 103 | return 0; | ||
| 104 | } | ||
| 105 | late_initcall(load_system_certificate_list); | ||
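
The parser above walks a blob of concatenated DER certificates using X.509's long-form length encoding: any certificate with a body of 256 bytes or more starts with 0x30 0x82 followed by a big-endian 16-bit body length, so each record spans ((p[2] << 8) | p[3]) + 4 bytes. (Strictly, "does not start with 0x30 0x82" is p[0] != 0x30 || p[1] != 0x82; the && form above only rejects input where both bytes mismatch, and the sketch below uses the strict form.) A small user-space sketch of the arithmetic, with made-up header bytes:

	#include <stdio.h>
	#include <stddef.h>

	int main(void)
	{
		/* hypothetical header of a certificate with a 0x010a-byte body */
		const unsigned char hdr[4] = { 0x30, 0x82, 0x01, 0x0a };
		size_t plen;

		if (hdr[0] != 0x30 || hdr[1] != 0x82)	/* strict SEQUENCE check */
			return 1;

		plen = ((size_t)hdr[2] << 8 | hdr[3]) + 4;	/* body + 4 header bytes */
		printf("record spans %zu bytes\n", plen);	/* prints 270 */
		return 0;
	}
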
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 9f4618eb51c8..13d2f7cd65db 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
| @@ -673,17 +673,18 @@ err: | |||
| 673 | nlmsg_free(rep_skb); | 673 | nlmsg_free(rep_skb); |
| 674 | } | 674 | } |
| 675 | 675 | ||
| 676 | static struct genl_ops taskstats_ops = { | 676 | static const struct genl_ops taskstats_ops[] = { |
| 677 | .cmd = TASKSTATS_CMD_GET, | 677 | { |
| 678 | .doit = taskstats_user_cmd, | 678 | .cmd = TASKSTATS_CMD_GET, |
| 679 | .policy = taskstats_cmd_get_policy, | 679 | .doit = taskstats_user_cmd, |
| 680 | .flags = GENL_ADMIN_PERM, | 680 | .policy = taskstats_cmd_get_policy, |
| 681 | }; | 681 | .flags = GENL_ADMIN_PERM, |
| 682 | 682 | }, | |
| 683 | static struct genl_ops cgroupstats_ops = { | 683 | { |
| 684 | .cmd = CGROUPSTATS_CMD_GET, | 684 | .cmd = CGROUPSTATS_CMD_GET, |
| 685 | .doit = cgroupstats_user_cmd, | 685 | .doit = cgroupstats_user_cmd, |
| 686 | .policy = cgroupstats_cmd_get_policy, | 686 | .policy = cgroupstats_cmd_get_policy, |
| 687 | }, | ||
| 687 | }; | 688 | }; |
| 688 | 689 | ||
| 689 | /* Needed early in initialization */ | 690 | /* Needed early in initialization */ |
| @@ -702,26 +703,13 @@ static int __init taskstats_init(void) | |||
| 702 | { | 703 | { |
| 703 | int rc; | 704 | int rc; |
| 704 | 705 | ||
| 705 | rc = genl_register_family(&family); | 706 | rc = genl_register_family_with_ops(&family, taskstats_ops); |
| 706 | if (rc) | 707 | if (rc) |
| 707 | return rc; | 708 | return rc; |
| 708 | 709 | ||
| 709 | rc = genl_register_ops(&family, &taskstats_ops); | ||
| 710 | if (rc < 0) | ||
| 711 | goto err; | ||
| 712 | |||
| 713 | rc = genl_register_ops(&family, &cgroupstats_ops); | ||
| 714 | if (rc < 0) | ||
| 715 | goto err_cgroup_ops; | ||
| 716 | |||
| 717 | family_registered = 1; | 710 | family_registered = 1; |
| 718 | pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); | 711 | pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); |
| 719 | return 0; | 712 | return 0; |
| 720 | err_cgroup_ops: | ||
| 721 | genl_unregister_ops(&family, &taskstats_ops); | ||
| 722 | err: | ||
| 723 | genl_unregister_family(&family); | ||
| 724 | return rc; | ||
| 725 | } | 713 | } |
| 726 | 714 | ||
| 727 | /* | 715 | /* |
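
The taskstats conversion above follows the genetlink rework in this series: the two individually registered operations become one const array handed to genl_register_family_with_ops(), which registers everything in a single call and makes the partial-failure unwind unnecessary. The generic shape of the pattern in a standalone sketch (all names hypothetical):

	#include <stdio.h>

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	struct op {
		int cmd;
		int (*doit)(void);
	};

	static int get_doit(void)   { return 0; }
	static int stats_doit(void) { return 0; }

	/* const table: the ops are never mutated after registration */
	static const struct op my_ops[] = {
		{ .cmd = 1, .doit = get_doit },
		{ .cmd = 2, .doit = stats_doit },
	};

	static int register_ops(const struct op *ops, unsigned int n)
	{
		/* one call registers all ops; no per-op error unwinding */
		for (unsigned int i = 0; i < n; i++)
			printf("registered cmd %d\n", ops[i].cmd);
		return 0;
	}

	int main(void)
	{
		return register_ops(my_ops, ARRAY_SIZE(my_ops));
	}
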
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 64522ecdfe0e..162b03ab0ad2 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
| @@ -33,6 +33,21 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device); | |||
| 33 | */ | 33 | */ |
| 34 | ktime_t tick_next_period; | 34 | ktime_t tick_next_period; |
| 35 | ktime_t tick_period; | 35 | ktime_t tick_period; |
| 36 | |||
| 37 | /* | ||
| 38 | * tick_do_timer_cpu is a timer core internal variable which holds the number | ||
| 39 | * of the CPU responsible for calling do_timer(), i.e. the timekeeping work. | ||
| 40 | * This variable has two functions: | ||
| 41 | * | ||
| 42 | * 1) Prevent a thundering herd issue of a gazillion CPUs trying to grab the | ||
| 43 | * timekeeping lock all at once. Only the CPU which is assigned to do the | ||
| 44 | * update is handling it. | ||
| 45 | * | ||
| 46 | * 2) Hand off the duty in the NOHZ idle case by setting the value to | ||
| 47 | * TICK_DO_TIMER_NONE, i.e. a non-existing CPU. So the next CPU which looks | ||
| 48 | * at it will take over and keep the timekeeping alive. The handover | ||
| 49 | * procedure also covers CPU hotplug. | ||
| 50 | */ | ||
| 36 | int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; | 51 | int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; |
| 37 | 52 | ||
| 38 | /* | 53 | /* |
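
A user-space C11 sketch of the handover protocol the new tick_do_timer_cpu comment describes: exactly one owner performs the shared update, relinquishes the duty by storing a NONE value when it goes idle, and the next thread to look adopts it (names are illustrative; the kernel's handover additionally covers CPU hotplug):

	#include <stdatomic.h>
	#include <stdio.h>

	#define OWNER_NONE -1

	static _Atomic int duty_owner = 0;	/* the boot thread starts as owner */

	static void tick(int self)
	{
		int owner = atomic_load(&duty_owner);

		/* hand-off: the first thread to see NONE claims the duty */
		if (owner == OWNER_NONE &&
		    atomic_compare_exchange_strong(&duty_owner, &owner, self))
			owner = self;

		if (owner == self)
			printf("thread %d advances the shared clock\n", self);
	}

	static void go_idle(int self)
	{
		int expected = self;

		/* relinquish the duty before sleeping */
		atomic_compare_exchange_strong(&duty_owner, &expected, OWNER_NONE);
	}

	int main(void)
	{
		tick(0);	/* owner updates */
		tick(1);	/* non-owner skips the update */
		go_idle(0);	/* duty becomes OWNER_NONE */
		tick(1);	/* thread 1 adopts the duty and updates */
		return 0;
	}
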
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3612fc77f834..ea20f7d1ac2c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -361,8 +361,8 @@ void __init tick_nohz_init(void) | |||
| 361 | /* | 361 | /* |
| 362 | * NO HZ enabled ? | 362 | * NO HZ enabled ? |
| 363 | */ | 363 | */ |
| 364 | int tick_nohz_enabled __read_mostly = 1; | 364 | static int tick_nohz_enabled __read_mostly = 1; |
| 365 | 365 | int tick_nohz_active __read_mostly; | |
| 366 | /* | 366 | /* |
| 367 | * Enable / Disable tickless mode | 367 | * Enable / Disable tickless mode |
| 368 | */ | 368 | */ |
| @@ -465,7 +465,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) | |||
| 465 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 465 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
| 466 | ktime_t now, idle; | 466 | ktime_t now, idle; |
| 467 | 467 | ||
| 468 | if (!tick_nohz_enabled) | 468 | if (!tick_nohz_active) |
| 469 | return -1; | 469 | return -1; |
| 470 | 470 | ||
| 471 | now = ktime_get(); | 471 | now = ktime_get(); |
| @@ -506,7 +506,7 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) | |||
| 506 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 506 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
| 507 | ktime_t now, iowait; | 507 | ktime_t now, iowait; |
| 508 | 508 | ||
| 509 | if (!tick_nohz_enabled) | 509 | if (!tick_nohz_active) |
| 510 | return -1; | 510 | return -1; |
| 511 | 511 | ||
| 512 | now = ktime_get(); | 512 | now = ktime_get(); |
| @@ -711,8 +711,10 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | |||
| 711 | return false; | 711 | return false; |
| 712 | } | 712 | } |
| 713 | 713 | ||
| 714 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | 714 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) { |
| 715 | ts->sleep_length = (ktime_t) { .tv64 = NSEC_PER_SEC/HZ }; | ||
| 715 | return false; | 716 | return false; |
| 717 | } | ||
| 716 | 718 | ||
| 717 | if (need_resched()) | 719 | if (need_resched()) |
| 718 | return false; | 720 | return false; |
| @@ -799,11 +801,6 @@ void tick_nohz_idle_enter(void) | |||
| 799 | local_irq_disable(); | 801 | local_irq_disable(); |
| 800 | 802 | ||
| 801 | ts = &__get_cpu_var(tick_cpu_sched); | 803 | ts = &__get_cpu_var(tick_cpu_sched); |
| 802 | /* | ||
| 803 | * set ts->inidle unconditionally. even if the system did not | ||
| 804 | * switch to nohz mode the cpu frequency governers rely on the | ||
| 805 | * update of the idle time accounting in tick_nohz_start_idle(). | ||
| 806 | */ | ||
| 807 | ts->inidle = 1; | 804 | ts->inidle = 1; |
| 808 | __tick_nohz_idle_enter(ts); | 805 | __tick_nohz_idle_enter(ts); |
| 809 | 806 | ||
| @@ -973,7 +970,7 @@ static void tick_nohz_switch_to_nohz(void) | |||
| 973 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 970 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
| 974 | ktime_t next; | 971 | ktime_t next; |
| 975 | 972 | ||
| 976 | if (!tick_nohz_enabled) | 973 | if (!tick_nohz_active) |
| 977 | return; | 974 | return; |
| 978 | 975 | ||
| 979 | local_irq_disable(); | 976 | local_irq_disable(); |
| @@ -981,7 +978,7 @@ static void tick_nohz_switch_to_nohz(void) | |||
| 981 | local_irq_enable(); | 978 | local_irq_enable(); |
| 982 | return; | 979 | return; |
| 983 | } | 980 | } |
| 984 | 981 | tick_nohz_active = 1; | |
| 985 | ts->nohz_mode = NOHZ_MODE_LOWRES; | 982 | ts->nohz_mode = NOHZ_MODE_LOWRES; |
| 986 | 983 | ||
| 987 | /* | 984 | /* |
| @@ -1139,8 +1136,10 @@ void tick_setup_sched_timer(void) | |||
| 1139 | } | 1136 | } |
| 1140 | 1137 | ||
| 1141 | #ifdef CONFIG_NO_HZ_COMMON | 1138 | #ifdef CONFIG_NO_HZ_COMMON |
| 1142 | if (tick_nohz_enabled) | 1139 | if (tick_nohz_enabled) { |
| 1143 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | 1140 | ts->nohz_mode = NOHZ_MODE_HIGHRES; |
| 1141 | tick_nohz_active = 1; | ||
| 1142 | } | ||
| 1144 | #endif | 1143 | #endif |
| 1145 | } | 1144 | } |
| 1146 | #endif /* HIGH_RES_TIMERS */ | 1145 | #endif /* HIGH_RES_TIMERS */ |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3abf53418b67..87b4f00284c9 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -1347,7 +1347,7 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk) | |||
| 1347 | tk->xtime_nsec -= remainder; | 1347 | tk->xtime_nsec -= remainder; |
| 1348 | tk->xtime_nsec += 1ULL << tk->shift; | 1348 | tk->xtime_nsec += 1ULL << tk->shift; |
| 1349 | tk->ntp_error += remainder << tk->ntp_error_shift; | 1349 | tk->ntp_error += remainder << tk->ntp_error_shift; |
| 1350 | 1350 | tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift; | |
| 1351 | } | 1351 | } |
| 1352 | #else | 1352 | #else |
| 1353 | #define old_vsyscall_fixup(tk) | 1353 | #define old_vsyscall_fixup(tk) |
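
The one-line timekeeping fix above completes the error bookkeeping of the old vsyscall path: rounding xtime_nsec up to the next whole shifted nanosecond injects ((1 << shift) - remainder) of phantom time, and the new subtraction charges that injection to ntp_error so NTP steering removes it again. Illustrative arithmetic with made-up values:

	#include <stdio.h>

	int main(void)
	{
		unsigned int shift = 8, ntp_error_shift = 2;
		long long xtime_nsec = 1000, ntp_error = 0;
		long long remainder = xtime_nsec & ((1LL << shift) - 1);	/* 232 */

		xtime_nsec -= remainder;
		xtime_nsec += 1LL << shift;		/* round up: injects 24 units */
		ntp_error += remainder << ntp_error_shift;
		ntp_error -= (1LL << shift) << ntp_error_shift;	/* the added line */

		/* ntp_error now carries -(injected time) << ntp_error_shift */
		printf("injected=%lld ntp_error=%lld\n",
		       (1LL << shift) - remainder, ntp_error);	/* 24, -96 */
		return 0;
	}
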
diff --git a/kernel/timer.c b/kernel/timer.c index 6582b82fa966..accfd241b9e5 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -1518,9 +1518,8 @@ static int init_timers_cpu(int cpu) | |||
| 1518 | /* | 1518 | /* |
| 1519 | * The APs use this path later in boot | 1519 | * The APs use this path later in boot |
| 1520 | */ | 1520 | */ |
| 1521 | base = kmalloc_node(sizeof(*base), | 1521 | base = kzalloc_node(sizeof(*base), GFP_KERNEL, |
| 1522 | GFP_KERNEL | __GFP_ZERO, | 1522 | cpu_to_node(cpu)); |
| 1523 | cpu_to_node(cpu)); | ||
| 1524 | if (!base) | 1523 | if (!base) |
| 1525 | return -ENOMEM; | 1524 | return -ENOMEM; |
| 1526 | 1525 | ||
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 22fa55696760..0e9f9eaade2f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -367,9 +367,6 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list, | |||
| 367 | 367 | ||
| 368 | static int __register_ftrace_function(struct ftrace_ops *ops) | 368 | static int __register_ftrace_function(struct ftrace_ops *ops) |
| 369 | { | 369 | { |
| 370 | if (unlikely(ftrace_disabled)) | ||
| 371 | return -ENODEV; | ||
| 372 | |||
| 373 | if (FTRACE_WARN_ON(ops == &global_ops)) | 370 | if (FTRACE_WARN_ON(ops == &global_ops)) |
| 374 | return -EINVAL; | 371 | return -EINVAL; |
| 375 | 372 | ||
| @@ -428,9 +425,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) | |||
| 428 | { | 425 | { |
| 429 | int ret; | 426 | int ret; |
| 430 | 427 | ||
| 431 | if (ftrace_disabled) | ||
| 432 | return -ENODEV; | ||
| 433 | |||
| 434 | if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) | 428 | if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) |
| 435 | return -EBUSY; | 429 | return -EBUSY; |
| 436 | 430 | ||
| @@ -2088,10 +2082,15 @@ static void ftrace_startup_enable(int command) | |||
| 2088 | static int ftrace_startup(struct ftrace_ops *ops, int command) | 2082 | static int ftrace_startup(struct ftrace_ops *ops, int command) |
| 2089 | { | 2083 | { |
| 2090 | bool hash_enable = true; | 2084 | bool hash_enable = true; |
| 2085 | int ret; | ||
| 2091 | 2086 | ||
| 2092 | if (unlikely(ftrace_disabled)) | 2087 | if (unlikely(ftrace_disabled)) |
| 2093 | return -ENODEV; | 2088 | return -ENODEV; |
| 2094 | 2089 | ||
| 2090 | ret = __register_ftrace_function(ops); | ||
| 2091 | if (ret) | ||
| 2092 | return ret; | ||
| 2093 | |||
| 2095 | ftrace_start_up++; | 2094 | ftrace_start_up++; |
| 2096 | command |= FTRACE_UPDATE_CALLS; | 2095 | command |= FTRACE_UPDATE_CALLS; |
| 2097 | 2096 | ||
| @@ -2113,12 +2112,17 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) | |||
| 2113 | return 0; | 2112 | return 0; |
| 2114 | } | 2113 | } |
| 2115 | 2114 | ||
| 2116 | static void ftrace_shutdown(struct ftrace_ops *ops, int command) | 2115 | static int ftrace_shutdown(struct ftrace_ops *ops, int command) |
| 2117 | { | 2116 | { |
| 2118 | bool hash_disable = true; | 2117 | bool hash_disable = true; |
| 2118 | int ret; | ||
| 2119 | 2119 | ||
| 2120 | if (unlikely(ftrace_disabled)) | 2120 | if (unlikely(ftrace_disabled)) |
| 2121 | return; | 2121 | return -ENODEV; |
| 2122 | |||
| 2123 | ret = __unregister_ftrace_function(ops); | ||
| 2124 | if (ret) | ||
| 2125 | return ret; | ||
| 2122 | 2126 | ||
| 2123 | ftrace_start_up--; | 2127 | ftrace_start_up--; |
| 2124 | /* | 2128 | /* |
| @@ -2153,9 +2157,10 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command) | |||
| 2153 | } | 2157 | } |
| 2154 | 2158 | ||
| 2155 | if (!command || !ftrace_enabled) | 2159 | if (!command || !ftrace_enabled) |
| 2156 | return; | 2160 | return 0; |
| 2157 | 2161 | ||
| 2158 | ftrace_run_update_code(command); | 2162 | ftrace_run_update_code(command); |
| 2163 | return 0; | ||
| 2159 | } | 2164 | } |
| 2160 | 2165 | ||
| 2161 | static void ftrace_startup_sysctl(void) | 2166 | static void ftrace_startup_sysctl(void) |
| @@ -3060,16 +3065,13 @@ static void __enable_ftrace_function_probe(void) | |||
| 3060 | if (i == FTRACE_FUNC_HASHSIZE) | 3065 | if (i == FTRACE_FUNC_HASHSIZE) |
| 3061 | return; | 3066 | return; |
| 3062 | 3067 | ||
| 3063 | ret = __register_ftrace_function(&trace_probe_ops); | 3068 | ret = ftrace_startup(&trace_probe_ops, 0); |
| 3064 | if (!ret) | ||
| 3065 | ret = ftrace_startup(&trace_probe_ops, 0); | ||
| 3066 | 3069 | ||
| 3067 | ftrace_probe_registered = 1; | 3070 | ftrace_probe_registered = 1; |
| 3068 | } | 3071 | } |
| 3069 | 3072 | ||
| 3070 | static void __disable_ftrace_function_probe(void) | 3073 | static void __disable_ftrace_function_probe(void) |
| 3071 | { | 3074 | { |
| 3072 | int ret; | ||
| 3073 | int i; | 3075 | int i; |
| 3074 | 3076 | ||
| 3075 | if (!ftrace_probe_registered) | 3077 | if (!ftrace_probe_registered) |
| @@ -3082,9 +3084,7 @@ static void __disable_ftrace_function_probe(void) | |||
| 3082 | } | 3084 | } |
| 3083 | 3085 | ||
| 3084 | /* no more funcs left */ | 3086 | /* no more funcs left */ |
| 3085 | ret = __unregister_ftrace_function(&trace_probe_ops); | 3087 | ftrace_shutdown(&trace_probe_ops, 0); |
| 3086 | if (!ret) | ||
| 3087 | ftrace_shutdown(&trace_probe_ops, 0); | ||
| 3088 | 3088 | ||
| 3089 | ftrace_probe_registered = 0; | 3089 | ftrace_probe_registered = 0; |
| 3090 | } | 3090 | } |
| @@ -4366,12 +4366,15 @@ core_initcall(ftrace_nodyn_init); | |||
| 4366 | static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } | 4366 | static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } |
| 4367 | static inline void ftrace_startup_enable(int command) { } | 4367 | static inline void ftrace_startup_enable(int command) { } |
| 4368 | /* Keep as macros so we do not need to define the commands */ | 4368 | /* Keep as macros so we do not need to define the commands */ |
| 4369 | # define ftrace_startup(ops, command) \ | 4369 | # define ftrace_startup(ops, command) \ |
| 4370 | ({ \ | 4370 | ({ \ |
| 4371 | (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ | 4371 | int ___ret = __register_ftrace_function(ops); \ |
| 4372 | 0; \ | 4372 | if (!___ret) \ |
| 4373 | (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ | ||
| 4374 | ___ret; \ | ||
| 4373 | }) | 4375 | }) |
| 4374 | # define ftrace_shutdown(ops, command) do { } while (0) | 4376 | # define ftrace_shutdown(ops, command) __unregister_ftrace_function(ops) |
| 4377 | |||
| 4375 | # define ftrace_startup_sysctl() do { } while (0) | 4378 | # define ftrace_startup_sysctl() do { } while (0) |
| 4376 | # define ftrace_shutdown_sysctl() do { } while (0) | 4379 | # define ftrace_shutdown_sysctl() do { } while (0) |
| 4377 | 4380 | ||
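
The reworked !CONFIG_DYNAMIC_FTRACE stubs above lean on GCC's statement-expression extension: a ({ ... }) block is an expression whose value is its last statement, letting a macro both register the ops and hand back the status. A minimal standalone illustration (register_thing() is hypothetical):

	#include <stdio.h>

	static int register_thing(int *thing)
	{
		*thing = 1;
		return 0;			/* 0 on success */
	}

	#define thing_startup(thing)					\
	({								\
		int ___ret = register_thing(thing);			\
		if (!___ret)						\
			puts("enabled");				\
		___ret;			/* value of the whole ({ }) */	\
	})

	int main(void)
	{
		int t = 0;

		return thing_startup(&t);	/* prints "enabled", exits 0 */
	}
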
| @@ -4780,9 +4783,7 @@ int register_ftrace_function(struct ftrace_ops *ops) | |||
| 4780 | 4783 | ||
| 4781 | mutex_lock(&ftrace_lock); | 4784 | mutex_lock(&ftrace_lock); |
| 4782 | 4785 | ||
| 4783 | ret = __register_ftrace_function(ops); | 4786 | ret = ftrace_startup(ops, 0); |
| 4784 | if (!ret) | ||
| 4785 | ret = ftrace_startup(ops, 0); | ||
| 4786 | 4787 | ||
| 4787 | mutex_unlock(&ftrace_lock); | 4788 | mutex_unlock(&ftrace_lock); |
| 4788 | 4789 | ||
| @@ -4801,9 +4802,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) | |||
| 4801 | int ret; | 4802 | int ret; |
| 4802 | 4803 | ||
| 4803 | mutex_lock(&ftrace_lock); | 4804 | mutex_lock(&ftrace_lock); |
| 4804 | ret = __unregister_ftrace_function(ops); | 4805 | ret = ftrace_shutdown(ops, 0); |
| 4805 | if (!ret) | ||
| 4806 | ftrace_shutdown(ops, 0); | ||
| 4807 | mutex_unlock(&ftrace_lock); | 4806 | mutex_unlock(&ftrace_lock); |
| 4808 | 4807 | ||
| 4809 | return ret; | 4808 | return ret; |
| @@ -4997,6 +4996,13 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, | |||
| 4997 | return NOTIFY_DONE; | 4996 | return NOTIFY_DONE; |
| 4998 | } | 4997 | } |
| 4999 | 4998 | ||
| 4999 | /* Just a placeholder for function graph */ | ||
| 5000 | static struct ftrace_ops fgraph_ops __read_mostly = { | ||
| 5001 | .func = ftrace_stub, | ||
| 5002 | .flags = FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_GLOBAL | | ||
| 5003 | FTRACE_OPS_FL_RECURSION_SAFE, | ||
| 5004 | }; | ||
| 5005 | |||
| 5000 | int register_ftrace_graph(trace_func_graph_ret_t retfunc, | 5006 | int register_ftrace_graph(trace_func_graph_ret_t retfunc, |
| 5001 | trace_func_graph_ent_t entryfunc) | 5007 | trace_func_graph_ent_t entryfunc) |
| 5002 | { | 5008 | { |
| @@ -5023,7 +5029,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, | |||
| 5023 | ftrace_graph_return = retfunc; | 5029 | ftrace_graph_return = retfunc; |
| 5024 | ftrace_graph_entry = entryfunc; | 5030 | ftrace_graph_entry = entryfunc; |
| 5025 | 5031 | ||
| 5026 | ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); | 5032 | ret = ftrace_startup(&fgraph_ops, FTRACE_START_FUNC_RET); |
| 5027 | 5033 | ||
| 5028 | out: | 5034 | out: |
| 5029 | mutex_unlock(&ftrace_lock); | 5035 | mutex_unlock(&ftrace_lock); |
| @@ -5040,7 +5046,7 @@ void unregister_ftrace_graph(void) | |||
| 5040 | ftrace_graph_active--; | 5046 | ftrace_graph_active--; |
| 5041 | ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; | 5047 | ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; |
| 5042 | ftrace_graph_entry = ftrace_graph_entry_stub; | 5048 | ftrace_graph_entry = ftrace_graph_entry_stub; |
| 5043 | ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET); | 5049 | ftrace_shutdown(&fgraph_ops, FTRACE_STOP_FUNC_RET); |
| 5044 | unregister_pm_notifier(&ftrace_suspend_notifier); | 5050 | unregister_pm_notifier(&ftrace_suspend_notifier); |
| 5045 | unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); | 5051 | unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); |
| 5046 | 5052 | ||
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 78e27e3b52ac..e854f420e033 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
| @@ -24,6 +24,12 @@ static int total_ref_count; | |||
| 24 | static int perf_trace_event_perm(struct ftrace_event_call *tp_event, | 24 | static int perf_trace_event_perm(struct ftrace_event_call *tp_event, |
| 25 | struct perf_event *p_event) | 25 | struct perf_event *p_event) |
| 26 | { | 26 | { |
| 27 | if (tp_event->perf_perm) { | ||
| 28 | int ret = tp_event->perf_perm(tp_event, p_event); | ||
| 29 | if (ret) | ||
| 30 | return ret; | ||
| 31 | } | ||
| 32 | |||
| 27 | /* The ftrace function trace is allowed only for root. */ | 33 | /* The ftrace function trace is allowed only for root. */ |
| 28 | if (ftrace_event_is_function(tp_event) && | 34 | if (ftrace_event_is_function(tp_event) && |
| 29 | perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) | 35 | perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) |
| @@ -173,7 +179,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, | |||
| 173 | int perf_trace_init(struct perf_event *p_event) | 179 | int perf_trace_init(struct perf_event *p_event) |
| 174 | { | 180 | { |
| 175 | struct ftrace_event_call *tp_event; | 181 | struct ftrace_event_call *tp_event; |
| 176 | int event_id = p_event->attr.config; | 182 | u64 event_id = p_event->attr.config; |
| 177 | int ret = -EINVAL; | 183 | int ret = -EINVAL; |
| 178 | 184 | ||
| 179 | mutex_lock(&event_mutex); | 185 | mutex_lock(&event_mutex); |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f919a2e21bf3..a11800ae96de 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
| @@ -2314,6 +2314,9 @@ int event_trace_del_tracer(struct trace_array *tr) | |||
| 2314 | /* Disable any running events */ | 2314 | /* Disable any running events */ |
| 2315 | __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); | 2315 | __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); |
| 2316 | 2316 | ||
| 2317 | /* Accesses to events are within rcu_read_lock_sched() */ | ||
| 2318 | synchronize_sched(); | ||
| 2319 | |||
| 2317 | down_write(&trace_event_sem); | 2320 | down_write(&trace_event_sem); |
| 2318 | __trace_remove_event_dirs(tr); | 2321 | __trace_remove_event_dirs(tr); |
| 2319 | debugfs_remove_recursive(tr->event_dir); | 2322 | debugfs_remove_recursive(tr->event_dir); |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index e4b6d11bdf78..ea90eb5f6f17 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
| @@ -431,11 +431,6 @@ static void unreg_event_syscall_enter(struct ftrace_event_file *file, | |||
| 431 | if (!tr->sys_refcount_enter) | 431 | if (!tr->sys_refcount_enter) |
| 432 | unregister_trace_sys_enter(ftrace_syscall_enter, tr); | 432 | unregister_trace_sys_enter(ftrace_syscall_enter, tr); |
| 433 | mutex_unlock(&syscall_trace_lock); | 433 | mutex_unlock(&syscall_trace_lock); |
| 434 | /* | ||
| 435 | * Callers expect the event to be completely disabled on | ||
| 436 | * return, so wait for current handlers to finish. | ||
| 437 | */ | ||
| 438 | synchronize_sched(); | ||
| 439 | } | 434 | } |
| 440 | 435 | ||
| 441 | static int reg_event_syscall_exit(struct ftrace_event_file *file, | 436 | static int reg_event_syscall_exit(struct ftrace_event_file *file, |
| @@ -474,11 +469,6 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file, | |||
| 474 | if (!tr->sys_refcount_exit) | 469 | if (!tr->sys_refcount_exit) |
| 475 | unregister_trace_sys_exit(ftrace_syscall_exit, tr); | 470 | unregister_trace_sys_exit(ftrace_syscall_exit, tr); |
| 476 | mutex_unlock(&syscall_trace_lock); | 471 | mutex_unlock(&syscall_trace_lock); |
| 477 | /* | ||
| 478 | * Callers expect the event to be completely disabled on | ||
| 479 | * return, so wait for current handlers to finish. | ||
| 480 | */ | ||
| 481 | synchronize_sched(); | ||
| 482 | } | 472 | } |
| 483 | 473 | ||
| 484 | static int __init init_syscall_trace(struct ftrace_event_call *call) | 474 | static int __init init_syscall_trace(struct ftrace_event_call *call) |
diff --git a/kernel/user.c b/kernel/user.c index 5bbb91988e69..a3a0dbfda329 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
| @@ -51,6 +51,10 @@ struct user_namespace init_user_ns = { | |||
| 51 | .owner = GLOBAL_ROOT_UID, | 51 | .owner = GLOBAL_ROOT_UID, |
| 52 | .group = GLOBAL_ROOT_GID, | 52 | .group = GLOBAL_ROOT_GID, |
| 53 | .proc_inum = PROC_USER_INIT_INO, | 53 | .proc_inum = PROC_USER_INIT_INO, |
| 54 | #ifdef CONFIG_PERSISTENT_KEYRINGS | ||
| 55 | .persistent_keyring_register_sem = | ||
| 56 | __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem), | ||
| 57 | #endif | ||
| 54 | }; | 58 | }; |
| 55 | EXPORT_SYMBOL_GPL(init_user_ns); | 59 | EXPORT_SYMBOL_GPL(init_user_ns); |
| 56 | 60 | ||
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 13fb1134ba58..240fb62cf394 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
| @@ -101,6 +101,9 @@ int create_user_ns(struct cred *new) | |||
| 101 | 101 | ||
| 102 | set_cred_user_ns(new, ns); | 102 | set_cred_user_ns(new, ns); |
| 103 | 103 | ||
| 104 | #ifdef CONFIG_PERSISTENT_KEYRINGS | ||
| 105 | init_rwsem(&ns->persistent_keyring_register_sem); | ||
| 106 | #endif | ||
| 104 | return 0; | 107 | return 0; |
| 105 | } | 108 | } |
| 106 | 109 | ||
| @@ -130,6 +133,9 @@ void free_user_ns(struct user_namespace *ns) | |||
| 130 | 133 | ||
| 131 | do { | 134 | do { |
| 132 | parent = ns->parent; | 135 | parent = ns->parent; |
| 136 | #ifdef CONFIG_PERSISTENT_KEYRINGS | ||
| 137 | key_put(ns->persistent_keyring_register); | ||
| 138 | #endif | ||
| 133 | proc_free_inum(ns->proc_inum); | 139 | proc_free_inum(ns->proc_inum); |
| 134 | kmem_cache_free(user_ns_cachep, ns); | 140 | kmem_cache_free(user_ns_cachep, ns); |
| 135 | ns = parent; | 141 | ns = parent; |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 987293d03ebc..b010eac595d2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -305,6 +305,9 @@ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER); | |||
| 305 | /* I: attributes used when instantiating standard unbound pools on demand */ | 305 | /* I: attributes used when instantiating standard unbound pools on demand */ |
| 306 | static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; | 306 | static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; |
| 307 | 307 | ||
| 308 | /* I: attributes used when instantiating ordered pools on demand */ | ||
| 309 | static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS]; | ||
| 310 | |||
| 308 | struct workqueue_struct *system_wq __read_mostly; | 311 | struct workqueue_struct *system_wq __read_mostly; |
| 309 | EXPORT_SYMBOL(system_wq); | 312 | EXPORT_SYMBOL(system_wq); |
| 310 | struct workqueue_struct *system_highpri_wq __read_mostly; | 313 | struct workqueue_struct *system_highpri_wq __read_mostly; |
| @@ -518,14 +521,21 @@ static inline void debug_work_activate(struct work_struct *work) { } | |||
| 518 | static inline void debug_work_deactivate(struct work_struct *work) { } | 521 | static inline void debug_work_deactivate(struct work_struct *work) { } |
| 519 | #endif | 522 | #endif |
| 520 | 523 | ||
| 521 | /* allocate ID and assign it to @pool */ | 524 | /** |
| 525 | * worker_pool_assign_id - allocate ID and assign it to @pool | ||
| 526 | * @pool: the pool pointer of interest | ||
| 527 | * | ||
| 528 | * Returns 0 if an ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned | ||
| 529 | * successfully, -errno on failure. | ||
| 530 | */ | ||
| 522 | static int worker_pool_assign_id(struct worker_pool *pool) | 531 | static int worker_pool_assign_id(struct worker_pool *pool) |
| 523 | { | 532 | { |
| 524 | int ret; | 533 | int ret; |
| 525 | 534 | ||
| 526 | lockdep_assert_held(&wq_pool_mutex); | 535 | lockdep_assert_held(&wq_pool_mutex); |
| 527 | 536 | ||
| 528 | ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL); | 537 | ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE, |
| 538 | GFP_KERNEL); | ||
| 529 | if (ret >= 0) { | 539 | if (ret >= 0) { |
| 530 | pool->id = ret; | 540 | pool->id = ret; |
| 531 | return 0; | 541 | return 0; |
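
A toy sketch of the half-open range the new kerneldoc and idr_alloc() bound express: pool IDs are drawn from [0, WORK_OFFQ_POOL_NONE), so the NONE value itself can never collide with a real pool ID (the names and allocator below are illustrative only):

	#include <stdio.h>
	#include <stdbool.h>

	#define POOL_NONE 8		/* stand-in for WORK_OFFQ_POOL_NONE */

	static bool used[POOL_NONE];

	static int alloc_id(void)	/* smallest free ID in [0, POOL_NONE) */
	{
		for (int id = 0; id < POOL_NONE; id++)
			if (!used[id]) {
				used[id] = true;
				return id;
			}
		return -1;		/* the real idr_alloc() returns -ENOSPC */
	}

	int main(void)
	{
		printf("%d %d\n", alloc_id(), alloc_id());	/* prints "0 1" */
		return 0;
	}
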
| @@ -1320,7 +1330,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, | |||
| 1320 | 1330 | ||
| 1321 | debug_work_activate(work); | 1331 | debug_work_activate(work); |
| 1322 | 1332 | ||
| 1323 | /* if dying, only works from the same workqueue are allowed */ | 1333 | /* if draining, only works from the same workqueue are allowed */ |
| 1324 | if (unlikely(wq->flags & __WQ_DRAINING) && | 1334 | if (unlikely(wq->flags & __WQ_DRAINING) && |
| 1325 | WARN_ON_ONCE(!is_chained_work(wq))) | 1335 | WARN_ON_ONCE(!is_chained_work(wq))) |
| 1326 | return; | 1336 | return; |
| @@ -1736,16 +1746,17 @@ static struct worker *create_worker(struct worker_pool *pool) | |||
| 1736 | if (IS_ERR(worker->task)) | 1746 | if (IS_ERR(worker->task)) |
| 1737 | goto fail; | 1747 | goto fail; |
| 1738 | 1748 | ||
| 1749 | set_user_nice(worker->task, pool->attrs->nice); | ||
| 1750 | |||
| 1751 | /* prevent userland from meddling with cpumask of workqueue workers */ | ||
| 1752 | worker->task->flags |= PF_NO_SETAFFINITY; | ||
| 1753 | |||
| 1739 | /* | 1754 | /* |
| 1740 | * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any | 1755 | * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any |
| 1741 | * online CPUs. It'll be re-applied when any of the CPUs come up. | 1756 | * online CPUs. It'll be re-applied when any of the CPUs come up. |
| 1742 | */ | 1757 | */ |
| 1743 | set_user_nice(worker->task, pool->attrs->nice); | ||
| 1744 | set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); | 1758 | set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); |
| 1745 | 1759 | ||
| 1746 | /* prevent userland from meddling with cpumask of workqueue workers */ | ||
| 1747 | worker->task->flags |= PF_NO_SETAFFINITY; | ||
| 1748 | |||
| 1749 | /* | 1760 | /* |
| 1750 | * The caller is responsible for ensuring %POOL_DISASSOCIATED | 1761 | * The caller is responsible for ensuring %POOL_DISASSOCIATED |
| 1751 | * remains stable across this function. See the comments above the | 1762 | * remains stable across this function. See the comments above the |
| @@ -2840,19 +2851,6 @@ already_gone: | |||
| 2840 | return false; | 2851 | return false; |
| 2841 | } | 2852 | } |
| 2842 | 2853 | ||
| 2843 | static bool __flush_work(struct work_struct *work) | ||
| 2844 | { | ||
| 2845 | struct wq_barrier barr; | ||
| 2846 | |||
| 2847 | if (start_flush_work(work, &barr)) { | ||
| 2848 | wait_for_completion(&barr.done); | ||
| 2849 | destroy_work_on_stack(&barr.work); | ||
| 2850 | return true; | ||
| 2851 | } else { | ||
| 2852 | return false; | ||
| 2853 | } | ||
| 2854 | } | ||
| 2855 | |||
| 2856 | /** | 2854 | /** |
| 2857 | * flush_work - wait for a work to finish executing the last queueing instance | 2855 | * flush_work - wait for a work to finish executing the last queueing instance |
| 2858 | * @work: the work to flush | 2856 | * @work: the work to flush |
| @@ -2866,10 +2864,18 @@ static bool __flush_work(struct work_struct *work) | |||
| 2866 | */ | 2864 | */ |
| 2867 | bool flush_work(struct work_struct *work) | 2865 | bool flush_work(struct work_struct *work) |
| 2868 | { | 2866 | { |
| 2867 | struct wq_barrier barr; | ||
| 2868 | |||
| 2869 | lock_map_acquire(&work->lockdep_map); | 2869 | lock_map_acquire(&work->lockdep_map); |
| 2870 | lock_map_release(&work->lockdep_map); | 2870 | lock_map_release(&work->lockdep_map); |
| 2871 | 2871 | ||
| 2872 | return __flush_work(work); | 2872 | if (start_flush_work(work, &barr)) { |
| 2873 | wait_for_completion(&barr.done); | ||
| 2874 | destroy_work_on_stack(&barr.work); | ||
| 2875 | return true; | ||
| 2876 | } else { | ||
| 2877 | return false; | ||
| 2878 | } | ||
| 2873 | } | 2879 | } |
| 2874 | EXPORT_SYMBOL_GPL(flush_work); | 2880 | EXPORT_SYMBOL_GPL(flush_work); |
| 2875 | 2881 | ||
| @@ -4106,7 +4112,7 @@ out_unlock: | |||
| 4106 | static int alloc_and_link_pwqs(struct workqueue_struct *wq) | 4112 | static int alloc_and_link_pwqs(struct workqueue_struct *wq) |
| 4107 | { | 4113 | { |
| 4108 | bool highpri = wq->flags & WQ_HIGHPRI; | 4114 | bool highpri = wq->flags & WQ_HIGHPRI; |
| 4109 | int cpu; | 4115 | int cpu, ret; |
| 4110 | 4116 | ||
| 4111 | if (!(wq->flags & WQ_UNBOUND)) { | 4117 | if (!(wq->flags & WQ_UNBOUND)) { |
| 4112 | wq->cpu_pwqs = alloc_percpu(struct pool_workqueue); | 4118 | wq->cpu_pwqs = alloc_percpu(struct pool_workqueue); |
| @@ -4126,6 +4132,13 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) | |||
| 4126 | mutex_unlock(&wq->mutex); | 4132 | mutex_unlock(&wq->mutex); |
| 4127 | } | 4133 | } |
| 4128 | return 0; | 4134 | return 0; |
| 4135 | } else if (wq->flags & __WQ_ORDERED) { | ||
| 4136 | ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]); | ||
| 4137 | /* there should only be a single pwq for the ordering guarantee */ | ||
| 4138 | WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node || | ||
| 4139 | wq->pwqs.prev != &wq->dfl_pwq->pwqs_node), | ||
| 4140 | "ordering guarantee broken for workqueue %s\n", wq->name); | ||
| 4141 | return ret; | ||
| 4129 | } else { | 4142 | } else { |
| 4130 | return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]); | 4143 | return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]); |
| 4131 | } | 4144 | } |
| @@ -4814,14 +4827,7 @@ long work_on_cpu(int cpu, long (*fn)(void *), void *arg) | |||
| 4814 | 4827 | ||
| 4815 | INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); | 4828 | INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); |
| 4816 | schedule_work_on(cpu, &wfc.work); | 4829 | schedule_work_on(cpu, &wfc.work); |
| 4817 | 4830 | flush_work(&wfc.work); | |
| 4818 | /* | ||
| 4819 | * The work item is on-stack and can't lead to deadlock through | ||
| 4820 | * flushing. Use __flush_work() to avoid spurious lockdep warnings | ||
| 4821 | * when work_on_cpu()s are nested. | ||
| 4822 | */ | ||
| 4823 | __flush_work(&wfc.work); | ||
| 4824 | |||
| 4825 | return wfc.ret; | 4831 | return wfc.ret; |
| 4826 | } | 4832 | } |
| 4827 | EXPORT_SYMBOL_GPL(work_on_cpu); | 4833 | EXPORT_SYMBOL_GPL(work_on_cpu); |
| @@ -5009,10 +5015,6 @@ static int __init init_workqueues(void) | |||
| 5009 | int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL }; | 5015 | int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL }; |
| 5010 | int i, cpu; | 5016 | int i, cpu; |
| 5011 | 5017 | ||
| 5012 | /* make sure we have enough bits for OFFQ pool ID */ | ||
| 5013 | BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) < | ||
| 5014 | WORK_CPU_END * NR_STD_WORKER_POOLS); | ||
| 5015 | |||
| 5016 | WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); | 5018 | WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); |
| 5017 | 5019 | ||
| 5018 | pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); | 5020 | pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); |
| @@ -5051,13 +5053,23 @@ static int __init init_workqueues(void) | |||
| 5051 | } | 5053 | } |
| 5052 | } | 5054 | } |
| 5053 | 5055 | ||
| 5054 | /* create default unbound wq attrs */ | 5056 | /* create default unbound and ordered wq attrs */ |
| 5055 | for (i = 0; i < NR_STD_WORKER_POOLS; i++) { | 5057 | for (i = 0; i < NR_STD_WORKER_POOLS; i++) { |
| 5056 | struct workqueue_attrs *attrs; | 5058 | struct workqueue_attrs *attrs; |
| 5057 | 5059 | ||
| 5058 | BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); | 5060 | BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); |
| 5059 | attrs->nice = std_nice[i]; | 5061 | attrs->nice = std_nice[i]; |
| 5060 | unbound_std_wq_attrs[i] = attrs; | 5062 | unbound_std_wq_attrs[i] = attrs; |
| 5063 | |||
| 5064 | /* | ||
| 5065 | * An ordered wq should have only one pwq as ordering is | ||
| 5066 | * guaranteed by max_active which is enforced by pwqs. | ||
| 5067 | * Turn off NUMA so that dfl_pwq is used for all nodes. | ||
| 5068 | */ | ||
| 5069 | BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); | ||
| 5070 | attrs->nice = std_nice[i]; | ||
| 5071 | attrs->no_numa = true; | ||
| 5072 | ordered_wq_attrs[i] = attrs; | ||
| 5061 | } | 5073 | } |
| 5062 | 5074 | ||
| 5063 | system_wq = alloc_workqueue("events", 0, 0); | 5075 | system_wq = alloc_workqueue("events", 0, 0); |
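
The ordered_wq_attrs plumbing above is what backs alloc_ordered_workqueue(): one pool_workqueue, NUMA off, max_active of 1, so queued items run strictly one at a time in submission order. A hypothetical minimal module leaning on that guarantee:

	#include <linux/module.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *my_ordered_wq;

	static void my_work_fn(struct work_struct *work)
	{
		pr_info("ran in queueing order\n");
	}
	static DECLARE_WORK(my_work, my_work_fn);

	static int __init my_init(void)
	{
		/* sets __WQ_ORDERED under the hood: one pwq serialises all items */
		my_ordered_wq = alloc_ordered_workqueue("my_ordered", 0);
		if (!my_ordered_wq)
			return -ENOMEM;
		queue_work(my_ordered_wq, &my_work);
		return 0;
	}

	static void __exit my_exit(void)
	{
		destroy_workqueue(my_ordered_wq);	/* flushes pending work first */
	}

	module_init(my_init);
	module_exit(my_exit);
	MODULE_LICENSE("GPL");
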
